summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.c10
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_vertex.c5
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.c92
-rw-r--r--src/gallium/drivers/failover/fo_context.c20
-rw-r--r--src/gallium/drivers/failover/fo_state.c6
-rw-r--r--src/gallium/drivers/galahad/glhd_context.c70
-rw-r--r--src/gallium/drivers/galahad/glhd_objects.c8
-rw-r--r--src/gallium/drivers/galahad/glhd_objects.h6
-rw-r--r--src/gallium/drivers/galahad/glhd_screen.c45
-rw-r--r--src/gallium/drivers/i915/TODO25
-rw-r--r--src/gallium/drivers/i915/i915_batch.h5
-rw-r--r--src/gallium/drivers/i915/i915_batchbuffer.h35
-rw-r--r--src/gallium/drivers/i915/i915_blit.c6
-rw-r--r--src/gallium/drivers/i915/i915_context.c4
-rw-r--r--src/gallium/drivers/i915/i915_context.h5
-rw-r--r--src/gallium/drivers/i915/i915_debug.c4
-rw-r--r--src/gallium/drivers/i915/i915_debug.h1
-rw-r--r--src/gallium/drivers/i915/i915_fpc_translate.c8
-rw-r--r--src/gallium/drivers/i915/i915_prim_vbuf.c15
-rw-r--r--src/gallium/drivers/i915/i915_reg.h3
-rw-r--r--src/gallium/drivers/i915/i915_resource.h10
-rw-r--r--src/gallium/drivers/i915/i915_resource_buffer.c25
-rw-r--r--src/gallium/drivers/i915/i915_resource_texture.c131
-rw-r--r--src/gallium/drivers/i915/i915_screen.c2
-rw-r--r--src/gallium/drivers/i915/i915_state.c5
-rw-r--r--src/gallium/drivers/i915/i915_state_emit.c65
-rw-r--r--src/gallium/drivers/i915/i915_state_sampler.c20
-rw-r--r--src/gallium/drivers/i915/i915_surface.c98
-rw-r--r--src/gallium/drivers/i915/i915_surface.h1
-rw-r--r--src/gallium/drivers/i915/i915_winsys.h29
-rw-r--r--src/gallium/drivers/i965/Makefile2
-rw-r--r--src/gallium/drivers/i965/SConscript3
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.c4
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.h5
-rw-r--r--src/gallium/drivers/i965/brw_clip.c6
-rw-r--r--src/gallium/drivers/i965/brw_clip.h2
-rw-r--r--src/gallium/drivers/i965/brw_clip_line.c14
-rw-r--r--src/gallium/drivers/i965/brw_clip_state.c4
-rw-r--r--src/gallium/drivers/i965/brw_clip_tri.c8
-rw-r--r--src/gallium/drivers/i965/brw_clip_util.c12
-rw-r--r--src/gallium/drivers/i965/brw_context.c10
-rw-r--r--src/gallium/drivers/i965/brw_context.h16
-rw-r--r--src/gallium/drivers/i965/brw_defines.h361
-rw-r--r--src/gallium/drivers/i965/brw_disasm.c179
-rw-r--r--src/gallium/drivers/i965/brw_disasm.h6
-rw-r--r--src/gallium/drivers/i965/brw_draw_upload.c14
-rw-r--r--src/gallium/drivers/i965/brw_eu_emit.c158
-rw-r--r--src/gallium/drivers/i965/brw_gs.c4
-rw-r--r--src/gallium/drivers/i965/brw_gs_state.c2
-rw-r--r--src/gallium/drivers/i965/brw_misc_state.c29
-rw-r--r--src/gallium/drivers/i965/brw_pipe_clear.c4
-rw-r--r--src/gallium/drivers/i965/brw_pipe_rast.c2
-rw-r--r--src/gallium/drivers/i965/brw_pipe_surface.c (renamed from src/gallium/drivers/i965/brw_screen_surface.c)85
-rw-r--r--src/gallium/drivers/i965/brw_pipe_vertex.c22
-rw-r--r--src/gallium/drivers/i965/brw_reg.h52
-rw-r--r--src/gallium/drivers/i965/brw_resource_buffer.c7
-rw-r--r--src/gallium/drivers/i965/brw_resource_texture.c35
-rw-r--r--src/gallium/drivers/i965/brw_resource_texture_layout.c2
-rw-r--r--src/gallium/drivers/i965/brw_screen.c59
-rw-r--r--src/gallium/drivers/i965/brw_screen.h14
-rw-r--r--src/gallium/drivers/i965/brw_sf_emit.c4
-rw-r--r--src/gallium/drivers/i965/brw_sf_state.c4
-rw-r--r--src/gallium/drivers/i965/brw_state.h2
-rw-r--r--src/gallium/drivers/i965/brw_state_upload.c2
-rw-r--r--src/gallium/drivers/i965/brw_structs.h238
-rw-r--r--src/gallium/drivers/i965/brw_structs_dump.c2
-rw-r--r--src/gallium/drivers/i965/brw_urb.c4
-rw-r--r--src/gallium/drivers/i965/brw_vs.h1
-rw-r--r--src/gallium/drivers/i965/brw_vs_emit.c17
-rw-r--r--src/gallium/drivers/i965/brw_vs_state.c8
-rw-r--r--src/gallium/drivers/i965/brw_winsys.h4
-rw-r--r--src/gallium/drivers/i965/brw_winsys_debug.c6
-rw-r--r--src/gallium/drivers/i965/brw_wm_emit.c12
-rw-r--r--src/gallium/drivers/i965/brw_wm_fp.c2
-rw-r--r--src/gallium/drivers/i965/brw_wm_glsl.c6
-rw-r--r--src/gallium/drivers/i965/brw_wm_state.c10
-rw-r--r--src/gallium/drivers/i965/intel_decode.c563
-rw-r--r--src/gallium/drivers/i965/intel_decode.h4
-rw-r--r--src/gallium/drivers/i965/intel_structs.h2
-rw-r--r--src/gallium/drivers/identity/SConscript2
-rw-r--r--src/gallium/drivers/identity/id_context.c70
-rw-r--r--src/gallium/drivers/identity/id_objects.c9
-rw-r--r--src/gallium/drivers/identity/id_objects.h6
-rw-r--r--src/gallium/drivers/identity/id_screen.c45
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend.h5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c85
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.h10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c36
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c58
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c131
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h28
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c16
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_priv.h13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.c29
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene_queue.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.h14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_vbuf.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c227
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.c254
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vertex.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_surface.c46
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c65
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c66
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c76
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_main.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_printf.c96
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_round.c69
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_sincos.c65
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.c22
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c108
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.h8
-rw-r--r--src/gallium/drivers/noop/noop_pipe.c136
-rw-r--r--src/gallium/drivers/noop/noop_public.h5
-rw-r--r--src/gallium/drivers/noop/noop_state.c34
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h3
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h7
-rw-r--r--src/gallium/drivers/nouveau/nv_object.xml.h57
-rw-r--r--src/gallium/drivers/nv50/nv50_buffer.c1
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c4
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h1
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c53
-rw-r--r--src/gallium/drivers/nv50/nv50_resource.c8
-rw-r--r--src/gallium/drivers/nv50/nv50_resource.h7
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c4
-rw-r--r--src/gallium/drivers/nv50/nv50_shader_state.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c14
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c12
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c69
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c27
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c2
-rw-r--r--src/gallium/drivers/nvc0/Makefile34
-rw-r--r--src/gallium/drivers/nvc0/SConscript36
-rw-r--r--src/gallium/drivers/nvc0/nv50_defs.xml.h142
-rw-r--r--src/gallium/drivers/nvc0/nv50_texture.xml.h259
-rw-r--r--src/gallium/drivers/nvc0/nvc0_2d.xml.h380
-rw-r--r--src/gallium/drivers/nvc0/nvc0_3d.xml.h1182
-rw-r--r--src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h98
-rw-r--r--src/gallium/drivers/nvc0/nvc0_buffer.c489
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.c164
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.h227
-rw-r--r--src/gallium/drivers/nvc0/nvc0_draw.c88
-rw-r--r--src/gallium/drivers/nvc0/nvc0_fence.c203
-rw-r--r--src/gallium/drivers/nvc0/nvc0_fence.h48
-rw-r--r--src/gallium/drivers/nvc0/nvc0_formats.c462
-rw-r--r--src/gallium/drivers/nvc0/nvc0_graph_macros.h235
-rw-r--r--src/gallium/drivers/nvc0/nvc0_m2mf.xml.h138
-rw-r--r--src/gallium/drivers/nvc0/nvc0_miptree.c327
-rw-r--r--src/gallium/drivers/nvc0/nvc0_mm.c274
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc.c693
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc.h653
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_emit.c979
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_optimize.c1236
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_print.c377
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_regalloc.c927
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.c678
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.h88
-rw-r--r--src/gallium/drivers/nvc0/nvc0_push.c279
-rw-r--r--src/gallium/drivers/nvc0/nvc0_push2.c333
-rw-r--r--src/gallium/drivers/nvc0/nvc0_query.c337
-rw-r--r--src/gallium/drivers/nvc0/nvc0_resource.c71
-rw-r--r--src/gallium/drivers/nvc0/nvc0_resource.h201
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.c669
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.h192
-rw-r--r--src/gallium/drivers/nvc0/nvc0_shader_state.c180
-rw-r--r--src/gallium/drivers/nvc0/nvc0_state.c865
-rw-r--r--src/gallium/drivers/nvc0/nvc0_state_validate.c430
-rw-r--r--src/gallium/drivers/nvc0/nvc0_stateobj.h81
-rw-r--r--src/gallium/drivers/nvc0/nvc0_surface.c377
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tex.c277
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c2004
-rw-r--r--src/gallium/drivers/nvc0/nvc0_transfer.c381
-rw-r--r--src/gallium/drivers/nvc0/nvc0_transfer.h38
-rw-r--r--src/gallium/drivers/nvc0/nvc0_vbo.c654
-rw-r--r--src/gallium/drivers/nvc0/nvc0_winsys.h120
-rw-r--r--src/gallium/drivers/nvfx/nv04_2d.c2
-rw-r--r--src/gallium/drivers/nvfx/nv30_fragtex.c11
-rw-r--r--src/gallium/drivers/nvfx/nv40_fragtex.c13
-rw-r--r--src/gallium/drivers/nvfx/nvfx_buffer.c1
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.c6
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.h13
-rw-r--r--src/gallium/drivers/nvfx/nvfx_draw.c14
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragprog.c16
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragtex.c8
-rw-r--r--src/gallium/drivers/nvfx/nvfx_miptree.c26
-rw-r--r--src/gallium/drivers/nvfx/nvfx_push.c57
-rw-r--r--src/gallium/drivers/nvfx/nvfx_query.c12
-rw-r--r--src/gallium/drivers/nvfx/nvfx_resource.c8
-rw-r--r--src/gallium/drivers/nvfx/nvfx_resource.h8
-rw-r--r--src/gallium/drivers/nvfx/nvfx_screen.c70
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state.c2
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_emit.c68
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_fb.c53
-rw-r--r--src/gallium/drivers/nvfx/nvfx_surface.c68
-rw-r--r--src/gallium/drivers/nvfx/nvfx_transfer.c26
-rw-r--r--src/gallium/drivers/nvfx/nvfx_transfer.h2
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vbo.c39
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vertprog.c14
-rw-r--r--src/gallium/drivers/r300/r300_blit.c124
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c3
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h4
-rw-r--r--src/gallium/drivers/r300/r300_context.c109
-rw-r--r--src/gallium/drivers/r300/r300_context.h146
-rw-r--r--src/gallium/drivers/r300/r300_cs.h24
-rw-r--r--src/gallium/drivers/r300/r300_debug.c2
-rw-r--r--src/gallium/drivers/r300/r300_defines.h4
-rw-r--r--src/gallium/drivers/r300/r300_emit.c352
-rw-r--r--src/gallium/drivers/r300/r300_emit.h4
-rw-r--r--src/gallium/drivers/r300/r300_flush.c14
-rw-r--r--src/gallium/drivers/r300/r300_fs.c13
-rw-r--r--src/gallium/drivers/r300/r300_fs.h9
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c10
-rw-r--r--src/gallium/drivers/r300/r300_query.c3
-rw-r--r--src/gallium/drivers/r300/r300_reg.h9
-rw-r--r--src/gallium/drivers/r300/r300_render.c307
-rw-r--r--src/gallium/drivers/r300/r300_render_stencilref.c10
-rw-r--r--src/gallium/drivers/r300/r300_render_translate.c38
-rw-r--r--src/gallium/drivers/r300/r300_resource.c5
-rw-r--r--src/gallium/drivers/r300/r300_screen.c27
-rw-r--r--src/gallium/drivers/r300/r300_screen.h6
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.c260
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.h32
-rw-r--r--src/gallium/drivers/r300/r300_state.c246
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c95
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.h30
-rw-r--r--src/gallium/drivers/r300/r300_texture.c80
-rw-r--r--src/gallium/drivers/r300/r300_texture.h18
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.c57
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.h5
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c2
-rw-r--r--src/gallium/drivers/r300/r300_transfer.c57
-rw-r--r--src/gallium/drivers/r300/r300_transfer.h14
-rw-r--r--src/gallium/drivers/r300/r300_vs.c5
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h58
-rw-r--r--src/gallium/drivers/r600/Makefile3
-rw-r--r--src/gallium/drivers/r600/SConscript1
-rw-r--r--src/gallium/drivers/r600/eg_asm.c49
-rw-r--r--src/gallium/drivers/r600/eg_state_inlines.h2
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c380
-rw-r--r--src/gallium/drivers/r600/r600.h14
-rw-r--r--src/gallium/drivers/r600/r600_asm.c1689
-rw-r--r--src/gallium/drivers/r600/r600_asm.h34
-rw-r--r--src/gallium/drivers/r600/r600_blit.c55
-rw-r--r--src/gallium/drivers/r600/r600_buffer.c144
-rw-r--r--src/gallium/drivers/r600/r600_opcodes.h4
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c55
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h61
-rw-r--r--src/gallium/drivers/r600/r600_resource.h65
-rw-r--r--src/gallium/drivers/r600/r600_shader.c564
-rw-r--r--src/gallium/drivers/r600/r600_shader.h4
-rw-r--r--src/gallium/drivers/r600/r600_sq.h6
-rw-r--r--src/gallium/drivers/r600/r600_state.c288
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c76
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h1
-rw-r--r--src/gallium/drivers/r600/r600_texture.c245
-rw-r--r--src/gallium/drivers/r600/r600_translate.c60
-rw-r--r--src/gallium/drivers/r600/r600_upload.c114
-rw-r--r--src/gallium/drivers/r600/r700_asm.c11
-rw-r--r--src/gallium/drivers/rbug/rbug_context.c72
-rw-r--r--src/gallium/drivers/rbug/rbug_core.c8
-rw-r--r--src/gallium/drivers/rbug/rbug_objects.c9
-rw-r--r--src/gallium/drivers/rbug/rbug_objects.h6
-rw-r--r--src/gallium/drivers/rbug/rbug_screen.c46
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c10
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h8
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.h2
-rw-r--r--src/gallium/drivers/softpipe/sp_limits.h42
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_blend.c14
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h2
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_state_sampler.c54
-rw-r--r--src/gallium/drivers/softpipe/sp_state_shader.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_state_vertex.c6
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c172
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.h31
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.c57
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.h36
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c80
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.h5
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c14
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.h22
-rw-r--r--src/gallium/drivers/svga/SConscript2
-rw-r--r--src/gallium/drivers/svga/svga_cmd.c4
-rw-r--r--src/gallium/drivers/svga/svga_context.c1
-rw-r--r--src/gallium/drivers/svga/svga_context.h6
-rw-r--r--src/gallium/drivers/svga/svga_draw.c1
-rw-r--r--src/gallium/drivers/svga/svga_draw_arrays.c4
-rw-r--r--src/gallium/drivers/svga/svga_draw_elements.c13
-rw-r--r--src/gallium/drivers/svga/svga_pipe_blit.c60
-rw-r--r--src/gallium/drivers/svga/svga_pipe_rasterizer.c2
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.c5
-rw-r--r--src/gallium/drivers/svga/svga_resource_texture.c81
-rw-r--r--src/gallium/drivers/svga/svga_resource_texture.h2
-rw-r--r--src/gallium/drivers/svga/svga_screen.c2
-rw-r--r--src/gallium/drivers/svga/svga_state_constants.c2
-rw-r--r--src/gallium/drivers/svga/svga_state_fs.c2
-rw-r--r--src/gallium/drivers/svga/svga_state_need_swtnl.c12
-rw-r--r--src/gallium/drivers/svga/svga_state_tss.c1
-rw-r--r--src/gallium/drivers/svga/svga_state_vdecl.c6
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c4
-rw-r--r--src/gallium/drivers/svga/svga_surface.c98
-rw-r--r--src/gallium/drivers/svga/svga_surface.h3
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_backend.c10
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_draw.c15
-rw-r--r--src/gallium/drivers/trace/tr_context.c137
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c50
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.h2
-rw-r--r--src/gallium/drivers/trace/tr_screen.c77
-rw-r--r--src/gallium/drivers/trace/tr_texture.c6
-rw-r--r--src/gallium/drivers/trace/tr_texture.h4
328 files changed, 25919 insertions, 4942 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
index 143eca848f..f9b83c8666 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -37,6 +37,7 @@
#include "pipe/p_format.h"
#include "util/u_memory.h"
#include "pipe/p_screen.h"
+#include "util/u_inlines.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
@@ -61,6 +62,11 @@ static void
cell_destroy_context( struct pipe_context *pipe )
{
struct cell_context *cell = cell_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < cell->num_vertex_buffers; i++) {
+ pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL);
+ }
util_delete_keymap(cell->fragment_ops_cache, NULL);
@@ -100,8 +106,8 @@ static const struct debug_named_value cell_debug_flags[] = {
static unsigned int
cell_is_resource_referenced( struct pipe_context *pipe,
- struct pipe_resource *texture,
- unsigned face, unsigned level)
+ struct pipe_resource *texture,
+ unsigned level, int layer)
{
/**
* FIXME: Optimize.
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
index a065d68b5a..eb22a09a91 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -82,8 +82,9 @@ cell_set_vertex_buffers(struct pipe_context *pipe,
assert(count <= PIPE_MAX_ATTRIBS);
- memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0]));
- cell->num_vertex_buffers = count;
+ util_copy_vertex_buffers(cell->vertex_buffer,
+ &cell->num_vertex_buffers,
+ buffers, count);
cell->dirty |= CELL_NEW_VERTEX;
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index b3042df779..946a7050e5 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -304,47 +304,34 @@ untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
static struct pipe_surface *
-cell_get_tex_surface(struct pipe_screen *screen,
- struct pipe_resource *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned usage)
+cell_create_surface(struct pipe_context *ctx,
+ struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl)
{
struct cell_resource *ct = cell_resource(pt);
struct pipe_surface *ps;
+ assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
ps = CALLOC_STRUCT(pipe_surface);
if (ps) {
pipe_reference_init(&ps->reference, 1);
pipe_resource_reference(&ps->texture, pt);
- ps->format = pt->format;
- ps->width = u_minify(pt->width0, level);
- ps->height = u_minify(pt->height0, level);
- ps->offset = ct->level_offset[level];
+ ps->format = surf_tmpl->format;
+ ps->context = ctx;
+ ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
+ ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
/* XXX may need to override usage flags (see sp_texture.c) */
- ps->usage = usage;
- ps->face = face;
- ps->level = level;
- ps->zslice = zslice;
-
- if (pt->target == PIPE_TEXTURE_CUBE) {
- unsigned h_tile = align(ps->height, TILE_SIZE);
- ps->offset += face * util_format_get_nblocksy(ps->format, h_tile) * ct->stride[level];
- }
- else if (pt->target == PIPE_TEXTURE_3D) {
- unsigned h_tile = align(ps->height, TILE_SIZE);
- ps->offset += zslice * util_format_get_nblocksy(ps->format, h_tile) * ct->stride[level];
- }
- else {
- assert(face == 0);
- assert(zslice == 0);
- }
+ ps->usage = surf_tmpl->usage;
+ ps->u.tex.level = surf_tmpl->u.tex.level;
+ ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
}
return ps;
}
static void
-cell_tex_surface_destroy(struct pipe_surface *surf)
+cell_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surf)
{
pipe_resource_reference(&surf->texture, NULL);
FREE(surf);
@@ -358,44 +345,39 @@ cell_tex_surface_destroy(struct pipe_surface *surf)
*/
static struct pipe_transfer *
cell_get_transfer(struct pipe_context *ctx,
- struct pipe_resource *resource,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box)
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
struct cell_resource *ct = cell_resource(resource);
struct cell_transfer *ctrans;
enum pipe_format format = resource->format;
assert(resource);
- assert(sr.level <= resource->last_level);
+ assert(level <= resource->last_level);
/* make sure the requested region is in the image bounds */
- assert(box->x + box->width <= u_minify(resource->width0, sr.level));
- assert(box->y + box->height <= u_minify(resource->height0, sr.level));
- assert(box->z + box->depth <= u_minify(resource->depth0, sr.level));
+ assert(box->x + box->width <= u_minify(resource->width0, level));
+ assert(box->y + box->height <= u_minify(resource->height0, level));
+ assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1));
ctrans = CALLOC_STRUCT(cell_transfer);
if (ctrans) {
struct pipe_transfer *pt = &ctrans->base;
pipe_resource_reference(&pt->resource, resource);
- pt->sr = sr;
+ pt->level = level;
pt->usage = usage;
pt->box = *box;
- pt->stride = ct->stride[sr.level];
+ pt->stride = ct->stride[level];
- ctrans->offset = ct->level_offset[sr.level];
+ ctrans->offset = ct->level_offset[level];
- if (resource->target == PIPE_TEXTURE_CUBE) {
- unsigned h_tile = align(u_minify(resource->height0, sr.level), TILE_SIZE);
- ctrans->offset += sr.face * util_format_get_nblocksy(format, h_tile) * pt->stride;
- }
- else if (resource->target == PIPE_TEXTURE_3D) {
- unsigned h_tile = align(u_minify(resource->height0, sr.level), TILE_SIZE);
+ if (resource->target == PIPE_TEXTURE_CUBE || resource->target == PIPE_TEXTURE_3D) {
+ unsigned h_tile = align(u_minify(resource->height0, level), TILE_SIZE);
ctrans->offset += box->z * util_format_get_nblocksy(format, h_tile) * pt->stride;
}
else {
- assert(sr.face == 0);
assert(box->z == 0);
}
@@ -439,7 +421,7 @@ cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer)
/* Better test would be resource->is_linear
*/
if (transfer->resource->target != PIPE_BUFFER) {
- const uint level = ctrans->base.sr.level;
+ const uint level = ctrans->base.level;
const uint texWidth = u_minify(pt->width0, level);
const uint texHeight = u_minify(pt->height0, level);
unsigned size;
@@ -500,7 +482,7 @@ cell_transfer_unmap(struct pipe_context *ctx,
struct cell_transfer *ctrans = cell_transfer(transfer);
struct pipe_resource *pt = transfer->resource;
struct cell_resource *ct = cell_resource(pt);
- const uint level = ctrans->base.sr.level;
+ const uint level = ctrans->base.level;
const uint texWidth = u_minify(pt->width0, level);
const uint texHeight = u_minify(pt->height0, level);
const uint stride = ct->stride[level];
@@ -548,12 +530,13 @@ cell_transfer_unmap(struct pipe_context *ctx,
*/
static void
cell_flush_frontbuffer(struct pipe_screen *_screen,
- struct pipe_surface *surface,
+ struct pipe_resource *resource,
+ unsigned level, unsigned layer,
void *context_private)
{
struct cell_screen *screen = cell_screen(_screen);
struct sw_winsys *winsys = screen->winsys;
- struct cell_resource *ct = cell_resource(surface->texture);
+ struct cell_resource *ct = cell_resource(resource);
if (!ct->dt)
return;
@@ -564,10 +547,10 @@ cell_flush_frontbuffer(struct pipe_screen *_screen,
unsigned *map = winsys->displaytarget_map(winsys, ct->dt,
(PIPE_TRANSFER_READ |
PIPE_TRANSFER_WRITE));
- unsigned *src = (unsigned *)(ct->data + ct->level_offset[surface->level]);
+ unsigned *src = (unsigned *)(ct->data + ct->level_offset[level]);
- untwiddle_image_uint(surface->width,
- surface->height,
+ untwiddle_image_uint(u_minify(resource->width0, level),
+ u_minify(resource->height0, level),
TILE_SIZE,
map,
ct->dt_stride,
@@ -605,6 +588,7 @@ cell_user_buffer_create(struct pipe_screen *screen,
buffer->base.width0 = bytes;
buffer->base.height0 = 1;
buffer->base.depth0 = 1;
+ buffer->base.array_size = 1;
buffer->userBuffer = TRUE;
buffer->data = ptr;
@@ -641,9 +625,6 @@ cell_init_screen_texture_funcs(struct pipe_screen *screen)
screen->resource_get_handle = cell_resource_get_handle;
screen->user_buffer_create = cell_user_buffer_create;
- screen->get_tex_surface = cell_get_tex_surface;
- screen->tex_surface_destroy = cell_tex_surface_destroy;
-
screen->flush_frontbuffer = cell_flush_frontbuffer;
}
@@ -657,4 +638,7 @@ cell_init_texture_transfer_funcs(struct cell_context *cell)
cell->pipe.transfer_flush_region = u_default_transfer_flush_region;
cell->pipe.transfer_inline_write = u_default_transfer_inline_write;
+
+ cell->pipe.create_surface = cell_create_surface;
+ cell->pipe.surface_destroy = cell_surface_destroy;
}
diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c
index ec3609291e..d60718d971 100644
--- a/src/gallium/drivers/failover/fo_context.c
+++ b/src/gallium/drivers/failover/fo_context.c
@@ -29,6 +29,7 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "pipe/p_context.h"
+#include "util/u_inlines.h"
#include "fo_context.h"
#include "fo_winsys.h"
@@ -38,6 +39,11 @@
static void failover_destroy( struct pipe_context *pipe )
{
struct failover_context *failover = failover_context( pipe );
+ unsigned i;
+
+ for (i = 0; i < failover->num_vertex_buffers; i++) {
+ pipe_resource_reference(&failover->vertex_buffers[i].buffer, NULL);
+ }
FREE( failover );
}
@@ -89,14 +95,14 @@ static void failover_draw_vbo( struct pipe_context *pipe,
static unsigned int
failover_is_resource_referenced( struct pipe_context *_pipe,
- struct pipe_resource *resource,
- unsigned face, unsigned level)
+ struct pipe_resource *resource,
+ unsigned level, int layer)
{
struct failover_context *failover = failover_context( _pipe );
struct pipe_context *pipe = (failover->mode == FO_HW) ?
failover->hw : failover->sw;
- return pipe->is_resource_referenced(pipe, resource, face, level);
+ return pipe->is_resource_referenced(pipe, resource, level, layer);
}
struct pipe_context *failover_create( struct pipe_context *hw,
@@ -137,10 +143,10 @@ struct pipe_context *failover_create( struct pipe_context *hw,
failover->pipe.resource_copy_region = hw->resource_copy_region;
#if 0
- failover->pipe.texture_create = hw->texture_create;
- failover->pipe.texture_destroy = hw->texture_destroy;
- failover->pipe.get_tex_surface = hw->get_tex_surface;
- failover->pipe.texture_update = hw->texture_update;
+ failover->pipe.resource_create = hw->resource_create;
+ failover->pipe.resource_destroy = hw->resource_destroy;
+ failover->pipe.create_surface = hw->create_surface;
+ failover->pipe.surface_destroy = hw->surface_destroy;
#endif
failover->pipe.flush = hw->flush;
diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c
index c265f381b6..af1fd953aa 100644
--- a/src/gallium/drivers/failover/fo_state.c
+++ b/src/gallium/drivers/failover/fo_state.c
@@ -574,10 +574,10 @@ failover_set_vertex_buffers(struct pipe_context *pipe,
{
struct failover_context *failover = failover_context(pipe);
- memcpy(failover->vertex_buffers, vertex_buffers,
- count * sizeof(vertex_buffers[0]));
+ util_copy_vertex_buffers(failover->vertex_buffers,
+ &failover->num_vertex_buffers,
+ vertex_buffers, count);
failover->dirty |= FO_NEW_VERTEX_BUFFER;
- failover->num_vertex_buffers = count;
failover->sw->set_vertex_buffers( failover->sw, count, vertex_buffers );
failover->hw->set_vertex_buffers( failover->hw, count, vertex_buffers );
}
diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c
index 50f66079c2..8cbf0b1de4 100644
--- a/src/gallium/drivers/galahad/glhd_context.c
+++ b/src/gallium/drivers/galahad/glhd_context.c
@@ -381,6 +381,8 @@ galahad_create_vertex_elements_state(struct pipe_context *_pipe,
struct galahad_context *glhd_pipe = galahad_context(_pipe);
struct pipe_context *pipe = glhd_pipe->pipe;
+ /* XXX check if stride lines up with element size, at least for floats */
+
return pipe->create_vertex_elements_state(pipe,
num_elements,
vertex_elements);
@@ -662,17 +664,13 @@ galahad_set_index_buffer(struct pipe_context *_pipe,
static void
galahad_resource_copy_region(struct pipe_context *_pipe,
struct pipe_resource *_dst,
- struct pipe_subresource subdst,
+ unsigned dst_level,
unsigned dstx,
unsigned dsty,
unsigned dstz,
struct pipe_resource *_src,
- struct pipe_subresource subsrc,
- unsigned srcx,
- unsigned srcy,
- unsigned srcz,
- unsigned width,
- unsigned height)
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
struct galahad_context *glhd_pipe = galahad_context(_pipe);
struct galahad_resource *glhd_resource_dst = galahad_resource(_dst);
@@ -689,17 +687,13 @@ galahad_resource_copy_region(struct pipe_context *_pipe,
pipe->resource_copy_region(pipe,
dst,
- subdst,
+ dst_level,
dstx,
dsty,
dstz,
src,
- subsrc,
- srcx,
- srcy,
- srcz,
- width,
- height);
+ src_level,
+ src_box);
}
static void
@@ -781,8 +775,8 @@ galahad_flush(struct pipe_context *_pipe,
static unsigned int
galahad_is_resource_referenced(struct pipe_context *_pipe,
struct pipe_resource *_resource,
- unsigned face,
- unsigned level)
+ unsigned level,
+ int layer)
{
struct galahad_context *glhd_pipe = galahad_context(_pipe);
struct galahad_resource *glhd_resource = galahad_resource(_resource);
@@ -791,8 +785,8 @@ galahad_is_resource_referenced(struct pipe_context *_pipe,
return pipe->is_resource_referenced(pipe,
resource,
- face,
- level);
+ level,
+ layer);
}
static struct pipe_sampler_view *
@@ -823,10 +817,40 @@ galahad_context_sampler_view_destroy(struct pipe_context *_pipe,
galahad_sampler_view(_view));
}
+static struct pipe_surface *
+galahad_context_create_surface(struct pipe_context *_pipe,
+ struct pipe_resource *_resource,
+ const struct pipe_surface *templ)
+{
+ struct galahad_context *glhd_context = galahad_context(_pipe);
+ struct galahad_resource *glhd_resource = galahad_resource(_resource);
+ struct pipe_context *pipe = glhd_context->pipe;
+ struct pipe_resource *resource = glhd_resource->resource;
+ struct pipe_surface *result;
+
+ result = pipe->create_surface(pipe,
+ resource,
+ templ);
+
+ if (result)
+ return galahad_surface_create(glhd_context, glhd_resource, result);
+ return NULL;
+}
+
+static void
+galahad_context_surface_destroy(struct pipe_context *_pipe,
+ struct pipe_surface *_surface)
+{
+ galahad_surface_destroy(galahad_context(_pipe),
+ galahad_surface(_surface));
+}
+
+
+
static struct pipe_transfer *
galahad_context_get_transfer(struct pipe_context *_context,
struct pipe_resource *_resource,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box)
{
@@ -838,7 +862,7 @@ galahad_context_get_transfer(struct pipe_context *_context,
result = context->get_transfer(context,
resource,
- sr,
+ level,
usage,
box);
@@ -915,7 +939,7 @@ galahad_context_transfer_unmap(struct pipe_context *_context,
static void
galahad_context_transfer_inline_write(struct pipe_context *_context,
struct pipe_resource *_resource,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box,
const void *data,
@@ -929,7 +953,7 @@ galahad_context_transfer_inline_write(struct pipe_context *_context,
context->transfer_inline_write(context,
resource,
- sr,
+ level,
usage,
box,
data,
@@ -1004,6 +1028,8 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
glhd_pipe->base.is_resource_referenced = galahad_is_resource_referenced;
glhd_pipe->base.create_sampler_view = galahad_context_create_sampler_view;
glhd_pipe->base.sampler_view_destroy = galahad_context_sampler_view_destroy;
+ glhd_pipe->base.create_surface = galahad_context_create_surface;
+ glhd_pipe->base.surface_destroy = galahad_context_surface_destroy;
glhd_pipe->base.get_transfer = galahad_context_get_transfer;
glhd_pipe->base.transfer_destroy = galahad_context_transfer_destroy;
glhd_pipe->base.transfer_map = galahad_context_transfer_map;
diff --git a/src/gallium/drivers/galahad/glhd_objects.c b/src/gallium/drivers/galahad/glhd_objects.c
index 6c5a21ae70..b50d85655e 100644
--- a/src/gallium/drivers/galahad/glhd_objects.c
+++ b/src/gallium/drivers/galahad/glhd_objects.c
@@ -71,7 +71,8 @@ galahad_resource_destroy(struct galahad_resource *glhd_resource)
struct pipe_surface *
-galahad_surface_create(struct galahad_resource *glhd_resource,
+galahad_surface_create(struct galahad_context *glhd_context,
+ struct galahad_resource *glhd_resource,
struct pipe_surface *surface)
{
struct galahad_surface *glhd_surface;
@@ -100,10 +101,11 @@ error:
}
void
-galahad_surface_destroy(struct galahad_surface *glhd_surface)
+galahad_surface_destroy(struct galahad_context *glhd_context,
+ struct galahad_surface *glhd_surface)
{
pipe_resource_reference(&glhd_surface->base.texture, NULL);
- pipe_surface_reference(&glhd_surface->surface, NULL);
+ glhd_context->pipe->surface_destroy(glhd_context->pipe, glhd_surface->surface);
FREE(glhd_surface);
}
diff --git a/src/gallium/drivers/galahad/glhd_objects.h b/src/gallium/drivers/galahad/glhd_objects.h
index dc74c5bebc..13dc748588 100644
--- a/src/gallium/drivers/galahad/glhd_objects.h
+++ b/src/gallium/drivers/galahad/glhd_objects.h
@@ -149,11 +149,13 @@ void
galahad_resource_destroy(struct galahad_resource *glhd_resource);
struct pipe_surface *
-galahad_surface_create(struct galahad_resource *glhd_resource,
+galahad_surface_create(struct galahad_context *glhd_context,
+ struct galahad_resource *glhd_resource,
struct pipe_surface *surface);
void
-galahad_surface_destroy(struct galahad_surface *glhd_surface);
+galahad_surface_destroy(struct galahad_context *glhd_context,
+ struct galahad_surface *glhd_surface);
struct pipe_sampler_view *
galahad_sampler_view_create(struct galahad_context *glhd_context,
diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c
index b6cc41d908..b4825bef66 100644
--- a/src/gallium/drivers/galahad/glhd_screen.c
+++ b/src/gallium/drivers/galahad/glhd_screen.c
@@ -223,39 +223,6 @@ galahad_screen_resource_destroy(struct pipe_screen *screen,
galahad_resource_destroy(galahad_resource(_resource));
}
-static struct pipe_surface *
-galahad_screen_get_tex_surface(struct pipe_screen *_screen,
- struct pipe_resource *_resource,
- unsigned face,
- unsigned level,
- unsigned zslice,
- unsigned usage)
-{
- struct galahad_screen *glhd_screen = galahad_screen(_screen);
- struct galahad_resource *glhd_resource = galahad_resource(_resource);
- struct pipe_screen *screen = glhd_screen->screen;
- struct pipe_resource *resource = glhd_resource->resource;
- struct pipe_surface *result;
-
- result = screen->get_tex_surface(screen,
- resource,
- face,
- level,
- zslice,
- usage);
-
- if (result)
- return galahad_surface_create(glhd_resource, result);
- return NULL;
-}
-
-static void
-galahad_screen_tex_surface_destroy(struct pipe_surface *_surface)
-{
- galahad_surface_destroy(galahad_surface(_surface));
-}
-
-
static struct pipe_resource *
galahad_screen_user_buffer_create(struct pipe_screen *_screen,
@@ -281,16 +248,18 @@ galahad_screen_user_buffer_create(struct pipe_screen *_screen,
static void
galahad_screen_flush_frontbuffer(struct pipe_screen *_screen,
- struct pipe_surface *_surface,
+ struct pipe_resource *_resource,
+ unsigned level, unsigned layer,
void *context_private)
{
struct galahad_screen *glhd_screen = galahad_screen(_screen);
- struct galahad_surface *glhd_surface = galahad_surface(_surface);
+ struct galahad_resource *glhd_resource = galahad_resource(_resource);
struct pipe_screen *screen = glhd_screen->screen;
- struct pipe_surface *surface = glhd_surface->surface;
+ struct pipe_resource *resource = glhd_resource->resource;
screen->flush_frontbuffer(screen,
- surface,
+ resource,
+ level, layer,
context_private);
}
@@ -360,8 +329,6 @@ galahad_screen_create(struct pipe_screen *screen)
glhd_screen->base.resource_from_handle = galahad_screen_resource_from_handle;
glhd_screen->base.resource_get_handle = galahad_screen_resource_get_handle;
glhd_screen->base.resource_destroy = galahad_screen_resource_destroy;
- glhd_screen->base.get_tex_surface = galahad_screen_get_tex_surface;
- glhd_screen->base.tex_surface_destroy = galahad_screen_tex_surface_destroy;
glhd_screen->base.user_buffer_create = galahad_screen_user_buffer_create;
glhd_screen->base.flush_frontbuffer = galahad_screen_flush_frontbuffer;
glhd_screen->base.fence_reference = galahad_screen_fence_reference;
diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO
new file mode 100644
index 0000000000..94c428bebf
--- /dev/null
+++ b/src/gallium/drivers/i915/TODO
@@ -0,0 +1,25 @@
+Random list of problems with i915g:
+
+- Dies with BadDrawable on GLXFBconfig changes/destruction. Makes piglit totally
+ unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still
+ broken, it doesn't update the viewport/get new buffers.
+
+- Tends to hang the chip after a few minutes of openarena. Looks tiling related,
+ at the last frame rendered has tiling corruption over the complete frame.
+
+- Kills the chip in 3D_PRIMITIVE LINELIST with mesa-demos/fbotexture in
+ wireframe mode.
+
+- Tiling is funny: If unlucky, it renders/samples all black. No clue yet what's
+ going on. Seems to depend on tiny details like whethever the sampler
+ relocation is fenced/unfenced (broken _with_ fenced reloc using tiling bits!).
+
+- Y-tiling is even more fun. i915c doesn't use it, maybe there's a reason?
+ Texture sampling from Y-tiled buffers seems to work, though (save above
+ problems).
+
+- Need to validate buffers before usage. Currently do_exec on the batchbuffer
+ can fail with -ENOSPC.
+
+Other bugs can be found here:
+https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g
diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h
index c411b84ccd..6e93da7620 100644
--- a/src/gallium/drivers/i915/i915_batch.h
+++ b/src/gallium/drivers/i915/i915_batch.h
@@ -38,7 +38,10 @@
i915_winsys_batchbuffer_dword(i915->batch, dword)
#define OUT_RELOC(buf, usage, offset) \
- i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset)
+ i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, false)
+
+#define OUT_RELOC_FENCED(buf, usage, offset) \
+ i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, true)
#define FLUSH_BATCH(fence) \
i915_flush(i915, fence)
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h
index c1cd314e7b..d92b2ccb31 100644
--- a/src/gallium/drivers/i915/i915_batchbuffer.h
+++ b/src/gallium/drivers/i915/i915_batchbuffer.h
@@ -29,42 +29,47 @@
#define I915_BATCHBUFFER_H
#include "i915_winsys.h"
+#include "util/u_debug.h"
struct i915_context;
+static INLINE size_t
+i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch)
+{
+ return batch->size - (batch->ptr - batch->map);
+}
+
static INLINE boolean
i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch,
size_t dwords,
size_t relocs)
{
- return dwords * 4 <= batch->size - (batch->ptr - batch->map) &&
+ return dwords * 4 <= i915_winsys_batchbuffer_space(batch) &&
relocs <= (batch->max_relocs - batch->relocs);
}
-static INLINE size_t
-i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch)
+static INLINE void
+i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch,
+ unsigned dword)
{
- return batch->size - (batch->ptr - batch->map);
+ *(unsigned *)batch->ptr = dword;
+ batch->ptr += 4;
}
static INLINE void
i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch,
unsigned dword)
{
- if (i915_winsys_batchbuffer_space(batch) < 4)
- return;
-
- *(unsigned *)batch->ptr = dword;
- batch->ptr += 4;
+ assert (i915_winsys_batchbuffer_space(batch) >= 4);
+ i915_winsys_batchbuffer_dword_unchecked(batch, dword);
}
static INLINE void
i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
- void *data,
- size_t size)
+ void *data,
+ size_t size)
{
- if (i915_winsys_batchbuffer_space(batch) < size)
- return;
+ assert (i915_winsys_batchbuffer_space(batch) >= size);
memcpy(data, batch->ptr, size);
batch->ptr += size;
@@ -74,9 +79,9 @@ static INLINE int
i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *buffer,
enum i915_winsys_buffer_usage usage,
- size_t offset)
+ size_t offset, bool fenced)
{
- return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset);
+ return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset, fenced);
}
#endif
diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c
index cdf20c0055..97c2566515 100644
--- a/src/gallium/drivers/i915/i915_blit.c
+++ b/src/gallium/drivers/i915/i915_blit.c
@@ -74,7 +74,7 @@ i915_fill_blit(struct i915_context *i915,
OUT_BATCH(BR13);
OUT_BATCH((y << 16) | x);
OUT_BATCH(((y + h) << 16) | (x + w));
- OUT_RELOC(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
+ OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
OUT_BATCH(color);
}
@@ -138,8 +138,8 @@ i915_copy_blit(struct i915_context *i915,
OUT_BATCH(BR13);
OUT_BATCH((dst_y << 16) | dst_x);
OUT_BATCH((dst_y2 << 16) | dst_x2);
- OUT_RELOC(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
+ OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH(((int) src_pitch & 0xffff));
- OUT_RELOC(src_buffer, I915_USAGE_2D_SOURCE, src_offset);
+ OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset);
}
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 847dd6dd47..9be3161925 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -107,6 +107,10 @@ static void i915_destroy(struct pipe_context *pipe)
if(i915->batch)
i915->iws->batchbuffer_destroy(i915->batch);
+ for (i = 0; i < i915->num_vertex_buffers; i++) {
+ pipe_resource_reference(&i915->vertex_buffer[i].buffer, NULL);
+ }
+
/* unbind framebuffer */
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
pipe_surface_reference(&i915->framebuffer.cbufs[i], NULL);
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 3ae61d0ea7..d15e1723d8 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -193,8 +193,7 @@ struct i915_velems_state {
};
-struct i915_context
-{
+struct i915_context {
struct pipe_context base;
struct i915_winsys *iws;
@@ -273,7 +272,7 @@ struct i915_context
#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM)
#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS)
#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0))
-#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1))
+#define I915_HW_INVARIANT (1<<(I915_MAX_CACHE+1))
/***********************************************************************
diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c
index 57d3390dea..87c435a2f3 100644
--- a/src/gallium/drivers/i915/i915_debug.c
+++ b/src/gallium/drivers/i915/i915_debug.c
@@ -46,10 +46,12 @@ static const struct debug_named_value debug_options[] = {
};
unsigned i915_debug = 0;
+boolean i915_tiling = TRUE;
void i915_debug_init(struct i915_screen *screen)
{
i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0);
+ i915_tiling = !debug_get_bool_option("I915_NO_TILING", FALSE);
}
@@ -974,7 +976,7 @@ i915_dump_hardware_dirty(struct i915_context *i915, const char *func)
{I915_HW_PROGRAM, "program"},
{I915_HW_CONSTANTS, "constants"},
{I915_HW_IMMEDIATE, "immediate"},
- {I915_HW_INVARIENT, "invarient"},
+ {I915_HW_INVARIANT, "invariant"},
{0, NULL},
};
int i;
diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h
index fa60799d0c..11af7662f0 100644
--- a/src/gallium/drivers/i915/i915_debug.h
+++ b/src/gallium/drivers/i915/i915_debug.h
@@ -46,6 +46,7 @@ struct i915_winsys_batchbuffer;
#define DBG_CONSTANTS 0x20
extern unsigned i915_debug;
+extern boolean i915_tiling;
#ifdef DEBUG
static INLINE boolean
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 25c53210be..9e20010c4a 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -924,6 +924,14 @@ i915_translate_instructions(struct i915_fp_compile *p,
tgsi_parse_token( &parse );
switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ /*
+ * We only support one cbuf, but we still need to ignore the property
+ * correctly so we don't hit the assert at the end of the switch case.
+ */
+ assert(parse.FullToken.FullProperty.Property.PropertyName ==
+ TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
+ break;
case TGSI_TOKEN_TYPE_DECLARATION:
if (parse.FullToken.FullDeclaration.Declaration.File
== TGSI_FILE_CONSTANT) {
diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c
index bd046bd905..baebbc7bae 100644
--- a/src/gallium/drivers/i915/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915/i915_prim_vbuf.c
@@ -172,6 +172,7 @@ i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size)
*
* Side effects:
* Updates hw_offset, sw_offset, index and allocates a new buffer.
+ * Will set i915->vbo to null on buffer allocation.
*/
static void
i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
@@ -179,8 +180,16 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
struct i915_context *i915 = i915_render->i915;
struct i915_winsys *iws = i915->iws;
- if (i915_render->vbo)
+ if (i915_render->vbo) {
iws->buffer_destroy(iws, i915_render->vbo);
+ /*
+ * XXX If buffers where referenced then this should be done in
+ * update_vbo_state but since they arn't and malloc likes to reuse
+ * memory we need to set it to null
+ */
+ i915->vbo = NULL;
+ i915_render->vbo = NULL;
+ }
i915->vbo_flushed = 0;
@@ -198,7 +207,7 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
#endif
i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size,
- 64, I915_NEW_VERTEX);
+ I915_NEW_VERTEX);
}
/**
@@ -726,7 +735,7 @@ i915_vbuf_render_create(struct i915_context *i915)
i915_render->pool_fifo = u_fifo_create(6);
for (i = 0; i < 6; i++)
u_fifo_add(i915_render->pool_fifo,
- iws->buffer_create(iws, i915_render->pool_buffer_size, 64,
+ iws->buffer_create(iws, i915_render->pool_buffer_size,
I915_NEW_VERTEX));
#else
(void)i;
diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h
index cc28891e4a..5e4e80ddf6 100644
--- a/src/gallium/drivers/i915/i915_reg.h
+++ b/src/gallium/drivers/i915/i915_reg.h
@@ -753,7 +753,7 @@
#define MT_COMPRESS_DXT1_RGB (4<<3)
#define MS3_USE_FENCE_REGS (1<<2)
#define MS3_TILED_SURFACE (1<<1)
-#define MS3_TILE_WALK (1<<0)
+#define MS3_TILE_WALK_Y (1<<0)
#define MS4_PITCH_SHIFT 21
#define MS4_CUBE_FACE_ENA_NEGX (1<<20)
@@ -851,6 +851,7 @@
#define MI_FLUSH ((0<<29)|(4<<23))
#define FLUSH_MAP_CACHE (1<<0)
#define INHIBIT_FLUSH_RENDER_CACHE (1<<2)
+#define MI_NOOP 0
#define CMD_3D (0x3<<29)
diff --git a/src/gallium/drivers/i915/i915_resource.h b/src/gallium/drivers/i915/i915_resource.h
index 753bd266b1..86620e6a12 100644
--- a/src/gallium/drivers/i915/i915_resource.h
+++ b/src/gallium/drivers/i915/i915_resource.h
@@ -49,6 +49,10 @@ struct i915_buffer {
#define I915_MAX_TEXTURE_3D_LEVELS 8 /* max 128x128x128 */
+struct offset_pair {
+ unsigned short nblocksx;
+ unsigned short nblocksy;
+};
struct i915_texture {
struct u_resource b;
@@ -63,14 +67,18 @@ struct i915_texture {
/* Explicitly store the offset of each image for each cube face or
* depth value.
+ *
+ * Array [depth] off offsets.
*/
- unsigned *image_offset[I915_MAX_TEXTURE_2D_LEVELS]; /**< array [depth] of offsets */
+ struct offset_pair *image_offset[I915_MAX_TEXTURE_2D_LEVELS];
/* The data is held here:
*/
struct i915_winsys_buffer *buffer;
};
+unsigned i915_texture_offset(struct i915_texture *tex,
+ unsigned level, unsigned layer);
void i915_init_screen_resource_functions(struct i915_screen *is);
void i915_init_resource_functions(struct i915_context *i915);
diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c b/src/gallium/drivers/i915/i915_resource_buffer.c
index 0d379497df..450203d60a 100644
--- a/src/gallium/drivers/i915/i915_resource_buffer.c
+++ b/src/gallium/drivers/i915/i915_resource_buffer.c
@@ -62,7 +62,7 @@ i915_buffer_destroy(struct pipe_screen *screen,
static void *
i915_buffer_transfer_map( struct pipe_context *pipe,
- struct pipe_transfer *transfer )
+ struct pipe_transfer *transfer )
{
struct i915_buffer *buffer = i915_buffer(transfer->resource);
return buffer->data + transfer->box.x;
@@ -71,19 +71,19 @@ i915_buffer_transfer_map( struct pipe_context *pipe,
static void
i915_buffer_transfer_inline_write( struct pipe_context *rm_ctx,
- struct pipe_resource *resource,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box,
- const void *data,
- unsigned stride,
- unsigned slice_stride)
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
{
struct i915_buffer *buffer = i915_buffer(resource);
memcpy(buffer->data + box->x,
- data,
- box->width);
+ data,
+ box->width);
}
@@ -115,7 +115,7 @@ i915_buffer_create(struct pipe_screen *screen,
buf->b.vtbl = &i915_buffer_vtbl;
pipe_reference_init(&buf->b.b.reference, 1);
buf->b.b.screen = screen;
-
+
buf->data = MALLOC(template->width0);
buf->free_on_destroy = TRUE;
@@ -135,7 +135,7 @@ struct pipe_resource *
i915_user_buffer_create(struct pipe_screen *screen,
void *ptr,
unsigned bytes,
- unsigned bind)
+ unsigned bind)
{
struct i915_buffer *buf = CALLOC_STRUCT(i915_buffer);
@@ -152,6 +152,7 @@ i915_user_buffer_create(struct pipe_screen *screen,
buf->b.b.width0 = bytes;
buf->b.b.height0 = 1;
buf->b.b.depth0 = 1;
+ buf->b.b.array_size = 1;
buf->data = ptr;
buf->free_on_destroy = FALSE;
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index d45346b32a..f19106f341 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -106,6 +106,23 @@ get_pot_stride(enum pipe_format format, unsigned width)
return util_next_power_of_two(util_format_get_stride(format, width));
}
+static INLINE const char*
+get_tiling_string(enum i915_winsys_buffer_tile tile)
+{
+ switch(tile) {
+ case I915_TILE_NONE:
+ return "none";
+ case I915_TILE_X:
+ return "x";
+ case I915_TILE_Y:
+ return "y";
+ default:
+ assert(FALSE);
+ return "?";
+ }
+}
+
+
/*
* More advanced helper funcs
*/
@@ -120,28 +137,56 @@ i915_texture_set_level_info(struct i915_texture *tex,
assert(!tex->image_offset[level]);
tex->nr_images[level] = nr_images;
- tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned));
- tex->image_offset[level][0] = 0;
+ tex->image_offset[level] = MALLOC(nr_images * sizeof(struct offset_pair));
+ tex->image_offset[level][0].nblocksx = 0;
+ tex->image_offset[level][0].nblocksy = 0;
+}
+
+INLINE unsigned i915_texture_offset(struct i915_texture *tex,
+ unsigned level, unsigned layer)
+{
+ unsigned x, y;
+ x = tex->image_offset[level][layer].nblocksx
+ * util_format_get_blocksize(tex->b.b.format);
+ y = tex->image_offset[level][layer].nblocksy;
+
+ return y * tex->stride + x;
}
static void
i915_texture_set_image_offset(struct i915_texture *tex,
unsigned level, unsigned img,
- unsigned x, unsigned y)
+ unsigned nblocksx, unsigned nblocksy)
{
/* for the first image and level make sure offset is zero */
- assert(!(img == 0 && level == 0) || (x == 0 && y == 0));
+ assert(!(img == 0 && level == 0) || (nblocksx == 0 && nblocksy == 0));
assert(img < tex->nr_images[level]);
- tex->image_offset[level][img] = y * tex->stride + x * util_format_get_blocksize(tex->b.b.format);
+ tex->image_offset[level][img].nblocksx = nblocksx;
+ tex->image_offset[level][img].nblocksy = nblocksy;
#if DEBUG_TEXTURES
- debug_printf("%s: %p level %u, img %u (%u, %u) %p\n", __FUNCTION__,
- tex, level, img, x, y,
- (void*)(uintptr_t)tex->image_offset[level][img]);
+ debug_printf("%s: %p level %u, img %u (%u, %u)\n", __FUNCTION__,
+ tex, level, img, x, y);
#endif
}
+static enum i915_winsys_buffer_tile
+i915_texture_tiling(struct pipe_resource *pt)
+{
+ if (!i915_tiling)
+ return I915_TILE_NONE;
+
+ if (pt->target == PIPE_TEXTURE_1D)
+ return I915_TILE_NONE;
+
+ if (util_format_is_s3tc(pt->format))
+ /* XXX X-tiling might make sense */
+ return I915_TILE_NONE;
+
+ return I915_TILE_X;
+}
+
/*
* Shared layout functions
@@ -163,9 +208,10 @@ i9x5_scanout_layout(struct i915_texture *tex)
i915_texture_set_image_offset(tex, 0, 0, 0, 0);
if (pt->width0 >= 240) {
- tex->stride = get_pot_stride(pt->format, pt->width0);
+ tex->stride = align(util_format_get_stride(pt->format, pt->width0), 64);
tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8);
tex->tiling = I915_TILE_X;
+ /* special case for cursors */
} else if (pt->width0 == 64 && pt->height0 == 64) {
tex->stride = get_pot_stride(pt->format, pt->width0);
tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8);
@@ -200,7 +246,7 @@ i9x5_display_target_layout(struct i915_texture *tex)
i915_texture_set_level_info(tex, 0, 1);
i915_texture_set_image_offset(tex, 0, 0, 0, 0);
- tex->stride = get_pot_stride(pt->format, pt->width0);
+ tex->stride = align(util_format_get_stride(pt->format, pt->width0), 64);
tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8);
tex->tiling = I915_TILE_X;
@@ -357,6 +403,8 @@ i915_texture_layout(struct i915_texture * tex)
{
struct pipe_resource *pt = &tex->b.b;
+ tex->tiling = i915_texture_tiling(pt);
+
switch (pt->target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
@@ -603,6 +651,8 @@ i945_texture_layout(struct i915_texture * tex)
{
struct pipe_resource *pt = &tex->b.b;
+ tex->tiling = i915_texture_tiling(pt);
+
switch (pt->target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
@@ -650,7 +700,7 @@ i915_texture_get_handle(struct pipe_screen * screen,
static void
i915_texture_destroy(struct pipe_screen *screen,
- struct pipe_resource *pt)
+ struct pipe_resource *pt)
{
struct i915_texture *tex = i915_texture(pt);
struct i915_winsys *iws = i915_screen(screen)->iws;
@@ -667,10 +717,10 @@ i915_texture_destroy(struct pipe_screen *screen,
static struct pipe_transfer *
i915_texture_get_transfer(struct pipe_context *context,
- struct pipe_resource *resource,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box)
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
struct i915_texture *tex = i915_texture(resource);
struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer);
@@ -678,37 +728,31 @@ i915_texture_get_transfer(struct pipe_context *context,
return NULL;
transfer->resource = resource;
- transfer->sr = sr;
+ transfer->level = level;
transfer->usage = usage;
transfer->box = *box;
transfer->stride = tex->stride;
+ /* FIXME: layer_stride */
return transfer;
}
-
static void *
i915_texture_transfer_map(struct pipe_context *pipe,
- struct pipe_transfer *transfer)
+ struct pipe_transfer *transfer)
{
struct pipe_resource *resource = transfer->resource;
struct i915_texture *tex = i915_texture(resource);
struct i915_winsys *iws = i915_screen(pipe->screen)->iws;
- struct pipe_subresource sr = transfer->sr;
struct pipe_box *box = &transfer->box;
enum pipe_format format = resource->format;
unsigned offset;
char *map;
- if (resource->target == PIPE_TEXTURE_CUBE) {
- offset = tex->image_offset[sr.level][sr.face];
- } else if (resource->target == PIPE_TEXTURE_3D) {
- offset = tex->image_offset[sr.level][box->z];
- } else {
- offset = tex->image_offset[sr.level][0];
- assert(sr.face == 0);
+ if (resource->target != PIPE_TEXTURE_3D &&
+ resource->target != PIPE_TEXTURE_CUBE)
assert(box->z == 0);
- }
+ offset = i915_texture_offset(tex, transfer->level, box->z);
map = iws->buffer_map(iws, tex->buffer,
(transfer->usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE);
@@ -754,7 +798,6 @@ i915_texture_create(struct pipe_screen *screen,
struct i915_screen *is = i915_screen(screen);
struct i915_winsys *iws = is->iws;
struct i915_texture *tex = CALLOC_STRUCT(i915_texture);
- size_t tex_size;
unsigned buf_usage = 0;
if (!tex)
@@ -773,8 +816,6 @@ i915_texture_create(struct pipe_screen *screen,
goto fail;
}
- tex_size = tex->stride * tex->total_nblocksy;
-
/* for scanouts and cursors, cursors arn't scanouts */
/* XXX: use a custom flag for cursors, don't rely on magically
@@ -785,27 +826,15 @@ i915_texture_create(struct pipe_screen *screen,
else
buf_usage = I915_NEW_TEXTURE;
- tex->buffer = iws->buffer_create(iws, tex_size, 64, buf_usage);
+ tex->buffer = iws->buffer_create_tiled(iws, &tex->stride, tex->total_nblocksy,
+ &tex->tiling, buf_usage);
if (!tex->buffer)
goto fail;
- /* setup any hw fences */
- if (tex->tiling) {
- iws->buffer_set_fence_reg(iws, tex->buffer, tex->stride, tex->tiling);
- }
-
-
-#if 0
- void *ptr = ws->buffer_map(ws, tex->buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- memset(ptr, 0x80, tex_size);
- ws->buffer_unmap(ws, tex->buffer);
-#endif
-
- I915_DBG(DBG_TEXTURE, "%s: %p size %u, stride %u, blocks (%u, %u)\n", __func__,
- tex, (unsigned int)tex_size, tex->stride,
+ I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%u, %u) tiling %s\n", __func__,
+ tex, tex->stride,
tex->stride / util_format_get_blocksize(tex->b.b.format),
- tex->total_nblocksy);
+ tex->total_nblocksy, get_tiling_string(tex->tiling));
return &tex->b.b;
@@ -824,10 +853,11 @@ i915_texture_from_handle(struct pipe_screen * screen,
struct i915_winsys *iws = is->iws;
struct i915_winsys_buffer *buffer;
unsigned stride;
+ enum i915_winsys_buffer_tile tiling;
assert(screen);
- buffer = iws->buffer_from_handle(iws, whandle, &stride);
+ buffer = iws->buffer_from_handle(iws, whandle, &tiling, &stride);
/* Only supports one type */
if ((template->target != PIPE_TEXTURE_2D &&
@@ -847,6 +877,7 @@ i915_texture_from_handle(struct pipe_screen * screen,
tex->b.b.screen = screen;
tex->stride = stride;
+ tex->tiling = tiling;
tex->total_nblocksy = align_nblocksy(tex->b.b.format, tex->b.b.height0, 8);
i915_texture_set_level_info(tex, 0, 1);
@@ -854,10 +885,10 @@ i915_texture_from_handle(struct pipe_screen * screen,
tex->buffer = buffer;
- I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%ux%u)\n", __func__,
+ I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%u, %u) tiling %s\n", __func__,
tex, tex->stride,
tex->stride / util_format_get_blocksize(tex->b.b.format),
- tex->total_nblocksy);
+ tex->total_nblocksy, get_tiling_string(tex->tiling));
return &tex->b.b;
}
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index a3c5113800..bdbc08e808 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -35,7 +35,6 @@
#include "i915_debug.h"
#include "i915_context.h"
#include "i915_screen.h"
-#include "i915_surface.h"
#include "i915_resource.h"
#include "i915_winsys.h"
#include "i915_public.h"
@@ -387,7 +386,6 @@ i915_screen_create(struct i915_winsys *iws)
is->base.fence_finish = i915_fence_finish;
i915_init_screen_resource_functions(is);
- i915_init_screen_surface_functions(is);
i915_debug_init(is);
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index bbfcff6bc4..f5b60ed594 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -760,8 +760,9 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe,
*/
draw_flush(i915->draw);
- memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0]));
- i915->num_vertex_buffers = count;
+ util_copy_vertex_buffers(i915->vertex_buffer,
+ &i915->num_vertex_buffers,
+ buffers, count);
/* pass-through to draw module */
draw_set_vertex_buffers(i915->draw, count, buffers);
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 49dff1f775..86c0297649 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -86,6 +86,22 @@ framebuffer_size(const struct pipe_framebuffer_state *fb,
}
}
+static inline uint32_t
+buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling)
+{
+ uint32_t tiling_bits = 0;
+
+ switch (tiling) {
+ case I915_TILE_Y:
+ tiling_bits |= BUF_3D_TILE_WALK_Y;
+ case I915_TILE_X:
+ tiling_bits |= BUF_3D_TILED_SURFACE;
+ case I915_TILE_NONE:
+ break;
+ }
+
+ return tiling_bits;
+}
/* Push the state into the sarea and/or texture memory.
*/
@@ -126,7 +142,7 @@ i915_emit_hardware_state(struct i915_context *i915 )
save_relocs = i915->batch->relocs;
/* 14 dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_INVARIENT)
+ if (i915->hardware_dirty & I915_HW_INVARIANT)
{
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
@@ -220,7 +236,6 @@ i915_emit_hardware_state(struct i915_context *i915 )
struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
if (cbuf_surface) {
- unsigned ctile = BUF_3D_USE_FENCE;
struct i915_texture *tex = i915_texture(cbuf_surface->texture);
assert(tex);
@@ -228,30 +243,32 @@ i915_emit_hardware_state(struct i915_context *i915 )
OUT_BATCH(BUF_3D_ID_COLOR_BACK |
BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
- ctile);
+ buf_3d_tiling_bits(tex->tiling));
OUT_RELOC(tex->buffer,
I915_USAGE_RENDER,
- cbuf_surface->offset);
+ 0);
}
/* What happens if no zbuf??
*/
if (depth_surface) {
- unsigned ztile = BUF_3D_USE_FENCE;
struct i915_texture *tex = i915_texture(depth_surface->texture);
+ unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level,
+ depth_surface->u.tex.first_layer);
assert(tex);
+ assert(offset == 0);
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
assert(tex);
OUT_BATCH(BUF_3D_ID_DEPTH |
BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
- ztile);
+ buf_3d_tiling_bits(tex->tiling));
OUT_RELOC(tex->buffer,
I915_USAGE_RENDER,
- depth_surface->offset);
+ 0);
}
{
@@ -293,12 +310,11 @@ i915_emit_hardware_state(struct i915_context *i915 )
if (enabled & (1 << unit)) {
struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
struct i915_winsys_buffer *buf = texture->buffer;
- uint offset = 0;
assert(buf);
count++;
- OUT_RELOC(buf, I915_USAGE_SAMPLER, offset);
+ OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
}
@@ -391,18 +407,33 @@ i915_emit_hardware_state(struct i915_context *i915 )
#if 01
/* drawing surface size */
/* 6 dwords, 0 relocs */
+ if (i915->hardware_dirty & I915_HW_STATIC)
{
uint w, h;
- boolean k = framebuffer_size(&i915->framebuffer, &w, &h);
- (void)k;
- assert(k);
+ struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
+ struct i915_texture *tex = i915_texture(cbuf_surface->texture);
+ unsigned x, y;
+ int layer;
+ uint32_t draw_offset;
+ boolean ret;
+
+ ret = framebuffer_size(&i915->framebuffer, &w, &h);
+ assert(ret);
+ layer = cbuf_surface->u.tex.first_layer;
+
+ x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx;
+ y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy;
+
+ draw_offset = x | (y << 16);
+
+ /* XXX flush only required when the draw_offset changes! */
+ OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16));
- OUT_BATCH(0);
- OUT_BATCH(0);
+ OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
+ OUT_BATCH(draw_offset);
+ OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16));
+ OUT_BATCH(draw_offset);
}
#endif
diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c
index 9771274ca1..916cb76753 100644
--- a/src/gallium/drivers/i915/i915_state_sampler.c
+++ b/src/gallium/drivers/i915/i915_state_sampler.c
@@ -243,6 +243,23 @@ static uint translate_texture_format(enum pipe_format pipeFormat)
}
}
+static inline uint32_t
+ms3_tiling_bits(enum i915_winsys_buffer_tile tiling)
+{
+ uint32_t tiling_bits = 0;
+
+ switch (tiling) {
+ case I915_TILE_Y:
+ tiling_bits |= MS3_TILE_WALK_Y;
+ case I915_TILE_X:
+ tiling_bits |= MS3_TILED_SURFACE;
+ case I915_TILE_NONE:
+ break;
+ }
+
+ return tiling_bits;
+}
+
static void update_map(struct i915_context *i915,
uint unit,
const struct i915_texture *tex,
@@ -254,7 +271,6 @@ static void update_map(struct i915_context *i915,
const uint width = pt->width0, height = pt->height0, depth = pt->depth0;
const uint num_levels = pt->last_level;
unsigned max_lod = num_levels * 4;
- unsigned tiled = MS3_USE_FENCE_REGS;
assert(tex);
assert(width);
@@ -272,7 +288,7 @@ static void update_map(struct i915_context *i915,
(((height - 1) << MS3_HEIGHT_SHIFT)
| ((width - 1) << MS3_WIDTH_SHIFT)
| format
- | tiled);
+ | ms3_tiling_bits(tex->tiling));
/*
* XXX When min_filter != mag_filter and there's just one mipmap level,
diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c
index f40876e708..becc6e93c2 100644
--- a/src/gallium/drivers/i915/i915_surface.c
+++ b/src/gallium/drivers/i915/i915_surface.c
@@ -43,11 +43,10 @@
*/
static void
i915_surface_copy(struct pipe_context *pipe,
- struct pipe_resource *dst, struct pipe_subresource subdst,
+ struct pipe_resource *dst, unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src, struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
{
struct i915_texture *dst_tex = i915_texture(dst);
struct i915_texture *src_tex = i915_texture(src);
@@ -55,29 +54,17 @@ i915_surface_copy(struct pipe_context *pipe,
struct pipe_resource *spt = &src_tex->b.b;
unsigned dst_offset, src_offset; /* in bytes */
- if (dst->target == PIPE_TEXTURE_CUBE) {
- dst_offset = dst_tex->image_offset[subdst.level][subdst.face];
- }
- else if (dst->target == PIPE_TEXTURE_3D) {
- dst_offset = dst_tex->image_offset[subdst.level][dstz];
- }
- else {
- dst_offset = dst_tex->image_offset[subdst.level][0];
- assert(subdst.face == 0);
+ /* XXX cannot copy 3d regions at this time */
+ assert(src_box->depth == 1);
+ if (dst->target != PIPE_TEXTURE_CUBE &&
+ dst->target != PIPE_TEXTURE_3D)
assert(dstz == 0);
- }
- if (src->target == PIPE_TEXTURE_CUBE) {
- src_offset = src_tex->image_offset[subsrc.level][subsrc.face];
- }
- else if (src->target == PIPE_TEXTURE_3D) {
- src_offset = src_tex->image_offset[subsrc.level][srcz];
- }
- else {
- src_offset = src_tex->image_offset[subsrc.level][0];
- assert(subsrc.face == 0);
- assert(srcz == 0);
- }
+ dst_offset = i915_texture_offset(dst_tex, dst_level, dstz);
+ if (src->target != PIPE_TEXTURE_CUBE &&
+ src->target != PIPE_TEXTURE_3D)
+ assert(src_box->z == 0);
+ src_offset = i915_texture_offset(src_tex, src_level, src_box->z);
assert( dst != src );
assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) );
@@ -90,7 +77,8 @@ i915_surface_copy(struct pipe_context *pipe,
util_format_get_blocksize(dpt->format),
(unsigned short) src_tex->stride, src_tex->buffer, src_offset,
(unsigned short) dst_tex->stride, dst_tex->buffer, dst_offset,
- (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height );
+ (short) src_box->x, (short) src_box->y, (short) dstx, (short) dsty,
+ (short) src_box->width, (short) src_box->height );
}
@@ -104,6 +92,7 @@ i915_clear_render_target(struct pipe_context *pipe,
struct i915_texture *tex = i915_texture(dst->texture);
struct pipe_resource *pt = &tex->b.b;
union util_color uc;
+ unsigned offset = i915_texture_offset(tex, dst->u.tex.level, dst->u.tex.first_layer);
assert(util_format_get_blockwidth(pt->format) == 1);
assert(util_format_get_blockheight(pt->format) == 1);
@@ -113,7 +102,7 @@ i915_clear_render_target(struct pipe_context *pipe,
util_format_get_blocksize(pt->format),
XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB,
(unsigned short) tex->stride,
- tex->buffer, dst->offset,
+ tex->buffer, offset,
(short) dstx, (short) dsty,
(short) width, (short) height,
uc.ui );
@@ -132,6 +121,7 @@ i915_clear_depth_stencil(struct pipe_context *pipe,
struct pipe_resource *pt = &tex->b.b;
unsigned packedds;
unsigned mask = 0;
+ unsigned offset = i915_texture_offset(tex, dst->u.tex.level, dst->u.tex.first_layer);
assert(util_format_get_blockwidth(pt->format) == 1);
assert(util_format_get_blockheight(pt->format) == 1);
@@ -151,7 +141,7 @@ i915_clear_depth_stencil(struct pipe_context *pipe,
util_format_get_blocksize(pt->format),
mask,
(unsigned short) tex->stride,
- tex->buffer, dst->offset,
+ tex->buffer, offset,
(short) dstx, (short) dsty,
(short) width, (short) height,
packedds );
@@ -163,42 +153,37 @@ i915_clear_depth_stencil(struct pipe_context *pipe,
static struct pipe_surface *
-i915_get_tex_surface(struct pipe_screen *screen,
- struct pipe_resource *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned flags)
+i915_create_surface(struct pipe_context *ctx,
+ struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl)
{
- struct i915_texture *tex = i915_texture(pt);
struct pipe_surface *ps;
- unsigned offset; /* in bytes */
- if (pt->target == PIPE_TEXTURE_CUBE) {
- offset = tex->image_offset[level][face];
- }
- else if (pt->target == PIPE_TEXTURE_3D) {
- offset = tex->image_offset[level][zslice];
- }
- else {
- offset = tex->image_offset[level][0];
- assert(face == 0);
- assert(zslice == 0);
- }
+ assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+ if (pt->target != PIPE_TEXTURE_CUBE &&
+ pt->target != PIPE_TEXTURE_3D)
+ assert(surf_tmpl->u.tex.first_layer == 0);
ps = CALLOC_STRUCT(pipe_surface);
if (ps) {
+ /* could subclass pipe_surface and store offset as it used to do */
pipe_reference_init(&ps->reference, 1);
pipe_resource_reference(&ps->texture, pt);
- ps->format = pt->format;
- ps->width = u_minify(pt->width0, level);
- ps->height = u_minify(pt->height0, level);
- ps->offset = offset;
- ps->usage = flags;
+ ps->format = surf_tmpl->format;
+ ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
+ ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
+ ps->u.tex.level = surf_tmpl->u.tex.level;
+ ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+ ps->usage = surf_tmpl->usage;
+ ps->context = ctx;
}
return ps;
}
static void
-i915_tex_surface_destroy(struct pipe_surface *surf)
+i915_surface_destroy(struct pipe_context *ctx,
+ struct pipe_surface *surf)
{
pipe_resource_reference(&surf->texture, NULL);
FREE(surf);
@@ -211,13 +196,6 @@ i915_init_surface_functions(struct i915_context *i915)
i915->base.resource_copy_region = i915_surface_copy;
i915->base.clear_render_target = i915_clear_render_target;
i915->base.clear_depth_stencil = i915_clear_depth_stencil;
-}
-
-/* No good reason for these to be in the screen.
- */
-void
-i915_init_screen_surface_functions(struct i915_screen *is)
-{
- is->base.get_tex_surface = i915_get_tex_surface;
- is->base.tex_surface_destroy = i915_tex_surface_destroy;
+ i915->base.create_surface = i915_create_surface;
+ i915->base.surface_destroy = i915_surface_destroy;
}
diff --git a/src/gallium/drivers/i915/i915_surface.h b/src/gallium/drivers/i915/i915_surface.h
index 448106d566..70b61de80f 100644
--- a/src/gallium/drivers/i915/i915_surface.h
+++ b/src/gallium/drivers/i915/i915_surface.h
@@ -32,7 +32,6 @@ struct i915_context;
struct i915_screen;
void i915_init_surface_functions( struct i915_context *i915 );
-void i915_init_screen_surface_functions( struct i915_screen *is );
#endif /* I915_SCREEN_H */
diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h
index 5385e403d2..24ea416f01 100644
--- a/src/gallium/drivers/i915/i915_winsys.h
+++ b/src/gallium/drivers/i915/i915_winsys.h
@@ -53,6 +53,7 @@ enum i915_winsys_buffer_type
I915_NEW_VERTEX
};
+/* These need to be in sync with the definitions of libdrm-intel! */
enum i915_winsys_buffer_tile
{
I915_TILE_NONE,
@@ -106,7 +107,7 @@ struct i915_winsys {
int (*batchbuffer_reloc)(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *reloc,
enum i915_winsys_buffer_usage usage,
- unsigned offset);
+ unsigned offset, bool fenced);
/**
* Flush a bufferbatch.
@@ -130,10 +131,24 @@ struct i915_winsys {
*/
struct i915_winsys_buffer *
(*buffer_create)(struct i915_winsys *iws,
- unsigned size, unsigned alignment,
+ unsigned size,
enum i915_winsys_buffer_type type);
/**
+ * Create a tiled buffer.
+ *
+ * *stride, height are in bytes. The winsys tries to allocate the buffer with
+ * the tiling mode provide in *tiling. If tiling is no possible, *tiling will
+ * be set to I915_TILE_NONE. The calculated stride (incorporateing hw/kernel
+ * requirements) is always returned in *stride.
+ */
+ struct i915_winsys_buffer *
+ (*buffer_create_tiled)(struct i915_winsys *iws,
+ unsigned *stride, unsigned height,
+ enum i915_winsys_buffer_tile *tiling,
+ enum i915_winsys_buffer_type type);
+
+ /**
* Creates a buffer from a handle.
* Used to implement pipe_screen::resource_from_handle.
* Also provides the stride information needed for the
@@ -142,6 +157,7 @@ struct i915_winsys {
struct i915_winsys_buffer *
(*buffer_from_handle)(struct i915_winsys *iws,
struct winsys_handle *whandle,
+ enum i915_winsys_buffer_tile *tiling,
unsigned *stride);
/**
@@ -154,15 +170,6 @@ struct i915_winsys {
unsigned stride);
/**
- * Fence a buffer with a fence reg.
- * Not to be confused with pipe_fence_handle.
- */
- int (*buffer_set_fence_reg)(struct i915_winsys *iws,
- struct i915_winsys_buffer *buffer,
- unsigned stride,
- enum i915_winsys_buffer_tile tile);
-
- /**
* Map a buffer.
*/
void *(*buffer_map)(struct i915_winsys *iws,
diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index b0b0970338..a0331f8058 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -33,6 +33,7 @@ C_SOURCES = \
brw_pipe_flush.c \
brw_pipe_misc.c \
brw_pipe_sampler.c \
+ brw_pipe_surface.c \
brw_pipe_vertex.c \
brw_pipe_clear.c \
brw_pipe_rast.c \
@@ -66,7 +67,6 @@ C_SOURCES = \
brw_resource_buffer.c \
brw_resource_texture.c \
brw_resource_texture_layout.c \
- brw_screen_surface.c \
brw_batchbuffer.c \
brw_winsys_debug.c \
intel_decode.c
diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript
index 019af682f6..3ef6c88030 100644
--- a/src/gallium/drivers/i965/SConscript
+++ b/src/gallium/drivers/i965/SConscript
@@ -36,6 +36,8 @@ i965 = env.ConvenienceLibrary(
'brw_pipe_query.c',
'brw_pipe_rast.c',
'brw_pipe_sampler.c',
+ 'brw_pipe_surface.c',
+ 'brw_pipe_surface.c',
'brw_pipe_shader.c',
'brw_pipe_vertex.c',
'brw_resource.c',
@@ -43,7 +45,6 @@ i965 = env.ConvenienceLibrary(
'brw_resource_texture.c',
'brw_resource_texture_layout.c',
'brw_screen.c',
- 'brw_screen_surface.c',
'brw_structs_dump.c',
'brw_sf.c',
'brw_sf_emit.c',
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index e80067f3b1..3c93579246 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -64,13 +64,11 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch)
}
struct brw_batchbuffer *
-brw_batchbuffer_alloc(struct brw_winsys_screen *sws,
- struct brw_chipset chipset)
+brw_batchbuffer_alloc(struct brw_winsys_screen *sws)
{
struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
batch->sws = sws;
- batch->chipset = chipset;
brw_batchbuffer_reset(batch);
return batch;
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 6ca9f617f5..6ecb91857d 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -26,7 +26,6 @@ struct brw_batchbuffer {
struct brw_winsys_screen *sws;
struct brw_winsys_buffer *buf;
- struct brw_chipset chipset;
/**
* Values exported to speed up the writing the batchbuffer,
@@ -47,8 +46,8 @@ struct brw_batchbuffer {
/*@}*/
};
-struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws,
- struct brw_chipset chipset );
+struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws );
+
void brw_batchbuffer_free(struct brw_batchbuffer *batch);
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index ccba205e8c..66b13ea58e 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -66,16 +66,14 @@ compile_clip_prog( struct brw_context *brw,
c.func.single_program_flow = 1;
- c.chipset = brw->chipset;
c.key = *key;
- c.need_ff_sync = c.chipset.is_igdng;
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.header_position_offset = ATTR_SIZE;
- if (c.chipset.is_igdng)
+ if (brw->gen == 5)
delta = 3 * REG_SIZE;
else
delta = REG_SIZE;
@@ -97,7 +95,7 @@ compile_clip_prog( struct brw_context *brw,
if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT)
c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
else
c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
index 80e3a11a37..f123b73c06 100644
--- a/src/gallium/drivers/i965/brw_clip.h
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -125,12 +125,10 @@ struct brw_clip_compile {
GLuint last_tmp;
GLboolean need_direction;
- struct brw_chipset chipset;
GLuint last_mrf;
GLuint header_position_offset;
- GLboolean need_ff_sync;
GLuint nr_color_attrs;
GLuint offset_color0;
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
index 66caadc4d5..4ed7362171 100644
--- a/src/gallium/drivers/i965/brw_clip_line.c
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -32,6 +32,7 @@
#include "util/u_debug.h"
#include "brw_defines.h"
+#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
@@ -41,7 +42,7 @@
static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
{
GLuint i = 0,j;
-
+ struct brw_context *brw = c->func.brw;
/* Register usage is static, precompute here:
*/
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
@@ -79,7 +80,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
i++;
}
- if (c->need_ff_sync) {
+ if (brw->needs_ff_sync) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
@@ -120,6 +121,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
static void clip_and_emit_line( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_indirect vtx0 = brw_indirect(0, 0);
struct brw_indirect vtx1 = brw_indirect(1, 0);
struct brw_indirect newvtx0 = brw_indirect(2, 0);
@@ -146,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_clip_init_clipmask(c);
/* -ve rhw workaround */
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
@@ -183,7 +185,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
* Both can be negative on GM965/G965 due to RHW workaround
* if so, this object should be rejected.
*/
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
{
@@ -208,7 +210,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
/* If both are positive, do nothing */
/* Only on GM965/G965 */
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
}
@@ -223,7 +225,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_ENDIF(p, is_neg2);
}
}
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 5c3ccfd8d0..f56edf3177 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -109,7 +109,7 @@ clip_unit_create_from_key(struct brw_context *brw,
/* Although up to 16 concurrent Clip threads are allowed on IGDNG,
* only 2 threads can output VUEs at a time.
*/
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
clip.thread4.max_threads = 16 - 1;
else
clip.thread4.max_threads = 2 - 1;
@@ -134,7 +134,7 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.clip5.api_mode = BRW_CLIP_API_OGL;
clip.clip5.clip_mode = key->clip_mode;
- if (BRW_IS_G4X(brw))
+ if (brw->is_g4x)
clip.clip5.negative_w_clip_test = 1;
clip.clip6.clipper_viewport_state_ptr = 0;
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
index 069524bc14..7d400e6028 100644
--- a/src/gallium/drivers/i965/brw_clip_tri.c
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -30,6 +30,7 @@
*/
#include "brw_defines.h"
+#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
@@ -43,6 +44,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
GLuint nr_verts )
{
GLuint i = 0,j;
+ struct brw_context *brw = c->func.brw;
/* Register usage is static, precompute here:
*/
@@ -69,7 +71,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
for (j = 0; j < 3; j++) {
GLuint delta = c->key.nr_attrs*16 + 32;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
delta = c->key.nr_attrs * 16 + 32 * 3;
brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
@@ -110,7 +112,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
i++;
}
- if (c->need_ff_sync) {
+ if (brw->needs_ff_sync) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
@@ -563,7 +565,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
/* if -ve rhw workaround bit is set,
do cliptest */
- if (c->chipset.is_965) {
+ if (p->brw->has_negative_rhw_bug) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
index 23e51ee9bc..5713f25da7 100644
--- a/src/gallium/drivers/i965/brw_clip_util.c
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -31,6 +31,7 @@
#include "brw_defines.h"
+#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
@@ -126,6 +127,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
GLboolean force_edgeflag)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_reg tmp = get_tmp(c);
GLuint i;
@@ -142,7 +144,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
for (i = 0; i < c->key.nr_attrs; i++) {
GLuint delta = i*16 + 32;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
delta = i * 16 + 32 * 3;
if (delta == c->offset_edgeflag) {
@@ -176,7 +178,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
if (i & 1) {
GLuint delta = i*16 + 32;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
delta = i * 16 + 32 * 3;
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
@@ -350,7 +352,8 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
- if (c->need_ff_sync) {
+ struct brw_context *brw = c->func.brw;
+ if (brw->needs_ff_sync) {
struct brw_compile *p = &c->func;
struct brw_instruction *need_ff_sync;
@@ -379,7 +382,8 @@ void brw_clip_ff_sync(struct brw_clip_compile *c)
void brw_clip_init_ff_sync(struct brw_clip_compile *c)
{
- if (c->need_ff_sync) {
+ struct brw_context *brw = c->func.brw;
+ if (brw->needs_ff_sync) {
struct brw_compile *p = &c->func;
brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index 227bc790de..41a468a32f 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -107,7 +107,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen,
void *priv)
{
struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
-
+ struct brw_screen *brs = brw_screen(screen);
if (!brw) {
debug_printf("%s: failed to alloc context\n", __FUNCTION__);
return NULL;
@@ -117,7 +117,10 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen,
brw->base.priv = priv;
brw->base.destroy = brw_destroy_context;
brw->sws = brw_screen(screen)->sws;
- brw->chipset = brw_screen(screen)->chipset;
+ brw->is_g4x = brs->is_g4x;
+ brw->needs_ff_sync = brs->needs_ff_sync;
+ brw->has_negative_rhw_bug = brs->has_negative_rhw_bug;
+ brw->gen = brs->gen;
brw_init_resource_functions( brw );
brw_pipe_blend_init( brw );
@@ -131,6 +134,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen,
brw_pipe_shader_init( brw );
brw_pipe_vertex_init( brw );
brw_pipe_clear_init( brw );
+ brw_pipe_surface_init( brw );
brw_hw_cc_init( brw );
@@ -144,7 +148,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen,
make_empty_list(&brw->query.active_head);
- brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset );
+ brw->batch = brw_batchbuffer_alloc( brw->sws );
if (brw->batch == NULL)
goto fail;
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 56d351f97d..45fc26dd7d 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -529,7 +529,14 @@ struct brw_query_object {
struct brw_context
{
struct pipe_context base;
- struct brw_chipset chipset;
+ int gen;
+ boolean has_negative_rhw_bug;
+ boolean needs_ff_sync;
+ boolean is_g4x;
+
+ int urb_size;
+ int vs_max_threads;
+ int wm_max_threads;
struct brw_winsys_screen *sws;
@@ -821,6 +828,7 @@ void brw_pipe_sampler_cleanup( struct brw_context *brw );
void brw_pipe_shader_cleanup( struct brw_context *brw );
void brw_pipe_vertex_cleanup( struct brw_context *brw );
void brw_pipe_clear_cleanup( struct brw_context *brw );
+void brw_pipe_surface_init( struct brw_context *brw );
void brw_hw_cc_init( struct brw_context *brw );
void brw_hw_cc_cleanup( struct brw_context *brw );
@@ -853,11 +861,5 @@ brw_context( struct pipe_context *ctx )
return (struct brw_context *)ctx;
}
-
-#define BRW_IS_965(brw) ((brw)->chipset.is_965)
-#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng)
-#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x)
-
-
#endif
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
index e201ce4d7c..7547eae97c 100644
--- a/src/gallium/drivers/i965/brw_defines.h
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -463,6 +463,13 @@
#define BRW_COMPRESSION_2NDHALF 1
#define BRW_COMPRESSION_COMPRESSED 2
+#define GEN6_COMPRESSION_1Q 0
+#define GEN6_COMPRESSION_2Q 1
+#define GEN6_COMPRESSION_3Q 2
+#define GEN6_COMPRESSION_4Q 3
+#define GEN6_COMPRESSION_1H 0
+#define GEN6_COMPRESSION_2H 2
+
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
@@ -502,6 +509,27 @@
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
+/** @{
+ *
+ * Gen6 has replaced "mask enable/disable" with WECtrl, which is
+ * effectively the same but much simpler to think about. Now, there
+ * are two contributors ANDed together to whether channels are
+ * executed: The predication on the instruction, and the channel write
+ * enable.
+ */
+/**
+ * This is the default value. It means that a channel's write enable is set
+ * if the per-channel IP is pointing at this instruction.
+ */
+#define BRW_WE_NORMAL 0
+/**
+ * This is used like BRW_MASK_DISABLE, and causes all channels to have
+ * their write enable set. Note that predication still contributes to
+ * whether the channel actually gets written.
+ */
+#define BRW_WE_ALL 1
+/** @} */
+
#define BRW_OPCODE_MOV 1
#define BRW_OPCODE_SEL 2
#define BRW_OPCODE_NOT 4
@@ -531,6 +559,8 @@
#define BRW_OPCODE_POP 47
#define BRW_OPCODE_WAIT 48
#define BRW_OPCODE_SEND 49
+#define BRW_OPCODE_SENDC 50
+#define BRW_OPCODE_MATH 56
#define BRW_OPCODE_ADD 64
#define BRW_OPCODE_MUL 65
#define BRW_OPCODE_AVG 66
@@ -550,6 +580,7 @@
#define BRW_OPCODE_DP2 87
#define BRW_OPCODE_DPA2 88
#define BRW_OPCODE_LINE 89
+#define BRW_OPCODE_PLN 90
#define BRW_OPCODE_NOP 126
#define BRW_PREDICATE_NONE 0
@@ -599,6 +630,8 @@
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
+#define BRW_MRF_COMPR4 (1 << 7)
+
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
@@ -645,13 +678,14 @@
#define BRW_POLYGON_FACING_BACK 1
#define BRW_MESSAGE_TARGET_NULL 0
-#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER 2
#define BRW_MESSAGE_TARGET_GATEWAY 3
-#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
-#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 /* sampler cache on GEN6 */
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 /* render cache on Gen6 */
#define BRW_MESSAGE_TARGET_URB 6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+#define BRW_MESSAGE_TARGET_CONST_CACHE 9 /* GEN6 */
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
@@ -674,20 +708,15 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
-
-/* for IGDNG only */
+#define BRW_SAMPLER_MESSAGE_SAMPLE_GEN5 0
+#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5 1
+#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5 2
+#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5 3
+#define BRW_SAMPLER_MESSAGE_SAMPLE_DERIVS_GEN5 4
+#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5 5
+#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5 6
+
+/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
@@ -705,10 +734,24 @@
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
-#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+/* G45, GEN5 */
+#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
+#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
+/* GEN6 */
+#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
@@ -728,6 +771,16 @@
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+/* GEN6 */
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE_GEN6 7
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE_GEN6 8
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE_GEN6 9
+#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE_GEN6 10
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORLD_SCATTERED_WRITE_GEN6 11
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6 12
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE_GEN6 13
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE_GEN6 14
+
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
@@ -736,7 +789,8 @@
#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
-#define BRW_MATH_FUNCTION_TAN 9
+#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
@@ -787,17 +841,33 @@
#define CMD_PIPELINED_STATE_POINTERS 0x7800
#define CMD_BINDING_TABLE_PTRS 0x7801
+# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
+# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
+# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
+
+#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */
+# define PS_SAMPLER_STATE_CHANGE (1 << 12)
+# define GS_SAMPLER_STATE_CHANGE (1 << 9)
+# define VS_SAMPLER_STATE_CHANGE (1 << 8)
+/* DW1: VS */
+/* DW2: GS */
+/* DW3: PS */
#define CMD_VERTEX_BUFFER 0x7808
# define BRW_VB0_INDEX_SHIFT 27
+# define GEN6_VB0_INDEX_SHIFT 26
# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
+# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20)
+# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20)
# define BRW_VB0_PITCH_SHIFT 0
#define CMD_VERTEX_ELEMENT 0x7809
# define BRW_VE0_INDEX_SHIFT 27
+# define GEN6_VE0_INDEX_SHIFT 26
# define BRW_VE0_FORMAT_SHIFT 16
# define BRW_VE0_VALID (1 << 26)
+# define GEN6_VE0_VALID (1 << 25)
# define BRW_VE0_SRC_OFFSET_SHIFT 0
# define BRW_VE1_COMPONENT_NOSTORE 0
# define BRW_VE1_COMPONENT_STORE_SRC 1
@@ -816,6 +886,236 @@
#define CMD_INDEX_BUFFER 0x780a
#define CMD_VF_STATISTICS_965 0x780b
#define CMD_VF_STATISTICS_GM45 0x680b
+#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */
+
+#define CMD_URB 0x7805 /* GEN6+ */
+# define GEN6_URB_VS_SIZE_SHIFT 16
+# define GEN6_URB_VS_ENTRIES_SHIFT 0
+# define GEN6_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_URB_GS_SIZE_SHIFT 0
+
+#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
+# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
+# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
+# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
+
+#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
+
+#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */
+/* DW2 */
+# define GEN6_VS_SPF_MODE (1 << 31)
+# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW4 */
+# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
+# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
+# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW5 */
+# define GEN6_VS_MAX_THREADS_SHIFT 25
+# define GEN6_VS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_VS_CACHE_DISABLE (1 << 1)
+# define GEN6_VS_ENABLE (1 << 0)
+
+#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */
+/* DW2 */
+# define GEN6_GS_SPF_MODE (1 << 31)
+# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_GS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_GS_URB_READ_LENGTH_SHIFT 11
+# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4
+# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0
+/* DW5 */
+# define GEN6_GS_MAX_THREADS_SHIFT 25
+# define GEN6_GS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
+# define GEN6_GS_RENDERING_ENABLE (1 << 8)
+/* DW6 */
+# define GEN6_GS_ENABLE (1 << 15)
+
+#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */
+/* DW1 */
+# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
+/**
+ * Just does cheap culling based on the clip distance. Bits must be
+ * disjoint with USER_CLIP_CLIP_DISTANCE bits.
+ */
+# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0
+/* DW2 */
+# define GEN6_CLIP_ENABLE (1 << 31)
+# define GEN6_CLIP_API_OGL (0 << 30)
+# define GEN6_CLIP_API_D3D (1 << 30)
+# define GEN6_CLIP_XY_TEST (1 << 28)
+# define GEN6_CLIP_Z_TEST (1 << 27)
+# define GEN6_CLIP_GB_TEST (1 << 26)
+/** 8-bit field of which user clip distances to clip aganist. */
+# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16
+# define GEN6_CLIP_MODE_NORMAL (0 << 13)
+# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13)
+# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13)
+# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9)
+# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8)
+# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4
+# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2
+# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0
+/* DW3 */
+# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17
+# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6
+# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5)
+
+#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */
+/* DW1 */
+# define GEN6_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_SF_SWIZZLE_ENABLE (1 << 21)
+# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20)
+# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11)
+# define GEN6_SF_STATISTICS_ENABLE (1 << 10)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7)
+# define GEN6_SF_FRONT_SOLID (0 << 5)
+# define GEN6_SF_FRONT_WIREFRAME (1 << 5)
+# define GEN6_SF_FRONT_POINT (2 << 5)
+# define GEN6_SF_BACK_SOLID (0 << 3)
+# define GEN6_SF_BACK_WIREFRAME (1 << 3)
+# define GEN6_SF_BACK_POINT (2 << 3)
+# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1)
+# define GEN6_SF_WINDING_CCW (1 << 0)
+/* DW3 */
+# define GEN6_SF_LINE_AA_ENABLE (1 << 31)
+# define GEN6_SF_CULL_BOTH (0 << 29)
+# define GEN6_SF_CULL_NONE (1 << 29)
+# define GEN6_SF_CULL_FRONT (2 << 29)
+# define GEN6_SF_CULL_BACK (3 << 29)
+# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */
+# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16)
+# define GEN6_SF_SCISSOR_ENABLE (1 << 11)
+# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8)
+# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8)
+# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8)
+# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8)
+/* DW4 */
+# define GEN6_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25
+# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14)
+# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14)
+# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12)
+# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12)
+# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11)
+# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */
+/* DW5: depth offset constant */
+/* DW6: depth offset scale */
+/* DW7: depth offset clamp */
+/* DW8 */
+# define ATTRIBUTE_1_OVERRIDE_W (1 << 31)
+# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30)
+# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29)
+# define ATTRIBUTE_1_OVERRIDE_X (1 << 28)
+# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25
+# define ATTRIBUTE_1_SWIZZLE_SHIFT 22
+# define ATTRIBUTE_1_SOURCE_SHIFT 16
+# define ATTRIBUTE_0_OVERRIDE_W (1 << 15)
+# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14)
+# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13)
+# define ATTRIBUTE_0_OVERRIDE_X (1 << 12)
+# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
+# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
+# define ATTRIBUTE_0_SOURCE_SHIFT 0
+
+# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
+# define ATTRIBUTE_SWIZZLE_SHIFT 6
+
+/* DW16: Point sprite texture coordinate enables */
+/* DW17: Constant interpolation enables */
+/* DW18: attr 0-7 wrap shortest enables */
+/* DW19: attr 8-16 wrap shortest enables */
+
+#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN6_WM_SPF_MODE (1 << 31)
+# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
+# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define GEN6_WM_STATISTICS_ENABLE (1 << 31)
+# define GEN6_WM_DEPTH_CLEAR (1 << 30)
+# define GEN6_WM_DEPTH_RESOLVE (1 << 28)
+# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0
+/* DW5 */
+# define GEN6_WM_MAX_THREADS_SHIFT 25
+# define GEN6_WM_KILL_ENABLE (1 << 22)
+# define GEN6_WM_COMPUTED_DEPTH (1 << 21)
+# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20)
+# define GEN6_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16)
+# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14)
+# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13)
+# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11)
+# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9)
+# define GEN6_WM_USES_SOURCE_W (1 << 8)
+# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
+# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2)
+# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_WM_POSOFFSET_NONE (0 << 18)
+# define GEN6_WM_POSOFFSET_CENTROID (2 << 18)
+# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18)
+# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16)
+# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16)
+# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16)
+# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9)
+# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1)
+# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1)
+# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1)
+# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1)
+# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0)
+/* DW7: kernel 1 pointer */
+/* DW8: kernel 2 pointer */
+
+#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */
+#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */
+#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */
+# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
+# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
+# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
+# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
+
+#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */
#define CMD_DRAW_RECT 0x7900
#define CMD_BLEND_CONSTANT_COLOR 0x7901
@@ -827,6 +1127,25 @@
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
#define CMD_AA_LINE_PARAMETERS 0x790a
+#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */
+/* DW1 */
+# define SVB_INDEX_SHIFT 29
+# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
+/* DW2: SVB index */
+/* DW3: SVB maximum index */
+
+#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */
+/* DW1 */
+# define MS_PIXEL_LOCATION_CENTER (0 << 4)
+# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define MS_NUMSAMPLES_1 (0 << 1)
+# define MS_NUMSAMPLES_4 (2 << 1)
+# define MS_NUMSAMPLES_8 (3 << 1)
+
+#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */
+# define DEPTH_CLEAR_VALID (1 << 15)
+/* DW1: depth clear value */
+
#define CMD_PIPE_CONTROL 0x7a00
#define CMD_3D_PRIM 0x7b00
@@ -839,8 +1158,8 @@
#define R02_PRIM_END 0x1
#define R02_PRIM_START 0x2
-#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
- (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
+#define URB_SIZES(brw) (brw->gen == 5 ? 1024 : \
+ (brw->is_g4x ? 384 : 256)) /* 512 bit units */
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
index 28c83515ba..b093569f0c 100644
--- a/src/gallium/drivers/i965/brw_disasm.c
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -49,12 +49,14 @@ struct {
[BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
@@ -69,13 +71,14 @@ struct {
[BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
- [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
@@ -144,7 +147,6 @@ char *chan_sel[4] = {
};
char *dest_condmod[16] = {
- [0] = NULL
};
char *debug_ctrl[2] = {
@@ -157,6 +159,16 @@ char *saturate[2] = {
[1] = ".sat"
};
+char *accwr[2] = {
+ [0] = "",
+ [1] = "AccWrEnable"
+};
+
+char *wectrl[2] = {
+ [0] = "WE_normal",
+ [1] = "WE_all"
+};
+
char *exec_size[8] = {
[0] = "1",
[1] = "2",
@@ -204,6 +216,7 @@ char *compr_ctrl[4] = {
[0] = "",
[1] = "sechalf",
[2] = "compr",
+ [3] = "compr4",
};
char *dep_ctrl[4] = {
@@ -233,6 +246,16 @@ char *reg_encoding[8] = {
[7] = "F"
};
+int reg_type_size[8] = {
+ [0] = 4,
+ [1] = 4,
+ [2] = 2,
+ [3] = 2,
+ [4] = 1,
+ [5] = 1,
+ [7] = 4
+};
+
char *imm_encoding[8] = {
[0] = "UD",
[1] = "D",
@@ -321,6 +344,11 @@ char *math_precision[2] = {
[1] = "partial_precision"
};
+char *urb_opcode[2] = {
+ [0] = "urb_write",
+ [1] = "ff_sync",
+};
+
char *urb_swizzle[4] = {
[BRW_URB_SWIZZLE_NONE] = "",
[BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
@@ -416,6 +444,11 @@ static int print_opcode (FILE *file, int id)
static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
{
int err = 0;
+
+ /* Clear the Compr4 instruction compression bit. */
+ if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
+ _reg_nr &= ~(1 << 7);
+
if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:
@@ -427,6 +460,9 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
case BRW_ARF_ACCUMULATOR:
format (file, "acc%d", _reg_nr & 0x0f);
break;
+ case BRW_ARF_FLAG:
+ format (file, "f%d", _reg_nr & 0x0f);
+ break;
case BRW_ARF_MASK:
format (file, "mask%d", _reg_nr & 0x0f);
break;
@@ -457,7 +493,7 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
return err;
}
-static int dest (FILE *file, const struct brw_instruction *inst)
+static int dest (FILE *file, struct brw_instruction *inst)
{
int err = 0;
@@ -469,7 +505,8 @@ static int dest (FILE *file, const struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
+ reg_type_size[inst->bits1.da1.dest_reg_type]);
format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
}
@@ -477,7 +514,8 @@ static int dest (FILE *file, const struct brw_instruction *inst)
{
string (file, "g[a0");
if (inst->bits1.ia1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
+ reg_type_size[inst->bits1.ia1.dest_reg_type]);
if (inst->bits1.ia1.dest_indirect_offset)
format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
string (file, "]");
@@ -493,7 +531,8 @@ static int dest (FILE *file, const struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da16.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
+ reg_type_size[inst->bits1.da16.dest_reg_type]);
string (file, "<1>");
err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
@@ -534,7 +573,7 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
if (err == -1)
return 0;
if (sub_reg_num)
- format (file, ".%d", sub_reg_num);
+ format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
src_align1_region (file, _vert_stride, _width, _horiz_stride);
err |= control (file, "src reg encoding", reg_encoding, type, NULL);
return err;
@@ -588,11 +627,12 @@ static int src_da16 (FILE *file,
if (err == -1)
return 0;
if (_subreg_nr)
- format (file, ".%d", _subreg_nr);
+ /* bit4 for subreg number byte addressing. Make this same meaning as
+ in da1 case, so output looks consistent. */
+ format (file, ".%d", 16 / reg_type_size[_reg_type]);
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
- string (file, ",1,1>");
- err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+ string (file, ",4,1>");
/*
* Three kinds of swizzle display:
* identity - nothing printed
@@ -619,11 +659,12 @@ static int src_da16 (FILE *file,
err |= control (file, "channel select", chan_sel, swz_z, NULL);
err |= control (file, "channel select", chan_sel, swz_w, NULL);
}
+ err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
return err;
}
-static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) {
+static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
switch (type) {
case BRW_REGISTER_TYPE_UD:
format (file, "0x%08xUD", inst->bits3.ud);
@@ -652,7 +693,7 @@ static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) {
return 0;
}
-static int src0 (FILE *file, const struct brw_instruction *inst)
+static int src0 (FILE *file, struct brw_instruction *inst)
{
if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src0_reg_type,
@@ -712,7 +753,7 @@ static int src0 (FILE *file, const struct brw_instruction *inst)
}
}
-static int src1 (FILE *file, const struct brw_instruction *inst)
+static int src1 (FILE *file, struct brw_instruction *inst)
{
if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src1_reg_type,
@@ -772,7 +813,7 @@ static int src1 (FILE *file, const struct brw_instruction *inst)
}
}
-int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
+int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen)
{
int err = 0;
int space = 0;
@@ -822,7 +863,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
err |= src1 (file, inst);
}
- if (inst->header.opcode == BRW_OPCODE_SEND) {
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC) {
newline (file);
pad (file, 16);
space = 0;
@@ -842,24 +884,70 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
inst->bits3.math.precision, &space);
break;
case BRW_MESSAGE_TARGET_SAMPLER:
- format (file, " (%d, %d, ",
- inst->bits3.sampler.binding_table_index,
- inst->bits3.sampler.sampler);
- err |= control (file, "sampler target format", sampler_target_format,
- inst->bits3.sampler.return_format, NULL);
- string (file, ")");
+ if (gen >= 5) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen5.binding_table_index,
+ inst->bits3.sampler_gen5.sampler,
+ inst->bits3.sampler_gen5.msg_type,
+ inst->bits3.sampler_gen5.simd_mode);
+ } else if (0 /* FINISHME: is_g4x */) {
+ format (file, " (%d, %d)",
+ inst->bits3.sampler_g4x.binding_table_index,
+ inst->bits3.sampler_g4x.sampler);
+ } else {
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format", sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ }
+ break;
+ case BRW_MESSAGE_TARGET_DATAPORT_READ:
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d, %d, %d)",
+ inst->bits3.dp_render_cache.binding_table_index,
+ inst->bits3.dp_render_cache.msg_control,
+ inst->bits3.dp_render_cache.msg_type,
+ inst->bits3.dp_render_cache.send_commit_msg,
+ inst->bits3.dp_render_cache.msg_length,
+ inst->bits3.dp_render_cache.response_length);
+ } else if (gen >= 5 /* FINISHME: || is_g4x */) {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read_gen5.binding_table_index,
+ inst->bits3.dp_read_gen5.msg_control,
+ inst->bits3.dp_read_gen5.msg_type);
+ } else {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read.binding_table_index,
+ inst->bits3.dp_read.msg_control,
+ inst->bits3.dp_read.msg_type);
+ }
break;
case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
- format (file, " (%d, %d, %d, %d)",
- inst->bits3.dp_write.binding_table_index,
- (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
- inst->bits3.dp_write.msg_control,
- inst->bits3.dp_write.msg_type,
- inst->bits3.dp_write.send_commit_msg);
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d, %d, %d)",
+ inst->bits3.dp_render_cache.binding_table_index,
+ inst->bits3.dp_render_cache.msg_control,
+ inst->bits3.dp_render_cache.msg_type,
+ inst->bits3.dp_render_cache.send_commit_msg,
+ inst->bits3.dp_render_cache.msg_length,
+ inst->bits3.dp_render_cache.response_length);
+ } else {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ }
break;
case BRW_MESSAGE_TARGET_URB:
- format (file, " %d", inst->bits3.urb.offset);
- space = 1;
+ if (gen >= 5) {
+ format (file, " %d", inst->bits3.urb_gen5.offset);
+ } else {
+ format (file, " %d", inst->bits3.urb.offset);
+ }
err |= control (file, "urb swizzle", urb_swizzle,
inst->bits3.urb.swizzle_control, &space);
err |= control (file, "urb allocate", urb_allocate,
@@ -868,6 +956,11 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
inst->bits3.urb.used, &space);
err |= control (file, "urb complete", urb_complete,
inst->bits3.urb.complete, &space);
+ if (gen >= 5) {
+ format (file, " mlen %d, rlen %d\n",
+ inst->bits3.urb_gen5.msg_length,
+ inst->bits3.urb_gen5.response_length);
+ }
break;
case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
break;
@@ -877,10 +970,17 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
}
if (space)
string (file, " ");
- format (file, "mlen %d",
- inst->bits3.generic.msg_length);
- format (file, " rlen %d",
- inst->bits3.generic.response_length);
+ if (gen >= 5) {
+ format (file, "mlen %d",
+ inst->bits3.generic_gen5.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic_gen5.response_length);
+ } else {
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
}
pad (file, 64);
if (inst->header.opcode != BRW_OPCODE_NOP) {
@@ -891,7 +991,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
- if (inst->header.opcode == BRW_OPCODE_SEND)
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC)
err |= control (file, "end of thread", end_of_thread,
inst->bits3.generic.end_of_thread, &space);
if (space)
@@ -905,13 +1006,13 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
int brw_disasm (FILE *file,
- const struct brw_instruction *inst,
- unsigned count)
+ struct brw_instruction *inst,
+ unsigned count, int gen)
{
int i, err;
for (i = 0; i < count; i++) {
- err = brw_disasm_insn(stderr, &inst[i]);
+ err = brw_disasm_insn(stderr, &inst[i], gen);
if (err)
return err;
}
diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h
index ba5b109c48..ce451ed5a0 100644
--- a/src/gallium/drivers/i965/brw_disasm.h
+++ b/src/gallium/drivers/i965/brw_disasm.h
@@ -27,10 +27,10 @@
struct brw_instruction;
-int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
+int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen);
int brw_disasm (FILE *file,
- const struct brw_instruction *inst,
- unsigned count);
+ struct brw_instruction *inst,
+ unsigned count, int gen);
#endif
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index ebeb1e146a..04ec5c81a6 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -89,13 +89,16 @@ static int brw_prepare_vertices(struct brw_context *brw)
vb->buffer->width0 - vb->buffer_offset :
MAX2(vb->buffer->width0 - vb->buffer_offset,
vb->stride * (max_index + 1 - min_index)));
+ boolean flushed;
- ret = u_upload_buffer( brw->vb.upload_vertex,
+ ret = u_upload_buffer( brw->vb.upload_vertex,
+ 0,
vb->buffer_offset + min_index * vb->stride,
size,
vb->buffer,
&offset,
- &upload_buf );
+ &upload_buf,
+ &flushed );
if (ret)
return ret;
@@ -167,7 +170,7 @@ static int brw_emit_vertex_buffers( struct brw_context *brw )
OUT_RELOC(brw->vb.vb[i].bo,
BRW_USAGE_VERTEX,
brw->vb.vb[i].offset);
- if (BRW_IS_IGDNG(brw)) {
+ if (brw->gen == 5) {
OUT_RELOC(brw->vb.vb[i].bo,
BRW_USAGE_VERTEX,
brw->vb.vb[i].bo->size - 1);
@@ -251,13 +254,16 @@ static int brw_prepare_indices(struct brw_context *brw)
/* Turn userbuffer into a proper hardware buffer?
*/
if (brw_buffer_is_user_buffer(index_buffer)) {
+ boolean flushed;
ret = u_upload_buffer( brw->vb.upload_index,
+ 0,
index_offset,
ib_size,
index_buffer,
&offset,
- &upload_buf );
+ &upload_buf,
+ &flushed );
if (ret)
return ret;
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
index 00d8eaccbc..ba1159e4c3 100644
--- a/src/gallium/drivers/i965/brw_eu_emit.c
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -255,19 +255,19 @@ static void brw_set_math_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.math_igdng.function = function;
- insn->bits3.math_igdng.int_type = integer_type;
- insn->bits3.math_igdng.precision = low_precision;
- insn->bits3.math_igdng.saturate = saturate;
- insn->bits3.math_igdng.data_type = dataType;
- insn->bits3.math_igdng.snapshot = 0;
- insn->bits3.math_igdng.header_present = 0;
- insn->bits3.math_igdng.response_length = response_length;
- insn->bits3.math_igdng.msg_length = msg_length;
- insn->bits3.math_igdng.end_of_thread = 0;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
- insn->bits2.send_igdng.end_of_thread = 0;
+ if (brw->gen == 5) {
+ insn->bits3.math_gen5.function = function;
+ insn->bits3.math_gen5.int_type = integer_type;
+ insn->bits3.math_gen5.precision = low_precision;
+ insn->bits3.math_gen5.saturate = saturate;
+ insn->bits3.math_gen5.data_type = dataType;
+ insn->bits3.math_gen5.snapshot = 0;
+ insn->bits3.math_gen5.header_present = 0;
+ insn->bits3.math_gen5.response_length = response_length;
+ insn->bits3.math_gen5.msg_length = msg_length;
+ insn->bits3.math_gen5.end_of_thread = 0;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
+ insn->bits2.send_gen5.end_of_thread = 0;
} else {
insn->bits3.math.function = function;
insn->bits3.math.int_type = integer_type;
@@ -295,18 +295,18 @@ static void brw_set_ff_sync_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.urb_igdng.opcode = 1;
- insn->bits3.urb_igdng.offset = offset;
- insn->bits3.urb_igdng.swizzle_control = swizzle_control;
- insn->bits3.urb_igdng.allocate = allocate;
- insn->bits3.urb_igdng.used = used;
- insn->bits3.urb_igdng.complete = complete;
- insn->bits3.urb_igdng.header_present = 1;
- insn->bits3.urb_igdng.response_length = response_length;
- insn->bits3.urb_igdng.msg_length = msg_length;
- insn->bits3.urb_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ insn->bits3.urb_gen5.opcode = 1;
+ insn->bits3.urb_gen5.offset = offset;
+ insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+ insn->bits3.urb_gen5.allocate = allocate;
+ insn->bits3.urb_gen5.used = used;
+ insn->bits3.urb_gen5.complete = complete;
+ insn->bits3.urb_gen5.header_present = 1;
+ insn->bits3.urb_gen5.response_length = response_length;
+ insn->bits3.urb_gen5.msg_length = msg_length;
+ insn->bits3.urb_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
}
static void brw_set_urb_message( struct brw_context *brw,
@@ -322,19 +322,19 @@ static void brw_set_urb_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.urb_igdng.opcode = 0; /* ? */
- insn->bits3.urb_igdng.offset = offset;
- insn->bits3.urb_igdng.swizzle_control = swizzle_control;
- insn->bits3.urb_igdng.allocate = allocate;
- insn->bits3.urb_igdng.used = used; /* ? */
- insn->bits3.urb_igdng.complete = complete;
- insn->bits3.urb_igdng.header_present = 1;
- insn->bits3.urb_igdng.response_length = response_length;
- insn->bits3.urb_igdng.msg_length = msg_length;
- insn->bits3.urb_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ if (brw->gen == 5) {
+ insn->bits3.urb_gen5.opcode = 0; /* ? */
+ insn->bits3.urb_gen5.offset = offset;
+ insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+ insn->bits3.urb_gen5.allocate = allocate;
+ insn->bits3.urb_gen5.used = used; /* ? */
+ insn->bits3.urb_gen5.complete = complete;
+ insn->bits3.urb_gen5.header_present = 1;
+ insn->bits3.urb_gen5.response_length = response_length;
+ insn->bits3.urb_gen5.msg_length = msg_length;
+ insn->bits3.urb_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else {
insn->bits3.urb.opcode = 0; /* ? */
insn->bits3.urb.offset = offset;
@@ -361,18 +361,18 @@ static void brw_set_dp_write_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
- insn->bits3.dp_write_igdng.msg_control = msg_control;
- insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
- insn->bits3.dp_write_igdng.msg_type = msg_type;
- insn->bits3.dp_write_igdng.send_commit_msg = 0;
- insn->bits3.dp_write_igdng.header_present = 1;
- insn->bits3.dp_write_igdng.response_length = response_length;
- insn->bits3.dp_write_igdng.msg_length = msg_length;
- insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ if (brw->gen == 5) {
+ insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
+ insn->bits3.dp_write_gen5.msg_control = msg_control;
+ insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write_gen5.msg_type = msg_type;
+ insn->bits3.dp_write_gen5.send_commit_msg = 0;
+ insn->bits3.dp_write_gen5.header_present = 1;
+ insn->bits3.dp_write_gen5.response_length = response_length;
+ insn->bits3.dp_write_gen5.msg_length = msg_length;
+ insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else {
insn->bits3.dp_write.binding_table_index = binding_table_index;
insn->bits3.dp_write.msg_control = msg_control;
@@ -398,18 +398,18 @@ static void brw_set_dp_read_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
- insn->bits3.dp_read_igdng.msg_control = msg_control;
- insn->bits3.dp_read_igdng.msg_type = msg_type;
- insn->bits3.dp_read_igdng.target_cache = target_cache;
- insn->bits3.dp_read_igdng.header_present = 1;
- insn->bits3.dp_read_igdng.response_length = response_length;
- insn->bits3.dp_read_igdng.msg_length = msg_length;
- insn->bits3.dp_read_igdng.pad1 = 0;
- insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ if (brw->gen == 5) {
+ insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
+ insn->bits3.dp_read_gen5.msg_control = msg_control;
+ insn->bits3.dp_read_gen5.msg_type = msg_type;
+ insn->bits3.dp_read_gen5.target_cache = target_cache;
+ insn->bits3.dp_read_gen5.header_present = 1;
+ insn->bits3.dp_read_gen5.response_length = response_length;
+ insn->bits3.dp_read_gen5.msg_length = msg_length;
+ insn->bits3.dp_read_gen5.pad1 = 0;
+ insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else {
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
@@ -437,18 +437,18 @@ static void brw_set_sampler_message(struct brw_context *brw,
assert(eot == 0);
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
- insn->bits3.sampler_igdng.sampler = sampler;
- insn->bits3.sampler_igdng.msg_type = msg_type;
- insn->bits3.sampler_igdng.simd_mode = simd_mode;
- insn->bits3.sampler_igdng.header_present = header_present;
- insn->bits3.sampler_igdng.response_length = response_length;
- insn->bits3.sampler_igdng.msg_length = msg_length;
- insn->bits3.sampler_igdng.end_of_thread = eot;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
- insn->bits2.send_igdng.end_of_thread = eot;
- } else if (BRW_IS_G4X(brw)) {
+ if (brw->gen == 5) {
+ insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
+ insn->bits3.sampler_gen5.sampler = sampler;
+ insn->bits3.sampler_gen5.msg_type = msg_type;
+ insn->bits3.sampler_gen5.simd_mode = simd_mode;
+ insn->bits3.sampler_gen5.header_present = header_present;
+ insn->bits3.sampler_gen5.response_length = response_length;
+ insn->bits3.sampler_gen5.msg_length = msg_length;
+ insn->bits3.sampler_gen5.end_of_thread = eot;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+ insn->bits2.send_gen5.end_of_thread = eot;
+ } else if (brw->is_g4x) {
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
insn->bits3.sampler_g4x.sampler = sampler;
insn->bits3.sampler_g4x.msg_type = msg_type;
@@ -478,7 +478,7 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
if (0 && (BRW_DEBUG & DEBUG_DISASSEM))
{
if (p->nr_insn)
- brw_disasm_insn(stderr, &p->store[p->nr_insn-1]);
+ brw_disasm_insn(stderr, &p->store[p->nr_insn-1], p->brw->gen);
}
assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
@@ -658,7 +658,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
struct brw_instruction *insn;
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
br = 2;
if (p->single_program_flow) {
@@ -699,7 +699,7 @@ void brw_ENDIF(struct brw_compile *p,
{
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
br = 2;
if (p->single_program_flow) {
@@ -813,7 +813,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *insn;
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
br = 2;
if (p->single_program_flow)
@@ -856,7 +856,7 @@ void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *landing = &p->store[p->nr_insn];
GLuint jmpi = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
jmpi = 2;
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index 06826635a8..2a8165b83e 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -51,13 +51,13 @@ static enum pipe_error compile_gs_prog( struct brw_context *brw,
memset(&c, 0, sizeof(c));
c.key = *key;
- c.need_ff_sync = BRW_IS_IGDNG(brw);
+ c.need_ff_sync = brw->gen == 5;
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.nr_attrs = c.key.nr_attrs;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
else
c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index b64ec286ce..6e070f6d75 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -103,7 +103,7 @@ gs_unit_create_from_key(struct brw_context *brw,
else
gs.thread4.max_threads = 0;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
gs.thread4.rendering_enable = 1;
if (BRW_DEBUG & DEBUG_STATS)
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index b5029ceb69..d53ce6ccfd 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -239,7 +239,7 @@ static int prepare_depthbuffer(struct brw_context *brw)
static int emit_depthbuffer(struct brw_context *brw)
{
struct pipe_surface *surface = brw->curr.fb.zsbuf;
- unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
+ unsigned int len = (brw->is_g4x || brw->gen == 5) ? 6 : 5;
if (surface == NULL) {
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
@@ -250,7 +250,7 @@ static int emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(0);
OUT_BATCH(0);
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -287,17 +287,18 @@ static int emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(((pitch * cpp) - 1) |
(format << 18) |
(BRW_TILEWALK_YMAJOR << 26) |
- ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) |
+ /* always linear ?
+ ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) |*/
(BRW_SURFACE_2D << 29));
OUT_RELOC(bo,
BRW_USAGE_DEPTH_BUFFER,
- surface->offset);
+ brw_surface(surface)->offset);
OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
((pitch - 1) << 6) |
((surface->height - 1) << 19));
OUT_BATCH(0);
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -362,10 +363,10 @@ const struct brw_tracked_state brw_line_stipple = {
/***********************************************************************
- * Misc invarient state packets
+ * Misc invariant state packets
*/
-static int upload_invarient_state( struct brw_context *brw )
+static int upload_invariant_state( struct brw_context *brw )
{
{
/* 0x61040000 Pipeline Select */
@@ -373,7 +374,7 @@ static int upload_invarient_state( struct brw_context *brw )
struct brw_pipeline_select ps;
memset(&ps, 0, sizeof(ps));
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
ps.header.opcode = CMD_PIPELINE_SELECT_GM45;
else
ps.header.opcode = CMD_PIPELINE_SELECT_965;
@@ -412,7 +413,7 @@ static int upload_invarient_state( struct brw_context *brw )
struct brw_vf_statistics vfs;
memset(&vfs, 0, sizeof(vfs));
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
vfs.opcode = CMD_VF_STATISTICS_GM45;
else
vfs.opcode = CMD_VF_STATISTICS_965;
@@ -423,7 +424,7 @@ static int upload_invarient_state( struct brw_context *brw )
BRW_BATCH_STRUCT(brw, &vfs);
}
- if (!BRW_IS_965(brw))
+ if (!(brw->gen == 4))
{
struct brw_aa_line_parameters balp;
@@ -438,7 +439,7 @@ static int upload_invarient_state( struct brw_context *brw )
{
struct brw_polygon_stipple_offset bpso;
- /* This is invarient state in gallium:
+ /* This is invariant state in gallium:
*/
memset(&bpso, 0, sizeof(bpso));
bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
@@ -452,13 +453,13 @@ static int upload_invarient_state( struct brw_context *brw )
return 0;
}
-const struct brw_tracked_state brw_invarient_state = {
+const struct brw_tracked_state brw_invariant_state = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
- .emit = upload_invarient_state
+ .emit = upload_invariant_state
};
@@ -479,7 +480,7 @@ static int upload_state_base_address( struct brw_context *brw )
/* Output the structure (brw_state_base_address) directly to the
* batchbuffer, so we can emit relocations inline.
*/
- if (BRW_IS_IGDNG(brw)) {
+ if (brw->gen == 5) {
BEGIN_BATCH(8, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
index d5cff338a6..7bf3ea6994 100644
--- a/src/gallium/drivers/i965/brw_pipe_clear.c
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -64,7 +64,7 @@ try_clear( struct brw_context *brw,
debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__FUNCTION__,
(void *)surface->bo, pitch * cpp,
- surface->base.offset,
+ surface->offset,
x1, y1, x2 - x1, y2 - y1);
BR13 = 0xf0 << 16;
@@ -99,7 +99,7 @@ try_clear( struct brw_context *brw,
OUT_BATCH((y2 << 16) | x2);
OUT_RELOC(surface->bo,
BRW_USAGE_BLIT_DEST,
- surface->base.offset);
+ surface->offset);
OUT_BATCH(value);
ADVANCE_BATCH();
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
index 4c1a6d7dcd..c86681d149 100644
--- a/src/gallium/drivers/i965/brw_pipe_rast.c
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -35,7 +35,7 @@ calculate_clip_key_rast( const struct brw_context *brw,
{
memset(key, 0, sizeof *key);
- if (brw->chipset.is_igdng)
+ if (brw->gen == 5)
key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
else
key->clip_mode = BRW_CLIPMODE_NORMAL;
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_pipe_surface.c
index f288fdbcd3..58a610089e 100644
--- a/src/gallium/drivers/i965/brw_screen_surface.c
+++ b/src/gallium/drivers/i965/brw_pipe_surface.c
@@ -35,6 +35,7 @@
#include "pipe/p_screen.h"
#include "brw_screen.h"
+#include "brw_context.h"
#include "brw_defines.h"
#include "brw_resource.h"
#include "brw_winsys.h"
@@ -108,9 +109,10 @@ void brw_update_texture( struct brw_screen *brw_screen,
* where it would be illegal (perhaps due to tiling constraints) to do
* this in-place.
*
- * Currently not implmented, not sure if it's needed.
+ * Currently not implemented, not sure if it's needed.
*/
static struct brw_surface *create_linear_view( struct brw_screen *brw_screen,
+ struct pipe_context *pipe,
struct brw_texture *tex,
union brw_surface_id id,
unsigned usage )
@@ -123,9 +125,10 @@ static struct brw_surface *create_linear_view( struct brw_screen *brw_screen,
* texture's storage.
*/
static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
- struct brw_texture *tex,
- union brw_surface_id id,
- unsigned usage )
+ struct pipe_context *pipe,
+ struct brw_texture *tex,
+ union brw_surface_id id,
+ unsigned usage )
{
struct brw_surface *surface;
@@ -137,17 +140,18 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
/* XXX: ignoring render-to-slice-of-3d-texture
*/
- assert(id.bits.zslice == 0);
+ assert(tex->b.b.target != PIPE_TEXTURE_3D || id.bits.layer == 0);
+ surface->base.context = pipe;
surface->base.format = tex->b.b.format;
surface->base.width = u_minify(tex->b.b.width0, id.bits.level);
surface->base.height = u_minify(tex->b.b.height0, id.bits.level);
- surface->base.offset = tex->image_offset[id.bits.level][id.bits.face];
surface->base.usage = usage;
- surface->base.zslice = id.bits.zslice;
- surface->base.face = id.bits.face;
- surface->base.level = id.bits.level;
+ surface->base.u.tex.first_layer = id.bits.layer;
+ surface->base.u.tex.last_layer = surface->base.u.tex.first_layer;
+ surface->base.u.tex.level = id.bits.level;
surface->id = id;
+ surface->offset = tex->image_offset[id.bits.level][id.bits.layer];
surface->cpp = tex->cpp;
surface->pitch = tex->pitch;
surface->tiling = tex->tiling;
@@ -159,26 +163,21 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
surface->ss.ss0.surface_type = BRW_SURFACE_2D;
if (tex->tiling == BRW_TILING_NONE) {
- surface->ss.ss1.base_addr = surface->base.offset;
+ surface->ss.ss1.base_addr = surface->offset;
} else {
- uint32_t tile_offset = surface->base.offset % 4096;
-
- surface->ss.ss1.base_addr = surface->base.offset - tile_offset;
-
- if (brw_screen->chipset.is_g4x) {
- if (tex->tiling == BRW_TILING_X) {
- /* Note that the low bits of these fields are missing, so
- * there's the possibility of getting in trouble.
- */
- surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4;
- surface->ss.ss5.y_offset = tile_offset / 512 / 2;
- } else {
- surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4;
+ uint32_t tile_offset = surface->offset % 4096;
+
+ surface->ss.ss1.base_addr = surface->offset - tile_offset;
+
+ if (tex->tiling == BRW_TILING_X) {
+ /* Note that the low bits of these fields are missing, so
+ * there's the possibility of getting in trouble.
+ */
+ surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4;
+ surface->ss.ss5.y_offset = tile_offset / 512 / 2;
+ } else {
+ surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4;
surface->ss.ss5.y_offset = tile_offset / 128 / 2;
- }
- }
- else {
- assert(tile_offset == 0);
}
}
@@ -198,23 +197,21 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
/* Get a surface which is view into a texture
*/
-static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
- struct pipe_resource *pt,
- unsigned face, unsigned level,
- unsigned zslice,
- unsigned usage )
+static struct pipe_surface *brw_create_surface(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl)
{
struct brw_texture *tex = brw_texture(pt);
- struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_screen *bscreen = brw_screen(pipe->screen);
struct brw_surface *surface;
union brw_surface_id id;
int type;
- id.bits.face = face;
- id.bits.level = level;
- id.bits.zslice = zslice;
+ assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+ id.bits.level = surf_tmpl->u.tex.level;
+ id.bits.layer = surf_tmpl->u.tex.first_layer;
- if (need_linear_view(bscreen, tex, id, usage))
+ if (need_linear_view(bscreen, tex, id, surf_tmpl->usage))
type = BRW_VIEW_LINEAR;
else
type = BRW_VIEW_IN_PLACE;
@@ -227,10 +224,10 @@ static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
switch (type) {
case BRW_VIEW_LINEAR:
- surface = create_linear_view( bscreen, tex, id, usage );
+ surface = create_linear_view( bscreen, pipe, tex, id, surf_tmpl->usage );
break;
case BRW_VIEW_IN_PLACE:
- surface = create_in_place_view( bscreen, tex, id, usage );
+ surface = create_in_place_view( bscreen, pipe, tex, id, surf_tmpl->usage );
break;
}
@@ -239,7 +236,8 @@ static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
}
-static void brw_tex_surface_destroy( struct pipe_surface *surf )
+static void brw_surface_destroy( struct pipe_context *pipe,
+ struct pipe_surface *surf )
{
struct brw_surface *surface = brw_surface(surf);
@@ -249,13 +247,12 @@ static void brw_tex_surface_destroy( struct pipe_surface *surf )
bo_reference(&surface->bo, NULL);
pipe_resource_reference( &surface->base.texture, NULL );
-
FREE(surface);
}
-void brw_screen_tex_surface_init( struct brw_screen *brw_screen )
+void brw_pipe_surface_init( struct brw_context *brw )
{
- brw_screen->base.get_tex_surface = brw_get_tex_surface;
- brw_screen->base.tex_surface_destroy = brw_tex_surface_destroy;
+ brw->base.create_surface = brw_create_surface;
+ brw->base.surface_destroy = brw_surface_destroy;
}
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index 007239efc4..b23454b580 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -203,7 +203,7 @@ static void brw_translate_vertex_elements(struct brw_context *brw,
brw_velems->ve[i].ve1.vfcomponent2 = comp2;
brw_velems->ve[i].ve1.vfcomponent3 = comp3;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
brw_velems->ve[i].ve1.dst_offset = 0;
else
brw_velems->ve[i].ve1.dst_offset = i * 4;
@@ -248,7 +248,6 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe,
const struct pipe_vertex_buffer *buffers)
{
struct brw_context *brw = brw_context(pipe);
- unsigned i;
/* Check for no change */
if (count == brw->curr.num_vertex_buffers &&
@@ -257,18 +256,9 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe,
count * sizeof buffers[0]) == 0)
return;
- /* Adjust refcounts */
- for (i = 0; i < count; i++)
- pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer,
- buffers[i].buffer);
-
- for ( ; i < brw->curr.num_vertex_buffers; i++)
- pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer,
- NULL);
-
- /* Copy remaining data */
- memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]);
- brw->curr.num_vertex_buffers = count;
+ util_copy_vertex_buffers(brw->curr.vertex_buffer,
+ &brw->curr.num_vertex_buffers,
+ buffers, count);
brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER;
}
@@ -318,9 +308,13 @@ brw_pipe_vertex_init( struct brw_context *brw )
void
brw_pipe_vertex_cleanup( struct brw_context *brw )
{
+ unsigned i;
/* Release bound pipe vertex_buffers
*/
+ for (i = 0; i < brw->curr.num_vertex_buffers; i++) {
+ pipe_resource_reference(&brw->curr.vertex_buffer[i].buffer, NULL);
+ }
/* Release some other stuff
*/
diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h
index ba10f9d5df..53c7c43571 100644
--- a/src/gallium/drivers/i965/brw_reg.h
+++ b/src/gallium/drivers/i965/brw_reg.h
@@ -93,18 +93,54 @@
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_B43_G 0x2E42
+#define PCI_CHIP_B43_G1 0x2E92
#define PCI_CHIP_ILD_G 0x0042
#define PCI_CHIP_ILM_G 0x0046
-struct brw_chipset {
- unsigned pci_id:16;
- unsigned is_965:1;
- unsigned is_igdng:1;
- unsigned is_g4x:1;
- unsigned pad:13;
-};
-
+#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
+#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* Server */
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+ devid == PCI_CHIP_Q45_G || \
+ devid == PCI_CHIP_G45_G || \
+ devid == PCI_CHIP_G41_G || \
+ devid == PCI_CHIP_B43_G || \
+ devid == PCI_CHIP_B43_G1)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_GEN4(devid) (devid == PCI_CHIP_I965_G || \
+ devid == PCI_CHIP_I965_Q || \
+ devid == PCI_CHIP_I965_G_1 || \
+ devid == PCI_CHIP_I965_GM || \
+ devid == PCI_CHIP_I965_GME || \
+ devid == PCI_CHIP_I946_GZ || \
+ IS_G4X(devid))
+
+#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G)
+#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G)
+#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid))
+
+#define IS_IRONLAKE(devid) IS_GEN5(devid)
+
+#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+ devid == PCI_CHIP_SANDYBRIDGE_S)
+
+#define IS_965(devid) (IS_GEN4(devid) || \
+ IS_G4X(devid) || \
+ IS_GEN5(devid) || \
+ IS_GEN6(devid))
/* XXX: hacks
*/
diff --git a/src/gallium/drivers/i965/brw_resource_buffer.c b/src/gallium/drivers/i965/brw_resource_buffer.c
index 5f9e8a87c9..afb96ee3e7 100644
--- a/src/gallium/drivers/i965/brw_resource_buffer.c
+++ b/src/gallium/drivers/i965/brw_resource_buffer.c
@@ -92,9 +92,9 @@ brw_buffer_transfer_unmap( struct pipe_context *pipe,
static unsigned brw_buffer_is_referenced( struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned face,
- unsigned level)
+ struct pipe_resource *resource,
+ unsigned level,
+ int layer)
{
struct brw_context *brw = brw_context(pipe);
struct brw_winsys_buffer *batch_bo = brw->batch->buf;
@@ -194,6 +194,7 @@ brw_user_buffer_create(struct pipe_screen *screen,
buf->b.b.width0 = bytes;
buf->b.b.height0 = 1;
buf->b.b.depth0 = 1;
+ buf->b.b.array_size = 1;
buf->user_buffer = ptr;
diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c
index 3860d18a7a..0cb895f35d 100644
--- a/src/gallium/drivers/i965/brw_resource_texture.c
+++ b/src/gallium/drivers/i965/brw_resource_texture.c
@@ -229,8 +229,8 @@ static void brw_texture_destroy(struct pipe_screen *screen,
static unsigned brw_texture_is_referenced( struct pipe_context *pipe,
struct pipe_resource *texture,
- unsigned face,
- unsigned level )
+ unsigned level,
+ int layer )
{
struct brw_context *brw = brw_context(pipe);
struct brw_screen *bscreen = brw_screen(pipe->screen);
@@ -246,7 +246,7 @@ static unsigned brw_texture_is_referenced( struct pipe_context *pipe,
if (bscreen->sws->bo_references( batch_bo, tex->bo ))
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
- /* Find any view on this texture for this face/level and see if it
+ /* Find any view on this texture for this level/layer and see if it
* is referenced:
*/
for (i = 0; i < 2; i++) {
@@ -254,7 +254,7 @@ static unsigned brw_texture_is_referenced( struct pipe_context *pipe,
if (surf->bo == tex->bo)
continue;
- if (surf->id.bits.face != face ||
+ if (!(layer == -1 || surf->id.bits.layer == layer) ||
surf->id.bits.level != level)
continue;
@@ -274,10 +274,10 @@ static unsigned brw_texture_is_referenced( struct pipe_context *pipe,
static struct pipe_transfer *
brw_texture_get_transfer(struct pipe_context *context,
- struct pipe_resource *resource,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box)
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
struct brw_texture *tex = brw_texture(resource);
struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer);
@@ -285,10 +285,11 @@ brw_texture_get_transfer(struct pipe_context *context,
return NULL;
transfer->resource = resource;
- transfer->sr = sr;
+ transfer->level = level;
transfer->usage = usage;
transfer->box = *box;
transfer->stride = tex->pitch * tex->cpp;
+ /* FIXME: layer_stride */
return transfer;
}
@@ -301,24 +302,16 @@ brw_texture_transfer_map(struct pipe_context *pipe,
struct pipe_resource *resource = transfer->resource;
struct brw_texture *tex = brw_texture(transfer->resource);
struct brw_winsys_screen *sws = brw_screen(pipe->screen)->sws;
- struct pipe_subresource sr = transfer->sr;
struct pipe_box *box = &transfer->box;
enum pipe_format format = resource->format;
unsigned usage = transfer->usage;
unsigned offset;
char *map;
- if (resource->target == PIPE_TEXTURE_CUBE) {
- offset = tex->image_offset[sr.level][sr.face];
- }
- else if (resource->target == PIPE_TEXTURE_3D) {
- offset = tex->image_offset[sr.level][box->z];
- }
- else {
- offset = tex->image_offset[sr.level][0];
- assert(sr.face == 0);
+ if (resource->target != PIPE_TEXTURE_3D &&
+ resource->target != PIPE_TEXTURE_CUBE)
assert(box->z == 0);
- }
+ offset = tex->image_offset[transfer->level][box->z];
map = sws->bo_map(tex->bo,
BRW_DATA_OTHER,
@@ -399,7 +392,7 @@ brw_texture_create( struct pipe_screen *screen,
if (tex->compressed == 0 &&
!bscreen->no_tiling)
{
- if (bscreen->chipset.is_965 &&
+ if (bscreen->gen < 5 &&
util_format_is_depth_or_stencil(template->format))
tex->tiling = BRW_TILING_Y;
else
diff --git a/src/gallium/drivers/i965/brw_resource_texture_layout.c b/src/gallium/drivers/i965/brw_resource_texture_layout.c
index 2187bdd82c..afecc77e31 100644
--- a/src/gallium/drivers/i965/brw_resource_texture_layout.c
+++ b/src/gallium/drivers/i965/brw_resource_texture_layout.c
@@ -388,7 +388,7 @@ GLboolean brw_texture_layout(struct brw_screen *brw_screen,
{
switch (tex->b.b.target) {
case PIPE_TEXTURE_CUBE:
- if (brw_screen->chipset.is_igdng)
+ if (brw_screen->gen == 5)
brw_layout_cubemap_idgng( tex );
else
brw_layout_3d_cube( tex );
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 29486f5b81..bf805fd080 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -97,7 +97,7 @@ brw_get_name(struct pipe_screen *screen)
static char buffer[128];
const char *chipset;
- switch (brw_screen(screen)->chipset.pci_id) {
+ switch (brw_screen(screen)->pci_id) {
case PCI_CHIP_I965_G:
chipset = "I965_G";
break;
@@ -405,8 +405,6 @@ struct pipe_screen *
brw_screen_create(struct brw_winsys_screen *sws)
{
struct brw_screen *bscreen;
- struct brw_chipset chipset;
-
#ifdef DEBUG
BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
@@ -415,46 +413,30 @@ brw_screen_create(struct brw_winsys_screen *sws)
BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0);
#endif
- memset(&chipset, 0, sizeof chipset);
-
- chipset.pci_id = sws->pci_id;
-
- switch (chipset.pci_id) {
- case PCI_CHIP_I965_G:
- case PCI_CHIP_I965_Q:
- case PCI_CHIP_I965_G_1:
- case PCI_CHIP_I946_GZ:
- case PCI_CHIP_I965_GM:
- case PCI_CHIP_I965_GME:
- chipset.is_965 = TRUE;
- break;
-
- case PCI_CHIP_GM45_GM:
- case PCI_CHIP_IGD_E_G:
- case PCI_CHIP_Q45_G:
- case PCI_CHIP_G45_G:
- case PCI_CHIP_G41_G:
- case PCI_CHIP_B43_G:
- chipset.is_g4x = TRUE;
- break;
-
- case PCI_CHIP_ILD_G:
- case PCI_CHIP_ILM_G:
- chipset.is_igdng = TRUE;
- break;
+ bscreen = CALLOC_STRUCT(brw_screen);
+ if (!bscreen)
+ return NULL;
- default:
+ bscreen->pci_id = sws->pci_id;
+ if (IS_GEN6(sws->pci_id)) {
+ bscreen->gen = 6;
+ bscreen->needs_ff_sync = TRUE;
+ } else if (IS_GEN5(sws->pci_id)) {
+ bscreen->gen = 5;
+ bscreen->needs_ff_sync = TRUE;
+ } else if (IS_965(sws->pci_id)) {
+ bscreen->gen = 4;
+ if (IS_G4X(sws->pci_id)) {
+ bscreen->is_g4x = true;
+ }
+ } else {
debug_printf("%s: unknown pci id 0x%x, cannot create screen\n",
- __FUNCTION__, chipset.pci_id);
+ __FUNCTION__, sws->pci_id);
+ free(bscreen);
return NULL;
}
-
- bscreen = CALLOC_STRUCT(brw_screen);
- if (!bscreen)
- return NULL;
-
- bscreen->chipset = chipset;
+ sws->gen = bscreen->gen;
bscreen->sws = sws;
bscreen->base.winsys = NULL;
bscreen->base.destroy = brw_destroy_screen;
@@ -470,7 +452,6 @@ brw_screen_create(struct brw_winsys_screen *sws)
bscreen->base.fence_finish = brw_fence_finish;
brw_init_screen_resource_functions(bscreen);
- brw_screen_tex_surface_init(bscreen);
bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE) != NULL;
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index 522a3bf899..a62e1afc40 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -43,7 +43,11 @@ struct brw_winsys_screen;
struct brw_screen
{
struct pipe_screen base;
- struct brw_chipset chipset;
+ int gen;
+ boolean has_negative_rhw_bug;
+ boolean needs_ff_sync;
+ boolean is_g4x;
+ int pci_id;
struct brw_winsys_screen *sws;
boolean no_tiling;
};
@@ -52,9 +56,8 @@ struct brw_screen
union brw_surface_id {
struct {
- unsigned face:3;
- unsigned zslice:13;
unsigned level:16;
+ unsigned layer:16;
} bits;
unsigned value;
};
@@ -63,8 +66,9 @@ union brw_surface_id {
struct brw_surface
{
struct pipe_surface base;
-
+
union brw_surface_id id;
+ unsigned offset;
unsigned cpp;
unsigned pitch;
unsigned draw_offset;
@@ -96,7 +100,5 @@ brw_surface(struct pipe_surface *surface)
unsigned
brw_surface_pitch( const struct pipe_surface *surface );
-void brw_screen_tex_surface_init( struct brw_screen *brw_screen );
-
#endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
index 497634ec9e..901c334164 100644
--- a/src/gallium/drivers/i965/brw_sf_emit.c
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -161,7 +161,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
jmpi = 2;
brw_push_insn_state(p);
@@ -205,7 +205,7 @@ static void do_flatshade_line( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
jmpi = 2;
brw_push_insn_state(p);
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 6c299a86b4..eec024650c 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -148,7 +148,7 @@ sf_unit_create_from_key(struct brw_context *brw,
sf.thread3.dispatch_grf_start_reg = 3;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
sf.thread3.urb_entry_read_offset = 3;
else
sf.thread3.urb_entry_read_offset = 1;
@@ -161,7 +161,7 @@ sf_unit_create_from_key(struct brw_context *brw,
/* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
* 48(IGDNG) threads
*/
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
chipset_max_threads = 48;
else
chipset_max_threads = 24;
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index d2bbd0123d..380d511f9b 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -56,7 +56,7 @@ const struct brw_tracked_state brw_clip_prog;
const struct brw_tracked_state brw_clip_unit;
const struct brw_tracked_state brw_curbe_buffer;
const struct brw_tracked_state brw_curbe_offsets;
-const struct brw_tracked_state brw_invarient_state;
+const struct brw_tracked_state brw_invariant_state;
const struct brw_tracked_state brw_gs_prog;
const struct brw_tracked_state brw_gs_unit;
const struct brw_tracked_state brw_line_stipple;
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index f8b91eff81..cdbf270e06 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -69,7 +69,7 @@ const struct brw_tracked_state *atoms[] =
/* Command packets:
*/
- &brw_invarient_state,
+ &brw_invariant_state,
&brw_state_base_address,
&brw_binding_table_pointers,
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
index e97ddeb5e1..b0d75b4f82 100644
--- a/src/gallium/drivers/i965/brw_structs.h
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -279,7 +279,7 @@ struct brw_aa_line_parameters
struct header header;
struct {
- GLuint aa_coverage_scope:8;
+ GLuint aa_coverage_slope:8;
GLuint pad0:8;
GLuint aa_coverage_bias:8;
GLuint pad1:8;
@@ -659,7 +659,105 @@ struct brw_clip_unit_state
GLfloat viewport_ymax;
};
+struct gen6_blend_state
+{
+ struct {
+ GLuint dest_blend_factor:5;
+ GLuint source_blend_factor:5;
+ GLuint pad3:1;
+ GLuint blend_func:3;
+ GLuint pad2:1;
+ GLuint ia_dest_blend_factor:5;
+ GLuint ia_source_blend_factor:5;
+ GLuint pad1:1;
+ GLuint ia_blend_func:3;
+ GLuint pad0:1;
+ GLuint ia_blend_enable:1;
+ GLuint blend_enable:1;
+ } blend0;
+
+ struct {
+ GLuint post_blend_clamp_enable:1;
+ GLuint pre_blend_clamp_enable:1;
+ GLuint clamp_range:2;
+ GLuint pad0:4;
+ GLuint x_dither_offset:2;
+ GLuint y_dither_offset:2;
+ GLuint dither_enable:1;
+ GLuint alpha_test_func:3;
+ GLuint alpha_test_enable:1;
+ GLuint pad1:1;
+ GLuint logic_op_func:4;
+ GLuint logic_op_enable:1;
+ GLuint pad2:1;
+ GLuint write_disable_b:1;
+ GLuint write_disable_g:1;
+ GLuint write_disable_r:1;
+ GLuint write_disable_a:1;
+ GLuint pad3:1;
+ GLuint alpha_to_coverage_dither:1;
+ GLuint alpha_to_one:1;
+ GLuint alpha_to_coverage:1;
+ } blend1;
+};
+struct gen6_color_calc_state
+{
+ struct {
+ GLuint alpha_test_format:1;
+ GLuint pad0:14;
+ GLuint round_disable:1;
+ GLuint bf_stencil_ref:8;
+ GLuint stencil_ref:8;
+ } cc0;
+
+ union {
+ GLfloat alpha_ref_f;
+ struct {
+ GLuint ui:8;
+ GLuint pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ GLfloat constant_r;
+ GLfloat constant_g;
+ GLfloat constant_b;
+ GLfloat constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+ struct {
+ GLuint pad0:3;
+ GLuint bf_stencil_pass_depth_pass_op:3;
+ GLuint bf_stencil_pass_depth_fail_op:3;
+ GLuint bf_stencil_fail_op:3;
+ GLuint bf_stencil_func:3;
+ GLuint bf_stencil_enable:1;
+ GLuint pad1:2;
+ GLuint stencil_write_enable:1;
+ GLuint stencil_pass_depth_pass_op:3;
+ GLuint stencil_pass_depth_fail_op:3;
+ GLuint stencil_fail_op:3;
+ GLuint stencil_func:3;
+ GLuint stencil_enable:1;
+ } ds0;
+
+ struct {
+ GLuint bf_stencil_write_mask:8;
+ GLuint bf_stencil_test_mask:8;
+ GLuint stencil_write_mask:8;
+ GLuint stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ GLuint pad0:26;
+ GLuint depth_write_enable:1;
+ GLuint depth_test_func:3;
+ GLuint pad1:1;
+ GLuint depth_test_enable:1;
+ } ds2;
+};
struct brw_cc_unit_state
{
@@ -814,6 +912,13 @@ struct brw_sf_unit_state
};
+struct gen6_scissor_rect
+{
+ GLuint xmin:16;
+ GLuint ymin:16;
+ GLuint xmax:16;
+ GLuint ymax:16;
+};
struct brw_gs_unit_state
{
@@ -825,7 +930,7 @@ struct brw_gs_unit_state
struct
{
GLuint pad0:8;
- GLuint rendering_enable:1; /* for IGDNG */
+ GLuint rendering_enable:1; /* for Ironlake */
GLuint pad4:1;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
@@ -935,7 +1040,7 @@ struct brw_wm_unit_state
GLfloat global_depth_offset_constant;
GLfloat global_depth_offset_scale;
- /* for IGDNG only */
+ /* for Ironlake only */
struct {
GLuint pad0:1;
GLuint grf_reg_count_1:3;
@@ -962,6 +1067,15 @@ struct brw_sampler_default_color {
GLfloat color[4];
};
+struct gen5_sampler_default_color {
+ uint8_t ub[4];
+ float f[4];
+ uint16_t hf[4];
+ uint16_t us[4];
+ int16_t s[4];
+ uint8_t b[4];
+};
+
struct brw_sampler_state
{
@@ -973,7 +1087,7 @@ struct brw_sampler_state
GLuint mag_filter:3;
GLuint mip_filter:2;
GLuint base_level:5;
- GLuint pad:1;
+ GLuint min_mag_neq:1;
GLuint lod_preclamp:1;
GLuint default_color_mode:1;
GLuint pad0:1;
@@ -985,7 +1099,8 @@ struct brw_sampler_state
GLuint r_wrap_mode:3;
GLuint t_wrap_mode:3;
GLuint s_wrap_mode:3;
- GLuint pad:3;
+ GLuint cube_control_mode:1;
+ GLuint pad:2;
GLuint max_lod:10;
GLuint min_lod:10;
} ss1;
@@ -999,7 +1114,9 @@ struct brw_sampler_state
struct brw_ss3
{
- GLuint pad:19;
+ GLuint non_normalized_coord:1;
+ GLuint pad:12;
+ GLuint address_round:6;
GLuint max_aniso:3;
GLuint chroma_key_mode:1;
GLuint chroma_key_index:2;
@@ -1044,6 +1161,15 @@ struct brw_sf_viewport
} scissor;
};
+struct gen6_sf_viewport {
+ GLfloat m00;
+ GLfloat m11;
+ GLfloat m22;
+ GLfloat m30;
+ GLfloat m31;
+ GLfloat m32;
+};
+
/* Documented in the subsystem/shared-functions/sampler chapter...
*/
struct brw_surface_state
@@ -1055,7 +1181,12 @@ struct brw_surface_state
GLuint cube_neg_y:1;
GLuint cube_pos_x:1;
GLuint cube_neg_x:1;
- GLuint pad:4;
+ GLuint pad:2;
+ /* Required on gen6 for surfaces accessed through render cache messages.
+ */
+ GLuint render_cache_read_write:1;
+ /* Ironlake and newer: instead of replicating one of the texels */
+ GLuint cube_corner_average:1;
GLuint mipmap_layout_mode:1;
GLuint vert_line_stride_ofs:1;
GLuint vert_line_stride:1;
@@ -1202,7 +1333,8 @@ struct brw_instruction
GLuint predicate_inverse:1;
GLuint execution_size:3;
GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
- GLuint pad0:2;
+ GLuint acc_wr_control:1;
+ GLuint cmpt_control:1;
GLuint debug_control:1;
GLuint saturate:1;
} header;
@@ -1250,7 +1382,7 @@ struct brw_instruction
GLuint dest_writemask:4;
GLuint dest_subreg_nr:1;
GLuint dest_reg_nr:8;
- GLuint pad1:2;
+ GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} da16;
@@ -1264,9 +1396,21 @@ struct brw_instruction
GLuint dest_writemask:4;
GLint dest_indirect_offset:6;
GLuint dest_subreg_nr:3;
- GLuint pad1:2;
+ GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} ia16;
+
+ struct {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2;
+ GLuint src1_reg_type:3;
+ GLuint pad:1;
+
+ GLint jump_count:16;
+ } branch_gen6;
} bits1;
@@ -1339,7 +1483,7 @@ struct brw_instruction
GLuint end_of_thread:1;
GLuint pad1:1;
GLuint sfid:4;
- } send_igdng; /* for IGDNG only */
+ } send_gen5; /* for Ironlake only */
} bits2;
@@ -1413,6 +1557,21 @@ struct brw_instruction
GLuint pad0:12;
} if_else;
+ struct
+ {
+ /* Signed jump distance to the ip to jump to if all channels
+ * are disabled after the break or continue. It should point
+ * to the end of the innermost control flow block, as that's
+ * where some channel could get re-enabled.
+ */
+ int jip:16;
+
+ /* Signed jump distance to the location to resume execution
+ * of this channel if it's enabled for the break or continue.
+ */
+ int uip:16;
+ } break_cont;
+
struct {
GLuint function:4;
GLuint int_type:1;
@@ -1440,7 +1599,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } math_igdng;
+ } math_gen5;
struct {
GLuint binding_table_index:8;
@@ -1476,7 +1635,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } sampler_igdng;
+ } sampler_gen5;
struct brw_urb_immediate urb;
@@ -1494,7 +1653,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } urb_igdng;
+ } urb_gen5;
struct {
GLuint binding_table_index:8;
@@ -1510,6 +1669,18 @@ struct brw_instruction
struct {
GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint msg_type:3;
+ GLuint target_cache:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_read_g4x;
+
+ struct {
+ GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint msg_type:3;
GLuint target_cache:2;
@@ -1519,7 +1690,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } dp_read_igdng;
+ } dp_read_gen5;
struct {
GLuint binding_table_index:8;
@@ -1546,10 +1717,38 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } dp_write_igdng;
+ } dp_write_gen5;
+
+ /* Sandybridge DP for sample cache, constant cache, render cache */
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:5;
+ GLuint msg_type:3;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_sampler_const_cache;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint slot_group_select:1;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:4;
+ GLuint send_commit_msg:1;
+ GLuint pad0:1;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_render_cache;
struct {
- GLuint pad:16;
+ GLuint function_control:16;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
@@ -1557,14 +1756,15 @@ struct brw_instruction
GLuint end_of_thread:1;
} generic;
+ /* Of this struct, only end_of_thread is not present for gen6. */
struct {
- GLuint pad:19;
+ GLuint function_control:19;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } generic_igdng;
+ } generic_gen5;
GLint d;
GLuint ud;
diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c
index cd40fc6d61..f3de2f995b 100644
--- a/src/gallium/drivers/i965/brw_structs_dump.c
+++ b/src/gallium/drivers/i965/brw_structs_dump.c
@@ -72,7 +72,7 @@ brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr)
{
debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
- debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope);
+ debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_slope);
debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias);
debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope);
debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias);
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
index 907ec56c6c..b630752809 100644
--- a/src/gallium/drivers/i965/brw_urb.c
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -147,7 +147,7 @@ static int recalculate_urb_fence( struct brw_context *brw )
brw->urb.constrained = 0;
- if (BRW_IS_IGDNG(brw)) {
+ if (brw->gen == 5) {
brw->urb.nr_vs_entries = 128;
brw->urb.nr_sf_entries = 48;
if (check_urb_layout(brw)) {
@@ -157,7 +157,7 @@ static int recalculate_urb_fence( struct brw_context *brw )
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
}
- } else if (BRW_IS_G4X(brw)) {
+ } else if (brw->is_g4x) {
brw->urb.nr_vs_entries = 64;
if (check_urb_layout(brw)) {
goto done;
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index 944d88c84c..b6d1091618 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -56,7 +56,6 @@ struct brw_vs_compile {
struct brw_compile func;
struct brw_vs_prog_key key;
struct brw_vs_prog_data prog_data;
- struct brw_chipset chipset;
struct brw_vertex_shader *vp;
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 5dcbd597dd..559f0c61d8 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -116,6 +116,7 @@ static boolean find_output_slot( struct brw_vs_compile *c,
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
+ struct brw_context *brw = c->func.brw;
GLuint i, reg = 0, subreg = 0, mrf;
int attributes_in_vue;
@@ -218,7 +219,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
*/
c->nr_outputs = c->prog_data.nr_outputs;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
mrf = 8;
else
mrf = 4;
@@ -333,7 +334,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
*/
attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
else
c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
@@ -1124,6 +1125,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
static void emit_vertex_write( struct brw_vs_compile *c)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_reg m0 = brw_message_reg(0);
struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
@@ -1143,7 +1145,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
*/
if (c->prog_data.writes_psiz ||
c->key.nr_userclip ||
- c->chipset.is_965)
+ brw->has_negative_rhw_bug)
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -1174,7 +1176,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p,
vec8(brw_null_reg()),
BRW_CONDITIONAL_L,
@@ -1202,7 +1204,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, offset(m0, 2), ndc);
- if (c->chipset.is_igdng) {
+ if (brw->gen == 5) {
/* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
/* m4, m5 contain the distances from vertex to the user clip planeXXX.
@@ -1339,6 +1341,7 @@ static void emit_insn(struct brw_vs_compile *c,
unsigned opcode = inst->Instruction.Opcode;
unsigned label = inst->Label.Label;
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_reg args[3], dst;
GLuint i;
@@ -1514,7 +1517,7 @@ static void emit_insn(struct brw_vs_compile *c,
c->loop_depth--;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
br = 2;
inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]);
@@ -1652,6 +1655,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
if (BRW_DEBUG & DEBUG_VS) {
debug_printf("vs-native:\n");
- brw_disasm(stderr, p->store, p->nr_insn);
+ brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen);
}
}
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index dadbb622e4..6d2ccfd6d9 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -100,7 +100,7 @@ vs_unit_create_from_key(struct brw_context *brw,
*/
vs.thread1.single_program_flow = 0;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
vs.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -111,16 +111,16 @@ vs_unit_create_from_key(struct brw_context *brw,
vs.thread3.urb_entry_read_offset = 0;
vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
else
vs.thread4.nr_urb_entries = key->nr_urb_entries;
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
chipset_max_threads = 72;
- else if (BRW_IS_G4X(brw))
+ else if (brw->is_g4x)
chipset_max_threads = 32;
else
chipset_max_threads = 16;
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index a06f8bb7d6..038f6f788a 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -148,7 +148,7 @@ static INLINE void make_reloc(struct brw_winsys_reloc *reloc,
struct brw_winsys_screen {
unsigned pci_id;
-
+ int gen;
/**
* Buffer functions.
*/
@@ -282,7 +282,7 @@ void brw_dump_data( unsigned pci_id,
enum brw_buffer_data_type data_type,
unsigned offset,
const void *data,
- size_t size );
+ size_t size, int gen );
#endif
diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c
index f8f6a539bc..b66b1cfccb 100644
--- a/src/gallium/drivers/i965/brw_winsys_debug.c
+++ b/src/gallium/drivers/i965/brw_winsys_debug.c
@@ -9,7 +9,7 @@ void brw_dump_data( unsigned pci_id,
enum brw_buffer_data_type data_type,
unsigned offset,
const void *data,
- size_t size )
+ size_t size, int gen )
{
if (BRW_DUMP & DUMP_ASM) {
switch (data_type) {
@@ -18,7 +18,7 @@ void brw_dump_data( unsigned pci_id,
case BRW_DATA_GS_VS_PROG:
case BRW_DATA_GS_GS_PROG:
case BRW_DATA_GS_CLIP_PROG:
- brw_disasm( stderr, data, size / sizeof(struct brw_instruction) );
+ brw_disasm( stderr, (struct brw_instruction *)data, size / sizeof(struct brw_instruction), gen );
break;
default:
break;
@@ -77,7 +77,7 @@ void brw_dump_data( unsigned pci_id,
if (BRW_DUMP & DUMP_BATCH) {
switch (data_type) {
case BRW_DATA_BATCH_BUFFER:
- intel_decode(data, size / 4, offset, pci_id);
+ intel_decode(data, size / 4, offset, pci_id, 0);
break;
default:
break;
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 8f983a60ae..6301062fd7 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -848,11 +848,11 @@ static void emit_tex( struct brw_wm_compile *c,
responseLength = 8; /* always */
- if (BRW_IS_IGDNG(p->brw)) {
+ if (p->brw->gen == 5) {
if (shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
} else {
if (shadow)
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
@@ -917,8 +917,8 @@ static void emit_txb( struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(8), coord[3]);
msgLength = 9;
- if (BRW_IS_IGDNG(p->brw))
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
+ if (p->brw->gen == 5)
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
else
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
@@ -1516,6 +1516,6 @@ void brw_wm_emit( struct brw_wm_compile *c )
if (BRW_DEBUG & DEBUG_WM) {
debug_printf("wm-native:\n");
- brw_disasm(stderr, p->store, p->nr_insn);
+ brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen);
}
}
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index f7ee55cc1c..a65e16edec 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -812,7 +812,7 @@ static void precalc_tex( struct brw_wm_compile *c,
}
/* XXX: add GL_EXT_texture_swizzle support to gallium -- by
- * generating shader varients in mesa state tracker.
+ * generating shader variants in mesa state tracker.
*/
/* Release this temp if we ended up allocating it:
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index 3b3afc39d3..fb8e40d928 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -1607,7 +1607,7 @@ static void emit_txb(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
- if (BRW_IS_IGDNG(p->brw)) {
+ if (p->brw->gen == 5) {
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
} else {
/* Does it work well on SIMD8? */
@@ -1688,7 +1688,7 @@ static void emit_tex(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
}
- if (BRW_IS_IGDNG(p->brw)) {
+ if (p->brw->gen == 5) {
if (shadow)
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
else
@@ -1970,7 +1970,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_
struct brw_instruction *inst0, *inst1;
GLuint br = 1;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
br = 2;
loop_depth--;
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index efc2d96be1..a690003ecb 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -70,9 +70,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
key->max_threads = 1;
else {
/* WM maximum threads is number of EUs times number of threads per EU. */
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
key->max_threads = 12 * 6;
- else if (BRW_IS_G4X(brw))
+ else if (brw->is_g4x)
key->max_threads = 10 * 5;
else
key->max_threads = 8 * 4;
@@ -155,7 +155,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
wm.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -174,7 +174,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
wm.wm4.sampler_count = 0; /* hardware requirement */
else
wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
@@ -277,7 +277,7 @@ static enum pipe_error upload_wm_unit( struct brw_context *brw )
grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
per_thread_scratch_space = key.total_scratch / 1024 - 1;
stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm;
- sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4;
+ sampler_count = brw->gen == 5 ? 0 :(key.sampler_count + 1) / 4;
/* Emit WM program relocation */
make_reloc(&reloc[nr_reloc++],
diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c
index 36c04a3165..1abe869f1a 100644
--- a/src/gallium/drivers/i965/intel_decode.c
+++ b/src/gallium/drivers/i965/intel_decode.c
@@ -42,10 +42,11 @@
#include "util/u_memory.h"
#include "util/u_string.h"
+
#include "intel_decode.h"
+#include "brw_reg.h"
/*#include "intel_chipset.h"*/
-#define IS_965(x) 1 /* XXX */
#define IS_9XX(x) 1 /* XXX */
#define BUFFER_FAIL(_count, _len, _name) do { \
@@ -99,10 +100,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
} opcodes_mi[] = {
{ 0x08, 0, 1, 1, "MI_ARB_ON_OFF" },
{ 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+ { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" },
{ 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" },
{ 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
{ 0x04, 0, 1, 1, "MI_FLUSH" },
- { 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" },
+ { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" },
{ 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
{ 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
{ 0x00, 0, 1, 1, "MI_NOOP" },
@@ -116,6 +118,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" },
};
+ switch ((data[0] & 0x1f800000) >> 23) {
+ case 0x0a:
+ instr_out(data, hw_offset, 0, "MI_BATCH_BUFFER_END\n");
+ return -1;
+ }
for (opcode = 0; opcode < Elements(opcodes_mi); opcode++) {
if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) {
@@ -308,9 +315,13 @@ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
static int
decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
{
- switch ((data[0] & 0x00f80000) >> 19) {
+ uint32_t opcode;
+
+ opcode = (data[0] & 0x00f80000) >> 19;
+
+ switch (opcode) {
case 0x11:
- instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n");
+ instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
return 1;
case 0x10:
instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n");
@@ -326,7 +337,8 @@ decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
return 1;
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n",
+ opcode);
(*failures)++;
return 1;
}
@@ -384,7 +396,7 @@ i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask
sprintf(dstname, "oD%s%s", dstmask, sat);
break;
case 6:
- if (dst_nr > 2)
+ if (dst_nr > 3)
fprintf(out, "bad destination reg U%d\n", dst_nr);
sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
break;
@@ -455,7 +467,7 @@ i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
break;
case 6:
sprintf(name, "U%d", src_nr);
- if (src_nr > 2)
+ if (src_nr > 3)
fprintf(out, "bad src reg %s\n", name);
break;
default:
@@ -800,10 +812,14 @@ i915_decode_instruction(const uint32_t *data, uint32_t hw_offset,
}
static int
-decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
+decode_3d_1d(const uint32_t *data, int count,
+ uint32_t hw_offset,
+ uint32_t devid,
+ int *failures)
{
- unsigned int len, i, c, opcode, word, map, sampler, instr;
+ unsigned int len, i, c, idx, word, map, sampler, instr;
char *format;
+ uint32_t opcode;
struct {
uint32_t opcode;
@@ -814,7 +830,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
} opcodes_3d_1d[] = {
{ 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
{ 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" },
- { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
+ { 0x9c, 0, 7, 7, "3DSTATE_CLEAR_PARAMETERS" },
{ 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
{ 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
{ 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
@@ -822,7 +838,6 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
{ 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
{ 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
{ 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
- { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
{ 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
{ 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" },
{ 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" },
@@ -834,9 +849,11 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
{ 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" },
{ 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" },
{ 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" },
- };
+ }, *opcode_3d_1d;
- switch ((data[0] & 0x00ff0000) >> 16) {
+ opcode = (data[0] & 0x00ff0000) >> 16;
+
+ switch (opcode) {
case 0x07:
/* This instruction is unusual. A 0 length means just 1 DWORD instead of
* 2. The 0 length is specified in one place to be unsupported, but
@@ -891,26 +908,56 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
len = (data[0] & 0x0000000f) + 2;
i = 1;
- for (word = 0; word <= 7; word++) {
+ for (word = 0; word <= 8; word++) {
if (data[0] & (1 << (4 + word))) {
if (i >= count)
BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1");
/* save vertex state for decode */
- if (word == 2) {
- saved_s2_set = 1;
- saved_s2 = data[i];
- }
- if (word == 4) {
- saved_s4_set = 1;
- saved_s4 = data[i];
+ if (IS_9XX(devid)) {
+ if (word == 2) {
+ saved_s2_set = 1;
+ saved_s2 = data[i];
+ }
+ if (word == 4) {
+ saved_s4_set = 1;
+ saved_s4 = data[i];
+ }
}
instr_out(data, hw_offset, i++, "S%d\n", word);
}
}
if (len != i) {
- fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+ fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+ (*failures)++;
+ }
+ return len;
+ case 0x03:
+ instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
+ len = (data[0] & 0x0000000f) + 2;
+ i = 1;
+ for (word = 6; word <= 14; word++) {
+ if (data[0] & (1 << word)) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_2");
+
+ if (word == 6)
+ instr_out(data, hw_offset, i++, "TBCF\n");
+ else if (word >= 7 && word <= 10) {
+ instr_out(data, hw_offset, i++, "TB%dC\n", word - 7);
+ instr_out(data, hw_offset, i++, "TB%dA\n", word - 7);
+ } else if (word >= 11 && word <= 14) {
+ instr_out(data, hw_offset, i++, "TM%dS0\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS1\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS2\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS3\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS4\n", word - 11);
+ }
+ }
+ }
+ if (len != i) {
+ fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
(*failures)++;
}
return len;
@@ -922,11 +969,27 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
i = 2;
for (map = 0; map <= 15; map++) {
if (data[1] & (1 << map)) {
+ int width, height, pitch, dword;
+ const char *tiling;
+
if (i + 3 >= count)
BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
instr_out(data, hw_offset, i++, "map %d MS2\n", map);
- instr_out(data, hw_offset, i++, "map %d MS3\n", map);
- instr_out(data, hw_offset, i++, "map %d MS4\n", map);
+
+ dword = data[i];
+ width = ((dword >> 10) & ((1 << 11) - 1))+1;
+ height = ((dword >> 21) & ((1 << 11) - 1))+1;
+
+ tiling = "none";
+ if (dword & (1 << 2))
+ tiling = "fenced";
+ else if (dword & (1 << 1))
+ tiling = dword & (1 << 0) ? "Y" : "X";
+ instr_out(data, hw_offset, i++, "map %d MS3 [width=%d, height=%d, tiling=%s]\n", map, width, height, tiling);
+
+ dword = data[i];
+ pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
+ instr_out(data, hw_offset, i++, "map %d MS4 [pitch=%d]\n", map, pitch);
}
}
if (len != i) {
@@ -982,8 +1045,8 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
}
return len;
case 0x01:
- if (i830)
- break;
+ if (!IS_9XX(devid))
+ break;
instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n");
instr_out(data, hw_offset, 1, "mask\n");
len = (data[0] & 0x0000003f) + 2;
@@ -1034,30 +1097,61 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
format,
(data[1] & (1 << 31)) ? "en" : "dis");
return len;
+
+ case 0x8e:
+ {
+ const char *name, *tiling;
+
+ len = (data[0] & 0x0000000f) + 2;
+ if (len != 3)
+ fprintf(out, "Bad count in 3DSTATE_BUFFER_INFO\n");
+ if (count < 3)
+ BUFFER_FAIL(count, len, "3DSTATE_BUFFER_INFO");
+
+ switch((data[1] >> 24) & 0x7) {
+ case 0x3: name = "color"; break;
+ case 0x7: name = "depth"; break;
+ default: name = "unknown"; break;
+ }
+
+ tiling = "none";
+ if (data[1] & (1 << 23))
+ tiling = "fenced";
+ else if (data[1] & (1 << 22))
+ tiling = data[1] & (1 << 21) ? "Y" : "X";
+
+ instr_out(data, hw_offset, 0, "3DSTATE_BUFFER_INFO\n");
+ instr_out(data, hw_offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);
+
+ instr_out(data, hw_offset, 2, "address\n");
+ return len;
+ }
}
- for (opcode = 0; opcode < Elements(opcodes_3d_1d); opcode++) {
- if (opcodes_3d_1d[opcode].i830_only && !i830)
+ for (idx = 0; idx < Elements(opcodes_3d_1d); idx++)
+ {
+ opcode_3d_1d = &opcodes_3d_1d[idx];
+ if (opcode_3d_1d->i830_only && IS_9XX(devid))
continue;
- if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) {
+ if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
len = 1;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name);
- if (opcodes_3d_1d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d_1d->name);
+ if (opcode_3d_1d->max_len > 1) {
len = (data[0] & 0x0000ffff) + 2;
- if (len < opcodes_3d_1d[opcode].min_len ||
- len > opcodes_3d_1d[opcode].max_len)
+ if (len < opcode_3d_1d->min_len ||
+ len > opcode_3d_1d->max_len)
{
fprintf(out, "Bad count in %s\n",
- opcodes_3d_1d[opcode].name);
+ opcode_3d_1d->name);
(*failures)++;
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d_1d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
@@ -1065,7 +1159,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1075,8 +1169,10 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
int *failures)
{
char immediate = (data[0] & (1 << 23)) == 0;
- unsigned int len, i;
+ unsigned int len, i, ret;
char *primtype;
+ int original_s2 = saved_s2;
+ int original_s4 = saved_s4;
switch ((data[0] >> 18) & 0xf) {
case 0x0: primtype = "TRILIST"; break;
@@ -1089,7 +1185,7 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
case 0x7: primtype = "RECTLIST"; break;
case 0x8: primtype = "POINTLIST"; break;
case 0x9: primtype = "DIB"; break;
- case 0xa: primtype = "CLEAR_RECT"; break;
+ case 0xa: primtype = "CLEAR_RECT"; saved_s4 = 3 << 6; saved_s2 = ~0; break;
default: primtype = "unknown"; break;
}
@@ -1193,6 +1289,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
vertex++;
}
}
+
+ ret = len;
} else {
/* indirect vertices */
len = data[0] & 0x0000ffff; /* index count */
@@ -1210,13 +1308,15 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
if ((data[i] & 0xffff) == 0xffff) {
instr_out(data, hw_offset, i,
" indices: (terminator)\n");
- return i;
+ ret = i;
+ goto out;
} else if ((data[i] >> 16) == 0xffff) {
instr_out(data, hw_offset, i,
" indices: 0x%04x, "
"(terminator)\n",
data[i] & 0xffff);
- return i;
+ ret = i;
+ goto out;
} else {
instr_out(data, hw_offset, i,
" indices: 0x%04x, 0x%04x\n",
@@ -1226,7 +1326,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
fprintf(out,
"3DPRIMITIVE: no terminator found in index buffer\n");
(*failures)++;
- return count;
+ ret = count;
+ goto out;
} else {
/* fixed size vertex index buffer */
for (i = 0; i < len; i += 2) {
@@ -1241,7 +1342,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
}
}
}
- return (len + 1) / 2 + 1;
+ ret = (len + 1) / 2 + 1;
+ goto out;
} else {
/* sequential vertex access */
if (count < 2)
@@ -1250,17 +1352,22 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
"3DPRIMITIVE sequential indirect %s, %d starting from "
"%d\n", primtype, len, data[1] & 0xffff);
instr_out(data, hw_offset, 1, " start\n");
- return 2;
+ ret = 2;
+ goto out;
}
}
- return len;
+out:
+ saved_s2 = original_s2;
+ saved_s4 = original_s4;
+ return ret;
}
static int
-decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode;
+ uint32_t opcode;
+ unsigned int idx;
struct {
uint32_t opcode;
@@ -1277,41 +1384,44 @@ decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x0d, 1, 1, "3DSTATE_MODES_4" },
{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
- };
+ }, *opcode_3d;
+
+ opcode = (data[0] & 0x1f000000) >> 24;
- switch ((data[0] & 0x1f000000) >> 24) {
+ switch (opcode) {
case 0x1f:
return decode_3d_primitive(data, count, hw_offset, failures);
case 0x1d:
- return decode_3d_1d(data, count, hw_offset, failures, 0);
+ return decode_3d_1d(data, count, hw_offset, devid, failures);
case 0x1c:
return decode_3d_1c(data, count, hw_offset, failures);
}
- for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
- if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < Elements(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if (opcode == opcode_3d->opcode) {
unsigned int len = 1, i;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1401,12 +1511,87 @@ get_965_prim_type(uint32_t data)
default: return "fail";
}
}
+static int
+i965_decode_urb_fence(const uint32_t *data, uint32_t hw_offset, int len, int count,
+ int *failures)
+{
+ uint32_t vs_fence, clip_fence, gs_fence, sf_fence, vfe_fence, cs_fence;
+
+ if (len != 3)
+ fprintf(out, "Bad count in URB_FENCE\n");
+ if (count < 3)
+ BUFFER_FAIL(count, len, "URB_FENCE");
+
+ vs_fence = data[1] & 0x3ff;
+ gs_fence = (data[1] >> 10) & 0x3ff;
+ clip_fence = (data[1] >> 20) & 0x3ff;
+ sf_fence = data[2] & 0x3ff;
+ vfe_fence = (data[2] >> 10) & 0x3ff;
+ cs_fence = (data[2] >> 20) & 0x7ff;
+
+ instr_out(data, hw_offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
+ (data[0] >> 13) & 1 ? "cs " : "",
+ (data[0] >> 12) & 1 ? "vfe " : "",
+ (data[0] >> 11) & 1 ? "sf " : "",
+ (data[0] >> 10) & 1 ? "clip " : "",
+ (data[0] >> 9) & 1 ? "gs " : "",
+ (data[0] >> 8) & 1 ? "vs " : "");
+ instr_out(data, hw_offset, 1,
+ "vs fence: %d, clip_fence: %d, gs_fence: %d\n",
+ vs_fence, clip_fence, gs_fence);
+ instr_out(data, hw_offset, 2,
+ "sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
+ sf_fence, vfe_fence, cs_fence);
+ if (gs_fence < vs_fence)
+ fprintf(out, "gs fence < vs fence!\n");
+ if (clip_fence < gs_fence)
+ fprintf(out, "clip fence < gs fence!\n");
+ if (sf_fence < clip_fence)
+ fprintf(out, "sf fence < clip fence!\n");
+ if (cs_fence < sf_fence)
+ fprintf(out, "cs fence < sf fence!\n");
+
+ return len;
+}
+
+static void
+state_base_out(const uint32_t *data, uint32_t hw_offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1) {
+ instr_out(data, hw_offset, index, "%s state base address 0x%08x\n",
+ name, data[index] & ~1);
+ } else {
+ instr_out(data, hw_offset, index, "%s state base not updated\n",
+ name);
+ }
+}
+
+static void
+state_max_out(const uint32_t *data, uint32_t hw_offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1) {
+ if (data[index] == 1) {
+ instr_out(data, hw_offset, index,
+ "%s state upper bound disabled\n", name);
+ } else {
+ instr_out(data, hw_offset, index, "%s state upper bound 0x%08x\n",
+ name, data[index] & ~1);
+ }
+ } else {
+ instr_out(data, hw_offset, index, "%s state upper bound not updated\n",
+ name);
+ }
+}
static int
-decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode, len;
- int i;
+ uint32_t opcode;
+ unsigned int idx, len;
+ int i, sba_len;
+ char *desc1 = NULL;
struct {
uint32_t opcode;
@@ -1435,51 +1620,78 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+ { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+ { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
+ { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
{ 0x7b00, 6, 6, "3DPRIMITIVE" },
- };
+ { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" },
+ { 0x7805, 3, 3, "3DSTATE_URB" },
+ { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" },
+ { 0x7810, 6, 6, "3DSTATE_VS_STATE" },
+ { 0x7811, 7, 7, "3DSTATE_GS_STATE" },
+ { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
+ { 0x7813, 20, 20, "3DSTATE_SF_STATE" },
+ { 0x7814, 9, 9, "3DSTATE_WM_STATE" },
+ { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
+ { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
+ { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
+ { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
+ { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
+ }, *opcode_3d;
len = (data[0] & 0x0000ffff) + 2;
- switch ((data[0] & 0xffff0000) >> 16) {
+ opcode = (data[0] & 0xffff0000) >> 16;
+ switch (opcode) {
+ case 0x6000:
+ len = (data[0] & 0x000000ff) + 2;
+ return i965_decode_urb_fence(data, hw_offset, len, count, failures);
+ case 0x6001:
+ instr_out(data, hw_offset, 0, "CS_URB_STATE\n");
+ instr_out(data, hw_offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
+ (data[1] >> 4) & 0x1f,
+ (((data[1] >> 4) & 0x1f) + 1) * 64,
+ data[1] & 0x7);
+ return len;
+ case 0x6002:
+ len = (data[0] & 0x000000ff) + 2;
+ instr_out(data, hw_offset, 0, "CONSTANT_BUFFER: %s\n",
+ (data[0] >> 8) & 1 ? "valid" : "invalid");
+ instr_out(data, hw_offset, 1, "offset: 0x%08x, length: %d bytes\n",
+ data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
+ return len;
case 0x6101:
- if (len != 6)
+ if (IS_GEN6(devid))
+ sba_len = 10;
+ else if (IS_IRONLAKE(devid))
+ sba_len = 8;
+ else
+ sba_len = 6;
+ if (len != sba_len)
fprintf(out, "Bad count in STATE_BASE_ADDRESS\n");
- if (count < 6)
+ if (len != sba_len)
BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS");
+ i = 0;
instr_out(data, hw_offset, 0,
"STATE_BASE_ADDRESS\n");
-
- if (data[1] & 1) {
- instr_out(data, hw_offset, 1, "General state at 0x%08x\n",
- data[1] & ~1);
- } else
- instr_out(data, hw_offset, 1, "General state not updated\n");
-
- if (data[2] & 1) {
- instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n",
- data[2] & ~1);
- } else
- instr_out(data, hw_offset, 2, "Surface state not updated\n");
-
- if (data[3] & 1) {
- instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n",
- data[3] & ~1);
- } else
- instr_out(data, hw_offset, 3, "Indirect state not updated\n");
-
- if (data[4] & 1) {
- instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n",
- data[4] & ~1);
- } else
- instr_out(data, hw_offset, 4, "General state not updated\n");
-
- if (data[5] & 1) {
- instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n",
- data[5] & ~1);
- } else
- instr_out(data, hw_offset, 5, "Indirect state not updated\n");
+ i++;
+
+ state_base_out(data, hw_offset, i++, "general");
+ state_base_out(data, hw_offset, i++, "surface");
+ if (IS_GEN6(devid))
+ state_base_out(data, hw_offset, i++, "dynamic");
+ state_base_out(data, hw_offset, i++, "indirect");
+ if (IS_IRONLAKE(devid) || IS_GEN6(devid))
+ state_base_out(data, hw_offset, i++, "instruction");
+
+ state_max_out(data, hw_offset, i++, "general");
+ if (IS_GEN6(devid))
+ state_max_out(data, hw_offset, i++, "dynamic");
+ state_max_out(data, hw_offset, i++, "indirect");
+ if (IS_IRONLAKE(devid) || IS_GEN6(devid))
+ state_max_out(data, hw_offset, i++, "instruction");
return len;
case 0x7800:
@@ -1498,18 +1710,33 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
instr_out(data, hw_offset, 6, "CC state\n");
return len;
case 0x7801:
- if (len != 6)
+ len = (data[0] & 0x000000ff) + 2;
+ if (len != 6 && len != 4)
fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n");
- if (count < 6)
- BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+ if (len == 6) {
+ if (count < 6)
+ BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS\n");
+ instr_out(data, hw_offset, 1, "VS binding table\n");
+ instr_out(data, hw_offset, 2, "GS binding table\n");
+ instr_out(data, hw_offset, 3, "Clip binding table\n");
+ instr_out(data, hw_offset, 4, "SF binding table\n");
+ instr_out(data, hw_offset, 5, "WM binding table\n");
+ } else {
+ if (count < 4)
+ BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
- instr_out(data, hw_offset, 0,
- "3DSTATE_BINDING_TABLE_POINTERS\n");
- instr_out(data, hw_offset, 1, "VS binding table\n");
- instr_out(data, hw_offset, 2, "GS binding table\n");
- instr_out(data, hw_offset, 3, "Clip binding table\n");
- instr_out(data, hw_offset, 4, "SF binding table\n");
- instr_out(data, hw_offset, 5, "WM binding table\n");
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, "
+ "GS mod %d, PS mod %d\n",
+ (data[0] & (1 << 8)) != 0,
+ (data[0] & (1 << 9)) != 0,
+ (data[0] & (1 << 10)) != 0);
+ instr_out(data, hw_offset, 1, "VS binding table\n");
+ instr_out(data, hw_offset, 2, "GS binding table\n");
+ instr_out(data, hw_offset, 3, "WM binding table\n");
+ }
return len;
@@ -1560,6 +1787,18 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
}
return len;
+ case 0x780d:
+ len = (data[0] & 0xff) + 2;
+ if (len != 4)
+ fprintf(out, "Bad count in 3DSTATE_VIEWPORT_STATE_POINTERS\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_VIEWPORT_STATE_POINTERS");
+ instr_out(data, hw_offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n");
+ instr_out(data, hw_offset, 1, "clip\n");
+ instr_out(data, hw_offset, 2, "sf\n");
+ instr_out(data, hw_offset, 3, "cc\n");
+ return len;
+
case 0x780a:
len = (data[0] & 0xff) + 2;
if (len != 3)
@@ -1592,7 +1831,7 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
return len;
case 0x7905:
- if (len != 5 && len != 6)
+ if (len < 5 || len > 7)
fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
if (count < len)
BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER");
@@ -1609,9 +1848,36 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
((data[3] & 0x0007ffc0) >> 6) + 1,
((data[3] & 0xfff80000) >> 19) + 1);
instr_out(data, hw_offset, 4, "volume depth\n");
- if (len == 6)
+ if (len >= 6)
instr_out(data, hw_offset, 5, "\n");
+ if (len >= 7)
+ instr_out(data, hw_offset, 6, "render target view extent\n");
+
+ return len;
+ case 0x7a00:
+ len = (data[0] & 0xff) + 2;
+ if (len != 4)
+ fprintf(out, "Bad count in PIPE_CONTROL\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "PIPE_CONTROL");
+
+ switch ((data[0] >> 14) & 0x3) {
+ case 0: desc1 = "no write"; break;
+ case 1: desc1 = "qword write"; break;
+ case 2: desc1 = "PS_DEPTH_COUNT write"; break;
+ case 3: desc1 = "TIMESTAMP write"; break;
+ }
+ instr_out(data, hw_offset, 0,
+ "PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, "
+ "%sinst flush\n",
+ desc1,
+ data[0] & (1 << 13) ? "" : "no ",
+ data[0] & (1 << 12) ? "" : "no ",
+ data[0] & (1 << 11) ? "" : "no ");
+ instr_out(data, hw_offset, 1, "destination address\n");
+ instr_out(data, hw_offset, 2, "immediate dword low\n");
+ instr_out(data, hw_offset, 3, "immediate dword high\n");
return len;
case 0x7b00:
@@ -1633,39 +1899,41 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
return len;
}
- for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
- if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < Elements(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if ((data[0] & 0xffff0000) >> 16 == opcode_3d->opcode) {
unsigned int i;
len = 1;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_965 opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
static int
-decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode;
+ unsigned int idx;
+ uint32_t opcode;
struct {
uint32_t opcode;
@@ -1689,41 +1957,44 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure
{ 0x0f, 1, 1, "3DSTATE_MODES_2" },
{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
{ 0x16, 1, 1, "3DSTATE_MODES_4" },
- };
+ }, *opcode_3d;
+
+ opcode = (data[0] & 0x1f000000) >> 24;
- switch ((data[0] & 0x1f000000) >> 24) {
+ switch (opcode) {
case 0x1f:
return decode_3d_primitive(data, count, hw_offset, failures);
case 0x1d:
- return decode_3d_1d(data, count, hw_offset, failures, 1);
+ return decode_3d_1d(data, count, hw_offset, devid, failures);
case 0x1c:
return decode_3d_1c(data, count, hw_offset, failures);
}
- for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
- if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < Elements(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if ((data[0] & 0x1f000000) >> 24 == opcode_3d->opcode) {
unsigned int len = 1, i;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_i830 opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1736,8 +2007,12 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure
* \param hw_offset hardware address for the buffer
*/
int
-intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
+intel_decode(const uint32_t *data, int count,
+ uint32_t hw_offset,
+ uint32_t devid,
+ uint32_t ignore_end_of_batchbuffer)
{
+ int ret;
int index = 0;
int failures = 0;
@@ -1746,8 +2021,23 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid
while (index < count) {
switch ((data[index] & 0xe0000000) >> 29) {
case 0x0:
- index += decode_mi(data + index, count - index,
+ ret = decode_mi(data + index, count - index,
hw_offset + index * 4, &failures);
+
+ /* If MI_BATCHBUFFER_END happened, then dump the rest of the
+ * output in case we some day want it in debugging, but don't
+ * decode it since it'll just confuse in the common case.
+ */
+ if (ret == -1) {
+ if (ignore_end_of_batchbuffer) {
+ index++;
+ } else {
+ for (index = index + 1; index < count; index++) {
+ instr_out(data, hw_offset, index, "\n");
+ }
+ }
+ } else
+ index += ret;
break;
case 0x2:
index += decode_2d(data + index, count - index,
@@ -1756,13 +2046,16 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid
case 0x3:
if (IS_965(devid)) {
index += decode_3d_965(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
} else if (IS_9XX(devid)) {
index += decode_3d(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
} else {
index += decode_3d_i830(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
}
break;
default:
diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h
index 7683097b86..7e7c108c0c 100644
--- a/src/gallium/drivers/i965/intel_decode.h
+++ b/src/gallium/drivers/i965/intel_decode.h
@@ -25,5 +25,7 @@
*
*/
-int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
+#include "pipe/p_compiler.h"
+
+int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, uint32_t ignore_end_of_batchbuffer);
void intel_decode_context_reset(void);
diff --git a/src/gallium/drivers/i965/intel_structs.h b/src/gallium/drivers/i965/intel_structs.h
index 522e3bd92c..ec6eec8910 100644
--- a/src/gallium/drivers/i965/intel_structs.h
+++ b/src/gallium/drivers/i965/intel_structs.h
@@ -1,6 +1,8 @@
#ifndef INTEL_STRUCTS_H
#define INTEL_STRUCTS_H
+#include "brw_types.h"
+
struct br0 {
GLuint length:8;
GLuint pad0:3;
diff --git a/src/gallium/drivers/identity/SConscript b/src/gallium/drivers/identity/SConscript
index b364e0acc8..d24d1ec7c6 100644
--- a/src/gallium/drivers/identity/SConscript
+++ b/src/gallium/drivers/identity/SConscript
@@ -10,4 +10,6 @@ identity = env.ConvenienceLibrary(
'id_screen.c',
])
+env.Alias('identity', identity)
+
Export('identity')
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index de83c24905..3efbd6a246 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -577,17 +577,13 @@ identity_set_index_buffer(struct pipe_context *_pipe,
static void
identity_resource_copy_region(struct pipe_context *_pipe,
struct pipe_resource *_dst,
- struct pipe_subresource subdst,
+ unsigned dst_level,
unsigned dstx,
unsigned dsty,
unsigned dstz,
struct pipe_resource *_src,
- struct pipe_subresource subsrc,
- unsigned srcx,
- unsigned srcy,
- unsigned srcz,
- unsigned width,
- unsigned height)
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
struct identity_context *id_pipe = identity_context(_pipe);
struct identity_resource *id_resource_dst = identity_resource(_dst);
@@ -598,17 +594,13 @@ identity_resource_copy_region(struct pipe_context *_pipe,
pipe->resource_copy_region(pipe,
dst,
- subdst,
+ dst_level,
dstx,
dsty,
dstz,
src,
- subsrc,
- srcx,
- srcy,
- srcz,
- width,
- height);
+ src_level,
+ src_box);
}
static void
@@ -690,8 +682,8 @@ identity_flush(struct pipe_context *_pipe,
static unsigned int
identity_is_resource_referenced(struct pipe_context *_pipe,
struct pipe_resource *_resource,
- unsigned face,
- unsigned level)
+ unsigned level,
+ int layer)
{
struct identity_context *id_pipe = identity_context(_pipe);
struct identity_resource *id_resource = identity_resource(_resource);
@@ -700,8 +692,8 @@ identity_is_resource_referenced(struct pipe_context *_pipe,
return pipe->is_resource_referenced(pipe,
resource,
- face,
- level);
+ level,
+ layer);
}
static struct pipe_sampler_view *
@@ -732,10 +724,38 @@ identity_context_sampler_view_destroy(struct pipe_context *_pipe,
identity_sampler_view(_view));
}
+static struct pipe_surface *
+identity_context_create_surface(struct pipe_context *_pipe,
+ struct pipe_resource *_resource,
+ const struct pipe_surface *templ)
+{
+ struct identity_context *id_context = identity_context(_pipe);
+ struct identity_resource *id_resource = identity_resource(_resource);
+ struct pipe_context *pipe = id_context->pipe;
+ struct pipe_resource *resource = id_resource->resource;
+ struct pipe_surface *result;
+
+ result = pipe->create_surface(pipe,
+ resource,
+ templ);
+
+ if (result)
+ return identity_surface_create(id_context, id_resource, result);
+ return NULL;
+}
+
+static void
+identity_context_surface_destroy(struct pipe_context *_pipe,
+ struct pipe_surface *_surf)
+{
+ identity_surface_destroy(identity_context(_pipe),
+ identity_surface(_surf));
+}
+
static struct pipe_transfer *
identity_context_get_transfer(struct pipe_context *_context,
struct pipe_resource *_resource,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box)
{
@@ -747,7 +767,7 @@ identity_context_get_transfer(struct pipe_context *_context,
result = context->get_transfer(context,
resource,
- sr,
+ level,
usage,
box);
@@ -812,12 +832,12 @@ identity_context_transfer_unmap(struct pipe_context *_context,
static void
identity_context_transfer_inline_write(struct pipe_context *_context,
struct pipe_resource *_resource,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box,
const void *data,
unsigned stride,
- unsigned slice_stride)
+ unsigned layer_stride)
{
struct identity_context *id_context = identity_context(_context);
struct identity_resource *id_resource = identity_resource(_resource);
@@ -826,12 +846,12 @@ identity_context_transfer_inline_write(struct pipe_context *_context,
context->transfer_inline_write(context,
resource,
- sr,
+ level,
usage,
box,
data,
stride,
- slice_stride);
+ layer_stride);
}
@@ -899,6 +919,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
id_pipe->base.clear_depth_stencil = identity_clear_depth_stencil;
id_pipe->base.flush = identity_flush;
id_pipe->base.is_resource_referenced = identity_is_resource_referenced;
+ id_pipe->base.create_surface = identity_context_create_surface;
+ id_pipe->base.surface_destroy = identity_context_surface_destroy;
id_pipe->base.create_sampler_view = identity_context_create_sampler_view;
id_pipe->base.sampler_view_destroy = identity_context_sampler_view_destroy;
id_pipe->base.get_transfer = identity_context_get_transfer;
diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c
index 593928f399..6345441052 100644
--- a/src/gallium/drivers/identity/id_objects.c
+++ b/src/gallium/drivers/identity/id_objects.c
@@ -71,7 +71,8 @@ identity_resource_destroy(struct identity_resource *id_resource)
struct pipe_surface *
-identity_surface_create(struct identity_resource *id_resource,
+identity_surface_create(struct identity_context *id_context,
+ struct identity_resource *id_resource,
struct pipe_surface *surface)
{
struct identity_surface *id_surface;
@@ -100,10 +101,12 @@ error:
}
void
-identity_surface_destroy(struct identity_surface *id_surface)
+identity_surface_destroy(struct identity_context *id_context,
+ struct identity_surface *id_surface)
{
pipe_resource_reference(&id_surface->base.texture, NULL);
- pipe_surface_reference(&id_surface->surface, NULL);
+ id_context->pipe->surface_destroy(id_context->pipe,
+ id_surface->surface);
FREE(id_surface);
}
diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h
index e8deabf4fc..181f2d6623 100644
--- a/src/gallium/drivers/identity/id_objects.h
+++ b/src/gallium/drivers/identity/id_objects.h
@@ -147,11 +147,13 @@ void
identity_resource_destroy(struct identity_resource *id_resource);
struct pipe_surface *
-identity_surface_create(struct identity_resource *id_resource,
+identity_surface_create(struct identity_context *id_context,
+ struct identity_resource *id_resource,
struct pipe_surface *surface);
void
-identity_surface_destroy(struct identity_surface *id_surface);
+identity_surface_destroy(struct identity_context *id_context,
+ struct identity_surface *id_surface);
struct pipe_sampler_view *
identity_sampler_view_create(struct identity_context *id_context,
diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c
index 5fb464b414..644481bb74 100644
--- a/src/gallium/drivers/identity/id_screen.c
+++ b/src/gallium/drivers/identity/id_screen.c
@@ -189,39 +189,6 @@ identity_screen_resource_destroy(struct pipe_screen *screen,
identity_resource_destroy(identity_resource(_resource));
}
-static struct pipe_surface *
-identity_screen_get_tex_surface(struct pipe_screen *_screen,
- struct pipe_resource *_resource,
- unsigned face,
- unsigned level,
- unsigned zslice,
- unsigned usage)
-{
- struct identity_screen *id_screen = identity_screen(_screen);
- struct identity_resource *id_resource = identity_resource(_resource);
- struct pipe_screen *screen = id_screen->screen;
- struct pipe_resource *resource = id_resource->resource;
- struct pipe_surface *result;
-
- result = screen->get_tex_surface(screen,
- resource,
- face,
- level,
- zslice,
- usage);
-
- if (result)
- return identity_surface_create(id_resource, result);
- return NULL;
-}
-
-static void
-identity_screen_tex_surface_destroy(struct pipe_surface *_surface)
-{
- identity_surface_destroy(identity_surface(_surface));
-}
-
-
static struct pipe_resource *
identity_screen_user_buffer_create(struct pipe_screen *_screen,
@@ -247,16 +214,18 @@ identity_screen_user_buffer_create(struct pipe_screen *_screen,
static void
identity_screen_flush_frontbuffer(struct pipe_screen *_screen,
- struct pipe_surface *_surface,
+ struct pipe_resource *_resource,
+ unsigned level, unsigned layer,
void *context_private)
{
struct identity_screen *id_screen = identity_screen(_screen);
- struct identity_surface *id_surface = identity_surface(_surface);
+ struct identity_resource *id_resource = identity_resource(_resource);
struct pipe_screen *screen = id_screen->screen;
- struct pipe_surface *surface = id_surface->surface;
+ struct pipe_resource *resource = id_resource->resource;
screen->flush_frontbuffer(screen,
- surface,
+ resource,
+ level, layer,
context_private);
}
@@ -323,8 +292,6 @@ identity_screen_create(struct pipe_screen *screen)
id_screen->base.resource_from_handle = identity_screen_resource_from_handle;
id_screen->base.resource_get_handle = identity_screen_resource_get_handle;
id_screen->base.resource_destroy = identity_screen_resource_destroy;
- id_screen->base.get_tex_surface = identity_screen_get_tex_surface;
- id_screen->base.tex_surface_destroy = identity_screen_tex_surface_destroy;
id_screen->base.user_buffer_create = identity_screen_user_buffer_create;
id_screen->base.flush_frontbuffer = identity_screen_flush_frontbuffer;
id_screen->base.fence_reference = identity_screen_fence_reference;
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 669e42e300..4068bed393 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -3,8 +3,6 @@ include $(TOP)/configs/current
LIBNAME = llvmpipe
-DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
-
C_SOURCES = \
lp_bld_alpha.c \
lp_bld_blend_aos.c \
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
index e50643790c..518969c320 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -43,7 +43,7 @@
void
-lp_build_alpha_test(LLVMBuilderRef builder,
+lp_build_alpha_test(struct gallivm_state *gallivm,
unsigned func,
struct lp_type type,
struct lp_build_mask_context *mask,
@@ -54,7 +54,7 @@ lp_build_alpha_test(LLVMBuilderRef builder,
struct lp_build_context bld;
LLVMValueRef test;
- lp_build_context_init(&bld, builder, type);
+ lp_build_context_init(&bld, gallivm, type);
test = lp_build_cmp(&bld, func, alpha, ref);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
index 27ca8aad4d..06206a24d8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -34,16 +34,18 @@
#ifndef LP_BLD_ALPHA_H
#define LP_BLD_ALPHA_H
+#include "pipe/p_compiler.h"
#include "gallivm/lp_bld.h"
struct pipe_alpha_state;
+struct gallivm_state;
struct lp_type;
struct lp_build_mask_context;
void
-lp_build_alpha_test(LLVMBuilderRef builder,
+lp_build_alpha_test(struct gallivm_state *gallivm,
unsigned func,
struct lp_type type,
struct lp_build_mask_context *mask,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
index 5cecec3d7f..f82ae30bb7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -30,6 +30,7 @@
#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_init.h"
#include "pipe/p_format.h"
@@ -61,7 +62,7 @@ lp_build_blend_func(struct lp_build_context *bld,
LLVMValueRef
-lp_build_blend_aos(LLVMBuilderRef builder,
+lp_build_blend_aos(struct gallivm_state *gallivm,
const struct pipe_blend_state *blend,
struct lp_type type,
unsigned rt,
@@ -72,7 +73,7 @@ lp_build_blend_aos(LLVMBuilderRef builder,
void
-lp_build_blend_soa(LLVMBuilderRef builder,
+lp_build_blend_soa(struct gallivm_state *gallivm,
const struct pipe_blend_state *blend,
struct lp_type type,
unsigned rt,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index d1c9b88f9b..c342346a36 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -301,7 +301,7 @@ lp_build_blend_func(struct lp_build_context *bld,
LLVMValueRef
-lp_build_blend_aos(LLVMBuilderRef builder,
+lp_build_blend_aos(struct gallivm_state *gallivm,
const struct pipe_blend_state *blend,
struct lp_type type,
unsigned rt,
@@ -322,7 +322,7 @@ lp_build_blend_aos(LLVMBuilderRef builder,
/* Setup build context */
memset(&bld, 0, sizeof bld);
- lp_build_context_init(&bld.base, builder, type);
+ lp_build_context_init(&bld.base, gallivm, type);
bld.src = src;
bld.dst = dst;
bld.const_ = const_;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
index 30d261e979..4d5bc9642d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
@@ -73,6 +73,7 @@
#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_init.h"
#include "lp_bld_blend.h"
@@ -211,7 +212,7 @@ lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
* \param res the result/output
*/
void
-lp_build_blend_soa(LLVMBuilderRef builder,
+lp_build_blend_soa(struct gallivm_state *gallivm,
const struct pipe_blend_state *blend,
struct lp_type type,
unsigned rt,
@@ -220,6 +221,7 @@ lp_build_blend_soa(LLVMBuilderRef builder,
LLVMValueRef con[4],
LLVMValueRef res[4])
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_build_blend_soa_context bld;
unsigned i, j, k;
@@ -227,7 +229,7 @@ lp_build_blend_soa(LLVMBuilderRef builder,
/* Setup build context */
memset(&bld, 0, sizeof bld);
- lp_build_context_init(&bld.base, builder, type);
+ lp_build_context_init(&bld.base, gallivm, type);
for (i = 0; i < 4; ++i) {
bld.src[i] = src[i];
bld.dst[i] = dst[i];
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 7eb76d4fb3..1bf741194c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -97,6 +97,7 @@ lp_build_stencil_test_single(struct lp_build_context *bld,
LLVMValueRef stencilRef,
LLVMValueRef stencilVals)
{
+ LLVMBuilderRef builder = bld->gallivm->builder;
const unsigned stencilMax = 255; /* XXX fix */
struct lp_type type = bld->type;
LLVMValueRef res;
@@ -107,10 +108,10 @@ lp_build_stencil_test_single(struct lp_build_context *bld,
if (stencil->valuemask != stencilMax) {
/* compute stencilRef = stencilRef & valuemask */
- LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask);
- stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, "");
+ LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask);
+ stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, "");
/* compute stencilVals = stencilVals & valuemask */
- stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, "");
+ stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, "");
}
res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
@@ -167,9 +168,10 @@ lp_build_stencil_op_single(struct lp_build_context *bld,
LLVMValueRef stencilVals)
{
+ LLVMBuilderRef builder = bld->gallivm->builder;
struct lp_type type = bld->type;
LLVMValueRef res;
- LLVMValueRef max = lp_build_const_int_vec(type, 0xff);
+ LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff);
unsigned stencil_op;
assert(type.sign);
@@ -210,15 +212,15 @@ lp_build_stencil_op_single(struct lp_build_context *bld,
break;
case PIPE_STENCIL_OP_INCR_WRAP:
res = lp_build_add(bld, stencilVals, bld->one);
- res = LLVMBuildAnd(bld->builder, res, max, "");
+ res = LLVMBuildAnd(builder, res, max, "");
break;
case PIPE_STENCIL_OP_DECR_WRAP:
res = lp_build_sub(bld, stencilVals, bld->one);
- res = LLVMBuildAnd(bld->builder, res, max, "");
+ res = LLVMBuildAnd(builder, res, max, "");
break;
case PIPE_STENCIL_OP_INVERT:
- res = LLVMBuildNot(bld->builder, stencilVals, "");
- res = LLVMBuildAnd(bld->builder, res, max, "");
+ res = LLVMBuildNot(builder, stencilVals, "");
+ res = LLVMBuildAnd(builder, res, max, "");
break;
default:
assert(0 && "bad stencil op mode");
@@ -242,6 +244,7 @@ lp_build_stencil_op(struct lp_build_context *bld,
LLVMValueRef front_facing)
{
+ LLVMBuilderRef builder = bld->gallivm->builder;
LLVMValueRef res;
assert(stencil[0].enabled);
@@ -262,10 +265,11 @@ lp_build_stencil_op(struct lp_build_context *bld,
if (stencil->writemask != 0xff) {
/* mask &= stencil->writemask */
- LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask);
- mask = LLVMBuildAnd(bld->builder, mask, writemask, "");
+ LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
+ stencil->writemask);
+ mask = LLVMBuildAnd(builder, mask, writemask, "");
/* res = (res & mask) | (stencilVals & ~mask) */
- res = lp_build_select_bitwise(bld, writemask, res, stencilVals);
+ res = lp_build_select_bitwise(bld, mask, res, stencilVals);
}
else {
/* res = mask ? res : stencilVals */
@@ -411,25 +415,27 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
* \param counter is a pointer of the uint32 counter.
*/
void
-lp_build_occlusion_count(LLVMBuilderRef builder,
+lp_build_occlusion_count(struct gallivm_state *gallivm,
struct lp_type type,
LLVMValueRef maskvalue,
LLVMValueRef counter)
{
- LLVMValueRef countmask = lp_build_const_int_vec(type, 1);
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMContextRef context = gallivm->context;
+ LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
- LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16);
+ LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8TypeInContext(context), 16);
LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti");
LLVMValueRef maskarray[4] = {
- LLVMConstInt(LLVMInt32Type(), 0, 0),
- LLVMConstInt(LLVMInt32Type(), 4, 0),
- LLVMConstInt(LLVMInt32Type(), 8, 0),
- LLVMConstInt(LLVMInt32Type(), 12, 0),
+ lp_build_const_int32(gallivm, 0),
+ lp_build_const_int32(gallivm, 4),
+ lp_build_const_int32(gallivm, 8),
+ lp_build_const_int32(gallivm, 12)
};
LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4);
LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev");
- LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle");
- LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle);
+ LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32TypeInContext(context), "shuffle");
+ LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32TypeInContext(context), shuffle);
LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig");
LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr");
LLVMBuildStore(builder, incr, counter);
@@ -452,7 +458,7 @@ lp_build_occlusion_count(LLVMBuilderRef builder,
* \param facing contains boolean value indicating front/back facing polygon
*/
void
-lp_build_depth_stencil_test(LLVMBuilderRef builder,
+lp_build_depth_stencil_test(struct gallivm_state *gallivm,
const struct pipe_depth_state *depth,
const struct pipe_stencil_state stencil[2],
struct lp_type z_src_type,
@@ -465,6 +471,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
LLVMValueRef *zs_value,
boolean do_branch)
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_type z_type;
struct lp_build_context z_bld;
struct lp_build_context s_bld;
@@ -537,11 +544,11 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
/* Setup build context for Z vals */
- lp_build_context_init(&z_bld, builder, z_type);
+ lp_build_context_init(&z_bld, gallivm, z_type);
/* Setup build context for stencil vals */
s_type = lp_type_int_vec(z_type.width);
- lp_build_context_init(&s_bld, builder, s_type);
+ lp_build_context_init(&s_bld, gallivm, s_type);
/* Load current z/stencil value from z/stencil buffer */
zs_dst_ptr = LLVMBuildBitCast(builder,
@@ -559,14 +566,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
if (z_mask != 0xffffffff) {
- z_bitmask = lp_build_const_int_vec(z_type, z_mask);
+ z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
}
/*
* Align the framebuffer Z 's LSB to the right.
*/
if (z_shift) {
- LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
} else if (z_bitmask) {
/* TODO: Instead of loading a mask from memory and ANDing, it's
@@ -580,7 +587,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
if (s_shift) {
- LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift);
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
stencil_shift = shift; /* used below */
}
@@ -589,7 +596,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
}
if (s_mask != 0xffffffff) {
- LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask);
+ LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
}
@@ -600,12 +607,13 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
if (stencil[0].enabled) {
if (face) {
- LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
/* front_facing = face != 0 ? ~0 : 0 */
front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
front_facing = LLVMBuildSExt(builder, front_facing,
- LLVMIntType(s_bld.type.length*s_bld.type.width),
+ LLVMIntTypeInContext(gallivm->context,
+ s_bld.type.length*s_bld.type.width),
"");
front_facing = LLVMBuildBitCast(builder, front_facing,
s_bld.int_vec_type, "");
@@ -642,7 +650,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
*/
if (!z_type.floating) {
- z_src = lp_build_clamped_float_to_unsigned_norm(builder,
+ z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
z_src_type,
z_width,
z_src);
@@ -657,7 +665,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
assert(z_src_type.norm);
assert(!z_type.floating);
if (z_src_type.width > z_width) {
- LLVMValueRef shift = lp_build_const_int_vec(z_src_type,
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
z_src_type.width - z_width);
z_src = LLVMBuildLShr(builder, z_src, shift, "");
}
@@ -710,7 +718,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
z_fail_mask, front_facing);
/* apply Z-pass operator */
- z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, "");
+ z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
stencil_refs, stencil_vals,
z_pass_mask, front_facing);
@@ -720,7 +728,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
/* No depth test: apply Z-pass operator to stencil buffer values which
* passed the stencil test.
*/
- s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, "");
+ s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
stencil_refs, stencil_vals,
s_pass_mask, front_facing);
@@ -728,11 +736,11 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
/* Put Z and ztencil bits in the right place */
if (z_dst && z_shift) {
- LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
z_dst = LLVMBuildShl(builder, z_dst, shift, "");
}
if (stencil_vals && stencil_shift)
- stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals,
+ stencil_vals = LLVMBuildShl(builder, stencil_vals,
stencil_shift, "");
/* Finally, merge/store the z/stencil values */
@@ -740,7 +748,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder,
(stencil[0].enabled && stencil[0].writemask)) {
if (z_dst && stencil_vals)
- zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, "");
+ zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
else if (z_dst)
zs_dst = z_dst;
else
@@ -775,7 +783,7 @@ lp_build_depth_write(LLVMBuilderRef builder,
void
-lp_build_deferred_depth_write(LLVMBuilderRef builder,
+lp_build_deferred_depth_write(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
@@ -785,11 +793,12 @@ lp_build_deferred_depth_write(LLVMBuilderRef builder,
struct lp_type z_type;
struct lp_build_context z_bld;
LLVMValueRef z_dst;
+ LLVMBuilderRef builder = gallivm->builder;
/* XXX: pointlessly redo type logic:
*/
z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
- lp_build_context_init(&z_bld, builder, z_type);
+ lp_build_context_init(&z_bld, gallivm, z_type);
zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
LLVMPointerType(z_bld.vec_type, 0), "");
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index a54ef3a711..e01fc46ec1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -36,10 +36,14 @@
#define LP_BLD_DEPTH_H
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
#include "gallivm/lp_bld.h"
struct pipe_depth_state;
+struct gallivm_state;
struct util_format_description;
struct lp_type;
struct lp_build_mask_context;
@@ -51,7 +55,7 @@ lp_depth_type(const struct util_format_description *format_desc,
void
-lp_build_depth_stencil_test(LLVMBuilderRef builder,
+lp_build_depth_stencil_test(struct gallivm_state *gallivm,
const struct pipe_depth_state *depth,
const struct pipe_stencil_state stencil[2],
struct lp_type type,
@@ -71,7 +75,7 @@ lp_build_depth_write(LLVMBuilderRef builder,
LLVMValueRef zs_value);
void
-lp_build_deferred_depth_write(LLVMBuilderRef builder,
+lp_build_deferred_depth_write(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
@@ -79,7 +83,7 @@ lp_build_deferred_depth_write(LLVMBuilderRef builder,
LLVMValueRef zs_value);
void
-lp_build_occlusion_count(LLVMBuilderRef builder,
+lp_build_occlusion_count(struct gallivm_state *gallivm,
struct lp_type type,
LLVMValueRef maskvalue,
LLVMValueRef counter);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index c9da8900d0..45ddf547bf 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -127,13 +127,14 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef dady_ptr)
{
struct lp_build_context *coeff_bld = &bld->coeff_bld;
- LLVMBuilderRef builder = coeff_bld->builder;
+ struct gallivm_state *gallivm = coeff_bld->gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
- LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
- LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
- LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
+ LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
+ LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
+ LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
+ LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
unsigned attrib;
unsigned chan;
@@ -144,7 +145,8 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
const unsigned interp = bld->interp[attrib];
for (chan = 0; chan < NUM_CHANNELS; ++chan) {
if (mask & (1 << chan)) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
+ LLVMValueRef index = lp_build_const_int32(gallivm,
+ attrib * NUM_CHANNELS + chan);
LLVMValueRef a0 = zero;
LLVMValueRef dadx = zero;
LLVMValueRef dady = zero;
@@ -231,7 +233,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
* a = {a, a, a, a}
*/
- a = lp_build_broadcast(builder, coeff_bld->vec_type, a);
+ a = lp_build_broadcast(gallivm, coeff_bld->vec_type, a);
/*
* Compute the attrib values on the upper-left corner of each quad.
@@ -273,12 +275,14 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
*/
static void
attribs_update(struct lp_build_interp_soa_context *bld,
+ struct gallivm_state *gallivm,
int quad_index,
int start,
int end)
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context *coeff_bld = &bld->coeff_bld;
- LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
+ LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_index);
LLVMValueRef oow = NULL;
unsigned attrib;
unsigned chan;
@@ -308,7 +312,7 @@ attribs_update(struct lp_build_interp_soa_context *bld,
* Broadcast the attribute value for this quad into all elements
*/
- a = LLVMBuildShuffleVector(coeff_bld->builder,
+ a = LLVMBuildShuffleVector(builder,
a, coeff_bld->undef, shuffle, "");
/*
@@ -380,10 +384,11 @@ pos_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef x0,
LLVMValueRef y0)
{
+ LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
struct lp_build_context *coeff_bld = &bld->coeff_bld;
- bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, "");
- bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, "");
+ bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
+ bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
}
@@ -392,6 +397,7 @@ pos_init(struct lp_build_interp_soa_context *bld,
*/
void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+ struct gallivm_state *gallivm,
unsigned num_inputs,
const struct lp_shader_input *inputs,
LLVMBuilderRef builder,
@@ -417,7 +423,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
/* XXX: we don't support interpolating into any other types */
assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
- lp_build_context_init(&bld->coeff_bld, builder, coeff_type);
+ lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
/* For convenience */
bld->pos = bld->attribs[0];
@@ -453,19 +459,21 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
*/
void
lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
+ struct gallivm_state *gallivm,
int quad_index)
{
assert(quad_index < 4);
- attribs_update(bld, quad_index, 1, bld->num_attribs);
+ attribs_update(bld, gallivm, quad_index, 1, bld->num_attribs);
}
void
lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
+ struct gallivm_state *gallivm,
int quad_index)
{
assert(quad_index < 4);
- attribs_update(bld, quad_index, 0, 1);
+ attribs_update(bld, gallivm, quad_index, 0, 1);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index a7ebdd1bfa..b58b2dc115 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -102,6 +102,7 @@ struct lp_build_interp_soa_context
void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+ struct gallivm_state *gallivm,
unsigned num_inputs,
const struct lp_shader_input *inputs,
LLVMBuilderRef builder,
@@ -114,11 +115,13 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
void
lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
- int quad_index);
+ struct gallivm_state *gallivm,
+ int quad_index);
void
lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
- int quad_index);
+ struct gallivm_state *gallivm,
+ int quad_index);
#endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 763432ed71..644201ddf7 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -50,6 +50,46 @@
DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE)
+/** shared by all contexts */
+unsigned llvmpipe_variant_count;
+
+
+/**
+ * This function is called by the gallivm "garbage collector" when
+ * the LLVM global data structures are freed. We must free all LLVM-related
+ * data. Specifically, all JIT'd shader variants.
+ */
+static void
+garbage_collect_callback(void *cb_data)
+{
+ struct llvmpipe_context *lp = (struct llvmpipe_context *) cb_data;
+ struct lp_fs_variant_list_item *li;
+
+ /* Free all the context's shader variants */
+ li = first_elem(&lp->fs_variants_list);
+ while (!at_end(&lp->fs_variants_list, li)) {
+ struct lp_fs_variant_list_item *next = next_elem(li);
+ llvmpipe_remove_shader_variant(lp, li->base);
+ li = next;
+ }
+
+ /* Free all the context's primitive setup variants */
+ lp_delete_setup_variants(lp);
+
+ /* release references to setup variants, shaders */
+ lp_setup_set_setup_variant(lp->setup, NULL);
+ lp_setup_set_fs_variant(lp->setup, NULL);
+ lp_setup_reset(lp->setup);
+
+ /* This type will be recreated upon demand */
+ lp->jit_context_ptr_type = NULL;
+
+ /* mark all state as dirty to ensure new shaders are jit'd, etc. */
+ lp->dirty = ~0;
+}
+
+
+
static void llvmpipe_destroy( struct pipe_context *pipe )
{
struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
@@ -57,6 +97,9 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
lp_print_counters();
+ gallivm_remove_garbage_collector_callback(garbage_collect_callback,
+ llvmpipe);
+
/* This will also destroy llvmpipe->setup:
*/
if (llvmpipe->draw)
@@ -82,7 +125,11 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
}
}
- lp_delete_setup_variants(llvmpipe);
+ for (i = 0; i < llvmpipe->num_vertex_buffers; i++) {
+ pipe_resource_reference(&llvmpipe->vertex_buffer[i].buffer, NULL);
+ }
+
+ gallivm_destroy(llvmpipe->gallivm);
align_free( llvmpipe );
}
@@ -110,8 +157,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
memset(llvmpipe, 0, sizeof *llvmpipe);
make_empty_list(&llvmpipe->fs_variants_list);
+
make_empty_list(&llvmpipe->setup_variants_list);
+
llvmpipe->pipe.winsys = screen->winsys;
llvmpipe->pipe.screen = screen;
llvmpipe->pipe.priv = priv;
@@ -136,10 +185,12 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
llvmpipe_init_context_resource_funcs( &llvmpipe->pipe );
llvmpipe_init_surface_functions(llvmpipe);
+ llvmpipe->gallivm = gallivm_create();
+
/*
* Create drawing context and plug our rendering stage into it.
*/
- llvmpipe->draw = draw_create(&llvmpipe->pipe);
+ llvmpipe->draw = draw_create_gallivm(&llvmpipe->pipe, llvmpipe->gallivm);
if (!llvmpipe->draw)
goto fail;
@@ -173,6 +224,9 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
lp_reset_counters();
+ gallivm_register_garbage_collector_callback(garbage_collect_callback,
+ llvmpipe);
+
return &llvmpipe->pipe;
fail:
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index a35e09e8b4..503f09d810 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -126,14 +126,27 @@ struct llvmpipe_context {
unsigned tex_timestamp;
boolean no_rast;
+ /** List of all fragment shader variants */
struct lp_fs_variant_list_item fs_variants_list;
unsigned nr_fs_variants;
+ /** JIT code generation */
+ struct gallivm_state *gallivm;
+ LLVMTypeRef jit_context_ptr_type;
+
struct lp_setup_variant_list_item setup_variants_list;
unsigned nr_setup_variants;
};
+/**
+ * Fragment and setup variant count, used to trigger garbage collection.
+ * This is global since all variants in all contexts will be free when
+ * we do garbage collection.
+ */
+extern unsigned llvmpipe_variant_count;
+
+
struct pipe_context *
llvmpipe_create_context( struct pipe_screen *screen, void *priv );
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index e2c723b7a8..85e3cdec82 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -56,6 +56,13 @@ llvmpipe_flush( struct pipe_context *pipe,
/* ask the setup module to flush */
lp_setup_flush(llvmpipe->setup, flags, fence, reason);
+
+ if (llvmpipe_variant_count > 1000) {
+ /* time to do a garbage collection */
+ gallivm_garbage_collect(llvmpipe->gallivm);
+ llvmpipe_variant_count = 0;
+ }
+
/* Enable to dump BMPs of the color/depth buffers each frame */
if (0) {
if (flags & PIPE_FLUSH_FRAME) {
@@ -101,8 +108,8 @@ llvmpipe_finish( struct pipe_context *pipe,
boolean
llvmpipe_flush_resource(struct pipe_context *pipe,
struct pipe_resource *resource,
- unsigned face,
unsigned level,
+ int layer,
unsigned flush_flags,
boolean read_only,
boolean cpu_access,
@@ -111,7 +118,7 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
{
unsigned referenced;
- referenced = pipe->is_resource_referenced(pipe, resource, face, level);
+ referenced = pipe->is_resource_referenced(pipe, resource, level, layer);
if ((referenced & PIPE_REFERENCED_FOR_WRITE) ||
((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) {
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h
index 3626ce4a86..579d24c68a 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.h
+++ b/src/gallium/drivers/llvmpipe/lp_flush.h
@@ -47,8 +47,8 @@ llvmpipe_finish( struct pipe_context *pipe,
boolean
llvmpipe_flush_resource(struct pipe_context *pipe,
struct pipe_resource *resource,
- unsigned face,
unsigned level,
+ int layer,
unsigned flush_flags,
boolean read_only,
boolean cpu_access,
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index c540f9b362..482a902dd2 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -33,82 +33,83 @@
*/
-#include <llvm-c/Transforms/Scalar.h>
-
#include "util/u_memory.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_debug.h"
-#include "lp_screen.h"
-#include "gallivm/lp_bld_intr.h"
+#include "lp_context.h"
#include "lp_jit.h"
static void
-lp_jit_init_globals(struct llvmpipe_screen *screen)
+lp_jit_create_types(struct llvmpipe_context *lp)
{
+ struct gallivm_state *gallivm = lp->gallivm;
+ LLVMContextRef lc = gallivm->context;
LLVMTypeRef texture_type;
/* struct lp_jit_texture */
{
LLVMTypeRef elem_types[LP_JIT_TEXTURE_NUM_FIELDS];
- elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
- elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
- elem_types[LP_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
- elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_WIDTH] =
+ elem_types[LP_JIT_TEXTURE_HEIGHT] =
+ elem_types[LP_JIT_TEXTURE_DEPTH] =
+ elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32TypeInContext(lc);
elem_types[LP_JIT_TEXTURE_ROW_STRIDE] =
- LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_LEVELS);
elem_types[LP_JIT_TEXTURE_IMG_STRIDE] =
- LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_LEVELS);
+ LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TEXTURE_LEVELS);
elem_types[LP_JIT_TEXTURE_DATA] =
- LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
+ LLVMArrayType(LLVMPointerType(LLVMInt8TypeInContext(lc), 0),
LP_MAX_TEXTURE_LEVELS);
- elem_types[LP_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
- elem_types[LP_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
- elem_types[LP_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
+ elem_types[LP_JIT_TEXTURE_MIN_LOD] =
+ elem_types[LP_JIT_TEXTURE_MAX_LOD] =
+ elem_types[LP_JIT_TEXTURE_LOD_BIAS] = LLVMFloatTypeInContext(lc);
elem_types[LP_JIT_TEXTURE_BORDER_COLOR] =
- LLVMArrayType(LLVMFloatType(), 4);
+ LLVMArrayType(LLVMFloatTypeInContext(lc), 4);
+
+ texture_type = LLVMStructTypeInContext(lc, elem_types,
+ Elements(elem_types), 0);
- texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+ LLVMInvalidateStructLayout(gallivm->target, texture_type);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_WIDTH);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_HEIGHT);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, depth,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_DEPTH);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_LAST_LEVEL);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, row_stride,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_ROW_STRIDE);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, img_stride,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_IMG_STRIDE);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_DATA);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, min_lod,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_MIN_LOD);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, max_lod,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_MAX_LOD);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, lod_bias,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_LOD_BIAS);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, border_color,
- screen->target, texture_type,
+ gallivm->target, texture_type,
LP_JIT_TEXTURE_BORDER_COLOR);
LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
- screen->target, texture_type);
+ gallivm->target, texture_type);
- LLVMAddTypeName(screen->module, "texture", texture_type);
+ LLVMAddTypeName(gallivm->module, "texture", texture_type);
}
/* struct lp_jit_context */
@@ -116,44 +117,47 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
LLVMTypeRef elem_types[LP_JIT_CTX_COUNT];
LLVMTypeRef context_type;
- elem_types[LP_JIT_CTX_CONSTANTS] = LLVMPointerType(LLVMFloatType(), 0);
- elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatType();
- elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = LLVMInt32Type();
- elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32Type();
- elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8Type(), 0);
+ elem_types[LP_JIT_CTX_CONSTANTS] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0);
+ elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatTypeInContext(lc);
+ elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] =
+ elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32TypeInContext(lc);
+ elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
PIPE_MAX_SAMPLERS);
- context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+ context_type = LLVMStructTypeInContext(lc, elem_types,
+ Elements(elem_types), 0);
+
+ LLVMInvalidateStructLayout(gallivm->target, context_type);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
- screen->target, context_type,
+ gallivm->target, context_type,
LP_JIT_CTX_CONSTANTS);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value,
- screen->target, context_type,
+ gallivm->target, context_type,
LP_JIT_CTX_ALPHA_REF);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_front,
- screen->target, context_type,
+ gallivm->target, context_type,
LP_JIT_CTX_STENCIL_REF_FRONT);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back,
- screen->target, context_type,
+ gallivm->target, context_type,
LP_JIT_CTX_STENCIL_REF_BACK);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
- screen->target, context_type,
+ gallivm->target, context_type,
LP_JIT_CTX_BLEND_COLOR);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
- screen->target, context_type,
+ gallivm->target, context_type,
LP_JIT_CTX_TEXTURES);
LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
- screen->target, context_type);
+ gallivm->target, context_type);
- LLVMAddTypeName(screen->module, "context", context_type);
+ LLVMAddTypeName(gallivm->module, "context", context_type);
- screen->context_ptr_type = LLVMPointerType(context_type, 0);
+ lp->jit_context_ptr_type = LLVMPointerType(context_type, 0);
}
if (gallivm_debug & GALLIVM_DEBUG_IR) {
- LLVMDumpModule(screen->module);
+ LLVMDumpModule(gallivm->module);
}
}
@@ -161,8 +165,7 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
void
lp_jit_screen_cleanup(struct llvmpipe_screen *screen)
{
- if(screen->pass)
- LLVMDisposePassManager(screen->pass);
+ /* nothing */
}
@@ -170,30 +173,14 @@ void
lp_jit_screen_init(struct llvmpipe_screen *screen)
{
lp_build_init();
+}
- screen->module = lp_build_module;
- screen->provider = lp_build_provider;
- screen->engine = lp_build_engine;
- screen->target = lp_build_target;
-
- screen->pass = LLVMCreateFunctionPassManager(screen->provider);
- LLVMAddTargetData(screen->target, screen->pass);
-
- if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
- /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
- * but there are more on SVN. */
- /* TODO: Add more passes */
- LLVMAddCFGSimplificationPass(screen->pass);
- LLVMAddPromoteMemoryToRegisterPass(screen->pass);
- LLVMAddConstantPropagationPass(screen->pass);
- LLVMAddInstructionCombiningPass(screen->pass);
- LLVMAddGVNPass(screen->pass);
- } else {
- /* We need at least this pass to prevent the backends to fail in
- * unexpected ways.
- */
- LLVMAddPromoteMemoryToRegisterPass(screen->pass);
- }
- lp_jit_init_globals(screen);
+LLVMTypeRef
+lp_jit_get_context_type(struct llvmpipe_context *lp)
+{
+ if (!lp->jit_context_ptr_type)
+ lp_jit_create_types(lp);
+
+ return lp->jit_context_ptr_type;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 114f21f2d1..a6763dce17 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -120,23 +120,23 @@ enum {
};
-#define lp_jit_context_constants(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_CONSTANTS, "constants")
+#define lp_jit_context_constants(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_CONSTANTS, "constants")
-#define lp_jit_context_alpha_ref_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_ALPHA_REF, "alpha_ref_value")
+#define lp_jit_context_alpha_ref_value(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_ALPHA_REF, "alpha_ref_value")
-#define lp_jit_context_stencil_ref_front_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_FRONT, "stencil_ref_front")
+#define lp_jit_context_stencil_ref_front_value(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_STENCIL_REF_FRONT, "stencil_ref_front")
-#define lp_jit_context_stencil_ref_back_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back")
+#define lp_jit_context_stencil_ref_back_value(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back")
-#define lp_jit_context_blend_color(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color")
+#define lp_jit_context_blend_color(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color")
-#define lp_jit_context_textures(_builder, _ptr) \
- lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CTX_TEXTURES, "textures")
+#define lp_jit_context_textures(_gallivm, _ptr) \
+ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_TEXTURES, "textures")
@@ -162,4 +162,8 @@ void
lp_jit_screen_init(struct llvmpipe_screen *screen);
+LLVMTypeRef
+lp_jit_get_context_type(struct llvmpipe_context *lp);
+
+
#endif /* LP_JIT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h
index b23a100b87..455adf7d6f 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.h
+++ b/src/gallium/drivers/llvmpipe/lp_perf.h
@@ -33,6 +33,7 @@
#ifndef LP_PERF_H
#define LP_PERF_H
+#include "pipe/p_compiler.h"
/**
* Various counters
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index decf3bd449..dafadc1ea9 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -47,6 +47,7 @@
#ifdef DEBUG
int jit_line = 0;
const struct lp_rast_state *jit_state = NULL;
+const struct lp_rasterizer_task *jit_task = NULL;
#endif
@@ -119,8 +120,8 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
* and update the tile's layout info.
*/
(void) llvmpipe_get_texture_tile(lpt,
- zsbuf->face + zsbuf->zslice,
- zsbuf->level,
+ zsbuf->u.tex.first_layer,
+ zsbuf->u.tex.level,
usage,
task->x,
task->y);
@@ -288,7 +289,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
-
/**
* Convert the color tile from tiled to linear layout.
* This is generally only done when we're flushing the scene just prior to
@@ -306,15 +306,15 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task )
for (buf = 0; buf < scene->fb.nr_cbufs; buf++) {
struct pipe_surface *cbuf = scene->fb.cbufs[buf];
- const unsigned face_slice = cbuf->face + cbuf->zslice;
- const unsigned level = cbuf->level;
+ const unsigned layer = cbuf->u.tex.first_layer;
+ const unsigned level = cbuf->u.tex.level;
struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
if (!task->color_tiles[buf])
continue;
llvmpipe_unswizzle_cbuf_tile(lpt,
- face_slice,
+ layer,
level,
task->x, task->y,
task->color_tiles[buf]);
@@ -362,7 +362,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y);
/* run shader on 4x4 block */
- BEGIN_JIT_CALL(state);
+ BEGIN_JIT_CALL(state, task);
variant->jit_function[RAST_WHOLE]( &state->jit_context,
tile_x + x, tile_y + y,
inputs->frontfacing,
@@ -443,7 +443,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
assert(lp_check_alignment(state->jit_context.blend_color, 16));
/* run shader on 4x4 block */
- BEGIN_JIT_CALL(state);
+ BEGIN_JIT_CALL(state, task);
variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
x, y,
inputs->frontfacing,
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index b30408f097..cd686bc82c 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -45,13 +45,16 @@
*/
#ifdef DEBUG
+struct lp_rasterizer_task;
extern int jit_line;
extern const struct lp_rast_state *jit_state;
+extern const struct lp_rasterizer_task *jit_task;
-#define BEGIN_JIT_CALL(state) \
+#define BEGIN_JIT_CALL(state, task) \
do { \
jit_line = __LINE__; \
jit_state = state; \
+ jit_task = task; \
} while (0)
#define END_JIT_CALL() \
@@ -62,7 +65,7 @@ extern const struct lp_rast_state *jit_state;
#else
-#define BEGIN_JIT_CALL(X)
+#define BEGIN_JIT_CALL(X, Y)
#define END_JIT_CALL()
#endif
@@ -191,8 +194,8 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
if (usage != LP_TEX_USAGE_WRITE_ALL) {
llvmpipe_swizzle_cbuf_tile(lpt,
- cbuf->face + cbuf->zslice,
- cbuf->level,
+ cbuf->u.tex.first_layer,
+ cbuf->u.tex.level,
task->x, task->y,
task->color_tiles[buf]);
}
@@ -258,7 +261,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
depth = lp_rast_get_depth_block_pointer(task, x, y);
/* run shader on 4x4 block */
- BEGIN_JIT_CALL(state);
+ BEGIN_JIT_CALL(state, task);
variant->jit_function[RAST_WHOLE]( &state->jit_context,
x, y,
inputs->frontfacing,
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index a4fdf7cff3..5d0f5f8b7b 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -74,6 +74,7 @@ lp_scene_create( struct pipe_context *pipe )
void
lp_scene_destroy(struct lp_scene *scene)
{
+ lp_fence_reference(&scene->fence, NULL);
pipe_mutex_destroy(scene->mutex);
assert(scene->data.head->next == NULL);
FREE(scene->data.head);
@@ -136,30 +137,30 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
int i;
//LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
-
+
for (i = 0; i < scene->fb.nr_cbufs; i++) {
struct pipe_surface *cbuf = scene->fb.cbufs[i];
+ assert(cbuf->u.tex.first_layer == cbuf->u.tex.last_layer);
scene->cbufs[i].stride = llvmpipe_resource_stride(cbuf->texture,
- cbuf->level);
+ cbuf->u.tex.level);
scene->cbufs[i].map = llvmpipe_resource_map(cbuf->texture,
- cbuf->face,
- cbuf->level,
- cbuf->zslice,
+ cbuf->u.tex.level,
+ cbuf->u.tex.first_layer,
LP_TEX_USAGE_READ_WRITE,
LP_TEX_LAYOUT_LINEAR);
}
if (fb->zsbuf) {
struct pipe_surface *zsbuf = scene->fb.zsbuf;
- scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level);
+ assert(zsbuf->u.tex.first_layer == zsbuf->u.tex.last_layer);
+ scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->u.tex.level);
scene->zsbuf.blocksize =
util_format_get_blocksize(zsbuf->texture->format);
scene->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
- zsbuf->face,
- zsbuf->level,
- zsbuf->zslice,
+ zsbuf->u.tex.level,
+ zsbuf->u.tex.first_layer,
LP_TEX_USAGE_READ_WRITE,
LP_TEX_LAYOUT_NONE);
}
@@ -181,9 +182,8 @@ lp_scene_end_rasterization(struct lp_scene *scene )
if (scene->cbufs[i].map) {
struct pipe_surface *cbuf = scene->fb.cbufs[i];
llvmpipe_resource_unmap(cbuf->texture,
- cbuf->face,
- cbuf->level,
- cbuf->zslice);
+ cbuf->u.tex.level,
+ cbuf->u.tex.first_layer);
scene->cbufs[i].map = NULL;
}
}
@@ -192,9 +192,8 @@ lp_scene_end_rasterization(struct lp_scene *scene )
if (scene->zsbuf.map) {
struct pipe_surface *zsbuf = scene->fb.zsbuf;
llvmpipe_resource_unmap(zsbuf->texture,
- zsbuf->face,
- zsbuf->level,
- zsbuf->zslice);
+ zsbuf->u.tex.level,
+ zsbuf->u.tex.first_layer);
scene->zsbuf.map = NULL;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/src/gallium/drivers/llvmpipe/lp_scene_queue.h
index fd7c65a2c8..dd9ab593b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene_queue.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.h
@@ -29,6 +29,8 @@
#ifndef LP_SCENE_QUEUE
#define LP_SCENE_QUEUE
+#include "pipe/p_compiler.h"
+
struct lp_scene_queue;
struct lp_scene;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 43904343c3..b6919a5c6d 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -289,12 +289,13 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
static void
llvmpipe_flush_frontbuffer(struct pipe_screen *_screen,
- struct pipe_surface *surface,
+ struct pipe_resource *resource,
+ unsigned level, unsigned layer,
void *context_private)
{
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct sw_winsys *winsys = screen->winsys;
- struct llvmpipe_resource *texture = llvmpipe_resource(surface->texture);
+ struct llvmpipe_resource *texture = llvmpipe_resource(resource);
assert(texture->dt);
if (texture->dt)
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h
index 731526dfab..7f69a11a6e 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.h
+++ b/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -34,12 +34,10 @@
#ifndef LP_SCREEN_H
#define LP_SCREEN_H
-#include "gallivm/lp_bld.h"
-#include <llvm-c/ExecutionEngine.h>
-
-#include "os/os_thread.h"
#include "pipe/p_screen.h"
#include "pipe/p_defines.h"
+#include "os/os_thread.h"
+#include "gallivm/lp_bld.h"
struct sw_winsys;
@@ -51,14 +49,6 @@ struct llvmpipe_screen
struct sw_winsys *winsys;
- LLVMModuleRef module;
- LLVMExecutionEngineRef engine;
- LLVMModuleProviderRef provider;
- LLVMTargetDataRef target;
- LLVMPassManagerRef pass;
-
- LLVMTypeRef context_ptr_type;
-
unsigned num_threads;
/* Increments whenever textures are modified. Contexts can track this.
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 6118434d3d..5d83a1e357 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -114,7 +114,7 @@ first_point( struct lp_setup_context *setup,
setup->point( setup, v0 );
}
-static void lp_setup_reset( struct lp_setup_context *setup )
+void lp_setup_reset( struct lp_setup_context *setup )
{
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
@@ -263,7 +263,6 @@ execute_clears( struct lp_setup_context *setup )
const char *states[] = {
"FLUSHED",
- "EMPTY ",
"CLEARED",
"ACTIVE "
};
@@ -913,6 +912,12 @@ lp_setup_update_state( struct lp_setup_context *setup,
llvmpipe_update_derived(lp);
}
+ if (lp->setup->dirty) {
+ llvmpipe_update_setup(lp);
+ }
+
+ assert(setup->setup.variant);
+
/* Will probably need to move this somewhere else, just need
* to know about vertex shader point size attribute.
*/
@@ -928,7 +933,7 @@ lp_setup_update_state( struct lp_setup_context *setup,
setup->setup.variant->key.size) == 0);
}
- if (update_scene) {
+ if (update_scene && setup->state != SETUP_ACTIVE) {
if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ))
return FALSE;
}
@@ -991,6 +996,8 @@ lp_setup_destroy( struct lp_setup_context *setup )
lp_scene_destroy(scene);
}
+ lp_fence_reference(&setup->last_fence, NULL);
+
FREE( setup );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index ebb18f8134..0d6e161a21 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -45,6 +45,9 @@ struct lp_jit_context;
struct llvmpipe_query;
struct pipe_fence_handle;
struct lp_setup_variant;
+struct lp_setup_context;
+
+void lp_setup_reset( struct lp_setup_context *setup );
struct lp_setup_context *
lp_setup_create( struct pipe_context *pipe,
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
index 9c1f0fe793..384242f81d 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -141,6 +141,8 @@ lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr)
const boolean flatshade_first = setup->flatshade_first;
unsigned i;
+ assert(setup->setup.variant);
+
if (!lp_setup_update_state(setup, TRUE))
return;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index cd67d48861..1b9119eda0 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -90,7 +90,6 @@
#include "lp_context.h"
#include "lp_debug.h"
#include "lp_perf.h"
-#include "lp_screen.h"
#include "lp_setup.h"
#include "lp_state.h"
#include "lp_tex_sample.h"
@@ -102,10 +101,10 @@
#include <llvm-c/BitWriter.h>
+/** Fragment shader number (for debugging) */
static unsigned fs_no = 0;
-
/**
* Expand the relevent bits of mask_input to a 4-dword mask for the
* four pixels in a 2x2 quad. This will set the four elements of the
@@ -115,13 +114,14 @@ static unsigned fs_no = 0;
* \param mask_input bitwise mask for the whole 4x4 stamp
*/
static LLVMValueRef
-generate_quad_mask(LLVMBuilderRef builder,
+generate_quad_mask(struct gallivm_state *gallivm,
struct lp_type fs_type,
unsigned quad,
LLVMValueRef mask_input) /* int32 */
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_type mask_type;
- LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef bits[4];
LLVMValueRef mask;
int shift;
@@ -136,7 +136,6 @@ generate_quad_mask(LLVMBuilderRef builder,
/*
* mask_input >>= (quad * 4)
*/
-
switch (quad) {
case 0:
shift = 0;
@@ -163,8 +162,9 @@ generate_quad_mask(LLVMBuilderRef builder,
/*
* mask = { mask_input & (1 << i), for i in [0,3] }
*/
-
- mask = lp_build_broadcast(builder, lp_build_vec_type(mask_type), mask_input);
+ mask = lp_build_broadcast(gallivm,
+ lp_build_vec_type(gallivm, mask_type),
+ mask_input);
bits[0] = LLVMConstInt(i32t, 1 << 0, 0);
bits[1] = LLVMConstInt(i32t, 1 << 1, 0);
@@ -176,11 +176,10 @@ generate_quad_mask(LLVMBuilderRef builder,
/*
* mask = mask != 0 ? ~0 : 0
*/
-
- mask = lp_build_compare(builder,
+ mask = lp_build_compare(gallivm,
mask_type, PIPE_FUNC_NOTEQUAL,
mask,
- lp_build_const_int_vec(mask_type, 0));
+ lp_build_const_int_vec(gallivm, mask_type, 0));
return mask;
}
@@ -213,7 +212,8 @@ find_output_by_semantic( const struct tgsi_shader_info *info,
* \param partial_mask if 1, do mask_input testing
*/
static void
-generate_fs(struct lp_fragment_shader *shader,
+generate_fs(struct gallivm_state *gallivm,
+ struct lp_fragment_shader *shader,
const struct lp_fragment_shader_variant_key *key,
LLVMBuilderRef builder,
struct lp_type type,
@@ -278,42 +278,42 @@ generate_fs(struct lp_fragment_shader *shader,
assert(i < 4);
- stencil_refs[0] = lp_jit_context_stencil_ref_front_value(builder, context_ptr);
- stencil_refs[1] = lp_jit_context_stencil_ref_back_value(builder, context_ptr);
+ stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr);
+ stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr);
- vec_type = lp_build_vec_type(type);
+ vec_type = lp_build_vec_type(gallivm, type);
- consts_ptr = lp_jit_context_constants(builder, context_ptr);
+ consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
memset(outputs, 0, sizeof outputs);
/* Declare the color and z variables */
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
- color[cbuf][chan] = lp_build_alloca(builder, vec_type, "color");
+ color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color");
}
}
/* do triangle edge testing */
if (partial_mask) {
- *pmask = generate_quad_mask(builder, type,
+ *pmask = generate_quad_mask(gallivm, type,
i, mask_input);
}
else {
- *pmask = lp_build_const_int_vec(type, ~0);
+ *pmask = lp_build_const_int_vec(gallivm, type, ~0);
}
/* 'mask' will control execution based on quad's pixel alive/killed state */
- lp_build_mask_begin(&mask, builder, type, *pmask);
+ lp_build_mask_begin(&mask, gallivm, type, *pmask);
if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
lp_build_mask_check(&mask);
- lp_build_interp_soa_update_pos(interp, i);
+ lp_build_interp_soa_update_pos(interp, gallivm, i);
z = interp->pos[2];
if (depth_mode & EARLY_DEPTH_TEST) {
- lp_build_depth_stencil_test(builder,
+ lp_build_depth_stencil_test(gallivm,
&key->depth,
key->stencil,
type,
@@ -330,15 +330,14 @@ generate_fs(struct lp_fragment_shader *shader,
}
}
- lp_build_interp_soa_update_inputs(interp, i);
+ lp_build_interp_soa_update_inputs(interp, gallivm, i);
/* Build the actual shader */
- lp_build_tgsi_soa(builder, tokens, type, &mask,
+ lp_build_tgsi_soa(gallivm, tokens, type, &mask,
consts_ptr, NULL, /* sys values array */
interp->pos, interp->inputs,
outputs, sampler, &shader->info.base);
-
/* Alpha test */
if (key->alpha.enabled) {
int color0 = find_output_by_semantic(&shader->info.base,
@@ -349,10 +348,10 @@ generate_fs(struct lp_fragment_shader *shader,
LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
LLVMValueRef alpha_ref_value;
- alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
- alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
+ alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr);
+ alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value);
- lp_build_alpha_test(builder, key->alpha.func, type,
+ lp_build_alpha_test(gallivm, key->alpha.func, type,
&mask, alpha, alpha_ref_value,
(depth_mode & LATE_DEPTH_TEST) != 0);
}
@@ -368,7 +367,7 @@ generate_fs(struct lp_fragment_shader *shader,
z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
}
- lp_build_depth_stencil_test(builder,
+ lp_build_depth_stencil_test(gallivm,
&key->depth,
key->stencil,
type,
@@ -391,7 +390,7 @@ generate_fs(struct lp_fragment_shader *shader,
* depth value, update from zs_value with the new mask value and
* write that out.
*/
- lp_build_deferred_depth_write(builder,
+ lp_build_deferred_depth_write(gallivm,
type,
zs_format_desc,
&mask,
@@ -421,7 +420,7 @@ generate_fs(struct lp_fragment_shader *shader,
}
if (counter)
- lp_build_occlusion_count(builder, type,
+ lp_build_occlusion_count(gallivm, type,
lp_build_mask_value(&mask), counter);
*pmask = lp_build_mask_end(&mask);
@@ -438,7 +437,8 @@ generate_fs(struct lp_fragment_shader *shader,
* \param dst_ptr the destination color buffer pointer
*/
static void
-generate_blend(const struct pipe_blend_state *blend,
+generate_blend(struct gallivm_state *gallivm,
+ const struct pipe_blend_state *blend,
unsigned rt,
LLVMBuilderRef builder,
struct lp_type type,
@@ -457,21 +457,21 @@ generate_blend(const struct pipe_blend_state *blend,
LLVMValueRef res[4];
unsigned chan;
- lp_build_context_init(&bld, builder, type);
+ lp_build_context_init(&bld, gallivm, type);
- lp_build_mask_begin(&mask_ctx, builder, type, mask);
+ lp_build_mask_begin(&mask_ctx, gallivm, type, mask);
if (do_branch)
lp_build_mask_check(&mask_ctx);
- vec_type = lp_build_vec_type(type);
+ vec_type = lp_build_vec_type(gallivm, type);
- const_ptr = lp_jit_context_blend_color(builder, context_ptr);
+ const_ptr = lp_jit_context_blend_color(gallivm, context_ptr);
const_ptr = LLVMBuildBitCast(builder, const_ptr,
LLVMPointerType(vec_type, 0), "");
/* load constant blend color and colors from the dest color buffer */
for(chan = 0; chan < 4; ++chan) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+ LLVMValueRef index = lp_build_const_int32(gallivm, chan);
con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
@@ -481,12 +481,12 @@ generate_blend(const struct pipe_blend_state *blend,
}
/* do blend */
- lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);
+ lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);
/* store results to color buffer */
for(chan = 0; chan < 4; ++chan) {
if(blend->rt[rt].colormask & (1 << chan)) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+ LLVMValueRef index = lp_build_const_int32(gallivm, chan);
lp_build_name(res[chan], "res.%c", "rgba"[chan]);
res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
@@ -504,11 +504,12 @@ generate_blend(const struct pipe_blend_state *blend,
* 2x2 pixels.
*/
static void
-generate_fragment(struct llvmpipe_screen *screen,
+generate_fragment(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
struct lp_fragment_shader_variant *variant,
unsigned partial_mask)
{
+ struct gallivm_state *gallivm = lp->gallivm;
const struct lp_fragment_shader_variant_key *key = &variant->key;
struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
char func_name[256];
@@ -519,6 +520,8 @@ generate_fragment(struct llvmpipe_screen *screen,
LLVMTypeRef blend_vec_type;
LLVMTypeRef arg_types[11];
LLVMTypeRef func_type;
+ LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
+ LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
LLVMValueRef context_ptr;
LLVMValueRef x;
LLVMValueRef y;
@@ -580,29 +583,30 @@ generate_fragment(struct llvmpipe_screen *screen,
* lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa.
*/
- fs_elem_type = lp_build_elem_type(fs_type);
- fs_int_vec_type = lp_build_int_vec_type(fs_type);
+ fs_elem_type = lp_build_elem_type(gallivm, fs_type);
+ fs_int_vec_type = lp_build_int_vec_type(gallivm, fs_type);
- blend_vec_type = lp_build_vec_type(blend_type);
+ blend_vec_type = lp_build_vec_type(gallivm, blend_type);
util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
shader->no, variant->no, partial_mask ? "partial" : "whole");
- arg_types[0] = screen->context_ptr_type; /* context */
- arg_types[1] = LLVMInt32Type(); /* x */
- arg_types[2] = LLVMInt32Type(); /* y */
- arg_types[3] = LLVMInt32Type(); /* facing */
+ arg_types[0] = lp_jit_get_context_type(lp); /* context */
+ arg_types[1] = int32_type; /* x */
+ arg_types[2] = int32_type; /* y */
+ arg_types[3] = int32_type; /* facing */
arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */
arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */
arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */
arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */
- arg_types[8] = LLVMPointerType(LLVMInt8Type(), 0); /* depth */
- arg_types[9] = LLVMInt32Type(); /* mask_input */
- arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
+ arg_types[8] = LLVMPointerType(int8_type, 0); /* depth */
+ arg_types[9] = int32_type; /* mask_input */
+ arg_types[10] = LLVMPointerType(int32_type, 0); /* counter */
- func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+ func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
+ arg_types, Elements(arg_types), 0);
- function = LLVMAddFunction(screen->module, func_name, func_type);
+ function = LLVMAddFunction(gallivm->module, func_name, func_type);
LLVMSetFunctionCallConv(function, LLVMCCallConv);
variant->function[partial_mask] = function;
@@ -644,8 +648,9 @@ generate_fragment(struct llvmpipe_screen *screen,
* Function body
*/
- block = LLVMAppendBasicBlock(function, "entry");
- builder = LLVMCreateBuilder();
+ block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
+ builder = gallivm->builder;
+ assert(builder);
LLVMPositionBuilderAtEnd(builder, block);
/*
@@ -654,6 +659,7 @@ generate_fragment(struct llvmpipe_screen *screen,
* already included in the shader key.
*/
lp_build_interp_soa_init(&interp,
+ gallivm,
shader->info.base.num_inputs,
inputs,
builder, fs_type,
@@ -667,7 +673,7 @@ generate_fragment(struct llvmpipe_screen *screen,
zs_format_desc = util_format_description(key->zsbuf_format);
for(i = 0; i < num_fs; ++i) {
- LLVMValueRef depth_offset = LLVMConstInt(LLVMInt32Type(),
+ LLVMValueRef depth_offset = LLVMConstInt(int32_type,
i*fs_type.length*zs_format_desc->block.bits/8,
0);
LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS];
@@ -675,7 +681,8 @@ generate_fragment(struct llvmpipe_screen *screen,
depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
- generate_fs(shader, key,
+ generate_fs(gallivm,
+ shader, key,
builder,
fs_type,
context_ptr,
@@ -701,7 +708,7 @@ generate_fragment(struct llvmpipe_screen *screen,
*/
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
LLVMValueRef color_ptr;
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0);
+ LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
LLVMValueRef blend_in_color[NUM_CHANNELS];
unsigned rt;
@@ -716,7 +723,7 @@ generate_fragment(struct llvmpipe_screen *screen,
LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals");
}
- lp_build_conv(builder, fs_type, blend_type,
+ lp_build_conv(gallivm, fs_type, blend_type,
fs_color_vals,
num_fs,
&blend_in_color[chan], 1);
@@ -725,11 +732,11 @@ generate_fragment(struct llvmpipe_screen *screen,
}
if (partial_mask || !variant->opaque) {
- lp_build_conv_mask(builder, fs_type, blend_type,
+ lp_build_conv_mask(lp->gallivm, fs_type, blend_type,
fs_mask, num_fs,
&blend_mask, 1);
} else {
- blend_mask = lp_build_const_int_vec(blend_type, ~0);
+ blend_mask = lp_build_const_int_vec(lp->gallivm, blend_type, ~0);
}
color_ptr = LLVMBuildLoad(builder,
@@ -750,7 +757,8 @@ generate_fragment(struct llvmpipe_screen *screen,
!key->alpha.enabled &&
!shader->info.base.uses_kill);
- generate_blend(&key->blend,
+ generate_blend(lp->gallivm,
+ &key->blend,
rt,
builder,
blend_type,
@@ -764,9 +772,6 @@ generate_fragment(struct llvmpipe_screen *screen,
LLVMBuildRetVoid(builder);
- LLVMDisposeBuilder(builder);
-
-
/* Verify the LLVM IR. If invalid, dump and abort */
#ifdef DEBUG
if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
@@ -777,7 +782,7 @@ generate_fragment(struct llvmpipe_screen *screen,
#endif
/* Apply optimizations to LLVM IR */
- LLVMRunFunctionPassManager(screen->pass, function);
+ LLVMRunFunctionPassManager(gallivm->passmgr, function);
if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) {
/* Print the LLVM IR to stderr */
@@ -787,14 +792,14 @@ generate_fragment(struct llvmpipe_screen *screen,
/* Dump byte code to a file */
if (0) {
- LLVMWriteBitcodeToFile(lp_build_module, "llvmpipe.bc");
+ LLVMWriteBitcodeToFile(gallivm->module, "llvmpipe.bc");
}
/*
* Translate the LLVM IR into machine code.
*/
{
- void *f = LLVMGetPointerToGlobal(screen->engine, function);
+ void *f = LLVMGetPointerToGlobal(gallivm->engine, function);
variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f);
@@ -898,8 +903,13 @@ lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
debug_printf("\n");
}
+
+/**
+ * Generate a new fragment shader variant from the shader code and
+ * other state indicated by the key.
+ */
static struct lp_fragment_shader_variant *
-generate_variant(struct llvmpipe_screen *screen,
+generate_variant(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
const struct lp_fragment_shader_variant_key *key)
{
@@ -945,11 +955,11 @@ generate_variant(struct llvmpipe_screen *screen,
lp_debug_fs_variant(variant);
}
- generate_fragment(screen, shader, variant, RAST_EDGE_TEST);
+ generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
if (variant->opaque) {
/* Specialized shader, which doesn't need to read the color buffer. */
- generate_fragment(screen, shader, variant, RAST_WHOLE);
+ generate_fragment(lp, shader, variant, RAST_WHOLE);
} else {
variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
}
@@ -1034,7 +1044,8 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
if (LP_DEBUG & DEBUG_TGSI) {
unsigned attrib;
- debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader);
+ debug_printf("llvmpipe: Create fragment shader #%u %p:\n",
+ shader->no, (void *) shader);
tgsi_dump(templ->tokens, 0);
debug_printf("usage masks:\n");
for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) {
@@ -1071,33 +1082,49 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
llvmpipe->dirty |= LP_NEW_FS;
}
-static void
-remove_shader_variant(struct llvmpipe_context *lp,
- struct lp_fragment_shader_variant *variant)
+
+/**
+ * Remove shader variant from two lists: the shader's variant list
+ * and the context's variant list.
+ */
+void
+llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
+ struct lp_fragment_shader_variant *variant)
{
- struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
unsigned i;
if (gallivm_debug & GALLIVM_DEBUG_IR) {
- debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached #%u v total cached #%u\n",
- variant->shader->no, variant->no, variant->shader->variants_created,
- variant->shader->variants_cached, lp->nr_fs_variants);
+ debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached"
+ " #%u v total cached #%u\n",
+ variant->shader->no,
+ variant->no,
+ variant->shader->variants_created,
+ variant->shader->variants_cached,
+ lp->nr_fs_variants);
}
+
+ /* free all the variant's JIT'd functions */
for (i = 0; i < Elements(variant->function); i++) {
if (variant->function[i]) {
if (variant->jit_function[i])
- LLVMFreeMachineCodeForFunction(screen->engine,
+ LLVMFreeMachineCodeForFunction(lp->gallivm->engine,
variant->function[i]);
LLVMDeleteFunction(variant->function[i]);
}
}
+
+ /* remove from shader's list */
remove_from_list(&variant->list_item_local);
variant->shader->variants_cached--;
+
+ /* remove from context's list */
remove_from_list(&variant->list_item_global);
lp->nr_fs_variants--;
+
FREE(variant);
}
+
static void
llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
{
@@ -1106,23 +1133,23 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
struct lp_fs_variant_list_item *li;
assert(fs != llvmpipe->fs);
- (void) llvmpipe;
/*
* XXX: we need to flush the context until we have some sort of reference
* counting in fragment shaders as they may still be binned
* Flushing alone might not sufficient we need to wait on it too.
*/
-
llvmpipe_finish(pipe, __FUNCTION__);
+ /* Delete all the variants */
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
struct lp_fs_variant_list_item *next = next_elem(li);
- remove_shader_variant(llvmpipe, li->base);
+ llvmpipe_remove_shader_variant(llvmpipe, li->base);
li = next;
}
+ /* Delete draw module's data */
draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);
assert(shader->variants_cached == 0);
@@ -1278,14 +1305,15 @@ make_variant_key(struct llvmpipe_context *lp,
}
}
+
+
/**
- * Update fragment state. This is called just prior to drawing
+ * Update fragment shader state. This is called just prior to drawing
* something when some fragment-related state has changed.
*/
void
llvmpipe_update_fs(struct llvmpipe_context *lp)
{
- struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
struct lp_fragment_shader *shader = lp->fs;
struct lp_fragment_shader_variant_key key;
struct lp_fragment_shader_variant *variant = NULL;
@@ -1293,6 +1321,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
make_variant_key(lp, shader, &key);
+ /* Search the variants for one which matches the key */
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) {
@@ -1303,36 +1332,49 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
}
if (variant) {
+ /* Move this variant to the head of the list to implement LRU
+ * deletion of shader's when we have too many.
+ */
move_to_head(&lp->fs_variants_list, &variant->list_item_global);
}
else {
- int64_t t0, t1;
- int64_t dt;
+ /* variant not found, create it now */
+ int64_t t0, t1, dt;
unsigned i;
+
+ /* First, check if we've exceeded the max number of shader variants.
+ * If so, free 25% of them (the least recently used ones).
+ */
if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) {
struct pipe_context *pipe = &lp->pipe;
/*
- * XXX: we need to flush the context until we have some sort of reference
- * counting in fragment shaders as they may still be binned
+ * XXX: we need to flush the context until we have some sort of
+ * reference counting in fragment shaders as they may still be binned
* Flushing alone might not be sufficient we need to wait on it too.
*/
llvmpipe_finish(pipe, __FUNCTION__);
for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) {
- struct lp_fs_variant_list_item *item = last_elem(&lp->fs_variants_list);
- remove_shader_variant(lp, item->base);
+ struct lp_fs_variant_list_item *item;
+ item = last_elem(&lp->fs_variants_list);
+ llvmpipe_remove_shader_variant(lp, item->base);
}
}
- t0 = os_time_get();
-
- variant = generate_variant(screen, shader, &key);
+ /*
+ * Generate the new variant.
+ */
+ t0 = os_time_get();
+ variant = generate_variant(lp, shader, &key);
t1 = os_time_get();
dt = t1 - t0;
LP_COUNT_ADD(llvm_compile_time, dt);
LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */
+ llvmpipe_variant_count++;
+
+ /* Put the new variant into the list */
if (variant) {
insert_at_head(&shader->variants, &variant->list_item_local);
insert_at_head(&lp->fs_variants_list, &variant->list_item_global);
@@ -1341,6 +1383,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
}
}
+ /* Bind this variant */
lp_setup_set_fs_variant(lp->setup, variant);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 7d58c4936c..98410c6935 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -69,12 +69,15 @@ struct lp_fragment_shader_variant_key
struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
};
+
+/** doubly-linked list item */
struct lp_fs_variant_list_item
{
struct lp_fragment_shader_variant *base;
struct lp_fs_variant_list_item *next, *prev;
};
+
struct lp_fragment_shader_variant
{
struct lp_fragment_shader_variant_key key;
@@ -118,5 +121,9 @@ struct lp_fragment_shader
void
lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant);
+void
+llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
+ struct lp_fragment_shader_variant *variant);
+
#endif /* LP_STATE_FS_H_ */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 47f3d95320..ad751b9ef4 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -31,6 +31,7 @@
#include "util/u_simple_list.h"
#include "os/os_time.h"
#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_intr.h"
@@ -83,24 +84,29 @@ struct lp_setup_args
LLVMValueRef v2a;
};
-static LLVMTypeRef type4f(void)
+
+
+static LLVMTypeRef
+type4f(struct gallivm_state *gallivm)
{
- return LLVMVectorType(LLVMFloatType(), 4);
+ return LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4);
}
/* Equivalent of _mm_setr_ps(a,b,c,d)
*/
-static LLVMValueRef vec4f(LLVMBuilderRef bld,
- LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d,
- const char *name)
+static LLVMValueRef
+vec4f(struct gallivm_state *gallivm,
+ LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d,
+ const char *name)
{
- LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
- LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
- LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
+ LLVMBuilderRef bld = gallivm->builder;
+ LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
+ LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
+ LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
+ LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
- LLVMValueRef res = LLVMGetUndef(type4f());
+ LLVMValueRef res = LLVMGetUndef(type4f(gallivm));
res = LLVMBuildInsertElement(bld, res, a, i0, "");
res = LLVMBuildInsertElement(bld, res, b, i1, "");
@@ -112,15 +118,17 @@ static LLVMValueRef vec4f(LLVMBuilderRef bld,
/* Equivalent of _mm_set1_ps(a)
*/
-static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
- LLVMValueRef a,
- const char *name)
+static LLVMValueRef
+vec4f_from_scalar(struct gallivm_state *gallivm,
+ LLVMValueRef a,
+ const char *name)
{
- LLVMValueRef res = LLVMGetUndef(type4f());
+ LLVMBuilderRef bld = gallivm->builder;
+ LLVMValueRef res = LLVMGetUndef(type4f(gallivm));
int i;
for(i = 0; i < 4; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef index = lp_build_const_int32(gallivm, i);
res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : "");
}
@@ -128,14 +136,15 @@ static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
}
static void
-store_coef(LLVMBuilderRef builder,
+store_coef(struct gallivm_state *gallivm,
struct lp_setup_args *args,
unsigned slot,
LLVMValueRef a0,
LLVMValueRef dadx,
LLVMValueRef dady)
{
- LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0);
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef idx = lp_build_const_int32(gallivm, slot);
LLVMBuildStore(builder,
a0,
@@ -153,14 +162,14 @@ store_coef(LLVMBuilderRef builder,
static void
-emit_constant_coef4( LLVMBuilderRef builder,
+emit_constant_coef4(struct gallivm_state *gallivm,
struct lp_setup_args *args,
unsigned slot,
LLVMValueRef vert)
{
- LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
- LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero");
- store_coef(builder, args, slot, vert, zerovec, zerovec);
+ LLVMValueRef zero = lp_build_const_float(gallivm, 0.0);
+ LLVMValueRef zerovec = vec4f_from_scalar(gallivm, zero, "zero");
+ store_coef(gallivm, args, slot, vert, zerovec, zerovec);
}
@@ -170,30 +179,33 @@ emit_constant_coef4( LLVMBuilderRef builder,
* \param frontface is the triangle front facing?
*/
static void
-emit_facing_coef( LLVMBuilderRef builder,
+emit_facing_coef(struct gallivm_state *gallivm,
struct lp_setup_args *args,
unsigned slot )
{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
LLVMValueRef a0_0 = args->facing;
- LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), "");
- LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
- LLVMValueRef a0 = vec4f(builder, a0_0f, zero, zero, zero, "facing");
- LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero");
+ LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
+ LLVMValueRef zero = lp_build_const_float(gallivm, 0.0);
+ LLVMValueRef a0 = vec4f(gallivm, a0_0f, zero, zero, zero, "facing");
+ LLVMValueRef zerovec = vec4f_from_scalar(gallivm, zero, "zero");
- store_coef(builder, args, slot, a0, zerovec, zerovec);
+ store_coef(gallivm, args, slot, a0, zerovec, zerovec);
}
static LLVMValueRef
-vert_attrib(LLVMBuilderRef b,
+vert_attrib(struct gallivm_state *gallivm,
LLVMValueRef vert,
int attr,
int elem,
const char *name)
{
+ LLVMBuilderRef b = gallivm->builder;
LLVMValueRef idx[2];
- idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0);
- idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0);
+ idx[0] = lp_build_const_int32(gallivm, attr);
+ idx[1] = lp_build_const_int32(gallivm, elem);
return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
}
@@ -214,16 +226,17 @@ vert_clamp(LLVMBuilderRef b,
}
static void
-lp_twoside(LLVMBuilderRef b,
+lp_twoside(struct gallivm_state *gallivm,
struct lp_setup_args *args,
const struct lp_setup_variant_key *key,
int bcolor_slot)
{
+ LLVMBuilderRef b = gallivm->builder;
LLVMValueRef a0_back, a1_back, a2_back;
- LLVMValueRef idx2 = LLVMConstInt(LLVMInt32Type(), bcolor_slot, 0);
+ LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot);
LLVMValueRef facing = args->facing;
- LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, LLVMConstInt(LLVMInt32Type(), 0, 0), ""); /** need i1 for if condition */
+ LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */
a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back");
a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back");
@@ -241,29 +254,31 @@ lp_twoside(LLVMBuilderRef b,
}
static void
-lp_do_offset_tri(LLVMBuilderRef b,
+lp_do_offset_tri(struct gallivm_state *gallivm,
struct lp_setup_args *args,
const struct lp_setup_variant_key *key)
{
+ LLVMBuilderRef b = gallivm->builder;
struct lp_build_context bld;
LLVMValueRef zoffset, mult;
LLVMValueRef z0_new, z1_new, z2_new;
LLVMValueRef dzdx0, dzdx, dzdy0, dzdy;
LLVMValueRef max, max_value;
- LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0);
- LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
+ LLVMValueRef one = lp_build_const_float(gallivm, 1.0);
+ LLVMValueRef zero = lp_build_const_float(gallivm, 0.0);
+ LLVMValueRef two = lp_build_const_int32(gallivm, 2);
/* edge vectors: e = v0 - v2, f = v1 - v2 */
- LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x");
- LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x");
- LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x");
- LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y");
- LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y");
- LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y");
- LLVMValueRef v0_z = vert_attrib(b, args->v0, 0, 2, "v0_z");
- LLVMValueRef v1_z = vert_attrib(b, args->v1, 0, 2, "v1_z");
- LLVMValueRef v2_z = vert_attrib(b, args->v2, 0, 2, "v2_z");
+ LLVMValueRef v0_x = vert_attrib(gallivm, args->v0, 0, 0, "v0_x");
+ LLVMValueRef v1_x = vert_attrib(gallivm, args->v1, 0, 0, "v1_x");
+ LLVMValueRef v2_x = vert_attrib(gallivm, args->v2, 0, 0, "v2_x");
+ LLVMValueRef v0_y = vert_attrib(gallivm, args->v0, 0, 1, "v0_y");
+ LLVMValueRef v1_y = vert_attrib(gallivm, args->v1, 0, 1, "v1_y");
+ LLVMValueRef v2_y = vert_attrib(gallivm, args->v2, 0, 1, "v2_y");
+ LLVMValueRef v0_z = vert_attrib(gallivm, args->v0, 0, 2, "v0_z");
+ LLVMValueRef v1_z = vert_attrib(gallivm, args->v1, 0, 2, "v1_z");
+ LLVMValueRef v2_z = vert_attrib(gallivm, args->v2, 0, 2, "v2_z");
/* edge vectors: e = v0 - v2, f = v1 - v2 */
LLVMValueRef dx02 = LLVMBuildFSub(b, v0_x, v2_x, "dx02");
@@ -288,7 +303,7 @@ lp_do_offset_tri(LLVMBuilderRef b,
LLVMValueRef res2 = LLVMBuildFSub(b, dz02_dx12, dx02_dz12, "res2");
/* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
- lp_build_context_init(&bld, b, lp_type_float(32));
+ lp_build_context_init(&bld, gallivm, lp_type_float(32));
dzdx0 = LLVMBuildFMul(b, res1, inv_det, "dzdx");
dzdx = lp_build_abs(&bld, dzdx0);
dzdy0 = LLVMBuildFMul(b, res2, inv_det, "dzdy");
@@ -298,8 +313,8 @@ lp_do_offset_tri(LLVMBuilderRef b,
max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, "");
max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max");
- mult = LLVMBuildFMul(b, max_value, LLVMConstReal(LLVMFloatType(), key->scale), "");
- zoffset = LLVMBuildFAdd(b, LLVMConstReal(LLVMFloatType(), key->units), mult, "zoffset");
+ mult = LLVMBuildFMul(b, max_value, lp_build_const_float(gallivm, key->scale), "");
+ zoffset = LLVMBuildFAdd(b, lp_build_const_float(gallivm, key->units), mult, "zoffset");
/* clamp and do offset */
z0_new = vert_clamp(b, LLVMBuildFAdd(b, v0_z, zoffset, ""), zero, one);
@@ -308,18 +323,19 @@ lp_do_offset_tri(LLVMBuilderRef b,
/* insert into args->a0.z, a1.z, a2.z:
*/
- args->v0a = LLVMBuildInsertElement(b, args->v0a, z0_new, LLVMConstInt(LLVMInt32Type(), 2, 0), "");
- args->v1a = LLVMBuildInsertElement(b, args->v1a, z1_new, LLVMConstInt(LLVMInt32Type(), 2, 0), "");
- args->v2a = LLVMBuildInsertElement(b, args->v2a, z2_new, LLVMConstInt(LLVMInt32Type(), 2, 0), "");
+ args->v0a = LLVMBuildInsertElement(b, args->v0a, z0_new, two, "");
+ args->v1a = LLVMBuildInsertElement(b, args->v1a, z1_new, two, "");
+ args->v2a = LLVMBuildInsertElement(b, args->v2a, z2_new, two, "");
}
static void
-load_attribute(LLVMBuilderRef b,
+load_attribute(struct gallivm_state *gallivm,
struct lp_setup_args *args,
const struct lp_setup_variant_key *key,
unsigned vert_attr)
{
- LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0);
+ LLVMBuilderRef b = gallivm->builder;
+ LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr);
/* Load the vertex data
*/
@@ -331,25 +347,26 @@ load_attribute(LLVMBuilderRef b,
/* Potentially modify it according to twoside, offset, etc:
*/
if (vert_attr == 0 && (key->scale != 0.0f || key->units != 0.0f)) {
- lp_do_offset_tri(b, args, key);
+ lp_do_offset_tri(gallivm, args, key);
}
if (key->twoside) {
if (vert_attr == key->color_slot && key->bcolor_slot != ~0)
- lp_twoside(b, args, key, key->bcolor_slot);
+ lp_twoside(gallivm, args, key, key->bcolor_slot);
else if (vert_attr == key->spec_slot && key->bspec_slot != ~0)
- lp_twoside(b, args, key, key->bspec_slot);
+ lp_twoside(gallivm, args, key, key->bspec_slot);
}
}
static void
-emit_coef4( LLVMBuilderRef b,
+emit_coef4( struct gallivm_state *gallivm,
struct lp_setup_args *args,
unsigned slot,
LLVMValueRef a0,
LLVMValueRef a1,
LLVMValueRef a2)
{
+ LLVMBuilderRef b = gallivm->builder;
LLVMValueRef dy20_ooa = args->dy20_ooa;
LLVMValueRef dy01_ooa = args->dy01_ooa;
LLVMValueRef dx20_ooa = args->dx20_ooa;
@@ -381,17 +398,17 @@ emit_coef4( LLVMBuilderRef b,
LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
- store_coef(b, args, slot, attr_0, dadx, dady);
+ store_coef(gallivm, args, slot, attr_0, dadx, dady);
}
static void
-emit_linear_coef( LLVMBuilderRef b,
+emit_linear_coef( struct gallivm_state *gallivm,
struct lp_setup_args *args,
unsigned slot)
{
/* nothing to do anymore */
- emit_coef4(b,
+ emit_coef4(gallivm,
args, slot,
args->v0a,
args->v1a,
@@ -408,30 +425,32 @@ emit_linear_coef( LLVMBuilderRef b,
* divide the interpolated value by the interpolated W at that fragment.
*/
static void
-emit_perspective_coef( LLVMBuilderRef b,
+emit_perspective_coef( struct gallivm_state *gallivm,
struct lp_setup_args *args,
unsigned slot)
{
+ LLVMBuilderRef b = gallivm->builder;
+
/* premultiply by 1/w (v[0][3] is always 1/w):
*/
- LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow");
- LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow");
- LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow");
+ LLVMValueRef v0_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v0, 0, 3, ""), "v0_oow");
+ LLVMValueRef v1_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v1, 0, 3, ""), "v1_oow");
+ LLVMValueRef v2_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v2, 0, 3, ""), "v2_oow");
LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, args->v0a, v0_oow, "v0_oow_v0a");
LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, args->v1a, v1_oow, "v1_oow_v1a");
LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, args->v2a, v2_oow, "v2_oow_v2a");
- emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a);
+ emit_coef4(gallivm, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a);
}
static void
-emit_position_coef( LLVMBuilderRef builder,
+emit_position_coef( struct gallivm_state *gallivm,
struct lp_setup_args *args,
int slot )
{
- emit_linear_coef(builder, args, slot);
+ emit_linear_coef(gallivm, args, slot);
}
@@ -441,7 +460,7 @@ emit_position_coef( LLVMBuilderRef builder,
* Compute the inputs-> dadx, dady, a0 values.
*/
static void
-emit_tri_coef( LLVMBuilderRef builder,
+emit_tri_coef( struct gallivm_state *gallivm,
const struct lp_setup_variant_key *key,
struct lp_setup_args *args )
{
@@ -449,8 +468,8 @@ emit_tri_coef( LLVMBuilderRef builder,
/* The internal position input is in slot zero:
*/
- load_attribute(builder, args, key, 0);
- emit_position_coef(builder, args, 0);
+ load_attribute(gallivm, args, key, 0);
+ emit_position_coef(gallivm, args, 0);
/* setup interpolation for all the remaining attributes:
*/
@@ -459,24 +478,24 @@ emit_tri_coef( LLVMBuilderRef builder,
if (key->inputs[slot].interp == LP_INTERP_CONSTANT ||
key->inputs[slot].interp == LP_INTERP_LINEAR ||
key->inputs[slot].interp == LP_INTERP_PERSPECTIVE)
- load_attribute(builder, args, key, key->inputs[slot].src_index);
+ load_attribute(gallivm, args, key, key->inputs[slot].src_index);
switch (key->inputs[slot].interp) {
case LP_INTERP_CONSTANT:
if (key->flatshade_first) {
- emit_constant_coef4(builder, args, slot+1, args->v0a);
+ emit_constant_coef4(gallivm, args, slot+1, args->v0a);
}
else {
- emit_constant_coef4(builder, args, slot+1, args->v2a);
+ emit_constant_coef4(gallivm, args, slot+1, args->v2a);
}
break;
case LP_INTERP_LINEAR:
- emit_linear_coef(builder, args, slot+1);
+ emit_linear_coef(gallivm, args, slot+1);
break;
case LP_INTERP_PERSPECTIVE:
- emit_perspective_coef(builder, args, slot+1);
+ emit_perspective_coef(gallivm, args, slot+1);
break;
case LP_INTERP_POSITION:
@@ -487,7 +506,7 @@ emit_tri_coef( LLVMBuilderRef builder,
break;
case LP_INTERP_FACING:
- emit_facing_coef(builder, args, slot+1);
+ emit_facing_coef(gallivm, args, slot+1);
break;
default:
@@ -500,7 +519,7 @@ emit_tri_coef( LLVMBuilderRef builder,
/* XXX: This is generic code, share with fs/vs codegen:
*/
static lp_jit_setup_triangle
-finalize_function(struct llvmpipe_screen *screen,
+finalize_function(struct gallivm_state *gallivm,
LLVMBuilderRef builder,
LLVMValueRef function)
{
@@ -516,7 +535,7 @@ finalize_function(struct llvmpipe_screen *screen,
#endif
/* Apply optimizations to LLVM IR */
- LLVMRunFunctionPassManager(screen->pass, function);
+ LLVMRunFunctionPassManager(gallivm->passmgr, function);
if (gallivm_debug & GALLIVM_DEBUG_IR)
{
@@ -528,7 +547,7 @@ finalize_function(struct llvmpipe_screen *screen,
/*
* Translate the LLVM IR into machine code.
*/
- f = LLVMGetPointerToGlobal(screen->engine, function);
+ f = LLVMGetPointerToGlobal(gallivm->engine, function);
if (gallivm_debug & GALLIVM_DEBUG_ASM)
{
@@ -543,11 +562,12 @@ finalize_function(struct llvmpipe_screen *screen,
/* XXX: Generic code:
*/
static void
-lp_emit_emms(LLVMBuilderRef builder)
+lp_emit_emms(struct gallivm_state *gallivm)
{
#ifdef PIPE_ARCH_X86
/* Avoid corrupting the FPU stack on 32bit OSes. */
- lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+ lp_build_intrinsic(gallivm->builder, "llvm.x86.mmx.emms",
+ LLVMVoidTypeInContext(gallivm->context), NULL, 0);
#endif
}
@@ -568,21 +588,23 @@ set_noalias(LLVMBuilderRef builder,
}
static void
-init_args(LLVMBuilderRef b,
+init_args(struct gallivm_state *gallivm,
struct lp_setup_args *args,
const struct lp_setup_variant *variant)
{
- LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x");
- LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y");
+ LLVMBuilderRef b = gallivm->builder;
- LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x");
- LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y");
+ LLVMValueRef v0_x = vert_attrib(gallivm, args->v0, 0, 0, "v0_x");
+ LLVMValueRef v0_y = vert_attrib(gallivm, args->v0, 0, 1, "v0_y");
- LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x");
- LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y");
+ LLVMValueRef v1_x = vert_attrib(gallivm, args->v1, 0, 0, "v1_x");
+ LLVMValueRef v1_y = vert_attrib(gallivm, args->v1, 0, 1, "v1_y");
- LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(),
- variant->key.pixel_center_half ? 0.5 : 0);
+ LLVMValueRef v2_x = vert_attrib(gallivm, args->v2, 0, 0, "v2_x");
+ LLVMValueRef v2_y = vert_attrib(gallivm, args->v2, 0, 1, "v2_y");
+
+ LLVMValueRef pixel_center = lp_build_const_float(gallivm,
+ variant->key.pixel_center_half ? 0.5 : 0);
LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" );
LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" );
@@ -592,7 +614,7 @@ init_args(LLVMBuilderRef b,
LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20");
LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20");
- LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0);
+ LLVMValueRef one = lp_build_const_float(gallivm, 1.0);
LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e");
LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f");
LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa");
@@ -602,14 +624,14 @@ init_args(LLVMBuilderRef b,
LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa");
LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa");
- args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f");
- args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f");
+ args->dy20_ooa = vec4f_from_scalar(gallivm, dy20_ooa, "dy20_ooa_4f");
+ args->dy01_ooa = vec4f_from_scalar(gallivm, dy01_ooa, "dy01_ooa_4f");
- args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f");
- args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f");
+ args->dx20_ooa = vec4f_from_scalar(gallivm, dx20_ooa, "dx20_ooa_4f");
+ args->dx01_ooa = vec4f_from_scalar(gallivm, dx01_ooa, "dx01_ooa_4f");
- args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f");
- args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f");
+ args->x0_center = vec4f_from_scalar(gallivm, x0_center, "x0_center_4f");
+ args->y0_center = vec4f_from_scalar(gallivm, y0_center, "y0_center_4f");
}
/**
@@ -617,7 +639,7 @@ init_args(LLVMBuilderRef b,
*
*/
static struct lp_setup_variant *
-generate_setup_variant(struct llvmpipe_screen *screen,
+generate_setup_variant(struct gallivm_state *gallivm,
struct lp_setup_variant_key *key,
struct llvmpipe_context *lp)
{
@@ -628,7 +650,7 @@ generate_setup_variant(struct llvmpipe_screen *screen,
LLVMTypeRef func_type;
LLVMTypeRef arg_types[7];
LLVMBasicBlockRef block;
- LLVMBuilderRef builder;
+ LLVMBuilderRef builder = gallivm->builder;
int64_t t0, t1;
if (0)
@@ -653,19 +675,20 @@ generate_setup_variant(struct llvmpipe_screen *screen,
* the vertices.
*/
- vec4f_type = LLVMVectorType(LLVMFloatType(), 4);
+ vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4);
arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */
arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */
arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */
- arg_types[3] = LLVMInt32Type(); /* facing */
+ arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */
arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */
arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */
arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */
- func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+ func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
+ arg_types, Elements(arg_types), 0);
- variant->function = LLVMAddFunction(screen->module, func_name, func_type);
+ variant->function = LLVMAddFunction(gallivm->module, func_name, func_type);
if (!variant->function)
goto fail;
@@ -690,19 +713,18 @@ generate_setup_variant(struct llvmpipe_screen *screen,
/*
* Function body
*/
- block = LLVMAppendBasicBlock(variant->function, "entry");
- builder = LLVMCreateBuilder();
+ block = LLVMAppendBasicBlockInContext(gallivm->context,
+ variant->function, "entry");
LLVMPositionBuilderAtEnd(builder, block);
set_noalias(builder, variant->function, arg_types, Elements(arg_types));
- init_args(builder, &args, variant);
- emit_tri_coef(builder, &variant->key, &args);
+ init_args(gallivm, &args, variant);
+ emit_tri_coef(gallivm, &variant->key, &args);
- lp_emit_emms(builder);
+ lp_emit_emms(gallivm);
LLVMBuildRetVoid(builder);
- LLVMDisposeBuilder(builder);
- variant->jit_function = finalize_function(screen, builder,
+ variant->jit_function = finalize_function(gallivm, builder,
variant->function);
if (!variant->jit_function)
goto fail;
@@ -722,7 +744,7 @@ fail:
if (variant) {
if (variant->function) {
if (variant->jit_function)
- LLVMFreeMachineCodeForFunction(screen->engine,
+ LLVMFreeMachineCodeForFunction(gallivm->engine,
variant->function);
LLVMDeleteFunction(variant->function);
}
@@ -773,8 +795,6 @@ static void
remove_setup_variant(struct llvmpipe_context *lp,
struct lp_setup_variant *variant)
{
- struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
-
if (gallivm_debug & GALLIVM_DEBUG_IR) {
debug_printf("llvmpipe: del setup_variant #%u total %u\n",
variant->no, lp->nr_setup_variants);
@@ -782,7 +802,7 @@ remove_setup_variant(struct llvmpipe_context *lp,
if (variant->function) {
if (variant->jit_function)
- LLVMFreeMachineCodeForFunction(screen->engine,
+ LLVMFreeMachineCodeForFunction(lp->gallivm->engine,
variant->function);
LLVMDeleteFunction(variant->function);
}
@@ -825,8 +845,6 @@ cull_setup_variants(struct llvmpipe_context *lp)
void
llvmpipe_update_setup(struct llvmpipe_context *lp)
{
- struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
-
struct lp_setup_variant_key *key = &lp->setup_variant.key;
struct lp_setup_variant *variant = NULL;
struct lp_setup_variant_list_item *li;
@@ -849,9 +867,11 @@ llvmpipe_update_setup(struct llvmpipe_context *lp)
cull_setup_variants(lp);
}
- variant = generate_setup_variant(screen, key, lp);
+ variant = generate_setup_variant(lp->gallivm, key, lp);
insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
lp->nr_setup_variants++;
+
+ llvmpipe_variant_count++;
}
lp_setup_set_setup_variant(lp->setup,
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index fb29423dd3..fffdeb6ccd 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -33,6 +33,7 @@
#include "lp_state.h"
#include "draw/draw_context.h"
+#include "util/u_inlines.h"
static void *
@@ -80,8 +81,9 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
assert(count <= PIPE_MAX_ATTRIBS);
- memcpy(llvmpipe->vertex_buffer, buffers, count * sizeof(buffers[0]));
- llvmpipe->num_vertex_buffers = count;
+ util_copy_vertex_buffers(llvmpipe->vertex_buffer,
+ &llvmpipe->num_vertex_buffers,
+ buffers, count);
llvmpipe->dirty |= LP_NEW_VERTEX;
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c
index 164242eda6..e7e46a628a 100644
--- a/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -52,19 +52,23 @@ adjust_to_tile_bounds(unsigned x, unsigned y, unsigned width, unsigned height,
static void
lp_resource_copy(struct pipe_context *pipe,
- struct pipe_resource *dst, struct pipe_subresource subdst,
+ struct pipe_resource *dst, unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src, struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
{
- /* XXX what about the dstz/srcz parameters - zslice wasn't used... */
+ /* XXX this used to ignore srcz/dstz
+ * assume it works the same for cube and 3d
+ */
struct llvmpipe_resource *src_tex = llvmpipe_resource(src);
struct llvmpipe_resource *dst_tex = llvmpipe_resource(dst);
const enum pipe_format format = src_tex->base.format;
+ unsigned width = src_box->width;
+ unsigned height = src_box->height;
+ assert(src_box->depth == 1);
llvmpipe_flush_resource(pipe,
- dst, subdst.face, subdst.level,
+ dst, dst_level, dstz,
0, /* flush_flags */
FALSE, /* read_only */
TRUE, /* cpu_access */
@@ -72,7 +76,7 @@ lp_resource_copy(struct pipe_context *pipe,
"blit dest");
llvmpipe_flush_resource(pipe,
- src, subsrc.face, subsrc.level,
+ src, src_level, src_box->z,
0, /* flush_flags */
TRUE, /* read_only */
TRUE, /* cpu_access */
@@ -80,9 +84,10 @@ lp_resource_copy(struct pipe_context *pipe,
"blit src");
/*
- printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n",
- src_tex->id, dst_tex->id,
- srcx, srcy, dstx, dsty, width, height);
+ printf("surface copy from %u lvl %u to %u lvl %u: %u,%u,%u to %u,%u,%u %u x %u x %u\n",
+ src_tex->id, src_level, dst_tex->id, dst_level,
+ src_box->x, src_box->y, src_box->z, dstx, dsty, dstz,
+ src_box->width, src_box->height, src_box->depth);
*/
/* set src tiles to linear layout */
@@ -90,12 +95,13 @@ lp_resource_copy(struct pipe_context *pipe,
unsigned tx, ty, tw, th;
unsigned x, y;
- adjust_to_tile_bounds(srcx, srcy, width, height, &tx, &ty, &tw, &th);
+ adjust_to_tile_bounds(src_box->x, src_box->y, width, height,
+ &tx, &ty, &tw, &th);
for (y = 0; y < th; y += TILE_SIZE) {
for (x = 0; x < tw; x += TILE_SIZE) {
(void) llvmpipe_get_texture_tile_linear(src_tex,
- subsrc.face, subsrc.level,
+ src_box->z, src_level,
LP_TEX_USAGE_READ,
tx + x, ty + y);
}
@@ -130,7 +136,7 @@ lp_resource_copy(struct pipe_context *pipe,
usage = LP_TEX_USAGE_READ_WRITE;
(void) llvmpipe_get_texture_tile_linear(dst_tex,
- subdst.face, subdst.level,
+ dstz, dst_level,
usage,
tx + x, ty + y);
}
@@ -140,22 +146,22 @@ lp_resource_copy(struct pipe_context *pipe,
/* copy */
{
const ubyte *src_linear_ptr
- = llvmpipe_get_texture_image_address(src_tex, subsrc.face,
- subsrc.level,
+ = llvmpipe_get_texture_image_address(src_tex, src_box->z,
+ src_level,
LP_TEX_LAYOUT_LINEAR);
ubyte *dst_linear_ptr
- = llvmpipe_get_texture_image_address(dst_tex, subdst.face,
- subdst.level,
+ = llvmpipe_get_texture_image_address(dst_tex, dstz,
+ dst_level,
LP_TEX_LAYOUT_LINEAR);
if (dst_linear_ptr && src_linear_ptr) {
util_copy_rect(dst_linear_ptr, format,
- llvmpipe_resource_stride(&dst_tex->base, subdst.level),
+ llvmpipe_resource_stride(&dst_tex->base, dst_level),
dstx, dsty,
width, height,
src_linear_ptr,
- llvmpipe_resource_stride(&src_tex->base, subsrc.level),
- srcx, srcy);
+ llvmpipe_resource_stride(&src_tex->base, src_level),
+ src_box->x, src_box->y);
}
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index 90422e4258..c64f3e149f 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -64,13 +64,14 @@ write_tsv_header(FILE *fp);
boolean
-test_some(unsigned verbose, FILE *fp, unsigned long n);
+test_some(struct gallivm_state *gallivm,unsigned verbose, FILE *fp,
+ unsigned long n);
boolean
-test_single(unsigned verbose, FILE *fp);
+test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp);
boolean
-test_all(unsigned verbose, FILE *fp);
+test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp);
#if defined(PIPE_CC_MSVC)
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 8b6b5e1298..b3ca134131 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -163,11 +163,13 @@ dump_blend_type(FILE *fp,
static LLVMValueRef
-add_blend_test(LLVMModuleRef module,
+add_blend_test(struct gallivm_state *gallivm,
const struct pipe_blend_state *blend,
enum vector_mode mode,
struct lp_type type)
{
+ LLVMModuleRef module = gallivm->module;
+ LLVMContextRef context = gallivm->context;
LLVMTypeRef vec_type;
LLVMTypeRef args[4];
LLVMValueRef func;
@@ -179,18 +181,18 @@ add_blend_test(LLVMModuleRef module,
LLVMBuilderRef builder;
const unsigned rt = 0;
- vec_type = lp_build_vec_type(type);
+ vec_type = lp_build_vec_type(gallivm, type);
args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
- func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
+ func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 4, 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
src_ptr = LLVMGetParam(func, 0);
dst_ptr = LLVMGetParam(func, 1);
const_ptr = LLVMGetParam(func, 2);
res_ptr = LLVMGetParam(func, 3);
- block = LLVMAppendBasicBlock(func, "entry");
- builder = LLVMCreateBuilder();
+ block = LLVMAppendBasicBlockInContext(context, func, "entry");
+ builder = gallivm->builder;
LLVMPositionBuilderAtEnd(builder, block);
if (mode == AoS) {
@@ -203,7 +205,7 @@ add_blend_test(LLVMModuleRef module,
dst = LLVMBuildLoad(builder, dst_ptr, "dst");
con = LLVMBuildLoad(builder, const_ptr, "const");
- res = lp_build_blend_aos(builder, blend, type, rt, src, dst, con, 3);
+ res = lp_build_blend_aos(gallivm, blend, type, rt, src, dst, con, 3);
lp_build_name(res, "res");
@@ -218,7 +220,7 @@ add_blend_test(LLVMModuleRef module,
unsigned i;
for(i = 0; i < 4; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
@@ -227,10 +229,10 @@ add_blend_test(LLVMModuleRef module,
lp_build_name(dst[i], "dst.%c", "rgba"[i]);
}
- lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);
+ lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);
for(i = 0; i < 4; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
lp_build_name(res[i], "res.%c", "rgba"[i]);
LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
}
@@ -238,7 +240,6 @@ add_blend_test(LLVMModuleRef module,
LLVMBuildRetVoid(builder);;
- LLVMDisposeBuilder(builder);
return func;
}
@@ -465,16 +466,16 @@ compute_blend_ref(const struct pipe_blend_state *blend,
PIPE_ALIGN_STACK
static boolean
-test_one(unsigned verbose,
+test_one(struct gallivm_state *gallivm,
+ unsigned verbose,
FILE *fp,
const struct pipe_blend_state *blend,
enum vector_mode mode,
struct lp_type type)
{
- LLVMModuleRef module = NULL;
+ LLVMModuleRef module = gallivm->module;
LLVMValueRef func = NULL;
- LLVMExecutionEngineRef engine = lp_build_engine;
- LLVMPassManagerRef pass = NULL;
+ LLVMExecutionEngineRef engine = gallivm->engine;
char *error = NULL;
blend_test_ptr_t blend_test_ptr;
boolean success;
@@ -487,9 +488,7 @@ test_one(unsigned verbose,
if(verbose >= 1)
dump_blend_type(stdout, blend, mode, type);
- module = LLVMModuleCreateWithName("test");
-
- func = add_blend_test(module, blend, mode, type);
+ func = add_blend_test(gallivm, blend, mode, type);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
LLVMDumpModule(module);
@@ -497,24 +496,6 @@ test_one(unsigned verbose,
}
LLVMDisposeMessage(error);
-#if 0
- pass = LLVMCreatePassManager();
- LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
- /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
- * but there are more on SVN. */
- LLVMAddConstantPropagationPass(pass);
- LLVMAddInstructionCombiningPass(pass);
- LLVMAddPromoteMemoryToRegisterPass(pass);
- LLVMAddGVNPass(pass);
- LLVMAddCFGSimplificationPass(pass);
- LLVMRunPassManager(pass, module);
-#else
- (void)pass;
-#endif
-
- if(verbose >= 2)
- LLVMDumpModule(module);
-
code = LLVMGetPointerToGlobal(engine, func);
blend_test_ptr = voidptr_to_blend_test_ptr_t(code);
@@ -715,9 +696,6 @@ test_one(unsigned verbose,
LLVMFreeMachineCodeForFunction(engine, func);
- if(pass)
- LLVMDisposePassManager(pass);
-
return success;
}
@@ -773,7 +751,7 @@ const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
boolean
-test_all(unsigned verbose, FILE *fp)
+test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
const unsigned *rgb_func;
const unsigned *rgb_src_factor;
@@ -809,7 +787,7 @@ test_all(unsigned verbose, FILE *fp)
blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
blend.rt[0].colormask = PIPE_MASK_RGBA;
- if(!test_one(verbose, fp, &blend, mode, *type))
+ if(!test_one(gallivm, verbose, fp, &blend, mode, *type))
success = FALSE;
}
@@ -826,7 +804,8 @@ test_all(unsigned verbose, FILE *fp)
boolean
-test_some(unsigned verbose, FILE *fp, unsigned long n)
+test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+ unsigned long n)
{
const unsigned *rgb_func;
const unsigned *rgb_src_factor;
@@ -868,7 +847,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
blend.rt[0].colormask = PIPE_MASK_RGBA;
- if(!test_one(verbose, fp, &blend, mode, *type))
+ if(!test_one(gallivm, verbose, fp, &blend, mode, *type))
success = FALSE;
}
@@ -877,7 +856,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
boolean
-test_single(unsigned verbose, FILE *fp)
+test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
printf("no test_single()");
return TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 3ba42bf11a..f4a2f360c7 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -97,64 +97,65 @@ dump_conv_types(FILE *fp,
static LLVMValueRef
-add_conv_test(LLVMModuleRef module,
+add_conv_test(struct gallivm_state *gallivm,
struct lp_type src_type, unsigned num_srcs,
struct lp_type dst_type, unsigned num_dsts)
{
+ LLVMModuleRef module = gallivm->module;
+ LLVMContextRef context = gallivm->context;
+ LLVMBuilderRef builder = gallivm->builder;
LLVMTypeRef args[2];
LLVMValueRef func;
LLVMValueRef src_ptr;
LLVMValueRef dst_ptr;
LLVMBasicBlockRef block;
- LLVMBuilderRef builder;
LLVMValueRef src[LP_MAX_VECTOR_LENGTH];
LLVMValueRef dst[LP_MAX_VECTOR_LENGTH];
unsigned i;
- args[0] = LLVMPointerType(lp_build_vec_type(src_type), 0);
- args[1] = LLVMPointerType(lp_build_vec_type(dst_type), 0);
+ args[0] = LLVMPointerType(lp_build_vec_type(gallivm, src_type), 0);
+ args[1] = LLVMPointerType(lp_build_vec_type(gallivm, dst_type), 0);
- func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
+ func = LLVMAddFunction(module, "test",
+ LLVMFunctionType(LLVMVoidTypeInContext(context),
+ args, 2, 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
src_ptr = LLVMGetParam(func, 0);
dst_ptr = LLVMGetParam(func, 1);
- block = LLVMAppendBasicBlock(func, "entry");
- builder = LLVMCreateBuilder();
+ block = LLVMAppendBasicBlockInContext(context, func, "entry");
LLVMPositionBuilderAtEnd(builder, block);
for(i = 0; i < num_srcs; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
LLVMValueRef ptr = LLVMBuildGEP(builder, src_ptr, &index, 1, "");
src[i] = LLVMBuildLoad(builder, ptr, "");
}
- lp_build_conv(builder, src_type, dst_type, src, num_srcs, dst, num_dsts);
+ lp_build_conv(gallivm, src_type, dst_type, src, num_srcs, dst, num_dsts);
for(i = 0; i < num_dsts; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
LLVMValueRef ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, "");
LLVMBuildStore(builder, dst[i], ptr);
}
LLVMBuildRetVoid(builder);;
- LLVMDisposeBuilder(builder);
return func;
}
PIPE_ALIGN_STACK
static boolean
-test_one(unsigned verbose,
+test_one(struct gallivm_state *gallivm, unsigned verbose,
FILE *fp,
struct lp_type src_type,
struct lp_type dst_type)
{
- LLVMModuleRef module = NULL;
+ LLVMModuleRef module = gallivm->module;
+ LLVMExecutionEngineRef engine = gallivm->engine;
LLVMValueRef func = NULL;
- LLVMExecutionEngineRef engine = lp_build_engine;
- LLVMPassManagerRef pass = NULL;
char *error = NULL;
conv_test_ptr_t conv_test_ptr;
boolean success;
@@ -193,9 +194,7 @@ test_one(unsigned verbose,
eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type));
- module = LLVMModuleCreateWithName("test");
-
- func = add_conv_test(module, src_type, num_srcs, dst_type, num_dsts);
+ func = add_conv_test(gallivm, src_type, num_srcs, dst_type, num_dsts);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
LLVMDumpModule(module);
@@ -203,21 +202,6 @@ test_one(unsigned verbose,
}
LLVMDisposeMessage(error);
-#if 0
- pass = LLVMCreatePassManager();
- LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
- /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
- * but there are more on SVN. */
- LLVMAddConstantPropagationPass(pass);
- LLVMAddInstructionCombiningPass(pass);
- LLVMAddPromoteMemoryToRegisterPass(pass);
- LLVMAddGVNPass(pass);
- LLVMAddCFGSimplificationPass(pass);
- LLVMRunPassManager(pass, module);
-#else
- (void)pass;
-#endif
-
if(verbose >= 2)
LLVMDumpModule(module);
@@ -342,9 +326,6 @@ test_one(unsigned verbose,
LLVMFreeMachineCodeForFunction(engine, func);
- if(pass)
- LLVMDisposePassManager(pass);
-
return success;
}
@@ -390,7 +371,7 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]);
boolean
-test_all(unsigned verbose, FILE *fp)
+test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
const struct lp_type *src_type;
const struct lp_type *dst_type;
@@ -405,7 +386,7 @@ test_all(unsigned verbose, FILE *fp)
if(src_type->norm != dst_type->norm)
continue;
- if(!test_one(verbose, fp, *src_type, *dst_type))
+ if(!test_one(gallivm, verbose, fp, *src_type, *dst_type))
success = FALSE;
}
@@ -416,7 +397,8 @@ test_all(unsigned verbose, FILE *fp)
boolean
-test_some(unsigned verbose, FILE *fp, unsigned long n)
+test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+ unsigned long n)
{
const struct lp_type *src_type;
const struct lp_type *dst_type;
@@ -430,7 +412,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
dst_type = &conv_types[rand() % num_types];
} while (src_type == dst_type || src_type->norm != dst_type->norm);
- if(!test_one(verbose, fp, *src_type, *dst_type))
+ if(!test_one(gallivm, verbose, fp, *src_type, *dst_type))
success = FALSE;
}
@@ -439,7 +421,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
boolean
-test_single(unsigned verbose, FILE *fp)
+test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
/* float, fixed, sign, norm, width, len */
struct lp_type f32x4_type =
@@ -449,7 +431,7 @@ test_single(unsigned verbose, FILE *fp)
boolean success;
- success = test_one(verbose, fp, f32x4_type, ub8x4_type);
+ success = test_one(gallivm, verbose, fp, f32x4_type, ub8x4_type);
return success;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 2855d7cea4..4152ca6cf6 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -30,13 +30,6 @@
#include <stdio.h>
#include <float.h>
-#include "gallivm/lp_bld.h"
-#include "gallivm/lp_bld_debug.h"
-#include "gallivm/lp_bld_init.h"
-#include <llvm-c/Analysis.h>
-#include <llvm-c/Target.h>
-#include <llvm-c/Transforms/Scalar.h>
-
#include "util/u_memory.h"
#include "util/u_pointer.h"
#include "util/u_string.h"
@@ -44,7 +37,11 @@
#include "util/u_format_tests.h"
#include "util/u_format_s3tc.h"
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_format.h"
+#include "gallivm/lp_bld_init.h"
+
#include "lp_test.h"
@@ -78,56 +75,57 @@ typedef void
static LLVMValueRef
-add_fetch_rgba_test(unsigned verbose,
+add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
const struct util_format_description *desc,
struct lp_type type)
{
char name[256];
+ LLVMContextRef context = gallivm->context;
+ LLVMModuleRef module = gallivm->module;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMPassManagerRef passmgr = gallivm->passmgr;
LLVMTypeRef args[4];
LLVMValueRef func;
LLVMValueRef packed_ptr;
- LLVMValueRef offset = LLVMConstNull(LLVMInt32Type());
+ LLVMValueRef offset = LLVMConstNull(LLVMInt32TypeInContext(context));
LLVMValueRef rgba_ptr;
LLVMValueRef i;
LLVMValueRef j;
LLVMBasicBlockRef block;
- LLVMBuilderRef builder;
LLVMValueRef rgba;
util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name,
type.floating ? "float" : "unorm8");
- args[0] = LLVMPointerType(lp_build_vec_type(type), 0);
- args[1] = LLVMPointerType(LLVMInt8Type(), 0);
- args[3] = args[2] = LLVMInt32Type();
+ args[0] = LLVMPointerType(lp_build_vec_type(gallivm, type), 0);
+ args[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0);
+ args[3] = args[2] = LLVMInt32TypeInContext(context);
- func = LLVMAddFunction(lp_build_module, name,
- LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0));
+ func = LLVMAddFunction(module, name,
+ LLVMFunctionType(LLVMVoidTypeInContext(context),
+ args, Elements(args), 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
rgba_ptr = LLVMGetParam(func, 0);
packed_ptr = LLVMGetParam(func, 1);
i = LLVMGetParam(func, 2);
j = LLVMGetParam(func, 3);
- block = LLVMAppendBasicBlock(func, "entry");
- builder = LLVMCreateBuilder();
+ block = LLVMAppendBasicBlockInContext(context, func, "entry");
LLVMPositionBuilderAtEnd(builder, block);
- rgba = lp_build_fetch_rgba_aos(builder, desc, type,
+ rgba = lp_build_fetch_rgba_aos(gallivm, desc, type,
packed_ptr, offset, i, j);
LLVMBuildStore(builder, rgba, rgba_ptr);
LLVMBuildRetVoid(builder);
- LLVMDisposeBuilder(builder);
-
if (LLVMVerifyFunction(func, LLVMPrintMessageAction)) {
LLVMDumpValue(func);
abort();
}
- LLVMRunFunctionPassManager(lp_build_pass, func);
+ LLVMRunFunctionPassManager(passmgr, func);
if (verbose >= 1) {
LLVMDumpValue(func);
@@ -139,10 +137,11 @@ add_fetch_rgba_test(unsigned verbose,
PIPE_ALIGN_STACK
static boolean
-test_format_float(unsigned verbose, FILE *fp,
+test_format_float(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
const struct util_format_description *desc)
{
LLVMValueRef fetch = NULL;
+ LLVMExecutionEngineRef engine = gallivm->engine;
fetch_ptr_t fetch_ptr;
PIPE_ALIGN_VAR(16) float unpacked[4];
boolean first = TRUE;
@@ -150,9 +149,9 @@ test_format_float(unsigned verbose, FILE *fp,
unsigned i, j, k, l;
void *f;
- fetch = add_fetch_rgba_test(verbose, desc, lp_float32_vec4_type());
+ fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type());
- f = LLVMGetPointerToGlobal(lp_build_engine, fetch);
+ f = LLVMGetPointerToGlobal(engine, fetch);
fetch_ptr = (fetch_ptr_t) pointer_to_func(f);
if (verbose >= 2) {
@@ -208,7 +207,7 @@ test_format_float(unsigned verbose, FILE *fp,
}
}
- LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
+ LLVMFreeMachineCodeForFunction(engine, fetch);
LLVMDeleteFunction(fetch);
if(fp)
@@ -220,7 +219,8 @@ test_format_float(unsigned verbose, FILE *fp,
PIPE_ALIGN_STACK
static boolean
-test_format_unorm8(unsigned verbose, FILE *fp,
+test_format_unorm8(struct gallivm_state *gallivm,
+ unsigned verbose, FILE *fp,
const struct util_format_description *desc)
{
LLVMValueRef fetch = NULL;
@@ -231,9 +231,9 @@ test_format_unorm8(unsigned verbose, FILE *fp,
unsigned i, j, k, l;
void *f;
- fetch = add_fetch_rgba_test(verbose, desc, lp_unorm8_vec4_type());
+ fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type());
- f = LLVMGetPointerToGlobal(lp_build_engine, fetch);
+ f = LLVMGetPointerToGlobal(gallivm->engine, fetch);
fetch_ptr = (fetch_ptr_t) pointer_to_func(f);
if (verbose >= 2) {
@@ -290,7 +290,7 @@ test_format_unorm8(unsigned verbose, FILE *fp,
if (!success)
LLVMDumpValue(fetch);
- LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
+ LLVMFreeMachineCodeForFunction(gallivm->engine, fetch);
LLVMDeleteFunction(fetch);
if(fp)
@@ -303,16 +303,17 @@ test_format_unorm8(unsigned verbose, FILE *fp,
static boolean
-test_one(unsigned verbose, FILE *fp,
+test_one(struct gallivm_state *gallivm,
+ unsigned verbose, FILE *fp,
const struct util_format_description *format_desc)
{
boolean success = TRUE;
- if (!test_format_float(verbose, fp, format_desc)) {
+ if (!test_format_float(gallivm, verbose, fp, format_desc)) {
success = FALSE;
}
- if (!test_format_unorm8(verbose, fp, format_desc)) {
+ if (!test_format_unorm8(gallivm, verbose, fp, format_desc)) {
success = FALSE;
}
@@ -321,7 +322,7 @@ test_one(unsigned verbose, FILE *fp,
boolean
-test_all(unsigned verbose, FILE *fp)
+test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
enum pipe_format format;
boolean success = TRUE;
@@ -349,7 +350,7 @@ test_all(unsigned verbose, FILE *fp)
continue;
}
- if (!test_one(verbose, fp, format_desc)) {
+ if (!test_one(gallivm, verbose, fp, format_desc)) {
success = FALSE;
}
}
@@ -359,14 +360,15 @@ test_all(unsigned verbose, FILE *fp)
boolean
-test_some(unsigned verbose, FILE *fp, unsigned long n)
+test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+ unsigned long n)
{
- return test_all(verbose, fp);
+ return test_all(gallivm, verbose, fp);
}
boolean
-test_single(unsigned verbose, FILE *fp)
+test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
printf("no test_single()");
return TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index 7a0d06ae2c..149ee6f125 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -380,6 +380,7 @@ int main(int argc, char **argv)
unsigned i;
boolean success;
boolean single = FALSE;
+ struct gallivm_state *gallivm;
for(i = 1; i < argc; ++i) {
if(strcmp(argv[i], "-v") == 0)
@@ -394,21 +395,23 @@ int main(int argc, char **argv)
lp_build_init();
+ gallivm = gallivm_create();
+
util_cpu_detect();
if(fp) {
/* Warm up the caches */
- test_some(0, NULL, 100);
+ test_some(gallivm, 0, NULL, 100);
write_tsv_header(fp);
}
if (single)
- success = test_single(verbose, fp);
+ success = test_single(gallivm, verbose, fp);
else if (n)
- success = test_some(verbose, fp, n);
+ success = test_some(gallivm, verbose, fp, n);
else
- success = test_all(verbose, fp);
+ success = test_all(gallivm, verbose, fp);
if(fp)
fclose(fp);
diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c
index 4653f30e39..620cdb57c1 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_printf.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c
@@ -35,11 +35,6 @@
#include "gallivm/lp_bld_assert.h"
#include "gallivm/lp_bld_printf.h"
-#include <llvm-c/Analysis.h>
-#include <llvm-c/ExecutionEngine.h>
-#include <llvm-c/Target.h>
-#include <llvm-c/Transforms/Scalar.h>
-
#include "lp_test.h"
@@ -63,48 +58,45 @@ typedef void (*test_printf_t)(int i);
static LLVMValueRef
-add_printf_test(LLVMModuleRef module)
+add_printf_test(struct gallivm_state *gallivm)
{
- LLVMTypeRef args[1] = { LLVMIntType(32) };
- LLVMValueRef func = LLVMAddFunction(module, "test_printf", LLVMFunctionType(LLVMVoidType(), args, 1, 0));
- LLVMBuilderRef builder = LLVMCreateBuilder();
- LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
+ LLVMModuleRef module = gallivm->module;
+ LLVMTypeRef args[1] = { LLVMIntTypeInContext(gallivm->context, 32) };
+ LLVMValueRef func = LLVMAddFunction(module, "test_printf", LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), args, 1, 0));
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(gallivm->context, func, "entry");
LLVMSetFunctionCallConv(func, LLVMCCallConv);
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_printf(builder, "hello, world\n");
- lp_build_printf(builder, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32Type(), 5, 0),
- LLVMConstInt(LLVMInt32Type(), 6, 0));
+ lp_build_printf(gallivm, "hello, world\n");
+ lp_build_printf(gallivm, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 5, 0),
+ LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 6, 0));
/* Also test lp_build_assert(). This should not fail. */
- lp_build_assert(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), "assert(1)");
+ lp_build_assert(gallivm, LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 1, 0), "assert(1)");
LLVMBuildRetVoid(builder);
- LLVMDisposeBuilder(builder);
+
return func;
}
PIPE_ALIGN_STACK
static boolean
-test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase)
+test_printf(struct gallivm_state *gallivm,
+ unsigned verbose, FILE *fp,
+ const struct printf_test_case *testcase)
{
- LLVMModuleRef module = NULL;
- LLVMValueRef test = NULL;
- LLVMExecutionEngineRef engine = NULL;
- LLVMModuleProviderRef provider = NULL;
- LLVMPassManagerRef pass = NULL;
+ LLVMExecutionEngineRef engine = gallivm->engine;
+ LLVMModuleRef module = gallivm->module;
+ LLVMValueRef test;
char *error = NULL;
- test_printf_t test_printf;
- float unpacked[4];
- unsigned packed;
+ test_printf_t test_printf_func;
boolean success = TRUE;
void *code;
- module = LLVMModuleCreateWithName("test");
-
- test = add_printf_test(module);
+ test = add_printf_test(gallivm);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
LLVMDumpModule(module);
@@ -112,74 +104,40 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase)
}
LLVMDisposeMessage(error);
- provider = LLVMCreateModuleProviderForExistingModule(module);
-#if 0
- if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
- fprintf(stderr, "%s\n", error);
- LLVMDisposeMessage(error);
- abort();
- }
-#else
- (void) provider;
- engine = lp_build_engine;
-#endif
-
-#if 0
- pass = LLVMCreatePassManager();
- LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
- /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
- * but there are more on SVN. */
- LLVMAddConstantPropagationPass(pass);
- LLVMAddInstructionCombiningPass(pass);
- LLVMAddPromoteMemoryToRegisterPass(pass);
- LLVMAddGVNPass(pass);
- LLVMAddCFGSimplificationPass(pass);
- LLVMRunPassManager(pass, module);
-#else
- (void)pass;
-#endif
-
code = LLVMGetPointerToGlobal(engine, test);
- test_printf = (test_printf_t)pointer_to_func(code);
-
- memset(unpacked, 0, sizeof unpacked);
- packed = 0;
-
+ test_printf_func = (test_printf_t) pointer_to_func(code);
// LLVMDumpModule(module);
- test_printf(0);
+ test_printf_func(0);
LLVMFreeMachineCodeForFunction(engine, test);
- LLVMDisposeExecutionEngine(engine);
- if(pass)
- LLVMDisposePassManager(pass);
-
return success;
}
boolean
-test_all(unsigned verbose, FILE *fp)
+test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
boolean success = TRUE;
- test_printf(verbose, fp, NULL);
+ test_printf(gallivm, verbose, fp, NULL);
return success;
}
boolean
-test_some(unsigned verbose, FILE *fp, unsigned long n)
+test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+ unsigned long n)
{
- return test_all(verbose, fp);
+ return test_all(gallivm, verbose, fp);
}
boolean
-test_single(unsigned verbose, FILE *fp)
+test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
printf("no test_single()");
return TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c
index 816518e508..4edee4af12 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_round.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_round.c
@@ -34,11 +34,6 @@
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_arit.h"
-#include <llvm-c/Analysis.h>
-#include <llvm-c/ExecutionEngine.h>
-#include <llvm-c/Target.h>
-#include <llvm-c/Transforms/Scalar.h>
-
#include "lp_test.h"
@@ -64,18 +59,21 @@ typedef LLVMValueRef (*lp_func_t)(struct lp_build_context *, LLVMValueRef);
static LLVMValueRef
-add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func)
+add_test(struct gallivm_state *gallivm, const char *name, lp_func_t lp_func)
{
- LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4);
+ LLVMModuleRef module = gallivm->module;
+ LLVMContextRef context = gallivm->context;
+ LLVMBuilderRef builder = gallivm->builder;
+
+ LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatTypeInContext(context), 4);
LLVMTypeRef args[1] = { v4sf };
LLVMValueRef func = LLVMAddFunction(module, name, LLVMFunctionType(v4sf, args, 1, 0));
LLVMValueRef arg1 = LLVMGetParam(func, 0);
- LLVMBuilderRef builder = LLVMCreateBuilder();
- LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
+ LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry");
LLVMValueRef ret;
struct lp_build_context bld;
- lp_build_context_init(&bld, builder, lp_float32_vec4_type());
+ lp_build_context_init(&bld, gallivm, lp_float32_vec4_type());
LLVMSetFunctionCallConv(func, LLVMCCallConv);
@@ -84,7 +82,7 @@ add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func)
ret = lp_func(&bld, arg1);
LLVMBuildRet(builder, ret);
- LLVMDisposeBuilder(builder);
+
return func;
}
@@ -117,12 +115,11 @@ compare(v4sf x, v4sf y)
PIPE_ALIGN_STACK
static boolean
-test_round(unsigned verbose, FILE *fp)
+test_round(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
- LLVMModuleRef module = NULL;
+ LLVMModuleRef module = gallivm->module;
LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil;
- LLVMExecutionEngineRef engine = lp_build_engine;
- LLVMPassManagerRef pass = NULL;
+ LLVMExecutionEngineRef engine = gallivm->engine;
char *error = NULL;
test_round_t round_func, trunc_func, floor_func, ceil_func;
float unpacked[4];
@@ -130,12 +127,10 @@ test_round(unsigned verbose, FILE *fp)
boolean success = TRUE;
int i;
- module = LLVMModuleCreateWithName("test");
-
- test_round = add_test(module, "round", lp_build_round);
- test_trunc = add_test(module, "trunc", lp_build_trunc);
- test_floor = add_test(module, "floor", lp_build_floor);
- test_ceil = add_test(module, "ceil", lp_build_ceil);
+ test_round = add_test(gallivm, "round", lp_build_round);
+ test_trunc = add_test(gallivm, "trunc", lp_build_trunc);
+ test_floor = add_test(gallivm, "floor", lp_build_floor);
+ test_ceil = add_test(gallivm, "ceil", lp_build_ceil);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
printf("LLVMVerifyModule: %s\n", error);
@@ -144,21 +139,6 @@ test_round(unsigned verbose, FILE *fp)
}
LLVMDisposeMessage(error);
-#if 0
- pass = LLVMCreatePassManager();
- LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
- /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
- * but there are more on SVN. */
- LLVMAddConstantPropagationPass(pass);
- LLVMAddInstructionCombiningPass(pass);
- LLVMAddPromoteMemoryToRegisterPass(pass);
- LLVMAddGVNPass(pass);
- LLVMAddCFGSimplificationPass(pass);
- LLVMRunPassManager(pass, module);
-#else
- (void)pass;
-#endif
-
round_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_round));
trunc_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_trunc));
floor_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_floor));
@@ -229,17 +209,13 @@ test_round(unsigned verbose, FILE *fp)
LLVMFreeMachineCodeForFunction(engine, test_floor);
LLVMFreeMachineCodeForFunction(engine, test_ceil);
- LLVMDisposeExecutionEngine(engine);
- if(pass)
- LLVMDisposePassManager(pass);
-
return success;
}
#else /* !PIPE_ARCH_SSE */
static boolean
-test_round(unsigned verbose, FILE *fp)
+test_round(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
return TRUE;
}
@@ -248,20 +224,21 @@ test_round(unsigned verbose, FILE *fp)
boolean
-test_all(unsigned verbose, FILE *fp)
+test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
- return test_round(verbose, fp);
+ return test_round(gallivm, verbose, fp);
}
boolean
-test_some(unsigned verbose, FILE *fp, unsigned long n)
+test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+ unsigned long n)
{
- return test_all(verbose, fp);
+ return test_all(gallivm, verbose, fp);
}
boolean
-test_single(unsigned verbose, FILE *fp)
+test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
printf("no test_single()");
return TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
index 79939b1a39..066d633d44 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
@@ -29,15 +29,11 @@
#include <stdlib.h>
#include <stdio.h>
+#include "util/u_pointer.h"
+
#include "gallivm/lp_bld.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_arit.h"
-#include "util/u_pointer.h"
-
-#include <llvm-c/Analysis.h>
-#include <llvm-c/ExecutionEngine.h>
-#include <llvm-c/Target.h>
-#include <llvm-c/Transforms/Scalar.h>
#include "lp_test.h"
@@ -61,25 +57,25 @@ write_tsv_header(FILE *fp)
typedef __m128 (*test_sincos_t)(__m128);
static LLVMValueRef
-add_sincos_test(LLVMModuleRef module, boolean sin)
+add_sincos_test(struct gallivm_state *gallivm, LLVMModuleRef module,
+ LLVMContextRef context, boolean sin)
{
- LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4);
+ LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatTypeInContext(context), 4);
LLVMTypeRef args[1] = { v4sf };
LLVMValueRef func = LLVMAddFunction(module, "sincos", LLVMFunctionType(v4sf, args, 1, 0));
LLVMValueRef arg1 = LLVMGetParam(func, 0);
- LLVMBuilderRef builder = LLVMCreateBuilder();
- LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry");
LLVMValueRef ret;
struct lp_build_context bld;
- lp_build_context_init(&bld, builder, lp_float32_vec4_type());
+ lp_build_context_init(&bld, gallivm, lp_float32_vec4_type());
LLVMSetFunctionCallConv(func, LLVMCCallConv);
LLVMPositionBuilderAtEnd(builder, block);
ret = sin ? lp_build_sin(&bld, arg1) : lp_build_cos(&bld, arg1);
LLVMBuildRet(builder, ret);
- LLVMDisposeBuilder(builder);
return func;
}
@@ -95,22 +91,20 @@ printv(char* string, v4sf value)
PIPE_ALIGN_STACK
static boolean
-test_sincos(unsigned verbose, FILE *fp)
+test_sincos(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
- LLVMModuleRef module = NULL;
+ LLVMModuleRef module = gallivm->module;
LLVMValueRef test_sin = NULL, test_cos = NULL;
- LLVMExecutionEngineRef engine = lp_build_engine;
- LLVMPassManagerRef pass = NULL;
+ LLVMExecutionEngineRef engine = gallivm->engine;
+ LLVMContextRef context = gallivm->context;
char *error = NULL;
test_sincos_t sin_func;
test_sincos_t cos_func;
float unpacked[4];
boolean success = TRUE;
- module = LLVMModuleCreateWithName("test");
-
- test_sin = add_sincos_test(module, TRUE);
- test_cos = add_sincos_test(module, FALSE);
+ test_sin = add_sincos_test(gallivm, module, context, TRUE);
+ test_cos = add_sincos_test(gallivm, module, context,FALSE);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
printf("LLVMVerifyModule: %s\n", error);
@@ -119,21 +113,6 @@ test_sincos(unsigned verbose, FILE *fp)
}
LLVMDisposeMessage(error);
-#if 0
- pass = LLVMCreatePassManager();
- LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
- /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
- * but there are more on SVN. */
- LLVMAddConstantPropagationPass(pass);
- LLVMAddInstructionCombiningPass(pass);
- LLVMAddPromoteMemoryToRegisterPass(pass);
- LLVMAddGVNPass(pass);
- LLVMAddCFGSimplificationPass(pass);
- LLVMRunPassManager(pass, module);
-#else
- (void)pass;
-#endif
-
sin_func = (test_sincos_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_sin));
cos_func = (test_sincos_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_cos));
@@ -152,16 +131,13 @@ test_sincos(unsigned verbose, FILE *fp)
LLVMFreeMachineCodeForFunction(engine, test_sin);
LLVMFreeMachineCodeForFunction(engine, test_cos);
- if(pass)
- LLVMDisposePassManager(pass);
-
return success;
}
#else /* !PIPE_ARCH_SSE */
static boolean
-test_sincos(unsigned verbose, FILE *fp)
+test_sincos(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
return TRUE;
}
@@ -170,24 +146,25 @@ test_sincos(unsigned verbose, FILE *fp)
boolean
-test_all(unsigned verbose, FILE *fp)
+test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
boolean success = TRUE;
- test_sincos(verbose, fp);
+ test_sincos(gallivm, verbose, fp);
return success;
}
boolean
-test_some(unsigned verbose, FILE *fp, unsigned long n)
+test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+ unsigned long n)
{
- return test_all(verbose, fp);
+ return test_all(gallivm, verbose, fp);
}
boolean
-test_single(unsigned verbose, FILE *fp)
+test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
{
printf("no test_single()");
return TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
index f417fc8a9e..ed4282937f 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
@@ -43,6 +43,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_sample.h"
#include "gallivm/lp_bld_tgsi.h"
@@ -89,7 +90,7 @@ struct lp_llvm_sampler_soa
*/
static LLVMValueRef
lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
- LLVMBuilderRef builder,
+ struct gallivm_state *gallivm,
unsigned unit,
unsigned member_index,
const char *member_name,
@@ -97,6 +98,7 @@ lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
{
struct llvmpipe_sampler_dynamic_state *state =
(struct llvmpipe_sampler_dynamic_state *)base;
+ LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef indices[4];
LLVMValueRef ptr;
LLVMValueRef res;
@@ -104,13 +106,13 @@ lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
assert(unit < PIPE_MAX_SAMPLERS);
/* context[0] */
- indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indices[0] = lp_build_const_int32(gallivm, 0);
/* context[0].textures */
- indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CTX_TEXTURES, 0);
+ indices[1] = lp_build_const_int32(gallivm, LP_JIT_CTX_TEXTURES);
/* context[0].textures[unit] */
- indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ indices[2] = lp_build_const_int32(gallivm, unit);
/* context[0].textures[unit].member */
- indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0);
+ indices[3] = lp_build_const_int32(gallivm, member_index);
ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), "");
@@ -137,10 +139,10 @@ lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
#define LP_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \
static LLVMValueRef \
lp_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \
- LLVMBuilderRef builder, \
+ struct gallivm_state *gallivm, \
unsigned unit) \
{ \
- return lp_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \
+ return lp_llvm_texture_member(base, gallivm, unit, _index, #_name, _emit_load ); \
}
@@ -170,7 +172,7 @@ lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
*/
static void
lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
- LLVMBuilderRef builder,
+ struct gallivm_state *gallivm,
struct lp_type type,
unsigned unit,
unsigned num_coords,
@@ -186,11 +188,11 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
assert(unit < PIPE_MAX_SAMPLERS);
if (LP_PERF & PERF_NO_TEX) {
- lp_build_sample_nop(type, texel);
+ lp_build_sample_nop(gallivm, type, texel);
return;
}
- lp_build_sample_soa(builder,
+ lp_build_sample_soa(gallivm,
&sampler->dynamic_state.static_state[unit],
&sampler->dynamic_state.base,
type,
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index a4b9f2590a..9753da5e57 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -49,6 +49,7 @@
#include "lp_tile_image.h"
#include "lp_texture.h"
#include "lp_setup.h"
+#include "lp_state.h"
#include "state_tracker/sw_winsys.h"
@@ -242,6 +243,7 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
/* other data (vertex buffer, const buffer, etc) */
const enum pipe_format format = templat->format;
const uint w = templat->width0 / util_format_get_blockheight(format);
+ /* XXX buffers should only have one dimension, those values should be 1 */
const uint h = templat->height0 / util_format_get_blockwidth(format);
const uint d = templat->depth0;
const uint bpp = util_format_get_blocksize(format);
@@ -329,17 +331,16 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen,
*/
void *
llvmpipe_resource_map(struct pipe_resource *resource,
- unsigned face,
- unsigned level,
- unsigned zslice,
+ unsigned level,
+ unsigned layer,
enum lp_texture_usage tex_usage,
enum lp_texture_layout layout)
{
struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
uint8_t *map;
- assert(face < 6);
assert(level < LP_MAX_TEXTURE_LEVELS);
+ assert(layer < (u_minify(resource->depth0, level) + resource->array_size - 1));
assert(tex_usage == LP_TEX_USAGE_READ ||
tex_usage == LP_TEX_USAGE_READ_WRITE ||
@@ -363,9 +364,8 @@ llvmpipe_resource_map(struct pipe_resource *resource,
dt_usage = PIPE_TRANSFER_READ_WRITE;
}
- assert(face == 0);
assert(level == 0);
- assert(zslice == 0);
+ assert(layer == 0);
/* FIXME: keep map count? */
map = winsys->displaytarget_map(winsys, lpr->dt, dt_usage);
@@ -381,15 +381,8 @@ llvmpipe_resource_map(struct pipe_resource *resource,
return map2;
}
else if (resource_is_texture(resource)) {
- /* regular texture */
- if (resource->target != PIPE_TEXTURE_CUBE) {
- assert(face == 0);
- }
- if (resource->target != PIPE_TEXTURE_3D) {
- assert(zslice == 0);
- }
- map = llvmpipe_get_texture_image(lpr, face + zslice, level,
+ map = llvmpipe_get_texture_image(lpr, layer, level,
tex_usage, layout);
return map;
}
@@ -404,9 +397,8 @@ llvmpipe_resource_map(struct pipe_resource *resource,
*/
void
llvmpipe_resource_unmap(struct pipe_resource *resource,
- unsigned face,
unsigned level,
- unsigned zslice)
+ unsigned layer)
{
struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
@@ -415,12 +407,11 @@ llvmpipe_resource_unmap(struct pipe_resource *resource,
struct llvmpipe_screen *lp_screen = llvmpipe_screen(resource->screen);
struct sw_winsys *winsys = lp_screen->winsys;
- assert(face == 0);
assert(level == 0);
- assert(zslice == 0);
+ assert(layer == 0);
/* make sure linear image is up to date */
- (void) llvmpipe_get_texture_image(lpr, face + zslice, level,
+ (void) llvmpipe_get_texture_image(lpr, layer, level,
LP_TEX_USAGE_READ,
LP_TEX_LAYOUT_LINEAR);
@@ -520,34 +511,35 @@ llvmpipe_resource_get_handle(struct pipe_screen *screen,
static struct pipe_surface *
-llvmpipe_get_tex_surface(struct pipe_screen *screen,
- struct pipe_resource *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned usage)
+llvmpipe_create_surface(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl)
{
struct pipe_surface *ps;
- assert(level <= pt->last_level);
+ assert(surf_tmpl->u.tex.level <= pt->last_level);
ps = CALLOC_STRUCT(pipe_surface);
if (ps) {
pipe_reference_init(&ps->reference, 1);
pipe_resource_reference(&ps->texture, pt);
- ps->format = pt->format;
- ps->width = u_minify(pt->width0, level);
- ps->height = u_minify(pt->height0, level);
- ps->usage = usage;
-
- ps->face = face;
- ps->level = level;
- ps->zslice = zslice;
+ ps->context = pipe;
+ ps->format = surf_tmpl->format;
+ ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
+ ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
+ ps->usage = surf_tmpl->usage;
+
+ ps->u.tex.level = surf_tmpl->u.tex.level;
+ ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
}
return ps;
}
static void
-llvmpipe_tex_surface_destroy(struct pipe_surface *surf)
+llvmpipe_surface_destroy(struct pipe_context *pipe,
+ struct pipe_surface *surf)
{
/* Effectively do the texture_update work here - if texture images
* needed post-processing to put them into hardware layout, this is
@@ -561,16 +553,17 @@ llvmpipe_tex_surface_destroy(struct pipe_surface *surf)
static struct pipe_transfer *
llvmpipe_get_transfer(struct pipe_context *pipe,
- struct pipe_resource *resource,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box)
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
struct llvmpipe_resource *lprex = llvmpipe_resource(resource);
struct llvmpipe_transfer *lpr;
assert(resource);
- assert(sr.level <= resource->last_level);
+ assert(level <= resource->last_level);
/*
* Transfers, like other pipe operations, must happen in order, so flush the
@@ -580,7 +573,8 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
boolean read_only = !(usage & PIPE_TRANSFER_WRITE);
boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK);
if (!llvmpipe_flush_resource(pipe, resource,
- sr.face, sr.level,
+ level,
+ box->depth > 1 ? -1 : box->z,
0, /* flush_flags */
read_only,
TRUE, /* cpu_access */
@@ -594,14 +588,17 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
}
}
+ if (resource == llvmpipe->constants[PIPE_SHADER_FRAGMENT][0])
+ llvmpipe->dirty |= LP_NEW_CONSTANTS;
+
lpr = CALLOC_STRUCT(llvmpipe_transfer);
if (lpr) {
struct pipe_transfer *pt = &lpr->base;
pipe_resource_reference(&pt->resource, resource);
pt->box = *box;
- pt->sr = sr;
- pt->stride = lprex->row_stride[sr.level];
- pt->slice_stride = lprex->img_stride[sr.level];
+ pt->level = level;
+ pt->stride = lprex->row_stride[level];
+ pt->layer_stride = lprex->img_stride[level];
pt->usage = usage;
return pt;
@@ -635,8 +632,7 @@ llvmpipe_transfer_map( struct pipe_context *pipe,
enum lp_texture_usage tex_usage;
const char *mode;
- assert(transfer->sr.face < 6);
- assert(transfer->sr.level < LP_MAX_TEXTURE_LEVELS);
+ assert(transfer->level < LP_MAX_TEXTURE_LEVELS);
/*
printf("tex_transfer_map(%d, %d %d x %d of %d x %d, usage %d )\n",
@@ -666,9 +662,8 @@ llvmpipe_transfer_map( struct pipe_context *pipe,
format = lpr->base.format;
map = llvmpipe_resource_map(transfer->resource,
- transfer->sr.face,
- transfer->sr.level,
- transfer->box.z,
+ transfer->level,
+ transfer->box.z,
tex_usage, LP_TEX_LAYOUT_LINEAR);
@@ -680,7 +675,7 @@ llvmpipe_transfer_map( struct pipe_context *pipe,
*/
screen->timestamp++;
}
-
+
map +=
transfer->box.y / util_format_get_blockheight(format) * transfer->stride +
transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
@@ -696,21 +691,20 @@ llvmpipe_transfer_unmap(struct pipe_context *pipe,
assert(transfer->resource);
llvmpipe_resource_unmap(transfer->resource,
- transfer->sr.face,
- transfer->sr.level,
- transfer->box.z);
+ transfer->level,
+ transfer->box.z);
}
static unsigned int
llvmpipe_is_resource_referenced( struct pipe_context *pipe,
- struct pipe_resource *presource,
- unsigned face, unsigned level)
+ struct pipe_resource *presource,
+ unsigned level, int layer)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
if (presource->target == PIPE_BUFFER)
return PIPE_UNREFERENCED;
-
+
return lp_setup_is_resource_referenced(llvmpipe->setup, presource);
}
@@ -740,6 +734,7 @@ llvmpipe_user_buffer_create(struct pipe_screen *screen,
buffer->base.width0 = bytes;
buffer->base.height0 = 1;
buffer->base.depth0 = 1;
+ buffer->base.array_size = 1;
buffer->userBuffer = TRUE;
buffer->data = ptr;
@@ -1396,8 +1391,6 @@ llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen)
screen->resource_get_handle = llvmpipe_resource_get_handle;
screen->user_buffer_create = llvmpipe_user_buffer_create;
- screen->get_tex_surface = llvmpipe_get_tex_surface;
- screen->tex_surface_destroy = llvmpipe_tex_surface_destroy;
}
@@ -1412,4 +1405,7 @@ llvmpipe_init_context_resource_funcs(struct pipe_context *pipe)
pipe->transfer_flush_region = u_default_transfer_flush_region;
pipe->transfer_inline_write = u_default_transfer_inline_write;
+
+ pipe->create_surface = llvmpipe_create_surface;
+ pipe->surface_destroy = llvmpipe_surface_destroy;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index 4e4a65dcb4..b789c0f409 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -172,17 +172,15 @@ llvmpipe_resource_stride(struct pipe_resource *resource,
void *
llvmpipe_resource_map(struct pipe_resource *resource,
- unsigned face_slice,
- unsigned level,
- unsigned zslice,
+ unsigned level,
+ unsigned layer,
enum lp_texture_usage tex_usage,
enum lp_texture_layout layout);
void
llvmpipe_resource_unmap(struct pipe_resource *resource,
- unsigned face_slice,
unsigned level,
- unsigned zslice);
+ unsigned layer);
void *
diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c
index fb5cdb4609..3680f4622d 100644
--- a/src/gallium/drivers/noop/noop_pipe.c
+++ b/src/gallium/drivers/noop/noop_pipe.c
@@ -30,10 +30,16 @@
#include <util/u_inlines.h>
#include <util/u_format.h>
#include "noop_public.h"
-#include "state_tracker/sw_winsys.h"
+
+DEBUG_GET_ONCE_BOOL_OPTION(noop, "GALLIUM_NOOP", FALSE)
void noop_init_state_functions(struct pipe_context *ctx);
+struct noop_pipe_screen {
+ struct pipe_screen pscreen;
+ struct pipe_screen *oscreen;
+};
+
/*
* query
*/
@@ -83,7 +89,7 @@ struct noop_resource {
static unsigned noop_is_resource_referenced(struct pipe_context *pipe,
struct pipe_resource *resource,
- unsigned face, unsigned level)
+ unsigned level, int layer)
{
return PIPE_UNREFERENCED;
}
@@ -108,52 +114,29 @@ static struct pipe_resource *noop_resource_create(struct pipe_screen *screen,
FREE(nresource);
return NULL;
}
-#if 0
- if (nresource->base.bind & (PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) {
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- unsigned stride;
-
- nresource->dt = winsys->displaytarget_create(winsys, nresource->base.bind,
- nresource->base.format,
- nresource->base.width0,
- nresource->base.height0,
- 16, &stride);
- }
-#endif
return &nresource->base;
}
-static struct pipe_resource *noop_resource_from_handle(struct pipe_screen * screen,
+static struct pipe_resource *noop_resource_from_handle(struct pipe_screen *screen,
const struct pipe_resource *templ,
- struct winsys_handle *whandle)
+ struct winsys_handle *handle)
{
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- struct noop_resource *nresource;
- struct sw_displaytarget *dt;
- unsigned stride;
+ struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen;
+ struct pipe_screen *oscreen = noop_screen->oscreen;
+ struct pipe_resource *result;
+ struct pipe_resource *noop_resource;
- dt = winsys->displaytarget_from_handle(winsys, templ, whandle, &stride);
- if (dt == NULL) {
- return NULL;
- }
- nresource = (struct noop_resource *)noop_resource_create(screen, templ);
- nresource->dt = dt;
- return &nresource->base;
+ result = oscreen->resource_from_handle(oscreen, templ, handle);
+ noop_resource = noop_resource_create(screen, result);
+ pipe_resource_reference(&result, NULL);
+ return noop_resource;
}
static boolean noop_resource_get_handle(struct pipe_screen *screen,
struct pipe_resource *resource,
struct winsys_handle *handle)
{
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- struct noop_resource *nresource = (struct noop_resource *)resource;
-
- if (nresource->dt == NULL)
- return FALSE;
-
- return winsys->displaytarget_get_handle(winsys, nresource->dt, handle);
+ return FALSE;
}
static void noop_resource_destroy(struct pipe_screen *screen,
@@ -161,11 +144,6 @@ static void noop_resource_destroy(struct pipe_screen *screen,
{
struct noop_resource *nresource = (struct noop_resource *)resource;
- if (nresource->dt) {
- /* display target */
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- winsys->displaytarget_destroy(winsys, nresource->dt);
- }
free(nresource->data);
FREE(resource);
}
@@ -193,7 +171,7 @@ static struct pipe_resource *noop_user_buffer_create(struct pipe_screen *screen,
*/
static struct pipe_transfer *noop_get_transfer(struct pipe_context *context,
struct pipe_resource *resource,
- struct pipe_subresource sr,
+ unsigned level,
enum pipe_transfer_usage usage,
const struct pipe_box *box)
{
@@ -203,11 +181,11 @@ static struct pipe_transfer *noop_get_transfer(struct pipe_context *context,
if (transfer == NULL)
return NULL;
pipe_resource_reference(&transfer->resource, resource);
- transfer->sr = sr;
+ transfer->level = level;
transfer->usage = usage;
transfer->box = *box;
transfer->stride = 1;
- transfer->slice_stride = 1;
+ transfer->layer_stride = 1;
return transfer;
}
@@ -239,12 +217,12 @@ static void noop_transfer_destroy(struct pipe_context *pipe,
static void noop_transfer_inline_write(struct pipe_context *pipe,
struct pipe_resource *resource,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box,
const void *data,
unsigned stride,
- unsigned slice_stride)
+ unsigned layer_stride)
{
}
@@ -277,12 +255,11 @@ static void noop_clear_depth_stencil(struct pipe_context *ctx,
static void noop_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
- struct pipe_subresource subdst,
+ unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
- struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
}
@@ -332,46 +309,14 @@ static struct pipe_context *noop_create_context(struct pipe_screen *screen, void
return ctx;
}
-/*
- * texture
- */
-static struct pipe_surface *noop_get_tex_surface(struct pipe_screen *screen,
- struct pipe_resource *texture,
- unsigned face, unsigned level,
- unsigned zslice, unsigned flags)
-{
- struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface);
-
- if (surface == NULL)
- return NULL;
- pipe_reference_init(&surface->reference, 1);
- pipe_resource_reference(&surface->texture, texture);
- surface->format = texture->format;
- surface->width = texture->width0;
- surface->height = texture->height0;
- surface->offset = 0;
- surface->usage = flags;
- surface->zslice = zslice;
- surface->texture = texture;
- surface->face = face;
- surface->level = level;
-
- return surface;
-}
-
-static void noop_tex_surface_destroy(struct pipe_surface *surface)
-{
- pipe_resource_reference(&surface->texture, NULL);
- FREE(surface);
-}
-
/*
* pipe_screen
*/
static void noop_flush_frontbuffer(struct pipe_screen *_screen,
- struct pipe_surface *surface,
- void *context_private)
+ struct pipe_resource *resource,
+ unsigned level, unsigned layer,
+ void *context_private)
{
}
@@ -516,19 +461,30 @@ static boolean noop_is_format_supported(struct pipe_screen* screen,
static void noop_destroy_screen(struct pipe_screen *screen)
{
+ struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen;
+ struct pipe_screen *oscreen = noop_screen->oscreen;
+
+ oscreen->destroy(oscreen);
FREE(screen);
}
-struct pipe_screen *noop_screen_create(struct sw_winsys *winsys)
+struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen)
{
+ struct noop_pipe_screen *noop_screen;
struct pipe_screen *screen;
- screen = CALLOC_STRUCT(pipe_screen);
- if (screen == NULL) {
+ if (!debug_get_option_noop()) {
+ return oscreen;
+ }
+
+ noop_screen = CALLOC_STRUCT(noop_pipe_screen);
+ if (noop_screen == NULL) {
return NULL;
}
+ noop_screen->oscreen = oscreen;
+ screen = &noop_screen->pscreen;
- screen->winsys = (struct pipe_winsys*)winsys;
+ screen->winsys = oscreen->winsys;
screen->destroy = noop_destroy_screen;
screen->get_name = noop_get_name;
screen->get_vendor = noop_get_vendor;
@@ -537,8 +493,6 @@ struct pipe_screen *noop_screen_create(struct sw_winsys *winsys)
screen->get_paramf = noop_get_paramf;
screen->is_format_supported = noop_is_format_supported;
screen->context_create = noop_create_context;
- screen->get_tex_surface = noop_get_tex_surface;
- screen->tex_surface_destroy = noop_tex_surface_destroy;
screen->resource_create = noop_resource_create;
screen->resource_from_handle = noop_resource_from_handle;
screen->resource_get_handle = noop_resource_get_handle;
diff --git a/src/gallium/drivers/noop/noop_public.h b/src/gallium/drivers/noop/noop_public.h
index 8ce82bec69..180ea597fa 100644
--- a/src/gallium/drivers/noop/noop_public.h
+++ b/src/gallium/drivers/noop/noop_public.h
@@ -23,8 +23,7 @@
#ifndef NOOP_PUBLIC_H
#define NOOP_PUBLIC_H
-struct sw_winsys;
-
-struct pipe_screen *noop_screen_create(struct sw_winsys *winsys);
+struct pipe_screen;
+struct pipe_screen *noop_screen_create(struct pipe_screen *screen);
#endif
diff --git a/src/gallium/drivers/noop/noop_state.c b/src/gallium/drivers/noop/noop_state.c
index 048ed42a9b..ad324774c0 100644
--- a/src/gallium/drivers/noop/noop_state.c
+++ b/src/gallium/drivers/noop/noop_state.c
@@ -101,6 +101,28 @@ static struct pipe_sampler_view *noop_create_sampler_view(struct pipe_context *c
return sampler_view;
}
+static struct pipe_surface *noop_create_surface(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ const struct pipe_surface *surf_tmpl)
+{
+ struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface);
+
+ if (surface == NULL)
+ return NULL;
+ pipe_reference_init(&surface->reference, 1);
+ pipe_resource_reference(&surface->texture, texture);
+ surface->context = ctx;
+ surface->format = surf_tmpl->format;
+ surface->width = texture->width0;
+ surface->height = texture->height0;
+ surface->usage = surf_tmpl->usage;
+ surface->texture = texture;
+ surface->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ surface->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+ surface->u.tex.level = surf_tmpl->u.tex.level;
+
+ return surface;
+}
static void noop_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
struct pipe_sampler_view **views)
{
@@ -163,6 +185,14 @@ static void noop_sampler_view_destroy(struct pipe_context *ctx,
FREE(state);
}
+
+static void noop_surface_destroy(struct pipe_context *ctx,
+ struct pipe_surface *surface)
+{
+ pipe_resource_reference(&surface->texture, NULL);
+ FREE(surface);
+}
+
static void noop_bind_state(struct pipe_context *ctx, void *state)
{
}
@@ -213,6 +243,8 @@ static void *noop_create_shader_state(struct pipe_context *ctx,
return nstate;
}
+void noop_init_state_functions(struct pipe_context *ctx);
+
void noop_init_state_functions(struct pipe_context *ctx)
{
ctx->create_blend_state = noop_create_blend_state;
@@ -221,6 +253,7 @@ void noop_init_state_functions(struct pipe_context *ctx)
ctx->create_rasterizer_state = noop_create_rs_state;
ctx->create_sampler_state = noop_create_sampler_state;
ctx->create_sampler_view = noop_create_sampler_view;
+ ctx->create_surface = noop_create_surface;
ctx->create_vertex_elements_state = noop_create_vertex_elements;
ctx->create_vs_state = noop_create_shader_state;
ctx->bind_blend_state = noop_bind_state;
@@ -252,5 +285,6 @@ void noop_init_state_functions(struct pipe_context *ctx)
ctx->set_vertex_sampler_views = noop_set_vs_sampler_view;
ctx->set_viewport_state = noop_set_viewport_state;
ctx->sampler_view_destroy = noop_sampler_view_destroy;
+ ctx->surface_destroy = noop_surface_destroy;
ctx->draw_vbo = noop_draw_vbo;
}
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 8c290273fb..1f4e5171c0 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -66,7 +66,7 @@ void nouveau_screen_fini(struct nouveau_screen *);
-
+#ifndef NOUVEAU_NVC0
static INLINE unsigned
RING_3D(unsigned mthd, unsigned size)
{
@@ -78,5 +78,6 @@ RING_3D_NI(unsigned mthd, unsigned size)
{
return 0x40000000 | (7 << 13) | (size << 18) | mthd;
}
+#endif
#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index ab480cabd0..8dfb84a596 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -10,7 +10,9 @@
#include "nouveau/nouveau_grobj.h"
#include "nouveau/nouveau_notifier.h"
#include "nouveau/nouveau_resource.h"
-#include "nouveau/nouveau_pushbuf.h"
+#ifndef NOUVEAU_NVC0
+#include "nouveau/nv04_pushbuf.h"
+#endif
#ifndef NV04_PFIFO_MAX_PACKET_LEN
#define NV04_PFIFO_MAX_PACKET_LEN 2047
@@ -41,4 +43,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
extern struct pipe_screen *
nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
+extern struct pipe_screen *
+nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
+
#endif
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index cb7653c3fe..a5b0d0478c 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -8,12 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- nv30-40_3d.xml ( 31709 bytes, from 2010-09-05 07:53:14)
-- copyright.xml ( 6503 bytes, from 2010-04-10 23:15:50)
-- nv_3ddefs.xml ( 15193 bytes, from 2010-09-05 07:50:15)
-- nv_defs.xml ( 4437 bytes, from 2010-08-05 19:38:53)
-- nv_object.xml ( 10424 bytes, from 2010-08-05 19:38:53)
-- nvchipsets.xml ( 2824 bytes, from 2010-08-05 19:38:53)
+- nv_object.xml ( 11547 bytes, from 2010-10-24 15:29:34)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
Copyright (C) 2006-2010 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
@@ -37,7 +35,7 @@ Copyright (C) 2006-2010 by the following authors:
- Mark Carey <mark.carey@gmail.com> (careym)
- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
-- Patrice Mandin <mandin.patrice@orange.fr> (pmandin, pmdata)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
- Peter Popov <ironpeter@users.sf.net> (ironpeter)
- Richard Hughes <hughsient@users.sf.net> (hughsient)
@@ -180,6 +178,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_COMPUTE 0x000050c0
#define NVA3_COMPUTE 0x000085c0
#define NVC0_COMPUTE 0x000090c0
+#define NV84_CRYPT 0x000074c1
#define NV01_SUBCHAN__SIZE 0x00002000
#define NV01_SUBCHAN 0x00000000
@@ -194,9 +193,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV84_SUBCHAN_QUERY_GET 0x0000001c
-#define NV84_SUBCHAN_UNK20 0x00000020
+#define NV84_SUBCHAN_QUERY_INTR 0x00000020
-#define NV84_SUBCHAN_UNK24 0x00000024
+#define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024
#define NV10_SUBCHAN_REF_CNT 0x00000050
@@ -209,7 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV11_SUBCHAN_SEMAPHORE_RELEASE 0x0000006c
-#define NV50_SUBCHAN_UNK80 0x00000080
+#define NV40_SUBCHAN_YIELD 0x00000080
#define NV01_GRAPH 0x00000000
@@ -227,5 +226,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV40_GRAPH_PM_TRIGGER 0x00000140
+#define NVC0_SUBCHAN__SIZE 0x00008000
+#define NVC0_SUBCHAN 0x00000000
+
+#define NVC0_SUBCHAN_OBJECT 0x00000000
+
+
+#define NVC0_SUBCHAN_QUERY_ADDRESS_HIGH 0x00000010
+
+#define NVC0_SUBCHAN_QUERY_ADDRESS_LOW 0x00000014
+
+#define NVC0_SUBCHAN_QUERY_SEQUENCE 0x00000018
+
+#define NVC0_SUBCHAN_QUERY_GET 0x0000001c
+
+#define NVC0_SUBCHAN_REF_CNT 0x00000050
+
+#define NVC0_GRAPH 0x00000000
+
+#define NVC0_GRAPH_NOP 0x00000100
+
+#define NVC0_GRAPH_NOTIFY_ADDRESS_HIGH 0x00000104
+
+#define NVC0_GRAPH_NOTIFY_ADDRESS_LOW 0x00000108
+
+#define NVC0_GRAPH_NOTIFY 0x0000010c
+#define NVC0_GRAPH_NOTIFY_WRITE 0x00000000
+#define NVC0_GRAPH_NOTIFY_WRITE_AND_AWAKEN 0x00000001
+
+#define NVC0_GRAPH_SERIALIZE 0x00000110
+
+#define NVC0_GRAPH_MACRO_UPLOAD_POS 0x00000114
+
+#define NVC0_GRAPH_MACRO_UPLOAD_DATA 0x00000118
+
+#define NVC0_GRAPH_MACRO_ID 0x0000011c
+
+#define NVC0_GRAPH_MACRO_POS 0x00000120
+
#endif /* NV_OBJECT_XML */
diff --git a/src/gallium/drivers/nv50/nv50_buffer.c b/src/gallium/drivers/nv50/nv50_buffer.c
index dacfee9799..45356f9f63 100644
--- a/src/gallium/drivers/nv50/nv50_buffer.c
+++ b/src/gallium/drivers/nv50/nv50_buffer.c
@@ -136,6 +136,7 @@ nv50_user_buffer_create(struct pipe_screen *pscreen,
buffer->base.width0 = bytes;
buffer->base.height0 = 1;
buffer->base.depth0 = 1;
+ buffer->base.array_size = 1;
buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes);
if (!buffer->bo)
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 0874cb5e4e..4f97616176 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -49,6 +49,10 @@ nv50_destroy(struct pipe_context *pipe)
struct nv50_context *nv50 = nv50_context(pipe);
int i;
+ for (i = 0; i < nv50->vtxbuf_nr; i++) {
+ pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL);
+ }
+
for (i = 0; i < 64; i++) {
if (!nv50->state.hw[i])
continue;
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index bf6a577188..b2b0b72fe2 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -108,6 +108,7 @@ get_tile_depth(uint32_t tile_mode)
struct nv50_surface {
struct pipe_surface base;
+ unsigned offset;
};
static INLINE struct nv50_surface *
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index dd0e8fd41b..309b6503ca 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -276,46 +276,53 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen,
*/
struct pipe_surface *
-nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned flags)
+nv50_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl)
{
+ unsigned level = surf_tmpl->u.tex.level;
struct nv50_miptree *mt = nv50_miptree(pt);
struct nv50_miptree_level *lvl = &mt->level[level];
- struct pipe_surface *ps;
- unsigned img = 0;
+ struct nv50_surface *ns;
+ unsigned img = 0, zslice = 0;
+ assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+
+ /* XXX can't unify these here? */
if (pt->target == PIPE_TEXTURE_CUBE)
- img = face;
+ img = surf_tmpl->u.tex.first_layer;
+ else if (pt->target == PIPE_TEXTURE_3D)
+ zslice = surf_tmpl->u.tex.first_layer;
- ps = CALLOC_STRUCT(pipe_surface);
- if (!ps)
+ ns = CALLOC_STRUCT(nv50_surface);
+ if (!ns)
return NULL;
- pipe_resource_reference(&ps->texture, pt);
- ps->format = pt->format;
- ps->width = u_minify(pt->width0, level);
- ps->height = u_minify(pt->height0, level);
- ps->usage = flags;
- pipe_reference_init(&ps->reference, 1);
- ps->face = face;
- ps->level = level;
- ps->zslice = zslice;
- ps->offset = lvl->image_offset[img];
+ pipe_resource_reference(&ns->base.texture, pt);
+ ns->base.context = pipe;
+ ns->base.format = pt->format;
+ ns->base.width = u_minify(pt->width0, level);
+ ns->base.height = u_minify(pt->height0, level);
+ ns->base.usage = surf_tmpl->usage;
+ pipe_reference_init(&ns->base.reference, 1);
+ ns->base.u.tex.level = level;
+ ns->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ ns->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+ ns->offset = lvl->image_offset[img];
if (pt->target == PIPE_TEXTURE_3D) {
- unsigned nb_h = util_format_get_nblocksy(pt->format, ps->height);
- ps->offset += get_zslice_offset(lvl->tile_mode, zslice,
+ unsigned nb_h = util_format_get_nblocksy(pt->format, ns->base.height);
+ ns->offset += get_zslice_offset(lvl->tile_mode, zslice,
lvl->pitch, nb_h);
}
- return ps;
+ return &ns->base;
}
void
-nv50_miptree_surface_del(struct pipe_surface *ps)
+nv50_miptree_surface_del(struct pipe_context *pipe,
+ struct pipe_surface *ps)
{
struct nv50_surface *s = nv50_surface(ps);
- pipe_resource_reference(&ps->texture, NULL);
+ pipe_resource_reference(&s->base.texture, NULL);
FREE(s);
}
diff --git a/src/gallium/drivers/nv50/nv50_resource.c b/src/gallium/drivers/nv50/nv50_resource.c
index cfdb60418b..6c0a969635 100644
--- a/src/gallium/drivers/nv50/nv50_resource.c
+++ b/src/gallium/drivers/nv50/nv50_resource.c
@@ -15,7 +15,7 @@
static unsigned int
nv50_resource_is_referenced(struct pipe_context *pipe,
struct pipe_resource *resource,
- unsigned face, unsigned level)
+ unsigned level, int layer)
{
return nouveau_reference_flags(nv50_resource(resource)->bo);
}
@@ -51,6 +51,9 @@ nv50_init_resource_functions(struct pipe_context *pcontext)
pcontext->transfer_destroy = u_transfer_destroy_vtbl;
pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
pcontext->is_resource_referenced = nv50_resource_is_referenced;
+
+ pcontext->create_surface = nv50_miptree_surface_new;
+ pcontext->surface_destroy = nv50_miptree_surface_del;
}
void
@@ -61,7 +64,4 @@ nv50_screen_init_resource_functions(struct pipe_screen *pscreen)
pscreen->resource_get_handle = u_resource_get_handle_vtbl;
pscreen->resource_destroy = u_resource_destroy_vtbl;
pscreen->user_buffer_create = nv50_user_buffer_create;
-
- pscreen->get_tex_surface = nv50_miptree_surface_new;
- pscreen->tex_surface_destroy = nv50_miptree_surface_del;
}
diff --git a/src/gallium/drivers/nv50/nv50_resource.h b/src/gallium/drivers/nv50/nv50_resource.h
index f435a5892e..4b2a75e11a 100644
--- a/src/gallium/drivers/nv50/nv50_resource.h
+++ b/src/gallium/drivers/nv50/nv50_resource.h
@@ -87,12 +87,11 @@ nv50_user_buffer_create(struct pipe_screen *screen,
struct pipe_surface *
-nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned flags);
+nv50_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl);
void
-nv50_miptree_surface_del(struct pipe_surface *ps);
+nv50_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps);
#endif
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 49522b74d5..edc3d54d01 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -127,6 +127,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 0;
case PIPE_CAP_DEPTH_CLAMP:
return 1;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 0;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ return 0;
default:
NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
return 0;
diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c
index 306aa81d98..1c1b66deb3 100644
--- a/src/gallium/drivers/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@@ -71,7 +71,7 @@ nv50_transfer_constbuf(struct nv50_context *nv50,
map += nr;
}
- pipe_buffer_unmap(pipe, buf, transfer);
+ pipe_buffer_unmap(pipe, transfer);
}
static void
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index f42fa2d4d2..b4eda0f617 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -721,17 +721,16 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
struct nv50_context *nv50 = nv50_context(pipe);
if (shader == PIPE_SHADER_VERTEX) {
- nv50->constbuf[PIPE_SHADER_VERTEX] = buf;
nv50->dirty |= NV50_NEW_VERTPROG_CB;
} else
if (shader == PIPE_SHADER_FRAGMENT) {
- nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf;
nv50->dirty |= NV50_NEW_FRAGPROG_CB;
- } else
- if (shader == PIPE_SHADER_GEOMETRY) {
- nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf;
+ } else {
+ assert(shader == PIPE_SHADER_GEOMETRY);
nv50->dirty |= NV50_NEW_GEOMPROG_CB;
}
+
+ pipe_resource_reference(&nv50->constbuf[shader], buf);
}
static void
@@ -780,8 +779,9 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
{
struct nv50_context *nv50 = nv50_context(pipe);
- memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count);
- nv50->vtxbuf_nr = count;
+ util_copy_vertex_buffers(nv50->vtxbuf,
+ &nv50->vtxbuf_nr,
+ vb, count);
nv50->dirty |= NV50_NEW_ARRAYS;
}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 16c2dab9af..ae02143e35 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -63,13 +63,13 @@ validate_fb(struct nv50_context *nv50)
so_data (so, fb->cbufs[i]->height);
so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5);
- so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM |
+ so_reloc (so, bo, ((struct nv50_surface *)fb->cbufs[i])->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
- so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM |
+ so_reloc (so, bo, ((struct nv50_surface *)fb->cbufs[i])->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
so_data (so, nv50_format_table[fb->cbufs[i]->format].rt);
so_data (so, nv50_miptree(pt)->
- level[fb->cbufs[i]->level].tile_mode << 4);
+ level[fb->cbufs[i]->u.tex.level].tile_mode << 4);
so_data(so, 0x00000000);
so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1);
@@ -92,13 +92,13 @@ validate_fb(struct nv50_context *nv50)
assert(nv50_format_table[fb->zsbuf->format].rt);
so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
- so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
+ so_reloc (so, bo, ((struct nv50_surface *)(fb->zsbuf))->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
- so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
+ so_reloc (so, bo, ((struct nv50_surface *)(fb->zsbuf))->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
so_data (so, nv50_format_table[fb->zsbuf->format].rt);
so_data (so, nv50_miptree(pt)->
- level[fb->zsbuf->level].tile_mode << 4);
+ level[fb->zsbuf->u.tex.level].tile_mode << 4);
so_data (so, 0x00000000);
so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index f70c138fe1..a99df76cee 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -22,7 +22,7 @@
#define __NOUVEAU_PUSH_H__
#include <stdint.h>
-#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nv04_pushbuf.h"
#include "nv50_context.h"
#include "nv50_resource.h"
#include "pipe/p_defines.h"
@@ -97,23 +97,23 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
OUT_RING (chan, format);
OUT_RING (chan, 1);
BEGIN_RING(chan, eng2d, mthd + 0x14, 5);
- OUT_RING (chan, mt->level[ps->level].pitch);
+ OUT_RING (chan, mt->level[ps->u.tex.level].pitch);
OUT_RING (chan, ps->width);
OUT_RING (chan, ps->height);
- OUT_RELOCh(chan, bo, ps->offset, flags);
- OUT_RELOCl(chan, bo, ps->offset, flags);
+ OUT_RELOCh(chan, bo, ((struct nv50_surface *)ps)->offset, flags);
+ OUT_RELOCl(chan, bo, ((struct nv50_surface *)ps)->offset, flags);
} else {
BEGIN_RING(chan, eng2d, mthd, 5);
OUT_RING (chan, format);
OUT_RING (chan, 0);
- OUT_RING (chan, mt->level[ps->level].tile_mode << 4);
+ OUT_RING (chan, mt->level[ps->u.tex.level].tile_mode << 4);
OUT_RING (chan, 1);
OUT_RING (chan, 0);
BEGIN_RING(chan, eng2d, mthd + 0x18, 4);
OUT_RING (chan, ps->width);
OUT_RING (chan, ps->height);
- OUT_RELOCh(chan, bo, ps->offset, flags);
- OUT_RELOCl(chan, bo, ps->offset, flags);
+ OUT_RELOCh(chan, bo, ((struct nv50_surface *)ps)->offset, flags);
+ OUT_RELOCl(chan, bo, ((struct nv50_surface *)ps)->offset, flags);
}
#if 0
@@ -173,30 +173,41 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst,
static void
nv50_surface_copy(struct pipe_context *pipe,
- struct pipe_resource *dest, struct pipe_subresource subdst,
+ struct pipe_resource *dest, unsigned dst_level,
unsigned destx, unsigned desty, unsigned destz,
- struct pipe_resource *src, struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_screen *screen = nv50->screen;
- struct pipe_surface *ps_dst, *ps_src;
+ struct pipe_surface *ps_dst, *ps_src, surf_tmpl;
+
assert((src->format == dest->format) ||
(nv50_2d_format_faithful(src->format) &&
nv50_2d_format_faithful(dest->format)));
-
- ps_src = nv50_miptree_surface_new(pipe->screen, src, subsrc.face,
- subsrc.level, srcz, 0 /* bind flags */);
- ps_dst = nv50_miptree_surface_new(pipe->screen, dest, subdst.face,
- subdst.level, destz, 0 /* bindflags */);
-
- nv50_surface_do_copy(screen, ps_dst, destx, desty, ps_src, srcx,
- srcy, width, height);
-
- nv50_miptree_surface_del(ps_src);
- nv50_miptree_surface_del(ps_dst);
+ assert(src_box->depth == 1);
+
+ memset(&surf_tmpl, 0, sizeof(surf_tmpl));
+ surf_tmpl.format = src->format;
+ surf_tmpl.usage = 0; /* no bind flag - not a surface */
+ surf_tmpl.u.tex.level = src_level;
+ surf_tmpl.u.tex.first_layer = src_box->z;
+ surf_tmpl.u.tex.last_layer = src_box->z;
+ /* XXX really need surfaces here? */
+ ps_src = nv50_miptree_surface_new(pipe, src, &surf_tmpl);
+ surf_tmpl.format = dest->format;
+ surf_tmpl.usage = 0; /* no bind flag - not a surface */
+ surf_tmpl.u.tex.level = dst_level;
+ surf_tmpl.u.tex.first_layer = destz;
+ surf_tmpl.u.tex.last_layer = destz;
+ ps_dst = nv50_miptree_surface_new(pipe, dest, &surf_tmpl);
+
+ nv50_surface_do_copy(screen, ps_dst, destx, desty, ps_src, src_box->x,
+ src_box->y, src_box->width, src_box->height);
+
+ nv50_miptree_surface_del(pipe, ps_src);
+ nv50_miptree_surface_del(pipe, ps_dst);
}
static void
@@ -225,10 +236,10 @@ nv50_clear_render_target(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_RT_CONTROL, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, tesla, NV50TCL_RT_ADDRESS_HIGH(0), 5);
- OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCh(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RING (chan, nv50_format_table[dst->format].rt);
- OUT_RING (chan, mt->level[dst->level].tile_mode << 4);
+ OUT_RING (chan, mt->level[dst->u.tex.level].tile_mode << 4);
OUT_RING (chan, 0);
BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2);
OUT_RING (chan, dst->width);
@@ -281,10 +292,10 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
return;
BEGIN_RING(chan, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
- OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCh(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, bo, ((struct nv50_surface *)dst)->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RING (chan, nv50_format_table[dst->format].rt);
- OUT_RING (chan, mt->level[dst->level].tile_mode << 4);
+ OUT_RING (chan, mt->level[dst->u.tex.level].tile_mode << 4);
OUT_RING (chan, 0);
BEGIN_RING(chan, tesla, NV50TCL_ZETA_ENABLE, 1);
OUT_RING (chan, 1);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 658324ec5b..9243f9edce 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -106,7 +106,7 @@ nv50_tex_construct(struct nv50_sampler_view *view)
tic[6] = 0x03000000;
- tic[7] = (view->pipe.last_level << 4) | view->pipe.first_level;
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
return TRUE;
}
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 0cc2f4a837..bf5af4ddc6 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -126,20 +126,23 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
struct pipe_transfer *
nv50_miptree_transfer_new(struct pipe_context *pcontext,
struct pipe_resource *pt,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box)
{
struct pipe_screen *pscreen = pcontext->screen;
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nv50_miptree *mt = nv50_miptree(pt);
- struct nv50_miptree_level *lvl = &mt->level[sr.level];
+ struct nv50_miptree_level *lvl = &mt->level[level];
struct nv50_transfer *tx;
- unsigned nx, ny, image = 0;
+ unsigned nx, ny, image = 0, boxz = 0;
int ret;
+ /* XXX can't unify these here? */
if (pt->target == PIPE_TEXTURE_CUBE)
- image = sr.face;
+ image = box->z;
+ else if (pt->target == PIPE_TEXTURE_3D)
+ boxz = box->z;
tx = CALLOC_STRUCT(nv50_transfer);
if (!tx)
@@ -151,21 +154,21 @@ nv50_miptree_transfer_new(struct pipe_context *pcontext,
pipe_resource_reference(&tx->base.resource, pt);
- tx->base.sr = sr;
+ tx->base.level = level;
tx->base.usage = usage;
tx->base.box = *box;
- tx->nblocksx = util_format_get_nblocksx(pt->format, u_minify(pt->width0, sr.level));
- tx->nblocksy = util_format_get_nblocksy(pt->format, u_minify(pt->height0, sr.level));
+ tx->nblocksx = util_format_get_nblocksx(pt->format, u_minify(pt->width0, level));
+ tx->nblocksy = util_format_get_nblocksy(pt->format, u_minify(pt->height0, level));
tx->base.stride = tx->nblocksx * util_format_get_blocksize(pt->format);
tx->base.usage = usage;
tx->level_pitch = lvl->pitch;
- tx->level_width = u_minify(mt->base.base.width0, sr.level);
- tx->level_height = u_minify(mt->base.base.height0, sr.level);
- tx->level_depth = u_minify(mt->base.base.depth0, sr.level);
+ tx->level_width = u_minify(mt->base.base.width0, level);
+ tx->level_height = u_minify(mt->base.base.height0, level);
+ tx->level_depth = u_minify(mt->base.base.depth0, level);
tx->level_offset = lvl->image_offset[image];
tx->level_tiling = lvl->tile_mode;
- tx->level_z = box->z;
+ tx->level_z = boxz;
tx->level_x = util_format_get_nblocksx(pt->format, box->x);
tx->level_y = util_format_get_nblocksy(pt->format, box->y);
ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
@@ -181,7 +184,7 @@ nv50_miptree_transfer_new(struct pipe_context *pcontext,
nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
tx->level_pitch, tx->level_tiling,
- box->x, box->y, box->z,
+ box->x, box->y, boxz,
tx->nblocksx, tx->nblocksy,
tx->level_depth,
tx->bo, 0,
diff --git a/src/gallium/drivers/nv50/nv50_transfer.h b/src/gallium/drivers/nv50/nv50_transfer.h
index 663503547c..6699bf546e 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.h
+++ b/src/gallium/drivers/nv50/nv50_transfer.h
@@ -8,7 +8,7 @@
struct pipe_transfer *
nv50_miptree_transfer_new(struct pipe_context *pcontext,
struct pipe_resource *pt,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box);
void
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index d41a59d05d..53f319acf4 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -284,7 +284,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe,
nzi = TRUE;
}
- pipe_buffer_unmap(pipe, indexBuffer, transfer);
+ pipe_buffer_unmap(pipe, transfer);
}
static void
diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile
new file mode 100644
index 0000000000..da8f9a2ab4
--- /dev/null
+++ b/src/gallium/drivers/nvc0/Makefile
@@ -0,0 +1,34 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nvc0
+
+C_SOURCES = \
+ nvc0_buffer.c \
+ nvc0_context.c \
+ nvc0_draw.c \
+ nvc0_formats.c \
+ nvc0_miptree.c \
+ nvc0_resource.c \
+ nvc0_screen.c \
+ nvc0_state.c \
+ nvc0_state_validate.c \
+ nvc0_surface.c \
+ nvc0_tex.c \
+ nvc0_transfer.c \
+ nvc0_vbo.c \
+ nvc0_program.c \
+ nvc0_shader_state.c \
+ nvc0_pc.c \
+ nvc0_pc_print.c \
+ nvc0_pc_emit.c \
+ nvc0_tgsi_to_nc.c \
+ nvc0_pc_optimize.c \
+ nvc0_pc_regalloc.c \
+ nvc0_push.c \
+ nvc0_push2.c \
+ nvc0_fence.c \
+ nvc0_mm.c \
+ nvc0_query.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript
new file mode 100644
index 0000000000..c49e0ddbc5
--- /dev/null
+++ b/src/gallium/drivers/nvc0/SConscript
@@ -0,0 +1,36 @@
+Import('*')
+
+env = env.Clone()
+
+nvc0 = env.ConvenienceLibrary(
+ target = 'nvc0',
+ source = [
+ 'nvc0_buffer.c',
+ 'nvc0_context.c',
+ 'nvc0_draw.c',
+ 'nvc0_formats.c',
+ 'nvc0_miptree.c',
+ 'nvc0_resource.c',
+ 'nvc0_screen.c',
+ 'nvc0_state.c',
+ 'nvc0_state_validate.c',
+ 'nvc0_surface.c',
+ 'nvc0_tex.c',
+ 'nvc0_transfer.c',
+ 'nvc0_vbo.c',
+ 'nvc0_program.c',
+ 'nvc0_shader_state.c',
+ 'nvc0_pc.c',
+ 'nvc0_pc_print.c',
+ 'nvc0_pc_emit.c',
+ 'nvc0_tgsi_to_nc.c',
+ 'nvc0_pc_optimize.c',
+ 'nvc0_pc_regalloc.c',
+ 'nvc0_push.c',
+ 'nvc0_push2.c',
+ 'nvc0_fence.c',
+ 'nvc0_mm.c',
+ 'nvc0_query.c'
+ ])
+
+Export('nvc0')
diff --git a/src/gallium/drivers/nvc0/nv50_defs.xml.h b/src/gallium/drivers/nvc0/nv50_defs.xml.h
new file mode 100644
index 0000000000..1bf2f802b5
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nv50_defs.xml.h
@@ -0,0 +1,142 @@
+#ifndef NV50_DEFS_XML
+#define NV50_DEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT 0x000000c0
+#define NV50_SURFACE_FORMAT_R32G32B32A32_SINT 0x000000c1
+#define NV50_SURFACE_FORMAT_R32G32B32A32_UINT 0x000000c2
+#define NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT 0x000000c3
+#define NV50_SURFACE_FORMAT_R16G16B16A16_UNORM 0x000000c6
+#define NV50_SURFACE_FORMAT_R16G16B16A16_SNORM 0x000000c7
+#define NV50_SURFACE_FORMAT_R16G16B16A16_SINT 0x000000c8
+#define NV50_SURFACE_FORMAT_R16G16B16A16_UINT 0x000000c9
+#define NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT 0x000000ca
+#define NV50_SURFACE_FORMAT_R32G32_FLOAT 0x000000cb
+#define NV50_SURFACE_FORMAT_R32G32_SINT 0x000000cc
+#define NV50_SURFACE_FORMAT_R32G32_UINT 0x000000cd
+#define NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT 0x000000ce
+#define NV50_SURFACE_FORMAT_A8R8G8B8_UNORM 0x000000cf
+#define NV50_SURFACE_FORMAT_A8R8G8B8_SRGB 0x000000d0
+#define NV50_SURFACE_FORMAT_A2B10G10R10_UNORM 0x000000d1
+#define NV50_SURFACE_FORMAT_A2B10G10R10_UINT 0x000000d2
+#define NV50_SURFACE_FORMAT_A8B8G8R8_UNORM 0x000000d5
+#define NV50_SURFACE_FORMAT_A8B8G8R8_SRGB 0x000000d6
+#define NV50_SURFACE_FORMAT_A8B8G8R8_SNORM 0x000000d7
+#define NV50_SURFACE_FORMAT_A8B8G8R8_SINT 0x000000d8
+#define NV50_SURFACE_FORMAT_A8B8G8R8_UINT 0x000000d9
+#define NV50_SURFACE_FORMAT_R16G16_UNORM 0x000000da
+#define NV50_SURFACE_FORMAT_R16G16_SNORM 0x000000db
+#define NV50_SURFACE_FORMAT_R16G16_SINT 0x000000dc
+#define NV50_SURFACE_FORMAT_R16G16_UINT 0x000000dd
+#define NV50_SURFACE_FORMAT_R16G16_FLOAT 0x000000de
+#define NV50_SURFACE_FORMAT_A2R10G10B10_UNORM 0x000000df
+#define NV50_SURFACE_FORMAT_B10G11R11_FLOAT 0x000000e0
+#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5
+#define NV50_SURFACE_FORMAT_X8R8G8B8_UNORM 0x000000e6
+#define NV50_SURFACE_FORMAT_X8R8G8B8_SRGB 0x000000e7
+#define NV50_SURFACE_FORMAT_R5G6B5_UNORM 0x000000e8
+#define NV50_SURFACE_FORMAT_A1R5G5B5_UNORM 0x000000e9
+#define NV50_SURFACE_FORMAT_R8G8_UNORM 0x000000ea
+#define NV50_SURFACE_FORMAT_R8G8_SNORM 0x000000eb
+#define NV50_SURFACE_FORMAT_R8G8_SINT 0x000000ec
+#define NV50_SURFACE_FORMAT_R8G8_UINT 0x000000ed
+#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee
+#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef
+#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0
+#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1
+#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2
+#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3
+#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4
+#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5
+#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6
+#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7
+#define NV50_SURFACE_FORMAT_X1R5G5B5_UNORM 0x000000f8
+#define NV50_SURFACE_FORMAT_X8B8G8R8_UNORM 0x000000f9
+#define NV50_SURFACE_FORMAT_X8B8G8R8_SRGB 0x000000fa
+#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a
+#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013
+#define NV50_ZETA_FORMAT_Z24S8_UNORM 0x00000014
+#define NV50_ZETA_FORMAT_X8Z24_UNORM 0x00000015
+#define NV50_ZETA_FORMAT_S8Z24_UNORM 0x00000016
+#define NV50_ZETA_FORMAT_UNK18 0x00000018
+#define NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019
+#define NV50_ZETA_FORMAT_UNK1D 0x0000001d
+#define NV50_ZETA_FORMAT_UNK1E 0x0000001e
+#define NV50_ZETA_FORMAT_UNK1F 0x0000001f
+#define NV50_QUERY__SIZE 0x00000010
+#define NV50_QUERY_COUNTER 0x00000000
+
+#define NV50_QUERY_RES 0x00000004
+
+#define NV50_QUERY_TIME 0x00000008
+
+
+#endif /* NV50_DEFS_XML */
diff --git a/src/gallium/drivers/nvc0/nv50_texture.xml.h b/src/gallium/drivers/nvc0/nv50_texture.xml.h
new file mode 100644
index 0000000000..9f83206516
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nv50_texture.xml.h
@@ -0,0 +1,259 @@
+#ifndef NV50_TEXTURE_XML
+#define NV50_TEXTURE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_texture.xml ( 6871 bytes, from 2010-10-03 13:18:37)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_TIC_MAP_ZERO 0x00000000
+#define NV50_TIC_MAP_C0 0x00000002
+#define NV50_TIC_MAP_C1 0x00000003
+#define NV50_TIC_MAP_C2 0x00000004
+#define NV50_TIC_MAP_C3 0x00000005
+#define NV50_TIC_MAP_ONE 0x00000007
+#define NV50_TIC_TYPE_SNORM 0x00000001
+#define NV50_TIC_TYPE_UNORM 0x00000002
+#define NV50_TIC_TYPE_SINT 0x00000003
+#define NV50_TIC_TYPE_UINT 0x00000004
+#define NV50_TIC_TYPE_SSCALED 0x00000005
+#define NV50_TIC_TYPE_USCALED 0x00000006
+#define NV50_TIC_TYPE_FLOAT 0x00000007
+#define NV50_TSC_WRAP_REPEAT 0x00000000
+#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001
+#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002
+#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003
+#define NV50_TSC_WRAP_CLAMP 0x00000004
+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005
+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006
+#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007
+#define NV50_TIC__SIZE 0x00000020
+#define NV50_TIC_0 0x00000000
+#define NV50_TIC_0_MAPA__MASK 0x38000000
+#define NV50_TIC_0_MAPA__SHIFT 27
+#define NV50_TIC_0_MAPB__MASK 0x07000000
+#define NV50_TIC_0_MAPB__SHIFT 24
+#define NV50_TIC_0_MAPG__MASK 0x00e00000
+#define NV50_TIC_0_MAPG__SHIFT 21
+#define NV50_TIC_0_MAPR__MASK 0x001c0000
+#define NV50_TIC_0_MAPR__SHIFT 18
+#define NV50_TIC_0_TYPE3__MASK 0x00038000
+#define NV50_TIC_0_TYPE3__SHIFT 15
+#define NV50_TIC_0_TYPE2__MASK 0x00007000
+#define NV50_TIC_0_TYPE2__SHIFT 12
+#define NV50_TIC_0_TYPE1__MASK 0x00000e00
+#define NV50_TIC_0_TYPE1__SHIFT 9
+#define NV50_TIC_0_TYPE0__MASK 0x000001c0
+#define NV50_TIC_0_TYPE0__SHIFT 6
+#define NV50_TIC_0_SWIZZLE__MASK 0x3ffc0000
+#define NV50_TIC_0_FMT__MASK 0x0000003f
+#define NV50_TIC_0_FMT__SHIFT 0
+#define NV50_TIC_0_FMT_32_32_32_32 0x00000001
+#define NV50_TIC_0_FMT_16_16_16_16 0x00000003
+#define NV50_TIC_0_FMT_32_32 0x00000004
+#define NV50_TIC_0_FMT_32_8 0x00000005
+#define NV50_TIC_0_FMT_8_8_8_8 0x00000008
+#define NV50_TIC_0_FMT_2_10_10_10 0x00000009
+#define NV50_TIC_0_FMT_16_16 0x0000000c
+#define NV50_TIC_0_FMT_8_24 0x0000000d
+#define NV50_TIC_0_FMT_24_8 0x0000000e
+#define NV50_TIC_0_FMT_32 0x0000000f
+#define NV50_TIC_0_FMT_4_4_4_4 0x00000012
+#define NV50_TIC_0_FMT_5_5_5_1 0x00000013
+#define NV50_TIC_0_FMT_1_5_5_5 0x00000014
+#define NV50_TIC_0_FMT_5_6_5 0x00000015
+#define NV50_TIC_0_FMT_6_5_5 0x00000016
+#define NV50_TIC_0_FMT_8_8 0x00000018
+#define NV50_TIC_0_FMT_16 0x0000001b
+#define NV50_TIC_0_FMT_8 0x0000001d
+#define NV50_TIC_0_FMT_4_4 0x0000001e
+#define NV50_TIC_0_FMT_UNK1F 0x0000001f
+#define NV50_TIC_0_FMT_E5_9_9_9 0x00000020
+#define NV50_TIC_0_FMT_10_11_11 0x00000021
+#define NV50_TIC_0_FMT_C1_C2_C1_C0 0x00000022
+#define NV50_TIC_0_FMT_C2_C1_C0_C1 0x00000023
+#define NV50_TIC_0_FMT_DXT1 0x00000024
+#define NV50_TIC_0_FMT_DXT3 0x00000025
+#define NV50_TIC_0_FMT_DXT5 0x00000026
+#define NV50_TIC_0_FMT_RGTC1 0x00000027
+#define NV50_TIC_0_FMT_RGTC2 0x00000028
+#define NV50_TIC_0_FMT_24_8_ZETA 0x00000029
+#define NV50_TIC_0_FMT_8_24_ZETA 0x0000002a
+#define NV50_TIC_0_FMT_UNK2C_ZETA 0x0000002c
+#define NV50_TIC_0_FMT_UNK2D_ZETA 0x0000002d
+#define NV50_TIC_0_FMT_UNK2E_ZETA 0x0000002e
+#define NV50_TIC_0_FMT_32_ZETA 0x0000002f
+#define NV50_TIC_0_FMT_32_8_ZETA 0x00000030
+#define NV50_TIC_0_FMT_16_ZETA 0x0000003a
+
+#define NV50_TIC_1 0x00000004
+#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff
+#define NV50_TIC_1_OFFSET_LOW__SHIFT 0
+
+#define NV50_TIC_2 0x00000008
+#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff
+#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0
+#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400
+#define NV50_TIC_2_TARGET__MASK 0x0003c000
+#define NV50_TIC_2_TARGET__SHIFT 14
+#define NV50_TIC_2_TARGET_1D 0x00000000
+#define NV50_TIC_2_TARGET_2D 0x00004000
+#define NV50_TIC_2_TARGET_3D 0x00008000
+#define NV50_TIC_2_TARGET_CUBE 0x0000c000
+#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000
+#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000
+#define NV50_TIC_2_TARGET_BUFFER 0x00018000
+#define NV50_TIC_2_TARGET_RECT 0x0001c000
+#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000
+#define NV50_TIC_2_TILE_MODE_LINEAR 0x00040000
+#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000
+#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22
+#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000
+#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25
+#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000
+#define NV50_TIC_2_2D_UNK0258__SHIFT 28
+#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000
+
+#define NV50_TIC_3 0x0000000c
+#define NV50_TIC_3_PITCH__MASK 0xffffffff
+#define NV50_TIC_3_PITCH__SHIFT 0
+
+#define NV50_TIC_4 0x00000010
+#define NV50_TIC_4_WIDTH__MASK 0xffffffff
+#define NV50_TIC_4_WIDTH__SHIFT 0
+
+#define NV50_TIC_5 0x00000014
+#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000
+#define NV50_TIC_5_LAST_LEVEL__SHIFT 28
+#define NV50_TIC_5_DEPTH__MASK 0x0fff0000
+#define NV50_TIC_5_DEPTH__SHIFT 16
+#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff
+#define NV50_TIC_5_HEIGHT__SHIFT 0
+
+#define NV50_TIC_7 0x0000001c
+#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f
+#define NV50_TIC_7_BASE_LEVEL__SHIFT 0
+#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0
+#define NV50_TIC_7_MAX_LEVEL__SHIFT 4
+
+#define NV50_TSC__SIZE 0x00000020
+#define NV50_TSC_0 0x00000000
+#define NV50_TSC_0_WRAPS__MASK 0x00000007
+#define NV50_TSC_0_WRAPS__SHIFT 0
+#define NV50_TSC_0_WRAPT__MASK 0x00000038
+#define NV50_TSC_0_WRAPT__SHIFT 3
+#define NV50_TSC_0_WRAPR__MASK 0x000001c0
+#define NV50_TSC_0_WRAPR__SHIFT 6
+#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200
+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00
+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10
+#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000
+#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20
+
+#define NV50_TSC_1 0x00000004
+#define NV50_TSC_1_UNKN_ANISO_15 0x10000000
+#define NV50_TSC_1_UNKN_ANISO_35 0x18000000
+#define NV50_TSC_1_MAGF__MASK 0x00000003
+#define NV50_TSC_1_MAGF__SHIFT 0
+#define NV50_TSC_1_MAGF_NEAREST 0x00000001
+#define NV50_TSC_1_MAGF_LINEAR 0x00000002
+#define NV50_TSC_1_MINF__MASK 0x00000030
+#define NV50_TSC_1_MINF__SHIFT 4
+#define NV50_TSC_1_MINF_NEAREST 0x00000010
+#define NV50_TSC_1_MINF_LINEAR 0x00000020
+#define NV50_TSC_1_MIPF__MASK 0x000000c0
+#define NV50_TSC_1_MIPF__SHIFT 6
+#define NV50_TSC_1_MIPF_NONE 0x00000040
+#define NV50_TSC_1_MIPF_NEAREST 0x00000080
+#define NV50_TSC_1_MIPF_LINEAR 0x000000c0
+#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000
+#define NV50_TSC_1_LOD_BIAS__SHIFT 12
+
+#define NV50_TSC_2 0x00000008
+#define NV50_TSC_2_MIN_LOD__MASK 0x00000f00
+#define NV50_TSC_2_MIN_LOD__SHIFT 8
+#define NV50_TSC_2_MAX_LOD__MASK 0x00f00000
+#define NV50_TSC_2_MAX_LOD__SHIFT 20
+
+#define NV50_TSC_4 0x00000010
+#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff
+#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0
+
+#define NV50_TSC_5 0x00000014
+#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff
+#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0
+
+#define NV50_TSC_6 0x00000018
+#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff
+#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0
+
+#define NV50_TSC_7 0x0000001c
+#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff
+#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0
+
+
+#endif /* NV50_TEXTURE_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nvc0/nvc0_2d.xml.h
new file mode 100644
index 0000000000..aebcd510e8
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_2d.xml.h
@@ -0,0 +1,380 @@
+#ifndef NVC0_2D_XML
+#define NVC0_2D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_2D_DST_FORMAT 0x00000200
+
+#define NVC0_2D_DST_LINEAR 0x00000204
+
+#define NVC0_2D_DST_TILE_MODE 0x00000208
+
+#define NVC0_2D_DST_DEPTH 0x0000020c
+
+#define NVC0_2D_DST_LAYER 0x00000210
+
+#define NVC0_2D_DST_PITCH 0x00000214
+
+#define NVC0_2D_DST_WIDTH 0x00000218
+
+#define NVC0_2D_DST_HEIGHT 0x0000021c
+
+#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220
+
+#define NVC0_2D_DST_ADDRESS_LOW 0x00000224
+
+#define NVC0_2D_UNK228 0x00000228
+
+#define NVC0_2D_SRC_FORMAT 0x00000230
+
+#define NVC0_2D_SRC_LINEAR 0x00000234
+
+#define NVC0_2D_SRC_TILE_MODE 0x00000238
+
+#define NVC0_2D_SRC_DEPTH 0x0000023c
+
+#define NVC0_2D_SRC_LAYER 0x00000240
+
+#define NVC0_2D_SRC_PITCH 0x00000244
+#define NVC0_2D_SRC_PITCH__MAX 0x00040000
+
+#define NVC0_2D_SRC_WIDTH 0x00000248
+#define NVC0_2D_SRC_WIDTH__MAX 0x00010000
+
+#define NVC0_2D_SRC_HEIGHT 0x0000024c
+#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000
+
+#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250
+
+#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254
+
+#define NVC0_2D_UNK258 0x00000258
+
+#define NVC0_2D_UNK260 0x00000260
+
+#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264
+
+#define NVC0_2D_COND_ADDRESS_LOW 0x00000268
+
+#define NVC0_2D_COND_MODE 0x0000026c
+#define NVC0_2D_COND_MODE_NEVER 0x00000000
+#define NVC0_2D_COND_MODE_ALWAYS 0x00000001
+#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_2D_COND_MODE_EQUAL 0x00000003
+#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_2D_CLIP_X 0x00000280
+
+#define NVC0_2D_CLIP_Y 0x00000284
+
+#define NVC0_2D_CLIP_W 0x00000288
+
+#define NVC0_2D_CLIP_H 0x0000028c
+
+#define NVC0_2D_CLIP_ENABLE 0x00000290
+
+#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294
+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000
+#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001
+#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002
+#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003
+#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004
+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005
+#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006
+
+#define NVC0_2D_COLOR_KEY 0x00000298
+
+#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c
+
+#define NVC0_2D_ROP 0x000002a0
+
+#define NVC0_2D_BETA1 0x000002a4
+
+#define NVC0_2D_BETA4 0x000002a8
+
+#define NVC0_2D_OPERATION 0x000002ac
+#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000
+#define NVC0_2D_OPERATION_ROP_AND 0x00000001
+#define NVC0_2D_OPERATION_BLEND_AND 0x00000002
+#define NVC0_2D_OPERATION_SRCCOPY 0x00000003
+#define NVC0_2D_OPERATION_UNK4 0x00000004
+#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005
+#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006
+
+#define NVC0_2D_UNK2B0 0x000002b0
+#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f
+#define NVC0_2D_UNK2B0_UNK0__SHIFT 0
+#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00
+#define NVC0_2D_UNK2B0_UNK1__SHIFT 8
+
+#define NVC0_2D_PATTERN_SELECT 0x000002b4
+#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000
+#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001
+#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002
+#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003
+
+#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8
+#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000
+#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001
+#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002
+#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003
+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004
+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005
+
+#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec
+#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000
+#define NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001
+
+#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0))
+#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002
+
+#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0))
+#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002
+
+#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0))
+#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040
+#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff
+#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0
+#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00
+#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8
+#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000
+#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16
+
+#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0))
+#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020
+#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f
+#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0
+#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0
+#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5
+#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800
+#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11
+#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000
+#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16
+#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000
+#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21
+#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000
+#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27
+
+#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0))
+#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020
+#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f
+#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0
+#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0
+#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5
+#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00
+#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10
+#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000
+#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16
+#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000
+#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21
+#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000
+#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26
+
+#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0))
+#define NVC0_2D_PATTERN_Y8__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_Y8__LEN 0x00000010
+#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff
+#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0
+#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00
+#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8
+#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000
+#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16
+#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000
+#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24
+
+#define NVC0_2D_DRAW_SHAPE 0x00000580
+#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000
+#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001
+#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002
+#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003
+#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004
+
+#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584
+
+#define NVC0_2D_DRAW_COLOR 0x00000588
+
+#define NVC0_2D_UNK58C 0x0000058c
+#define NVC0_2D_UNK58C_0 0x00000001
+#define NVC0_2D_UNK58C_1 0x00000010
+#define NVC0_2D_UNK58C_2 0x00000100
+#define NVC0_2D_UNK58C_3 0x00001000
+
+#define NVC0_2D_DRAW_POINT16 0x000005e0
+#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff
+#define NVC0_2D_DRAW_POINT16_X__SHIFT 0
+#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000
+#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16
+
+#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0))
+#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008
+#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040
+
+#define NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0))
+#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008
+#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040
+
+#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800
+
+#define NVC0_2D_SIFC_FORMAT 0x00000804
+
+#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002
+
+#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c
+
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002
+
+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814
+
+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818
+
+#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c
+
+#define NVC0_2D_SIFC_WIDTH 0x00000838
+
+#define NVC0_2D_SIFC_HEIGHT 0x0000083c
+
+#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840
+
+#define NVC0_2D_SIFC_DX_DU_INT 0x00000844
+
+#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848
+
+#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c
+
+#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850
+
+#define NVC0_2D_SIFC_DST_X_INT 0x00000854
+
+#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858
+
+#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c
+
+#define NVC0_2D_SIFC_DATA 0x00000860
+
+#define NVC0_2D_UNK0870 0x00000870
+
+#define NVC0_2D_UNK0880 0x00000880
+
+#define NVC0_2D_UNK0884 0x00000884
+
+#define NVC0_2D_UNK0888 0x00000888
+
+#define NVC0_2D_BLIT_CONTROL 0x0000088c
+#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001
+#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0
+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000
+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001
+#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010
+#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4
+#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000
+#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010
+
+#define NVC0_2D_BLIT_DST_X 0x000008b0
+
+#define NVC0_2D_BLIT_DST_Y 0x000008b4
+
+#define NVC0_2D_BLIT_DST_W 0x000008b8
+
+#define NVC0_2D_BLIT_DST_H 0x000008bc
+
+#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0
+
+#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4
+
+#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8
+
+#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc
+
+#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0
+
+#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4
+
+#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8
+
+#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc
+
+
+#endif /* NVC0_2D_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
new file mode 100644
index 0000000000..31302949d5
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
@@ -0,0 +1,1182 @@
+#ifndef NVC0_3D_XML
+#define NVC0_3D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07)
+- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40)
+- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20)
+- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28)
+- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17)
+
+Copyright (C) 2006-2011 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104
+#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108
+#define NVC0_3D_NOTIFY 0x0000010c
+
+#define NVC0_3D_SERIALIZE 0x00000110
+
+#define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210
+
+#define NVC0_3D_TESS_MODE 0x00000320
+#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f
+#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0
+#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000
+#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001
+#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002
+#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0
+#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4
+#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000
+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010
+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020
+#define NVC0_3D_TESS_MODE_CW 0x00000100
+#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200
+
+#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0))
+#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004
+#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004
+
+#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0))
+#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004
+#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002
+
+#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c
+
+#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0))
+#define NVC0_3D_TFB__ESIZE 0x00000020
+#define NVC0_3D_TFB__LEN 0x00000004
+
+#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0))
+
+#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0))
+
+#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0))
+
+#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0))
+
+#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0))
+
+#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0))
+
+#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0))
+
+#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0))
+
+#define NVC0_3D_TFB_ENABLE 0x00000744
+
+#define NVC0_3D_LOCAL_BASE 0x0000077c
+
+#define NVC0_3D_LOCAL_ADDRESS_HIGH 0x00000790
+
+#define NVC0_3D_LOCAL_ADDRESS_LOW 0x00000794
+
+#define NVC0_3D_LOCAL_SIZE_HIGH 0x00000798
+
+#define NVC0_3D_LOCAL_SIZE_LOW 0x0000079c
+
+#define NVC0_3D_RT(i0) (0x00000800 + 0x20*(i0))
+#define NVC0_3D_RT__ESIZE 0x00000020
+#define NVC0_3D_RT__LEN 0x00000008
+
+#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0))
+
+#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0))
+
+#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0))
+
+#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0))
+
+#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0))
+
+#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0))
+#define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001
+#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070
+#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4
+#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700
+#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8
+
+#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0))
+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
+#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000
+
+#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0))
+
+#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0))
+#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010
+#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010
+#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0
+#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16
+
+#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0))
+#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010
+#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010
+#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0
+#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000
+#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16
+
+#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0))
+#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010
+#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010
+
+#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0))
+#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
+#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0))
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 0xffff0000
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16
+
+#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0))
+#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008
+#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008
+#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff
+#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0
+#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000
+#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16
+
+#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
+#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
+#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004
+#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0
+#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16
+
+#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0))
+#define NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008
+#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004
+#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0
+#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
+#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16
+
+#define NVC0_3D_COUNTER_ENABLE 0x00000d68
+#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001
+#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002
+#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004
+#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008
+#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010
+#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020
+#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040
+#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080
+#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100
+#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200
+#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400
+#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800
+#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000
+#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000
+#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000
+#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000
+
+#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74
+
+#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78
+
+#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0))
+#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004
+#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004
+
+#define NVC0_3D_CLEAR_DEPTH 0x00000d90
+
+#define NVC0_3D_CLEAR_STENCIL 0x00000da0
+
+#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4
+
+#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
+
+#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
+
+#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
+
+#define NVC0_3D_PATCH_VERTICES 0x00000dcc
+
+#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8
+
+#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc
+
+#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010
+
+#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010
+#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff
+#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0
+#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000
+#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16
+
+#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010
+#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff
+#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0
+#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000
+#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16
+
+#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54
+
+#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58
+
+#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c
+
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84
+
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88
+
+#define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0))
+#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004
+#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002
+
+#define NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0))
+#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004
+#define NVC0_3D_MSAA_MASK__LEN 0x00000004
+
+#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc
+
+#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0
+
+#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0
+
+#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4
+
+#define NVC0_3D_ZETA_FORMAT 0x00000fe8
+
+#define NVC0_3D_ZETA_TILE_MODE 0x00000fec
+
+#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0
+
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0
+
+#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16
+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
+
+#define NVC0_3D_VERTEX_ID 0x00001118
+
+#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c
+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff
+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000
+
+#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0))
+#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004
+#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004
+
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0))
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000003f
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_2_10_10_10 0x06000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x78000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000
+
+#define NVC0_3D_RT_CONTROL 0x0000121c
+#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f
+#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0
+#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070
+#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4
+#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380
+#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7
+#define NVC0_3D_RT_CONTROL_MAP2__MASK 0x00001c00
+#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10
+#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000
+#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13
+#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000
+#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16
+#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000
+#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19
+#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000
+#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22
+#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000
+#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25
+
+#define NVC0_3D_ZETA_HORIZ 0x00001228
+
+#define NVC0_3D_ZETA_VERT 0x0000122c
+
+#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230
+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0
+#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000
+
+#define NVC0_3D_LINKED_TSC 0x00001234
+
+#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c
+
+#define NVC0_3D_FP_RESULT_COUNT 0x00001298
+
+#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc
+
+#define NVC0_3D_D3D_FILL_MODE 0x000012d0
+#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001
+#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 0x00000002
+#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003
+
+#define NVC0_3D_SHADE_MODEL 0x000012d4
+#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00
+#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01
+
+#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4
+
+#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8
+
+#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec
+
+#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0
+
+#define NVC0_3D_VB_ELEMENT_U8 0x00001304
+#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff
+#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0
+#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00
+#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8
+#define NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000
+#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16
+#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000
+#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24
+
+#define NVC0_3D_D3D_CULL_MODE 0x00001308
+#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001
+#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002
+#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003
+
+#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c
+#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200
+#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201
+#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202
+#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_ALPHA_TEST_REF 0x00001310
+
+#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314
+#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318
+#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001
+#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff
+
+#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0))
+#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004
+#define NVC0_3D_BLEND_COLOR__LEN 0x00000004
+
+#define NVC0_3D_TSC_FLUSH 0x00001330
+#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_3D_TIC_FLUSH 0x00001334
+#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_3D_TEX_CACHE_CTL 0x00001338
+#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344
+
+#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348
+
+#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350
+
+#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358
+
+#define NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0))
+#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004
+#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008
+
+#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380
+
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394
+
+#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c
+
+#define NVC0_3D_DRAW_TFB_BASE 0x000013a4
+
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000
+
+#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac
+#define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001
+#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010
+
+#define NVC0_3D_LINE_WIDTH 0x000013b0
+
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400
+
+#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c
+
+#define NVC0_3D_VB_ELEMENT_BASE 0x00001434
+
+#define NVC0_3D_VB_INSTANCE_BASE 0x00001438
+
+#define NVC0_3D_CODE_CB_FLUSH 0x00001440
+
+#define NVC0_3D_CLIPID_HEIGHT 0x00001504
+#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000
+
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_2 0x00000004
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_3 0x00000008
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_4 0x00000010
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_5 0x00000020
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_6 0x00000040
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_7 0x00000080
+
+#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514
+
+#define NVC0_3D_POINT_SIZE 0x00001518
+
+#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520
+
+#define NVC0_3D_COUNTER_RESET 0x00001530
+#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001
+#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002
+#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003
+#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004
+#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010
+#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011
+#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012
+#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013
+#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015
+#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016
+#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017
+#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018
+#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a
+#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b
+#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c
+#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d
+#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e
+#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f
+
+#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534
+
+#define NVC0_3D_ZETA_ENABLE 0x00001538
+
+#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c
+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001
+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010
+
+#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550
+
+#define NVC0_3D_COND_ADDRESS_LOW 0x00001554
+
+#define NVC0_3D_COND_MODE 0x00001558
+#define NVC0_3D_COND_MODE_NEVER 0x00000000
+#define NVC0_3D_COND_MODE_ALWAYS 0x00000001
+#define NVC0_3D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_3D_COND_MODE_EQUAL 0x00000003
+#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_3D_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560
+#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NVC0_3D_TSC_LIMIT 0x00001564
+#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff
+
+#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c
+
+#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570
+
+#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574
+
+#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578
+
+#define NVC0_3D_TIC_LIMIT 0x0000157c
+
+#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594
+
+#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_CSAA_ENABLE 0x000015b4
+
+#define NVC0_3D_FRAMEBUFFER_SRGB 0x000015b8
+
+#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc
+
+#define NVC0_3D_GP_BUILTIN_RESULT_EN 0x000015cc
+#define NVC0_3D_GP_BUILTIN_RESULT_EN_LAYER 0x00010000
+
+#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0
+#define NVC0_3D_MULTISAMPLE_MODE_1X 0x00000000
+#define NVC0_3D_MULTISAMPLE_MODE_2XMS 0x00000001
+#define NVC0_3D_MULTISAMPLE_MODE_4XMS 0x00000002
+#define NVC0_3D_MULTISAMPLE_MODE_8XMS 0x00000003
+#define NVC0_3D_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008
+#define NVC0_3D_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009
+#define NVC0_3D_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a
+
+#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000
+
+#define NVC0_3D_VERTEX_END_D3D 0x000015d8
+#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001
+#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002
+
+#define NVC0_3D_EDGEFLAG_ENABLE 0x000015e4
+
+#define NVC0_3D_VB_ELEMENT_U32 0x000015e8
+
+#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0
+
+#define NVC0_3D_VB_ELEMENT_U16 0x000015f0
+#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff
+#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0
+#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000
+#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16
+
+#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4
+
+#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8
+
+#define NVC0_3D_POINT_COORD_REPLACE 0x00001604
+#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff
+#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0
+
+#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608
+
+#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c
+
+#define NVC0_3D_VERTEX_END_GL 0x00001614
+#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001
+#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002
+
+#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e
+#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x10000000
+
+#define NVC0_3D_VERTEX_DATA 0x00001640
+
+#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644
+
+#define NVC0_3D_PRIM_RESTART_INDEX 0x00001648
+
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000
+
+#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658
+
+#define NVC0_3D_POINT_RASTER_RULES 0x0000165c
+#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000
+#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001
+
+#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660
+
+#define NVC0_3D_TEX_MISC 0x00001664
+#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
+
+#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c
+
+#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680
+
+#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684
+
+#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688
+
+#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c
+
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0))
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020
+
+#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0))
+#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004
+#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004
+
+#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc
+
+#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0
+
+#define NVC0_3D_UNK17BC_LIMIT 0x000017c4
+
+#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8
+
+#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc
+
+#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0
+
+#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4
+
+#define NVC0_3D_INDEX_LOG2_SIZE 0x000017d8
+
+#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc
+
+#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0
+
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0))
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020
+
+#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910
+
+#define NVC0_3D_CULL_FACE_ENABLE 0x00001918
+
+#define NVC0_3D_FRONT_FACE 0x0000191c
+#define NVC0_3D_FRONT_FACE_CW 0x00000900
+#define NVC0_3D_FRONT_FACE_CCW 0x00000901
+
+#define NVC0_3D_CULL_FACE 0x00001920
+#define NVC0_3D_CULL_FACE_FRONT 0x00000404
+#define NVC0_3D_CULL_FACE_BACK 0x00000405
+#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
+
+#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c
+
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000
+
+#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c
+
+#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950
+#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000
+#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001
+#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002
+
+#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c
+#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001
+#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010
+
+#define NVC0_3D_CLIPID_ENABLE 0x0000197c
+
+#define NVC0_3D_CLIPID_WIDTH 0x00001980
+#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000
+#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040
+
+#define NVC0_3D_CLIPID_ID 0x00001984
+
+#define NVC0_3D_FP_CONTROL 0x000019a8
+#define NVC0_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001
+#define NVC0_3D_FP_CONTROL_EXPORTS_Z 0x00000100
+#define NVC0_3D_FP_CONTROL_USES_KIL 0x00100000
+
+#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc
+
+#define NVC0_3D_LOGIC_OP_ENABLE 0x000019c4
+
+#define NVC0_3D_LOGIC_OP 0x000019c8
+#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500
+#define NVC0_3D_LOGIC_OP_AND 0x00001501
+#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502
+#define NVC0_3D_LOGIC_OP_COPY 0x00001503
+#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504
+#define NVC0_3D_LOGIC_OP_NOOP 0x00001505
+#define NVC0_3D_LOGIC_OP_XOR 0x00001506
+#define NVC0_3D_LOGIC_OP_OR 0x00001507
+#define NVC0_3D_LOGIC_OP_NOR 0x00001508
+#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509
+#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a
+#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NVC0_3D_LOGIC_OP_NAND 0x0000150e
+#define NVC0_3D_LOGIC_OP_SET 0x0000150f
+
+#define NVC0_3D_CLEAR_BUFFERS 0x000019d0
+#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001
+#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002
+#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004
+#define NVC0_3D_CLEAR_BUFFERS_G 0x00000008
+#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010
+#define NVC0_3D_CLEAR_BUFFERS_A 0x00000020
+#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0
+#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6
+#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
+#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
+
+#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
+#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004
+#define NVC0_3D_COLOR_MASK__LEN 0x00000008
+#define NVC0_3D_COLOR_MASK_R 0x0000000f
+#define NVC0_3D_COLOR_MASK_G 0x000000f0
+#define NVC0_3D_COLOR_MASK_B 0x00000f00
+#define NVC0_3D_COLOR_MASK_A 0x0000f000
+
+#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NVC0_3D_QUERY_SEQUENCE 0x00001b08
+
+#define NVC0_3D_QUERY_GET 0x00001b0c
+#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003
+#define NVC0_3D_QUERY_GET_MODE__SHIFT 0
+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000
+#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001
+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002
+#define NVC0_3D_QUERY_GET_FENCE 0x00000010
+#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0
+#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5
+#define NVC0_3D_QUERY_GET_UNK8 0x00000100
+#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000
+#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12
+#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000
+#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16
+#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000
+#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000
+#define NVC0_3D_QUERY_GET_INTR 0x00100000
+#define NVC0_3D_QUERY_GET_UNK21 0x00200000
+#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000
+#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23
+#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000
+#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000
+#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000
+#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000
+#define NVC0_3D_QUERY_GET_SHORT 0x10000000
+
+#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020
+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff
+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0
+#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000
+
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020
+
+#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0))
+#define NVC0_3D_IBLEND__ESIZE 0x00000020
+#define NVC0_3D_IBLEND__LEN 0x00000008
+
+#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0))
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007
+#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0))
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0))
+
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0))
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0))
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020
+
+#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0))
+#define NVC0_3D_SP__ESIZE 0x00000040
+#define NVC0_3D_SP__LEN 0x00000006
+
+#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0))
+#define NVC0_3D_SP_SELECT_ENABLE 0x00000001
+#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070
+#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4
+#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000
+#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010
+#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020
+#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030
+#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040
+#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050
+
+#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0))
+
+#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0))
+
+#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0))
+#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010
+#define NVC0_3D_TEX_LIMITS__LEN 0x00000005
+
+#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0))
+#define NVC0_3D_FIRMWARE__ESIZE 0x00000004
+#define NVC0_3D_FIRMWARE__LEN 0x00000020
+
+#define NVC0_3D_CB_SIZE 0x00002380
+
+#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384
+
+#define NVC0_3D_CB_ADDRESS_LOW 0x00002388
+
+#define NVC0_3D_CB_POS 0x0000238c
+
+#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0))
+#define NVC0_3D_CB_DATA__ESIZE 0x00000004
+#define NVC0_3D_CB_DATA__LEN 0x00000010
+
+#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0))
+#define NVC0_3D_BIND_TSC__ESIZE 0x00000020
+#define NVC0_3D_BIND_TSC__LEN 0x00000005
+#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001
+#define NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0
+#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4
+#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000
+#define NVC0_3D_BIND_TSC_TSC__SHIFT 12
+
+#define NVC0_3D_BIND_TIC(i0) (0x00002404 + 0x20*(i0))
+#define NVC0_3D_BIND_TIC__ESIZE 0x00000020
+#define NVC0_3D_BIND_TIC__LEN 0x00000005
+#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001
+#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1
+#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NVC0_3D_BIND_TIC_TIC__SHIFT 9
+
+#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0))
+#define NVC0_3D_CB_BIND__ESIZE 0x00000020
+#define NVC0_3D_CB_BIND__LEN 0x00000005
+#define NVC0_3D_CB_BIND_VALID 0x00000001
+#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0
+#define NVC0_3D_CB_BIND_INDEX__SHIFT 4
+
+#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600
+
+#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0))
+#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
+#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080
+
+#define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808
+
+#define NVC0_3D_VERTEX_ARRAY_SELECT 0x00003820
+
+#define NVC0_3D_BLEND_ENABLES 0x00003858
+
+#define NVC0_3D_POLYGON_MODE_FRONT 0x00003868
+#define NVC0_3D_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NVC0_3D_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NVC0_3D_POLYGON_MODE_FRONT_FILL 0x00001b02
+
+#define NVC0_3D_POLYGON_MODE_BACK 0x00003870
+#define NVC0_3D_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NVC0_3D_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NVC0_3D_POLYGON_MODE_BACK_FILL 0x00001b02
+
+#define NVC0_3D_GP_SELECT 0x00003878
+
+#define NVC0_3D_TEP_SELECT 0x00003880
+
+
+#endif /* NVC0_3D_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
new file mode 100644
index 0000000000..84b152213a
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
@@ -0,0 +1,98 @@
+#ifndef NV_3DDEFS_XML
+#define NV_3DDEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38)
+- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28)
+- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000
+#define NV50_3D_BLEND_FACTOR_ONE 0x00004001
+#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308
+#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903
+
+#endif /* NV_3DDEFS_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c
new file mode 100644
index 0000000000..ea3e642a44
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_buffer.c
@@ -0,0 +1,489 @@
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#define NOUVEAU_NVC0
+#include "nouveau/nouveau_screen.h"
+#include "nouveau/nouveau_winsys.h"
+#undef NOUVEAU_NVC0
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+struct nvc0_transfer {
+ struct pipe_transfer base;
+};
+
+static INLINE struct nvc0_transfer *
+nvc0_transfer(struct pipe_transfer *transfer)
+{
+ return (struct nvc0_transfer *)transfer;
+}
+
+static INLINE boolean
+nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
+ unsigned domain)
+{
+ if (domain == NOUVEAU_BO_VRAM) {
+ buf->mm = nvc0_mm_allocate(screen->mm_VRAM, buf->base.width0, &buf->bo,
+ &buf->offset);
+ if (!buf->bo)
+ return nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
+ } else
+ if (domain == NOUVEAU_BO_GART) {
+ buf->mm = nvc0_mm_allocate(screen->mm_GART, buf->base.width0, &buf->bo,
+ &buf->offset);
+ if (!buf->bo)
+ return FALSE;
+ }
+ if (domain != NOUVEAU_BO_GART) {
+ if (!buf->data) {
+ buf->data = MALLOC(buf->base.width0);
+ if (!buf->data)
+ return FALSE;
+ }
+ }
+ buf->domain = domain;
+ return TRUE;
+}
+
+static INLINE void
+release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence)
+{
+ if (fence && fence->state != NVC0_FENCE_STATE_SIGNALLED) {
+ nvc0_fence_sched_release(fence, *mm);
+ } else {
+ nvc0_mm_free(*mm);
+ }
+ (*mm) = NULL;
+}
+
+static INLINE boolean
+nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
+ unsigned domain)
+{
+ nouveau_bo_ref(NULL, &buf->bo);
+
+ if (buf->mm)
+ release_allocation(&buf->mm, buf->fence);
+
+ return nvc0_buffer_allocate(screen, buf, domain);
+}
+
+static void
+nvc0_buffer_destroy(struct pipe_screen *pscreen,
+ struct pipe_resource *presource)
+{
+ struct nvc0_resource *res = nvc0_resource(presource);
+
+ nouveau_bo_ref(NULL, &res->bo);
+
+ if (res->mm)
+ release_allocation(&res->mm, res->fence);
+
+ if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY))
+ FREE(res->data);
+
+ FREE(res);
+}
+
+/* Maybe just migrate to GART right away if we actually need to do this. */
+boolean
+nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf,
+ unsigned start, unsigned size)
+{
+ struct nvc0_mm_allocation *mm;
+ struct nouveau_bo *bounce = NULL;
+ uint32_t offset;
+
+ assert(buf->domain == NOUVEAU_BO_VRAM);
+
+ mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset);
+ if (!bounce)
+ return FALSE;
+
+ nvc0_m2mf_copy_linear(nvc0, bounce, offset, NOUVEAU_BO_GART,
+ buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
+ size);
+
+ if (nouveau_bo_map_range(bounce, offset, size, NOUVEAU_BO_RD))
+ return FALSE;
+ memcpy(buf->data + start, bounce->map, size);
+ nouveau_bo_unmap(bounce);
+
+ buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
+
+ nouveau_bo_ref(NULL, &bounce);
+ if (mm)
+ nvc0_mm_free(mm);
+ return TRUE;
+}
+
+static boolean
+nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf,
+ unsigned start, unsigned size)
+{
+ struct nvc0_mm_allocation *mm;
+ struct nouveau_bo *bounce = NULL;
+ uint32_t offset;
+
+ if (size <= 192) {
+ nvc0_m2mf_push_linear(nvc0, buf->bo, buf->domain, buf->offset + start,
+ size, buf->data + start);
+ return TRUE;
+ }
+
+ mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset);
+ if (!bounce)
+ return FALSE;
+
+ nouveau_bo_map_range(bounce, offset, size,
+ NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
+ memcpy(bounce->map, buf->data + start, size);
+ nouveau_bo_unmap(bounce);
+
+ nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
+ bounce, offset, NOUVEAU_BO_GART, size);
+
+ nouveau_bo_ref(NULL, &bounce);
+ if (mm)
+ release_allocation(&mm, nvc0->screen->fence.current);
+
+ if (start == 0 && size == buf->base.width0)
+ buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
+ return TRUE;
+}
+
+static struct pipe_transfer *
+nvc0_buffer_transfer_get(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
+{
+ struct nvc0_resource *buf = nvc0_resource(resource);
+ struct nvc0_transfer *xfr = CALLOC_STRUCT(nvc0_transfer);
+ if (!xfr)
+ return NULL;
+
+ xfr->base.resource = resource;
+ xfr->base.box.x = box->x;
+ xfr->base.box.width = box->width;
+ xfr->base.usage = usage;
+
+ if (buf->domain == NOUVEAU_BO_VRAM) {
+ if (usage & PIPE_TRANSFER_READ) {
+ if (buf->status & NVC0_BUFFER_STATUS_DIRTY)
+ nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0);
+ }
+ }
+
+ return &xfr->base;
+}
+
+static void
+nvc0_buffer_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ struct nvc0_resource *buf = nvc0_resource(transfer->resource);
+ struct nvc0_transfer *xfr = nvc0_transfer(transfer);
+
+ if (xfr->base.usage & PIPE_TRANSFER_WRITE) {
+ /* writing is worse */
+ nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -5000);
+
+ if (buf->domain == NOUVEAU_BO_VRAM) {
+ nvc0_buffer_upload(nvc0_context(pipe), buf,
+ transfer->box.x, transfer->box.width);
+ }
+
+ if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER)))
+ nvc0_context(pipe)->vbo_dirty = TRUE;
+ }
+
+ FREE(xfr);
+}
+
+static INLINE boolean
+nvc0_buffer_sync(struct nvc0_resource *buf, unsigned rw)
+{
+ if (rw == PIPE_TRANSFER_READ) {
+ if (!buf->fence_wr)
+ return TRUE;
+ if (!nvc0_fence_wait(buf->fence_wr))
+ return FALSE;
+ } else {
+ if (!buf->fence)
+ return TRUE;
+ if (!nvc0_fence_wait(buf->fence))
+ return FALSE;
+
+ nvc0_fence_reference(&buf->fence, NULL);
+ }
+ nvc0_fence_reference(&buf->fence_wr, NULL);
+
+ return TRUE;
+}
+
+static INLINE boolean
+nvc0_buffer_busy(struct nvc0_resource *buf, unsigned rw)
+{
+ if (rw == PIPE_TRANSFER_READ)
+ return (buf->fence_wr && !nvc0_fence_signalled(buf->fence_wr));
+ else
+ return (buf->fence && !nvc0_fence_signalled(buf->fence));
+}
+
+static void *
+nvc0_buffer_transfer_map(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ struct nvc0_transfer *xfr = nvc0_transfer(transfer);
+ struct nvc0_resource *buf = nvc0_resource(transfer->resource);
+ struct nouveau_bo *bo = buf->bo;
+ uint8_t *map;
+ int ret;
+ uint32_t offset = xfr->base.box.x;
+ uint32_t flags;
+
+ nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -250);
+
+ if (buf->domain != NOUVEAU_BO_GART)
+ return buf->data + offset;
+
+ if (buf->mm)
+ flags = NOUVEAU_BO_NOSYNC | NOUVEAU_BO_RDWR;
+ else
+ flags = nouveau_screen_transfer_flags(xfr->base.usage);
+
+ offset += buf->offset;
+
+ ret = nouveau_bo_map_range(buf->bo, offset, xfr->base.box.width, flags);
+ if (ret)
+ return NULL;
+ map = bo->map;
+
+ /* Unmap right now. Since multiple buffers can share a single nouveau_bo,
+ * not doing so might make future maps fail or trigger "reloc while mapped"
+ * errors. For now, mappings to userspace are guaranteed to be persistent.
+ */
+ nouveau_bo_unmap(bo);
+
+ if (buf->mm) {
+ if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) {
+ if (nvc0_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE))
+ return NULL;
+ } else
+ if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+ nvc0_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE);
+ }
+ }
+ return map;
+}
+
+
+
+static void
+nvc0_buffer_transfer_flush_region(struct pipe_context *pipe,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
+{
+ struct nvc0_resource *res = nvc0_resource(transfer->resource);
+ struct nouveau_bo *bo = res->bo;
+ unsigned offset = res->offset + transfer->box.x + box->x;
+
+ /* not using non-snoop system memory yet, no need for cflush */
+ if (1)
+ return;
+
+ /* XXX: maybe need to upload for VRAM buffers here */
+
+ nouveau_screen_bo_map_flush_range(pipe->screen, bo, offset, box->width);
+}
+
+static void
+nvc0_buffer_transfer_unmap(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ /* we've called nouveau_bo_unmap right after map */
+}
+
+const struct u_resource_vtbl nvc0_buffer_vtbl =
+{
+ u_default_resource_get_handle, /* get_handle */
+ nvc0_buffer_destroy, /* resource_destroy */
+ NULL, /* is_resource_referenced */
+ nvc0_buffer_transfer_get, /* get_transfer */
+ nvc0_buffer_transfer_destroy, /* transfer_destroy */
+ nvc0_buffer_transfer_map, /* transfer_map */
+ nvc0_buffer_transfer_flush_region, /* transfer_flush_region */
+ nvc0_buffer_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+struct pipe_resource *
+nvc0_buffer_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ struct nvc0_resource *buffer;
+ boolean ret;
+
+ buffer = CALLOC_STRUCT(nvc0_resource);
+ if (!buffer)
+ return NULL;
+
+ buffer->base = *templ;
+ buffer->vtbl = &nvc0_buffer_vtbl;
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->base.screen = pscreen;
+
+ if (buffer->base.bind & PIPE_BIND_CONSTANT_BUFFER)
+ ret = nvc0_buffer_allocate(screen, buffer, 0);
+ else
+ ret = nvc0_buffer_allocate(screen, buffer, NOUVEAU_BO_GART);
+
+ if (ret == FALSE)
+ goto fail;
+
+ return &buffer->base;
+
+fail:
+ FREE(buffer);
+ return NULL;
+}
+
+
+struct pipe_resource *
+nvc0_user_buffer_create(struct pipe_screen *pscreen,
+ void *ptr,
+ unsigned bytes,
+ unsigned bind)
+{
+ struct nvc0_resource *buffer;
+
+ buffer = CALLOC_STRUCT(nvc0_resource);
+ if (!buffer)
+ return NULL;
+
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->vtbl = &nvc0_buffer_vtbl;
+ buffer->base.screen = pscreen;
+ buffer->base.format = PIPE_FORMAT_R8_UNORM;
+ buffer->base.usage = PIPE_USAGE_IMMUTABLE;
+ buffer->base.bind = bind;
+ buffer->base.width0 = bytes;
+ buffer->base.height0 = 1;
+ buffer->base.depth0 = 1;
+
+ buffer->data = ptr;
+ buffer->status = NVC0_BUFFER_STATUS_USER_MEMORY;
+
+ return &buffer->base;
+}
+
+/* Like download, but for GART buffers. Merge ? */
+static INLINE boolean
+nvc0_buffer_data_fetch(struct nvc0_resource *buf,
+ struct nouveau_bo *bo, unsigned offset, unsigned size)
+{
+ if (!buf->data) {
+ buf->data = MALLOC(size);
+ if (!buf->data)
+ return FALSE;
+ }
+ if (nouveau_bo_map_range(bo, offset, size, NOUVEAU_BO_RD))
+ return FALSE;
+ memcpy(buf->data, bo->map, size);
+ nouveau_bo_unmap(bo);
+
+ return TRUE;
+}
+
+/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
+boolean
+nvc0_buffer_migrate(struct nvc0_context *nvc0,
+ struct nvc0_resource *buf, const unsigned new_domain)
+{
+ struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
+ struct nouveau_bo *bo;
+ const unsigned old_domain = buf->domain;
+ unsigned size = buf->base.width0;
+ unsigned offset;
+ int ret;
+
+ assert(new_domain != old_domain);
+
+ if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
+ if (!nvc0_buffer_allocate(screen, buf, new_domain))
+ return FALSE;
+ ret = nouveau_bo_map_range(buf->bo, buf->offset, size, NOUVEAU_BO_WR |
+ NOUVEAU_BO_NOSYNC);
+ if (ret)
+ return ret;
+ memcpy(buf->bo->map, buf->data, size);
+ nouveau_bo_unmap(buf->bo);
+ FREE(buf->data);
+ } else
+ if (old_domain != 0 && new_domain != 0) {
+ struct nvc0_mm_allocation *mm = buf->mm;
+
+ if (new_domain == NOUVEAU_BO_VRAM) {
+ /* keep a system memory copy of our data in case we hit a fallback */
+ if (!nvc0_buffer_data_fetch(buf, buf->bo, buf->offset, size))
+ return FALSE;
+ debug_printf("migrating %u KiB to VRAM\n", size / 1024);
+ }
+
+ offset = buf->offset;
+ bo = buf->bo;
+ buf->bo = NULL;
+ buf->mm = NULL;
+ nvc0_buffer_allocate(screen, buf, new_domain);
+
+ nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset, new_domain,
+ bo, offset, old_domain, buf->base.width0);
+
+ nouveau_bo_ref(NULL, &bo);
+ if (mm)
+ release_allocation(&mm, screen->fence.current);
+ } else
+ if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
+ if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
+ return FALSE;
+ if (!nvc0_buffer_upload(nvc0, buf, 0, buf->base.width0))
+ return FALSE;
+ } else
+ return FALSE;
+
+ assert(buf->domain == new_domain);
+ return TRUE;
+}
+
+/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
+ * We'd like to only allocate @size bytes here, but then we'd have to rebase
+ * the vertex indices ...
+ */
+boolean
+nvc0_user_buffer_upload(struct nvc0_resource *buf, unsigned base, unsigned size)
+{
+ struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
+ int ret;
+
+ assert(buf->status & NVC0_BUFFER_STATUS_USER_MEMORY);
+
+ buf->base.width0 = base + size;
+ if (!nvc0_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
+ return FALSE;
+
+ ret = nouveau_bo_map_range(buf->bo, buf->offset + base, size,
+ NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
+ if (ret)
+ return FALSE;
+ memcpy(buf->bo->map, buf->data + base, size);
+ nouveau_bo_unmap(buf->bo);
+
+ return TRUE;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
new file mode 100644
index 0000000000..2118abb5d5
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+
+#include "nvc0_context.h"
+#include "nvc0_screen.h"
+#include "nvc0_resource.h"
+
+#include "nouveau/nouveau_reloc.h"
+
+static void
+nvc0_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
+ OUT_RING (chan, 0x00);
+ }
+
+ if (fence) {
+ nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE);
+ }
+
+ if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) {
+ FIRE_RING(chan);
+
+ nvc0_screen_fence_next(nvc0->screen);
+ }
+}
+
+static void
+nvc0_destroy(struct pipe_context *pipe)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ draw_destroy(nvc0->draw);
+
+ if (nvc0->screen->cur_ctx == nvc0)
+ nvc0->screen->cur_ctx = NULL;
+
+ FREE(nvc0);
+}
+
+struct pipe_context *
+nvc0_create(struct pipe_screen *pscreen, void *priv)
+{
+ struct pipe_winsys *pipe_winsys = pscreen->winsys;
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ struct nvc0_context *nvc0;
+
+ nvc0 = CALLOC_STRUCT(nvc0_context);
+ if (!nvc0)
+ return NULL;
+ nvc0->screen = screen;
+
+ nvc0->pipe.winsys = pipe_winsys;
+ nvc0->pipe.screen = pscreen;
+ nvc0->pipe.priv = priv;
+
+ nvc0->pipe.destroy = nvc0_destroy;
+
+ nvc0->pipe.draw_vbo = nvc0_draw_vbo;
+ nvc0->pipe.clear = nvc0_clear;
+
+ nvc0->pipe.flush = nvc0_flush;
+
+ screen->base.channel->user_private = nvc0;
+
+ nvc0_init_query_functions(nvc0);
+ nvc0_init_surface_functions(nvc0);
+ nvc0_init_state_functions(nvc0);
+ nvc0_init_resource_functions(&nvc0->pipe);
+
+ nvc0->draw = draw_create(&nvc0->pipe);
+ assert(nvc0->draw);
+ draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0));
+
+ return &nvc0->pipe;
+}
+
+struct resident {
+ struct nvc0_resource *res;
+ uint32_t flags;
+};
+
+void
+nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx,
+ struct nvc0_resource *resource, uint32_t flags)
+{
+ struct resident rsd = { resource, flags };
+
+ if (!resource->bo)
+ return;
+
+ /* We don't need to reference the resource here, it will be referenced
+ * in the context/state, and bufctx will be reset when state changes.
+ */
+ util_dynarray_append(&nvc0->residents[ctx], struct resident, rsd);
+}
+
+void
+nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx,
+ struct nvc0_resource *resource)
+{
+ struct resident *rsd, *top;
+ unsigned i;
+
+ for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) {
+ rsd = util_dynarray_element(&nvc0->residents[ctx], struct resident, i);
+
+ if (rsd->res == resource) {
+ top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident);
+ if (rsd != top)
+ *rsd = *top;
+ break;
+ }
+ }
+}
+
+void
+nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0)
+{
+ struct resident *rsd;
+ struct util_dynarray *array;
+ unsigned ctx, i;
+
+ for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) {
+ array = &nvc0->residents[ctx];
+
+ for (i = 0; i < array->size / sizeof(struct resident); ++i) {
+ rsd = util_dynarray_element(array, struct resident, i);
+
+ nvc0_resource_validate(rsd->res, rsd->flags);
+ }
+ }
+
+ nvc0_screen_make_buffers_resident(nvc0->screen);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
new file mode 100644
index 0000000000..eeb5beff7a
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -0,0 +1,227 @@
+#ifndef __NVC0_CONTEXT_H__
+#define __NVC0_CONTEXT_H__
+
+#include <stdio.h>
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+#include "util/u_dynarray.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nvc0_winsys.h"
+#include "nvc0_stateobj.h"
+#include "nvc0_screen.h"
+#include "nvc0_program.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3ddefs.xml.h"
+#include "nvc0_3d.xml.h"
+#include "nvc0_2d.xml.h"
+#include "nvc0_m2mf.xml.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
+
+#ifdef NOUVEAU_DEBUG
+# define NOUVEAU_DBG(args...) printf(args);
+#else
+# define NOUVEAU_DBG(args...)
+#endif
+
+#define NVC0_NEW_BLEND (1 << 0)
+#define NVC0_NEW_RASTERIZER (1 << 1)
+#define NVC0_NEW_ZSA (1 << 2)
+#define NVC0_NEW_VERTPROG (1 << 3)
+#define NVC0_NEW_TCTLPROG (1 << 4)
+#define NVC0_NEW_TEVLPROG (1 << 5)
+#define NVC0_NEW_GMTYPROG (1 << 6)
+#define NVC0_NEW_FRAGPROG (1 << 7)
+#define NVC0_NEW_BLEND_COLOUR (1 << 8)
+#define NVC0_NEW_STENCIL_REF (1 << 9)
+#define NVC0_NEW_CLIP (1 << 10)
+#define NVC0_NEW_SAMPLE_MASK (1 << 11)
+#define NVC0_NEW_FRAMEBUFFER (1 << 12)
+#define NVC0_NEW_STIPPLE (1 << 13)
+#define NVC0_NEW_SCISSOR (1 << 14)
+#define NVC0_NEW_VIEWPORT (1 << 15)
+#define NVC0_NEW_ARRAYS (1 << 16)
+#define NVC0_NEW_VERTEX (1 << 17)
+#define NVC0_NEW_CONSTBUF (1 << 18)
+#define NVC0_NEW_TEXTURES (1 << 19)
+#define NVC0_NEW_SAMPLERS (1 << 20)
+
+#define NVC0_BUFCTX_CONSTANT 0
+#define NVC0_BUFCTX_FRAME 1
+#define NVC0_BUFCTX_VERTEX 2
+#define NVC0_BUFCTX_TEXTURES 3
+#define NVC0_BUFCTX_COUNT 4
+
+struct nvc0_context {
+ struct pipe_context pipe;
+
+ struct nvc0_screen *screen;
+
+ struct util_dynarray residents[NVC0_BUFCTX_COUNT];
+
+ uint32_t dirty;
+
+ struct {
+ uint32_t instance_bits;
+ uint32_t instance_base;
+ int32_t index_bias;
+ boolean prim_restart;
+ uint8_t num_vtxbufs;
+ uint8_t num_vtxelts;
+ uint8_t num_textures[5];
+ uint8_t num_samplers[5];
+ uint16_t scissor;
+ uint32_t uniform_buffer_bound[5];
+ } state;
+
+ struct nvc0_blend_stateobj *blend;
+ struct nvc0_rasterizer_stateobj *rast;
+ struct nvc0_zsa_stateobj *zsa;
+ struct nvc0_vertex_stateobj *vertex;
+
+ struct nvc0_program *vertprog;
+ struct nvc0_program *tctlprog;
+ struct nvc0_program *tevlprog;
+ struct nvc0_program *gmtyprog;
+ struct nvc0_program *fragprog;
+
+ struct pipe_resource *constbuf[5][16];
+ uint16_t constbuf_dirty[5];
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned num_vtxbufs;
+ struct pipe_index_buffer idxbuf;
+ uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
+ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
+ unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */
+ unsigned vbo_max_index;
+
+ struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS];
+ unsigned num_textures[5];
+ struct nvc0_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS];
+ unsigned num_samplers[5];
+
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_blend_color blend_colour;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_poly_stipple stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_clip_state clip;
+
+ unsigned sample_mask;
+
+ boolean vbo_dirty;
+ boolean vbo_push_hint;
+
+ struct draw_context *draw;
+};
+
+static INLINE struct nvc0_context *
+nvc0_context(struct pipe_context *pipe)
+{
+ return (struct nvc0_context *)pipe;
+}
+
+struct nvc0_surface {
+ struct pipe_surface base;
+ uint32_t offset;
+ uint32_t width;
+ uint16_t height;
+ uint16_t depth;
+};
+
+static INLINE struct nvc0_surface *
+nvc0_surface(struct pipe_surface *ps)
+{
+ return (struct nvc0_surface *)ps;
+}
+
+/* nvc0_context.c */
+struct pipe_context *nvc0_create(struct pipe_screen *, void *);
+
+void nvc0_bufctx_emit_relocs(struct nvc0_context *);
+void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx,
+ struct nvc0_resource *, uint32_t flags);
+void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx,
+ struct nvc0_resource *);
+static INLINE void
+nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx)
+{
+ util_dynarray_resize(&nvc0->residents[ctx], 0);
+}
+
+/* nvc0_draw.c */
+extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
+
+/* nvc0_program.c */
+boolean nvc0_program_translate(struct nvc0_program *);
+void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
+
+/* nvc0_query.c */
+void nvc0_init_query_functions(struct nvc0_context *);
+
+/* nvc0_shader_state.c */
+void nvc0_vertprog_validate(struct nvc0_context *);
+void nvc0_tctlprog_validate(struct nvc0_context *);
+void nvc0_tevlprog_validate(struct nvc0_context *);
+void nvc0_gmtyprog_validate(struct nvc0_context *);
+void nvc0_fragprog_validate(struct nvc0_context *);
+
+/* nvc0_state.c */
+extern void nvc0_init_state_functions(struct nvc0_context *);
+
+/* nvc0_state_validate.c */
+extern boolean nvc0_state_validate(struct nvc0_context *);
+
+/* nvc0_surface.c */
+extern void nvc0_clear(struct pipe_context *, unsigned buffers,
+ const float *rgba, double depth, unsigned stencil);
+extern void nvc0_init_surface_functions(struct nvc0_context *);
+
+/* nvc0_tex.c */
+void nvc0_validate_textures(struct nvc0_context *);
+void nvc0_validate_samplers(struct nvc0_context *);
+
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *);
+
+/* nvc0_transfer.c */
+void
+nvc0_m2mf_push_linear(struct nvc0_context *nvc0,
+ struct nouveau_bo *dst, unsigned domain, int offset,
+ unsigned size, void *data);
+void
+nvc0_m2mf_copy_linear(struct nvc0_context *nvc0,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size);
+
+/* nvc0_vbo.c */
+void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
+
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements);
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
+
+void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0);
+
+/* nvc0_push.c */
+void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
+void nvc0_push_vbo2(struct nvc0_context *, const struct pipe_draw_info *);
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_draw.c b/src/gallium/drivers/nvc0/nvc0_draw.c
new file mode 100644
index 0000000000..ac7e9f66a1
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_draw.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nvc0_context.h"
+
+struct nvc0_render_stage {
+ struct draw_stage stage;
+ struct nvc0_context *nvc0;
+};
+
+static INLINE struct nvc0_render_stage *
+nvc0_render_stage(struct draw_stage *stage)
+{
+ return (struct nvc0_render_stage *)stage;
+}
+
+static void
+nvc0_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_flush(struct draw_stage *stage, unsigned flags)
+{
+}
+
+static void
+nvc0_render_reset_stipple_counter(struct draw_stage *stage)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_destroy(struct draw_stage *stage)
+{
+ FREE(stage);
+}
+
+struct draw_stage *
+nvc0_draw_render_stage(struct nvc0_context *nvc0)
+{
+ struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage);
+
+ rs->nvc0 = nvc0;
+ rs->stage.draw = nvc0->draw;
+ rs->stage.destroy = nvc0_render_destroy;
+ rs->stage.point = nvc0_render_point;
+ rs->stage.line = nvc0_render_line;
+ rs->stage.tri = nvc0_render_tri;
+ rs->stage.flush = nvc0_render_flush;
+ rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter;
+
+ return &rs->stage;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c
new file mode 100644
index 0000000000..9d2c48cf14
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_fence.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_fence.h"
+#include "nvc0_context.h"
+#include "nvc0_screen.h"
+
+#ifdef PIPE_OS_UNIX
+#include <sched.h>
+#endif
+
+boolean
+nvc0_screen_fence_new(struct nvc0_screen *screen, struct nvc0_fence **fence,
+ boolean emit)
+{
+ *fence = CALLOC_STRUCT(nvc0_fence);
+ if (!*fence)
+ return FALSE;
+
+ (*fence)->screen = screen;
+ (*fence)->ref = 1;
+
+ if (emit)
+ nvc0_fence_emit(*fence);
+
+ return TRUE;
+}
+
+void
+nvc0_fence_emit(struct nvc0_fence *fence)
+{
+ struct nvc0_screen *screen = fence->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+
+ fence->sequence = ++screen->fence.sequence;
+
+ assert(fence->state == NVC0_FENCE_STATE_AVAILABLE);
+
+ BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
+ OUT_RING (chan, fence->sequence);
+ OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT |
+ (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT));
+
+ ++fence->ref;
+
+ if (screen->fence.tail)
+ screen->fence.tail->next = fence;
+ else
+ screen->fence.head = fence;
+
+ screen->fence.tail = fence;
+
+ fence->state = NVC0_FENCE_STATE_EMITTED;
+}
+
+static void
+nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence);
+
+void
+nvc0_fence_del(struct nvc0_fence *fence)
+{
+ struct nvc0_fence *it;
+ struct nvc0_screen *screen = fence->screen;
+
+ if (fence->state == NVC0_FENCE_STATE_EMITTED) {
+ if (fence == screen->fence.head) {
+ screen->fence.head = fence->next;
+ if (!screen->fence.head)
+ screen->fence.tail = NULL;
+ } else {
+ for (it = screen->fence.head; it && it->next != fence; it = it->next);
+ it->next = fence->next;
+ if (screen->fence.tail == fence)
+ screen->fence.tail = it;
+ }
+ }
+
+ if (fence->buffers) {
+ debug_printf("WARNING: deleting fence with buffers "
+ "still hooked to it !\n");
+ nvc0_fence_trigger_release_buffers(fence);
+ }
+
+ FREE(fence);
+}
+
+static void
+nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence)
+{
+ struct nvc0_mm_allocation *alloc = fence->buffers;
+
+ while (alloc) {
+ struct nvc0_mm_allocation *next = alloc->next;
+ nvc0_mm_free(alloc);
+ alloc = next;
+ };
+ fence->buffers = NULL;
+}
+
+static void
+nvc0_screen_fence_update(struct nvc0_screen *screen)
+{
+ struct nvc0_fence *fence;
+ struct nvc0_fence *next = NULL;
+ uint32_t sequence = screen->fence.map[0];
+
+ if (screen->fence.sequence_ack == sequence)
+ return;
+ screen->fence.sequence_ack = sequence;
+
+ for (fence = screen->fence.head; fence; fence = next) {
+ next = fence->next;
+ sequence = fence->sequence;
+
+ fence->state = NVC0_FENCE_STATE_SIGNALLED;
+
+ if (fence->buffers)
+ nvc0_fence_trigger_release_buffers(fence);
+
+ nvc0_fence_reference(&fence, NULL);
+
+ if (sequence == screen->fence.sequence_ack)
+ break;
+ }
+ screen->fence.head = next;
+ if (!next)
+ screen->fence.tail = NULL;
+}
+
+#define NVC0_FENCE_MAX_SPINS (1 << 17)
+
+boolean
+nvc0_fence_signalled(struct nvc0_fence *fence)
+{
+ struct nvc0_screen *screen = fence->screen;
+
+ if (fence->state == NVC0_FENCE_STATE_EMITTED)
+ nvc0_screen_fence_update(screen);
+
+ return fence->state == NVC0_FENCE_STATE_SIGNALLED;
+}
+
+boolean
+nvc0_fence_wait(struct nvc0_fence *fence)
+{
+ struct nvc0_screen *screen = fence->screen;
+ int spins = 0;
+
+ if (fence->state == NVC0_FENCE_STATE_AVAILABLE) {
+ nvc0_fence_emit(fence);
+
+ FIRE_RING(screen->base.channel);
+
+ if (fence == screen->fence.current)
+ nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
+ }
+
+ do {
+ nvc0_screen_fence_update(screen);
+
+ if (fence->state == NVC0_FENCE_STATE_SIGNALLED)
+ return TRUE;
+ spins++;
+#ifdef PIPE_OS_UNIX
+ if (!(spins % 8)) /* donate a few cycles */
+ sched_yield();
+#endif
+ } while (spins < NVC0_FENCE_MAX_SPINS);
+
+ if (spins > 9000)
+ NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence);
+
+ return FALSE;
+}
+
+void
+nvc0_screen_fence_next(struct nvc0_screen *screen)
+{
+ nvc0_fence_emit(screen->fence.current);
+ nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
+ nvc0_screen_fence_update(screen);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h
new file mode 100644
index 0000000000..e63c164bda
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_fence.h
@@ -0,0 +1,48 @@
+
+#ifndef __NVC0_FENCE_H__
+#define __NVC0_FENCE_H__
+
+#include "util/u_inlines.h"
+#include "util/u_double_list.h"
+
+#define NVC0_FENCE_STATE_AVAILABLE 0
+#define NVC0_FENCE_STATE_EMITTED 1
+#define NVC0_FENCE_STATE_SIGNALLED 2
+
+struct nvc0_mm_allocation;
+
+struct nvc0_fence {
+ struct nvc0_fence *next;
+ struct nvc0_screen *screen;
+ int state;
+ int ref;
+ uint32_t sequence;
+ struct nvc0_mm_allocation *buffers;
+};
+
+void nvc0_fence_emit(struct nvc0_fence *);
+void nvc0_fence_del(struct nvc0_fence *);
+
+boolean nvc0_fence_wait(struct nvc0_fence *);
+boolean nvc0_fence_signalled(struct nvc0_fence *);
+
+static INLINE void
+nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence)
+{
+ if (*ref) {
+ if (--(*ref)->ref == 0)
+ nvc0_fence_del(*ref);
+ }
+ if (fence)
+ ++fence->ref;
+
+ *ref = fence;
+}
+
+static INLINE struct nvc0_fence *
+nvc0_fence(struct pipe_fence_handle *fence)
+{
+ return (struct nvc0_fence *)fence;
+}
+
+#endif // __NVC0_FENCE_H__
diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c
new file mode 100644
index 0000000000..5d02357381
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_formats.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_screen.h"
+#include "nv50_texture.xml.h"
+#include "nvc0_3d.xml.h"
+#include "nv50_defs.xml.h"
+#include "nv50_texture.xml.h"
+#include "pipe/p_defines.h"
+
+#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
+ (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \
+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
+ (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \
+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
+ (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \
+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
+ (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \
+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
+ NV50_TIC_0_FMT_##sz, \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##sz | \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t0 | \
+ (r << 31)
+
+#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
+ (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \
+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
+ (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \
+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
+ (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \
+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
+ (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \
+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
+ NV50_TIC_0_FMT_##sz, 0
+
+#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER
+#define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW
+#define RENDER_TARGET PIPE_BIND_RENDER_TARGET
+#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL
+#define SCANOUT PIPE_BIND_SCANOUT
+
+/* for vertex buffers: */
+#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8
+#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16
+#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32
+
+const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
+{
+ /* COMMON FORMATS */
+
+ [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM,
+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM,
+ A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB,
+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB,
+ A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM,
+ B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
+ SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM,
+ B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
+ SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
+ B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
+ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
+
+ [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM,
+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1),
+ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
+
+ /* DEPTH/STENCIL FORMATS */
+
+ [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM,
+ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_ZETA, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM,
+ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM,
+ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8Z24_UNORM,
+ B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT,
+ B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_ZETA, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = {
+ NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
+ B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ /* LUMINANCE, ALPHA, INTENSITY */
+
+ [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM,
+ A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_L8A8_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
+ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L8A8_SRGB] = { 0,
+ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+ SAMPLER_VIEW },
+
+ /* DXT, RGTC */
+
+ [PIPE_FORMAT_DXT1_RGB] = { 0,
+ B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT1_RGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT3_RGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT5_RGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC1_UNORM] = { 0,
+ B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC1_SNORM] = { 0,
+ B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC2_UNORM] = { 0,
+ B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC2_SNORM] = { 0,
+ B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
+ SAMPLER_VIEW },
+
+ /* FLOAT 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT,
+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT,
+ A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT,
+ A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT,
+ A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* FLOAT 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT,
+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT,
+ A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT,
+ A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT,
+ A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* ODD FORMATS */
+
+ [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT,
+ B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0,
+ B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0),
+ SAMPLER_VIEW },
+
+ /* SNORM 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0,
+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32B32_SNORM] = { 0,
+ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32_SNORM] = { 0,
+ A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32_SNORM] = { 0,
+ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* UNORM 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0,
+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32B32_UNORM] = { 0,
+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32_UNORM] = { 0,
+ A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32_UNORM] = { 0,
+ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* SNORM 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM,
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16B16_SNORM] = { 0,
+ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM,
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM,
+ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* UNORM 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16B16_UNORM] = { 0,
+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
+ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* SNORM 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM,
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8_SNORM] = { 0,
+ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM,
+ A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM,
+ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* UNORM 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM,
+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB,
+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM,
+ A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* SSCALED 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_SINT,
+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32B32_SSCALED] = { 0,
+ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32_SINT,
+ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32_SSCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* USCALED 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_USCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_UINT,
+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32B32_USCALED] = { 0,
+ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32_USCALED] = { NV50_SURFACE_FORMAT_R32G32_UINT,
+ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32_USCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* SSCALED 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_SINT,
+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16B16_SSCALED] = { 0,
+ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16_SINT,
+ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16_SSCALED] = { NV50_SURFACE_FORMAT_R16_SINT,
+ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* USCALED 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_USCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_UINT,
+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16B16_USCALED] = { 0,
+ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16_USCALED] = { NV50_SURFACE_FORMAT_R16G16_UINT,
+ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16_USCALED] = { NV50_SURFACE_FORMAT_R16_UINT,
+ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* SSCALED 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_SSCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_SINT,
+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8B8_SSCALED] = { 0,
+ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8_SSCALED] = { NV50_SURFACE_FORMAT_R8G8_SINT,
+ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8_SSCALED] = { NV50_SURFACE_FORMAT_R8_SINT,
+ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* USCALED 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_USCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_UINT,
+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8B8_USCALED] = { 0,
+ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8_USCALED] = { NV50_SURFACE_FORMAT_R8G8_UINT,
+ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8_USCALED] = { NV50_SURFACE_FORMAT_R8_UINT,
+ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+};
diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
new file mode 100644
index 0000000000..8da963a4c5
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
@@ -0,0 +1,235 @@
+
+#ifndef __NVC0_PGRAPH_MACROS_H__
+#define __NVC0_PGRAPH_MACROS_H__
+
+/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1
+ * with bits [src:src+size) in r2
+ *
+ * bra(n)z annul: no delay slot
+ */
+
+/* The comments above the macros describe what they *should* be doing,
+ * but we use less functionality for now.
+ */
+
+/*
+ * for (i = 0; i < 8; ++i)
+ * [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg);
+ *
+ * [3428] = arg;
+ *
+ * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0)
+ * [0d9c] = 0;
+ * else
+ * [0d9c] = [342c];
+ */
+static const uint32_t nvc0_9097_blend_enables[] =
+{
+ 0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */
+ 0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */
+ 0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */
+ 0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */
+ 0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */
+ 0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */
+ 0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */
+ 0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */
+ 0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */
+};
+
+/*
+ * uint64 limit = (parm(0) << 32) | parm(1);
+ * uint64 start = (parm(2) << 32);
+ *
+ * if (limit) {
+ * start |= parm(3);
+ * --limit;
+ * } else {
+ * start |= 1;
+ * }
+ *
+ * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff;
+ * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff;
+ * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff;
+ * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff;
+ */
+static const uint32_t nvc0_9097_vertex_array_select[] =
+{
+ 0x00000201, /* 0x00: parm $r2 */
+ 0x00000301, /* 0x01: parm $r3 */
+ 0x00000401, /* 0x02: parm $r4 */
+ 0x00000501, /* 0x03: parm $r5 */
+ 0x11004612, /* 0x04: mov $r6 extrinsrt 0 $r1 0 4 2 */
+ 0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */
+ 0x05c07621, /* 0x06: maddr $r6 add $6 0x1701 */
+ 0x00002041, /* 0x07: send $r4 */
+ 0x00002841, /* 0x08: send $r5 */
+ 0x05f03f21, /* 0x09: maddr $r7 add $7 0x17c0 */
+ 0x000010c1, /* 0x0a: exit send $r2 */
+ 0x00001841, /* 0x0b: send $r3 */
+};
+
+static const uint32_t nvc0_9097_color_mask_brdc[] =
+{
+ 0x05a00021, /* maddr [NVC0_3D_COLOR_MASK(0), increment = 4] */
+ 0x00000841, /* send $r1 */
+ 0x00000841, /* send $r1 */
+ 0x00000841, /* send $r1 */
+ 0x00000841, /* send $r1 */
+ 0x00000841, /* send $r1 */
+ 0x00000841, /* send $r1 */
+ 0x000008c1, /* exit send $r1 */
+ 0x00000841, /* send $r1 */
+};
+
+/*
+ * [GL_POLYGON_MODE_FRONT] = arg;
+ *
+ * if (BIT(31 of [0x3410]))
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
+ * [02ec] = 0;
+ * else
+ * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE)
+ * [02ec] = BYTE(1 of [0x3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [0x3410]) << 4;
+ */
+static const uint32_t nvc0_9097_poly_mode_front[] =
+{
+ 0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */
+ 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
+ 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
+ 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x00dac021, /* 0x08: maddr 0x36b */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
+ 0x00003041 /* 0x10: send $r6 */
+};
+
+/*
+ * [GL_POLYGON_MODE_BACK] = arg;
+ *
+ * if (BIT(31 of [0x3410]))
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
+ * [02ec] = 0;
+ * else
+ * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE)
+ * [02ec] = BYTE(1 of [0x3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [0x3410]) << 4;
+ */
+/* NOTE: 0x3410 = 0x80002006 by default,
+ * POLYGON_MODE == GL_LINE check replaced by (MODE & 1)
+ * SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1
+ */
+static const uint32_t nvc0_9097_poly_mode_back[] =
+{
+ 0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */
+ 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
+ 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
+ 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x00dac021, /* 0x08: maddr 0x36b */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
+ 0x00003041 /* 0x10: send $r6 */
+};
+
+/*
+ * [NVC0_3D_SP_SELECT(4)] = arg
+ *
+ * if BIT(31 of [0x3410]) == 0
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41)
+ * [02ec] = 0
+ * else
+ * if (any POLYGON MODE == LINE)
+ * [02ec] = BYTE(1 of [3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1
+ */
+static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */
+{
+ 0x00dac215, /* 0x00: read $r2 0x36b */
+ 0x00db0315, /* 0x01: read $r3 0x36c */
+ 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
+ 0x020c0415, /* 0x03: read $r4 0x830 */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x02100021, /* 0x08: maddr 0x840 */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr 0xbb */
+ 0x00003041, /* 0x10: send $r6 */
+};
+
+/*
+ * [NVC0_3D_SP_SELECT(3)] = arg
+ *
+ * if BIT(31 of [0x3410]) == 0
+ * [1a24] = 0x7353;
+ *
+ * if (arg == 0x31) {
+ * if (BIT(2 of [0x3430])) {
+ * int i = 15; do { --i; } while(i);
+ * [0x1a2c] = 0;
+ * }
+ * }
+ *
+ * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || arg == 0x31)
+ * [02ec] = 0
+ * else
+ * if ([any POLYGON_MODE] == GL_LINE)
+ * [02ec] = BYTE(1 of [3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [3410]) << 4;
+ */
+static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */
+{
+ 0x00dac215, /* 0x00: read $r2 0x36b */
+ 0x00db0315, /* 0x01: read $r3 0x36c */
+ 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
+ 0x02100415, /* 0x03: read $r4 0x840 */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x020c0021, /* 0x08: maddr 0x830 */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr 0xbb */
+ 0x00003041, /* 0x10: send $r6 */
+};
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
new file mode 100644
index 0000000000..3bf628d425
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
@@ -0,0 +1,138 @@
+#ifndef NVC0_M2MF_XML
+#define NVC0_M2MF_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_M2MF_TILING_MODE_IN 0x00000204
+
+#define NVC0_M2MF_TILING_PITCH_IN 0x00000208
+
+#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c
+
+#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210
+
+#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214
+
+#define NVC0_M2MF_TILING_MODE_OUT 0x00000220
+
+#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224
+
+#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228
+
+#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c
+
+#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230
+
+#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238
+
+#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c
+
+#define NVC0_M2MF_EXEC 0x00000300
+#define NVC0_M2MF_EXEC_PUSH 0x00000001
+#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010
+#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100
+#define NVC0_M2MF_EXEC_NOTIFY 0x00002000
+#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000
+#define NVC0_M2MF_EXEC_INC__SHIFT 20
+
+#define NVC0_M2MF_DATA 0x00000304
+
+#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c
+
+#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310
+
+#define NVC0_M2MF_PITCH_IN 0x00000314
+
+#define NVC0_M2MF_PITCH_OUT 0x00000318
+
+#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c
+
+#define NVC0_M2MF_LINE_COUNT 0x00000320
+
+#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c
+
+#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330
+
+#define NVC0_M2MF_NOTIFY 0x00000334
+
+#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344
+
+#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348
+
+#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c
+
+#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350
+
+
+#endif /* NVC0_M2MF_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c
new file mode 100644
index 0000000000..7c7e134146
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_miptree.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+#include "nvc0_transfer.h"
+
+static INLINE uint32_t
+get_tile_dims(unsigned nx, unsigned ny, unsigned nz)
+{
+ uint32_t tile_mode = 0x000;
+
+ if (ny > 64) tile_mode = 0x040; /* height 128 tiles */
+ else
+ if (ny > 32) tile_mode = 0x030; /* height 64 tiles */
+ else
+ if (ny > 16) tile_mode = 0x020; /* height 32 tiles */
+ else
+ if (ny > 8) tile_mode = 0x010; /* height 16 tiles */
+
+ if (nz == 1)
+ return tile_mode;
+ else
+ if (tile_mode > 0x020)
+ tile_mode = 0x020;
+
+ if (nz > 16 && tile_mode < 0x020)
+ return tile_mode | 0x500; /* depth 32 tiles */
+ if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */
+ if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */
+ if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */
+
+ return tile_mode | 0x100;
+}
+
+static INLINE unsigned
+calc_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh)
+{
+ unsigned tile_h = NVC0_TILE_HEIGHT(tile_mode);
+ unsigned tile_d_shift = NVC0_TILE_DIM_SHIFT(tile_mode, 2);
+ unsigned tile_d = 1 << tile_d_shift;
+
+ /* stride_2d == to next slice within this volume tile */
+ /* stride_3d == size (in bytes) of a volume tile */
+ unsigned stride_2d = tile_h * NVC0_TILE_PITCH(tile_mode);
+ unsigned stride_3d = tile_d * align(nbh, tile_h) * pitch;
+
+ return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d;
+}
+
+static void
+nvc0_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
+{
+ struct nvc0_miptree *mt = nvc0_miptree(pt);
+
+ nouveau_screen_bo_release(pscreen, mt->base.bo);
+
+ FREE(mt);
+}
+
+static boolean
+nvc0_miptree_get_handle(struct pipe_screen *pscreen,
+ struct pipe_resource *pt,
+ struct winsys_handle *whandle)
+{
+ struct nvc0_miptree *mt = nvc0_miptree(pt);
+ unsigned stride;
+
+ if (!mt || !mt->base.bo)
+ return FALSE;
+
+ stride = util_format_get_stride(mt->base.base.format,
+ mt->base.base.width0);
+
+ return nouveau_screen_bo_get_handle(pscreen,
+ mt->base.bo,
+ stride,
+ whandle);
+}
+
+const struct u_resource_vtbl nvc0_miptree_vtbl =
+{
+ nvc0_miptree_get_handle, /* get_handle */
+ nvc0_miptree_destroy, /* resource_destroy */
+ NULL, /* is_resource_referenced */
+ nvc0_miptree_transfer_new, /* get_transfer */
+ nvc0_miptree_transfer_del, /* transfer_destroy */
+ nvc0_miptree_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ nvc0_miptree_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+struct pipe_resource *
+nvc0_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ)
+{
+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+ struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree);
+ struct pipe_resource *pt = &mt->base.base;
+ int ret;
+ unsigned w, h, d, l, alloc_size;
+ uint32_t tile_flags;
+
+ if (!mt)
+ return NULL;
+
+ mt->base.vtbl = &nvc0_miptree_vtbl;
+ *pt = *templ;
+ pipe_reference_init(&pt->reference, 1);
+ pt->screen = pscreen;
+
+ mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
+
+ w = pt->width0;
+ h = pt->height0;
+ d = mt->layout_3d ? pt->depth0 : 1;
+
+ switch (pt->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ tile_flags = 0x0700; /* COMPRESSED */
+ tile_flags = 0x0200; /* NORMAL ? */
+ tile_flags = 0x0100; /* NORMAL ? */
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ tile_flags = 0x5300; /* MSAA 4, COMPRESSED */
+ tile_flags = 0x4600; /* NORMAL */
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ tile_flags = 0x1100; /* NORMAL */
+ if (w * h >= 128 * 128 && 0)
+ tile_flags = 0x1700; /* COMPRESSED, requires magic */
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ tile_flags = 0xf500; /* COMPRESSED */
+ tile_flags = 0xf700; /* MSAA 2 */
+ tile_flags = 0xf900; /* MSAA 4 */
+ tile_flags = 0xfe00; /* NORMAL */
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+ tile_flags = 0xce00; /* COMPRESSED */
+ tile_flags = 0xcf00; /* MSAA 2, COMPRESSED */
+ tile_flags = 0xd000; /* MSAA 4, COMPRESSED */
+ tile_flags = 0xc300; /* NORMAL */
+ break;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ tile_flags = 0xe900; /* COMPRESSED */
+ break;
+ default:
+ tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */
+ tile_flags = 0xfe00; /* NORMAL 32 BIT */
+ if (w * h >= 128 * 128 && 0)
+ tile_flags = 0xdb00; /* COMPRESSED 32 BIT, requires magic */
+ break;
+ }
+
+ /* For 3D textures, a mipmap is spanned by all the layers, for array
+ * textures and cube maps, each layer contains its own mipmaps.
+ */
+ for (l = 0; l <= pt->last_level; ++l) {
+ struct nvc0_miptree_level *lvl = &mt->level[l];
+ unsigned nbx = util_format_get_nblocksx(pt->format, w);
+ unsigned nby = util_format_get_nblocksy(pt->format, h);
+ unsigned blocksize = util_format_get_blocksize(pt->format);
+
+ lvl->offset = mt->total_size;
+ lvl->tile_mode = get_tile_dims(nbx, nby, d);
+ lvl->pitch = align(nbx * blocksize, NVC0_TILE_PITCH(lvl->tile_mode));
+
+ mt->total_size += lvl->pitch *
+ align(nby, NVC0_TILE_HEIGHT(lvl->tile_mode)) *
+ align(d, NVC0_TILE_DEPTH(lvl->tile_mode));
+
+ w = u_minify(w, 1);
+ h = u_minify(h, 1);
+ d = u_minify(d, 1);
+ }
+
+ if (pt->array_size > 1) {
+ mt->layer_stride = align(mt->total_size,
+ NVC0_TILE_SIZE(mt->level[0].tile_mode));
+ mt->total_size = mt->layer_stride * pt->array_size;
+ }
+
+ alloc_size = mt->total_size;
+ if (tile_flags == 0x1700)
+ alloc_size *= 3; /* HiZ, XXX: correct size */
+
+ ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size,
+ mt->level[0].tile_mode, tile_flags,
+ &mt->base.bo);
+ if (ret) {
+ FREE(mt);
+ return NULL;
+ }
+ mt->base.domain = NOUVEAU_BO_VRAM;
+
+ return pt;
+}
+
+struct pipe_resource *
+nvc0_miptree_from_handle(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+ struct nvc0_miptree *mt;
+ unsigned stride;
+
+ /* only supports 2D, non-mipmapped textures for the moment */
+ if ((templ->target != PIPE_TEXTURE_2D &&
+ templ->target != PIPE_TEXTURE_RECT) ||
+ templ->last_level != 0 ||
+ templ->depth0 != 1 ||
+ templ->array_size > 1)
+ return NULL;
+
+ mt = CALLOC_STRUCT(nvc0_miptree);
+ if (!mt)
+ return NULL;
+
+ mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
+ if (mt->base.bo == NULL) {
+ FREE(mt);
+ return NULL;
+ }
+
+ mt->base.base = *templ;
+ mt->base.vtbl = &nvc0_miptree_vtbl;
+ pipe_reference_init(&mt->base.base.reference, 1);
+ mt->base.base.screen = pscreen;
+ mt->level[0].pitch = stride;
+ mt->level[0].offset = 0;
+ mt->level[0].tile_mode = mt->base.bo->tile_mode;
+
+ /* no need to adjust bo reference count */
+ return &mt->base.base;
+}
+
+
+/* Surface functions.
+ */
+
+struct pipe_surface *
+nvc0_miptree_surface_new(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *templ)
+{
+ struct nvc0_miptree *mt = nvc0_miptree(pt); /* guaranteed */
+ struct nvc0_surface *ns;
+ struct pipe_surface *ps;
+ struct nvc0_miptree_level *lvl = &mt->level[templ->u.tex.level];
+
+ ns = CALLOC_STRUCT(nvc0_surface);
+ if (!ns)
+ return NULL;
+ ps = &ns->base;
+
+ pipe_reference_init(&ps->reference, 1);
+ pipe_resource_reference(&ps->texture, pt);
+ ps->context = pipe;
+ ps->format = pt->format;
+ ps->usage = templ->usage;
+ ps->u.tex.level = templ->u.tex.level;
+ ps->u.tex.first_layer = templ->u.tex.first_layer;
+ ps->u.tex.last_layer = templ->u.tex.last_layer;
+
+ ns->width = u_minify(pt->width0, ps->u.tex.level);
+ ns->height = u_minify(pt->height0, ps->u.tex.level);
+ ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1;
+ ns->offset = lvl->offset;
+
+ /* comment says there are going to be removed, but they're used by the st */
+ ps->width = ns->width;
+ ps->height = ns->height;
+
+ if (mt->layout_3d) {
+ unsigned zslice = ps->u.tex.first_layer;
+
+ /* TODO: re-layout the texture to use only depth 1 tiles in this case: */
+ if (ns->depth > 1 && (zslice & (NVC0_TILE_DEPTH(lvl->tile_mode) - 1)))
+ NOUVEAU_ERR("Creating unsupported 3D surface of slices [%u:%u].\n",
+ zslice, ps->u.tex.last_layer);
+
+ ns->offset += calc_zslice_offset(lvl->tile_mode, zslice, lvl->pitch,
+ util_format_get_nblocksy(pt->format,
+ ns->height));
+ } else {
+ ns->offset += mt->layer_stride * ps->u.tex.first_layer;
+ }
+
+ return ps;
+}
+
+void
+nvc0_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps)
+{
+ struct nvc0_surface *s = nvc0_surface(ps);
+
+ pipe_resource_reference(&ps->texture, NULL);
+
+ FREE(s);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c
new file mode 100644
index 0000000000..0629dad19c
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_mm.c
@@ -0,0 +1,274 @@
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+
+#include "nvc0_screen.h"
+
+#define MM_MIN_ORDER 7
+#define MM_MAX_ORDER 20
+
+#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1)
+
+#define MM_MIN_SIZE (1 << MM_MIN_ORDER)
+#define MM_MAX_SIZE (1 << MM_MAX_ORDER)
+
+struct mm_bucket {
+ struct list_head free;
+ struct list_head used;
+ struct list_head full;
+ int num_free;
+};
+
+struct nvc0_mman {
+ struct nouveau_device *dev;
+ struct mm_bucket bucket[MM_NUM_BUCKETS];
+ uint32_t storage_type;
+ uint32_t domain;
+ uint64_t allocated;
+};
+
+struct mm_slab {
+ struct list_head head;
+ struct nouveau_bo *bo;
+ struct nvc0_mman *cache;
+ int order;
+ int count;
+ int free;
+ uint32_t bits[0];
+};
+
+static int
+mm_slab_alloc(struct mm_slab *slab)
+{
+ int i, n, b;
+
+ if (slab->free == 0)
+ return -1;
+
+ for (i = 0; i < (slab->count + 31) / 32; ++i) {
+ b = ffs(slab->bits[i]) - 1;
+ if (b >= 0) {
+ n = i * 32 + b;
+ assert(n < slab->count);
+ slab->free--;
+ slab->bits[i] &= ~(1 << b);
+ return n;
+ }
+ }
+ return -1;
+}
+
+static INLINE void
+mm_slab_free(struct mm_slab *slab, int i)
+{
+ assert(i < slab->count);
+ slab->bits[i / 32] |= 1 << (i % 32);
+ slab->free++;
+ assert(slab->free <= slab->count);
+}
+
+static INLINE int
+mm_get_order(uint32_t size)
+{
+ int s = __builtin_clz(size) ^ 31;
+
+ if (size > (1 << s))
+ s += 1;
+ return s;
+}
+
+static struct mm_bucket *
+mm_bucket_by_order(struct nvc0_mman *cache, int order)
+{
+ if (order > MM_MAX_ORDER)
+ return NULL;
+ return &cache->bucket[MAX2(order, MM_MIN_ORDER) - MM_MIN_ORDER];
+}
+
+static struct mm_bucket *
+mm_bucket_by_size(struct nvc0_mman *cache, unsigned size)
+{
+ return mm_bucket_by_order(cache, mm_get_order(size));
+}
+
+/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */
+static INLINE uint32_t
+mm_default_slab_size(unsigned chunk_order)
+{
+ assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER);
+
+ static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] =
+ {
+ 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22
+ };
+
+ return 1 << slab_order[chunk_order - MM_MIN_ORDER];
+}
+
+static int
+mm_slab_new(struct nvc0_mman *cache, int chunk_order)
+{
+ struct mm_slab *slab;
+ int words, ret;
+ const uint32_t size = mm_default_slab_size(chunk_order);
+
+ words = ((size >> chunk_order) + 31) / 32;
+ assert(words);
+
+ slab = MALLOC(sizeof(struct mm_slab) + words * 4);
+ if (!slab)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ memset(&slab->bits[0], ~0, words * 4);
+
+ slab->bo = NULL;
+ ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size,
+ 0, cache->storage_type, &slab->bo);
+ if (ret) {
+ FREE(slab);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ LIST_INITHEAD(&slab->head);
+
+ slab->cache = cache;
+ slab->order = chunk_order;
+ slab->count = slab->free = size >> chunk_order;
+
+ LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free);
+
+ cache->allocated += size;
+
+ debug_printf("MM: new slab, total memory = %lu KiB\n",
+ cache->allocated / 1024);
+
+ return PIPE_OK;
+}
+
+/* @return token to identify slab or NULL if we just allocated a new bo */
+struct nvc0_mm_allocation *
+nvc0_mm_allocate(struct nvc0_mman *cache,
+ uint32_t size, struct nouveau_bo **bo, uint32_t *offset)
+{
+ struct mm_bucket *bucket;
+ struct mm_slab *slab;
+ struct nvc0_mm_allocation *alloc;
+ int ret;
+
+ bucket = mm_bucket_by_size(cache, size);
+ if (!bucket) {
+ ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size,
+ 0, cache->storage_type, bo);
+ if (ret)
+ debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret);
+
+ *offset = 0;
+ return NULL;
+ }
+
+ if (!LIST_IS_EMPTY(&bucket->used)) {
+ slab = LIST_ENTRY(struct mm_slab, bucket->used.next, head);
+ } else {
+ if (LIST_IS_EMPTY(&bucket->free)) {
+ mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER));
+ }
+ slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head);
+
+ LIST_DEL(&slab->head);
+ LIST_ADD(&slab->head, &bucket->used);
+ }
+
+ *offset = mm_slab_alloc(slab) << slab->order;
+
+ alloc = MALLOC_STRUCT(nvc0_mm_allocation);
+ if (!alloc)
+ return NULL;
+
+ nouveau_bo_ref(slab->bo, bo);
+
+ if (slab->free == 0) {
+ LIST_DEL(&slab->head);
+ LIST_ADD(&slab->head, &bucket->full);
+ }
+
+ alloc->next = NULL;
+ alloc->offset = *offset;
+ alloc->priv = (void *)slab;
+
+ return alloc;
+}
+
+void
+nvc0_mm_free(struct nvc0_mm_allocation *alloc)
+{
+ struct mm_slab *slab = (struct mm_slab *)alloc->priv;
+ struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order);
+
+ mm_slab_free(slab, alloc->offset >> slab->order);
+
+ if (slab->free == 1) {
+ LIST_DEL(&slab->head);
+
+ if (slab->count > 1)
+ LIST_ADDTAIL(&slab->head, &bucket->used);
+ else
+ LIST_ADDTAIL(&slab->head, &bucket->free);
+ }
+
+ FREE(alloc);
+}
+
+struct nvc0_mman *
+nvc0_mm_create(struct nouveau_device *dev, uint32_t domain,
+ uint32_t storage_type)
+{
+ struct nvc0_mman *cache = MALLOC_STRUCT(nvc0_mman);
+ int i;
+
+ if (!cache)
+ return NULL;
+
+ cache->dev = dev;
+ cache->domain = domain;
+ cache->storage_type = storage_type;
+ cache->allocated = 0;
+
+ for (i = 0; i < MM_NUM_BUCKETS; ++i) {
+ LIST_INITHEAD(&cache->bucket[i].free);
+ LIST_INITHEAD(&cache->bucket[i].used);
+ LIST_INITHEAD(&cache->bucket[i].full);
+ }
+
+ return cache;
+}
+
+static INLINE void
+nvc0_mm_free_slabs(struct list_head *head)
+{
+ struct mm_slab *slab, *next;
+
+ LIST_FOR_EACH_ENTRY_SAFE(slab, next, head, head) {
+ LIST_DEL(&slab->head);
+ nouveau_bo_ref(NULL, &slab->bo);
+ FREE(slab);
+ }
+}
+
+void
+nvc0_mm_destroy(struct nvc0_mman *cache)
+{
+ int i;
+
+ for (i = 0; i < MM_NUM_BUCKETS; ++i) {
+ if (!LIST_IS_EMPTY(&cache->bucket[i].used) ||
+ !LIST_IS_EMPTY(&cache->bucket[i].full))
+ debug_printf("WARNING: destroying GPU memory cache "
+ "with some buffers still in use\n");
+
+ nvc0_mm_free_slabs(&cache->bucket[i].free);
+ nvc0_mm_free_slabs(&cache->bucket[i].used);
+ nvc0_mm_free_slabs(&cache->bucket[i].full);
+ }
+}
+
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
new file mode 100644
index 0000000000..304a191976
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc.c
@@ -0,0 +1,693 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define NOUVEAU_DEBUG 1
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+boolean
+nvc0_insn_can_load(struct nv_instruction *nvi, int s,
+ struct nv_instruction *ld)
+{
+ int i;
+
+ if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) {
+ if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s)))
+ return FALSE;
+ if (!(nvc0_op_info_table[nvi->opcode].immediate & 4))
+ if (ld->src[0]->value->reg.imm.u32 & 0xfff)
+ return FALSE;
+ } else
+ if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s)))
+ return FALSE;
+
+ if (ld->indirect >= 0)
+ return FALSE;
+
+ for (i = 0; i < 3 && nvi->src[i]; ++i)
+ if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
+ return FALSE;
+
+ return TRUE;
+}
+
+/* Return whether this instruction can be executed conditionally. */
+boolean
+nvc0_insn_is_predicateable(struct nv_instruction *nvi)
+{
+ int s;
+
+ if (!nv_op_predicateable(nvi->opcode))
+ return FALSE;
+ if (nvi->predicate >= 0)
+ return FALSE;
+ for (s = 0; s < 4 && nvi->src[s]; ++s)
+ if (nvi->src[s]->value->reg.file == NV_FILE_IMM)
+ return FALSE;
+ return TRUE;
+}
+
+int
+nvc0_insn_refcount(struct nv_instruction *nvi)
+{
+ int rc = 0;
+ int i;
+ for (i = 0; i < 5 && nvi->def[i]; ++i) {
+ if (!nvi->def[i])
+ return rc;
+ rc += nvi->def[i]->refc;
+ }
+ return rc;
+}
+
+int
+nvc0_pc_replace_value(struct nv_pc *pc,
+ struct nv_value *old_val,
+ struct nv_value *new_val)
+{
+ int i, n, s;
+
+ if (old_val == new_val)
+ return old_val->refc;
+
+ for (i = 0, n = 0; i < pc->num_refs; ++i) {
+ if (pc->refs[i]->value == old_val) {
+ ++n;
+ for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s)
+ if (pc->refs[i]->insn->src[s] == pc->refs[i])
+ break;
+ assert(s < 6);
+ nv_reference(pc, pc->refs[i]->insn, s, new_val);
+ }
+ }
+ return n;
+}
+
+struct nv_value *
+nvc0_pc_find_constant(struct nv_ref *ref)
+{
+ struct nv_value *src;
+
+ if (!ref)
+ return NULL;
+
+ src = ref->value;
+ while (src->insn && src->insn->opcode == NV_OP_MOV) {
+ assert(!src->insn->src[0]->mod);
+ src = src->insn->src[0]->value;
+ }
+ if ((src->reg.file == NV_FILE_IMM) ||
+ (src->insn &&
+ src->insn->opcode == NV_OP_LD &&
+ src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
+ src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
+ return src;
+ return NULL;
+}
+
+struct nv_value *
+nvc0_pc_find_immediate(struct nv_ref *ref)
+{
+ struct nv_value *src = nvc0_pc_find_constant(ref);
+
+ return (src && src->reg.file == NV_FILE_IMM) ? src : NULL;
+}
+
+static void
+nv_pc_free_refs(struct nv_pc *pc)
+{
+ int i;
+ for (i = 0; i < pc->num_refs; i += 64)
+ FREE(pc->refs[i]);
+ FREE(pc->refs);
+}
+
+static const char *
+edge_name(ubyte type)
+{
+ switch (type) {
+ case CFG_EDGE_FORWARD: return "forward";
+ case CFG_EDGE_BACK: return "back";
+ case CFG_EDGE_LOOP_ENTER: return "loop";
+ case CFG_EDGE_LOOP_LEAVE: return "break";
+ case CFG_EDGE_FAKE: return "fake";
+ default:
+ return "?";
+ }
+}
+
+void
+nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f,
+ void *priv)
+{
+ struct nv_basic_block *bb[64], *bbb[16], *b;
+ int j, p, pp;
+
+ bb[0] = root;
+ p = 1;
+ pp = 0;
+
+ while (p > 0) {
+ b = bb[--p];
+ b->priv = 0;
+
+ for (j = 1; j >= 0; --j) {
+ if (!b->out[j])
+ continue;
+
+ switch (b->out_kind[j]) {
+ case CFG_EDGE_BACK:
+ continue;
+ case CFG_EDGE_FORWARD:
+ case CFG_EDGE_FAKE:
+ if (++b->out[j]->priv == b->out[j]->num_in)
+ bb[p++] = b->out[j];
+ break;
+ case CFG_EDGE_LOOP_ENTER:
+ bb[p++] = b->out[j];
+ break;
+ case CFG_EDGE_LOOP_LEAVE:
+ bbb[pp++] = b->out[j];
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ f(priv, b);
+
+ if (!p) {
+ p = pp;
+ for (; pp > 0; --pp)
+ bb[pp - 1] = bbb[pp - 1];
+ }
+ }
+}
+
+static void
+nv_do_print_function(void *priv, struct nv_basic_block *b)
+{
+ struct nv_instruction *i;
+
+ debug_printf("=== BB %i ", b->id);
+ if (b->out[0])
+ debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
+ if (b->out[1])
+ debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
+ debug_printf("===\n");
+
+ i = b->phi;
+ if (!i)
+ i = b->entry;
+ for (; i; i = i->next)
+ nvc0_print_instruction(i);
+}
+
+void
+nvc0_print_function(struct nv_basic_block *root)
+{
+ if (root->subroutine)
+ debug_printf("SUBROUTINE %i\n", root->subroutine);
+ else
+ debug_printf("MAIN\n");
+
+ nvc0_pc_pass_in_order(root, nv_do_print_function, root);
+}
+
+void
+nvc0_print_program(struct nv_pc *pc)
+{
+ int i;
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i])
+ nvc0_print_function(pc->root[i]);
+}
+
+#if NOUVEAU_DEBUG > 1
+static void
+nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
+{
+ int i;
+
+ b->pass_seq = pc->pass_seq;
+
+ fprintf(f, "\t%i [shape=box]\n", b->id);
+
+ for (i = 0; i < 2; ++i) {
+ if (!b->out[i])
+ continue;
+ switch (b->out_kind[i]) {
+ case CFG_EDGE_FORWARD:
+ fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
+ break;
+ case CFG_EDGE_LOOP_ENTER:
+ fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
+ break;
+ case CFG_EDGE_LOOP_LEAVE:
+ fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
+ break;
+ case CFG_EDGE_BACK:
+ fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
+ continue;
+ case CFG_EDGE_FAKE:
+ fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ if (b->out[i]->pass_seq < pc->pass_seq)
+ nv_do_print_cfgraph(pc, f, b->out[i]);
+ }
+}
+
+/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
+static void
+nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
+{
+ FILE *f;
+
+ f = fopen(filepath, "a");
+ if (!f)
+ return;
+
+ fprintf(f, "digraph G {\n");
+
+ ++pc->pass_seq;
+
+ nv_do_print_cfgraph(pc, f, pc->root[subr]);
+
+ fprintf(f, "}\n");
+
+ fclose(f);
+}
+#endif
+
+static INLINE void
+nvc0_pc_print_binary(struct nv_pc *pc)
+{
+ unsigned i;
+
+ NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8);
+
+ for (i = 0; i < pc->emit_size / 4; i += 2) {
+ debug_printf("0x%08x ", pc->emit[i + 0]);
+ debug_printf("0x%08x ", pc->emit[i + 1]);
+ if ((i % 16) == 15)
+ debug_printf("\n");
+ }
+ debug_printf("\n");
+}
+
+static int
+nvc0_emit_program(struct nv_pc *pc)
+{
+ uint32_t *code = pc->emit;
+ int n;
+
+ NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
+
+ pc->emit_pos = 0;
+ for (n = 0; n < pc->num_blocks; ++n) {
+ struct nv_instruction *i;
+ struct nv_basic_block *b = pc->bb_list[n];
+
+ for (i = b->entry; i; i = i->next) {
+ nvc0_emit_instruction(pc, i);
+ pc->emit += 2;
+ pc->emit_pos += 8;
+ }
+ }
+ assert(pc->emit == &code[pc->emit_size / 4]);
+
+ pc->emit[0] = 0x00001de7;
+ pc->emit[1] = 0x80000000;
+ pc->emit_size += 8;
+
+ pc->emit = code;
+
+#ifdef NOUVEAU_DEBUG
+ nvc0_pc_print_binary(pc);
+#else
+ debug_printf("not printing binary\n");
+#endif
+ return 0;
+}
+
+int
+nvc0_generate_code(struct nvc0_translation_info *ti)
+{
+ struct nv_pc *pc;
+ int ret;
+ int i;
+
+ pc = CALLOC_STRUCT(nv_pc);
+ if (!pc)
+ return 1;
+
+ pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT;
+
+ pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0]));
+ if (!pc->root) {
+ FREE(pc);
+ return 1;
+ }
+ pc->num_subroutines = ti->num_subrs;
+
+ ret = nvc0_tgsi_to_nc(pc, ti);
+ if (ret)
+ goto out;
+#if NOUVEAU_DEBUG > 1
+ nvc0_print_program(pc);
+#endif
+
+ pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE;
+
+ /* optimization */
+ ret = nvc0_pc_exec_pass0(pc);
+ if (ret)
+ goto out;
+#ifdef NOUVEAU_DEBUG
+ nvc0_print_program(pc);
+#endif
+
+ /* register allocation */
+ ret = nvc0_pc_exec_pass1(pc);
+ if (ret)
+ goto out;
+#if NOUVEAU_DEBUG > 1
+ nvc0_print_program(pc);
+ nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
+#endif
+
+ /* prepare for emission */
+ ret = nvc0_pc_exec_pass2(pc);
+ if (ret)
+ goto out;
+ assert(!(pc->emit_size % 8));
+
+ pc->emit = CALLOC(pc->emit_size / 4 + 2, 4);
+ if (!pc->emit) {
+ ret = 3;
+ goto out;
+ }
+ ret = nvc0_emit_program(pc);
+ if (ret)
+ goto out;
+
+ ti->prog->code = pc->emit;
+ ti->prog->code_base = 0;
+ ti->prog->code_size = pc->emit_size;
+ ti->prog->parm_size = 0;
+
+ ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1);
+
+ ti->prog->relocs = pc->reloc_entries;
+ ti->prog->num_relocs = pc->num_relocs;
+
+ NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
+
+out:
+ nv_pc_free_refs(pc);
+
+ for (i = 0; i < pc->num_blocks; ++i)
+ FREE(pc->bb_list[i]);
+ if (pc->root)
+ FREE(pc->root);
+ if (ret) {
+ /* on success, these will be referenced by struct nvc0_program */
+ if (pc->emit)
+ FREE(pc->emit);
+ if (pc->immd_buf)
+ FREE(pc->immd_buf);
+ if (pc->reloc_entries)
+ FREE(pc->reloc_entries);
+ }
+ FREE(pc);
+ return ret;
+}
+
+static void
+nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
+{
+ if (!b->phi) {
+ i->prev = NULL;
+ b->phi = i;
+ i->next = b->entry;
+ if (b->entry) {
+ assert(!b->entry->prev && b->exit);
+ b->entry->prev = i;
+ } else {
+ b->entry = i;
+ b->exit = i;
+ }
+ } else {
+ assert(b->entry);
+ if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
+ assert(b->entry == b->exit);
+ b->entry->next = i;
+ i->prev = b->entry;
+ b->entry = i;
+ b->exit = i;
+ } else { /* insert before entry */
+ assert(b->entry->prev && b->exit);
+ i->next = b->entry;
+ i->prev = b->entry->prev;
+ b->entry->prev = i;
+ i->prev->next = i;
+ }
+ }
+}
+
+void
+nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
+{
+ if (i->opcode == NV_OP_PHI) {
+ nvbb_insert_phi(b, i);
+ } else {
+ i->prev = b->exit;
+ if (b->exit)
+ b->exit->next = i;
+ b->exit = i;
+ if (!b->entry)
+ b->entry = i;
+ else
+ if (i->prev && i->prev->opcode == NV_OP_PHI)
+ b->entry = i;
+ }
+
+ i->bb = b;
+ b->num_instructions++;
+}
+
+void
+nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
+{
+ if (!at->next) {
+ nvc0_insn_append(at->bb, ni);
+ return;
+ }
+ ni->next = at->next;
+ ni->prev = at;
+ ni->next->prev = ni;
+ ni->prev->next = ni;
+}
+
+void
+nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni)
+{
+ nvc0_insn_insert_after(at, ni);
+ nvc0_insns_permute(at, ni);
+}
+
+void
+nvc0_insn_delete(struct nv_instruction *nvi)
+{
+ struct nv_basic_block *b = nvi->bb;
+ int s;
+
+ /* debug_printf("REM: "); nv_print_instruction(nvi); */
+
+ for (s = 0; s < 6 && nvi->src[s]; ++s)
+ nv_reference(NULL, nvi, s, NULL);
+
+ if (nvi->next)
+ nvi->next->prev = nvi->prev;
+ else {
+ assert(nvi == b->exit);
+ b->exit = nvi->prev;
+ }
+
+ if (nvi->prev)
+ nvi->prev->next = nvi->next;
+
+ if (nvi == b->entry) {
+ /* PHIs don't get hooked to b->entry */
+ b->entry = nvi->next;
+ assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
+ }
+
+ if (nvi == b->phi) {
+ if (nvi->opcode != NV_OP_PHI)
+ NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n");
+
+ assert(!nvi->prev);
+ if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
+ b->phi = NULL;
+ else
+ b->phi = nvi->next;
+ }
+}
+
+void
+nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2)
+{
+ struct nv_basic_block *b = i1->bb;
+
+ assert(i1->opcode != NV_OP_PHI &&
+ i2->opcode != NV_OP_PHI);
+ assert(i1->next == i2);
+
+ if (b->exit == i2)
+ b->exit = i1;
+
+ if (b->entry == i1)
+ b->entry = i2;
+
+ i2->prev = i1->prev;
+ i1->next = i2->next;
+ i2->next = i1;
+ i1->prev = i2;
+
+ if (i2->prev)
+ i2->prev->next = i2;
+ if (i1->next)
+ i1->next->prev = i1;
+}
+
+void
+nvc0_bblock_attach(struct nv_basic_block *parent,
+ struct nv_basic_block *b, ubyte edge_kind)
+{
+ assert(b->num_in < 8);
+
+ if (parent->out[0]) {
+ assert(!parent->out[1]);
+ parent->out[1] = b;
+ parent->out_kind[1] = edge_kind;
+ } else {
+ parent->out[0] = b;
+ parent->out_kind[0] = edge_kind;
+ }
+
+ b->in[b->num_in] = parent;
+ b->in_kind[b->num_in++] = edge_kind;
+}
+
+/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
+
+boolean
+nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
+{
+ int j;
+
+ if (b == d)
+ return TRUE;
+
+ for (j = 0; j < b->num_in; ++j)
+ if ((b->in_kind[j] != CFG_EDGE_BACK) &&
+ !nvc0_bblock_dominated_by(b->in[j], d))
+ return FALSE;
+
+ return j ? TRUE : FALSE;
+}
+
+/* check if @bf (future) can be reached from @bp (past), stop at @bt */
+boolean
+nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
+ struct nv_basic_block *bt)
+{
+ struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
+ int i, p, n;
+
+ p = 0;
+ n = 1;
+ q[0] = bp;
+
+ while (p < n) {
+ b = q[p++];
+
+ if (b == bf)
+ break;
+ if (b == bt)
+ continue;
+ assert(n <= (1024 - 2));
+
+ for (i = 0; i < 2; ++i) {
+ if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
+ q[n] = b->out[i];
+ q[n++]->priv = 1;
+ }
+ }
+ }
+ for (--n; n >= 0; --n)
+ q[n]->priv = 0;
+
+ return (b == bf);
+}
+
+static struct nv_basic_block *
+nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
+{
+ struct nv_basic_block *out;
+ int i;
+
+ if (!nvc0_bblock_dominated_by(df, b)) {
+ for (i = 0; i < df->num_in; ++i) {
+ if (df->in_kind[i] == CFG_EDGE_BACK)
+ continue;
+ if (nvc0_bblock_dominated_by(df->in[i], b))
+ return df;
+ }
+ }
+ for (i = 0; i < 2 && df->out[i]; ++i) {
+ if (df->out_kind[i] == CFG_EDGE_BACK)
+ continue;
+ if ((out = nvbb_find_dom_frontier(b, df->out[i])))
+ return out;
+ }
+ return NULL;
+}
+
+struct nv_basic_block *
+nvc0_bblock_dom_frontier(struct nv_basic_block *b)
+{
+ struct nv_basic_block *df;
+ int i;
+
+ for (i = 0; i < 2 && b->out[i]; ++i)
+ if ((df = nvbb_find_dom_frontier(b, b->out[i])))
+ return df;
+ return NULL;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
new file mode 100644
index 0000000000..969cc68c59
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
@@ -0,0 +1,653 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NVC0_COMPILER_H__
+#define __NVC0_COMPILER_H__
+
+#include <stdio.h>
+
+#ifndef NOUVEAU_DBG
+#ifdef NOUVEAU_DEBUG
+# define NOUVEAU_DBG(args...) debug_printf(args);
+#else
+# define NOUVEAU_DBG(args...)
+#endif
+#endif
+
+#ifndef NOUVEAU_ERR
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
+#endif
+
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+
+/* pseudo opcodes */
+#define NV_OP_UNDEF 0
+#define NV_OP_BIND 1
+#define NV_OP_MERGE 2
+#define NV_OP_PHI 3
+#define NV_OP_SELECT 4
+#define NV_OP_NOP 5
+
+/**
+ * BIND forces source operand i into the same register as destination operand i,
+ * and the operands will be assigned consecutive registers (needed for TEX)
+ * SELECT forces its multiple source operands and its destination operand into
+ * one and the same register.
+ */
+
+/* base opcodes */
+#define NV_OP_LD 6
+#define NV_OP_ST 7
+#define NV_OP_MOV 8
+#define NV_OP_AND 9
+#define NV_OP_OR 10
+#define NV_OP_XOR 11
+#define NV_OP_SHL 12
+#define NV_OP_SHR 13
+#define NV_OP_NOT 14
+#define NV_OP_SET 15
+#define NV_OP_ADD 16
+#define NV_OP_SUB 17
+#define NV_OP_MUL 18
+#define NV_OP_MAD 19
+#define NV_OP_ABS 20
+#define NV_OP_NEG 21
+#define NV_OP_MAX 22
+#define NV_OP_MIN 23
+#define NV_OP_CVT 24
+#define NV_OP_CEIL 25
+#define NV_OP_FLOOR 26
+#define NV_OP_TRUNC 27
+#define NV_OP_SAD 28
+
+/* shader opcodes */
+#define NV_OP_VFETCH 29
+#define NV_OP_PFETCH 30
+#define NV_OP_EXPORT 31
+#define NV_OP_LINTERP 32
+#define NV_OP_PINTERP 33
+#define NV_OP_EMIT 34
+#define NV_OP_RESTART 35
+#define NV_OP_TEX 36
+#define NV_OP_TXB 37
+#define NV_OP_TXL 38
+#define NV_OP_TXF 39
+#define NV_OP_TXQ 40
+#define NV_OP_QUADOP 41
+#define NV_OP_DFDX 42
+#define NV_OP_DFDY 43
+#define NV_OP_KIL 44
+
+/* control flow opcodes */
+#define NV_OP_BRA 45
+#define NV_OP_CALL 46
+#define NV_OP_RET 47
+#define NV_OP_EXIT 48
+#define NV_OP_BREAK 49
+#define NV_OP_BREAKADDR 50
+#define NV_OP_JOINAT 51
+#define NV_OP_JOIN 52
+
+/* typed opcodes */
+#define NV_OP_ADD_F32 NV_OP_ADD
+#define NV_OP_ADD_B32 53
+#define NV_OP_MUL_F32 NV_OP_MUL
+#define NV_OP_MUL_B32 54
+#define NV_OP_ABS_F32 NV_OP_ABS
+#define NV_OP_ABS_S32 55
+#define NV_OP_NEG_F32 NV_OP_NEG
+#define NV_OP_NEG_S32 56
+#define NV_OP_MAX_F32 NV_OP_MAX
+#define NV_OP_MAX_S32 57
+#define NV_OP_MAX_U32 58
+#define NV_OP_MIN_F32 NV_OP_MIN
+#define NV_OP_MIN_S32 59
+#define NV_OP_MIN_U32 60
+#define NV_OP_SET_F32 61
+#define NV_OP_SET_S32 62
+#define NV_OP_SET_U32 63
+#define NV_OP_SAR 64
+#define NV_OP_RCP 65
+#define NV_OP_RSQ 66
+#define NV_OP_LG2 67
+#define NV_OP_SIN 68
+#define NV_OP_COS 69
+#define NV_OP_EX2 70
+#define NV_OP_PRESIN 71
+#define NV_OP_PREEX2 72
+#define NV_OP_SAT 73
+
+/* newly added opcodes */
+#define NV_OP_SET_F32_AND 74
+#define NV_OP_SET_F32_OR 75
+#define NV_OP_SET_F32_XOR 76
+#define NV_OP_SELP 77
+#define NV_OP_SLCT 78
+#define NV_OP_SLCT_F32 NV_OP_SLCT
+#define NV_OP_SLCT_S32 79
+#define NV_OP_SLCT_U32 80
+#define NV_OP_SUB_F32 NV_OP_SUB
+#define NV_OP_SUB_S32 81
+#define NV_OP_MAD_F32 NV_OP_MAD
+#define NV_OP_FSET_F32 82
+#define NV_OP_TXG 83
+
+#define NV_OP_COUNT 84
+
+/* nv50 files omitted */
+#define NV_FILE_GPR 0
+#define NV_FILE_COND 1
+#define NV_FILE_PRED 2
+#define NV_FILE_IMM 16
+#define NV_FILE_MEM_S 32
+#define NV_FILE_MEM_V 34
+#define NV_FILE_MEM_A 35
+#define NV_FILE_MEM_L 48
+#define NV_FILE_MEM_G 64
+#define NV_FILE_MEM_C(i) (80 + i)
+
+#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S)
+
+#define NV_MOD_NEG 1
+#define NV_MOD_ABS 2
+#define NV_MOD_NOT 4
+#define NV_MOD_SAT 8
+
+#define NV_TYPE_U8 0x00
+#define NV_TYPE_S8 0x01
+#define NV_TYPE_U16 0x02
+#define NV_TYPE_S16 0x03
+#define NV_TYPE_U32 0x04
+#define NV_TYPE_S32 0x05
+#define NV_TYPE_P32 0x07
+#define NV_TYPE_F32 0x09
+#define NV_TYPE_F64 0x0b
+#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | (n << 4))
+#define NV_TYPE_ANY 0xff
+
+#define NV_TYPE_ISINT(t) ((t) < 7)
+#define NV_TYPE_ISSGD(t) ((t) & 1)
+
+#define NV_CC_FL 0x0
+#define NV_CC_LT 0x1
+#define NV_CC_EQ 0x2
+#define NV_CC_LE 0x3
+#define NV_CC_GT 0x4
+#define NV_CC_NE 0x5
+#define NV_CC_GE 0x6
+#define NV_CC_U 0x8
+#define NV_CC_TR 0xf
+#define NV_CC_O 0x10
+#define NV_CC_C 0x11
+#define NV_CC_A 0x12
+#define NV_CC_S 0x13
+
+#define NV_PC_MAX_INSTRUCTIONS 2048
+#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
+
+#define NV_PC_MAX_BASIC_BLOCKS 1024
+
+struct nv_op_info {
+ uint base; /* e.g. ADD_S32 -> ADD */
+ char name[12];
+ uint8_t type;
+ uint8_t mods;
+ unsigned flow : 1;
+ unsigned commutative : 1;
+ unsigned vector : 1;
+ unsigned predicate : 1;
+ unsigned pseudo : 1;
+ unsigned immediate : 3;
+ unsigned memory : 3;
+};
+
+extern struct nv_op_info nvc0_op_info_table[];
+
+#define NV_BASEOP(op) (nvc0_op_info_table[op].base)
+#define NV_OPTYPE(op) (nvc0_op_info_table[op].type)
+
+static INLINE uint
+nv_op_base(uint opcode)
+{
+ return nvc0_op_info_table[opcode].base;
+}
+
+static INLINE boolean
+nv_is_texture_op(uint opcode)
+{
+ return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ);
+}
+
+static INLINE boolean
+nv_is_vector_op(uint opcode)
+{
+ return nvc0_op_info_table[opcode].vector ? TRUE : FALSE;
+}
+
+static INLINE boolean
+nv_op_commutative(uint opcode)
+{
+ return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE;
+}
+
+static INLINE uint8_t
+nv_op_supported_src_mods(uint opcode)
+{
+ return nvc0_op_info_table[opcode].mods;
+}
+
+static INLINE boolean
+nv_op_predicateable(uint opcode)
+{
+ return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE;
+}
+
+static INLINE uint
+nv_type_order(ubyte type)
+{
+ switch (type & 0xf) {
+ case NV_TYPE_U8:
+ case NV_TYPE_S8:
+ return 0;
+ case NV_TYPE_U16:
+ case NV_TYPE_S16:
+ return 1;
+ case NV_TYPE_U32:
+ case NV_TYPE_F32:
+ case NV_TYPE_S32:
+ case NV_TYPE_P32:
+ return 2;
+ case NV_TYPE_F64:
+ return 3;
+ }
+ assert(0);
+ return 0;
+}
+
+static INLINE uint
+nv_type_sizeof(ubyte type)
+{
+ if (type & 0xf0)
+ return (1 << nv_type_order(type)) * (type >> 4);
+ return 1 << nv_type_order(type);
+}
+
+static INLINE uint
+nv_type_sizeof_base(ubyte type)
+{
+ return 1 << nv_type_order(type);
+}
+
+struct nv_reg {
+ uint32_t address; /* for memory locations */
+ int id; /* for registers */
+ ubyte file;
+ ubyte size;
+ union {
+ int32_t s32;
+ int64_t s64;
+ uint64_t u64;
+ uint32_t u32;
+ float f32;
+ double f64;
+ } imm;
+};
+
+struct nv_range {
+ struct nv_range *next;
+ int bgn;
+ int end;
+};
+
+struct nv_ref;
+
+struct nv_value {
+ struct nv_reg reg;
+ struct nv_instruction *insn;
+ struct nv_value *join;
+ struct nv_ref *last_use;
+ int n;
+ struct nv_range *livei;
+ int refc;
+ struct nv_value *next;
+ struct nv_value *prev;
+};
+
+struct nv_ref {
+ struct nv_value *value;
+ struct nv_instruction *insn;
+ struct list_head list; /* connects uses of the same value */
+ uint8_t mod;
+ uint8_t flags;
+};
+
+struct nv_basic_block;
+
+struct nv_instruction {
+ struct nv_instruction *next;
+ struct nv_instruction *prev;
+ uint opcode;
+ uint serial;
+
+ struct nv_value *def[5];
+ struct nv_ref *src[6];
+
+ int8_t predicate; /* index of predicate src */
+ int8_t indirect; /* index of pointer src */
+
+ union {
+ struct {
+ uint8_t t; /* TIC binding */
+ uint8_t s; /* TSC binding */
+ } tex;
+ struct {
+ uint8_t d; /* output type */
+ uint8_t s; /* input type */
+ } cvt;
+ } ext;
+
+ struct nv_basic_block *bb;
+ struct nv_basic_block *target; /* target block of control flow insn */
+
+ unsigned cc : 5; /* condition code */
+ unsigned fixed : 1; /* don't optimize away (prematurely) */
+ unsigned terminator : 1;
+ unsigned join : 1;
+ unsigned set_cond : 4; /* 2nd byte */
+ unsigned saturate : 1;
+ unsigned centroid : 1;
+ unsigned flat : 1;
+ unsigned patch : 1;
+ unsigned lanes : 4; /* 3rd byte */
+ unsigned tex_dim : 2;
+ unsigned tex_array : 1;
+ unsigned tex_cube : 1;
+ unsigned tex_shadow : 1; /* 4th byte */
+ unsigned tex_live : 1;
+ unsigned tex_mask : 4;
+
+ uint8_t quadop;
+};
+
+static INLINE int
+nvi_vector_size(struct nv_instruction *nvi)
+{
+ int i;
+ assert(nvi);
+ for (i = 0; i < 5 && nvi->def[i]; ++i);
+ return i;
+}
+
+#define CFG_EDGE_FORWARD 0
+#define CFG_EDGE_BACK 1
+#define CFG_EDGE_LOOP_ENTER 2
+#define CFG_EDGE_LOOP_LEAVE 4
+#define CFG_EDGE_FAKE 8
+
+/* 'WALL' edge means where reachability check doesn't follow */
+/* 'LOOP' edge means just having to do with loops */
+#define IS_LOOP_EDGE(k) ((k) & 7)
+#define IS_WALL_EDGE(k) ((k) & 9)
+
+struct nv_basic_block {
+ struct nv_instruction *entry; /* first non-phi instruction */
+ struct nv_instruction *exit;
+ struct nv_instruction *phi; /* very first instruction */
+ int num_instructions;
+
+ struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
+ struct nv_basic_block *in[8]; /* hope that suffices */
+ uint num_in;
+ ubyte out_kind[2];
+ ubyte in_kind[8];
+
+ int id;
+ int subroutine;
+ uint priv; /* reset to 0 after you're done */
+ uint pass_seq;
+
+ uint32_t emit_pos; /* position, size in emitted code (in bytes) */
+ uint32_t emit_size;
+
+ uint32_t live_set[NV_PC_MAX_VALUES / 32];
+};
+
+struct nvc0_translation_info;
+
+struct nv_pc {
+ struct nv_basic_block **root;
+ struct nv_basic_block *current_block;
+ struct nv_basic_block *parent_block;
+
+ int loop_nesting_bound;
+ uint pass_seq;
+
+ struct nv_value values[NV_PC_MAX_VALUES];
+ struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS];
+ struct nv_ref **refs;
+ struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS];
+ int num_values;
+ int num_instructions;
+ int num_refs;
+ int num_blocks;
+ int num_subroutines;
+
+ int max_reg[4];
+
+ uint32_t *immd_buf; /* populated on emit */
+ unsigned immd_count;
+
+ uint32_t *emit;
+ uint32_t emit_size;
+ uint32_t emit_pos;
+
+ void *reloc_entries;
+ unsigned num_relocs;
+
+ /* optimization enables */
+ boolean opt_reload_elim;
+ boolean is_fragprog;
+};
+
+void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *);
+void nvc0_insn_insert_before(struct nv_instruction *, struct nv_instruction *);
+void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *);
+
+static INLINE struct nv_instruction *
+nv_alloc_instruction(struct nv_pc *pc, uint opcode)
+{
+ struct nv_instruction *insn;
+
+ insn = &pc->instructions[pc->num_instructions++];
+ assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
+
+ insn->opcode = opcode;
+ insn->cc = 0;
+ insn->indirect = -1;
+ insn->predicate = -1;
+
+ return insn;
+}
+
+static INLINE struct nv_instruction *
+new_instruction(struct nv_pc *pc, uint opcode)
+{
+ struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
+
+ nvc0_insn_append(pc->current_block, insn);
+ return insn;
+}
+
+static INLINE struct nv_instruction *
+new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode)
+{
+ struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
+
+ nvc0_insn_insert_after(at, insn);
+ return insn;
+}
+
+static INLINE struct nv_value *
+new_value(struct nv_pc *pc, ubyte file, ubyte size)
+{
+ struct nv_value *value = &pc->values[pc->num_values];
+
+ assert(pc->num_values < NV_PC_MAX_VALUES - 1);
+
+ value->n = pc->num_values++;
+ value->join = value;
+ value->reg.id = -1;
+ value->reg.file = file;
+ value->reg.size = size;
+ return value;
+}
+
+static INLINE struct nv_value *
+new_value_like(struct nv_pc *pc, struct nv_value *like)
+{
+ return new_value(pc, like->reg.file, like->reg.size);
+}
+
+static INLINE struct nv_ref *
+new_ref(struct nv_pc *pc, struct nv_value *val)
+{
+ int i;
+ struct nv_ref *ref;
+
+ if ((pc->num_refs % 64) == 0) {
+ const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
+ const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
+
+ pc->refs = REALLOC(pc->refs, old_size, new_size);
+
+ ref = CALLOC(64, sizeof(struct nv_ref));
+ for (i = 0; i < 64; ++i)
+ pc->refs[pc->num_refs + i] = &ref[i];
+ }
+
+ ref = pc->refs[pc->num_refs++];
+ ref->value = val;
+
+ LIST_INITHEAD(&ref->list);
+
+ ++val->refc;
+ return ref;
+}
+
+static INLINE struct nv_basic_block *
+new_basic_block(struct nv_pc *pc)
+{
+ struct nv_basic_block *bb;
+
+ if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS)
+ return NULL;
+
+ bb = CALLOC_STRUCT(nv_basic_block);
+
+ bb->id = pc->num_blocks;
+ pc->bb_list[pc->num_blocks++] = bb;
+ return bb;
+}
+
+static INLINE void
+nv_reference(struct nv_pc *pc,
+ struct nv_instruction *nvi, int c, struct nv_value *s)
+{
+ struct nv_ref **d = &nvi->src[c];
+ assert(c < 6);
+
+ if (*d) {
+ --(*d)->value->refc;
+ LIST_DEL(&(*d)->list);
+ }
+
+ if (s) {
+ if (!*d) {
+ *d = new_ref(pc, s);
+ (*d)->insn = nvi;
+ } else {
+ LIST_DEL(&(*d)->list);
+ (*d)->value = s;
+ ++(s->refc);
+ }
+ if (!s->last_use)
+ s->last_use = *d;
+ else
+ LIST_ADDTAIL(&s->last_use->list, &(*d)->list);
+
+ s->last_use = *d;
+ (*d)->insn = nvi;
+ } else {
+ *d = NULL;
+ }
+}
+
+/* nvc0_emit.c */
+void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *);
+
+/* nvc0_print.c */
+const char *nvc0_opcode_name(uint opcode);
+void nvc0_print_instruction(struct nv_instruction *);
+
+/* nvc0_pc.c */
+void nvc0_print_function(struct nv_basic_block *root);
+void nvc0_print_program(struct nv_pc *);
+
+boolean nvc0_insn_can_load(struct nv_instruction *, int s,
+ struct nv_instruction *);
+boolean nvc0_insn_is_predicateable(struct nv_instruction *);
+
+int nvc0_insn_refcount(struct nv_instruction *);
+void nvc0_insn_delete(struct nv_instruction *);
+void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *);
+
+void nvc0_bblock_attach(struct nv_basic_block *parent,
+ struct nv_basic_block *child, ubyte edge_kind);
+boolean nvc0_bblock_dominated_by(struct nv_basic_block *,
+ struct nv_basic_block *);
+boolean nvc0_bblock_reachable_by(struct nv_basic_block *future,
+ struct nv_basic_block *past,
+ struct nv_basic_block *final);
+struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *);
+
+int nvc0_pc_replace_value(struct nv_pc *pc,
+ struct nv_value *old_val,
+ struct nv_value *new_val);
+
+struct nv_value *nvc0_pc_find_immediate(struct nv_ref *);
+struct nv_value *nvc0_pc_find_constant(struct nv_ref *);
+
+typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);
+
+void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *);
+
+int nvc0_pc_exec_pass0(struct nv_pc *pc);
+int nvc0_pc_exec_pass1(struct nv_pc *pc);
+int nvc0_pc_exec_pass2(struct nv_pc *pc);
+
+int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *);
+
+#endif // NV50_COMPILER_H
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
new file mode 100644
index 0000000000..db8055d91c
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
@@ -0,0 +1,979 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+#define NVC0_FIXUP_CODE_RELOC 0
+#define NVC0_FIXUP_DATA_RELOC 1
+
+struct nvc0_fixup {
+ uint8_t type;
+ int8_t shift;
+ uint32_t mask;
+ uint32_t data;
+ uint32_t ofst;
+};
+
+void
+nvc0_relocate_program(struct nvc0_program *prog,
+ uint32_t code_base,
+ uint32_t data_base)
+{
+ struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs;
+ unsigned i;
+
+ for (i = 0; i < prog->num_relocs; ++i) {
+ uint32_t data;
+
+ switch (f[i].type) {
+ case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break;
+ case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break;
+ default:
+ data = f[i].data;
+ break;
+ }
+ data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift);
+
+ prog->code[f[i].ofst / 4] &= ~f[i].mask;
+ prog->code[f[i].ofst / 4] |= data & f[i].mask;
+ }
+}
+
+static void
+create_fixup(struct nv_pc *pc, uint8_t ty,
+ int w, uint32_t data, uint32_t m, int s)
+{
+ struct nvc0_fixup *f;
+
+ const unsigned size = sizeof(struct nvc0_fixup);
+ const unsigned n = pc->num_relocs;
+
+ if (!(n % 8))
+ pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size);
+
+ f = (struct nvc0_fixup *)pc->reloc_entries;
+
+ f[n].ofst = pc->emit_pos + w * 4;
+ f[n].type = ty;
+ f[n].data = data;
+ f[n].mask = m;
+ f[n].shift = s;
+
+ ++pc->num_relocs;
+}
+
+static INLINE ubyte
+SSIZE(struct nv_instruction *nvi, int s)
+{
+ return nvi->src[s]->value->reg.size;
+}
+
+static INLINE ubyte
+DSIZE(struct nv_instruction *nvi, int d)
+{
+ return nvi->def[d]->reg.size;
+}
+
+static INLINE struct nv_reg *
+SREG(struct nv_ref *ref)
+{
+ if (!ref)
+ return NULL;
+ return &ref->value->join->reg;
+}
+
+static INLINE struct nv_reg *
+DREG(struct nv_value *val)
+{
+ if (!val)
+ return NULL;
+ return &val->join->reg;
+}
+
+static INLINE ubyte
+SFILE(struct nv_instruction *nvi, int s)
+{
+ return nvi->src[s]->value->reg.file;
+}
+
+static INLINE ubyte
+DFILE(struct nv_instruction *nvi, int d)
+{
+ return nvi->def[0]->reg.file;
+}
+
+static INLINE void
+SID(struct nv_pc *pc, struct nv_ref *ref, int pos)
+{
+ pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32);
+}
+
+static INLINE void
+DID(struct nv_pc *pc, struct nv_value *val, int pos)
+{
+ pc->emit[pos / 32] |= (DREG(val) ? DREG(val)->id : 63) << (pos % 32);
+}
+
+static INLINE uint32_t
+get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */
+{
+ assert(ref->value->reg.file == NV_FILE_IMM);
+ return ref->value->reg.imm.u32;
+}
+
+static INLINE void
+set_immd_u32_l(struct nv_pc *pc, uint32_t u32)
+{
+ pc->emit[0] |= (u32 & 0x3f) << 26;
+ pc->emit[1] |= u32 >> 6;
+}
+
+static INLINE void
+set_immd_u32(struct nv_pc *pc, uint32_t u32)
+{
+ if ((pc->emit[0] & 0xf) == 0x2) {
+ set_immd_u32_l(pc, u32);
+ } else
+ if ((pc->emit[0] & 0xf) == 0x3) {
+ assert(!(pc->emit[1] & 0xc000));
+ pc->emit[1] |= 0xc000;
+ assert(!(u32 & 0xfff00000));
+ set_immd_u32_l(pc, u32);
+ } else {
+ assert(!(pc->emit[1] & 0xc000));
+ pc->emit[1] |= 0xc000;
+ assert(!(u32 & 0xfff));
+ set_immd_u32_l(pc, u32 >> 12);
+ }
+}
+
+static INLINE void
+set_immd(struct nv_pc *pc, struct nv_instruction *i, int s)
+{
+ set_immd_u32(pc, get_immd_u32(i->src[s]));
+}
+
+static INLINE void
+DVS(struct nv_pc *pc, struct nv_instruction *i)
+{
+ uint s = i->def[0]->reg.size;
+ int n;
+ for (n = 1; n < 4 && i->def[n]; ++n)
+ s += i->def[n]->reg.size;
+ pc->emit[0] |= ((s / 4) - 1) << 5;
+}
+
+static INLINE void
+SVS(struct nv_pc *pc, struct nv_ref *src)
+{
+ pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5;
+}
+
+static void
+set_pred(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (i->predicate >= 0) {
+ SID(pc, i->src[i->predicate], 6);
+ if (i->cc)
+ pc->emit[0] |= 0x2000; /* negate */
+ } else {
+ pc->emit[0] |= 0x1c00;
+ }
+}
+
+static INLINE void
+set_address_16(struct nv_pc *pc, struct nv_ref *src)
+{
+ pc->emit[0] |= (src->value->reg.address & 0x003f) << 26;
+ pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6;
+}
+
+static INLINE unsigned
+const_space_index(struct nv_instruction *i, int s)
+{
+ return SFILE(i, s) - NV_FILE_MEM_C(0);
+}
+
+static void
+emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op)
+{
+ pc->emit[0] = 0x00000007;
+ pc->emit[1] = op << 24;
+
+ if (op == 0x40 || (op >= 0x80 && op <= 0x98)) {
+ /* bra, exit, ret or kil */
+ pc->emit[0] |= 0x1e0;
+ set_pred(pc, i);
+ }
+
+ if (i->target) {
+ int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8);
+
+ /* we will need relocations only for global functions */
+ /*
+ create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000);
+ create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff);
+ */
+
+ pc->emit[0] |= (pcrel & 0x3f) << 26;
+ pc->emit[1] |= (pcrel >> 6) & 0x1ffff;
+ }
+}
+
+/* doesn't work for vfetch, export, ld, st, mov ... */
+static void
+emit_form_0(struct nv_pc *pc, struct nv_instruction *i)
+{
+ int s;
+
+ set_pred(pc, i);
+
+ DID(pc, i->def[0], 14);
+
+ for (s = 0; s < 3 && i->src[s]; ++s) {
+ if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
+ SFILE(i, s) <= NV_FILE_MEM_C(15)) {
+ assert(!(pc->emit[1] & 0xc000));
+ assert(s <= 1);
+ pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
+ set_address_16(pc, i->src[s]);
+ } else
+ if (SFILE(i, s) == NV_FILE_GPR) {
+ SID(pc, i->src[s], s ? ((s == 2) ? 49 : 26) : 20);
+ } else
+ if (SFILE(i, s) == NV_FILE_IMM) {
+ assert(!(pc->emit[1] & 0xc000));
+ assert(s == 1 || i->opcode == NV_OP_MOV);
+ set_immd(pc, i, s);
+ }
+ }
+}
+
+static void
+emit_form_1(struct nv_pc *pc, struct nv_instruction *i)
+{
+ int s;
+
+ set_pred(pc, i);
+
+ DID(pc, i->def[0], 14);
+
+ for (s = 0; s < 1 && i->src[s]; ++s) {
+ if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
+ SFILE(i, s) <= NV_FILE_MEM_C(15)) {
+ assert(!(pc->emit[1] & 0xc000));
+ assert(s <= 1);
+ pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
+ set_address_16(pc, i->src[s]);
+ } else
+ if (SFILE(i, s) == NV_FILE_GPR) {
+ SID(pc, i->src[s], 26);
+ } else
+ if (SFILE(i, s) == NV_FILE_IMM) {
+ assert(!(pc->emit[1] & 0xc000));
+ assert(s == 1 || i->opcode == NV_OP_MOV);
+ set_immd(pc, i, s);
+ }
+ }
+}
+
+static void
+emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (i->src[0]->mod & NV_MOD_ABS)
+ pc->emit[0] |= 1 << 7;
+ if (i->src[0]->mod & NV_MOD_NEG)
+ pc->emit[0] |= 1 << 9;
+ if (i->src[1]->mod & NV_MOD_ABS)
+ pc->emit[0] |= 1 << 6;
+ if (i->src[1]->mod & NV_MOD_NEG)
+ pc->emit[0] |= 1 << 8;
+}
+
+static void
+emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x50000000;
+
+ emit_form_0(pc, i);
+
+ emit_neg_abs_1_2(pc, i);
+
+ if (i->saturate)
+ pc->emit[1] |= 1 << 17;
+}
+
+static void
+emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x58000000;
+
+ emit_form_0(pc, i);
+
+ if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG)
+ pc->emit[1] |= 1 << 25;
+
+ if (i->saturate)
+ pc->emit[0] |= 1 << 5;
+}
+
+static void
+emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x30000000;
+
+ emit_form_0(pc, i);
+
+ if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG)
+ pc->emit[0] |= 1 << 9;
+
+ if (i->src[2]->mod & NV_MOD_NEG)
+ pc->emit[0] |= 1 << 8;
+
+ if (i->saturate)
+ pc->emit[0] |= 1 << 5;
+}
+
+static void
+emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x08000000;
+
+ if (NV_BASEOP(i->opcode) == NV_OP_MAX)
+ pc->emit[1] |= 0x001e0000;
+ else
+ pc->emit[1] |= 0x000e0000; /* predicate ? */
+
+ emit_form_0(pc, i);
+
+ emit_neg_abs_1_2(pc, i);
+
+ switch (i->opcode) {
+ case NV_OP_MIN_U32:
+ case NV_OP_MAX_U32:
+ pc->emit[0] |= 3;
+ break;
+ case NV_OP_MIN_S32:
+ case NV_OP_MAX_S32:
+ pc->emit[0] |= 3 | (1 << 5);
+ break;
+ case NV_OP_MIN_F32:
+ case NV_OP_MAX_F32:
+ default:
+ break;
+ }
+}
+
+static void
+emit_tex(struct nv_pc *pc, struct nv_instruction *i)
+{
+ int src1 = i->tex_array + i->tex_dim + i->tex_cube;
+
+ pc->emit[0] = 0x00000086;
+ pc->emit[1] = 0x80000000;
+
+ switch (i->opcode) {
+ case NV_OP_TEX: pc->emit[1] = 0x80000000; break;
+ case NV_OP_TXB: pc->emit[1] = 0x84000000; break;
+ case NV_OP_TXL: pc->emit[1] = 0x86000000; break;
+ case NV_OP_TXF: pc->emit[1] = 0x90000000; break;
+ case NV_OP_TXG: pc->emit[1] = 0xe0000000; break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (i->tex_array)
+ pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */
+ if (i->tex_shadow)
+ pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */
+
+ set_pred(pc, i);
+
+ DID(pc, i->def[0], 14);
+ SID(pc, i->src[0], 20);
+ SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */
+
+ pc->emit[1] |= i->tex_mask << 14;
+ pc->emit[1] |= (i->tex_dim - 1) << 20;
+ if (i->tex_cube)
+ pc->emit[1] |= 3 << 20;
+
+ assert(i->ext.tex.s < 16);
+
+ pc->emit[1] |= i->ext.tex.t;
+ pc->emit[1] |= i->ext.tex.s << 8;
+
+ if (i->tex_live)
+ pc->emit[0] |= 1 << 9;
+}
+
+/* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */
+static void
+emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0xc8000000;
+
+ set_pred(pc, i);
+
+ DID(pc, i->def[0], 14);
+ SID(pc, i->src[0], 20);
+
+ pc->emit[0] |= op << 26;
+
+ if (op >= 4) {
+ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9;
+ if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7;
+ } else {
+ assert(!i->src[0]->mod);
+ }
+}
+
+static void
+emit_quadop(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x48000000;
+
+ set_pred(pc, i);
+
+ assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR);
+
+ DID(pc, i->def[0], 14);
+ SID(pc, i->src[0], 20);
+ SID(pc, i->src[0], 26);
+
+ pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */
+ pc->emit[1] |= i->quadop;
+}
+
+static void
+emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
+{
+ i->quadop = 0x99;
+ i->lanes = 4;
+ emit_quadop(pc, i);
+}
+
+static void
+emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
+{
+ i->quadop = 0xa5;
+ i->lanes = 5;
+ emit_quadop(pc, i);
+}
+
+/* preparation op (preex2, presin / convert to fixed point) */
+static void
+emit_preop(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0x60000000;
+
+ if (i->opcode == NV_OP_PREEX2)
+ pc->emit[0] |= 0x20;
+
+ emit_form_1(pc, i);
+
+ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8;
+ if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6;
+}
+
+static void
+emit_shift(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000003;
+
+ switch (i->opcode) {
+ case NV_OP_SAR:
+ pc->emit[0] |= 0x20; /* fall through */
+ case NV_OP_SHR:
+ pc->emit[1] = 0x58000000;
+ break;
+ case NV_OP_SHL:
+ default:
+ pc->emit[1] = 0x60000000;
+ break;
+ }
+
+ emit_form_0(pc, i);
+}
+
+static void
+emit_bitop(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (SFILE(i, 1) == NV_FILE_IMM) {
+ pc->emit[0] = 0x00000002;
+ pc->emit[1] = 0x38000000;
+ } else {
+ pc->emit[0] = 0x00000003;
+ pc->emit[1] = 0x68000000;
+ }
+
+ switch (i->opcode) {
+ case NV_OP_OR:
+ pc->emit[0] |= 0x40;
+ break;
+ case NV_OP_XOR:
+ pc->emit[0] |= 0x80;
+ break;
+ case NV_OP_AND:
+ default:
+ break;
+ }
+
+ emit_form_0(pc, i);
+}
+
+static void
+emit_set(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+
+ switch (i->opcode) {
+ case NV_OP_SET_S32:
+ pc->emit[0] |= 0x20; /* fall through */
+ case NV_OP_SET_U32:
+ pc->emit[0] |= 0x3;
+ pc->emit[1] = 0x100e0000;
+ break;
+ case NV_OP_SET_F32_AND:
+ pc->emit[1] = 0x18000000;
+ break;
+ case NV_OP_SET_F32_OR:
+ pc->emit[1] = 0x18200000;
+ break;
+ case NV_OP_SET_F32_XOR:
+ pc->emit[1] = 0x18400000;
+ break;
+ case NV_OP_FSET_F32:
+ pc->emit[0] |= 0x20; /* fall through */
+ case NV_OP_SET_F32:
+ default:
+ pc->emit[1] = 0x180e0000;
+ break;
+ }
+
+ if (DFILE(i, 0) == NV_FILE_PRED) {
+ pc->emit[0] |= 0x1c000;
+ pc->emit[1] += 0x08000000;
+ }
+
+ pc->emit[1] |= i->set_cond << 23;
+
+ emit_form_0(pc, i);
+
+ emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */
+}
+
+static void
+emit_selp(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000004;
+ pc->emit[1] = 0x20000000;
+
+ emit_form_0(pc, i);
+
+ if (i->cc || (i->src[2]->mod & NV_MOD_NOT))
+ pc->emit[1] |= 1 << 20;
+}
+
+static void
+emit_slct(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+
+ switch (i->opcode) {
+ case NV_OP_SLCT_S32:
+ pc->emit[0] |= 0x20; /* fall through */
+ case NV_OP_SLCT_U32:
+ pc->emit[0] |= 0x3;
+ pc->emit[1] = 0x30000000;
+ break;
+ case NV_OP_SLCT_F32:
+ default:
+ pc->emit[1] = 0x38000000;
+ break;
+ }
+
+ emit_form_0(pc, i);
+
+ pc->emit[1] |= i->set_cond << 23;
+}
+
+static void
+emit_cvt(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000004;
+ pc->emit[1] = 0x10000000;
+
+ if (i->opcode != NV_OP_CVT)
+ i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode);
+
+ switch (i->ext.cvt.d) {
+ case NV_TYPE_F32:
+ switch (i->ext.cvt.s) {
+ case NV_TYPE_F32: pc->emit[1] = 0x10000000; break;
+ case NV_TYPE_S32: pc->emit[0] |= 0x200;
+ case NV_TYPE_U32: pc->emit[1] = 0x18000000; break;
+ }
+ break;
+ case NV_TYPE_S32: pc->emit[0] |= 0x80;
+ case NV_TYPE_U32:
+ switch (i->ext.cvt.s) {
+ case NV_TYPE_F32: pc->emit[1] = 0x14000000; break;
+ case NV_TYPE_S32: pc->emit[0] |= 0x200;
+ case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break;
+ }
+ break;
+ default:
+ assert(!"cvt: unknown type");
+ break;
+ }
+
+ if (i->opcode == NV_OP_FLOOR)
+ pc->emit[1] |= 0x00020000;
+ else
+ if (i->opcode == NV_OP_CEIL)
+ pc->emit[1] |= 0x00040000;
+ else
+ if (i->opcode == NV_OP_TRUNC)
+ pc->emit[1] |= 0x00060000;
+
+ if (i->saturate || i->opcode == NV_OP_SAT)
+ pc->emit[0] |= 0x20;
+
+ if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS)
+ pc->emit[0] |= 1 << 6;
+ if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG)
+ pc->emit[0] |= 1 << 8;
+
+ pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20;
+ pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23;
+
+ emit_form_1(pc, i);
+}
+
+static void
+emit_interp(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000000;
+ pc->emit[1] = 0xc07e0000;
+
+ DID(pc, i->def[0], 14);
+
+ set_pred(pc, i);
+
+ if (i->indirect)
+ SID(pc, i->src[i->indirect], 20);
+ else
+ SID(pc, NULL, 20);
+
+ if (i->opcode == NV_OP_PINTERP) {
+ pc->emit[0] |= 0x040;
+ SID(pc, i->src[1], 26);
+ } else {
+ SID(pc, NULL, 26);
+ }
+
+ pc->emit[1] |= i->src[0]->value->reg.address & 0xffff;
+
+ if (i->centroid)
+ pc->emit[0] |= 0x100;
+ else
+ if (i->flat)
+ pc->emit[0] |= 0x080;
+}
+
+static void
+emit_vfetch(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x03f00006;
+ pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address;
+ if (i->patch)
+ pc->emit[0] |= 0x100;
+
+ set_pred(pc, i);
+
+ DVS(pc, i);
+ DID(pc, i->def[0], 14);
+
+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 26);
+}
+
+static void
+emit_export(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000006;
+ pc->emit[1] = 0x0a000000;
+ if (i->patch)
+ pc->emit[0] |= 0x100;
+
+ set_pred(pc, i);
+
+ assert(SFILE(i, 0) == NV_FILE_MEM_V);
+ assert(SFILE(i, 1) == NV_FILE_GPR);
+
+ SID(pc, i->src[1], 26); /* register source */
+ SVS(pc, i->src[0]);
+
+ pc->emit[1] |= i->src[0]->value->reg.address & 0xfff;
+
+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
+}
+
+static void
+emit_mov(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (i->opcode == NV_OP_MOV)
+ i->lanes = 0xf;
+
+ if (SFILE(i, 0) == NV_FILE_IMM) {
+ pc->emit[0] = 0x000001e2;
+ pc->emit[1] = 0x18000000;
+ } else
+ if (SFILE(i, 0) == NV_FILE_PRED) {
+ pc->emit[0] = 0x1c000004;
+ pc->emit[1] = 0x080e0000;
+ } else {
+ pc->emit[0] = 0x00000004 | (i->lanes << 5);
+ pc->emit[1] = 0x28000000;
+ }
+
+ emit_form_1(pc, i);
+}
+
+static void
+emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i)
+{
+ assert(NV_IS_MEMORY_FILE(SFILE(i, 0)));
+
+ switch (SSIZE(i, 0)) {
+ case 1:
+ if (NV_TYPE_ISSGD(i->ext.cvt.s))
+ pc->emit[0] |= 0x20;
+ break;
+ case 2:
+ pc->emit[0] |= 0x40;
+ if (NV_TYPE_ISSGD(i->ext.cvt.s))
+ pc->emit[0] |= 0x20;
+ break;
+ case 4: pc->emit[0] |= 0x80; break;
+ case 8: pc->emit[0] |= 0xa0; break;
+ case 16: pc->emit[0] |= 0xc0; break;
+ default:
+ NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0));
+ break;
+ }
+}
+
+static void
+emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000006;
+ pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10);
+
+ emit_ldst_size(pc, i);
+
+ set_pred(pc, i);
+ set_address_16(pc, i->src[0]);
+
+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
+ DID(pc, i->def[0], 14);
+}
+
+static void
+emit_ld(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (SFILE(i, 0) >= NV_FILE_MEM_C(0) &&
+ SFILE(i, 0) <= NV_FILE_MEM_C(15)) {
+ if (SSIZE(i, 0) == 4 && i->indirect < 0) {
+ i->lanes = 0xf;
+ emit_mov(pc, i);
+ } else {
+ emit_ld_const(pc, i);
+ }
+ } else {
+ NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0));
+ abort();
+ }
+}
+
+static void
+emit_st(struct nv_pc *pc, struct nv_instruction *i)
+{
+ NOUVEAU_ERR("emit_st: not handled yet\n");
+ abort();
+}
+
+void
+nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
+{
+ debug_printf("EMIT: "); nvc0_print_instruction(i);
+
+ switch (i->opcode) {
+ case NV_OP_VFETCH:
+ emit_vfetch(pc, i);
+ break;
+ case NV_OP_EXPORT:
+ if (!pc->is_fragprog)
+ emit_export(pc, i);
+ break;
+ case NV_OP_MOV:
+ emit_mov(pc, i);
+ break;
+ case NV_OP_LD:
+ emit_ld(pc, i);
+ break;
+ case NV_OP_ST:
+ emit_st(pc, i);
+ break;
+ case NV_OP_LINTERP:
+ case NV_OP_PINTERP:
+ emit_interp(pc, i);
+ break;
+ case NV_OP_ADD_F32:
+ emit_add_f32(pc, i);
+ break;
+ case NV_OP_AND:
+ case NV_OP_OR:
+ case NV_OP_XOR:
+ emit_bitop(pc, i);
+ break;
+ case NV_OP_CVT:
+ case NV_OP_ABS_F32:
+ case NV_OP_ABS_S32:
+ case NV_OP_NEG_F32:
+ case NV_OP_NEG_S32:
+ case NV_OP_SAT:
+ case NV_OP_CEIL:
+ case NV_OP_FLOOR:
+ case NV_OP_TRUNC:
+ emit_cvt(pc, i);
+ break;
+ case NV_OP_DFDX:
+ emit_ddx(pc, i);
+ break;
+ case NV_OP_DFDY:
+ emit_ddy(pc, i);
+ break;
+ case NV_OP_COS:
+ emit_flop(pc, i, 0);
+ break;
+ case NV_OP_SIN:
+ emit_flop(pc, i, 1);
+ break;
+ case NV_OP_EX2:
+ emit_flop(pc, i, 2);
+ break;
+ case NV_OP_LG2:
+ emit_flop(pc, i, 3);
+ break;
+ case NV_OP_RCP:
+ emit_flop(pc, i, 4);
+ break;
+ case NV_OP_RSQ:
+ emit_flop(pc, i, 5);
+ break;
+ case NV_OP_PRESIN:
+ case NV_OP_PREEX2:
+ emit_preop(pc, i);
+ break;
+ case NV_OP_MAD_F32:
+ emit_mad_f32(pc, i);
+ break;
+ case NV_OP_MAX_F32:
+ case NV_OP_MAX_S32:
+ case NV_OP_MAX_U32:
+ case NV_OP_MIN_F32:
+ case NV_OP_MIN_S32:
+ case NV_OP_MIN_U32:
+ emit_minmax(pc, i);
+ break;
+ case NV_OP_MUL_F32:
+ emit_mul_f32(pc, i);
+ break;
+ case NV_OP_SET_F32:
+ case NV_OP_SET_F32_AND:
+ case NV_OP_SET_F32_OR:
+ case NV_OP_SET_F32_XOR:
+ case NV_OP_SET_S32:
+ case NV_OP_SET_U32:
+ case NV_OP_FSET_F32:
+ emit_set(pc, i);
+ break;
+ case NV_OP_SHL:
+ case NV_OP_SHR:
+ case NV_OP_SAR:
+ emit_shift(pc, i);
+ break;
+ case NV_OP_TEX:
+ case NV_OP_TXB:
+ case NV_OP_TXL:
+ emit_tex(pc, i);
+ break;
+ case NV_OP_BRA:
+ emit_flow(pc, i, 0x40);
+ break;
+ case NV_OP_CALL:
+ emit_flow(pc, i, 0x50);
+ break;
+ case NV_OP_JOINAT:
+ emit_flow(pc, i, 0x60);
+ break;
+ case NV_OP_EXIT:
+ emit_flow(pc, i, 0x80);
+ break;
+ case NV_OP_RET:
+ emit_flow(pc, i, 0x90);
+ break;
+ case NV_OP_KIL:
+ emit_flow(pc, i, 0x98);
+ break;
+ case NV_OP_JOIN:
+ case NV_OP_NOP:
+ pc->emit[0] = 0x00003de4;
+ pc->emit[1] = 0x40000000;
+ break;
+ case NV_OP_SELP:
+ emit_selp(pc, i);
+ break;
+ case NV_OP_SLCT_F32:
+ case NV_OP_SLCT_S32:
+ case NV_OP_SLCT_U32:
+ emit_slct(pc, i);
+ break;
+ default:
+ NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
+ abort();
+ break;
+ }
+
+ if (i->join)
+ pc->emit[0] |= 0x10;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
new file mode 100644
index 0000000000..acc72bff14
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
@@ -0,0 +1,1236 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+#define DESCEND_ARBITRARY(j, f) \
+do { \
+ b->pass_seq = ctx->pc->pass_seq; \
+ \
+ for (j = 0; j < 2; ++j) \
+ if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
+ f(ctx, b->out[j]); \
+} while (0)
+
+static INLINE boolean
+registers_interfere(struct nv_value *a, struct nv_value *b)
+{
+ if (a->reg.file != b->reg.file)
+ return FALSE;
+ if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
+ return FALSE;
+
+ assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);
+
+ if (a->join->reg.id < b->join->reg.id) {
+ return (a->join->reg.id + a->reg.size >= b->join->reg.id);
+ } else
+ if (a->join->reg.id > b->join->reg.id) {
+ return (b->join->reg.id + b->reg.size >= a->join->reg.id);
+ }
+
+ return FALSE;
+}
+
+static INLINE boolean
+values_equal(struct nv_value *a, struct nv_value *b)
+{
+ if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
+ return FALSE;
+ if (NV_IS_MEMORY_FILE(a->reg.file))
+ return a->reg.address == b->reg.address;
+ else
+ return a->join->reg.id == b->join->reg.id;
+}
+
+#if 0
+static INLINE boolean
+inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
+{
+ int si, di;
+
+ for (di = 0; di < 4 && a->def[di]; ++di)
+ for (si = 0; si < 5 && b->src[si]; ++si)
+ if (registers_interfere(a->def[di], b->src[si]->value))
+ return FALSE;
+
+ return TRUE;
+}
+
+/* Check whether we can swap the order of the instructions,
+ * where a & b may be either the earlier or the later one.
+ */
+static boolean
+inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
+{
+ return inst_commutation_check(a, b) && inst_commutation_check(b, a);
+}
+#endif
+
+static INLINE boolean
+inst_removable(struct nv_instruction *nvi)
+{
+ if (nvi->opcode == NV_OP_ST)
+ return FALSE;
+ return (!(nvi->terminator ||
+ nvi->join ||
+ nvi->target ||
+ nvi->fixed ||
+ nvc0_insn_refcount(nvi)));
+}
+
+static INLINE boolean
+inst_is_noop(struct nv_instruction *nvi)
+{
+ if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
+ return TRUE;
+ if (nvi->terminator || nvi->join)
+ return FALSE;
+ if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
+ return TRUE;
+ if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
+ return FALSE;
+ if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
+ return FALSE;
+
+ if (nvi->src[0]->value->join->reg.id < 0) {
+ NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
+ return TRUE;
+ }
+
+ if (nvi->opcode == NV_OP_SELECT)
+ if (!values_equal(nvi->def[0], nvi->src[1]->value))
+ return FALSE;
+ return values_equal(nvi->def[0], nvi->src[0]->value);
+}
+
+struct nv_pass {
+ struct nv_pc *pc;
+ int n;
+ void *priv;
+};
+
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
+
+static void
+nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
+{
+ struct nv_pc *pc = (struct nv_pc *)priv;
+ struct nv_basic_block *in;
+ struct nv_instruction *nvi, *next;
+ int j;
+
+ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
+
+ if (j >= 0) {
+ in = pc->bb_list[j];
+
+ /* check for no-op branches (BRA $PC+8) */
+ if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
+ in->emit_size -= 8;
+ pc->emit_size -= 8;
+
+ for (++j; j < pc->num_blocks; ++j)
+ pc->bb_list[j]->emit_pos -= 8;
+
+ nvc0_insn_delete(in->exit);
+ }
+ b->emit_pos = in->emit_pos + in->emit_size;
+ }
+
+ pc->bb_list[pc->num_blocks++] = b;
+
+ /* visit node */
+
+ for (nvi = b->entry; nvi; nvi = next) {
+ next = nvi->next;
+ if (inst_is_noop(nvi) ||
+ (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
+ nvc0_insn_delete(nvi);
+ } else
+ b->emit_size += 8;
+ }
+ pc->emit_size += b->emit_size;
+
+#ifdef NOUVEAU_DEBUG
+ if (!b->entry)
+ debug_printf("BB:%i is now empty\n", b->id);
+ else
+ debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
+#endif
+}
+
+static int
+nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
+{
+ struct nv_pass pass;
+
+ pass.pc = pc;
+
+ pc->pass_seq++;
+ nv_pass_flatten(&pass, root);
+
+ nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
+
+ return 0;
+}
+
+int
+nvc0_pc_exec_pass2(struct nv_pc *pc)
+{
+ int i, ret;
+
+ NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);
+
+ pc->num_blocks = 0; /* will reorder bb_list */
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
+ return ret;
+ return 0;
+}
+
+static INLINE boolean
+is_cspace_load(struct nv_instruction *nvi)
+{
+ if (!nvi)
+ return FALSE;
+ assert(nvi->indirect != 0);
+ return (nvi->opcode == NV_OP_LD &&
+ nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
+ nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
+}
+
+static INLINE boolean
+is_immd32_load(struct nv_instruction *nvi)
+{
+ if (!nvi)
+ return FALSE;
+ return (nvi->opcode == NV_OP_MOV &&
+ nvi->src[0]->value->reg.file == NV_FILE_IMM &&
+ nvi->src[0]->value->reg.size == 4);
+}
+
+static INLINE void
+check_swap_src_0_1(struct nv_instruction *nvi)
+{
+ static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+
+ struct nv_ref *src0 = nvi->src[0];
+ struct nv_ref *src1 = nvi->src[1];
+
+ if (!nv_op_commutative(nvi->opcode))
+ return;
+ assert(src0 && src1 && src0->value && src1->value);
+
+ if (is_cspace_load(src0->value->insn)) {
+ if (!is_cspace_load(src1->value->insn)) {
+ nvi->src[0] = src1;
+ nvi->src[1] = src0;
+ }
+ } else
+ if (is_immd32_load(src0->value->insn)) {
+ if (!is_cspace_load(src1->value->insn) &&
+ !is_immd32_load(src1->value->insn)) {
+ nvi->src[0] = src1;
+ nvi->src[1] = src0;
+ }
+ }
+
+ if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET)
+ nvi->set_cond = cc_swapped[nvi->set_cond];
+}
+
+static void
+nvi_set_indirect_load(struct nv_pc *pc,
+ struct nv_instruction *nvi, struct nv_value *val)
+{
+ for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
+ ++nvi->indirect);
+ assert(nvi->indirect < 6);
+ nv_reference(pc, nvi, nvi->indirect, val);
+}
+
+static int
+nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi, *ld;
+ int s;
+
+ for (nvi = b->entry; nvi; nvi = nvi->next) {
+ check_swap_src_0_1(nvi);
+
+ for (s = 0; s < 3 && nvi->src[s]; ++s) {
+ ld = nvi->src[s]->value->insn;
+ if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
+ continue;
+ if (!nvc0_insn_can_load(nvi, s, ld))
+ continue;
+
+ /* fold it ! */
+ nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
+ if (ld->indirect >= 0)
+ nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);
+
+ if (!nvc0_insn_refcount(ld))
+ nvc0_insn_delete(ld);
+ }
+ }
+ DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
+
+ return 0;
+}
+
+/* NOTE: Assumes loads have not yet been folded. */
+static int
+nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi, *mi, *next;
+ int j;
+ uint8_t mod;
+
+ for (nvi = b->entry; nvi; nvi = next) {
+ next = nvi->next;
+ if (nvi->opcode == NV_OP_SUB) {
+ nvi->src[1]->mod ^= NV_MOD_NEG;
+ nvi->opcode = NV_OP_ADD;
+ }
+
+ for (j = 0; j < 3 && nvi->src[j]; ++j) {
+ mi = nvi->src[j]->value->insn;
+ if (!mi)
+ continue;
+ if (mi->def[0]->refc > 1 || mi->predicate >= 0)
+ continue;
+
+ if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
+ else
+ if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
+ else
+ continue;
+ assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
+
+ mod |= mi->src[0]->mod;
+
+ if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
+ /* abs neg [abs] = abs */
+ mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
+ } else
+ if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
+ /* neg as opcode and modifier on same insn cannot occur */
+ /* neg neg abs = abs, neg neg = identity */
+ assert(j == 0);
+ if (mod & NV_MOD_ABS)
+ nvi->opcode = NV_OP_ABS;
+ else
+ nvi->opcode = NV_OP_MOV;
+ mod = 0;
+ }
+
+ if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
+ continue;
+
+ nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
+
+ nvi->src[j]->mod ^= mod;
+ }
+
+ if (nvi->opcode == NV_OP_SAT) {
+ mi = nvi->src[0]->value->insn;
+
+ if (mi->def[0]->refc > 1 ||
+ (mi->opcode != NV_OP_ADD &&
+ mi->opcode != NV_OP_MUL &&
+ mi->opcode != NV_OP_MAD))
+ continue;
+ mi->saturate = 1;
+ mi->def[0] = nvi->def[0];
+ mi->def[0]->insn = mi;
+ nvc0_insn_delete(nvi);
+ }
+ }
+ DESCEND_ARBITRARY(j, nv_pass_lower_mods);
+
+ return 0;
+}
+
+#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
+
+static void
+apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
+{
+ if (mod & NV_MOD_ABS) {
+ if (type == NV_TYPE_F32)
+ *val &= 0x7fffffff;
+ else
+ if ((*val) & (1 << 31))
+ *val = ~(*val) + 1;
+ }
+ if (mod & NV_MOD_NEG) {
+ if (type == NV_TYPE_F32)
+ *val ^= 0x80000000;
+ else
+ *val = ~(*val) + 1;
+ }
+ if (mod & NV_MOD_SAT) {
+ union {
+ float f;
+ uint32_t u;
+ int32_t i;
+ } u;
+ u.u = *val;
+ if (type == NV_TYPE_F32) {
+ u.f = CLAMP(u.f, -1.0f, 1.0f);
+ } else
+ if (type == NV_TYPE_U16) {
+ u.u = MIN2(u.u, 0xffff);
+ } else
+ if (type == NV_TYPE_S16) {
+ u.i = CLAMP(u.i, -32768, 32767);
+ }
+ *val = u.u;
+ }
+ if (mod & NV_MOD_NOT)
+ *val = ~*val;
+}
+
+static void
+constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
+ struct nv_value *src0, struct nv_value *src1)
+{
+ struct nv_value *val;
+ union {
+ float f32;
+ uint32_t u32;
+ int32_t s32;
+ } u0, u1, u;
+ ubyte type;
+
+ if (!nvi->def[0])
+ return;
+ type = NV_OPTYPE(nvi->opcode);
+
+ u.u32 = 0;
+ u0.u32 = src0->reg.imm.u32;
+ u1.u32 = src1->reg.imm.u32;
+
+ apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
+ apply_modifiers(&u1.u32, type, nvi->src[1]->mod);
+
+ switch (nvi->opcode) {
+ case NV_OP_MAD_F32:
+ if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
+ return;
+ /* fall through */
+ case NV_OP_MUL_F32:
+ u.f32 = u0.f32 * u1.f32;
+ break;
+ case NV_OP_MUL_B32:
+ u.u32 = u0.u32 * u1.u32;
+ break;
+ case NV_OP_ADD_F32:
+ u.f32 = u0.f32 + u1.f32;
+ break;
+ case NV_OP_ADD_B32:
+ u.u32 = u0.u32 + u1.u32;
+ break;
+ case NV_OP_SUB_F32:
+ u.f32 = u0.f32 - u1.f32;
+ break;
+ /*
+ case NV_OP_SUB_B32:
+ u.u32 = u0.u32 - u1.u32;
+ break;
+ */
+ default:
+ return;
+ }
+
+ val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
+ val->reg.imm.u32 = u.u32;
+
+ nv_reference(pc, nvi, 1, NULL);
+ nv_reference(pc, nvi, 0, val);
+
+ if (nvi->opcode == NV_OP_MAD_F32) {
+ nvi->src[1] = nvi->src[0];
+ nvi->src[0] = nvi->src[2];
+ nvi->src[2] = NULL;
+ nvi->opcode = NV_OP_ADD_F32;
+
+ if (val->reg.imm.u32 == 0) {
+ nvi->src[1] = NULL;
+ nvi->opcode = NV_OP_MOV;
+ }
+ } else {
+ nvi->opcode = NV_OP_MOV;
+ }
+}
+
+static void
+constant_operand(struct nv_pc *pc,
+ struct nv_instruction *nvi, struct nv_value *val, int s)
+{
+ union {
+ float f32;
+ uint32_t u32;
+ int32_t s32;
+ } u;
+ int shift;
+ int t = s ? 0 : 1;
+ uint op;
+ ubyte type;
+
+ if (!nvi->def[0])
+ return;
+ type = NV_OPTYPE(nvi->opcode);
+
+ u.u32 = val->reg.imm.u32;
+ apply_modifiers(&u.u32, type, nvi->src[s]->mod);
+
+ if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
+ nvi->opcode = NV_OP_MOV;
+ nv_reference(pc, nvi, t, NULL);
+ if (s) {
+ nvi->src[0] = nvi->src[1];
+ nvi->src[1] = NULL;
+ }
+ return;
+ }
+
+ switch (nvi->opcode) {
+ case NV_OP_MUL_F32:
+ if (u.f32 == 1.0f || u.f32 == -1.0f) {
+ if (u.f32 == -1.0f)
+ nvi->src[t]->mod ^= NV_MOD_NEG;
+ switch (nvi->src[t]->mod) {
+ case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
+ case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
+ case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
+ default:
+ return;
+ }
+ nvi->opcode = op;
+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
+ nv_reference(pc, nvi, 1, NULL);
+ nvi->src[0]->mod = 0;
+ } else
+ if (u.f32 == 2.0f || u.f32 == -2.0f) {
+ if (u.f32 == -2.0f)
+ nvi->src[t]->mod ^= NV_MOD_NEG;
+ nvi->opcode = NV_OP_ADD_F32;
+ nv_reference(pc, nvi, s, nvi->src[t]->value);
+ nvi->src[s]->mod = nvi->src[t]->mod;
+ }
+ case NV_OP_ADD_F32:
+ if (u.u32 == 0) {
+ switch (nvi->src[t]->mod) {
+ case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
+ case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
+ case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
+ case NV_MOD_NEG | NV_MOD_ABS:
+ op = NV_OP_CVT;
+ nvi->ext.cvt.s = nvi->ext.cvt.d = type;
+ break;
+ default:
+ return;
+ }
+ nvi->opcode = op;
+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
+ nv_reference(pc, nvi, 1, NULL);
+ if (nvi->opcode != NV_OP_CVT)
+ nvi->src[0]->mod = 0;
+ }
+ case NV_OP_ADD_B32:
+ if (u.u32 == 0) {
+ assert(nvi->src[t]->mod == 0);
+ nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
+ nvi->ext.cvt.s = nvi->ext.cvt.d = type;
+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
+ nv_reference(pc, nvi, 1, NULL);
+ }
+ break;
+ case NV_OP_MUL_B32:
+ /* multiplication by 0 already handled above */
+ assert(nvi->src[s]->mod == 0);
+ shift = ffs(u.s32) - 1;
+ if (shift == 0) {
+ nvi->opcode = NV_OP_MOV;
+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
+ nv_reference(pc, nvi, 1, NULL);
+ } else
+ if (u.s32 > 0 && u.s32 == (1 << shift)) {
+ nvi->opcode = NV_OP_SHL;
+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift;
+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
+ nv_reference(pc, nvi, 1, val);
+ break;
+ }
+ break;
+ case NV_OP_RCP:
+ u.f32 = 1.0f / u.f32;
+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ nvi->opcode = NV_OP_MOV;
+ assert(s == 0);
+ nv_reference(pc, nvi, 0, val);
+ break;
+ case NV_OP_RSQ:
+ u.f32 = 1.0f / sqrtf(u.f32);
+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ nvi->opcode = NV_OP_MOV;
+ assert(s == 0);
+ nv_reference(pc, nvi, 0, val);
+ break;
+ default:
+ break;
+ }
+}
+
+static int
+nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi, *next;
+ int j;
+
+ for (nvi = b->entry; nvi; nvi = next) {
+ struct nv_value *src0, *src1, *src;
+ int s;
+ uint8_t mod[4];
+
+ next = nvi->next;
+
+ src0 = nvc0_pc_find_immediate(nvi->src[0]);
+ src1 = nvc0_pc_find_immediate(nvi->src[1]);
+
+ if (src0 && src1)
+ constant_expression(ctx->pc, nvi, src0, src1);
+ else {
+ if (src0)
+ constant_operand(ctx->pc, nvi, src0, 0);
+ else
+ if (src1)
+ constant_operand(ctx->pc, nvi, src1, 1);
+ }
+
+ /* check if we can MUL + ADD -> MAD/FMA */
+ if (nvi->opcode != NV_OP_ADD)
+ continue;
+
+ src0 = nvi->src[0]->value;
+ src1 = nvi->src[1]->value;
+
+ if (SRC_IS_MUL(src0) && src0->refc == 1)
+ src = src0;
+ else
+ if (SRC_IS_MUL(src1) && src1->refc == 1)
+ src = src1;
+ else
+ continue;
+
+ /* could have an immediate from above constant_* */
+ if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
+ continue;
+ s = (src == src0) ? 0 : 1;
+
+ mod[0] = nvi->src[0]->mod;
+ mod[1] = nvi->src[1]->mod;
+ mod[2] = src->insn->src[0]->mod;
+ mod[3] = src->insn->src[0]->mod;
+
+ if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
+ continue;
+
+ nvi->opcode = NV_OP_MAD;
+ nv_reference(ctx->pc, nvi, s, NULL);
+ nvi->src[2] = nvi->src[!s];
+
+ nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
+ nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
+ nvi->src[0]->mod = mod[2] ^ mod[s];
+ nvi->src[1]->mod = mod[3];
+ }
+ DESCEND_ARBITRARY(j, nv_pass_lower_arith);
+
+ return 0;
+}
+
+/* TODO: redundant store elimination */
+
+struct mem_record {
+ struct mem_record *next;
+ struct nv_instruction *insn;
+ uint32_t ofst;
+ uint32_t base;
+ uint32_t size;
+};
+
+#define MEM_RECORD_POOL_SIZE 1024
+
+struct pass_reld_elim {
+ struct nv_pc *pc;
+
+ struct mem_record *imm;
+ struct mem_record *mem_v;
+ struct mem_record *mem_a;
+ struct mem_record *mem_c[16];
+ struct mem_record *mem_l;
+
+ struct mem_record pool[MEM_RECORD_POOL_SIZE];
+ int alloc;
+};
+
+static void
+combine_load(struct mem_record *rec, struct nv_instruction *ld)
+{
+ struct nv_instruction *fv = rec->insn;
+ struct nv_value *mem = ld->src[0]->value;
+ uint32_t size = rec->size + mem->reg.size;
+ int j;
+ int d = rec->size / 4;
+
+ assert(rec->size < 16);
+ if (rec->ofst > mem->reg.address) {
+ if ((size == 8 && mem->reg.address & 3) ||
+ (size > 8 && mem->reg.address & 7))
+ return;
+ rec->ofst = mem->reg.address;
+ for (j = 0; j < d; ++j)
+ fv->def[d + j] = fv->def[j];
+ d = 0;
+ } else
+ if ((size == 8 && rec->ofst & 3) ||
+ (size > 8 && rec->ofst & 7)) {
+ return;
+ }
+
+ for (j = 0; j < mem->reg.size / 4; ++j) {
+ fv->def[d] = ld->def[j];
+ fv->def[d++]->insn = fv;
+ }
+
+ fv->src[0]->value->reg.size = rec->size = size;
+
+ nvc0_insn_delete(ld);
+}
+
+static void
+combine_export(struct mem_record *rec, struct nv_instruction *ex)
+{
+
+}
+
+static INLINE void
+add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
+ uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
+{
+ struct mem_record *it = &ctx->pool[ctx->alloc++];
+
+ it->next = *rec;
+ *rec = it;
+ it->base = base;
+ it->ofst = ofst;
+ it->insn = nvi;
+ it->size = nvi->src[0]->value->reg.size;
+}
+
+/* vectorize and reuse loads from memory or of immediates */
+static int
+nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+ struct mem_record **rec, *it;
+ struct nv_instruction *ld, *next;
+ struct nv_value *mem;
+ uint32_t base, ofst;
+ int s;
+
+ for (ld = b->entry; ld; ld = next) {
+ next = ld->next;
+
+ if (is_cspace_load(ld)) {
+ mem = ld->src[0]->value;
+ rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
+ } else
+ if (ld->opcode == NV_OP_VFETCH) {
+ mem = ld->src[0]->value;
+ rec = &ctx->mem_a;
+ } else
+ if (ld->opcode == NV_OP_EXPORT) {
+ mem = ld->src[0]->value;
+ if (mem->reg.file != NV_FILE_MEM_V)
+ continue;
+ rec = &ctx->mem_v;
+ } else {
+ continue;
+ }
+ if (ld->def[0] && ld->def[0]->refc == 0)
+ continue;
+ ofst = mem->reg.address;
+ base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
+
+ for (it = *rec; it; it = it->next) {
+ if (it->base == base &&
+ ((it->ofst >> 4) == (ofst >> 4)) &&
+ ((it->ofst + it->size == ofst) ||
+ (it->ofst - mem->reg.size == ofst))) {
+ if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
+ continue;
+ if (it->ofst < ofst) {
+ if ((it->ofst & 0xf) == 4)
+ continue;
+ } else
+ if ((ofst & 0xf) == 4)
+ continue;
+ break;
+ }
+ }
+ if (it) {
+ switch (ld->opcode) {
+ case NV_OP_EXPORT: combine_export(it, ld); break;
+ default:
+ combine_load(it, ld);
+ break;
+ }
+ } else
+ if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
+ add_mem_record(ctx, rec, base, ofst, ld);
+ }
+ }
+
+ DESCEND_ARBITRARY(s, nv_pass_mem_opt);
+ return 0;
+}
+
+static void
+eliminate_store(struct mem_record *rec, struct nv_instruction *st)
+{
+}
+
+/* elimination of redundant stores */
+static int
+pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+ struct mem_record **rec, *it;
+ struct nv_instruction *st, *next;
+ struct nv_value *mem;
+ uint32_t base, ofst, size;
+ int s;
+
+ for (st = b->entry; st; st = next) {
+ next = st->next;
+
+ if (st->opcode == NV_OP_ST) {
+ mem = st->src[0]->value;
+ rec = &ctx->mem_l;
+ } else
+ if (st->opcode == NV_OP_EXPORT) {
+ mem = st->src[0]->value;
+ if (mem->reg.file != NV_FILE_MEM_V)
+ continue;
+ rec = &ctx->mem_v;
+ } else
+ if (st->opcode == NV_OP_ST) {
+ /* TODO: purge */
+ }
+ ofst = mem->reg.address;
+ base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
+ size = mem->reg.size;
+
+ for (it = *rec; it; it = it->next) {
+ if (it->base == base &&
+ (it->ofst <= ofst && (it->ofst + size) > ofst))
+ break;
+ }
+ if (it)
+ eliminate_store(it, st);
+ else
+ add_mem_record(ctx, rec, base, ofst, st);
+ }
+
+ DESCEND_ARBITRARY(s, nv_pass_mem_opt);
+ return 0;
+}
+
+/* TODO: properly handle loads from l[] memory in the presence of stores */
+static int
+nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+#if 0
+ struct load_record **rec, *it;
+ struct nv_instruction *ld, *next;
+ uint64_t data[2];
+ struct nv_value *val;
+ int j;
+
+ for (ld = b->entry; ld; ld = next) {
+ next = ld->next;
+ if (!ld->src[0])
+ continue;
+ val = ld->src[0]->value;
+ rec = NULL;
+
+ if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
+ data[0] = val->reg.id;
+ data[1] = 0;
+ rec = &ctx->mem_v;
+ } else
+ if (ld->opcode == NV_OP_LDA) {
+ data[0] = val->reg.id;
+ data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
+ if (val->reg.file >= NV_FILE_MEM_C(0) &&
+ val->reg.file <= NV_FILE_MEM_C(15))
+ rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
+ else
+ if (val->reg.file == NV_FILE_MEM_S)
+ rec = &ctx->mem_s;
+ else
+ if (val->reg.file == NV_FILE_MEM_L)
+ rec = &ctx->mem_l;
+ } else
+ if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
+ data[0] = val->reg.imm.u32;
+ data[1] = 0;
+ rec = &ctx->imm;
+ }
+
+ if (!rec || !ld->def[0]->refc)
+ continue;
+
+ for (it = *rec; it; it = it->next)
+ if (it->data[0] == data[0] && it->data[1] == data[1])
+ break;
+
+ if (it) {
+ if (ld->def[0]->reg.id >= 0)
+ it->value = ld->def[0];
+ else
+ if (!ld->fixed)
+ nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
+ } else {
+ if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
+ continue;
+ it = &ctx->pool[ctx->alloc++];
+ it->next = *rec;
+ it->data[0] = data[0];
+ it->data[1] = data[1];
+ it->value = ld->def[0];
+ *rec = it;
+ }
+ }
+
+ ctx->imm = NULL;
+ ctx->mem_s = NULL;
+ ctx->mem_v = NULL;
+ for (j = 0; j < 16; ++j)
+ ctx->mem_c[j] = NULL;
+ ctx->mem_l = NULL;
+ ctx->alloc = 0;
+
+ DESCEND_ARBITRARY(j, nv_pass_reload_elim);
+#endif
+ return 0;
+}
+
+static int
+nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ int i, c, j;
+
+ for (i = 0; i < ctx->pc->num_instructions; ++i) {
+ struct nv_instruction *nvi = &ctx->pc->instructions[i];
+ struct nv_value *def[4];
+
+ if (!nv_is_texture_op(nvi->opcode))
+ continue;
+ nvi->tex_mask = 0;
+
+ for (c = 0; c < 4; ++c) {
+ if (nvi->def[c]->refc)
+ nvi->tex_mask |= 1 << c;
+ def[c] = nvi->def[c];
+ }
+
+ j = 0;
+ for (c = 0; c < 4; ++c)
+ if (nvi->tex_mask & (1 << c))
+ nvi->def[j++] = def[c];
+ for (c = 0; c < 4; ++c)
+ if (!(nvi->tex_mask & (1 << c)))
+ nvi->def[j++] = def[c];
+ assert(j == 4);
+ }
+ return 0;
+}
+
+struct nv_pass_dce {
+ struct nv_pc *pc;
+ uint removed;
+};
+
+static int
+nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
+{
+ int j;
+ struct nv_instruction *nvi, *next;
+
+ for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
+ next = nvi->next;
+
+ if (inst_removable(nvi)) {
+ nvc0_insn_delete(nvi);
+ ++ctx->removed;
+ }
+ }
+ DESCEND_ARBITRARY(j, nv_pass_dce);
+
+ return 0;
+}
+
+#if 0
+/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
+ * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
+ * BREAK and dummy ELSE block.
+ */
+static INLINE boolean
+bb_is_if_else_endif(struct nv_basic_block *bb)
+{
+ if (!bb->out[0] || !bb->out[1])
+ return FALSE;
+
+ if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
+ return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
+ !bb->out[1]->out[1]);
+ } else {
+ return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
+ !bb->out[0]->out[1] &&
+ !bb->out[1]->out[1]);
+ }
+}
+
+/* predicate instructions and remove branch at the end */
+static void
+predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
+ struct nv_value *p, ubyte cc)
+{
+
+}
+#endif
+
+/* NOTE: Run this after register allocation, we can just cut out the cflow
+ * instructions and hook the predicates to the conditional OPs if they are
+ * not using immediates; better than inserting SELECT to join definitions.
+ *
+ * NOTE: Should adapt prior optimization to make this possible more often.
+ */
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ return 0;
+}
+
+/* local common subexpression elimination, stupid O(n^2) implementation */
+static int
+nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *ir, *ik, *next;
+ struct nv_instruction *entry = b->phi ? b->phi : b->entry;
+ int s;
+ unsigned int reps;
+
+ do {
+ reps = 0;
+ for (ir = entry; ir; ir = next) {
+ next = ir->next;
+ for (ik = entry; ik != ir; ik = ik->next) {
+ if (ir->opcode != ik->opcode || ir->fixed)
+ continue;
+
+ if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1])
+ continue;
+
+ if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
+ continue;
+
+ if (!values_equal(ik->def[0], ir->def[0]))
+ continue;
+
+ for (s = 0; s < 3; ++s) {
+ struct nv_value *a, *b;
+
+ if (!ik->src[s]) {
+ if (ir->src[s])
+ break;
+ continue;
+ }
+ if (ik->src[s]->mod != ir->src[s]->mod)
+ break;
+ a = ik->src[s]->value;
+ b = ir->src[s]->value;
+ if (a == b)
+ continue;
+ if (a->reg.file != b->reg.file ||
+ a->reg.id < 0 ||
+ a->reg.id != b->reg.id)
+ break;
+ }
+ if (s == 3) {
+ nvc0_insn_delete(ir);
+ ++reps;
+ nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]);
+ break;
+ }
+ }
+ }
+ } while(reps);
+
+ DESCEND_ARBITRARY(s, nv_pass_cse);
+
+ return 0;
+}
+
+/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
+ * neighbouring registers. CSE might have messed this up.
+ */
+static int
+nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_value *val;
+ struct nv_instruction *bnd, *nvi, *next;
+ int s, t;
+
+ for (bnd = b->entry; bnd; bnd = next) {
+ next = bnd->next;
+ if (bnd->opcode != NV_OP_BIND)
+ continue;
+ for (s = 0; s < 4 && bnd->src[s]; ++s) {
+ val = bnd->src[s]->value;
+ for (t = s + 1; t < 4 && bnd->src[t]; ++t) {
+ if (bnd->src[t]->value != val)
+ continue;
+ nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
+ nvi->def[0] = new_value_like(ctx->pc, val);
+ nvi->def[0]->insn = nvi;
+ nv_reference(ctx->pc, nvi, 0, val);
+ nvc0_insn_insert_before(bnd, nvi);
+
+ nv_reference(ctx->pc, bnd, t, nvi->def[0]);
+ }
+ }
+ }
+ DESCEND_ARBITRARY(t, nv_pass_fix_bind);
+
+ return 0;
+}
+
+static int
+nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
+{
+ struct pass_reld_elim *reldelim;
+ struct nv_pass pass;
+ struct nv_pass_dce dce;
+ int ret;
+
+ pass.n = 0;
+ pass.pc = pc;
+
+ /* Do this first, so we don't have to pay attention
+ * to whether sources are supported memory loads.
+ */
+ pc->pass_seq++;
+ ret = nv_pass_lower_arith(&pass, root);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nv_pass_lower_mods(&pass, root);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nvc0_pass_fold_loads(&pass, root);
+ if (ret)
+ return ret;
+
+ if (pc->opt_reload_elim) {
+ reldelim = CALLOC_STRUCT(pass_reld_elim);
+ reldelim->pc = pc;
+
+ pc->pass_seq++;
+ ret = nv_pass_reload_elim(reldelim, root);
+ if (ret) {
+ FREE(reldelim);
+ return ret;
+ }
+ memset(reldelim, 0, sizeof(struct pass_reld_elim));
+ reldelim->pc = pc;
+ }
+
+ pc->pass_seq++;
+ ret = nv_pass_cse(&pass, root);
+ if (ret)
+ return ret;
+
+ dce.pc = pc;
+ do {
+ dce.removed = 0;
+ pc->pass_seq++;
+ ret = nv_pass_dce(&dce, root);
+ if (ret)
+ return ret;
+ } while (dce.removed);
+
+ if (pc->opt_reload_elim) {
+ pc->pass_seq++;
+ ret = nv_pass_mem_opt(reldelim, root);
+ if (!ret) {
+ memset(reldelim, 0, sizeof(struct pass_reld_elim));
+ reldelim->pc = pc;
+
+ pc->pass_seq++;
+ ret = nv_pass_mem_opt(reldelim, root);
+ }
+ FREE(reldelim);
+ if (ret)
+ return ret;
+ }
+
+ ret = nv_pass_tex_mask(&pass, root);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nv_pass_fix_bind(&pass, root);
+
+ return ret;
+}
+
+int
+nvc0_pc_exec_pass0(struct nv_pc *pc)
+{
+ int i, ret;
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
+ return ret;
+ return 0;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c
new file mode 100644
index 0000000000..b03826484e
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_pc.h"
+
+#define PRINT(args...) debug_printf(args)
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+#endif
+
+static const char *norm = "\x1b[00m";
+static const char *gree = "\x1b[32m";
+static const char *blue = "\x1b[34m";
+static const char *cyan = "\x1b[36m";
+static const char *yllw = "\x1b[33m";
+static const char *mgta = "\x1b[35m";
+
+static const char *nv_cond_names[] =
+{
+ "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "",
+ "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "",
+ "o", "c", "a", "s"
+};
+
+static const char *nv_modifier_strings[] =
+{
+ "",
+ "neg",
+ "abs",
+ "neg abs",
+ "not",
+ "not neg"
+ "not abs",
+ "not neg abs",
+ "sat",
+ "BAD_MOD"
+};
+
+const char *
+nvc0_opcode_name(uint opcode)
+{
+ return nvc0_op_info_table[MIN2(opcode, NV_OP_COUNT)].name;
+}
+
+static INLINE const char *
+nv_type_name(ubyte type, ubyte size)
+{
+ switch (type) {
+ case NV_TYPE_U16: return "u16";
+ case NV_TYPE_S16: return "s16";
+ case NV_TYPE_F32: return "f32";
+ case NV_TYPE_U32: return "u32";
+ case NV_TYPE_S32: return "s32";
+ case NV_TYPE_P32: return "p32";
+ case NV_TYPE_F64: return "f64";
+ case NV_TYPE_ANY:
+ {
+ switch (size) {
+ case 1: return "b8";
+ case 2: return "b16";
+ case 4: return "b32";
+ case 8: return "b64";
+ case 12: return "b96";
+ case 16: return "b128";
+ default:
+ return "BAD_SIZE";
+ }
+ }
+ default:
+ return "BAD_TYPE";
+ }
+}
+
+static INLINE const char *
+nv_cond_name(ubyte cc)
+{
+ return nv_cond_names[MIN2(cc, 19)];
+}
+
+static INLINE const char *
+nv_modifier_string(ubyte mod)
+{
+ return nv_modifier_strings[MIN2(mod, 9)];
+}
+
+static INLINE int
+nv_value_id(struct nv_value *value)
+{
+ if (value->join->reg.id >= 0)
+ return value->join->reg.id;
+ return value->n;
+}
+
+static INLINE boolean
+nv_value_allocated(struct nv_value *value)
+{
+ return (value->reg.id >= 0) ? TRUE : FALSE;
+}
+
+static INLINE void
+nv_print_address(const char c, int buf, struct nv_value *a, int offset)
+{
+ const char ac = (a && nv_value_allocated(a)) ? '$' : '%';
+ char sg;
+
+ if (offset < 0) {
+ sg = '-';
+ offset = -offset;
+ } else {
+ sg = '+';
+ }
+
+ if (buf >= 0)
+ PRINT(" %s%c%i[", cyan, c, buf);
+ else
+ PRINT(" %s%c[", cyan, c);
+ if (a)
+ PRINT("%s%ca%i%s%c", mgta, ac, nv_value_id(a), cyan, sg);
+ PRINT("%s0x%x%s]", yllw, offset, cyan);
+}
+
+static INLINE void
+nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type)
+{
+ char reg_pfx = nv_value_allocated(value->join) ? '$' : '%';
+
+ if (value->reg.file != NV_FILE_PRED)
+ PRINT(" %s%s", gree, nv_type_name(type, value->reg.size));
+
+ switch (value->reg.file) {
+ case NV_FILE_GPR:
+ PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value));
+ if (value->reg.size == 8)
+ PRINT("d");
+ if (value->reg.size == 16)
+ PRINT("q");
+ break;
+ case NV_FILE_PRED:
+ PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value));
+ break;
+ case NV_FILE_COND:
+ PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value));
+ break;
+ case NV_FILE_MEM_L:
+ nv_print_address('l', -1, indir, value->reg.address);
+ break;
+ case NV_FILE_MEM_G:
+ nv_print_address('g', -1, indir, value->reg.address);
+ break;
+ case NV_FILE_MEM_A:
+ nv_print_address('a', -1, indir, value->reg.address);
+ break;
+ case NV_FILE_MEM_V:
+ nv_print_address('v', -1, indir, value->reg.address);
+ break;
+ case NV_FILE_IMM:
+ switch (type) {
+ case NV_TYPE_U16:
+ case NV_TYPE_S16:
+ PRINT(" %s0x%04x", yllw, value->reg.imm.u32);
+ break;
+ case NV_TYPE_F32:
+ PRINT(" %s%f", yllw, value->reg.imm.f32);
+ break;
+ case NV_TYPE_F64:
+ PRINT(" %s%f", yllw, value->reg.imm.f64);
+ break;
+ case NV_TYPE_U32:
+ case NV_TYPE_S32:
+ case NV_TYPE_P32:
+ case NV_TYPE_ANY:
+ PRINT(" %s0x%08x", yllw, value->reg.imm.u32);
+ break;
+ }
+ break;
+ default:
+ if (value->reg.file >= NV_FILE_MEM_C(0) &&
+ value->reg.file <= NV_FILE_MEM_C(15))
+ nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir,
+ value->reg.address);
+ else
+ NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value));
+ break;
+ }
+}
+
+static INLINE void
+nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type)
+{
+ nv_print_value(ref->value, indir, type);
+}
+
+void
+nvc0_print_instruction(struct nv_instruction *i)
+{
+ int s;
+
+ PRINT("%i: ", i->serial);
+
+ if (i->predicate >= 0) {
+ PRINT("%s%s", gree, i->cc ? "fl" : "tr");
+ nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8);
+ PRINT(" ");
+ }
+
+ PRINT("%s", gree);
+ if (NV_BASEOP(i->opcode) == NV_OP_SET)
+ PRINT("set %s", nv_cond_name(i->set_cond));
+ else
+ if (i->saturate)
+ PRINT("sat %s", nvc0_opcode_name(i->opcode));
+ else
+ PRINT("%s", nvc0_opcode_name(i->opcode));
+
+ if (i->opcode == NV_OP_CVT)
+ nv_print_value(i->def[0], NULL, i->ext.cvt.d);
+ else
+ if (i->def[0])
+ nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode));
+ else
+ if (i->target)
+ PRINT(" %s(BB:%i)", yllw, i->target->id);
+ else
+ PRINT(" #");
+
+ for (s = 1; s < 4 && i->def[s]; ++s)
+ nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode));
+ if (s > 1)
+ PRINT("%s ,", norm);
+
+ for (s = 0; s < 6 && i->src[s]; ++s) {
+ ubyte type;
+ if (s == i->indirect || s == i->predicate)
+ continue;
+ if (i->opcode == NV_OP_CVT)
+ type = i->ext.cvt.s;
+ else
+ type = NV_OPTYPE(i->opcode);
+
+ if (i->src[s]->mod)
+ PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod));
+
+ if (i->indirect >= 0 &&
+ NV_IS_MEMORY_FILE(i->src[s]->value->reg.file))
+ nv_print_ref(i->src[s], i->src[i->indirect]->value, type);
+ else
+ nv_print_ref(i->src[s], NULL, type);
+ }
+ PRINT(" %s\n", norm);
+}
+
+#define NV_MOD_SGN NV_MOD_ABS | NV_MOD_NEG
+
+struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
+{
+ { NV_OP_UNDEF, "undef", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+ { NV_OP_BIND, "bind", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
+ { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
+ { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+ { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+ { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 },
+
+ { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 },
+ { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
+ { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
+ { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
+ { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
+ { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
+ { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_TRUNC, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+
+ { NV_OP_VFETCH, "vfetch", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_PFETCH, "pfetch", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_EXPORT, "export", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_LINTERP, "linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_EMIT, "emit", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_TEX, "tex", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_TXB, "texbias", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_TXL, "texlod", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
+ { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
+
+ { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_DFDX, "dfdx", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_DFDY, "dfdy", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_KIL, "kil", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_BRA, "bra", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_RET, "ret", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_RET, "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_JOIN, "join", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
+ { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
+ { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_MAX, "max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+ { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
+ { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+ { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
+ { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
+ { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 },
+ { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SET_F32_OR, "or set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+ { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 },
+
+ { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
+
+ { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+
+ { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }
+};
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
new file mode 100644
index 0000000000..d24f09a150
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
@@ -0,0 +1,927 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define NOUVEAU_DEBUG 1
+
+/* #define NVC0_RA_DEBUG_LIVEI */
+/* #define NVC0_RA_DEBUG_LIVE_SETS */
+/* #define NVC0_RA_DEBUG_JOIN */
+
+#include "nvc0_pc.h"
+#include "util/u_simple_list.h"
+
+#define NVC0_NUM_REGISTER_FILES 3
+
+/* @unit_shift: log2 of min allocation unit for register */
+struct register_set {
+ uint32_t bits[NVC0_NUM_REGISTER_FILES][2];
+ uint32_t last[NVC0_NUM_REGISTER_FILES];
+ int log2_unit[NVC0_NUM_REGISTER_FILES];
+ struct nv_pc *pc;
+};
+
+struct nv_pc_pass {
+ struct nv_pc *pc;
+ struct nv_instruction **insns;
+ uint num_insns;
+ uint pass_seq;
+};
+
+static void
+ranges_coalesce(struct nv_range *range)
+{
+ while (range->next && range->end >= range->next->bgn) {
+ struct nv_range *rnn = range->next->next;
+ assert(range->bgn <= range->next->bgn);
+ range->end = MAX2(range->end, range->next->end);
+ FREE(range->next);
+ range->next = rnn;
+ }
+}
+
+static boolean
+add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
+{
+ struct nv_range *range, **nextp = &val->livei;
+
+ for (range = val->livei; range; range = range->next) {
+ if (end < range->bgn)
+ break; /* insert before */
+
+ if (bgn > range->end) {
+ nextp = &range->next;
+ continue; /* insert after */
+ }
+
+ /* overlap */
+ if (bgn < range->bgn) {
+ range->bgn = bgn;
+ if (end > range->end)
+ range->end = end;
+ ranges_coalesce(range);
+ return TRUE;
+ }
+ if (end > range->end) {
+ range->end = end;
+ ranges_coalesce(range);
+ return TRUE;
+ }
+ assert(bgn >= range->bgn);
+ assert(end <= range->end);
+ return TRUE;
+ }
+
+ if (!new_range)
+ new_range = CALLOC_STRUCT(nv_range);
+
+ new_range->bgn = bgn;
+ new_range->end = end;
+ new_range->next = range;
+ *(nextp) = new_range;
+ return FALSE;
+}
+
+static void
+add_range(struct nv_value *val, struct nv_basic_block *b, int end)
+{
+ int bgn;
+
+ if (!val->insn) /* ignore non-def values */
+ return;
+ assert(b->entry->serial <= b->exit->serial);
+ assert(b->phi->serial <= end);
+ assert(b->exit->serial + 1 >= end);
+
+ bgn = val->insn->serial;
+ if (bgn < b->entry->serial || bgn > b->exit->serial)
+ bgn = b->entry->serial;
+
+ assert(bgn <= end);
+
+ add_range_ex(val, bgn, end, NULL);
+}
+
+#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI)
+static void
+livei_print(struct nv_value *a)
+{
+ struct nv_range *r = a->livei;
+
+ debug_printf("livei %i: ", a->n);
+ while (r) {
+ debug_printf("[%i, %i) ", r->bgn, r->end);
+ r = r->next;
+ }
+ debug_printf("\n");
+}
+#endif
+
+static void
+livei_unify(struct nv_value *dst, struct nv_value *src)
+{
+ struct nv_range *range, *next;
+
+ for (range = src->livei; range; range = next) {
+ next = range->next;
+ if (add_range_ex(dst, range->bgn, range->end, range))
+ FREE(range);
+ }
+ src->livei = NULL;
+}
+
+static void
+livei_release(struct nv_value *val)
+{
+ struct nv_range *range, *next;
+
+ for (range = val->livei; range; range = next) {
+ next = range->next;
+ FREE(range);
+ }
+}
+
+static boolean
+livei_have_overlap(struct nv_value *a, struct nv_value *b)
+{
+ struct nv_range *r_a, *r_b;
+
+ for (r_a = a->livei; r_a; r_a = r_a->next) {
+ for (r_b = b->livei; r_b; r_b = r_b->next) {
+ if (r_b->bgn < r_a->end &&
+ r_b->end > r_a->bgn)
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+static int
+livei_end(struct nv_value *a)
+{
+ struct nv_range *r = a->livei;
+
+ assert(r);
+ while (r->next)
+ r = r->next;
+ return r->end;
+}
+
+static boolean
+livei_contains(struct nv_value *a, int pos)
+{
+ struct nv_range *r;
+
+ for (r = a->livei; r && r->bgn <= pos; r = r->next)
+ if (r->end > pos)
+ return TRUE;
+ return FALSE;
+}
+
+static boolean
+reg_assign(struct register_set *set, struct nv_value **def, int n)
+{
+ int i, id, s, k;
+ uint32_t m;
+ int f = def[0]->reg.file;
+
+ k = n;
+ if (k == 3)
+ k = 4;
+ s = (k * def[0]->reg.size) >> set->log2_unit[f];
+ m = (1 << s) - 1;
+
+ id = set->last[f];
+
+ for (i = 0; i * 32 < set->last[f]; ++i) {
+ if (set->bits[f][i] == 0xffffffff)
+ continue;
+
+ for (id = 0; id < 32; id += s)
+ if (!(set->bits[f][i] & (m << id)))
+ break;
+ if (id < 32)
+ break;
+ }
+ if (i * 32 + id > set->last[f])
+ return FALSE;
+
+ set->bits[f][i] |= m << id;
+
+ id += i * 32;
+
+ set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
+
+ for (i = 0; i < n; ++i)
+ if (def[i]->livei)
+ def[i]->reg.id = id++;
+
+ return TRUE;
+}
+
+static INLINE void
+reg_occupy(struct register_set *set, struct nv_value *val)
+{
+ int id = val->reg.id, f = val->reg.file;
+ uint32_t m;
+
+ if (id < 0)
+ return;
+ m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
+
+ set->bits[f][id / 32] |= m << (id % 32);
+
+ if (set->pc->max_reg[f] < id)
+ set->pc->max_reg[f] = id;
+}
+
+static INLINE void
+reg_release(struct register_set *set, struct nv_value *val)
+{
+ int id = val->reg.id, f = val->reg.file;
+ uint32_t m;
+
+ if (id < 0)
+ return;
+ m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
+
+ set->bits[f][id / 32] &= ~(m << (id % 32));
+}
+
+static INLINE boolean
+join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+ int i;
+ struct nv_value *val;
+
+ if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
+ return FALSE;
+
+ if (a->join->reg.id == b->join->reg.id)
+ return TRUE;
+
+ /* either a or b or both have been assigned */
+
+ if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
+ return FALSE;
+ else
+ if (b->join->reg.id >= 0) {
+ if (b->join->reg.id == 63)
+ return FALSE;
+ val = a;
+ a = b;
+ b = val;
+ } else
+ if (a->join->reg.id == 63)
+ return FALSE;
+
+ for (i = 0; i < ctx->pc->num_values; ++i) {
+ val = &ctx->pc->values[i];
+
+ if (val->join->reg.id != a->join->reg.id)
+ continue;
+ if (val->join != a->join && livei_have_overlap(val->join, b->join))
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static INLINE void
+do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+ int j;
+ struct nv_value *bjoin = b->join;
+
+ if (b->join->reg.id >= 0)
+ a->join->reg.id = b->join->reg.id;
+
+ livei_unify(a->join, b->join);
+
+#ifdef NVC0_RA_DEBUG_JOIN
+ debug_printf("joining %i to %i\n", b->n, a->n);
+#endif
+
+ /* make a->join the new representative */
+ for (j = 0; j < ctx->pc->num_values; ++j)
+ if (ctx->pc->values[j].join == bjoin)
+ ctx->pc->values[j].join = a->join;
+
+ assert(b->join == a->join);
+}
+
+static INLINE void
+try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+ if (!join_allowed(ctx, a, b)) {
+#ifdef NVC0_RA_DEBUG_JOIN
+ debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
+#endif
+ return;
+ }
+ if (livei_have_overlap(a->join, b->join)) {
+#ifdef NVC0_RA_DEBUG_JOIN
+ debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
+ livei_print(a);
+ livei_print(b);
+#endif
+ return;
+ }
+
+ do_join_values(ctx, a, b);
+}
+
+static INLINE boolean
+need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
+{
+ int i = 0, n = 0;
+
+ for (; i < 2; ++i)
+ if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
+ ++n;
+
+ return (b->num_in > 1) && (n == 2);
+}
+
+static int
+phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
+ struct nv_basic_block *tb)
+{
+ int i, j;
+
+ for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
+ if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb))
+ continue;
+ /* NOTE: back-edges are ignored by the reachable-by check */
+ if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb,
+ phi->src[i]->value->insn->bb, tb))
+ j = i;
+ }
+ return j;
+}
+
+/* For each operand of each PHI in b, generate a new value by inserting a MOV
+ * at the end of the block it is coming from and replace the operand with its
+ * result. This eliminates liveness conflicts and enables us to let values be
+ * copied to the right register if such a conflict exists nonetheless.
+ *
+ * These MOVs are also crucial in making sure the live intervals of phi srces
+ * are extended until the end of the loop, since they are not included in the
+ * live-in sets.
+ */
+static int
+pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *i, *ni;
+ struct nv_value *val;
+ struct nv_basic_block *p, *pn;
+ int n, j;
+
+ b->pass_seq = ctx->pc->pass_seq;
+
+ for (n = 0; n < b->num_in; ++n) {
+ p = pn = b->in[n];
+ assert(p);
+
+ if (need_new_else_block(b, p)) {
+ pn = new_basic_block(ctx->pc);
+
+ if (p->out[0] == b)
+ p->out[0] = pn;
+ else
+ p->out[1] = pn;
+
+ if (p->exit->target == b) /* target to new else-block */
+ p->exit->target = pn;
+
+ b->in[n] = pn;
+
+ pn->out[0] = b;
+ pn->in[0] = p;
+ pn->num_in = 1;
+ }
+ ctx->pc->current_block = pn;
+
+ for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+ if ((j = phi_opnd_for_bb(i, p, b)) < 0)
+ continue;
+ val = i->src[j]->value;
+
+ if (i->src[j]->flags) {
+ /* value already encountered from a different in-block */
+ val = val->insn->src[0]->value;
+ while (j < 6 && i->src[j])
+ ++j;
+ assert(j < 6);
+ }
+
+ ni = new_instruction(ctx->pc, NV_OP_MOV);
+
+ /* TODO: insert instruction at correct position in the first place */
+ if (ni->prev && ni->prev->target)
+ nvc0_insns_permute(ni->prev, ni);
+
+ ni->def[0] = new_value_like(ctx->pc, val);
+ ni->def[0]->insn = ni;
+ nv_reference(ctx->pc, ni, 0, val);
+ nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */
+ i->src[j]->flags = 1;
+ }
+
+ if (pn != p && pn->exit) {
+ ctx->pc->current_block = b->in[n ? 0 : 1];
+ ni = new_instruction(ctx->pc, NV_OP_BRA);
+ ni->target = b;
+ ni->terminator = 1;
+ }
+ }
+
+ for (j = 0; j < 2; ++j)
+ if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
+ pass_generate_phi_movs(ctx, b->out[j]);
+
+ return 0;
+}
+
+static int
+pass_join_values(struct nv_pc_pass *ctx, int iter)
+{
+ int c, n;
+
+ for (n = 0; n < ctx->num_insns; ++n) {
+ struct nv_instruction *i = ctx->insns[n];
+
+ switch (i->opcode) {
+ case NV_OP_PHI:
+ if (iter != 2)
+ break;
+ for (c = 0; c < 6 && i->src[c]; ++c)
+ try_join_values(ctx, i->def[0], i->src[c]->value);
+ break;
+ case NV_OP_MOV:
+ if ((iter == 2) && i->src[0]->value->insn &&
+ !nv_is_texture_op(i->src[0]->value->join->insn->opcode))
+ try_join_values(ctx, i->def[0], i->src[0]->value);
+ break;
+ case NV_OP_SELECT:
+ if (iter != 1)
+ break;
+ for (c = 0; c < 6 && i->src[c]; ++c) {
+ assert(join_allowed(ctx, i->def[0], i->src[c]->value));
+ do_join_values(ctx, i->def[0], i->src[c]->value);
+ }
+ break;
+ case NV_OP_TEX:
+ case NV_OP_TXB:
+ case NV_OP_TXL:
+ case NV_OP_TXQ:
+ /* on nvc0, TEX src and dst can differ */
+ break;
+ case NV_OP_BIND:
+ if (iter)
+ break;
+ for (c = 0; c < 6 && i->src[c]; ++c)
+ do_join_values(ctx, i->def[c], i->src[c]->value);
+ break;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+/* Order the instructions so that live intervals can be expressed in numbers. */
+static void
+pass_order_instructions(void *priv, struct nv_basic_block *b)
+{
+ struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
+ struct nv_instruction *i;
+
+ b->pass_seq = ctx->pc->pass_seq;
+
+ assert(!b->exit || !b->exit->next);
+ for (i = b->phi; i; i = i->next) {
+ i->serial = ctx->num_insns;
+ ctx->insns[ctx->num_insns++] = i;
+ }
+}
+
+static void
+bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
+{
+#ifdef NVC0_RA_DEBUG_LIVE_SETS
+ struct nv_value *val;
+ int j;
+
+ debug_printf("LIVE-INs of BB:%i: ", b->id);
+
+ for (j = 0; j < pc->num_values; ++j) {
+ if (!(b->live_set[j / 32] & (1 << (j % 32))))
+ continue;
+ val = &pc->values[j];
+ if (!val->insn)
+ continue;
+ debug_printf("%i ", val->n);
+ }
+ debug_printf("\n");
+#endif
+}
+
+static INLINE void
+live_set_add(struct nv_basic_block *b, struct nv_value *val)
+{
+ if (!val->insn) /* don't add non-def values */
+ return;
+ b->live_set[val->n / 32] |= 1 << (val->n % 32);
+}
+
+static INLINE void
+live_set_rem(struct nv_basic_block *b, struct nv_value *val)
+{
+ b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
+}
+
+static INLINE boolean
+live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
+{
+ int n = ref->value->n;
+ return b->live_set[n / 32] & (1 << (n % 32));
+}
+
+/* The live set of a block contains those values that are live immediately
+ * before the beginning of the block, so do a backwards scan.
+ */
+static int
+pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *i;
+ int j, n, ret = 0;
+
+ if (b->pass_seq >= ctx->pc->pass_seq)
+ return 0;
+ b->pass_seq = ctx->pc->pass_seq;
+
+ /* slight hack for undecidedness: set phi = entry if it's undefined */
+ if (!b->phi)
+ b->phi = b->entry;
+
+ for (n = 0; n < 2; ++n) {
+ if (!b->out[n] || b->out[n] == b)
+ continue;
+ ret = pass_build_live_sets(ctx, b->out[n]);
+ if (ret)
+ return ret;
+
+ if (n == 0) {
+ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+ b->live_set[j] = b->out[n]->live_set[j];
+ } else {
+ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+ b->live_set[j] |= b->out[n]->live_set[j];
+ }
+ }
+
+ if (!b->entry)
+ return 0;
+
+ bb_live_set_print(ctx->pc, b);
+
+ for (i = b->exit; i != b->entry->prev; i = i->prev) {
+ for (j = 0; j < 5 && i->def[j]; j++)
+ live_set_rem(b, i->def[j]);
+ for (j = 0; j < 6 && i->src[j]; j++)
+ live_set_add(b, i->src[j]->value);
+ }
+ for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
+ live_set_rem(b, i->def[0]);
+
+ bb_live_set_print(ctx->pc, b);
+
+ return 0;
+}
+
+static void collect_live_values(struct nv_basic_block *b, const int n)
+{
+ int i;
+
+ if (b->out[0]) {
+ if (b->out[1]) { /* what to do about back-edges ? */
+ for (i = 0; i < n; ++i)
+ b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
+ } else {
+ memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
+ }
+ } else
+ if (b->out[1]) {
+ memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
+ } else {
+ memset(b->live_set, 0, n * sizeof(uint32_t));
+ }
+}
+
+/* NOTE: the live intervals of phi functions start at the first non-phi insn. */
+static int
+pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *i, *i_stop;
+ int j, s;
+ const int n = (ctx->pc->num_values + 31) / 32;
+
+ /* verify that first block does not have live-in values */
+ if (b->num_in == 0)
+ for (j = 0; j < n; ++j)
+ assert(b->live_set[j] == 0);
+
+ collect_live_values(b, n);
+
+ /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
+ for (j = 0; j < 2; ++j) {
+ if (!b->out[j] || !b->out[j]->phi)
+ continue;
+ for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
+ live_set_rem(b, i->def[0]);
+
+ for (s = 0; s < 6 && i->src[s]; ++s) {
+ assert(i->src[s]->value->insn);
+ if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb,
+ b->out[j]))
+ live_set_add(b, i->src[s]->value);
+ else
+ live_set_rem(b, i->src[s]->value);
+ }
+ }
+ }
+
+ /* remaining live-outs are live until the end */
+ if (b->exit) {
+ for (j = 0; j < ctx->pc->num_values; ++j) {
+ if (!(b->live_set[j / 32] & (1 << (j % 32))))
+ continue;
+ add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
+#ifdef NVC0_RA_DEBUG_LIVEI
+ debug_printf("adding range for live value %i: ", j);
+ livei_print(&ctx->pc->values[j]);
+#endif
+ }
+ }
+
+ i_stop = b->entry ? b->entry->prev : NULL;
+
+ /* don't have to include phi functions here (will have 0 live range) */
+ for (i = b->exit; i != i_stop; i = i->prev) {
+ assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
+ for (j = 0; j < 4 && i->def[j]; ++j)
+ live_set_rem(b, i->def[j]);
+
+ for (j = 0; j < 6 && i->src[j]; ++j) {
+ if (!live_set_test(b, i->src[j])) {
+ live_set_add(b, i->src[j]->value);
+ add_range(i->src[j]->value, b, i->serial);
+#ifdef NVC0_RA_DEBUG_LIVEI
+ debug_printf("adding range for source %i (ends living): ",
+ i->src[j]->value->n);
+ livei_print(i->src[j]->value);
+#endif
+ }
+ }
+ }
+
+ b->pass_seq = ctx->pc->pass_seq;
+
+ if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
+ pass_build_intervals(ctx, b->out[0]);
+
+ if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
+ pass_build_intervals(ctx, b->out[1]);
+
+ return 0;
+}
+
+static INLINE void
+nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set)
+{
+ memset(set, 0, sizeof(*set));
+
+ set->last[NV_FILE_GPR] = 62;
+ set->last[NV_FILE_PRED] = 6;
+ set->last[NV_FILE_COND] = 1;
+
+ set->log2_unit[NV_FILE_GPR] = 2;
+ set->log2_unit[NV_FILE_COND] = 0;
+ set->log2_unit[NV_FILE_PRED] = 0;
+
+ set->pc = pc;
+}
+
+static void
+insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
+{
+ struct nv_value *elem;
+
+ for (elem = list->prev;
+ elem != list && elem->livei->bgn > nval->livei->bgn;
+ elem = elem->prev);
+ /* now elem begins before or at the same time as val */
+
+ nval->prev = elem;
+ nval->next = elem->next;
+ elem->next->prev = nval;
+ elem->next = nval;
+}
+
+static int
+pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+{
+ struct nv_instruction *i;
+ struct register_set f, free;
+ int k, n;
+ struct nv_value *cur, *val, *tmp[2];
+ struct nv_value active, inactive, handled, unhandled;
+
+ make_empty_list(&active);
+ make_empty_list(&inactive);
+ make_empty_list(&handled);
+ make_empty_list(&unhandled);
+
+ nvc0_ctor_register_set(ctx->pc, &free);
+
+ /* joined values should have range = NULL and thus not be added;
+ * also, fixed memory values won't be added because they're not
+ * def'd, just used
+ */
+ for (n = 0; n < ctx->num_insns; ++n) {
+ i = ctx->insns[n];
+
+ for (k = 0; k < 5; ++k) {
+ if (i->def[k] && i->def[k]->livei)
+ insert_ordered_tail(&unhandled, i->def[k]);
+ else
+ if (0 && i->def[k])
+ debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
+ }
+ }
+
+ for (val = unhandled.next; val != unhandled.prev; val = val->next) {
+ assert(val->join == val);
+ assert(val->livei->bgn <= val->next->livei->bgn);
+ }
+
+ foreach_s(cur, tmp[0], &unhandled) {
+ remove_from_list(cur);
+
+ foreach_s(val, tmp[1], &active) {
+ if (livei_end(val) <= cur->livei->bgn) {
+ reg_release(&free, val);
+ move_to_head(&handled, val);
+ } else
+ if (!livei_contains(val, cur->livei->bgn)) {
+ reg_release(&free, val);
+ move_to_head(&inactive, val);
+ }
+ }
+
+ foreach_s(val, tmp[1], &inactive) {
+ if (livei_end(val) <= cur->livei->bgn)
+ move_to_head(&handled, val);
+ else
+ if (livei_contains(val, cur->livei->bgn)) {
+ reg_occupy(&free, val);
+ move_to_head(&active, val);
+ }
+ }
+
+ f = free;
+
+ foreach(val, &inactive)
+ if (livei_have_overlap(val, cur))
+ reg_occupy(&f, val);
+
+ foreach(val, &unhandled)
+ if (val->reg.id >= 0 && livei_have_overlap(val, cur))
+ reg_occupy(&f, val);
+
+ if (cur->reg.id < 0) {
+ boolean mem = FALSE;
+ int v = nvi_vector_size(cur->insn);
+
+ if (v > 1)
+ mem = !reg_assign(&f, &cur->insn->def[0], v);
+ else
+ if (iter)
+ mem = !reg_assign(&f, &cur, 1);
+
+ if (mem) {
+ NOUVEAU_ERR("out of registers\n");
+ abort();
+ }
+ }
+ insert_at_head(&active, cur);
+ reg_occupy(&free, cur);
+ }
+
+ return 0;
+}
+
+static int
+nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
+{
+ struct nv_pc_pass *ctx;
+ int i, ret;
+
+ NOUVEAU_DBG("REGISTER ALLOCATION - entering\n");
+
+ ctx = CALLOC_STRUCT(nv_pc_pass);
+ if (!ctx)
+ return -1;
+ ctx->pc = pc;
+
+ ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
+ if (!ctx->insns) {
+ FREE(ctx);
+ return -1;
+ }
+
+ pc->pass_seq++;
+ ret = pass_generate_phi_movs(ctx, root);
+ assert(!ret);
+
+ for (i = 0; i < pc->loop_nesting_bound; ++i) {
+ pc->pass_seq++;
+ ret = pass_build_live_sets(ctx, root);
+ assert(!ret && "live sets");
+ if (ret) {
+ NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
+ goto out;
+ }
+ }
+
+ pc->pass_seq++;
+ nvc0_pc_pass_in_order(root, pass_order_instructions, ctx);
+
+ pc->pass_seq++;
+ ret = pass_build_intervals(ctx, root);
+ assert(!ret && "build intervals");
+ if (ret) {
+ NOUVEAU_ERR("failed to build live intervals\n");
+ goto out;
+ }
+
+#ifdef NVC0_RA_DEBUG_LIVEI
+ for (i = 0; i < pc->num_values; ++i)
+ livei_print(&pc->values[i]);
+#endif
+
+ ret = pass_join_values(ctx, 0);
+ if (ret)
+ goto out;
+ ret = pass_linear_scan(ctx, 0);
+ if (ret)
+ goto out;
+ ret = pass_join_values(ctx, 1);
+ if (ret)
+ goto out;
+ ret = pass_join_values(ctx, 2);
+ if (ret)
+ goto out;
+ ret = pass_linear_scan(ctx, 1);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < pc->num_values; ++i)
+ livei_release(&pc->values[i]);
+
+ NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n");
+
+out:
+ FREE(ctx->insns);
+ FREE(ctx);
+ return ret;
+}
+
+int
+nvc0_pc_exec_pass1(struct nv_pc *pc)
+{
+ int i, ret;
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
+ return ret;
+ return 0;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
new file mode 100644
index 0000000000..57a0874e67
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_defines.h"
+
+#define NOUVEAU_DEBUG
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "nvc0_context.h"
+#include "nvc0_pc.h"
+
+static unsigned
+nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
+{
+ unsigned mask = inst->Dst[0].Register.WriteMask;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
+ case TGSI_OPCODE_DP3:
+ return 0x7;
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_KIL: /* WriteMask ignored */
+ return 0xf;
+ case TGSI_OPCODE_DST:
+ return mask & (c ? 0xa : 0x6);
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_LOG:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SCS:
+ return 0x1;
+ case TGSI_OPCODE_IF:
+ return 0x1;
+ case TGSI_OPCODE_LIT:
+ return 0xb;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXP:
+ {
+ const struct tgsi_instruction_texture *tex;
+
+ assert(inst->Instruction.Texture);
+ tex = &inst->Texture;
+
+ mask = 0x7;
+ if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXD)
+ mask |= 0x8; /* bias, lod or proj */
+
+ switch (tex->Texture) {
+ case TGSI_TEXTURE_1D:
+ mask &= 0x9;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ mask &= 0x5;
+ break;
+ case TGSI_TEXTURE_2D:
+ mask &= 0xb;
+ break;
+ default:
+ break;
+ }
+ }
+ return mask;
+ case TGSI_OPCODE_XPD:
+ {
+ unsigned x = 0;
+ if (mask & 1) x |= 0x6;
+ if (mask & 2) x |= 0x5;
+ if (mask & 4) x |= 0x3;
+ return x;
+ }
+ default:
+ break;
+ }
+
+ return mask;
+}
+
+static void
+nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id)
+{
+ int i, c;
+
+ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
+ for (c = 0; c < 4; ++c)
+ ti->input_access[i][c] = id;
+
+ ti->indirect_inputs = TRUE;
+}
+
+static void
+nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id)
+{
+ int i, c;
+
+ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
+ for (c = 0; c < 4; ++c)
+ ti->output_access[i][c] = id;
+
+ ti->indirect_outputs = TRUE;
+}
+
+static INLINE unsigned
+nvc0_system_value_location(unsigned sn, unsigned si)
+{
+ /* NOTE: locations 0xfxx indicate special regs */
+ switch (sn) {
+ /*
+ case TGSI_SEMANTIC_VERTEXID:
+ return 0x2fc;
+ */
+ case TGSI_SEMANTIC_PRIMID:
+ return 0x60;
+ /*
+ case TGSI_SEMANTIC_LAYER_INDEX:
+ return 0x64;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ return 0x68;
+ */
+ case TGSI_SEMANTIC_INSTANCEID:
+ return 0x2f8;
+ case TGSI_SEMANTIC_FACE:
+ return 0x3fc;
+ /*
+ case TGSI_SEMANTIC_INVOCATIONID:
+ return 0xf11;
+ */
+ default:
+ assert(0);
+ return 0x000;
+ }
+}
+
+static INLINE unsigned
+nvc0_varying_location(unsigned sn, unsigned si)
+{
+ switch (sn) {
+ case TGSI_SEMANTIC_POSITION:
+ return 0x70;
+ case TGSI_SEMANTIC_COLOR:
+ return 0x280 + (si * 16); /* are these hard-wired ? */
+ case TGSI_SEMANTIC_BCOLOR:
+ return 0x2a0 + (si * 16);
+ case TGSI_SEMANTIC_FOG:
+ return 0x270;
+ case TGSI_SEMANTIC_PSIZE:
+ return 0x6c;
+ /*
+ case TGSI_SEMANTIC_PNTC:
+ return 0x2e0;
+ */
+ case TGSI_SEMANTIC_GENERIC:
+ assert(si < 31);
+ return 0x80 + (si * 16);
+ case TGSI_SEMANTIC_NORMAL:
+ return 0x360;
+ case TGSI_SEMANTIC_PRIMID:
+ return 0x40;
+ case TGSI_SEMANTIC_FACE:
+ return 0x3fc;
+ /*
+ case TGSI_SEMANTIC_CLIP_DISTANCE:
+ return 0x2c0 + (si * 4);
+ */
+ default:
+ assert(0);
+ return 0x000;
+ }
+}
+
+static INLINE unsigned
+nvc0_interp_mode(const struct tgsi_full_declaration *decl)
+{
+ unsigned mode;
+
+ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
+ mode = NVC0_INTERP_FLAT;
+ else
+ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+ mode = NVC0_INTERP_PERSPECTIVE;
+ else
+ mode = NVC0_INTERP_LINEAR;
+
+ if (decl->Declaration.Centroid)
+ mode |= NVC0_INTERP_CENTROID;
+
+ return mode;
+}
+
+static void
+prog_immediate(struct nvc0_translation_info *ti,
+ const struct tgsi_full_immediate *imm)
+{
+ int c;
+ unsigned n = ti->immd32_nr++;
+
+ assert(ti->immd32_nr <= ti->scan.immediate_count);
+
+ for (c = 0; c < 4; ++c)
+ ti->immd32[n * 4 + c] = imm->u[c].Uint;
+
+ ti->immd32_ty[n] = imm->Immediate.DataType;
+}
+
+static boolean
+prog_decl(struct nvc0_translation_info *ti,
+ const struct tgsi_full_declaration *decl)
+{
+ unsigned i, c;
+ unsigned sn = TGSI_SEMANTIC_GENERIC;
+ unsigned si = 0;
+ const unsigned first = decl->Range.First;
+ const unsigned last = decl->Range.Last;
+
+ if (decl->Declaration.Semantic) {
+ sn = decl->Semantic.Name;
+ si = decl->Semantic.Index;
+ }
+
+ switch (decl->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ for (i = first; i <= last; ++i) {
+ if (ti->prog->type == PIPE_SHADER_VERTEX) {
+ sn = TGSI_SEMANTIC_GENERIC;
+ si = i;
+ }
+ for (c = 0; c < 4; ++c)
+ ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
+
+ if (ti->prog->type == PIPE_SHADER_FRAGMENT)
+ ti->interp_mode[i] = nvc0_interp_mode(decl);
+ }
+ break;
+ case TGSI_FILE_OUTPUT:
+ for (i = first; i <= last; ++i, ++si) {
+ if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
+ si = i;
+ if (i == ti->fp_depth_output) {
+ ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4;
+ } else {
+ if (i > ti->fp_depth_output)
+ si -= 1;
+ for (c = 0; c < 4; ++c)
+ ti->output_loc[i][c] = si * 4 + c;
+ }
+ } else {
+ for (c = 0; c < 4; ++c)
+ ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
+ }
+ }
+ break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ ti->sysval_loc[i] = nvc0_system_value_location(sn, si);
+ assert(first == last);
+ break;
+ case TGSI_FILE_NULL:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_ADDRESS:
+ case TGSI_FILE_IMMEDIATE:
+ case TGSI_FILE_PREDICATE:
+ break;
+ default:
+ NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static void
+prog_inst(struct nvc0_translation_info *ti,
+ const struct tgsi_full_instruction *inst, int id)
+{
+ const struct tgsi_dst_register *dst;
+ const struct tgsi_src_register *src;
+ int s, c, k;
+ unsigned mask;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
+ ti->subr[ti->num_subrs].first_insn = id - 1;
+ ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */
+ ++ti->num_subrs;
+ }
+
+ if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+ dst = &inst->Dst[0].Register;
+
+ for (c = 0; c < 4; ++c) {
+ if (dst->Indirect)
+ nvc0_indirect_outputs(ti, id);
+ if (!(dst->WriteMask & (1 << c)))
+ continue;
+ ti->output_access[dst->Index][c] = id;
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
+ inst->Src[0].Register.File == TGSI_FILE_INPUT &&
+ dst->Index == ti->edgeflag_out)
+ ti->prog->vp.edgeflag = inst->Src[0].Register.Index;
+ } else
+ if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
+ if (inst->Dst[0].Register.Indirect)
+ ti->require_stores = TRUE;
+ }
+
+ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+ src = &inst->Src[s].Register;
+ if (src->File == TGSI_FILE_TEMPORARY)
+ if (inst->Src[s].Register.Indirect)
+ ti->require_stores = TRUE;
+ if (src->File != TGSI_FILE_INPUT)
+ continue;
+ mask = nvc0_tgsi_src_mask(inst, s);
+
+ if (inst->Src[s].Register.Indirect)
+ nvc0_indirect_inputs(ti, id);
+
+ for (c = 0; c < 4; ++c) {
+ if (!(mask & (1 << c)))
+ continue;
+ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+ if (k <= TGSI_SWIZZLE_W)
+ ti->input_access[src->Index][k] = id;
+ }
+ }
+}
+
+/* Probably should introduce something like struct tgsi_function_declaration
+ * instead of trying to guess inputs/outputs.
+ */
+static void
+prog_subroutine_inst(struct nvc0_subroutine *subr,
+ const struct tgsi_full_instruction *inst)
+{
+ const struct tgsi_dst_register *dst;
+ const struct tgsi_src_register *src;
+ int s, c, k;
+ unsigned mask;
+
+ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+ src = &inst->Src[s].Register;
+ if (src->File != TGSI_FILE_TEMPORARY)
+ continue;
+ mask = nvc0_tgsi_src_mask(inst, s);
+
+ for (c = 0; c < 4; ++c) {
+ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+
+ if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
+ if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
+ subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
+ }
+ }
+
+ if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
+ dst = &inst->Dst[0].Register;
+
+ for (c = 0; c < 4; ++c)
+ if (dst->WriteMask & (1 << c))
+ subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
+ }
+}
+
+static int
+nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+{
+ int i, c;
+ unsigned a;
+
+ for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
+ for (c = 0; c < 4; ++c, ++a)
+ if (ti->input_access[i][c])
+ vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */
+ }
+
+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
+ a = (ti->output_loc[i][0] - 0x40) / 4;
+ for (c = 0; c < 4; ++c, ++a) {
+ if (!ti->output_access[i][c])
+ continue;
+ vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */
+ }
+ }
+
+ return 0;
+}
+
+static int
+nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+{
+ vp->hdr[0] = 0x20461;
+ vp->hdr[4] = 0xff000;
+
+ vp->hdr[18] = (1 << vp->vp.num_ucps) - 1;
+
+ return nvc0_vp_gp_gen_header(vp, ti);
+}
+
+static int
+nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
+{
+ unsigned invocations = 1;
+ unsigned max_output_verts, output_prim;
+ unsigned i;
+
+ gp->hdr[0] = 0x21061;
+
+ for (i = 0; i < ti->scan.num_properties; ++i) {
+ switch (ti->scan.properties[i].name) {
+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+ output_prim = ti->scan.properties[i].data[0];
+ break;
+ case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+ max_output_verts = ti->scan.properties[i].data[0];
+ assert(max_output_verts < 512);
+ break;
+ /*
+ case TGSI_PROPERTY_GS_INVOCATIONS:
+ invocations = ti->scan.properties[i].data[0];
+ assert(invocations <= 32);
+ break;
+ */
+ default:
+ break;
+ }
+ }
+
+ gp->hdr[2] = MIN2(invocations, 32) << 24;
+
+ switch (output_prim) {
+ case PIPE_PRIM_POINTS:
+ gp->hdr[3] = 0x01000000;
+ gp->hdr[0] |= 0xf0000000;
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ gp->hdr[3] = 0x06000000;
+ gp->hdr[0] |= 0x10000000;
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ gp->hdr[3] = 0x07000000;
+ gp->hdr[0] |= 0x10000000;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ gp->hdr[4] = max_output_verts & 0x1ff;
+
+ return nvc0_vp_gp_gen_header(gp, ti);
+}
+
+static int
+nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
+{
+ int i, c;
+ unsigned a, m;
+
+ fp->hdr[0] = 0x21462;
+ fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
+
+ if (ti->scan.uses_kill)
+ fp->hdr[0] |= 0x8000;
+ if (ti->scan.writes_z) {
+ fp->hdr[19] |= 0x2;
+ if (ti->scan.num_outputs > 2)
+ fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+ } else {
+ if (ti->scan.num_outputs > 1)
+ fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+ }
+
+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
+ m = ti->interp_mode[i];
+ for (c = 0; c < 4; ++c) {
+ if (!ti->input_access[i][c])
+ continue;
+ a = ti->input_loc[i][c] / 2;
+ if ((a & ~7) == 0x70/2)
+ fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */
+ else
+ fp->hdr[4 + a / 32] |= m << (a % 32);
+ }
+ }
+
+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
+ if (i != ti->fp_depth_output)
+ fp->hdr[18] |= 0xf << ti->output_loc[i][0];
+ }
+
+ return 0;
+}
+
+static boolean
+nvc0_prog_scan(struct nvc0_translation_info *ti)
+{
+ struct nvc0_program *prog = ti->prog;
+ struct tgsi_parse_context parse;
+ int ret;
+ unsigned i;
+
+#ifdef NOUVEAU_DEBUG
+ tgsi_dump(prog->pipe.tokens, 0);
+#endif
+
+ tgsi_scan_shader(prog->pipe.tokens, &ti->scan);
+
+ if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
+ ti->fp_depth_output = 255;
+ for (i = 0; i < ti->scan.num_outputs; ++i)
+ if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION)
+ ti->fp_depth_output = i;
+ }
+
+ ti->subr =
+ CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
+
+ ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
+ ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
+
+ ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
+
+ tgsi_parse_init(&parse, prog->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ prog_immediate(ti, &parse.FullToken.FullImmediate);
+ break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ prog_decl(ti, &parse.FullToken.FullDeclaration);
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ ti->insns[ti->num_insns] = parse.FullToken.FullInstruction;
+ prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns);
+ break;
+ default:
+ break;
+ }
+ }
+
+ for (i = 0; i < ti->num_subrs; ++i) {
+ unsigned pc = ti->subr[i].id;
+ while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
+ prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
+ }
+
+ switch (prog->type) {
+ case PIPE_SHADER_VERTEX:
+ ti->input_file = NV_FILE_MEM_A;
+ ti->output_file = NV_FILE_MEM_V;
+ ret = nvc0_vp_gen_header(prog, ti);
+ break;
+ /*
+ case PIPE_SHADER_TESSELLATION_CONTROL:
+ ret = nvc0_tcp_gen_header(ti);
+ break;
+ case PIPE_SHADER_TESSELLATION_EVALUATION:
+ ret = nvc0_tep_gen_header(ti);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ ret = nvc0_gp_gen_header(ti);
+ break;
+ */
+ case PIPE_SHADER_FRAGMENT:
+ ti->input_file = NV_FILE_MEM_V;
+ ti->output_file = NV_FILE_GPR;
+
+ if (ti->scan.writes_z)
+ prog->flags[0] = 0x11; /* ? */
+ else
+ if (!ti->global_stores)
+ prog->fp.early_z = 1;
+
+ ret = nvc0_fp_gen_header(prog, ti);
+ break;
+ default:
+ assert(!"unsupported program type");
+ ret = -1;
+ break;
+ }
+
+ assert(!ret);
+ return ret;
+}
+
+boolean
+nvc0_program_translate(struct nvc0_program *prog)
+{
+ struct nvc0_translation_info *ti;
+ int ret;
+
+ ti = CALLOC_STRUCT(nvc0_translation_info);
+ ti->prog = prog;
+
+ ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
+
+ if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps)
+ ti->append_ucp = TRUE;
+
+ ret = nvc0_prog_scan(ti);
+ if (ret) {
+ NOUVEAU_ERR("unsupported shader program\n");
+ goto out;
+ }
+
+ ret = nvc0_generate_code(ti);
+ if (ret)
+ NOUVEAU_ERR("shader translation failed\n");
+
+ {
+ unsigned i;
+ for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i)
+ debug_printf("HDR[%02lx] = 0x%08x\n",
+ i * sizeof(prog->hdr[0]), prog->hdr[i]);
+ }
+
+out:
+ if (ti->immd32)
+ FREE(ti->immd32);
+ if (ti->immd32_ty)
+ FREE(ti->immd32_ty);
+ if (ti->insns)
+ FREE(ti->insns);
+ if (ti->subr)
+ FREE(ti->subr);
+ FREE(ti);
+ return ret ? FALSE : TRUE;
+}
+
+void
+nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+ if (prog->res)
+ nouveau_resource_free(&prog->res);
+
+ if (prog->code)
+ FREE(prog->code);
+ if (prog->relocs)
+ FREE(prog->relocs);
+
+ memset(prog->hdr, 0, sizeof(prog->hdr));
+
+ prog->translated = FALSE;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
new file mode 100644
index 0000000000..2e84caecc9
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_program.h
@@ -0,0 +1,88 @@
+
+#ifndef __NVC0_PROGRAM_H__
+#define __NVC0_PROGRAM_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+#define NVC0_CAP_MAX_PROGRAM_TEMPS 64
+
+#define NVC0_SHADER_HEADER_SIZE (20 * 4)
+
+struct nvc0_program {
+ struct pipe_shader_state pipe;
+
+ ubyte type;
+ boolean translated;
+ ubyte max_gpr;
+
+ uint32_t *code;
+ unsigned code_base;
+ unsigned code_size;
+ unsigned parm_size;
+
+ uint32_t hdr[20];
+
+ uint32_t flags[2];
+
+ struct {
+ uint8_t edgeflag;
+ uint8_t num_ucps;
+ } vp;
+ struct {
+ uint8_t early_z;
+ } fp;
+
+ void *relocs;
+ unsigned num_relocs;
+
+ struct nouveau_resource *res;
+};
+
+/* first 2 bits are written into the program header, for each input */
+#define NVC0_INTERP_FLAT (1 << 0)
+#define NVC0_INTERP_PERSPECTIVE (2 << 0)
+#define NVC0_INTERP_LINEAR (3 << 0)
+#define NVC0_INTERP_CENTROID (1 << 2)
+
+/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
+struct nvc0_subroutine {
+ unsigned id;
+ unsigned first_insn;
+ uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
+ uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
+};
+
+struct nvc0_translation_info {
+ struct nvc0_program *prog;
+ struct tgsi_full_instruction *insns;
+ unsigned num_insns;
+ ubyte input_file;
+ ubyte output_file;
+ ubyte fp_depth_output;
+ uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4];
+ uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4];
+ uint16_t sysval_loc[TGSI_SEMANTIC_COUNT];
+ int input_access[PIPE_MAX_SHADER_INPUTS][4];
+ int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
+ ubyte interp_mode[PIPE_MAX_SHADER_INPUTS];
+ boolean indirect_inputs;
+ boolean indirect_outputs;
+ boolean require_stores;
+ boolean global_stores;
+ uint32_t *immd32;
+ ubyte *immd32_ty;
+ unsigned immd32_nr;
+ ubyte edgeflag_out;
+ struct nvc0_subroutine *subr;
+ unsigned num_subrs;
+ boolean append_ucp;
+ struct tgsi_shader_info scan;
+};
+
+int nvc0_generate_code(struct nvc0_translation_info *);
+
+void nvc0_relocate_program(struct nvc0_program *,
+ uint32_t code_base, uint32_t data_base);
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c
new file mode 100644
index 0000000000..941be67858
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_push.c
@@ -0,0 +1,279 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+struct push_context {
+ struct nouveau_channel *chan;
+
+ void *idxbuf;
+
+ float edgeflag;
+ int edgeflag_attr;
+
+ uint32_t vertex_words;
+ uint32_t packet_vertex_limit;
+
+ struct translate *translate;
+
+ boolean primitive_restart;
+ uint32_t prim;
+ uint32_t restart_index;
+};
+
+static INLINE unsigned
+prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static void
+emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i08(elts, push, ctx->restart_index);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->chan->cur);
+
+ ctx->chan->cur += size;
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
+ OUT_RING (ctx->chan, 0);
+ OUT_RING (ctx->chan, ctx->prim);
+ }
+ }
+}
+
+static void
+emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i16(elts, push, ctx->restart_index);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->chan->cur);
+
+ ctx->chan->cur += size;
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
+ OUT_RING (ctx->chan, 0);
+ OUT_RING (ctx->chan, ctx->prim);
+ }
+ }
+}
+
+static void
+emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i32(elts, push, ctx->restart_index);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->chan->cur);
+
+ ctx->chan->cur += size;
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
+ OUT_RING (ctx->chan, 0);
+ OUT_RING (ctx->chan, ctx->prim);
+ }
+ }
+}
+
+static void
+emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size = ctx->vertex_words * push;
+
+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
+
+ ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur);
+ ctx->chan->cur += size;
+ count -= push;
+ start += push;
+ }
+}
+
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
+}
+
+void
+nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+ struct push_context ctx;
+ unsigned i, index_size;
+ unsigned inst = info->instance_count;
+
+ ctx.chan = nvc0->screen->base.channel;
+ ctx.translate = nvc0->vertex->translate;
+ ctx.packet_vertex_limit = nvc0->vertex->vtx_per_packet_max;
+ ctx.vertex_words = nvc0->vertex->vtx_size;
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ uint8_t *data;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
+ struct nvc0_resource *res = nvc0_resource(vb->buffer);
+
+ data = nvc0_resource_map_offset(nvc0, res,
+ vb->buffer_offset, NOUVEAU_BO_RD);
+ if (info->indexed)
+ data += info->index_bias * vb->stride;
+
+ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+ }
+
+ if (info->indexed) {
+ ctx.idxbuf = nvc0_resource_map_offset(nvc0,
+ nvc0_resource(nvc0->idxbuf.buffer),
+ nvc0->idxbuf.offset, NOUVEAU_BO_RD);
+ if (!ctx.idxbuf)
+ return;
+ index_size = nvc0->idxbuf.index_size;
+ ctx.primitive_restart = info->primitive_restart;
+ ctx.restart_index = info->restart_index;
+ } else {
+ ctx.idxbuf = NULL;
+ index_size = 0;
+ ctx.primitive_restart = FALSE;
+ ctx.restart_index = 0;
+ }
+
+ ctx.prim = nvc0_prim_gl(info->mode);
+
+ while (inst--) {
+ BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (ctx.chan, ctx.prim);
+ switch (index_size) {
+ case 0:
+ emit_vertices_seq(&ctx, info->start, info->count);
+ break;
+ case 1:
+ emit_vertices_i08(&ctx, info->start, info->count);
+ break;
+ case 2:
+ emit_vertices_i16(&ctx, info->start, info->count);
+ break;
+ case 4:
+ emit_vertices_i32(&ctx, info->start, info->count);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ IMMED_RING(ctx.chan, RING_3D(VERTEX_END_GL), 0);
+
+ ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+
+ if (info->indexed)
+ nvc0_resource_unmap(nvc0_resource(nvc0->idxbuf.buffer));
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i)
+ nvc0_resource_unmap(nvc0_resource(nvc0->vtxbuf[i].buffer));
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_push2.c b/src/gallium/drivers/nvc0/nvc0_push2.c
new file mode 100644
index 0000000000..6f51600558
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_push2.c
@@ -0,0 +1,333 @@
+
+#if 0 /* not used, kept for now to compare with util/translate */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+struct push_context {
+ struct nvc0_context *nvc0;
+
+ uint vertex_size;
+
+ void *idxbuf;
+ uint idxsize;
+
+ float edgeflag;
+ int edgeflag_input;
+
+ struct {
+ void *map;
+ void (*push)(struct nouveau_channel *, void *);
+ uint32_t stride;
+ uint32_t divisor;
+ uint32_t step;
+ } attr[32];
+ int num_attrs;
+};
+
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+ OUT_RING(chan, v[1]);
+}
+
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+ OUT_RING(chan, v[1]);
+ OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+ OUT_RING(chan, v[1]);
+ OUT_RING(chan, v[2]);
+ OUT_RING(chan, v[3]);
+}
+
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{
+ uint16_t *v = data;
+
+ OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{
+ uint16_t *v = data;
+
+ OUT_RING(chan, (v[1] << 16) | v[0]);
+ OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{
+ uint8_t *v = data;
+
+ OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{
+ uint8_t *v = data;
+
+ OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
+}
+
+static void
+emit_b64_1(struct nouveau_channel *chan, void *data)
+{
+ double *v = data;
+
+ OUT_RINGf(chan, v[0]);
+}
+
+static void
+emit_b64_2(struct nouveau_channel *chan, void *data)
+{
+ double *v = data;
+
+ OUT_RINGf(chan, v[0]);
+ OUT_RINGf(chan, v[1]);
+}
+
+static void
+emit_b64_3(struct nouveau_channel *chan, void *data)
+{
+ double *v = data;
+
+ OUT_RINGf(chan, v[0]);
+ OUT_RINGf(chan, v[1]);
+ OUT_RINGf(chan, v[2]);
+}
+
+static void
+emit_b64_4(struct nouveau_channel *chan, void *data)
+{
+ double *v = data;
+
+ OUT_RINGf(chan, v[0]);
+ OUT_RINGf(chan, v[1]);
+ OUT_RINGf(chan, v[2]);
+ OUT_RINGf(chan, v[3]);
+}
+
+static INLINE void
+emit_vertex(struct push_context *ctx, unsigned n)
+{
+ struct nouveau_channel *chan = ctx->nvc0->screen->base.channel;
+ int i;
+
+ if (ctx->edgeflag_input < 32) {
+ /* TODO */
+ }
+
+ BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), ctx->vertex_size);
+ for (i = 0; i < ctx->num_attrs; ++i)
+ ctx->attr[i].push(chan,
+ (uint8_t *)ctx->attr[i].map + n * ctx->attr[i].stride);
+}
+
+static void
+emit_edgeflag(struct push_context *ctx, boolean enabled)
+{
+ struct nouveau_channel *chan = ctx->nvc0->screen->base.channel;
+
+ IMMED_RING(chan, RING_3D(EDGEFLAG_ENABLE), enabled);
+}
+
+static void
+emit_elt08(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint8_t *idxbuf = ctx->idxbuf;
+
+ while (count--)
+ emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt16(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint16_t *idxbuf = ctx->idxbuf;
+
+ while (count--)
+ emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt32(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint32_t *idxbuf = ctx->idxbuf;
+
+ while (count--)
+ emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+ while (count--)
+ emit_vertex(ctx, start++);
+}
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
+}
+
+void
+nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+ struct push_context ctx;
+ unsigned i, n;
+ unsigned inst = info->instance_count;
+ unsigned prim = nvc0_prim_gl(info->mode);
+
+ ctx.nvc0 = nvc0;
+ ctx.vertex_size = nvc0->vertex->vtx_size;
+ ctx.idxbuf = NULL;
+ ctx.num_attrs = 0;
+ ctx.edgeflag = 0.5f;
+ ctx.edgeflag_input = 32;
+
+ for (i = 0; i < nvc0->vertex->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
+ struct nouveau_bo *bo = nvc0_resource(vb->buffer)->bo;
+ unsigned nr_components;
+
+ if (!(nvc0->vbo_fifo & (1 << i)))
+ continue;
+ n = ctx.num_attrs++;
+
+ if (nouveau_bo_map(bo, NOUVEAU_BO_RD))
+ return;
+ ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;
+
+ nouveau_bo_unmap(bo);
+
+ ctx.attr[n].stride = vb->stride;
+ ctx.attr[n].divisor = ve->instance_divisor;
+
+ nr_components = util_format_get_nr_components(ve->src_format);
+ switch (util_format_get_component_bits(ve->src_format,
+ UTIL_FORMAT_COLORSPACE_RGB, 0)) {
+ case 8:
+ switch (nr_components) {
+ case 1: ctx.attr[n].push = emit_b08_1; break;
+ case 2: ctx.attr[n].push = emit_b16_1; break;
+ case 3: ctx.attr[n].push = emit_b08_3; break;
+ case 4: ctx.attr[n].push = emit_b32_1; break;
+ }
+ break;
+ case 16:
+ switch (nr_components) {
+ case 1: ctx.attr[n].push = emit_b16_1; break;
+ case 2: ctx.attr[n].push = emit_b32_1; break;
+ case 3: ctx.attr[n].push = emit_b16_3; break;
+ case 4: ctx.attr[n].push = emit_b32_2; break;
+ }
+ break;
+ case 32:
+ switch (nr_components) {
+ case 1: ctx.attr[n].push = emit_b32_1; break;
+ case 2: ctx.attr[n].push = emit_b32_2; break;
+ case 3: ctx.attr[n].push = emit_b32_3; break;
+ case 4: ctx.attr[n].push = emit_b32_4; break;
+ }
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ if (info->indexed) {
+ struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
+ if (!res || nouveau_bo_map(res->bo, NOUVEAU_BO_RD))
+ return;
+ ctx.idxbuf = (uint8_t *)res->bo->map + nvc0->idxbuf.offset + res->offset;
+ nouveau_bo_unmap(res->bo);
+ ctx.idxsize = nvc0->idxbuf.index_size;
+ } else {
+ ctx.idxsize = 0;
+ }
+
+ while (inst--) {
+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (nvc0->screen->base.channel, prim);
+ switch (ctx.idxsize) {
+ case 0:
+ emit_seq(&ctx, info->start, info->count);
+ break;
+ case 1:
+ emit_elt08(&ctx, info->start, info->count);
+ break;
+ case 2:
+ emit_elt16(&ctx, info->start, info->count);
+ break;
+ case 4:
+ emit_elt32(&ctx, info->start, info->count);
+ break;
+ }
+ IMMED_RING(nvc0->screen->base.channel, RING_3D(VERTEX_END_GL), 0);
+
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c
new file mode 100644
index 0000000000..cc83fbe771
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_query.c
@@ -0,0 +1,337 @@
+/*
+ * Copyright 2011 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
+ */
+
+#include "nvc0_context.h"
+#include "nouveau/nv_object.xml.h"
+
+/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
+ * (since we use only a single GPU channel per screen) will not work properly.
+ *
+ * The first is not that big of an issue because OpenGL does not allow nested
+ * queries anyway.
+ */
+
+struct nvc0_query {
+ uint32_t *data;
+ uint32_t type;
+ uint32_t sequence;
+ struct nouveau_bo *bo;
+ uint32_t base;
+ uint32_t offset; /* base + i * 16 */
+ boolean ready;
+ boolean is64bit;
+ struct nvc0_mm_allocation *mm;
+};
+
+#define NVC0_QUERY_ALLOC_SPACE 128
+
+static INLINE struct nvc0_query *
+nvc0_query(struct pipe_query *pipe)
+{
+ return (struct nvc0_query *)pipe;
+}
+
+static boolean
+nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ int ret;
+
+ if (q->bo) {
+ nouveau_bo_ref(NULL, &q->bo);
+ if (q->mm) {
+ if (q->ready)
+ nvc0_mm_free(q->mm);
+ else
+ nvc0_fence_sched_release(screen->fence.current, q->mm);
+ }
+ }
+ if (size) {
+ q->mm = nvc0_mm_allocate(screen->mm_GART, size, &q->bo, &q->base);
+ if (!q->bo)
+ return FALSE;
+ q->offset = q->base;
+
+ ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD |
+ NOUVEAU_BO_NOSYNC);
+ if (ret) {
+ nvc0_query_allocate(nvc0, q, 0);
+ return FALSE;
+ }
+ q->data = q->bo->map;
+ nouveau_bo_unmap(q->bo);
+ }
+ return TRUE;
+}
+
+static void
+nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
+ FREE(nvc0_query(pq));
+}
+
+static struct pipe_query *
+nvc0_query_create(struct pipe_context *pipe, unsigned type)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_query *q;
+
+ q = CALLOC_STRUCT(nvc0_query);
+ if (!q)
+ return NULL;
+
+ if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
+ FREE(q);
+ return NULL;
+ }
+
+ q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
+ type == PIPE_QUERY_PRIMITIVES_EMITTED ||
+ type == PIPE_QUERY_SO_STATISTICS);
+ q->type = type;
+
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset -= 16;
+ q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
+ }
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
+ unsigned offset, uint32_t get)
+{
+ offset += q->offset;
+
+ MARK_RING (chan, 5, 2);
+ BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RING (chan, q->sequence);
+ OUT_RING (chan, get);
+}
+
+static void
+nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_query *q = nvc0_query(pq);
+
+ /* For occlusion queries we have to change the storage, because a previous
+ * query might set the initial render conition to FALSE even *after* we re-
+ * initialized it to TRUE.
+ */
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset += 16;
+ q->data += 16 / sizeof(*q->data);
+ if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
+ nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+
+ /* XXX: can we do this with the GPU, and sync with respect to a previous
+ * query ?
+ */
+ q->data[1] = 1; /* initial render condition = TRUE */
+ }
+ if (!q->is64bit)
+ q->data[0] = q->sequence++; /* the previously used one */
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
+ IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
+ IMMED_RING(chan, RING_3D(COUNTER_RESET),
+ NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ IMMED_RING(chan, RING_3D(COUNTER_RESET),
+ NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
+ OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
+ OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_query_get(chan, q, 0x10, 0x00005002);
+ break;
+ default:
+ break;
+ }
+ q->ready = FALSE;
+}
+
+static void
+nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_query *q = nvc0_query(pq);
+
+ const int index = 0; /* for multiple vertex streams */
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ nvc0_query_get(chan, q, 0, 0x0100f002);
+ BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+ OUT_RING (chan, 0);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
+ nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_query_get(chan, q, 0, 0x00005002);
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ nvc0_query_get(chan, q, 0, 0x1000f010);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static INLINE boolean
+nvc0_query_ready(struct nvc0_query *q)
+{
+ return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
+}
+
+static INLINE boolean
+nvc0_query_wait(struct nvc0_query *q)
+{
+ int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD);
+ if (ret)
+ return FALSE;
+ nouveau_bo_unmap(q->bo);
+ return TRUE;
+}
+
+static boolean
+nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, void *result)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+ uint64_t *res64 = result;
+ uint32_t *res32 = result;
+ boolean *res8 = result;
+ uint64_t *data64 = (uint64_t *)q->data;
+
+ if (q->type == PIPE_QUERY_GPU_FINISHED) {
+ res8[0] = nvc0_query_ready(q);
+ return TRUE;
+ }
+
+ if (!q->ready) /* update ? */
+ q->ready = nvc0_query_ready(q);
+ if (!q->ready) {
+ struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel;
+ if (!wait) {
+ if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */
+ FIRE_RING(chan);
+ return FALSE;
+ }
+ if (!nvc0_query_wait(q))
+ return FALSE;
+ }
+ q->ready = TRUE;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
+ res32[0] = q->data[1];
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
+ res64[0] = data64[0];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ res64[0] = data64[0];
+ res64[1] = data64[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
+ res64[0] = 1000000000;
+ res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ res64[0] = data64[1] - data64[3];
+ break;
+ default:
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static void
+nvc0_render_condition(struct pipe_context *pipe,
+ struct pipe_query *pq, uint mode)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_query *q;
+
+ if (!pq) {
+ IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+ return;
+ }
+ q = nvc0_query(pq);
+
+ if (mode == PIPE_RENDER_COND_WAIT ||
+ mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RING (chan, q->sequence);
+ OUT_RING (chan, 0x00001001);
+ }
+
+ BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
+}
+
+void
+nvc0_init_query_functions(struct nvc0_context *nvc0)
+{
+ nvc0->pipe.create_query = nvc0_query_create;
+ nvc0->pipe.destroy_query = nvc0_query_destroy;
+ nvc0->pipe.begin_query = nvc0_query_begin;
+ nvc0->pipe.end_query = nvc0_query_end;
+ nvc0->pipe.get_query_result = nvc0_query_result;
+ nvc0->pipe.render_condition = nvc0_render_condition;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c
new file mode 100644
index 0000000000..7e42cedd16
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_resource.c
@@ -0,0 +1,71 @@
+
+#include "pipe/p_context.h"
+#include "nvc0_resource.h"
+#include "nouveau/nouveau_screen.h"
+
+static unsigned
+nvc0_resource_is_referenced(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned face, int layer)
+{
+ struct nvc0_resource *res = nvc0_resource(resource);
+ unsigned flags = 0;
+
+#ifdef NOUVEAU_USERSPACE_MM
+ flags = res->status;
+#else
+ unsigned bo_flags = nouveau_bo_pending(res->bo);
+ if (bo_flags & NOUVEAU_BO_RD)
+ flags = PIPE_REFERENCED_FOR_READ;
+ if (bo_flags & NOUVEAU_BO_WR)
+ flags |= PIPE_REFERENCED_FOR_WRITE;
+#endif
+ return flags;
+}
+
+static struct pipe_resource *
+nvc0_resource_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{
+ switch (templ->target) {
+ case PIPE_BUFFER:
+ return nvc0_buffer_create(screen, templ);
+ default:
+ return nvc0_miptree_create(screen, templ);
+ }
+}
+
+static struct pipe_resource *
+nvc0_resource_from_handle(struct pipe_screen * screen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+ if (templ->target == PIPE_BUFFER)
+ return NULL;
+ else
+ return nvc0_miptree_from_handle(screen, templ, whandle);
+}
+
+void
+nvc0_init_resource_functions(struct pipe_context *pcontext)
+{
+ pcontext->get_transfer = u_get_transfer_vtbl;
+ pcontext->transfer_map = u_transfer_map_vtbl;
+ pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
+ pcontext->transfer_unmap = u_transfer_unmap_vtbl;
+ pcontext->transfer_destroy = u_transfer_destroy_vtbl;
+ pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
+ pcontext->is_resource_referenced = nvc0_resource_is_referenced;
+ pcontext->create_surface = nvc0_miptree_surface_new;
+ pcontext->surface_destroy = nvc0_miptree_surface_del;
+}
+
+void
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen)
+{
+ pscreen->resource_create = nvc0_resource_create;
+ pscreen->resource_from_handle = nvc0_resource_from_handle;
+ pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+ pscreen->resource_destroy = u_resource_destroy_vtbl;
+ pscreen->user_buffer_create = nvc0_user_buffer_create;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h
new file mode 100644
index 0000000000..17e79642a6
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_resource.h
@@ -0,0 +1,201 @@
+
+#ifndef __NVC0_RESOURCE_H__
+#define __NVC0_RESOURCE_H__
+
+#include "util/u_transfer.h"
+#include "util/u_double_list.h"
+#define NOUVEAU_NVC0
+#include "nouveau/nouveau_winsys.h"
+#undef NOUVEAU_NVC0
+
+#include "nvc0_fence.h"
+
+struct pipe_resource;
+struct nouveau_bo;
+struct nvc0_context;
+
+#define NVC0_BUFFER_SCORE_MIN -25000
+#define NVC0_BUFFER_SCORE_MAX 25000
+#define NVC0_BUFFER_SCORE_VRAM_THRESHOLD 20000
+
+/* DIRTY: buffer was (or will be after the next flush) written to by GPU and
+ * resource->data has not been updated to reflect modified VRAM contents
+ *
+ * USER_MEMORY: resource->data is a pointer to client memory and may change
+ * between GL calls
+ */
+#define NVC0_BUFFER_STATUS_DIRTY (1 << 0)
+#define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7)
+
+/* Resources, if mapped into the GPU's address space, are guaranteed to
+ * have constant virtual addresses.
+ * The address of a resource will lie within the nouveau_bo referenced,
+ * and this bo should be added to the memory manager's validation list.
+ */
+struct nvc0_resource {
+ struct pipe_resource base;
+ const struct u_resource_vtbl *vtbl;
+
+ uint8_t *data;
+ struct nouveau_bo *bo;
+ uint32_t offset;
+
+ uint8_t status;
+ uint8_t domain;
+
+ int16_t score; /* low if mapped very often, if high can move to VRAM */
+
+ struct nvc0_fence *fence;
+ struct nvc0_fence *fence_wr;
+
+ struct nvc0_mm_allocation *mm;
+};
+
+boolean
+nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *,
+ unsigned start, unsigned size);
+
+boolean
+nvc0_buffer_migrate(struct nvc0_context *,
+ struct nvc0_resource *, unsigned domain);
+
+static INLINE void
+nvc0_buffer_adjust_score(struct nvc0_context *nvc0, struct nvc0_resource *res,
+ int16_t score)
+{
+ if (score < 0) {
+ if (res->score > NVC0_BUFFER_SCORE_MIN)
+ res->score += score;
+ } else
+ if (score > 0){
+ if (res->score < NVC0_BUFFER_SCORE_MAX)
+ res->score += score;
+ if (res->domain == NOUVEAU_BO_GART &&
+ res->score > NVC0_BUFFER_SCORE_VRAM_THRESHOLD)
+ nvc0_buffer_migrate(nvc0, res, NOUVEAU_BO_VRAM);
+ }
+}
+
+/* XXX: wait for fence (atm only using this for vertex push) */
+static INLINE void *
+nvc0_resource_map_offset(struct nvc0_context *nvc0,
+ struct nvc0_resource *res, uint32_t offset,
+ uint32_t flags)
+{
+ void *map;
+
+ nvc0_buffer_adjust_score(nvc0, res, -250);
+
+ if ((res->domain == NOUVEAU_BO_VRAM) &&
+ (res->status & NVC0_BUFFER_STATUS_DIRTY))
+ nvc0_buffer_download(nvc0, res, 0, res->base.width0);
+
+ if ((res->domain != NOUVEAU_BO_GART) ||
+ (res->status & NVC0_BUFFER_STATUS_USER_MEMORY))
+ return res->data + offset;
+
+ if (res->mm)
+ flags |= NOUVEAU_BO_NOSYNC;
+
+ if (nouveau_bo_map_range(res->bo, res->offset + offset,
+ res->base.width0, flags))
+ return NULL;
+
+ map = res->bo->map;
+ nouveau_bo_unmap(res->bo);
+ return map;
+}
+
+static INLINE void
+nvc0_resource_unmap(struct nvc0_resource *res)
+{
+ /* no-op */
+}
+
+#define NVC0_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf)
+
+#define NVC0_TILE_PITCH(m) (64 << NVC0_TILE_DIM_SHIFT(m, 0))
+#define NVC0_TILE_HEIGHT(m) ( 8 << NVC0_TILE_DIM_SHIFT(m, 1))
+#define NVC0_TILE_DEPTH(m) ( 1 << NVC0_TILE_DIM_SHIFT(m, 2))
+
+#define NVC0_TILE_SIZE_2D(m) (((64 * 8) << \
+ NVC0_TILE_DIM_SHIFT(m, 0)) << \
+ NVC0_TILE_DIM_SHIFT(m, 1))
+
+#define NVC0_TILE_SIZE(m) (NVC0_TILE_SIZE_2D(m) << NVC0_TILE_DIM_SHIFT(m, 2))
+
+struct nvc0_miptree_level {
+ uint32_t offset;
+ uint32_t pitch;
+ uint32_t tile_mode;
+};
+
+#define NVC0_MAX_TEXTURE_LEVELS 16
+
+struct nvc0_miptree {
+ struct nvc0_resource base;
+ struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS];
+ uint32_t total_size;
+ uint32_t layer_stride;
+ boolean layout_3d; /* TRUE if layer count varies with mip level */
+};
+
+static INLINE struct nvc0_miptree *
+nvc0_miptree(struct pipe_resource *pt)
+{
+ return (struct nvc0_miptree *)pt;
+}
+
+static INLINE struct nvc0_resource *
+nvc0_resource(struct pipe_resource *resource)
+{
+ return (struct nvc0_resource *)resource;
+}
+
+/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */
+static INLINE boolean
+nvc0_resource_mapped_by_gpu(struct pipe_resource *resource)
+{
+ return nvc0_resource(resource)->domain != 0;
+}
+
+void
+nvc0_init_resource_functions(struct pipe_context *pcontext);
+
+void
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen);
+
+/* Internal functions:
+ */
+struct pipe_resource *
+nvc0_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmp);
+
+struct pipe_resource *
+nvc0_miptree_from_handle(struct pipe_screen *pscreen,
+ const struct pipe_resource *template,
+ struct winsys_handle *whandle);
+
+struct pipe_resource *
+nvc0_buffer_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ);
+
+struct pipe_resource *
+nvc0_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes,
+ unsigned usage);
+
+
+struct pipe_surface *
+nvc0_miptree_surface_new(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_surface *templ);
+
+void
+nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *);
+
+boolean
+nvc0_user_buffer_upload(struct nvc0_resource *, unsigned base, unsigned size);
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
new file mode 100644
index 0000000000..54eec660b2
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -0,0 +1,669 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "util/u_format_s3tc.h"
+#include "pipe/p_screen.h"
+
+#include "nvc0_fence.h"
+#include "nvc0_context.h"
+#include "nvc0_screen.h"
+
+#include "nouveau/nv_object.xml.h"
+#include "nvc0_graph_macros.h"
+
+static boolean
+nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned bindings, unsigned geom_flags)
+{
+ if (sample_count > 1)
+ return FALSE;
+
+ if (!util_format_s3tc_enabled) {
+ switch (format) {
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return FALSE;
+ default:
+ break;
+ }
+ }
+
+ /* transfers & shared are always supported */
+ bindings &= ~(PIPE_BIND_TRANSFER_READ |
+ PIPE_BIND_TRANSFER_WRITE |
+ PIPE_BIND_SHARED);
+
+ return (nvc0_format_table[format].usage & bindings) == bindings;
+}
+
+static int
+nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 32;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 64;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 13;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 13;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_DEPTH_CLAMP:
+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_GLSL:
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 8;
+ case PIPE_CAP_TIMER_QUERY:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_STREAM_OUTPUT:
+ return 0;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 0;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 0;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ return 1;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static int
+nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+ enum pipe_shader_cap param)
+{
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ /*
+ case PIPE_SHADER_TESSELLATION_CONTROL:
+ case PIPE_SHADER_TESSELLATION_EVALUATION:
+ */
+ case PIPE_SHADER_GEOMETRY:
+ case PIPE_SHADER_FRAGMENT:
+ break;
+ default:
+ return 0;
+ }
+
+ switch (param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 16384;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return 4;
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (shader == PIPE_SHADER_VERTEX)
+ return 32;
+ return 0x300 / 16;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return 65536 / 16;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return 14;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return 1;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ return shader != PIPE_SHADER_FRAGMENT;
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return NVC0_CAP_MAX_PROGRAM_TEMPS;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ return 0; /* please inline, or provide function declarations */
+ default:
+ NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0f;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0f;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 16.0f;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0f;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0.0f;
+ }
+}
+
+static void
+nvc0_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+
+ nvc0_fence_wait(screen->fence.current);
+ nvc0_fence_reference(&screen->fence.current, NULL);
+
+ nouveau_bo_ref(NULL, &screen->text);
+ nouveau_bo_ref(NULL, &screen->tls);
+ nouveau_bo_ref(NULL, &screen->txc);
+ nouveau_bo_ref(NULL, &screen->fence.bo);
+ nouveau_bo_ref(NULL, &screen->mp_stack_bo);
+
+ nouveau_resource_destroy(&screen->text_heap);
+
+ if (screen->tic.entries)
+ FREE(screen->tic.entries);
+
+ nvc0_mm_destroy(screen->mm_GART);
+ nvc0_mm_destroy(screen->mm_VRAM);
+ nvc0_mm_destroy(screen->mm_VRAM_fe0);
+
+ nouveau_grobj_free(&screen->fermi);
+ nouveau_grobj_free(&screen->eng2d);
+ nouveau_grobj_free(&screen->m2mf);
+
+ nouveau_screen_fini(&screen->base);
+
+ FREE(screen);
+}
+
+static int
+nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
+ unsigned size, const uint32_t *data)
+{
+ struct nouveau_channel *chan = screen->base.channel;
+
+ size /= 4;
+
+ BEGIN_RING(chan, RING_3D_(NVC0_GRAPH_MACRO_ID), 2);
+ OUT_RING (chan, (m - 0x3800) / 8);
+ OUT_RING (chan, pos);
+ BEGIN_RING_1I(chan, RING_3D_(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1);
+ OUT_RING (chan, pos);
+ OUT_RINGp (chan, data, size);
+
+ return pos + size;
+}
+
+static void
+nvc0_screen_fence_reference(struct pipe_screen *pscreen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+ nvc0_fence_reference((struct nvc0_fence **)ptr, nvc0_fence(fence));
+}
+
+static int
+nvc0_screen_fence_signalled(struct pipe_screen *pscreen,
+ struct pipe_fence_handle *fence,
+ unsigned flags)
+{
+ return !(nvc0_fence_signalled(nvc0_fence(fence)));
+}
+
+static int
+nvc0_screen_fence_finish(struct pipe_screen *pscreen,
+ struct pipe_fence_handle *fence,
+ unsigned flags)
+{
+ return nvc0_fence_wait((struct nvc0_fence *)fence) != TRUE;
+}
+
+static void
+nvc0_magic_3d_init(struct nouveau_channel *chan)
+{
+ BEGIN_RING(chan, RING_3D_(0x10cc), 1);
+ OUT_RING (chan, 0xff);
+ BEGIN_RING(chan, RING_3D_(0x10e0), 2);
+ OUT_RING(chan, 0xff);
+ OUT_RING(chan, 0xff);
+ BEGIN_RING(chan, RING_3D_(0x10ec), 2);
+ OUT_RING(chan, 0xff);
+ OUT_RING(chan, 0xff);
+ BEGIN_RING(chan, RING_3D_(0x074c), 1);
+ OUT_RING (chan, 0x3f);
+
+ BEGIN_RING(chan, RING_3D_(0x10f8), 1);
+ OUT_RING (chan, 0x0101);
+
+ BEGIN_RING(chan, RING_3D_(0x16a8), 1);
+ OUT_RING (chan, (3 << 16) | 3);
+ BEGIN_RING(chan, RING_3D_(0x1794), 1);
+ OUT_RING (chan, (2 << 16) | 2);
+ BEGIN_RING(chan, RING_3D_(0x0de8), 1);
+ OUT_RING (chan, 1);
+
+#if 0 /* software method */
+ BEGIN_RING(chan, RING_3D_(0x1528), 1); /* MP poke */
+ OUT_RING (chan, 0);
+#endif
+
+ BEGIN_RING(chan, RING_3D_(0x12ac), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x0218), 1);
+ OUT_RING (chan, 0x10);
+ BEGIN_RING(chan, RING_3D_(0x10fc), 1);
+ OUT_RING (chan, 0x10);
+ BEGIN_RING(chan, RING_3D_(0x1290), 1);
+ OUT_RING (chan, 0x10);
+ BEGIN_RING(chan, RING_3D_(0x12d8), 2);
+ OUT_RING (chan, 0x10);
+ OUT_RING (chan, 0x10);
+ BEGIN_RING(chan, RING_3D_(0x06d4), 1);
+ OUT_RING (chan, 8);
+ BEGIN_RING(chan, RING_3D_(0x1140), 1);
+ OUT_RING (chan, 0x10);
+ BEGIN_RING(chan, RING_3D_(0x1610), 1);
+ OUT_RING (chan, 0xe);
+
+ BEGIN_RING(chan, RING_3D_(0x164c), 1);
+ OUT_RING (chan, 1 << 12);
+ BEGIN_RING(chan, RING_3D_(0x151c), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D_(0x020c), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D_(0x030c), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x0300), 1);
+ OUT_RING (chan, 3);
+#if 0 /* software method */
+ BEGIN_RING(chan, RING_3D_(0x1280), 1); /* PGRAPH poke */
+ OUT_RING (chan, 0);
+#endif
+ BEGIN_RING(chan, RING_3D_(0x02d0), 1);
+ OUT_RING (chan, 0x1f40);
+ BEGIN_RING(chan, RING_3D_(0x00fdc), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D_(0x19c0), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D_(0x075c), 1);
+ OUT_RING (chan, 3);
+
+ BEGIN_RING(chan, RING_3D_(0x0fac), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x0f90), 1);
+ OUT_RING (chan, 0);
+}
+
+#define FAIL_SCREEN_INIT(str, err) \
+ do { \
+ NOUVEAU_ERR(str, err); \
+ nvc0_screen_destroy(pscreen); \
+ return NULL; \
+ } while(0)
+
+struct pipe_screen *
+nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
+{
+ struct nvc0_screen *screen;
+ struct nouveau_channel *chan;
+ struct pipe_screen *pscreen;
+ int ret;
+ unsigned i;
+
+ screen = CALLOC_STRUCT(nvc0_screen);
+ if (!screen)
+ return NULL;
+ pscreen = &screen->base.base;
+
+ ret = nouveau_screen_init(&screen->base, dev);
+ if (ret) {
+ nvc0_screen_destroy(pscreen);
+ return NULL;
+ }
+ chan = screen->base.channel;
+
+ pscreen->winsys = ws;
+ pscreen->destroy = nvc0_screen_destroy;
+ pscreen->context_create = nvc0_create;
+ pscreen->is_format_supported = nvc0_screen_is_format_supported;
+ pscreen->get_param = nvc0_screen_get_param;
+ pscreen->get_shader_param = nvc0_screen_get_shader_param;
+ pscreen->get_paramf = nvc0_screen_get_paramf;
+ pscreen->fence_reference = nvc0_screen_fence_reference;
+ pscreen->fence_signalled = nvc0_screen_fence_signalled;
+ pscreen->fence_finish = nvc0_screen_fence_finish;
+
+ nvc0_screen_init_resource_functions(pscreen);
+
+ screen->base.vertex_buffer_flags = NOUVEAU_BO_GART;
+ screen->base.index_buffer_flags = 0;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
+ &screen->fence.bo);
+ if (ret)
+ goto fail;
+ nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR);
+ screen->fence.map = screen->fence.bo->map;
+ nouveau_bo_unmap(screen->fence.bo);
+
+ for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) {
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE,
+ &screen->scratch.bo[i]);
+ if (ret)
+ goto fail;
+ }
+
+ ret = nouveau_grobj_alloc(chan, 0xbeef9039, NVC0_M2MF, &screen->m2mf);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
+
+ BIND_RING (chan, screen->m2mf, NVC0_SUBCH_MF);
+ BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, 0);
+
+ ret = nouveau_grobj_alloc(chan, 0xbeef902d, NVC0_2D, &screen->eng2d);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret);
+
+ BIND_RING (chan, screen->eng2d, NVC0_SUBCH_2D);
+ BEGIN_RING(chan, RING_2D(OPERATION), 1);
+ OUT_RING (chan, NVC0_2D_OPERATION_SRCCOPY);
+ BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_2D(COLOR_KEY_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_2D_(0x0884), 1);
+ OUT_RING (chan, 0x3f);
+ BEGIN_RING(chan, RING_2D_(0x0888), 1);
+ OUT_RING (chan, 1);
+
+ ret = nouveau_grobj_alloc(chan, 0xbeef9097, NVC0_3D, &screen->fermi);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
+
+ BIND_RING (chan, screen->fermi, NVC0_SUBCH_3D);
+ BEGIN_RING(chan, RING_3D(NOTIFY_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, RING_3D(COND_MODE), 1);
+ OUT_RING (chan, NVC0_3D_COND_MODE_ALWAYS);
+
+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+ OUT_RING (chan, 1);
+
+ BEGIN_RING(chan, RING_3D(CSAA_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1);
+ OUT_RING (chan, NVC0_3D_MULTISAMPLE_MODE_1X);
+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1);
+ OUT_RING (chan, 0);
+
+ nvc0_magic_3d_init(chan);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, &screen->text);
+ if (ret)
+ goto fail;
+
+ nouveau_resource_init(&screen->text_heap, 0, 1 << 20);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16,
+ &screen->uniforms);
+ if (ret)
+ goto fail;
+
+ /* auxiliary constants (6 user clip planes, base instance id) */
+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+ OUT_RING (chan, 256);
+ OUT_RELOCh(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ for (i = 0; i < 5; ++i) {
+ BEGIN_RING(chan, RING_3D(CB_BIND(i)), 1);
+ OUT_RING (chan, (15 << 4) | 1);
+ }
+
+ screen->tls_size = 4 * 4 * 32 * 128 * 4;
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
+ screen->tls_size, &screen->tls);
+ if (ret)
+ goto fail;
+
+ BEGIN_RING(chan, RING_3D(CODE_ADDRESS_HIGH), 2);
+ OUT_RELOCh(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, screen->tls_size >> 32);
+ OUT_RING (chan, screen->tls_size);
+ BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1);
+ OUT_RING (chan, 0);
+
+ for (i = 0; i < 5; ++i) {
+ BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1);
+ OUT_RING (chan, 0x54);
+ }
+ BEGIN_RING(chan, RING_3D(LINKED_TSC), 1);
+ OUT_RING (chan, 0);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20,
+ &screen->mp_stack_bo);
+ if (ret)
+ goto fail;
+
+ BEGIN_RING(chan, RING_3D_(0x17bc), 3);
+ OUT_RELOCh(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, 1);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, &screen->txc);
+ if (ret)
+ goto fail;
+
+ BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, NVC0_TIC_MAX_ENTRIES - 1);
+
+ BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, NVC0_TSC_MAX_ENTRIES - 1);
+
+ BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */
+ OUT_RING (chan, 0x3f);
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
+ OUT_RINGf (chan, 0.0f);
+ OUT_RINGf (chan, 1.0f);
+
+ /* We use scissors instead of exact view volume clipping,
+ * so they're always enabled.
+ */
+ BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 8192 << 16);
+ OUT_RING (chan, 8192 << 16);
+
+ BEGIN_RING(chan, RING_3D_(0x0fac), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x3484), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x0dbc), 1);
+ OUT_RING (chan, 0x00010000);
+ BEGIN_RING(chan, RING_3D_(0x0dd8), 1);
+ OUT_RING (chan, 0xff800006);
+ BEGIN_RING(chan, RING_3D_(0x3488), 1);
+ OUT_RING (chan, 0);
+
+#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
+
+ i = 0;
+ MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables);
+ MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select);
+ MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select);
+ MK_MACRO(NVC0_3D_GP_SELECT, nvc0_9097_gp_select);
+ MK_MACRO(NVC0_3D_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front);
+ MK_MACRO(NVC0_3D_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back);
+ MK_MACRO(NVC0_3D_COLOR_MASK_BROADCAST, nvc0_9097_color_mask_brdc);
+
+ BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
+ OUT_RING (chan, 0x40);
+ BEGIN_RING(chan, RING_3D(GP_BUILTIN_RESULT_EN), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
+ OUT_RING (chan, 0x30);
+ BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1);
+ OUT_RING (chan, 3);
+ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1);
+ OUT_RING (chan, 0x20);
+ BEGIN_RING(chan, RING_3D(SP_SELECT(0)), 1);
+ OUT_RING (chan, 0x00);
+
+ BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(POINT_RASTER_RULES), 1);
+ OUT_RING (chan, NVC0_3D_POINT_RASTER_RULES_OGL);
+
+ BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1);
+ OUT_RING (chan, 0x11111111);
+ BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1);
+ OUT_RING (chan, 1);
+
+ BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
+ OUT_RING (chan, 0xab);
+ OUT_RING (chan, 0x00000000);
+
+ FIRE_RING (chan);
+
+ screen->tic.entries = CALLOC(4096, sizeof(void *));
+ screen->tsc.entries = screen->tic.entries + 2048;
+
+ screen->mm_GART = nvc0_mm_create(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+ 0x000);
+ screen->mm_VRAM = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0x000);
+ screen->mm_VRAM_fe0 = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0);
+
+ nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
+
+ return pscreen;
+
+fail:
+ nvc0_screen_destroy(pscreen);
+ return NULL;
+}
+
+void
+nvc0_screen_make_buffers_resident(struct nvc0_screen *screen)
+{
+ struct nouveau_channel *chan = screen->base.channel;
+
+ const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+
+ nouveau_bo_validate(chan, screen->text, flags);
+ nouveau_bo_validate(chan, screen->uniforms, flags);
+ nouveau_bo_validate(chan, screen->txc, flags);
+ nouveau_bo_validate(chan, screen->tls, flags);
+ nouveau_bo_validate(chan, screen->mp_stack_bo, flags);
+}
+
+int
+nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry)
+{
+ int i = screen->tic.next;
+
+ while (screen->tic.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
+
+ screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
+
+ if (screen->tic.entries[i])
+ nvc0_tic_entry(screen->tic.entries[i])->id = -1;
+
+ screen->tic.entries[i] = entry;
+ return i;
+}
+
+int
+nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry)
+{
+ int i = screen->tsc.next;
+
+ while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
+
+ screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
+
+ if (screen->tsc.entries[i])
+ nvc0_tsc_entry(screen->tsc.entries[i])->id = -1;
+
+ screen->tsc.entries[i] = entry;
+ return i;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
new file mode 100644
index 0000000000..1fac142e2b
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -0,0 +1,192 @@
+#ifndef __NVC0_SCREEN_H__
+#define __NVC0_SCREEN_H__
+
+#define NOUVEAU_NVC0
+#include "nouveau/nouveau_screen.h"
+#undef NOUVEAU_NVC0
+#include "nvc0_winsys.h"
+#include "nvc0_stateobj.h"
+
+#define NVC0_TIC_MAX_ENTRIES 2048
+#define NVC0_TSC_MAX_ENTRIES 2048
+
+struct nvc0_mman;
+struct nvc0_context;
+struct nvc0_fence;
+
+#define NVC0_SCRATCH_SIZE (2 << 20)
+#define NVC0_SCRATCH_NR_BUFFERS 2
+
+struct nvc0_screen {
+ struct nouveau_screen base;
+ struct nouveau_winsys *nvws;
+
+ struct nvc0_context *cur_ctx;
+
+ struct nouveau_bo *text;
+ struct nouveau_bo *uniforms;
+ struct nouveau_bo *tls;
+ struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
+ struct nouveau_bo *mp_stack_bo;
+
+ uint64_t tls_size;
+
+ struct nouveau_resource *text_heap;
+
+ struct {
+ struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS];
+ uint8_t *buf;
+ int index;
+ uint32_t offset;
+ } scratch;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
+ } tic;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
+ } tsc;
+
+ struct {
+ uint32_t *map;
+ struct nvc0_fence *head;
+ struct nvc0_fence *tail;
+ struct nvc0_fence *current;
+ uint32_t sequence;
+ uint32_t sequence_ack;
+ struct nouveau_bo *bo;
+ } fence;
+
+ struct nvc0_mman *mm_GART;
+ struct nvc0_mman *mm_VRAM;
+ struct nvc0_mman *mm_VRAM_fe0;
+
+ struct nouveau_grobj *fermi;
+ struct nouveau_grobj *eng2d;
+ struct nouveau_grobj *m2mf;
+};
+
+static INLINE struct nvc0_screen *
+nvc0_screen(struct pipe_screen *screen)
+{
+ return (struct nvc0_screen *)screen;
+}
+
+/* Since a resource can be migrated, we need to decouple allocations from
+ * them. This struct is linked with fences for delayed freeing of allocs.
+ */
+struct nvc0_mm_allocation {
+ struct nvc0_mm_allocation *next;
+ void *priv;
+ uint32_t offset;
+};
+
+static INLINE void
+nvc0_fence_sched_release(struct nvc0_fence *nf, struct nvc0_mm_allocation *mm)
+{
+ mm->next = nf->buffers;
+ nf->buffers = mm;
+}
+
+extern struct nvc0_mman *
+nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type);
+
+extern void
+nvc0_mm_destroy(struct nvc0_mman *);
+
+extern struct nvc0_mm_allocation *
+nvc0_mm_allocate(struct nvc0_mman *,
+ uint32_t size, struct nouveau_bo **, uint32_t *offset);
+extern void
+nvc0_mm_free(struct nvc0_mm_allocation *);
+
+void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
+
+int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
+int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
+
+static INLINE void
+nvc0_resource_fence(struct nvc0_resource *res, uint32_t flags)
+{
+ struct nvc0_screen *screen = nvc0_screen(res->base.screen);
+
+ if (res->mm) {
+ nvc0_fence_reference(&res->fence, screen->fence.current);
+
+ if (flags & NOUVEAU_BO_WR)
+ nvc0_fence_reference(&res->fence_wr, screen->fence.current);
+ }
+}
+
+static INLINE void
+nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags)
+{
+ struct nvc0_screen *screen = nvc0_screen(res->base.screen);
+
+ nouveau_bo_validate(screen->base.channel, res->bo, flags);
+
+ nvc0_resource_fence(res, flags);
+}
+
+
+boolean
+nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit);
+
+void
+nvc0_screen_fence_next(struct nvc0_screen *);
+
+static INLINE boolean
+nvc0_screen_fence_emit(struct nvc0_screen *screen)
+{
+ nvc0_fence_emit(screen->fence.current);
+
+ return nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
+}
+
+struct nvc0_format {
+ uint32_t rt;
+ uint32_t tic;
+ uint32_t vtx;
+ uint32_t usage;
+};
+
+extern const struct nvc0_format nvc0_format_table[];
+
+static INLINE void
+nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
+{
+ if (tic->id >= 0)
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+}
+
+static INLINE void
+nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc)
+{
+ if (tsc->id >= 0)
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+}
+
+static INLINE void
+nvc0_screen_tic_free(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
+{
+ if (tic->id >= 0) {
+ screen->tic.entries[tic->id] = NULL;
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+ }
+}
+
+static INLINE void
+nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc)
+{
+ if (tsc->id >= 0) {
+ screen->tsc.entries[tsc->id] = NULL;
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
new file mode 100644
index 0000000000..981b5488d0
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+#include "nvc0_context.h"
+
+static boolean
+nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+ int ret;
+ unsigned size;
+
+ if (prog->translated)
+ return TRUE;
+
+ prog->translated = nvc0_program_translate(prog);
+ if (!prog->translated)
+ return FALSE;
+
+ size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100);
+
+ ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog,
+ &prog->res);
+ if (ret)
+ return FALSE;
+
+ prog->code_base = prog->res->start;
+
+ nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM,
+ prog->code_base, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM,
+ prog->code_base + NVC0_SHADER_HEADER_SIZE,
+ prog->code_size, prog->code);
+
+ BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1);
+ OUT_RING (nvc0->screen->base.channel, 0x1111);
+
+ return TRUE;
+}
+
+void
+nvc0_vertprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *vp = nvc0->vertprog;
+
+ if (nvc0->clip.nr > vp->vp.num_ucps) {
+ assert(nvc0->clip.nr <= 6);
+ vp->vp.num_ucps = 6;
+
+ if (vp->translated)
+ nvc0_program_destroy(nvc0, vp);
+ }
+
+ if (!nvc0_program_validate(nvc0, vp))
+ return;
+
+ BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2);
+ OUT_RING (chan, 0x11);
+ OUT_RING (chan, vp->code_base);
+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1);
+ OUT_RING (chan, vp->max_gpr);
+
+ // BEGIN_RING(chan, RING_3D_(0x163c), 1);
+ // OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(VERT_COLOR_CLAMP_EN), 1);
+ OUT_RING (chan, 1);
+}
+
+void
+nvc0_fragprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *fp = nvc0->fragprog;
+
+ if (!nvc0_program_validate(nvc0, fp))
+ return;
+
+ BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1);
+ OUT_RING (chan, fp->fp.early_z);
+ BEGIN_RING(chan, RING_3D(SP_SELECT(5)), 2);
+ OUT_RING (chan, 0x51);
+ OUT_RING (chan, fp->code_base);
+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(5)), 1);
+ OUT_RING (chan, fp->max_gpr);
+
+ BEGIN_RING(chan, RING_3D_(0x0360), 2);
+ OUT_RING (chan, 0x20164010);
+ OUT_RING (chan, 0x20);
+ BEGIN_RING(chan, RING_3D_(0x196c), 1);
+ OUT_RING (chan, fp->flags[0]);
+}
+
+void
+nvc0_tctlprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *tp = nvc0->tctlprog;
+
+ if (!tp) {
+ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1);
+ OUT_RING (chan, 0x20);
+ return;
+ }
+ if (!nvc0_program_validate(nvc0, tp))
+ return;
+
+ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2);
+ OUT_RING (chan, 0x21);
+ OUT_RING (chan, tp->code_base);
+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(2)), 1);
+ OUT_RING (chan, tp->max_gpr);
+}
+
+void
+nvc0_tevlprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *tp = nvc0->tevlprog;
+
+ if (!tp) {
+ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
+ OUT_RING (chan, 0x30);
+ return;
+ }
+ if (!nvc0_program_validate(nvc0, tp))
+ return;
+
+ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
+ OUT_RING (chan, 0x31);
+ BEGIN_RING(chan, RING_3D(SP_START_ID(3)), 1);
+ OUT_RING (chan, tp->code_base);
+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(3)), 1);
+ OUT_RING (chan, tp->max_gpr);
+}
+
+void
+nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *gp = nvc0->gmtyprog;
+
+ if (!gp) {
+ BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
+ OUT_RING (chan, 0x40);
+ return;
+ }
+ if (!nvc0_program_validate(nvc0, gp))
+ return;
+
+ BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
+ OUT_RING (chan, 0x41);
+ BEGIN_RING(chan, RING_3D(SP_START_ID(4)), 1);
+ OUT_RING (chan, gp->code_base);
+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1);
+ OUT_RING (chan, gp->max_gpr);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
new file mode 100644
index 0000000000..c08f3693f5
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_state.c
@@ -0,0 +1,865 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nvc0_stateobj.h"
+#include "nvc0_context.h"
+
+#include "nvc0_3d.xml.h"
+#include "nv50_texture.xml.h"
+
+#include "nouveau/nouveau_gldefs.h"
+
+static INLINE uint32_t
+nvc0_colormask(unsigned mask)
+{
+ uint32_t ret = 0;
+
+ if (mask & PIPE_MASK_R)
+ ret |= 0x0001;
+ if (mask & PIPE_MASK_G)
+ ret |= 0x0010;
+ if (mask & PIPE_MASK_B)
+ ret |= 0x0100;
+ if (mask & PIPE_MASK_A)
+ ret |= 0x1000;
+
+ return ret;
+}
+
+static INLINE uint32_t
+nvc0_blend_fac(unsigned factor)
+{
+ static const uint16_t bf[] = {
+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */
+ NV50_3D_BLEND_FACTOR_ONE,
+ NV50_3D_BLEND_FACTOR_SRC_COLOR,
+ NV50_3D_BLEND_FACTOR_SRC_ALPHA,
+ NV50_3D_BLEND_FACTOR_DST_ALPHA,
+ NV50_3D_BLEND_FACTOR_DST_COLOR,
+ NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE,
+ NV50_3D_BLEND_FACTOR_CONSTANT_COLOR,
+ NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA,
+ NV50_3D_BLEND_FACTOR_SRC1_COLOR,
+ NV50_3D_BLEND_FACTOR_SRC1_ALPHA,
+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */
+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */
+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */
+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */
+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */
+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */
+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */
+ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
+ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
+ NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
+ NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
+ NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
+ NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
+ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR,
+ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
+ };
+
+ assert(factor < (sizeof(bf) / sizeof(bf[0])));
+ return bf[factor];
+}
+
+static void *
+nvc0_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj);
+ int i;
+
+ so->pipe = *cso;
+
+ SB_IMMED_3D(so, BLEND_INDEPENDENT, cso->independent_blend_enable);
+
+ if (!cso->independent_blend_enable) {
+ SB_BEGIN_3D(so, BLEND_ENABLES, 1);
+ SB_DATA (so, cso->rt[0].blend_enable ? 0xff : 0);
+
+ if (cso->rt[0].blend_enable) {
+ SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_src_factor));
+ SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1);
+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_dst_factor));
+ }
+
+ SB_BEGIN_3D(so, COLOR_MASK_BROADCAST, 1);
+ SB_DATA (so, nvc0_colormask(cso->rt[0].colormask));
+ } else {
+ uint8_t en = 0;
+
+ for (i = 0; i < 8; ++i) {
+ if (!cso->rt[i].blend_enable)
+ continue;
+ en |= 1 << i;
+
+ SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor));
+ }
+ SB_BEGIN_3D(so, BLEND_ENABLES, 1);
+ SB_DATA (so, en);
+
+ SB_BEGIN_3D(so, COLOR_MASK(0), 8);
+ for (i = 0; i < 8; ++i)
+ SB_DATA(so, nvc0_colormask(cso->rt[i].colormask));
+ }
+
+ if (cso->logicop_enable) {
+ SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_logicop_func(cso->logicop_func));
+ } else {
+ SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0);
+ }
+
+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ return so;
+}
+
+static void
+nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->blend = hwcso;
+ nvc0->dirty |= NVC0_NEW_BLEND;
+}
+
+static void
+nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nvc0_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nvc0_rasterizer_stateobj *so;
+
+ so = CALLOC_STRUCT(nvc0_rasterizer_stateobj);
+ if (!so)
+ return NULL;
+ so->pipe = *cso;
+
+#ifndef NVC0_SCISSORS_CLIPPING
+ SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor);
+#endif
+
+ SB_BEGIN_3D(so, SHADE_MODEL, 1);
+ SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
+ NVC0_3D_SHADE_MODEL_SMOOTH);
+ SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
+ SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
+
+ SB_BEGIN_3D(so, LINE_WIDTH, 1);
+ SB_DATA (so, fui(cso->line_width));
+ SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth);
+
+ SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1);
+ if (cso->line_stipple_enable) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1);
+ SB_DATA (so, (cso->line_stipple_pattern << 8) |
+ cso->line_stipple_factor);
+
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex);
+ if (!cso->point_size_per_vertex) {
+ SB_BEGIN_3D(so, POINT_SIZE, 1);
+ SB_DATA (so, fui(cso->point_size));
+ }
+ SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization);
+ SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth);
+
+ SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 1);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_front));
+ SB_BEGIN_3D(so, POLYGON_MODE_BACK, 1);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_back));
+ SB_IMMED_3D(so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth);
+
+ SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3);
+ SB_DATA (so, cso->cull_face != PIPE_FACE_NONE);
+ SB_DATA (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW :
+ NVC0_3D_FRONT_FACE_CW);
+ switch (cso->cull_face) {
+ case PIPE_FACE_FRONT_AND_BACK:
+ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK);
+ break;
+ case PIPE_FACE_FRONT:
+ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT);
+ break;
+ case PIPE_FACE_BACK:
+ default:
+ SB_DATA(so, NVC0_3D_CULL_FACE_BACK);
+ break;
+ }
+
+ SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable);
+ SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
+ SB_DATA (so, cso->offset_point);
+ SB_DATA (so, cso->offset_line);
+ SB_DATA (so, cso->offset_tri);
+
+ if (cso->offset_point || cso->offset_line || cso->offset_tri) {
+ SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
+ SB_DATA (so, fui(cso->offset_scale));
+ SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
+ SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */
+ }
+
+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->rast = hwcso;
+ nvc0->dirty |= NVC0_NEW_RASTERIZER;
+}
+
+static void
+nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nvc0_zsa_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj);
+
+ so->pipe = *cso;
+
+ SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask);
+ SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1);
+ if (cso->depth.enabled) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
+ SB_DATA (so, nvgl_comparison_op(cso->depth.func));
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ if (cso->stencil[0].enabled) {
+ SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
+ SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2);
+ SB_DATA (so, cso->stencil[0].writemask);
+ SB_DATA (so, cso->stencil[0].valuemask);
+ } else {
+ SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func));
+ SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
+ SB_DATA (so, cso->stencil[1].writemask);
+ SB_DATA (so, cso->stencil[1].valuemask);
+ } else {
+ SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0);
+ }
+
+ SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1);
+ if (cso->alpha.enabled) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
+ SB_DATA (so, fui(cso->alpha.ref_value));
+ SB_DATA (so, nvgl_comparison_op(cso->alpha.func));
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->zsa = hwcso;
+ nvc0->dirty |= NVC0_NEW_ZSA;
+}
+
+static void
+nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/* ====================== SAMPLERS AND TEXTURES ================================
+ */
+
+#define NV50_TSC_WRAP_CASE(n) \
+ case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
+
+static INLINE unsigned
+nv50_tsc_wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ NV50_TSC_WRAP_CASE(REPEAT);
+ NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(CLAMP);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ return NV50_TSC_WRAP_REPEAT;
+ }
+}
+
+static void *
+nvc0_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nvc0_tsc_entry *so = CALLOC_STRUCT(nvc0_tsc_entry);
+ float f[2];
+
+ so->id = -1;
+
+ so->tsc[0] = (0x00026000 |
+ (nv50_tsc_wrap_mode(cso->wrap_s) << 0) |
+ (nv50_tsc_wrap_mode(cso->wrap_t) << 3) |
+ (nv50_tsc_wrap_mode(cso->wrap_r) << 6));
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ so->tsc[1] |= NV50_TSC_1_MAGF_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ so->tsc[1] |= NV50_TSC_1_MINF_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ so->tsc[1] |= NV50_TSC_1_MINF_NEAREST;
+ break;
+ }
+
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ so->tsc[1] |= NV50_TSC_1_MIPF_NONE;
+ break;
+ }
+
+ if (cso->max_anisotropy >= 16)
+ so->tsc[0] |= (7 << 20);
+ else
+ if (cso->max_anisotropy >= 12)
+ so->tsc[0] |= (6 << 20);
+ else {
+ so->tsc[0] |= (cso->max_anisotropy >> 1) << 20;
+
+ if (cso->max_anisotropy >= 4)
+ so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35;
+ else
+ if (cso->max_anisotropy >= 2)
+ so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15;
+ }
+
+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ /* NOTE: must be deactivated for non-shadow textures */
+ so->tsc[0] |= (1 << 9);
+ so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
+ }
+
+ f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f);
+ so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12;
+
+ f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f);
+ f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f);
+ so->tsc[2] |=
+ (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff);
+
+ so->tsc[4] = fui(cso->border_color[0]);
+ so->tsc[5] = fui(cso->border_color[1]);
+ so->tsc[6] = fui(cso->border_color[2]);
+ so->tsc[7] = fui(cso->border_color[3]);
+
+ return (void *)so;
+}
+
+static void
+nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ unsigned s, i;
+
+ for (s = 0; s < 5; ++s)
+ for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i)
+ if (nvc0_context(pipe)->samplers[s][i] == hwcso)
+ nvc0_context(pipe)->samplers[s][i] = NULL;
+
+ nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nvc0_tsc_entry(hwcso));
+
+ FREE(hwcso);
+}
+
+static INLINE void
+nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
+ unsigned nr, void **hwcso)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nvc0_tsc_entry *old = nvc0->samplers[s][i];
+
+ nvc0->samplers[s][i] = nvc0_tsc_entry(hwcso[i]);
+ if (old)
+ nvc0_screen_tsc_unlock(nvc0->screen, old);
+ }
+ for (; i < nvc0->num_samplers[s]; ++i)
+ if (nvc0->samplers[s][i])
+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+
+ nvc0->num_samplers[s] = nr;
+
+ nvc0->dirty |= NVC0_NEW_SAMPLERS;
+}
+
+static void
+nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s);
+}
+
+static void
+nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s);
+}
+
+static void
+nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
+}
+
+/* NOTE: only called when not referenced anywhere, won't be bound */
+static void
+nvc0_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL);
+
+ nvc0_screen_tic_free(nvc0_context(pipe)->screen, nvc0_tic_entry(view));
+
+ FREE(nvc0_tic_entry(view));
+}
+
+static INLINE void
+nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]);
+ if (old)
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+
+ pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]);
+ }
+
+ for (i = nr; i < nvc0->num_textures[s]; ++i) {
+ struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]);
+ if (!old)
+ continue;
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+
+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
+ }
+
+ nvc0->num_textures[s] = nr;
+
+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES);
+
+ nvc0->dirty |= NVC0_NEW_TEXTURES;
+}
+
+static void
+nvc0_vp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views);
+}
+
+static void
+nvc0_fp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views);
+}
+
+static void
+nvc0_gp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
+}
+
+/* ============================= SHADERS =======================================
+ */
+
+static void *
+nvc0_sp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso, unsigned type)
+{
+ struct nvc0_program *prog;
+
+ prog = CALLOC_STRUCT(nvc0_program);
+ if (!prog)
+ return NULL;
+
+ prog->type = type;
+ prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ return (void *)prog;
+}
+
+static void
+nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_program *prog = (struct nvc0_program *)hwcso;
+
+ nvc0_program_destroy(nvc0_context(pipe), prog);
+
+ FREE((void *)prog->pipe.tokens);
+ FREE(prog);
+}
+
+static void *
+nvc0_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX);
+}
+
+static void
+nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->vertprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_VERTPROG;
+}
+
+static void *
+nvc0_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT);
+}
+
+static void
+nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->fragprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_FRAGPROG;
+}
+
+static void *
+nvc0_gp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY);
+}
+
+static void
+nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->gmtyprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_GMTYPROG;
+}
+
+static void
+nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ struct pipe_resource *res)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ switch (shader) {
+ case PIPE_SHADER_VERTEX: shader = 0; break;
+ /*
+ case PIPE_SHADER_TESSELLATION_CONTROL: shader = 1; break;
+ case PIPE_SHADER_TESSELLATION_EVALUATION: shader = 2; break;
+ */
+ case PIPE_SHADER_GEOMETRY: shader = 3; break;
+ case PIPE_SHADER_FRAGMENT: shader = 4; break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (nvc0->constbuf[shader][index])
+ nvc0_bufctx_del_resident(nvc0, NVC0_BUFCTX_CONSTANT,
+ nvc0_resource(
+ nvc0->constbuf[shader][index]));
+
+ pipe_resource_reference(&nvc0->constbuf[shader][index], res);
+
+ nvc0->constbuf_dirty[shader] |= 1 << index;
+
+ nvc0->dirty |= NVC0_NEW_CONSTBUF;
+}
+
+/* =============================================================================
+ */
+
+static void
+nvc0_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->blend_colour = *bcol;
+ nvc0->dirty |= NVC0_NEW_BLEND_COLOUR;
+}
+
+static void
+nvc0_set_stencil_ref(struct pipe_context *pipe,
+ const struct pipe_stencil_ref *sr)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->stencil_ref = *sr;
+ nvc0->dirty |= NVC0_NEW_STENCIL_REF;
+}
+
+static void
+nvc0_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ const unsigned size = clip->nr * sizeof(clip->ucp[0]);
+
+ memcpy(&nvc0->clip.ucp[0][0], &clip->ucp[0][0], size);
+ nvc0->clip.nr = clip->nr;
+
+ nvc0->clip.depth_clamp = clip->depth_clamp;
+
+ nvc0->dirty |= NVC0_NEW_CLIP;
+}
+
+static void
+nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->sample_mask = sample_mask;
+ nvc0->dirty |= NVC0_NEW_SAMPLE_MASK;
+}
+
+
+static void
+nvc0_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->framebuffer = *fb;
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+static void
+nvc0_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->stipple = *stipple;
+ nvc0->dirty |= NVC0_NEW_STIPPLE;
+}
+
+static void
+nvc0_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->scissor = *scissor;
+ nvc0->dirty |= NVC0_NEW_SCISSOR;
+}
+
+static void
+nvc0_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->viewport = *vpt;
+ nvc0->dirty |= NVC0_NEW_VIEWPORT;
+}
+
+static void
+nvc0_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < count; ++i)
+ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer);
+ for (; i < nvc0->num_vtxbufs; ++i)
+ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
+
+ memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count);
+ nvc0->num_vtxbufs = count;
+
+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX);
+
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+}
+
+static void
+nvc0_set_index_buffer(struct pipe_context *pipe,
+ const struct pipe_index_buffer *ib)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ if (ib)
+ memcpy(&nvc0->idxbuf, ib, sizeof(nvc0->idxbuf));
+ else
+ nvc0->idxbuf.buffer = NULL;
+}
+
+static void
+nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->vertex = hwcso;
+ nvc0->dirty |= NVC0_NEW_VERTEX;
+}
+
+void
+nvc0_init_state_functions(struct nvc0_context *nvc0)
+{
+ nvc0->pipe.create_blend_state = nvc0_blend_state_create;
+ nvc0->pipe.bind_blend_state = nvc0_blend_state_bind;
+ nvc0->pipe.delete_blend_state = nvc0_blend_state_delete;
+
+ nvc0->pipe.create_rasterizer_state = nvc0_rasterizer_state_create;
+ nvc0->pipe.bind_rasterizer_state = nvc0_rasterizer_state_bind;
+ nvc0->pipe.delete_rasterizer_state = nvc0_rasterizer_state_delete;
+
+ nvc0->pipe.create_depth_stencil_alpha_state = nvc0_zsa_state_create;
+ nvc0->pipe.bind_depth_stencil_alpha_state = nvc0_zsa_state_bind;
+ nvc0->pipe.delete_depth_stencil_alpha_state = nvc0_zsa_state_delete;
+
+ nvc0->pipe.create_sampler_state = nvc0_sampler_state_create;
+ nvc0->pipe.delete_sampler_state = nvc0_sampler_state_delete;
+ nvc0->pipe.bind_vertex_sampler_states = nvc0_vp_sampler_states_bind;
+ nvc0->pipe.bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
+ nvc0->pipe.bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
+
+ nvc0->pipe.create_sampler_view = nvc0_create_sampler_view;
+ nvc0->pipe.sampler_view_destroy = nvc0_sampler_view_destroy;
+ nvc0->pipe.set_vertex_sampler_views = nvc0_vp_set_sampler_views;
+ nvc0->pipe.set_fragment_sampler_views = nvc0_fp_set_sampler_views;
+ nvc0->pipe.set_geometry_sampler_views = nvc0_gp_set_sampler_views;
+
+ nvc0->pipe.create_vs_state = nvc0_vp_state_create;
+ nvc0->pipe.create_fs_state = nvc0_fp_state_create;
+ nvc0->pipe.create_gs_state = nvc0_gp_state_create;
+ nvc0->pipe.bind_vs_state = nvc0_vp_state_bind;
+ nvc0->pipe.bind_fs_state = nvc0_fp_state_bind;
+ nvc0->pipe.bind_gs_state = nvc0_gp_state_bind;
+ nvc0->pipe.delete_vs_state = nvc0_sp_state_delete;
+ nvc0->pipe.delete_fs_state = nvc0_sp_state_delete;
+ nvc0->pipe.delete_gs_state = nvc0_sp_state_delete;
+
+ nvc0->pipe.set_blend_color = nvc0_set_blend_color;
+ nvc0->pipe.set_stencil_ref = nvc0_set_stencil_ref;
+ nvc0->pipe.set_clip_state = nvc0_set_clip_state;
+ nvc0->pipe.set_sample_mask = nvc0_set_sample_mask;
+ nvc0->pipe.set_constant_buffer = nvc0_set_constant_buffer;
+ nvc0->pipe.set_framebuffer_state = nvc0_set_framebuffer_state;
+ nvc0->pipe.set_polygon_stipple = nvc0_set_polygon_stipple;
+ nvc0->pipe.set_scissor_state = nvc0_set_scissor_state;
+ nvc0->pipe.set_viewport_state = nvc0_set_viewport_state;
+
+ nvc0->pipe.create_vertex_elements_state = nvc0_vertex_state_create;
+ nvc0->pipe.delete_vertex_elements_state = nvc0_vertex_state_delete;
+ nvc0->pipe.bind_vertex_elements_state = nvc0_vertex_state_bind;
+
+ nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers;
+ nvc0->pipe.set_index_buffer = nvc0_set_index_buffer;
+}
+
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
new file mode 100644
index 0000000000..25aec0244d
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
@@ -0,0 +1,430 @@
+
+#include "nvc0_context.h"
+#include "os/os_time.h"
+
+static void
+nvc0_validate_zcull(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ struct nvc0_surface *sf = nvc0_surface(fb->zsbuf);
+ struct nvc0_miptree *mt = nvc0_miptree(sf->base.texture);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t size;
+ uint32_t offset = align(mt->total_size, 1 << 17);
+ unsigned width, height;
+
+ assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);
+
+ size = mt->total_size * 2;
+
+ height = align(fb->height, 32);
+ width = fb->width % 224;
+ if (width)
+ width = fb->width + (224 - width);
+ else
+ width = fb->width;
+
+ BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */
+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ offset += 1 << 17;
+ BEGIN_RING(chan, RING_3D_(0x07f0), 2); /* ZCULL_ADDRESS_B_HIGH */
+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ BEGIN_RING(chan, RING_3D_(0x07e0), 2);
+ OUT_RING (chan, size);
+ OUT_RING (chan, size >> 16);
+ BEGIN_RING(chan, RING_3D_(0x15c8), 1); /* bits 0x3 */
+ OUT_RING (chan, 2);
+ BEGIN_RING(chan, RING_3D_(0x07c0), 4); /* ZCULL dimensions */
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D_(0x15fc), 2);
+ OUT_RING (chan, 0); /* bits 0xffff */
+ OUT_RING (chan, 0); /* bits 0xffff */
+ BEGIN_RING(chan, RING_3D_(0x1958), 1);
+ OUT_RING (chan, 0); /* bits ~0 */
+}
+
+static void
+nvc0_validate_fb(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ unsigned i;
+
+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME);
+
+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+ OUT_RING (chan, (076543210 << 4) | fb->nr_cbufs);
+ BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2);
+ OUT_RING (chan, fb->width << 16);
+ OUT_RING (chan, fb->height << 16);
+
+ for (i = 0; i < fb->nr_cbufs; ++i) {
+ struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture);
+ struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t offset = sf->offset;
+
+ BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8);
+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ OUT_RING (chan, nvc0_format_table[sf->base.format].rt);
+ OUT_RING (chan, (mt->layout_3d << 16) |
+ mt->level[sf->base.u.tex.level].tile_mode);
+ OUT_RING (chan, sf->depth);
+ OUT_RING (chan, mt->layer_stride >> 2);
+
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ }
+
+ if (fb->zsbuf) {
+ struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture);
+ struct nvc0_surface *sf = nvc0_surface(fb->zsbuf);
+ struct nouveau_bo *bo = mt->base.bo;
+ int unk = mt->base.base.target == PIPE_TEXTURE_2D;
+ uint32_t offset = sf->offset;
+
+ BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ OUT_RING (chan, nvc0_format_table[fb->zsbuf->format].rt);
+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
+ OUT_RING (chan, mt->layer_stride >> 2);
+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ OUT_RING (chan, (unk << 16) | sf->depth);
+
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ } else {
+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+ OUT_RING (chan, 0);
+ }
+
+#ifndef NVC0_SCISSORS_CLIPPING
+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+ OUT_RING (chan, fb->width << 16);
+ OUT_RING (chan, fb->height << 16);
+#endif
+}
+
+static void
+nvc0_validate_blend_colour(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4);
+ OUT_RINGf (chan, nvc0->blend_colour.color[0]);
+ OUT_RINGf (chan, nvc0->blend_colour.color[1]);
+ OUT_RINGf (chan, nvc0->blend_colour.color[2]);
+ OUT_RINGf (chan, nvc0->blend_colour.color[3]);
+}
+
+static void
+nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1);
+ OUT_RING (chan, nvc0->stencil_ref.ref_value[0]);
+ BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1);
+ OUT_RING (chan, nvc0->stencil_ref.ref_value[1]);
+}
+
+static void
+nvc0_validate_stipple(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ unsigned i;
+
+ BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
+ for (i = 0; i < 32; ++i)
+ OUT_RING(chan, util_bswap32(nvc0->stipple.stipple[i]));
+}
+
+static void
+nvc0_validate_scissor(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_scissor_state *s = &nvc0->scissor;
+#ifdef NVC0_SCISSORS_CLIPPING
+ struct pipe_viewport_state *vp = &nvc0->viewport;
+ int minx, maxx, miny, maxy;
+
+ if (!(nvc0->dirty &
+ (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) &&
+ nvc0->state.scissor == nvc0->rast->pipe.scissor)
+ return;
+ nvc0->state.scissor = nvc0->rast->pipe.scissor;
+
+ if (nvc0->state.scissor) {
+ minx = s->minx;
+ maxx = s->maxx;
+ miny = s->miny;
+ maxy = s->maxy;
+ } else {
+ minx = 0;
+ maxx = nvc0->framebuffer.width;
+ miny = 0;
+ maxy = nvc0->framebuffer.height;
+ }
+
+ minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0])));
+ maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0])));
+ miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1])));
+ maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1])));
+
+ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+ OUT_RING (chan, (maxx << 16) | minx);
+ OUT_RING (chan, (maxy << 16) | miny);
+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+ OUT_RING (chan, ((maxx - minx) << 16) | minx);
+ OUT_RING (chan, ((maxy - miny) << 16) | miny);
+#else
+ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+ OUT_RING (chan, (s->maxx << 16) | s->minx);
+ OUT_RING (chan, (s->maxy << 16) | s->miny);
+#endif
+}
+
+static void
+nvc0_validate_viewport(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3);
+ OUT_RINGf (chan, nvc0->viewport.translate[0]);
+ OUT_RINGf (chan, nvc0->viewport.translate[1]);
+ OUT_RINGf (chan, nvc0->viewport.translate[2]);
+ BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3);
+ OUT_RINGf (chan, nvc0->viewport.scale[0]);
+ OUT_RINGf (chan, nvc0->viewport.scale[1]);
+ OUT_RINGf (chan, nvc0->viewport.scale[2]);
+
+#ifdef NVC0_SCISSORS_CLIPPING
+ BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
+ OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]);
+ OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]);
+#endif
+}
+
+static void
+nvc0_validate_clip(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ uint32_t clip;
+
+ clip = nvc0->clip.depth_clamp ? 0x201a : 0x0002;
+#ifndef NVC0_SCISSORS_CLIPPING
+ clip |= 0x1080;
+#endif
+
+ BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+ OUT_RING (chan, clip);
+
+ if (nvc0->clip.nr) {
+ struct nouveau_bo *bo = nvc0->screen->uniforms;
+
+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+ OUT_RING (chan, 256);
+ OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING_1I(chan, RING_3D(CB_POS), nvc0->clip.nr * 4 + 1);
+ OUT_RING (chan, 0);
+ OUT_RINGp (chan, &nvc0->clip.ucp[0][0], nvc0->clip.nr * 4);
+
+ BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1);
+ OUT_RING (chan, (1 << nvc0->clip.nr) - 1);
+ } else {
+ IMMED_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 0);
+ }
+}
+
+static void
+nvc0_validate_blend(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ WAIT_RING(chan, nvc0->blend->size);
+ OUT_RINGp(chan, nvc0->blend->state, nvc0->blend->size);
+}
+
+static void
+nvc0_validate_zsa(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ WAIT_RING(chan, nvc0->zsa->size);
+ OUT_RINGp(chan, nvc0->zsa->state, nvc0->zsa->size);
+}
+
+static void
+nvc0_validate_rasterizer(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ WAIT_RING(chan, nvc0->rast->size);
+ OUT_RINGp(chan, nvc0->rast->state, nvc0->rast->size);
+}
+
+static void
+nvc0_constbufs_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nouveau_bo *bo;
+ unsigned s;
+
+ for (s = 0; s < 5; ++s) {
+ struct nvc0_resource *res;
+ int i;
+
+ while (nvc0->constbuf_dirty[s]) {
+ unsigned base = 0;
+ unsigned offset = 0, words = 0;
+ boolean rebind = TRUE;
+
+ i = ffs(nvc0->constbuf_dirty[s]) - 1;
+ nvc0->constbuf_dirty[s] &= ~(1 << i);
+
+ res = nvc0_resource(nvc0->constbuf[s][i]);
+ if (!res) {
+ BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
+ OUT_RING (chan, (i << 4) | 0);
+ if (i == 0)
+ nvc0->state.uniform_buffer_bound[s] = 0;
+ continue;
+ }
+
+ if (!nvc0_resource_mapped_by_gpu(&res->base)) {
+ if (i == 0) {
+ base = s << 16;
+ bo = nvc0->screen->uniforms;
+
+ if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0)
+ rebind = FALSE;
+ else
+ nvc0->state.uniform_buffer_bound[s] =
+ align(res->base.width0, 0x100);
+ } else {
+ bo = res->bo;
+ }
+#if 0
+ nvc0_m2mf_push_linear(nvc0, bo, NOUVEAU_BO_VRAM,
+ base, res->base.width0, res->data);
+ BEGIN_RING(chan, RING_3D_(0x021c), 1);
+ OUT_RING (chan, 0x1111);
+#else
+ words = res->base.width0 / 4;
+#endif
+ } else {
+ bo = res->bo;
+ if (i == 0)
+ nvc0->state.uniform_buffer_bound[s] = 0;
+ }
+
+ if (bo != nvc0->screen->uniforms)
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_CONSTANT, res,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+ if (rebind) {
+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+ OUT_RING (chan, align(res->base.width0, 0x100));
+ OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
+ OUT_RING (chan, (i << 4) | 1);
+ }
+
+ while (words) {
+ unsigned nr = AVAIL_RING(chan);
+
+ if (nr < 16) {
+ FIRE_RING(chan);
+ continue;
+ }
+ nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1);
+
+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+ OUT_RING (chan, align(res->base.width0, 0x100));
+ OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING_1I(chan, RING_3D(CB_POS), nr + 1);
+ OUT_RING (chan, offset);
+ OUT_RINGp (chan, &res->data[offset], nr);
+
+ offset += nr * 4;
+ words -= nr;
+ }
+ }
+ }
+}
+
+static struct state_validate {
+ void (*func)(struct nvc0_context *);
+ uint32_t states;
+} validate_list[] = {
+ { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER },
+ { nvc0_validate_blend, NVC0_NEW_BLEND },
+ { nvc0_validate_zsa, NVC0_NEW_ZSA },
+ { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER },
+ { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR },
+ { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF },
+ { nvc0_validate_stipple, NVC0_NEW_STIPPLE },
+#ifdef NVC0_SCISSORS_CLIPPING
+ { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT |
+ NVC0_NEW_RASTERIZER |
+ NVC0_NEW_FRAMEBUFFER },
+#else
+ { nvc0_validate_scissor, NVC0_NEW_SCISSOR },
+#endif
+ { nvc0_validate_viewport, NVC0_NEW_VIEWPORT },
+ { nvc0_validate_clip, NVC0_NEW_CLIP },
+ { nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
+ { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG },
+ { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
+ { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
+ { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
+ { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
+ { nvc0_validate_textures, NVC0_NEW_TEXTURES },
+ { nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
+ { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }
+};
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
+
+boolean
+nvc0_state_validate(struct nvc0_context *nvc0)
+{
+ unsigned i;
+#if 0
+ if (nvc0->screen->cur_ctx != nvc0) /* FIXME: not everything is valid */
+ nvc0->dirty = 0xffffffff;
+#endif
+ nvc0->screen->cur_ctx = nvc0;
+
+ if (nvc0->dirty) {
+ for (i = 0; i < validate_list_len; ++i) {
+ struct state_validate *validate = &validate_list[i];
+
+ if (nvc0->dirty & validate->states)
+ validate->func(nvc0);
+ }
+ nvc0->dirty = 0;
+ }
+
+ nvc0_bufctx_emit_relocs(nvc0);
+
+ return TRUE;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h
new file mode 100644
index 0000000000..ee788c5bb9
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h
@@ -0,0 +1,81 @@
+
+#ifndef __NVC0_STATEOBJ_H__
+#define __NVC0_STATEOBJ_H__
+
+#include "pipe/p_state.h"
+
+#define NVC0_SCISSORS_CLIPPING
+
+#define SB_BEGIN_3D(so, m, s) \
+ (so)->state[(so)->size++] = \
+ (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
+
+#define SB_IMMED_3D(so, m, d) \
+ (so)->state[(so)->size++] = \
+ (0x8 << 28) | ((d) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
+
+#define SB_DATA(so, u) (so)->state[(so)->size++] = (u)
+
+struct nvc0_blend_stateobj {
+ struct pipe_blend_state pipe;
+ int size;
+ uint32_t state[72];
+};
+
+struct nvc0_tsc_entry {
+ int id;
+ uint32_t tsc[8];
+};
+
+static INLINE struct nvc0_tsc_entry *
+nvc0_tsc_entry(void *hwcso)
+{
+ return (struct nvc0_tsc_entry *)hwcso;
+}
+
+struct nvc0_tic_entry {
+ struct pipe_sampler_view pipe;
+ int id;
+ uint32_t tic[8];
+};
+
+static INLINE struct nvc0_tic_entry *
+nvc0_tic_entry(struct pipe_sampler_view *view)
+{
+ return (struct nvc0_tic_entry *)view;
+}
+
+struct nvc0_rasterizer_stateobj {
+ struct pipe_rasterizer_state pipe;
+ int size;
+ uint32_t state[36];
+};
+
+struct nvc0_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state pipe;
+ int size;
+ uint32_t state[29];
+};
+
+struct nvc0_vertex_element {
+ struct pipe_vertex_element pipe;
+ uint32_t state;
+};
+
+struct nvc0_vertex_stateobj {
+ struct translate *translate;
+ unsigned num_elements;
+ uint32_t instance_bits;
+ unsigned vtx_size;
+ unsigned vtx_per_packet_max;
+ struct nvc0_vertex_element element[1];
+};
+
+/* will have to lookup index -> location qualifier from nvc0_program */
+struct nvc0_tfb_state {
+ uint8_t varying_count[4];
+ uint32_t stride[4];
+ uint8_t varying_indices[1];
+};
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
new file mode 100644
index 0000000000..cc0a65687d
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "pipe/p_defines.h"
+
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+#include "util/u_format.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nv50_defs.xml.h"
+
+/* return TRUE for formats that can be converted among each other by NVC0_2D */
+static INLINE boolean
+nvc0_2d_format_faithful(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_SRGB:
+ case PIPE_FORMAT_B8G8R8X8_SRGB:
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static INLINE uint8_t
+nvc0_2d_format(enum pipe_format format)
+{
+ uint8_t id = nvc0_format_table[format].rt;
+
+ /* Hardware values for color formats range from 0xc0 to 0xff,
+ * but the 2D engine doesn't support all of them.
+ */
+ if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0))))
+ return id;
+
+ switch (util_format_get_blocksize(format)) {
+ case 1:
+ return NV50_SURFACE_FORMAT_R8_UNORM;
+ case 2:
+ return NV50_SURFACE_FORMAT_R16_UNORM;
+ case 4:
+ return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM;
+ default:
+ return 0;
+ }
+}
+
+static int
+nvc0_2d_texture_set(struct nouveau_channel *chan, int dst,
+ struct nvc0_miptree *mt, unsigned level, unsigned layer)
+{
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t width, height, depth;
+ uint32_t format;
+ uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT;
+ uint32_t flags = mt->base.domain | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
+ uint32_t offset = mt->level[level].offset;
+
+ format = nvc0_2d_format(mt->base.base.format);
+ if (!format) {
+ NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+ util_format_name(mt->base.base.format));
+ return 1;
+ }
+
+ width = u_minify(mt->base.base.width0, level);
+ height = u_minify(mt->base.base.height0, level);
+
+ offset = mt->level[level].offset;
+ if (!mt->layout_3d) {
+ offset += mt->layer_stride * layer;
+ depth = 1;
+ layer = 0;
+ } else {
+ depth = u_minify(mt->base.base.depth0, level);
+ }
+
+ if (!(bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)) {
+ BEGIN_RING(chan, RING_2D_(mthd), 2);
+ OUT_RING (chan, format);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5);
+ OUT_RING (chan, mt->level[level].pitch);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ OUT_RELOCh(chan, bo, offset, flags);
+ OUT_RELOCl(chan, bo, offset, flags);
+ } else {
+ BEGIN_RING(chan, RING_2D_(mthd), 5);
+ OUT_RING (chan, format);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, mt->level[level].tile_mode);
+ OUT_RING (chan, depth);
+ OUT_RING (chan, layer);
+ BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ OUT_RELOCh(chan, bo, offset, flags);
+ OUT_RELOCl(chan, bo, offset, flags);
+ }
+
+#if 0
+ if (dst) {
+ BEGIN_RING(chan, RING_2D_(NVC0_2D_CLIP_X), 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+ }
+#endif
+ return 0;
+}
+
+static int
+nvc0_2d_texture_do_copy(struct nouveau_channel *chan,
+ struct nvc0_miptree *dst, unsigned dst_level,
+ unsigned dx, unsigned dy, unsigned dz,
+ struct nvc0_miptree *src, unsigned src_level,
+ unsigned sx, unsigned sy, unsigned sz,
+ unsigned w, unsigned h)
+{
+ int ret;
+
+ ret = MARK_RING(chan, 2 * 16 + 32, 4);
+ if (ret)
+ return ret;
+
+ ret = nvc0_2d_texture_set(chan, 1, dst, dst_level, dz);
+ if (ret)
+ return ret;
+
+ ret = nvc0_2d_texture_set(chan, 0, src, src_level, sz);
+ if (ret)
+ return ret;
+
+ /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */
+ BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4);
+ OUT_RING (chan, dx);
+ OUT_RING (chan, dy);
+ OUT_RING (chan, w);
+ OUT_RING (chan, h);
+ BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sx);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sy);
+
+ return 0;
+}
+
+static void
+nvc0_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct nvc0_screen *screen = nvc0_context(pipe)->screen;
+ int ret;
+ unsigned dst_layer = dstz, src_layer = src_box->z;
+
+ assert((src->format == dst->format) ||
+ (nvc0_2d_format_faithful(src->format) &&
+ nvc0_2d_format_faithful(dst->format)));
+
+ for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) {
+ ret = nvc0_2d_texture_do_copy(screen->base.channel,
+ nvc0_miptree(dst), dst_level,
+ dstx, dsty, dst_layer,
+ nvc0_miptree(src), src_level,
+ src_box->x, src_box->y, src_layer,
+ src_box->width, src_box->height);
+ if (ret)
+ return;
+ }
+}
+
+static void
+nvc0_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nvc0_context *nv50 = nvc0_context(pipe);
+ struct nvc0_screen *screen = nv50->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
+ struct nvc0_surface *sf = nvc0_surface(dst);
+ struct nouveau_bo *bo = mt->base.bo;
+
+ BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
+ OUT_RINGf (chan, rgba[0]);
+ OUT_RINGf (chan, rgba[1]);
+ OUT_RINGf (chan, rgba[2]);
+ OUT_RINGf (chan, rgba[3]);
+
+ if (MARK_RING(chan, 18, 2))
+ return;
+
+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 8);
+ OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ OUT_RING (chan, nvc0_format_table[dst->format].rt);
+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+
+ /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+ OUT_RING (chan, (width << 16) | dstx);
+ OUT_RING (chan, (height << 16) | dsty);
+
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, 0x3c);
+
+ nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+static void
+nvc0_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nvc0_context *nv50 = nvc0_context(pipe);
+ struct nvc0_screen *screen = nv50->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
+ struct nvc0_surface *sf = nvc0_surface(dst);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t mode = 0;
+
+ if (clear_flags & PIPE_CLEAR_DEPTH) {
+ BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
+ OUT_RINGf (chan, depth);
+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (clear_flags & PIPE_CLEAR_STENCIL) {
+ BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
+ OUT_RING (chan, stencil & 0xff);
+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
+ }
+
+ if (MARK_RING(chan, 17, 2))
+ return;
+
+ BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
+ OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RING (chan, nvc0_format_table[dst->format].rt);
+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
+ OUT_RING (chan, sf->width);
+ OUT_RING (chan, sf->height);
+ OUT_RING (chan, (1 << 16) | 1);
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+ OUT_RING (chan, (width << 16) | dstx);
+ OUT_RING (chan, (height << 16) | dsty);
+
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, mode);
+
+ nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+void
+nvc0_clear(struct pipe_context *pipe, unsigned buffers,
+ const float *rgba, double depth, unsigned stencil)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ unsigned i;
+ const unsigned dirty = nvc0->dirty;
+ uint32_t mode = 0;
+
+ /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
+ nvc0->dirty &= NVC0_NEW_FRAMEBUFFER;
+ if (!nvc0_state_validate(nvc0))
+ return;
+
+ if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+ BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
+ OUT_RINGf (chan, rgba[0]);
+ OUT_RINGf (chan, rgba[1]);
+ OUT_RINGf (chan, rgba[2]);
+ OUT_RINGf (chan, rgba[3]);
+ mode =
+ NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G |
+ NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A;
+ }
+
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
+ OUT_RING (chan, fui(depth));
+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
+ OUT_RING (chan, stencil & 0xff);
+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
+ }
+
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, mode);
+
+ for (i = 1; i < fb->nr_cbufs; i++) {
+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+ OUT_RING (chan, (i << 6) | 0x3c);
+ }
+
+ nvc0->dirty = dirty & ~NVC0_NEW_FRAMEBUFFER;
+}
+
+void
+nvc0_init_surface_functions(struct nvc0_context *nvc0)
+{
+ nvc0->pipe.resource_copy_region = nvc0_resource_copy_region;
+ nvc0->pipe.clear_render_target = nvc0_clear_render_target;
+ nvc0->pipe.clear_depth_stencil = nvc0_clear_depth_stencil;
+}
+
+
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
new file mode 100644
index 0000000000..b219f82c90
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+#include "nv50_texture.xml.h"
+
+#include "util/u_format.h"
+
+static INLINE uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz)
+{
+ switch (swz) {
+ case PIPE_SWIZZLE_RED:
+ return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
+ case PIPE_SWIZZLE_GREEN:
+ return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
+ case PIPE_SWIZZLE_BLUE:
+ return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
+ case PIPE_SWIZZLE_ALPHA:
+ return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
+ case PIPE_SWIZZLE_ONE:
+ return NV50_TIC_MAP_ONE;
+ case PIPE_SWIZZLE_ZERO:
+ default:
+ return NV50_TIC_MAP_ZERO;
+ }
+}
+
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ)
+{
+ const struct util_format_description *desc;
+ uint32_t *tic;
+ uint32_t swz[4];
+ uint32_t depth;
+ struct nvc0_tic_entry *view;
+ struct nvc0_miptree *mt = nvc0_miptree(texture);
+
+ view = MALLOC_STRUCT(nvc0_tic_entry);
+ if (!view)
+ return NULL;
+
+ view->pipe = *templ;
+ view->pipe.reference.count = 1;
+ view->pipe.texture = NULL;
+ view->pipe.context = pipe;
+
+ view->id = -1;
+
+ pipe_resource_reference(&view->pipe.texture, texture);
+
+ tic = &view->tic[0];
+
+ desc = util_format_description(mt->base.base.format);
+
+ /* TIC[0] */
+
+ tic[0] = nvc0_format_table[view->pipe.format].tic;
+
+ swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r);
+ swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g);
+ swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b);
+ swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a);
+ tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
+ (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
+ (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
+ (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
+ (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+
+ /* tic[1] = mt->base.bo->offset; */
+ tic[2] = /* mt->base.bo->offset >> 32 */ 0;
+
+ tic[2] |= 0x10001000 | /* NV50_TIC_2_NO_BORDER */ 0x40000000;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+
+ if (mt->base.base.target != PIPE_TEXTURE_RECT)
+ tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+
+ tic[2] |=
+ ((mt->base.bo->tile_mode & 0x0f0) << (22 - 4)) |
+ ((mt->base.bo->tile_mode & 0xf00) << (25 - 8));
+
+ depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
+
+ switch (mt->base.base.target) {
+ case PIPE_TEXTURE_1D:
+ tic[2] |= NV50_TIC_2_TARGET_1D;
+ break;
+ case PIPE_TEXTURE_2D:
+ tic[2] |= NV50_TIC_2_TARGET_2D;
+ break;
+ case PIPE_TEXTURE_RECT:
+ tic[2] |= NV50_TIC_2_TARGET_RECT;
+ break;
+ case PIPE_TEXTURE_3D:
+ tic[2] |= NV50_TIC_2_TARGET_3D;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ depth /= 6;
+ if (depth > 1)
+ tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+ else
+ tic[2] |= NV50_TIC_2_TARGET_CUBE;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+ break;
+ case PIPE_BUFFER:
+ tic[2] |= NV50_TIC_2_TARGET_BUFFER | /* NV50_TIC_2_LINEAR */ (1 << 18);
+ default:
+ NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
+ return FALSE;
+ }
+
+ if (mt->base.base.target == PIPE_BUFFER)
+ tic[3] = mt->base.base.width0;
+ else
+ tic[3] = 0x00300000;
+
+ tic[4] = (1 << 31) | mt->base.base.width0;
+
+ tic[5] = mt->base.base.height0 & 0xffff;
+ tic[5] |= depth << 16;
+ tic[5] |= mt->base.base.last_level << 28;
+
+ tic[6] = 0x03000000;
+
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+
+ return &view->pipe;
+}
+
+static boolean
+nvc0_validate_tic(struct nvc0_context *nvc0, int s)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ struct nvc0_tic_entry *tic = nvc0_tic_entry(nvc0->textures[s][i]);
+ struct nvc0_resource *res;
+
+ if (!tic) {
+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+ OUT_RING (chan, (i << 1) | 0);
+ continue;
+ }
+ res = &nvc0_miptree(tic->pipe.texture)->base;
+
+ if (tic->id < 0) {
+ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+ MARK_RING (chan, 9 + 8, 4);
+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+ OUT_RELOCh(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+ OUT_RING (chan, 32);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_MF(EXEC), 1);
+ OUT_RING (chan, 0x100111);
+ BEGIN_RING_NI(chan, RING_MF(DATA), 8);
+ OUT_RING (chan, tic->tic[0]);
+ OUT_RELOCl(chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOC (chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, tic->tic[2], tic->tic[2]);
+ OUT_RINGp (chan, &tic->tic[3], 5);
+
+ need_flush = TRUE;
+ }
+ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+ OUT_RING (chan, (tic->id << 9) | (i << 1) | 1);
+ }
+ for (; i < nvc0->state.num_textures[s]; ++i) {
+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+ OUT_RING (chan, (i << 1) | 0);
+ }
+ nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+ return need_flush;
+}
+
+void nvc0_validate_textures(struct nvc0_context *nvc0)
+{
+ boolean need_flush;
+
+ need_flush = nvc0_validate_tic(nvc0, 0);
+ need_flush |= nvc0_validate_tic(nvc0, 4);
+
+ if (need_flush) {
+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(TIC_FLUSH), 1);
+ OUT_RING (nvc0->screen->base.channel, 0);
+ }
+}
+
+static boolean
+nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_samplers[s]; ++i) {
+ struct nvc0_tsc_entry *tsc = nvc0_tsc_entry(nvc0->samplers[s][i]);
+
+ if (!tsc) {
+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+ OUT_RING (chan, (i << 4) | 0);
+ continue;
+ }
+ if (tsc->id < 0) {
+ tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
+
+ nvc0_m2mf_push_linear(nvc0, nvc0->screen->txc, NOUVEAU_BO_VRAM,
+ 65536 + tsc->id * 32, 32, tsc->tsc);
+ need_flush = TRUE;
+ }
+ nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
+
+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+ OUT_RING (chan, (tsc->id << 12) | (i << 4) | 1);
+ }
+ for (; i < nvc0->state.num_samplers[s]; ++i) {
+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+ OUT_RING (chan, (i << 4) | 0);
+ }
+ nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
+
+ return need_flush;
+}
+
+void nvc0_validate_samplers(struct nvc0_context *nvc0)
+{
+ boolean need_flush;
+
+ need_flush = nvc0_validate_tsc(nvc0, 0);
+ need_flush |= nvc0_validate_tsc(nvc0, 4);
+
+ if (need_flush) {
+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(TSC_FLUSH), 1);
+ OUT_RING (nvc0->screen->base.channel, 0);
+ }
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
new file mode 100644
index 0000000000..fecfc76fb7
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
@@ -0,0 +1,2004 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <unistd.h>
+
+#define NOUVEAU_DEBUG 1
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_dynarray.h"
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+/* Arbitrary internal limits. */
+#define BLD_MAX_TEMPS 64
+#define BLD_MAX_ADDRS 4
+#define BLD_MAX_PREDS 4
+#define BLD_MAX_IMMDS 128
+#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS
+
+#define BLD_MAX_COND_NESTING 8
+#define BLD_MAX_LOOP_NESTING 4
+#define BLD_MAX_CALL_NESTING 2
+
+/* This structure represents a TGSI register. */
+struct bld_register {
+ struct nv_value *current;
+ /* collect all SSA values assigned to it */
+ struct util_dynarray vals;
+ /* 1 bit per loop level, indicates if used/defd, reset when loop ends */
+ uint16_t loop_use;
+ uint16_t loop_def;
+};
+
+static INLINE struct nv_value **
+bld_register_access(struct bld_register *reg, unsigned i)
+{
+ return util_dynarray_element(&reg->vals, struct nv_value *, i);
+}
+
+static INLINE void
+bld_register_add_val(struct bld_register *reg, struct nv_value *val)
+{
+ util_dynarray_append(&reg->vals, struct nv_value *, val);
+}
+
+static INLINE boolean
+bld_register_del_val(struct bld_register *reg, struct nv_value *val)
+{
+ unsigned i;
+
+ for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i)
+ if (*bld_register_access(reg, i - 1) == val)
+ break;
+ if (!i)
+ return FALSE;
+
+ if (i != reg->vals.size / sizeof(struct nv_value *))
+ *bld_register_access(reg, i - 1) = util_dynarray_pop(&reg->vals,
+ struct nv_value *);
+ else
+ reg->vals.size -= sizeof(struct nv_value *);
+
+ return TRUE;
+}
+
+struct bld_context {
+ struct nvc0_translation_info *ti;
+
+ struct nv_pc *pc;
+ struct nv_basic_block *b;
+
+ struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
+ int call_lvl;
+
+ struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
+ struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
+ struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
+ int cond_lvl;
+ struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
+ struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
+ int loop_lvl;
+
+ ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */
+
+ struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
+ struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
+ struct bld_register pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
+ struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */
+
+ uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8];
+ int hpos_index;
+
+ struct nv_value *zero;
+ struct nv_value *frag_coord[4];
+
+ /* wipe on new BB */
+ struct nv_value *saved_sysvals[4];
+ struct nv_value *saved_addr[4][2];
+ struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4];
+ struct nv_value *saved_immd[BLD_MAX_IMMDS];
+ uint num_immds;
+};
+
+static INLINE ubyte
+bld_register_file(struct bld_context *bld, struct bld_register *reg)
+{
+ if (reg < &bld->avs[0][0]) return NV_FILE_GPR;
+ else
+ if (reg < &bld->pvs[0][0]) return NV_FILE_GPR;
+ else
+ if (reg < &bld->ovs[0][0]) return NV_FILE_PRED;
+ else
+ return NV_FILE_MEM_V;
+}
+
+static INLINE struct nv_value *
+bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c)
+{
+ regs[i * 4 + c].loop_use |= 1 << bld->loop_lvl;
+ return regs[i * 4 + c].current;
+}
+
+static struct nv_value *
+bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *);
+
+/* If a variable is defined in a loop without prior use, we don't need
+ * a phi in the loop header to account for backwards flow.
+ *
+ * However, if this variable is then also used outside the loop, we do
+ * need a phi after all. But we must not use this phi's def inside the
+ * loop, so we can eliminate the phi if it is unused later.
+ */
+static INLINE void
+bld_store(struct bld_context *bld,
+ struct bld_register *regs, int i, int c, struct nv_value *val)
+{
+ const uint16_t m = 1 << bld->loop_lvl;
+ struct bld_register *reg = &regs[i * 4 + c];
+
+ if (bld->loop_lvl && !(m & (reg->loop_def | reg->loop_use)))
+ bld_loop_phi(bld, reg, val);
+
+ reg->current = val;
+ bld_register_add_val(reg, reg->current);
+
+ reg->loop_def |= 1 << bld->loop_lvl;
+}
+
+#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
+#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
+#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
+#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
+#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
+#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
+#define STORE_OUTP(i, c, v) \
+ do { \
+ bld_store(bld, &bld->ovs[0][0], i, c, (v)); \
+ bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
+ } while (0)
+
+static INLINE void
+bld_clear_def_use(struct bld_register *regs, int n, int lvl)
+{
+ int i;
+ const uint16_t mask = ~(1 << lvl);
+
+ for (i = 0; i < n * 4; ++i) {
+ regs[i].loop_def &= mask;
+ regs[i].loop_use &= mask;
+ }
+}
+
+static INLINE void
+bld_warn_uninitialized(struct bld_context *bld, int kind,
+ struct bld_register *reg, struct nv_basic_block *b)
+{
+#ifdef NOUVEAU_DEBUG
+ long i = (reg - &bld->tvs[0][0]) / 4;
+ long c = (reg - &bld->tvs[0][0]) & 3;
+
+ if (c == 3)
+ c = -1;
+ debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
+ i, (int)('x' + c), kind ? "may be" : "is", b->id);
+#endif
+}
+
+static INLINE struct nv_value *
+bld_def(struct nv_instruction *i, int c, struct nv_value *value)
+{
+ i->def[c] = value;
+ value->insn = i;
+ return value;
+}
+
+static INLINE struct nv_value *
+find_by_bb(struct bld_register *reg, struct nv_basic_block *b)
+{
+ int i;
+
+ if (reg->current && reg->current->insn->bb == b)
+ return reg->current;
+
+ for (i = 0; i < reg->vals.size / sizeof(struct nv_value *); ++i)
+ if ((*bld_register_access(reg, i))->insn->bb == b)
+ return *bld_register_access(reg, i);
+ return NULL;
+}
+
+/* Fetch value from register that was defined in the specified BB,
+ * or search for first definitions in all of its predecessors.
+ */
+static void
+fetch_by_bb(struct bld_register *reg,
+ struct nv_value **vals, int *n,
+ struct nv_basic_block *b)
+{
+ int i;
+ struct nv_value *val;
+
+ assert(*n < 16); /* MAX_COND_NESTING */
+
+ val = find_by_bb(reg, b);
+ if (val) {
+ for (i = 0; i < *n; ++i)
+ if (vals[i] == val)
+ return;
+ vals[(*n)++] = val;
+ return;
+ }
+ for (i = 0; i < b->num_in; ++i)
+ if (!IS_WALL_EDGE(b->in_kind[i]))
+ fetch_by_bb(reg, vals, n, b->in[i]);
+}
+
+static INLINE struct nv_value *
+bld_load_imm_u32(struct bld_context *bld, uint32_t u);
+
+static INLINE struct nv_value *
+bld_undef(struct bld_context *bld, ubyte file)
+{
+ struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);
+
+ return bld_def(nvi, 0, new_value(bld->pc, file, 4));
+}
+
+static struct nv_value *
+bld_phi(struct bld_context *bld, struct nv_basic_block *b,
+ struct bld_register *reg)
+{
+ struct nv_basic_block *in;
+ struct nv_value *vals[16] = { NULL };
+ struct nv_value *val;
+ struct nv_instruction *phi;
+ int i, j, n;
+
+ do {
+ i = n = 0;
+ fetch_by_bb(reg, vals, &n, b);
+
+ if (!n) {
+ bld_warn_uninitialized(bld, 0, reg, b);
+ return NULL;
+ }
+
+ if (n == 1) {
+ if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb))
+ break;
+
+ bld_warn_uninitialized(bld, 1, reg, b);
+
+ /* back-tracking to insert missing value of other path */
+ in = b;
+ while (in->in[0]) {
+ if (in->num_in == 1) {
+ in = in->in[0];
+ } else {
+ if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b))
+ in = in->in[0];
+ else
+ if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b))
+ in = in->in[1];
+ else
+ in = in->in[0];
+ }
+ }
+ bld->pc->current_block = in;
+
+ /* should make this a no-op */
+ bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file));
+ continue;
+ }
+
+ for (i = 0; i < n; ++i) {
+ /* if value dominates b, continue to the redefinitions */
+ if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb))
+ continue;
+
+ /* if value dominates any in-block, b should be the dom frontier */
+ for (j = 0; j < b->num_in; ++j)
+ if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb))
+ break;
+ /* otherwise, find the dominance frontier and put the phi there */
+ if (j == b->num_in) {
+ in = nvc0_bblock_dom_frontier(vals[i]->insn->bb);
+ val = bld_phi(bld, in, reg);
+ bld_register_add_val(reg, val);
+ break;
+ }
+ }
+ } while(i < n);
+
+ bld->pc->current_block = b;
+
+ if (n == 1)
+ return vals[0];
+
+ phi = new_instruction(bld->pc, NV_OP_PHI);
+
+ bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size));
+ for (i = 0; i < n; ++i)
+ nv_reference(bld->pc, phi, i, vals[i]);
+
+ return phi->def[0];
+}
+
+/* Insert a phi function in the loop header.
+ * For nested loops, we need to insert phi functions in all the outer
+ * loop headers if they don't have one yet.
+ *
+ * @def: redefinition from inside loop, or NULL if to be replaced later
+ */
+static struct nv_value *
+bld_loop_phi(struct bld_context *bld, struct bld_register *reg,
+ struct nv_value *def)
+{
+ struct nv_instruction *phi;
+ struct nv_basic_block *bb = bld->pc->current_block;
+ struct nv_value *val = NULL;
+
+ if (bld->loop_lvl > 1) {
+ --bld->loop_lvl;
+ if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl)))
+ val = bld_loop_phi(bld, reg, NULL);
+ ++bld->loop_lvl;
+ }
+
+ if (!val)
+ val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */
+ if (!val) {
+ bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
+ val = bld_undef(bld, bld_register_file(bld, reg));
+ }
+
+ bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];
+
+ phi = new_instruction(bld->pc, NV_OP_PHI);
+
+ bld_def(phi, 0, new_value_like(bld->pc, val));
+ if (!def)
+ def = phi->def[0];
+
+ bld_register_add_val(reg, phi->def[0]);
+
+ phi->target = (struct nv_basic_block *)reg; /* cheat */
+
+ nv_reference(bld->pc, phi, 0, val);
+ nv_reference(bld->pc, phi, 1, def);
+
+ bld->pc->current_block = bb;
+
+ return phi->def[0];
+}
+
+static INLINE struct nv_value *
+bld_fetch_global(struct bld_context *bld, struct bld_register *reg)
+{
+ const uint16_t m = 1 << bld->loop_lvl;
+ const uint16_t use = reg->loop_use;
+
+ reg->loop_use |= m;
+
+ /* If neither used nor def'd inside the loop, build a phi in foresight,
+ * so we don't have to replace stuff later on, which requires tracking.
+ */
+ if (bld->loop_lvl && !((use | reg->loop_def) & m))
+ return bld_loop_phi(bld, reg, NULL);
+
+ return bld_phi(bld, bld->pc->current_block, reg);
+}
+
+static INLINE struct nv_value *
+bld_imm_u32(struct bld_context *bld, uint32_t u)
+{
+ int i;
+ unsigned n = bld->num_immds;
+
+ for (i = 0; i < n; ++i)
+ if (bld->saved_immd[i]->reg.imm.u32 == u)
+ return bld->saved_immd[i];
+
+ assert(n < BLD_MAX_IMMDS);
+ bld->num_immds++;
+
+ bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4);
+ bld->saved_immd[n]->reg.imm.u32 = u;
+ return bld->saved_immd[n];
+}
+
+static void
+bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
+ struct nv_value *);
+
+/* Replace the source of the phi in the loop header by the last assignment,
+ * or eliminate the phi function if there is no assignment inside the loop.
+ *
+ * Redundancy situation 1 - (used) but (not redefined) value:
+ * %3 = phi %0, %3 = %3 is used
+ * %3 = phi %0, %4 = is new definition
+ *
+ * Redundancy situation 2 - (not used) but (redefined) value:
+ * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
+ */
+static void
+bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
+{
+ struct nv_basic_block *save = bld->pc->current_block;
+ struct nv_instruction *phi, *next;
+ struct nv_value *val;
+ struct bld_register *reg;
+ int i, s, n;
+
+ for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
+ next = phi->next;
+
+ reg = (struct bld_register *)phi->target;
+ phi->target = NULL;
+
+ for (s = 1, n = 0; n < bb->num_in; ++n) {
+ if (bb->in_kind[n] != CFG_EDGE_BACK)
+ continue;
+
+ assert(s < 4);
+ bld->pc->current_block = bb->in[n];
+ val = bld_fetch_global(bld, reg);
+
+ for (i = 0; i < 4; ++i)
+ if (phi->src[i] && phi->src[i]->value == val)
+ break;
+ if (i == 4)
+ nv_reference(bld->pc, phi, s++, val);
+ }
+ bld->pc->current_block = save;
+
+ if (phi->src[0]->value == phi->def[0] ||
+ phi->src[0]->value == phi->src[1]->value)
+ s = 1;
+ else
+ if (phi->src[1]->value == phi->def[0])
+ s = 0;
+ else
+ continue;
+
+ if (s >= 0) {
+ /* eliminate the phi */
+ bld_register_del_val(reg, phi->def[0]);
+
+ ++bld->pc->pass_seq;
+ bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);
+
+ nvc0_insn_delete(phi);
+ }
+ }
+}
+
+static INLINE struct nv_value *
+bld_imm_f32(struct bld_context *bld, float f)
+{
+ return bld_imm_u32(bld, fui(f));
+}
+
+static struct nv_value *
+bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+ nv_reference(bld->pc, insn, 0, src0);
+
+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
+}
+
+static struct nv_value *
+bld_insn_2(struct bld_context *bld, uint opcode,
+ struct nv_value *src0, struct nv_value *src1)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+ nv_reference(bld->pc, insn, 0, src0);
+ nv_reference(bld->pc, insn, 1, src1);
+
+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
+}
+
+static struct nv_value *
+bld_insn_3(struct bld_context *bld, uint opcode,
+ struct nv_value *src0, struct nv_value *src1,
+ struct nv_value *src2)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+ nv_reference(bld->pc, insn, 0, src0);
+ nv_reference(bld->pc, insn, 1, src1);
+ nv_reference(bld->pc, insn, 2, src2);
+
+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
+}
+
+static INLINE void
+bld_src_predicate(struct bld_context *bld,
+ struct nv_instruction *nvi, int s, struct nv_value *val)
+{
+ nvi->predicate = s;
+ nv_reference(bld->pc, nvi, s, val);
+}
+
+static INLINE void
+bld_src_pointer(struct bld_context *bld,
+ struct nv_instruction *nvi, int s, struct nv_value *val)
+{
+ nvi->indirect = s;
+ nv_reference(bld->pc, nvi, s, val);
+}
+
+static void
+bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
+ struct nv_value *val)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST);
+ struct nv_value *loc;
+
+ loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
+
+ loc->reg.id = ofst * 4;
+
+ nv_reference(bld->pc, insn, 0, loc);
+ nv_reference(bld->pc, insn, 1, ptr);
+ nv_reference(bld->pc, insn, 2, val);
+}
+
+static struct nv_value *
+bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
+{
+ struct nv_value *loc, *val;
+
+ loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
+
+ loc->reg.address = ofst * 4;
+
+ val = bld_insn_2(bld, NV_OP_LD, loc, ptr);
+
+ return val;
+}
+
+static struct nv_value *
+bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
+{
+ struct nv_value *val;
+
+ val = bld_insn_1(bld, NV_OP_LG2, x);
+ val = bld_insn_2(bld, NV_OP_MUL_F32, e, val);
+
+ val = bld_insn_1(bld, NV_OP_PREEX2, val);
+ val = bld_insn_1(bld, NV_OP_EX2, val);
+
+ return val;
+}
+
+static INLINE struct nv_value *
+bld_load_imm_f32(struct bld_context *bld, float f)
+{
+ if (f == 0.0f)
+ return bld->zero;
+ return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
+}
+
+static INLINE struct nv_value *
+bld_load_imm_u32(struct bld_context *bld, uint32_t u)
+{
+ if (u == 0)
+ return bld->zero;
+ return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
+}
+
+static INLINE struct nv_value *
+bld_setp(struct bld_context *bld, uint op, uint8_t cc,
+ struct nv_value *src0, struct nv_value *src1)
+{
+ struct nv_value *val = bld_insn_2(bld, op, src0, src1);
+
+ val->reg.file = NV_FILE_PRED;
+ val->reg.size = 1;
+ val->insn->set_cond = cc & 0xf;
+ return val;
+}
+
+static INLINE struct nv_value *
+bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src)
+{
+ struct nv_value *val = bld_insn_1(bld, NV_OP_CVT, src);
+ val->insn->ext.cvt.d = dt;
+ val->insn->ext.cvt.s = st;
+ return val;
+}
+
+static void
+bld_kil(struct bld_context *bld, struct nv_value *src)
+{
+ struct nv_instruction *nvi;
+
+ src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero);
+
+ nvi = new_instruction(bld->pc, NV_OP_KIL);
+ nvi->fixed = 1;
+
+ bld_src_predicate(bld, nvi, 0, src);
+}
+
+static void
+bld_flow(struct bld_context *bld, uint opcode,
+ struct nv_value *src, struct nv_basic_block *target,
+ boolean reconverge)
+{
+ struct nv_instruction *nvi;
+
+ if (reconverge)
+ new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;
+
+ nvi = new_instruction(bld->pc, opcode);
+ nvi->target = target;
+ nvi->terminator = 1;
+ if (src)
+ bld_src_predicate(bld, nvi, 0, src);
+}
+
+static ubyte
+translate_setcc(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_SLT: return NV_CC_LT;
+ case TGSI_OPCODE_SGE: return NV_CC_GE;
+ case TGSI_OPCODE_SEQ: return NV_CC_EQ;
+ case TGSI_OPCODE_SGT: return NV_CC_GT;
+ case TGSI_OPCODE_SLE: return NV_CC_LE;
+ case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
+ case TGSI_OPCODE_STR: return NV_CC_TR;
+ case TGSI_OPCODE_SFL: return NV_CC_FL;
+
+ case TGSI_OPCODE_ISLT: return NV_CC_LT;
+ case TGSI_OPCODE_ISGE: return NV_CC_GE;
+ case TGSI_OPCODE_USEQ: return NV_CC_EQ;
+ case TGSI_OPCODE_USGE: return NV_CC_GE;
+ case TGSI_OPCODE_USLT: return NV_CC_LT;
+ case TGSI_OPCODE_USNE: return NV_CC_NE;
+ default:
+ assert(0);
+ return NV_CC_FL;
+ }
+}
+
+static uint
+translate_opcode(uint opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_ABS: return NV_OP_ABS_F32;
+ case TGSI_OPCODE_ADD: return NV_OP_ADD_F32;
+ case TGSI_OPCODE_SUB: return NV_OP_SUB_F32;
+ case TGSI_OPCODE_UADD: return NV_OP_ADD_B32;
+ case TGSI_OPCODE_AND: return NV_OP_AND;
+ case TGSI_OPCODE_EX2: return NV_OP_EX2;
+ case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
+ case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
+ case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
+ case TGSI_OPCODE_COS: return NV_OP_COS;
+ case TGSI_OPCODE_SIN: return NV_OP_SIN;
+ case TGSI_OPCODE_DDX: return NV_OP_DFDX;
+ case TGSI_OPCODE_DDY: return NV_OP_DFDY;
+ case TGSI_OPCODE_F2I:
+ case TGSI_OPCODE_F2U:
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_U2F: return NV_OP_CVT;
+ case TGSI_OPCODE_INEG: return NV_OP_NEG_S32;
+ case TGSI_OPCODE_LG2: return NV_OP_LG2;
+ case TGSI_OPCODE_ISHR: return NV_OP_SAR;
+ case TGSI_OPCODE_USHR: return NV_OP_SHR;
+ case TGSI_OPCODE_MAD: return NV_OP_MAD_F32;
+ case TGSI_OPCODE_MAX: return NV_OP_MAX_F32;
+ case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32;
+ case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32;
+ case TGSI_OPCODE_MIN: return NV_OP_MIN_F32;
+ case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32;
+ case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32;
+ case TGSI_OPCODE_MUL: return NV_OP_MUL_F32;
+ case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32;
+ case TGSI_OPCODE_OR: return NV_OP_OR;
+ case TGSI_OPCODE_RCP: return NV_OP_RCP;
+ case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
+ case TGSI_OPCODE_SAD: return NV_OP_SAD;
+ case TGSI_OPCODE_SHL: return NV_OP_SHL;
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE: return NV_OP_FSET_F32;
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_ISGE: return NV_OP_SET_S32;
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE: return NV_OP_SET_U32;
+ case TGSI_OPCODE_TEX: return NV_OP_TEX;
+ case TGSI_OPCODE_TXP: return NV_OP_TEX;
+ case TGSI_OPCODE_TXB: return NV_OP_TXB;
+ case TGSI_OPCODE_TXL: return NV_OP_TXL;
+ case TGSI_OPCODE_XOR: return NV_OP_XOR;
+ default:
+ return NV_OP_NOP;
+ }
+}
+
+#if 0
+static ubyte
+infer_src_type(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_SAD:
+ case TGSI_OPCODE_U2F:
+ case TGSI_OPCODE_UADD:
+ case TGSI_OPCODE_UDIV:
+ case TGSI_OPCODE_UMOD:
+ case TGSI_OPCODE_UMAD:
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ case TGSI_OPCODE_USHR:
+ return NV_TYPE_U32;
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_IDIV:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_INEG:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_ISLT:
+ return NV_TYPE_S32;
+ default:
+ return NV_TYPE_F32;
+ }
+}
+
+static ubyte
+infer_dst_type(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_F2U:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_SAD:
+ case TGSI_OPCODE_UADD:
+ case TGSI_OPCODE_UDIV:
+ case TGSI_OPCODE_UMOD:
+ case TGSI_OPCODE_UMAD:
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ case TGSI_OPCODE_USHR:
+ return NV_TYPE_U32;
+ case TGSI_OPCODE_F2I:
+ case TGSI_OPCODE_IDIV:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_INEG:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_ISLT:
+ return NV_TYPE_S32;
+ default:
+ return NV_TYPE_F32;
+ }
+}
+#endif
+
+static void
+emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
+ unsigned chan, struct nv_value *res)
+{
+ const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+ struct nv_instruction *nvi;
+ struct nv_value *mem;
+ struct nv_value *ptr = NULL;
+ int idx;
+
+ idx = reg->Register.Index;
+ assert(chan < 4);
+
+ if (reg->Register.Indirect)
+ ptr = FETCH_ADDR(reg->Indirect.Index,
+ tgsi_util_get_src_register_swizzle(&reg->Indirect, 0));
+
+ switch (inst->Instruction.Saturate) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ res = bld_insn_1(bld, NV_OP_SAT, res);
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ res = bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f));
+ res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f));
+ break;
+ }
+
+ switch (reg->Register.File) {
+ case TGSI_FILE_OUTPUT:
+ if (!res->insn)
+ res = bld_insn_1(bld, NV_OP_MOV, res);
+
+ if (bld->pc->is_fragprog) {
+ assert(!ptr);
+ STORE_OUTP(idx, chan, res);
+ } else {
+ nvi = new_instruction(bld->pc, NV_OP_EXPORT);
+ mem = new_value(bld->pc, bld->ti->output_file, res->reg.size);
+ nv_reference(bld->pc, nvi, 0, mem);
+ nv_reference(bld->pc, nvi, 1, res);
+ if (!ptr)
+ mem->reg.address = bld->ti->output_loc[idx][chan];
+ else
+ mem->reg.address = 0x80 + idx * 16 + chan * 4;
+ nvi->fixed = 1;
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ assert(idx < BLD_MAX_TEMPS);
+ if (!res->insn)
+ res = bld_insn_1(bld, NV_OP_MOV, res);
+
+ assert(res->reg.file == NV_FILE_GPR);
+ assert(res->insn->bb = bld->pc->current_block);
+
+ if (bld->ti->require_stores)
+ bld_lmem_store(bld, ptr, idx * 4 + chan, res);
+ else
+ STORE_TEMP(idx, chan, res);
+ break;
+ case TGSI_FILE_ADDRESS:
+ assert(idx < BLD_MAX_ADDRS);
+ STORE_ADDR(idx, chan, res);
+ break;
+ }
+}
+
+static INLINE uint32_t
+bld_is_output_written(struct bld_context *bld, int i, int c)
+{
+ if (c < 0)
+ return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32));
+ return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32));
+}
+
+static void
+bld_append_vp_ucp(struct bld_context *bld)
+{
+ struct nv_value *res[6];
+ struct nv_value *ucp, *vtx, *out;
+ struct nv_instruction *insn;
+ int i, c;
+
+ assert(bld->ti->prog->vp.num_ucps <= 6);
+
+ for (c = 0; c < 4; ++c) {
+ vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]);
+
+ for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) {
+ ucp = new_value(bld->pc, NV_FILE_MEM_C(15), 4);
+ ucp->reg.address = i * 16 + c * 4;
+
+ if (c == 0)
+ res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp);
+ else
+ res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]);
+ }
+ }
+
+ for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) {
+ (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4;
+ (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
+ nv_reference(bld->pc, insn, 0, out);
+ nv_reference(bld->pc, insn, 1, res[i]);
+ }
+}
+
+static void
+bld_export_fp_outputs(struct bld_context *bld)
+{
+ struct nv_value *vals[4];
+ struct nv_instruction *nvi;
+ int i, c, n;
+
+ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
+ if (!bld_is_output_written(bld, i, -1))
+ continue;
+ for (n = 0, c = 0; c < 4; ++c) {
+ if (!bld_is_output_written(bld, i, c))
+ continue;
+ vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);
+ assert(vals[n]);
+ vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
+ vals[n++]->reg.id = bld->ti->output_loc[i][c];
+ }
+ assert(n);
+
+ (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
+ for (c = 0; c < n; ++c)
+ nv_reference(bld->pc, nvi, c, vals[c]);
+ }
+}
+
+static void
+bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
+{
+ int i, c;
+
+ bld->pc->current_block = b;
+
+ for (i = 0; i < 4; ++i)
+ bld->saved_addr[i][0] = NULL;
+ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
+ for (c = 0; c < 4; ++c)
+ bld->saved_inputs[i][c] = NULL;
+
+ bld->out_kind = CFG_EDGE_FORWARD;
+}
+
+static struct nv_value *
+bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c)
+{
+ if (bld->saved_inputs[i][c])
+ return bld->saved_inputs[i][c];
+ return NULL;
+}
+
+static struct nv_value *
+bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val)
+{
+ unsigned cent = mode & NVC0_INTERP_CENTROID;
+
+ mode &= ~NVC0_INTERP_CENTROID;
+
+ if (val->reg.address == 0x3fc) {
+ /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
+ val = bld_insn_1(bld, NV_OP_LINTERP, val);
+ val->insn->flat = 1;
+ val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31));
+ val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f));
+ return val;
+ } else
+ if (mode == NVC0_INTERP_PERSPECTIVE) {
+ val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]);
+ } else {
+ val = bld_insn_1(bld, NV_OP_LINTERP, val);
+ }
+
+ val->insn->flat = mode == NVC0_INTERP_FLAT ? 1 : 0;
+ val->insn->centroid = cent ? 1 : 0;
+ return val;
+}
+
+static struct nv_value *
+emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
+ const unsigned s, const unsigned chan)
+{
+ const struct tgsi_full_src_register *src = &insn->Src[s];
+ struct nv_value *res = NULL;
+ struct nv_value *ptr = NULL;
+ int idx, ind_idx, dim_idx;
+ unsigned swz, ind_swz, sgn;
+
+ idx = src->Register.Index;
+ swz = tgsi_util_get_full_src_register_swizzle(src, chan);
+
+ if (src->Register.Indirect) {
+ ind_idx = src->Indirect.Index;
+ ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);
+
+ ptr = FETCH_ADDR(ind_idx, ind_swz);
+ }
+
+ if (src->Register.Dimension)
+ dim_idx = src->Dimension.Index;
+ else
+ dim_idx = 0;
+
+ switch (src->Register.File) {
+ case TGSI_FILE_CONSTANT:
+ assert(dim_idx < 14);
+ res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4);
+ res->reg.address = idx * 16 + swz * 4;
+ res = bld_insn_1(bld, NV_OP_LD, res);
+ if (ptr)
+ bld_src_pointer(bld, res->insn, 1, ptr);
+ break;
+ case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */
+ assert(idx < bld->ti->immd32_nr);
+ res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);
+ break;
+ case TGSI_FILE_INPUT:
+ assert(!src->Register.Dimension);
+ if (!ptr) {
+ res = bld_get_saved_input(bld, idx, swz);
+ if (res)
+ return res;
+ }
+ res = new_value(bld->pc, bld->ti->input_file, 4);
+ if (ptr)
+ res->reg.address = 0x80 + idx * 16 + swz * 4;
+ else
+ res->reg.address = bld->ti->input_loc[idx][swz];
+
+ if (bld->pc->is_fragprog)
+ res = bld_interp(bld, bld->ti->interp_mode[idx], res);
+ else
+ res = bld_insn_1(bld, NV_OP_VFETCH, res);
+
+ if (ptr)
+ bld_src_pointer(bld, res->insn, res->insn->src[1] ? 2 : 1, ptr);
+ else
+ bld->saved_inputs[idx][swz] = res;
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (bld->ti->require_stores)
+ res = bld_lmem_load(bld, ptr, idx * 4 + swz);
+ else
+ res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
+ break;
+ case TGSI_FILE_ADDRESS:
+ res = bld_fetch_global(bld, &bld->avs[idx][swz]);
+ break;
+ case TGSI_FILE_PREDICATE:
+ res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
+ break;
+ default:
+ NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
+ abort();
+ break;
+ }
+ if (!res)
+ return bld_undef(bld, NV_FILE_GPR);
+
+ sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
+
+ switch (sgn) {
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+ case TGSI_UTIL_SIGN_CLEAR:
+ res = bld_insn_1(bld, NV_OP_ABS_F32, res);
+ break;
+ case TGSI_UTIL_SIGN_TOGGLE:
+ res = bld_insn_1(bld, NV_OP_NEG_F32, res);
+ break;
+ case TGSI_UTIL_SIGN_SET:
+ res = bld_insn_1(bld, NV_OP_ABS_F32, res);
+ res = bld_insn_1(bld, NV_OP_NEG_F32, res);
+ break;
+ default:
+ NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
+ abort();
+ break;
+ }
+
+ return res;
+}
+
+static void
+bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
+ const struct tgsi_full_instruction *insn)
+{
+ struct nv_value *val0 = NULL;
+ unsigned mask = insn->Dst[0].Register.WriteMask;
+
+ if (mask & ((1 << 0) | (1 << 3)))
+ dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);
+
+ if (mask & (3 << 1)) {
+ val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), bld->zero);
+ if (mask & (1 << 1))
+ dst0[1] = val0;
+ }
+
+ if (mask & (1 << 2)) {
+ struct nv_value *val1, *val3, *src1, *src3, *pred;
+ struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f);
+ struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);
+
+ src1 = emit_fetch(bld, insn, 0, 1);
+ src3 = emit_fetch(bld, insn, 0, 3);
+
+ pred = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, val0, bld->zero);
+
+ val1 = bld_insn_2(bld, NV_OP_MAX_F32, src1, bld->zero);
+ val3 = bld_insn_2(bld, NV_OP_MAX_F32, src3, neg128);
+ val3 = bld_insn_2(bld, NV_OP_MIN_F32, val3, pos128);
+ val3 = bld_pow(bld, val1, val3);
+
+ dst0[2] = bld_insn_1(bld, NV_OP_MOV, bld->zero);
+ bld_src_predicate(bld, dst0[2]->insn, 1, pred);
+
+ dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
+ }
+}
+
+static INLINE void
+describe_texture_target(unsigned target, int *dim,
+ int *array, int *cube, int *shadow)
+{
+ *array = *cube = *shadow = 0;
+
+ switch (target) {
+ case TGSI_TEXTURE_1D:
+ *dim = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ *dim = *shadow = 1;
+ break;
+ case TGSI_TEXTURE_UNKNOWN:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ *dim = 2;
+ break;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ *dim = 2;
+ *shadow = 1;
+ break;
+ case TGSI_TEXTURE_3D:
+ *dim = 3;
+ break;
+ case TGSI_TEXTURE_CUBE:
+ *dim = 2;
+ *cube = 1;
+ break;
+ /*
+ case TGSI_TEXTURE_CUBE_ARRAY:
+ *dim = 2;
+ *cube = *array = 1;
+ break;
+ case TGSI_TEXTURE_1D_ARRAY:
+ *dim = *array = 1;
+ break;
+ case TGSI_TEXTURE_2D_ARRAY:
+ *dim = 2;
+ *array = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ *dim = *array = *shadow = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ *dim = 2;
+ *array = *shadow = 1;
+ break;
+ case TGSI_TEXTURE_CUBE_ARRAY:
+ *dim = 2;
+ *array = *cube = 1;
+ break;
+ */
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static struct nv_value *
+bld_clone(struct bld_context *bld, struct nv_instruction *nvi)
+{
+ struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode);
+ struct nv_instruction *next, *prev;
+ int c;
+
+ next = dupi->next;
+ prev = dupi->prev;
+
+ *dupi = *nvi;
+
+ dupi->next = next;
+ dupi->prev = prev;
+
+ for (c = 0; c < 5 && nvi->def[c]; ++c)
+ bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c]));
+
+ for (c = 0; c < 6 && nvi->src[c]; ++c) {
+ dupi->src[c] = NULL;
+ nv_reference(bld->pc, dupi, c, nvi->src[c]->value);
+ }
+
+ return dupi->def[0];
+}
+
+/* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */
+static void
+load_proj_tex_coords(struct bld_context *bld,
+ struct nv_value *t[4], int dim, int shadow,
+ const struct tgsi_full_instruction *insn)
+{
+ int c;
+ unsigned mask = (1 << dim) - 1;
+
+ if (shadow)
+ mask |= 4; /* depth comparison value */
+
+ t[3] = emit_fetch(bld, insn, 0, 3);
+ if (t[3]->insn->opcode == NV_OP_PINTERP) {
+ t[3] = bld_clone(bld, t[3]->insn);
+ t[3]->insn->opcode = NV_OP_LINTERP;
+ nv_reference(bld->pc, t[3]->insn, 1, NULL);
+ }
+ t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
+
+ for (c = 0; c < 4; ++c) {
+ if (!(mask & (1 << c)))
+ continue;
+ t[c] = emit_fetch(bld, insn, 0, c);
+
+ if (t[c]->insn->opcode != NV_OP_PINTERP)
+ continue;
+ mask &= ~(1 << c);
+
+ t[c] = bld_clone(bld, t[c]->insn);
+ nv_reference(bld->pc, t[c]->insn, 1, t[3]);
+ }
+ if (mask == 0)
+ return;
+
+ t[3] = emit_fetch(bld, insn, 0, 3);
+ t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
+
+ for (c = 0; c < 4; ++c)
+ if (mask & (1 << c))
+ t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], t[3]);
+}
+
+/* For a quad of threads / top left, top right, bottom left, bottom right
+ * pixels, do a different operation, and take src0 from a specific thread.
+ */
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV1 3
+
+#define QOP(a, b, c, d) \
+ ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
+
+static INLINE struct nv_value *
+bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
+ struct nv_value *src1, boolean wp)
+{
+ struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
+ val->insn->lanes = lane;
+ val->insn->quadop = qop;
+ if (wp) {
+ assert(!"quadop predicate write");
+ }
+ return val;
+}
+
+/* order of TGSI operands: x y z layer shadow lod/bias */
+/* order of native operands: layer x y z | lod/bias shadow */
+static struct nv_instruction *
+emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
+ struct nv_value *dst[4], struct nv_value *arg[4],
+ int dim, int array, int cube, int shadow)
+{
+ struct nv_value *src[4];
+ struct nv_instruction *nvi, *bnd;
+ int c;
+ int s = 0;
+ boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
+
+ if (array)
+ arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);
+
+ /* ensure that all inputs reside in a GPR */
+ for (c = 0; c < dim + array + cube + shadow; ++c)
+ (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1;
+
+ /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */
+
+ bnd = new_instruction(bld->pc, NV_OP_BIND);
+ if (array) {
+ src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+ bld_def(bnd, s, src[s]);
+ nv_reference(bld->pc, bnd, s++, arg[dim + cube]);
+ }
+ for (c = 0; c < dim + cube; ++c, ++s) {
+ src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4));
+ nv_reference(bld->pc, bnd, s, arg[c]);
+ }
+
+ if (shadow || lodbias) {
+ bnd = new_instruction(bld->pc, NV_OP_BIND);
+
+ if (lodbias) {
+ src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+ bld_def(bnd, 0, src[s++]);
+ nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]);
+ }
+ if (shadow) {
+ src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
+ bld_def(bnd, lodbias, src[s++]);
+ nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]);
+ }
+ }
+
+ nvi = new_instruction(bld->pc, opcode);
+ for (c = 0; c < 4; ++c)
+ dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4));
+ for (c = 0; c < s; ++c)
+ nv_reference(bld->pc, nvi, c, src[c]);
+
+ nvi->ext.tex.t = tic;
+ nvi->ext.tex.s = tsc;
+ nvi->tex_mask = 0xf;
+ nvi->tex_cube = cube;
+ nvi->tex_dim = dim;
+ nvi->tex_cube = cube;
+ nvi->tex_shadow = shadow;
+ nvi->tex_live = 0;
+
+ return nvi;
+}
+
+/*
+static boolean
+bld_is_constant(struct nv_value *val)
+{
+ if (val->reg.file == NV_FILE_IMM)
+ return TRUE;
+ return val->insn && nvCG_find_constant(val->insn->src[0]);
+}
+*/
+
+static void
+bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
+ const struct tgsi_full_instruction *insn)
+{
+ struct nv_value *t[4], *s[3];
+ uint opcode = translate_opcode(insn->Instruction.Opcode);
+ int c, dim, array, cube, shadow;
+ const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
+ const int tic = insn->Src[1].Register.Index;
+ const int tsc = tic;
+
+ describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow);
+
+ assert(dim + array + shadow + lodbias <= 5);
+
+ if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
+ load_proj_tex_coords(bld, t, dim, shadow, insn);
+ else {
+ for (c = 0; c < dim + cube + array; ++c)
+ t[c] = emit_fetch(bld, insn, 0, c);
+ if (shadow)
+ t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2));
+ }
+
+ if (cube) {
+ for (c = 0; c < 3; ++c)
+ s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]);
+
+ s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]);
+ s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]);
+ s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);
+
+ for (c = 0; c < 3; ++c)
+ t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]);
+ }
+
+ if (lodbias)
+ t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3);
+
+ emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow);
+}
+
+static INLINE struct nv_value *
+bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
+ int n)
+{
+ struct nv_value *dotp, *src0, *src1;
+ int c;
+
+ src0 = emit_fetch(bld, insn, 0, 0);
+ src1 = emit_fetch(bld, insn, 1, 0);
+ dotp = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
+
+ for (c = 1; c < n; ++c) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dotp = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dotp);
+ }
+ return dotp;
+}
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
+ for (chan = 0; chan < 4; ++chan) \
+ if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
+
+static void
+bld_instruction(struct bld_context *bld,
+ const struct tgsi_full_instruction *insn)
+{
+ struct nv_value *src0;
+ struct nv_value *src1;
+ struct nv_value *src2;
+ struct nv_value *dst0[4] = { NULL };
+ struct nv_value *temp;
+ int c;
+ uint opcode = translate_opcode(insn->Instruction.Opcode);
+ uint8_t mask = insn->Dst[0].Register.WriteMask;
+
+#ifdef NOUVEAU_DEBUG
+ debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
+#endif
+
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MUL:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dst0[c] = bld_insn_2(bld, opcode, src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_ARL:
+ src1 = bld_imm_u32(bld, 4);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src0 = bld_insn_1(bld, NV_OP_FLOOR, src0);
+ src0->insn->ext.cvt.d = NV_TYPE_S32;
+ src0->insn->ext.cvt.s = NV_TYPE_F32;
+ dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_CMP:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero);
+ src1 = emit_fetch(bld, insn, 1, c);
+ src2 = emit_fetch(bld, insn, 2, c);
+ dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0);
+ }
+ break;
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+ if (insn->Dst[0].Register.WriteMask & 7)
+ temp = bld_insn_1(bld, opcode, temp);
+ for (c = 0; c < 3; ++c)
+ if (insn->Dst[0].Register.WriteMask & (1 << c))
+ dst0[c] = temp;
+ if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
+ break;
+ src0 = emit_fetch(bld, insn, 0, 3);
+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+ dst0[3] = bld_insn_1(bld, opcode, temp);
+ break;
+ case TGSI_OPCODE_DP2:
+ temp = bld_dot(bld, insn, 2);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_DP3:
+ temp = bld_dot(bld, insn, 3);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_DP4:
+ temp = bld_dot(bld, insn, 4);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_DPH:
+ src0 = bld_dot(bld, insn, 3);
+ src1 = emit_fetch(bld, insn, 1, 3);
+ temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_DST:
+ if (insn->Dst[0].Register.WriteMask & 1)
+ dst0[0] = bld_imm_f32(bld, 1.0f);
+ if (insn->Dst[0].Register.WriteMask & 2) {
+ src0 = emit_fetch(bld, insn, 0, 1);
+ src1 = emit_fetch(bld, insn, 1, 1);
+ dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
+ }
+ if (insn->Dst[0].Register.WriteMask & 4)
+ dst0[2] = emit_fetch(bld, insn, 0, 2);
+ if (insn->Dst[0].Register.WriteMask & 8)
+ dst0[3] = emit_fetch(bld, insn, 1, 3);
+ break;
+ case TGSI_OPCODE_EXP:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_FLOOR, src0);
+
+ if (insn->Dst[0].Register.WriteMask & 2)
+ dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp);
+ if (insn->Dst[0].Register.WriteMask & 1) {
+ temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
+ dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp);
+ }
+ if (insn->Dst[0].Register.WriteMask & 4) {
+ temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
+ dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp);
+ }
+ if (insn->Dst[0].Register.WriteMask & 8)
+ dst0[3] = bld_imm_f32(bld, 1.0f);
+ break;
+ case TGSI_OPCODE_EX2:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
+ temp = bld_insn_1(bld, NV_OP_EX2, temp);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_FRC:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]);
+ }
+ break;
+ case TGSI_OPCODE_KIL:
+ for (c = 0; c < 4; ++c)
+ bld_kil(bld, emit_fetch(bld, insn, 0, c));
+ break;
+ case TGSI_OPCODE_KILP:
+ (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
+ break;
+ case TGSI_OPCODE_IF:
+ {
+ struct nv_basic_block *b = new_basic_block(bld->pc);
+
+ assert(bld->cond_lvl < BLD_MAX_COND_NESTING);
+
+ nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD);
+
+ bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
+ bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
+
+ src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ,
+ emit_fetch(bld, insn, 0, 0), bld->zero);
+
+ bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0));
+
+ ++bld->cond_lvl;
+ bld_new_block(bld, b);
+ }
+ break;
+ case TGSI_OPCODE_ELSE:
+ {
+ struct nv_basic_block *b = new_basic_block(bld->pc);
+
+ --bld->cond_lvl;
+ nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
+
+ bld->cond_bb[bld->cond_lvl]->exit->target = b;
+ bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
+
+ new_instruction(bld->pc, NV_OP_BRA)->terminator = 1;
+
+ ++bld->cond_lvl;
+ bld_new_block(bld, b);
+ }
+ break;
+ case TGSI_OPCODE_ENDIF:
+ {
+ struct nv_basic_block *b = new_basic_block(bld->pc);
+
+ --bld->cond_lvl;
+ nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind);
+ nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
+
+ bld->cond_bb[bld->cond_lvl]->exit->target = b;
+
+ bld_new_block(bld, b);
+
+ if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
+ bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
+ new_instruction(bld->pc, NV_OP_JOIN)->join = 1;
+ }
+ }
+ break;
+ case TGSI_OPCODE_BGNLOOP:
+ {
+ struct nv_basic_block *bl = new_basic_block(bld->pc);
+ struct nv_basic_block *bb = new_basic_block(bld->pc);
+
+ assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING);
+
+ bld->loop_bb[bld->loop_lvl] = bl;
+ bld->brkt_bb[bld->loop_lvl] = bb;
+
+ nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);
+
+ bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);
+
+ if (bld->loop_lvl == bld->pc->loop_nesting_bound)
+ bld->pc->loop_nesting_bound++;
+
+ bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
+ bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
+ bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
+ }
+ break;
+ case TGSI_OPCODE_BRK:
+ {
+ struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
+
+ bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+
+ if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
+
+ bld->out_kind = CFG_EDGE_FAKE;
+ }
+ break;
+ case TGSI_OPCODE_CONT:
+ {
+ struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
+
+ bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+
+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
+
+ if ((bb = bld->join_bb[bld->cond_lvl - 1])) {
+ bld->join_bb[bld->cond_lvl - 1] = NULL;
+ nvc0_insn_delete(bb->exit->prev);
+ }
+ bld->out_kind = CFG_EDGE_FAKE;
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ {
+ struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
+
+ bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+
+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
+
+ bld_loop_end(bld, bb); /* replace loop-side operand of the phis */
+
+ bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
+ }
+ break;
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_CEIL:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_TRUNC:
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ dst0[c] = bld_insn_1(bld, opcode, src0);
+ }
+ break;
+ case TGSI_OPCODE_LIT:
+ bld_lit(bld, dst0, insn);
+ break;
+ case TGSI_OPCODE_LRP:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ src2 = emit_fetch(bld, insn, 2, c);
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2);
+ dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2);
+ }
+ break;
+ case TGSI_OPCODE_MOV:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = emit_fetch(bld, insn, 0, c);
+ break;
+ case TGSI_OPCODE_MAD:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ src2 = emit_fetch(bld, insn, 2, c);
+ dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
+ }
+ break;
+ case TGSI_OPCODE_POW:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ src1 = emit_fetch(bld, insn, 1, 0);
+ temp = bld_pow(bld, src0, src1);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_LOG:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0);
+ temp = bld_insn_1(bld, NV_OP_LG2, src0);
+ dst0[2] = temp;
+ if (insn->Dst[0].Register.WriteMask & 3) {
+ temp = bld_insn_1(bld, NV_OP_FLOOR, temp);
+ dst0[0] = temp;
+ }
+ if (insn->Dst[0].Register.WriteMask & 2) {
+ temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
+ temp = bld_insn_1(bld, NV_OP_EX2, temp);
+ temp = bld_insn_1(bld, NV_OP_RCP, temp);
+ dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp);
+ }
+ if (insn->Dst[0].Register.WriteMask & 8)
+ dst0[3] = bld_imm_f32(bld, 1.0f);
+ break;
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_LG2:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, opcode, src0);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_RSQ:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_ABS_F32, src0);
+ temp = bld_insn_1(bld, NV_OP_RSQ, temp);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dst0[c] = bld_insn_2(bld, opcode, src0, src1);
+ dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
+ }
+ break;
+ case TGSI_OPCODE_SCS:
+ if (insn->Dst[0].Register.WriteMask & 0x3) {
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+ if (insn->Dst[0].Register.WriteMask & 0x1)
+ dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
+ if (insn->Dst[0].Register.WriteMask & 0x2)
+ dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
+ }
+ if (insn->Dst[0].Register.WriteMask & 0x4)
+ dst0[2] = bld_imm_f32(bld, 0.0f);
+ if (insn->Dst[0].Register.WriteMask & 0x8)
+ dst0[3] = bld_imm_f32(bld, 1.0f);
+ break;
+ case TGSI_OPCODE_SSG:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero);
+ temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
+ temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
+ dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp);
+ bld_src_predicate(bld, dst0[c]->insn, 1, src1);
+ }
+ break;
+ case TGSI_OPCODE_SUB:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXP:
+ bld_tex(bld, dst0, insn);
+ break;
+ case TGSI_OPCODE_XPD:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ if (c == 3) {
+ dst0[3] = bld_imm_f32(bld, 1.0f);
+ break;
+ }
+ src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
+ src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
+ dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
+
+ src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
+ src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
+ dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]);
+
+ dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
+ }
+ break;
+ case TGSI_OPCODE_RET:
+ (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
+ break;
+ case TGSI_OPCODE_END:
+ /* VP outputs are exported in-place as scalars, optimization later */
+ if (bld->pc->is_fragprog)
+ bld_export_fp_outputs(bld);
+ if (bld->ti->append_ucp)
+ bld_append_vp_ucp(bld);
+ return;
+ default:
+ NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
+ abort();
+ return;
+ }
+
+ if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
+ !bld->pc->is_fragprog) {
+ struct nv_instruction *mi = NULL;
+ uint size;
+
+ if (bld->ti->append_ucp) {
+ if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) {
+ bld->hpos_index = insn->Dst[0].Register.Index;
+ for (c = 0; c < 4; ++c)
+ if (mask & (1 << c))
+ STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]);
+ }
+ }
+
+ for (c = 0; c < 4; ++c)
+ if ((mask & (1 << c)) &&
+ ((dst0[c]->reg.file == NV_FILE_IMM) ||
+ (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR)))
+ dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
+
+ c = 0;
+ if ((mask & 0x3) == 0x3) {
+ mask &= ~0x3;
+ size = 8;
+ mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn;
+ }
+ if ((mask & 0xc) == 0xc) {
+ mask &= ~0xc;
+ if (mi) {
+ size = 16;
+ nv_reference(bld->pc, mi, 2, dst0[2]);
+ nv_reference(bld->pc, mi, 3, dst0[3]);
+ } else {
+ c = 2;
+ size = 8;
+ mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn;
+ }
+ } else
+ if (mi && (mask & 0x4)) {
+ size = 12;
+ mask &= ~0x4;
+ nv_reference(bld->pc, mi, 2, dst0[2]);
+ }
+
+ if (mi) {
+ struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT);
+ int s;
+
+ nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4));
+ nv_reference(bld->pc, ex, 1, mi->def[0]);
+
+ for (s = 1; s < size / 4; ++s) {
+ bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4));
+ nv_reference(bld->pc, ex, s + 1, mi->def[s]);
+ }
+
+ ex->fixed = 1;
+ ex->src[0]->value->reg.size = size;
+ ex->src[0]->value->reg.address =
+ bld->ti->output_loc[insn->Dst[0].Register.Index][c];
+ }
+ }
+
+ for (c = 0; c < 4; ++c)
+ if (mask & (1 << c))
+ emit_store(bld, insn, c, dst0[c]);
+}
+
+static INLINE void
+bld_free_registers(struct bld_register *base, int n)
+{
+ int i, c;
+
+ for (i = 0; i < n; ++i)
+ for (c = 0; c < 4; ++c)
+ util_dynarray_fini(&base[i * 4 + c].vals);
+}
+
+int
+nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti)
+{
+ struct bld_context *bld = CALLOC_STRUCT(bld_context);
+ unsigned ip;
+
+ pc->root[0] = pc->current_block = new_basic_block(pc);
+
+ bld->pc = pc;
+ bld->ti = ti;
+
+ pc->loop_nesting_bound = 1;
+
+ bld->zero = new_value(pc, NV_FILE_GPR, 4);
+ bld->zero->reg.id = 63;
+
+ if (pc->is_fragprog) {
+ struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4);
+ mem->reg.address = 0x7c;
+
+ bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem);
+ bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]);
+ }
+
+ for (ip = 0; ip < ti->num_insns; ++ip)
+ bld_instruction(bld, &ti->insns[ip]);
+
+ bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS);
+ bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS);
+ bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS);
+ bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
+
+ FREE(bld);
+ return 0;
+}
+
+/* If a variable is assigned in a loop, replace all references to the value
+ * from outside the loop with a phi value.
+ */
+static void
+bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
+ struct nv_value *old_val,
+ struct nv_value *new_val)
+{
+ struct nv_instruction *nvi;
+
+ for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
+ int s;
+ for (s = 0; s < 6 && nvi->src[s]; ++s)
+ if (nvi->src[s]->value == old_val)
+ nv_reference(pc, nvi, s, new_val);
+ }
+
+ b->pass_seq = pc->pass_seq;
+
+ if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
+ bld_replace_value(pc, b->out[0], old_val, new_val);
+
+ if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
+ bld_replace_value(pc, b->out[1], old_val, new_val);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c
new file mode 100644
index 0000000000..286b382f58
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_transfer.c
@@ -0,0 +1,381 @@
+
+#include "util/u_format.h"
+
+#include "nvc0_context.h"
+#include "nvc0_transfer.h"
+
+#include "nv50_defs.xml.h"
+
+struct nvc0_transfer {
+ struct pipe_transfer base;
+ struct nvc0_m2mf_rect rect[2];
+ uint32_t nblocksx;
+ uint32_t nblocksy;
+};
+
+static void
+nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen,
+ const struct nvc0_m2mf_rect *dst,
+ const struct nvc0_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy)
+{
+ struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
+ const int cpp = dst->cpp;
+ uint32_t src_ofst = src->base;
+ uint32_t dst_ofst = dst->base;
+ uint32_t height = nblocksy;
+ uint32_t sy = src->y;
+ uint32_t dy = dst->y;
+ uint32_t exec = (1 << 20);
+
+ assert(dst->cpp == src->cpp);
+
+ if (nouveau_bo_tile_layout(src->bo)) {
+ BEGIN_RING(chan, RING_MF(TILING_MODE_IN), 5);
+ OUT_RING (chan, src->tile_mode);
+ OUT_RING (chan, src->width * cpp);
+ OUT_RING (chan, src->height);
+ OUT_RING (chan, src->depth);
+ OUT_RING (chan, src->z);
+ } else {
+ src_ofst += src->y * src->pitch + src->x * cpp;
+
+ BEGIN_RING(chan, RING_MF(PITCH_IN), 1);
+ OUT_RING (chan, src->width * cpp);
+
+ exec |= NVC0_M2MF_EXEC_LINEAR_IN;
+ }
+
+ if (nouveau_bo_tile_layout(dst->bo)) {
+ BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
+ OUT_RING (chan, dst->tile_mode);
+ OUT_RING (chan, dst->width * cpp);
+ OUT_RING (chan, dst->height);
+ OUT_RING (chan, dst->depth);
+ OUT_RING (chan, dst->z);
+ } else {
+ dst_ofst += dst->y * dst->pitch + dst->x * cpp;
+
+ BEGIN_RING(chan, RING_MF(PITCH_OUT), 1);
+ OUT_RING (chan, dst->width * cpp);
+
+ exec |= NVC0_M2MF_EXEC_LINEAR_OUT;
+ }
+
+ while (height) {
+ int line_count = height > 2047 ? 2047 : height;
+
+ MARK_RING (chan, 17, 4);
+
+ BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
+ OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
+
+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+ OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
+
+ if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) {
+ BEGIN_RING(chan, RING_MF(TILING_POSITION_IN_X), 2);
+ OUT_RING (chan, src->x * cpp);
+ OUT_RING (chan, sy);
+ } else {
+ src_ofst += line_count * src->pitch;
+ }
+ if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) {
+ BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
+ OUT_RING (chan, dst->x * cpp);
+ OUT_RING (chan, dy);
+ } else {
+ dst_ofst += line_count * dst->pitch;
+ }
+
+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+ OUT_RING (chan, nblocksx * cpp);
+ OUT_RING (chan, line_count);
+ BEGIN_RING(chan, RING_MF(EXEC), 1);
+ OUT_RING (chan, exec);
+
+ height -= line_count;
+ sy += line_count;
+ dy += line_count;
+ }
+}
+
+void
+nvc0_m2mf_push_linear(struct nvc0_context *nvc0,
+ struct nouveau_bo *dst, unsigned domain, int offset,
+ unsigned size, void *data)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ uint32_t *src = (uint32_t *)data;
+ unsigned count = (size + 3) / 4;
+
+ MARK_RING (chan, 8, 2);
+
+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+ OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+ OUT_RING (chan, size);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_MF(EXEC), 1);
+ OUT_RING (chan, 0x100111);
+
+ while (count) {
+ unsigned nr = AVAIL_RING(chan);
+
+ if (nr < 9) {
+ FIRE_RING(chan);
+ nouveau_bo_validate(chan, dst, NOUVEAU_BO_WR);
+ continue;
+ }
+ nr = MIN2(count, nr - 1);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_RING_NI(chan, RING_MF(DATA), nr);
+ OUT_RINGp (chan, src, nr);
+
+ src += nr;
+ count -= nr;
+ }
+}
+
+void
+nvc0_m2mf_copy_linear(struct nvc0_context *nvc0,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ while (size) {
+ unsigned bytes = MIN2(size, 1 << 17);
+
+ MARK_RING (chan, 11, 4);
+
+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+ OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
+ OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+ OUT_RING (chan, bytes);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_MF(EXEC), 1);
+ OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
+ NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT);
+
+ srcoff += bytes;
+ dstoff += bytes;
+ size -= bytes;
+ }
+}
+
+static void
+nvc0_m2mf_push_rect(struct pipe_screen *pscreen,
+ const struct nvc0_m2mf_rect *dst,
+ const void *data,
+ unsigned nblocksx, unsigned nblocksy)
+{
+ struct nouveau_channel *chan;
+ const uint8_t *src = (const uint8_t *)data;
+ const int cpp = dst->cpp;
+ const int line_len = nblocksx * cpp;
+ int dy = dst->y;
+
+ assert(nouveau_bo_tile_layout(dst->bo));
+
+ BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
+ OUT_RING (chan, dst->tile_mode);
+ OUT_RING (chan, dst->width * cpp);
+ OUT_RING (chan, dst->height);
+ OUT_RING (chan, dst->depth);
+ OUT_RING (chan, dst->z);
+
+ while (nblocksy) {
+ int line_count, words;
+ int size = MIN2(AVAIL_RING(chan), NV04_PFIFO_MAX_PACKET_LEN);
+
+ if (size < (12 + words)) {
+ FIRE_RING(chan);
+ continue;
+ }
+ line_count = (size * 4) / line_len;
+ words = (line_count * line_len + 3) / 4;
+
+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+ OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
+ OUT_RING (chan, dst->x * cpp);
+ OUT_RING (chan, dy);
+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+ OUT_RING (chan, line_len);
+ OUT_RING (chan, line_count);
+ BEGIN_RING(chan, RING_MF(EXEC), 1);
+ OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
+ NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN);
+
+ BEGIN_RING_NI(chan, RING_MF(DATA), words);
+ OUT_RINGp (chan, src, words);
+
+ dy += line_count;
+ src += line_len * line_count;
+ nblocksy -= line_count;
+ }
+}
+
+struct pipe_transfer *
+nvc0_miptree_transfer_new(struct pipe_context *pctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pctx);
+ struct pipe_screen *pscreen = pctx->screen;
+ struct nouveau_device *dev = nvc0->screen->base.device;
+ struct nvc0_miptree *mt = nvc0_miptree(res);
+ struct nvc0_miptree_level *lvl = &mt->level[level];
+ struct nvc0_transfer *tx;
+ uint32_t size;
+ uint32_t w, h, d, z, layer;
+ int ret;
+
+ if (mt->layout_3d) {
+ z = box->z;
+ d = u_minify(res->depth0, level);
+ layer = 0;
+ } else {
+ z = 0;
+ d = 1;
+ layer = box->z;
+ }
+
+ tx = CALLOC_STRUCT(nvc0_transfer);
+ if (!tx)
+ return NULL;
+
+ pipe_resource_reference(&tx->base.resource, res);
+
+ tx->base.level = level;
+ tx->base.usage = usage;
+ tx->base.box = *box;
+
+ tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
+ tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+
+ tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
+ tx->base.layer_stride = tx->nblocksy * tx->base.stride;
+
+ w = u_minify(res->width0, level);
+ h = u_minify(res->height0, level);
+
+ tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format);
+
+ tx->rect[0].bo = mt->base.bo;
+ tx->rect[0].base = lvl->offset + layer * mt->layer_stride;
+ tx->rect[0].tile_mode = lvl->tile_mode;
+ tx->rect[0].x = util_format_get_nblocksx(res->format, box->x);
+ tx->rect[0].y = util_format_get_nblocksy(res->format, box->y);
+ tx->rect[0].z = z;
+ tx->rect[0].width = util_format_get_nblocksx(res->format, w);
+ tx->rect[0].height = util_format_get_nblocksy(res->format, h);
+ tx->rect[0].depth = d;
+ tx->rect[0].pitch = lvl->pitch;
+ tx->rect[0].domain = NOUVEAU_BO_VRAM;
+
+ size = tx->base.layer_stride;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+ size * tx->base.box.depth, &tx->rect[1].bo);
+ if (ret) {
+ FREE(tx);
+ return NULL;
+ }
+
+ tx->rect[1].width = tx->nblocksx;
+ tx->rect[1].height = tx->nblocksy;
+ tx->rect[1].depth = 1;
+ tx->rect[1].pitch = tx->base.stride;
+ tx->rect[1].domain = NOUVEAU_BO_GART;
+
+ if (usage & PIPE_TRANSFER_READ) {
+ unsigned i;
+ for (i = 0; i < box->depth; ++i) {
+ nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += size;
+ }
+ }
+ tx->rect[0].z = z;
+ tx->rect[1].base = 0;
+
+ return &tx->base;
+}
+
+void
+nvc0_miptree_transfer_del(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
+{
+ struct pipe_screen *pscreen = pctx->screen;
+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+ struct nvc0_miptree *mt = nvc0_miptree(tx->base.resource);
+ unsigned i;
+
+ if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+ for (i = 0; i < tx->base.box.depth; ++i) {
+ nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += tx->nblocksy * tx->base.stride;
+ }
+ }
+
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
+ pipe_resource_reference(&transfer->resource, NULL);
+
+ FREE(tx);
+}
+
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
+{
+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+ int ret;
+ unsigned flags = 0;
+
+ if (tx->rect[1].bo->map)
+ return tx->rect[1].bo->map;
+
+ if (transfer->usage & PIPE_TRANSFER_READ)
+ flags = NOUVEAU_BO_RD;
+ if (transfer->usage & PIPE_TRANSFER_WRITE)
+ flags |= NOUVEAU_BO_WR;
+
+ ret = nouveau_bo_map(tx->rect[1].bo, flags);
+ if (ret)
+ return NULL;
+ return tx->rect[1].bo->map;
+}
+
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
+{
+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+
+ nouveau_bo_unmap(tx->rect[1].bo);
+}
+
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h
new file mode 100644
index 0000000000..222f72d274
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_transfer.h
@@ -0,0 +1,38 @@
+
+#ifndef __NVC0_TRANSFER_H__
+#define __NVC0_TRANSFER_H__
+
+#include "pipe/p_state.h"
+
+struct pipe_transfer *
+nvc0_miptree_transfer_new(struct pipe_context *pcontext,
+ struct pipe_resource *pt,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box);
+void
+nvc0_miptree_transfer_del(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+
+struct nvc0_m2mf_rect {
+ struct nouveau_bo *bo;
+ uint32_t base;
+ unsigned domain;
+ uint32_t pitch;
+ uint32_t width;
+ uint32_t x;
+ uint32_t height;
+ uint32_t y;
+ uint16_t depth;
+ uint16_t z;
+ uint16_t tile_mode;
+ uint16_t cpp;
+};
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
new file mode 100644
index 0000000000..a14e955738
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
@@ -0,0 +1,654 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe,
+ void *hwcso)
+{
+ struct nvc0_vertex_stateobj *so = hwcso;
+
+ if (so->translate)
+ so->translate->release(so->translate);
+ FREE(hwcso);
+}
+
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct nvc0_vertex_stateobj *so;
+ struct translate_key transkey;
+ unsigned i;
+
+ assert(num_elements);
+
+ so = MALLOC(sizeof(*so) +
+ (num_elements - 1) * sizeof(struct nvc0_vertex_element));
+ if (!so)
+ return NULL;
+ so->num_elements = num_elements;
+ so->instance_bits = 0;
+
+ transkey.nr_elements = 0;
+ transkey.output_stride = 0;
+
+ for (i = 0; i < num_elements; ++i) {
+ const struct pipe_vertex_element *ve = &elements[i];
+ const unsigned vbi = ve->vertex_buffer_index;
+ enum pipe_format fmt = ve->src_format;
+
+ so->element[i].pipe = elements[i];
+ so->element[i].state = nvc0_format_table[fmt].vtx;
+
+ if (!so->element[i].state) {
+ switch (util_format_get_nr_components(fmt)) {
+ case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
+ case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
+ case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
+ case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
+ default:
+ assert(0);
+ return NULL;
+ }
+ so->element[i].state = nvc0_format_table[fmt].vtx;
+ }
+ so->element[i].state |= i;
+
+ if (likely(!ve->instance_divisor)) {
+ unsigned j = transkey.nr_elements++;
+
+ transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
+ transkey.element[j].input_format = ve->src_format;
+ transkey.element[j].input_buffer = vbi;
+ transkey.element[j].input_offset = ve->src_offset;
+ transkey.element[j].instance_divisor = ve->instance_divisor;
+
+ transkey.element[j].output_format = fmt;
+ transkey.element[j].output_offset = transkey.output_stride;
+ transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;
+ } else {
+ so->instance_bits |= 1 << i;
+ }
+ }
+
+ so->translate = translate_create(&transkey);
+ so->vtx_size = transkey.output_stride / 4;
+ so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1);
+
+ return so;
+}
+
+#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST
+
+#define VTX_ATTR(a, c, t, s) \
+ ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \
+ (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \
+ ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \
+ ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT))
+
+static void
+nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb,
+ struct pipe_vertex_element *ve, unsigned attr)
+{
+ const void *data;
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_resource *res = nvc0_resource(vb->buffer);
+ float v[4];
+ int i;
+ const unsigned nc = util_format_get_nr_components(ve->src_format);
+
+ data = nvc0_resource_map_offset(nvc0, res, vb->buffer_offset +
+ ve->src_offset, NOUVEAU_BO_RD);
+
+ util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1);
+
+ BEGIN_RING(chan, RING_3D(VTX_ATTR_DEFINE), nc + 1);
+ OUT_RING (chan, VTX_ATTR(attr, nc, FLOAT, 32));
+ for (i = 0; i < nc; ++i)
+ OUT_RINGf(chan, v[i]);
+}
+
+static void
+nvc0_prevalidate_vbufs(struct nvc0_context *nvc0)
+{
+ struct pipe_vertex_buffer *vb;
+ struct nvc0_resource *buf;
+ int i;
+ uint32_t base, size;
+
+ nvc0->vbo_fifo = nvc0->vbo_user = 0;
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ vb = &nvc0->vtxbuf[i];
+ if (!vb->stride)
+ continue;
+ buf = nvc0_resource(vb->buffer);
+
+ if (!nvc0_resource_mapped_by_gpu(vb->buffer)) {
+ if (nvc0->vbo_push_hint) {
+ nvc0->vbo_fifo = ~0;
+ continue;
+ } else {
+ if (buf->status & NVC0_BUFFER_STATUS_USER_MEMORY) {
+ nvc0->vbo_user |= 1 << i;
+ assert(vb->stride > vb->buffer_offset);
+ size = vb->stride * (nvc0->vbo_max_index -
+ nvc0->vbo_min_index + 1);
+ base = vb->stride * nvc0->vbo_min_index;
+ nvc0_user_buffer_upload(buf, base, size);
+ } else {
+ nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART);
+ }
+ nvc0->vbo_dirty = TRUE;
+ }
+ }
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, buf, NOUVEAU_BO_RD);
+ nvc0_buffer_adjust_score(nvc0, buf, 1);
+ }
+}
+
+static void
+nvc0_update_user_vbufs(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ const uint32_t vertex_count = nvc0->vbo_max_index - nvc0->vbo_min_index + 1;
+ uint32_t base, offset, size;
+ int i;
+ uint32_t written = 0;
+
+ for (i = 0; i < nvc0->vertex->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
+ const int b = ve->vertex_buffer_index;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
+ struct nvc0_resource *buf = nvc0_resource(vb->buffer);
+
+ if (!(nvc0->vbo_user & (1 << b)))
+ continue;
+
+ if (!vb->stride) {
+ nvc0_emit_vtxattr(nvc0, vb, ve, i);
+ continue;
+ }
+ size = vb->stride * vertex_count;
+ base = vb->stride * nvc0->vbo_min_index;
+
+ if (!(written & (1 << b))) {
+ written |= 1 << b;
+ nvc0_user_buffer_upload(buf, base, size);
+ }
+ offset = vb->buffer_offset + ve->src_offset;
+
+ BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
+ OUT_RING (chan, i);
+ OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD);
+ }
+ nvc0->vbo_dirty = TRUE;
+}
+
+void
+nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
+ struct pipe_vertex_buffer *vb;
+ struct nvc0_vertex_element *ve;
+ unsigned i;
+
+ nvc0_prevalidate_vbufs(nvc0);
+
+ BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements);
+ for (i = 0; i < vertex->num_elements; ++i) {
+ ve = &vertex->element[i];
+ vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
+
+ if (likely(vb->stride) || nvc0->vbo_fifo) {
+ OUT_RING(chan, ve->state);
+ } else {
+ OUT_RING(chan, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
+ nvc0->vbo_fifo &= ~(1 << i);
+ }
+ }
+
+ for (i = 0; i < vertex->num_elements; ++i) {
+ struct nvc0_resource *res;
+ unsigned size, offset;
+
+ ve = &vertex->element[i];
+ vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
+
+ if (unlikely(ve->pipe.instance_divisor)) {
+ if (!(nvc0->state.instance_bits & (1 << i))) {
+ IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
+ }
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
+ OUT_RING (chan, ve->pipe.instance_divisor);
+ } else
+ if (unlikely(nvc0->state.instance_bits & (1 << i))) {
+ IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);
+ }
+
+ res = nvc0_resource(vb->buffer);
+
+ if (nvc0->vbo_fifo || unlikely(vb->stride == 0)) {
+ if (!nvc0->vbo_fifo)
+ nvc0_emit_vtxattr(nvc0, vb, &ve->pipe, i);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ OUT_RING (chan, 0);
+ continue;
+ }
+
+ size = vb->buffer->width0;
+ offset = ve->pipe.src_offset + vb->buffer_offset;
+
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ OUT_RING (chan, (1 << 12) | vb->stride);
+ BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
+ OUT_RING (chan, i);
+ OUT_RESRCh(chan, res, size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
+ }
+ for (; i < nvc0->state.num_vtxelts; ++i) {
+ BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1);
+ OUT_RING (chan, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ OUT_RING (chan, 0);
+ }
+
+ nvc0->state.num_vtxelts = vertex->num_elements;
+ nvc0->state.instance_bits = vertex->instance_bits;
+}
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
+}
+
+static void
+nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan)
+{
+ struct nvc0_context *nvc0 = chan->user_private;
+
+ nvc0_bufctx_emit_relocs(nvc0);
+}
+
+#if 0
+static struct nouveau_bo *
+nvc0_tfb_setup(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nouveau_bo *tfb = NULL;
+ int ret, i;
+
+ ret = nouveau_bo_new(nvc0->screen->base.device,
+ NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb);
+ if (ret)
+ return NULL;
+
+ ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR);
+ if (ret)
+ return NULL;
+ memset(tfb->map, 0xee, 8 * 4 * 3);
+ nouveau_bo_unmap(tfb);
+
+ BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5);
+ OUT_RING (chan, 1);
+ OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RING (chan, tfb->size);
+ OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */
+ BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) */
+ OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */
+ BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2);
+ OUT_RING (chan, 0x1f1e1d1c);
+ OUT_RING (chan, 0xa3a2a1a0);
+ for (i = 1; i < 4; ++i) {
+ BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1);
+ OUT_RING (chan, 0);
+ }
+ BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D_(0x135c), 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, RING_3D_(0x135c), 1);
+ OUT_RING (chan, 0);
+
+ return tfb;
+}
+#endif
+
+static void
+nvc0_draw_arrays(struct nvc0_context *nvc0,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ unsigned prim;
+
+ chan->flush_notify = nvc0_draw_vbo_flush_notify;
+ chan->user_private = nvc0;
+
+ prim = nvc0_prim_gl(mode);
+
+ while (instance_count--) {
+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (chan, prim);
+ BEGIN_RING(chan, RING_3D(VERTEX_BUFFER_FIRST), 2);
+ OUT_RING (chan, start);
+ OUT_RING (chan, count);
+ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+
+ chan->flush_notify = NULL;
+}
+
+static void
+nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 3) {
+ unsigned i;
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), count & 3);
+ for (i = 0; i < (count & 3); ++i)
+ OUT_RING(chan, *map++);
+ count &= ~3;
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr);
+ for (i = 0; i < nr; ++i) {
+ OUT_RING(chan,
+ (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
+ map += 4;
+ }
+ count -= nr * 4;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ count &= ~1;
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
+ OUT_RING (chan, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ OUT_RING(chan, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ while (count) {
+ const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr);
+ OUT_RINGp (chan, map, nr);
+
+ map += nr;
+ count -= nr;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ count--;
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
+ OUT_RING (chan, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ OUT_RING(chan, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
+}
+
+static void
+nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count, int32_t index_bias)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ void *data;
+ unsigned prim;
+ const unsigned index_size = nvc0->idxbuf.index_size;
+
+ chan->flush_notify = nvc0_draw_vbo_flush_notify;
+ chan->user_private = nvc0;
+
+ prim = nvc0_prim_gl(mode);
+
+ if (index_bias != nvc0->state.index_bias) {
+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_BASE), 1);
+ OUT_RING (chan, index_bias);
+ nvc0->state.index_bias = index_bias;
+ }
+
+ if (nvc0_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) {
+ struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
+ unsigned offset = nvc0->idxbuf.offset;
+ unsigned limit = nvc0->idxbuf.buffer->width0 - 1;
+
+ nvc0_buffer_adjust_score(nvc0, res, 1);
+
+ while (instance_count--) {
+ MARK_RING (chan, 11, 4);
+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (chan, mode);
+ BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7);
+ OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
+ OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD);
+ OUT_RING (chan, index_size >> 1);
+ OUT_RING (chan, start);
+ OUT_RING (chan, count);
+ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+ nvc0_resource_fence(res, NOUVEAU_BO_RD);
+
+ mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ } else {
+ data = nvc0_resource_map_offset(nvc0, nvc0_resource(nvc0->idxbuf.buffer),
+ nvc0->idxbuf.offset, NOUVEAU_BO_RD);
+ if (!data)
+ return;
+
+ while (instance_count--) {
+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (chan, prim);
+ switch (index_size) {
+ case 1:
+ nvc0_draw_elements_inline_u08(chan, data, start, count);
+ break;
+ case 2:
+ nvc0_draw_elements_inline_u16(chan, data, start, count);
+ break;
+ case 4:
+ if (shorten)
+ nvc0_draw_elements_inline_u32_short(chan, data, start, count);
+ else
+ nvc0_draw_elements_inline_u32(chan, data, start, count);
+ break;
+ default:
+ assert(0);
+ return;
+ }
+ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ }
+
+ chan->flush_notify = NULL;
+}
+
+void
+nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+ /* For picking only a few vertices from a large user buffer, push is better,
+ * if index count is larger and we expect repeated vertices, suggest upload.
+ */
+ nvc0->vbo_push_hint = /* the 64 is heuristic */
+ !(info->indexed &&
+ ((info->max_index - info->min_index + 64) < info->count));
+
+ nvc0->vbo_min_index = info->min_index;
+ nvc0->vbo_max_index = info->max_index;
+
+ if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS)))
+ nvc0_update_user_vbufs(nvc0);
+
+ nvc0_state_validate(nvc0);
+
+ if (nvc0->state.instance_base != info->start_instance) {
+ nvc0->state.instance_base = info->start_instance;
+ BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1);
+ OUT_RING (chan, info->start_instance);
+ }
+
+ if (nvc0->vbo_fifo) {
+ nvc0_push_vbo(nvc0, info);
+ return;
+ }
+
+ if (nvc0->vbo_dirty) {
+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 1);
+ OUT_RING (chan, 0);
+ nvc0->vbo_dirty = FALSE;
+ }
+
+ if (!info->indexed) {
+ nvc0_draw_arrays(nvc0,
+ info->mode, info->start, info->count,
+ info->instance_count);
+ } else {
+ boolean shorten = info->max_index <= 65535;
+
+ assert(nvc0->idxbuf.buffer);
+
+ if (info->primitive_restart != nvc0->state.prim_restart) {
+ if (info->primitive_restart) {
+ BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ } else {
+ IMMED_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0);
+ }
+ nvc0->state.prim_restart = info->primitive_restart;
+ } else
+ if (info->primitive_restart) {
+ BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1);
+ OUT_RING (chan, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ }
+
+ nvc0_draw_elements(nvc0, shorten,
+ info->mode, info->start, info->count,
+ info->instance_count, info->index_bias);
+ }
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h
new file mode 100644
index 0000000000..1544fb7a1d
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_winsys.h
@@ -0,0 +1,120 @@
+
+#ifndef __NVC0_WINSYS_H__
+#define __NVC0_WINSYS_H__
+
+#include <stdint.h>
+#include <unistd.h>
+#include "pipe/p_defines.h"
+
+#include "nouveau/nouveau_bo.h"
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_grobj.h"
+#include "nouveau/nouveau_device.h"
+#include "nouveau/nouveau_resource.h"
+#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nouveau_reloc.h"
+
+#include "nvc0_resource.h" /* OUT_RESRC */
+
+#ifndef NV04_PFIFO_MAX_PACKET_LEN
+#define NV04_PFIFO_MAX_PACKET_LEN 2047
+#endif
+
+#define NVC0_SUBCH_3D 1
+#define NVC0_SUBCH_2D 2
+#define NVC0_SUBCH_MF 3
+
+#define NVC0_MF_(n) NVC0_M2MF_##n
+
+#define RING_3D(n) ((NVC0_SUBCH_3D << 13) | (NVC0_3D_##n >> 2))
+#define RING_2D(n) ((NVC0_SUBCH_2D << 13) | (NVC0_2D_##n >> 2))
+#define RING_MF(n) ((NVC0_SUBCH_MF << 13) | (NVC0_MF_(n) >> 2))
+
+#define RING_3D_(m) ((NVC0_SUBCH_3D << 13) | ((m) >> 2))
+#define RING_2D_(m) ((NVC0_SUBCH_2D << 13) | ((m) >> 2))
+#define RING_MF_(m) ((NVC0_SUBCH_MF << 13) | ((m) >> 2))
+
+#define RING_GR(gr, m) (((gr)->subc << 13) | ((m) >> 2))
+
+int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min);
+
+static inline uint32_t
+nouveau_bo_tile_layout(struct nouveau_bo *bo)
+{
+ return bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK;
+}
+
+static INLINE void
+nouveau_bo_validate(struct nouveau_channel *chan,
+ struct nouveau_bo *bo, unsigned flags)
+{
+ nouveau_reloc_emit(chan, NULL, 0, NULL, bo, 0, 0, flags, 0, 0);
+}
+
+/* incremental methods */
+static INLINE void
+BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+ WAIT_RING(chan, size + 1);
+ OUT_RING (chan, (0x2 << 28) | (size << 16) | mthd);
+}
+
+/* non-incremental */
+static INLINE void
+BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+ WAIT_RING(chan, size + 1);
+ OUT_RING (chan, (0x6 << 28) | (size << 16) | mthd);
+}
+
+/* increment-once */
+static INLINE void
+BEGIN_RING_1I(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+ WAIT_RING(chan, size + 1);
+ OUT_RING (chan, (0xa << 28) | (size << 16) | mthd);
+}
+
+/* inline-data */
+static INLINE void
+IMMED_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned data)
+{
+ WAIT_RING(chan, 1);
+ OUT_RING (chan, (0x8 << 28) | (data << 16) | mthd);
+}
+
+static INLINE int
+OUT_RESRCh(struct nouveau_channel *chan, struct nvc0_resource *res,
+ unsigned delta, unsigned flags)
+{
+ return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags);
+}
+
+static INLINE int
+OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res,
+ unsigned delta, unsigned flags)
+{
+ if (flags & NOUVEAU_BO_WR)
+ res->status |= NVC0_BUFFER_STATUS_DIRTY;
+ return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags);
+}
+
+static INLINE void
+BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned s)
+{
+ struct nouveau_subchannel *subc = &gr->channel->subc[s];
+
+ assert(s < 8);
+ if (subc->gr) {
+ assert(subc->gr->bound != NOUVEAU_GROBJ_BOUND_EXPLICIT);
+ subc->gr->bound = NOUVEAU_GROBJ_UNBOUND;
+ }
+ subc->gr = gr;
+ subc->gr->subc = s;
+ subc->gr->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT;
+
+ BEGIN_RING(chan, RING_GR(gr, 0x0000), 1);
+ OUT_RING (chan, gr->grclass);
+}
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nv04_2d.c b/src/gallium/drivers/nvfx/nv04_2d.c
index e0e65e7a87..e2fadd33e1 100644
--- a/src/gallium/drivers/nvfx/nv04_2d.c
+++ b/src/gallium/drivers/nvfx/nv04_2d.c
@@ -34,11 +34,11 @@
#include <stdio.h>
#include <stdint.h>
#include <nouveau/nouveau_device.h>
-#include <nouveau/nouveau_pushbuf.h>
#include <nouveau/nouveau_channel.h>
#include <nouveau/nouveau_bo.h>
#include <nouveau/nouveau_notifier.h>
#include <nouveau/nouveau_grobj.h>
+#include <nouveau/nv04_pushbuf.h>
#include "nv04_2d.h"
#include "nouveau/nv_object.xml.h"
diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
index d6ede5b40a..b609891d31 100644
--- a/src/gallium/drivers/nvfx/nv30_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
@@ -37,12 +37,12 @@ nv30_sampler_view_init(struct pipe_context *pipe,
struct pipe_resource* pt = sv->base.texture;
struct nvfx_texture_format *tf = &nvfx_texture_formats[sv->base.format];
unsigned txf;
- unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.first_level;
+ unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.u.tex.first_level;
assert(tf->fmt[0] >= 0);
txf = sv->u.init_fmt;
- txf |= (level != sv->base.last_level ? NV30_3D_TEX_FORMAT_MIPMAP : 0);
+ txf |= (level != sv->base.u.tex.last_level ? NV30_3D_TEX_FORMAT_MIPMAP : 0);
txf |= util_logbase2(u_minify(pt->width0, level)) << NV30_3D_TEX_FORMAT_BASE_SIZE_U__SHIFT;
txf |= util_logbase2(u_minify(pt->height0, level)) << NV30_3D_TEX_FORMAT_BASE_SIZE_V__SHIFT;
txf |= util_logbase2(u_minify(pt->depth0, level)) << NV30_3D_TEX_FORMAT_BASE_SIZE_W__SHIFT;
@@ -60,8 +60,8 @@ nv30_sampler_view_init(struct pipe_context *pipe,
else
sv->u.nv30.rect = !!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR);
- sv->lod_offset = sv->base.first_level - level;
- sv->max_lod_limit = sv->base.last_level - level;
+ sv->lod_offset = sv->base.u.tex.first_level - level;
+ sv->max_lod_limit = sv->base.u.tex.last_level - level;
}
void
@@ -71,6 +71,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit];
struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo;
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned txf;
unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
unsigned use_rect;
@@ -102,7 +103,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
txf = sv->u.nv30.fmt[ps->compare + (use_rect ? 2 : 0)];
MARK_RING(chan, 9, 2);
- OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8));
+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8);
OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0);
OUT_RELOC(chan, bo, txf,
tex_flags | NOUVEAU_BO_OR,
diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c
index d4fb73702d..563183d9d0 100644
--- a/src/gallium/drivers/nvfx/nv40_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv40_fragtex.c
@@ -46,7 +46,7 @@ nv40_sampler_view_init(struct pipe_context *pipe,
struct nvfx_miptree* mt = (struct nvfx_miptree*)pt;
struct nvfx_texture_format *tf = &nvfx_texture_formats[sv->base.format];
unsigned txf;
- unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.first_level;
+ unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.u.tex.first_level;
assert(tf->fmt[4] >= 0);
txf = sv->u.init_fmt;
@@ -54,7 +54,7 @@ nv40_sampler_view_init(struct pipe_context *pipe,
if(pt->target == PIPE_TEXTURE_CUBE)
txf |= ((pt->last_level + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT);
else
- txf |= (((sv->base.last_level - sv->base.first_level) + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT);
+ txf |= (((sv->base.u.tex.last_level - sv->base.u.tex.first_level) + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT);
if (!mt->linear_pitch)
sv->u.nv40.npot_size2 = 0;
@@ -68,14 +68,15 @@ nv40_sampler_view_init(struct pipe_context *pipe,
sv->u.nv40.npot_size2 |= (u_minify(pt->depth0, level) << NV40_3D_TEX_SIZE1_DEPTH__SHIFT);
- sv->lod_offset = (sv->base.first_level - level) * 256;
- sv->max_lod_limit = (sv->base.last_level - level) * 256;
+ sv->lod_offset = (sv->base.u.tex.first_level - level) * 256;
+ sv->max_lod_limit = (sv->base.u.tex.last_level - level) * 256;
}
void
nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit];
struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo;
@@ -87,7 +88,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
txf = sv->u.nv40.fmt[ps->compare] | ps->fmt;
MARK_RING(chan, 11, 2);
- OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8));
+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8);
OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0);
OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR,
NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1);
@@ -97,7 +98,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
OUT_RING(chan, ps->filt | sv->filt);
OUT_RING(chan, sv->npot_size);
OUT_RING(chan, ps->bcol);
- OUT_RING(chan, RING_3D(NV40_3D_TEX_SIZE1(unit), 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_TEX_SIZE1(unit), 1);
OUT_RING(chan, sv->u.nv40.npot_size2);
nvfx->hw_txf[unit] = txf;
diff --git a/src/gallium/drivers/nvfx/nvfx_buffer.c b/src/gallium/drivers/nvfx/nvfx_buffer.c
index 041099e0e5..b407429731 100644
--- a/src/gallium/drivers/nvfx/nvfx_buffer.c
+++ b/src/gallium/drivers/nvfx/nvfx_buffer.c
@@ -64,6 +64,7 @@ nvfx_user_buffer_create(struct pipe_screen *pscreen,
buffer->base.base.width0 = bytes;
buffer->base.base.height0 = 1;
buffer->base.base.depth0 = 1;
+ buffer->base.base.array_size = 1;
buffer->data = ptr;
buffer->size = bytes;
buffer->bytes_to_draw_until_static = bytes * screen->static_reuse_threshold;
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
index 95834d2327..6c8934d3a4 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -13,13 +13,13 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags,
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
/* XXX: we need to actually be intelligent here */
if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(0x1fd8, 1));
+ BEGIN_RING(chan, eng3d, 0x1fd8, 1);
OUT_RING(chan, 2);
- OUT_RING(chan, RING_3D(0x1fd8, 1));
+ BEGIN_RING(chan, eng3d, 0x1fd8, 1);
OUT_RING(chan, 1);
}
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h
index 6ef2a6945d..2238aa1ad0 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.h
+++ b/src/gallium/drivers/nvfx/nvfx_context.h
@@ -339,30 +339,31 @@ extern void nvfx_init_vertprog_functions(struct nvfx_context *nvfx);
/* nvfx_push.c */
extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
-/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! */
-static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, const float* v, unsigned ncomp)
+static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan,
+ struct nouveau_grobj *eng3d, unsigned attrib, const float* v,
+ unsigned ncomp)
{
switch (ncomp) {
case 4:
- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_4F_X(attrib), 4));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_4F_X(attrib), 4);
OUT_RING(chan, fui(v[0]));
OUT_RING(chan, fui(v[1]));
OUT_RING(chan, fui(v[2]));
OUT_RING(chan, fui(v[3]));
break;
case 3:
- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_3F_X(attrib), 3));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_3F_X(attrib), 3);
OUT_RING(chan, fui(v[0]));
OUT_RING(chan, fui(v[1]));
OUT_RING(chan, fui(v[2]));
break;
case 2:
- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_2F_X(attrib), 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_2F_X(attrib), 2);
OUT_RING(chan, fui(v[0]));
OUT_RING(chan, fui(v[1]));
break;
case 1:
- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_1F(attrib), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_1F(attrib), 1);
OUT_RING(chan, fui(v[0]));
break;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
index 61f888a8ea..81f1ec485d 100644
--- a/src/gallium/drivers/nvfx/nvfx_draw.c
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -28,10 +28,10 @@ nvfx_render_flush(struct draw_stage *stage, unsigned flags)
struct nvfx_render_stage *rs = nvfx_render_stage(stage);
struct nvfx_context *nvfx = rs->nvfx;
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) {
- assert(AVAIL_RING(chan) >= 2);
- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP);
rs->prim = NV30_3D_VERTEX_BEGIN_END_STOP;
}
@@ -46,6 +46,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
boolean no_elements = nvfx->vertprog->draw_no_elements;
unsigned num_attribs = nvfx->vertprog->draw_elements;
@@ -63,7 +64,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
/* Switch primitive modes if necessary */
if (rs->prim != mode) {
if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) {
- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP);
}
@@ -74,23 +75,24 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
int i;
for(i = 0; i < 32; ++i)
{
- OUT_RING(chan, RING_3D(0x1dac, 1));
+ BEGIN_RING(chan, eng3d, 0x1dac, 1);
OUT_RING(chan, 0);
}
}
- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
OUT_RING (chan, mode);
rs->prim = mode;
}
- OUT_RING(chan, RING_3D_NI(NV30_3D_VERTEX_DATA, num_attribs * 4 * count));
if(no_elements) {
+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, 4);
OUT_RING(chan, 0);
OUT_RING(chan, 0);
OUT_RING(chan, 0);
OUT_RING(chan, 0);
} else {
+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, num_attribs * 4 * count);
for (unsigned i = 0; i < count; ++i)
{
struct vertex_header* v = prim->v[i];
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
index 13e8beed47..dbd7c77346 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -1189,12 +1189,12 @@ out_err:
static inline void
nvfx_fp_memcpy(void* dst, const void* src, size_t len)
{
-#ifndef WORDS_BIGENDIAN
+#ifndef PIPE_ARCH_BIG_ENDIAN
memcpy(dst, src, len);
#else
size_t i;
for(i = 0; i < len; i += 4) {
- uint32_t v = (uint32_t*)((char*)src + i);
+ uint32_t v = *(uint32_t*)((char*)src + i);
*(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16);
}
#endif
@@ -1233,6 +1233,7 @@ void
nvfx_fragprog_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog;
struct nvfx_vertex_program* vp;
@@ -1499,17 +1500,17 @@ update:
nvfx->hw_fragprog = fp;
MARK_RING(chan, 8, 1);
- OUT_RING(chan, RING_3D(NV30_3D_FP_ACTIVE_PROGRAM, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_FP_ACTIVE_PROGRAM, 1);
OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR, NV30_3D_FP_ACTIVE_PROGRAM_DMA0,
NV30_3D_FP_ACTIVE_PROGRAM_DMA1);
- OUT_RING(chan, RING_3D(NV30_3D_FP_CONTROL, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_FP_CONTROL, 1);
OUT_RING(chan, fp->fp_control);
if(!nvfx->is_nv4x) {
- OUT_RING(chan, RING_3D(NV30_3D_FP_REG_CONTROL, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_FP_REG_CONTROL, 1);
OUT_RING(chan, (1<<16)|0x4);
- OUT_RING(chan, RING_3D(NV30_3D_TEX_UNITS_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_UNITS_ENABLE, 1);
OUT_RING(chan, fp->samplers);
}
}
@@ -1518,8 +1519,7 @@ update:
unsigned pointsprite_control = fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization;
if(pointsprite_control != nvfx->hw_pointsprite_control)
{
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_POINT_SPRITE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_POINT_SPRITE, 1);
OUT_RING(chan, pointsprite_control);
nvfx->hw_pointsprite_control = pointsprite_control;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c
index 1d6b4e24cb..1c4901df0e 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragtex.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c
@@ -122,8 +122,8 @@ nvfx_create_sampler_view(struct pipe_context *pipe,
}
else
{
- sv->offset = nvfx_subresource_offset(pt, 0, sv->base.first_level, 0);
- sv->npot_size = (u_minify(pt->width0, sv->base.first_level) << NV30_3D_TEX_NPOT_SIZE_W__SHIFT) | u_minify(pt->height0, sv->base.first_level);
+ sv->offset = nvfx_subresource_offset(pt, 0, sv->base.u.tex.first_level, 0);
+ sv->npot_size = (u_minify(pt->width0, sv->base.u.tex.first_level) << NV30_3D_TEX_NPOT_SIZE_W__SHIFT) | u_minify(pt->height0, sv->base.u.tex.first_level);
/* apparently, we need to ignore the t coordinate for 1D textures to fix piglit tex1d-2dborder */
if(pt->target == PIPE_TEXTURE_1D)
@@ -177,6 +177,7 @@ void
nvfx_fragtex_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned samplers, unit;
samplers = nvfx->dirty_samplers;
@@ -197,9 +198,8 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)
else
nv40_fragtex_set(nvfx, unit);
} else {
- WAIT_RING(chan, 2);
/* this is OK for nv40 too */
- OUT_RING(chan, RING_3D(NV30_3D_TEX_ENABLE(unit), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_ENABLE(unit), 1);
OUT_RING(chan, 0);
nvfx->hw_samplers &= ~(1 << unit);
}
diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c
index 7677fde40c..8c043b867b 100644
--- a/src/gallium/drivers/nvfx/nvfx_miptree.c
+++ b/src/gallium/drivers/nvfx/nvfx_miptree.c
@@ -190,25 +190,27 @@ nvfx_miptree_from_handle(struct pipe_screen *pscreen, const struct pipe_resource
}
struct pipe_surface *
-nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned flags)
+nvfx_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl)
{
- struct nvfx_miptree* mt = (struct nvfx_miptree*)pt;
- struct nvfx_surface *ns;
-
- ns = (struct nvfx_surface*)util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pscreen, pt, face, level, zslice, flags);
- if(ns->base.base.offset == ~0) {
- util_dirty_surface_init(&ns->base);
- ns->pitch = nvfx_subresource_pitch(pt, level);
- ns->base.base.offset = nvfx_subresource_offset(pt, face, level, zslice);
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
+ unsigned level = surf_tmpl->u.tex.level;
+ struct nvfx_surface *ns = NULL;
+
+ assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+ if(util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pipe,
+ pt, level, surf_tmpl->u.tex.first_layer,
+ surf_tmpl->usage, (struct pipe_surface **)&ns)) {
+ util_dirty_surface_init(&ns->base);
+ ns->pitch = nvfx_subresource_pitch(pt, level);
+ ns->offset = nvfx_subresource_offset(pt, surf_tmpl->u.tex.first_layer, level, surf_tmpl->u.tex.first_layer);
}
return &ns->base.base;
}
void
-nvfx_miptree_surface_del(struct pipe_surface *ps)
+nvfx_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps)
{
struct nvfx_surface* ns = (struct nvfx_surface*)ps;
diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c
index ebf47e6ed3..6391741a2e 100644
--- a/src/gallium/drivers/nvfx/nvfx_push.c
+++ b/src/gallium/drivers/nvfx/nvfx_push.c
@@ -10,6 +10,7 @@
struct push_context {
struct nouveau_channel* chan;
+ struct nouveau_grobj *eng3d;
void *idxbuf;
int32_t idxbias;
@@ -27,9 +28,10 @@ static void
emit_edgeflag(void *priv, boolean enabled)
{
struct push_context* ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
struct nouveau_channel *chan = ctx->chan;
- OUT_RING(chan, RING_3D(NV30_3D_EDGEFLAG, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_EDGEFLAG, 1);
OUT_RING(chan, enabled ? 1 : 0);
}
@@ -37,6 +39,7 @@ static void
emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
{
struct push_context *ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
uint8_t* elts = (uint8_t*)ctx->idxbuf + start;
while(count)
@@ -44,7 +47,7 @@ emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
unsigned push = MIN2(count, ctx->max_vertices_per_packet);
unsigned length = push * ctx->vertex_length;
- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur);
ctx->chan->cur += length;
@@ -57,6 +60,7 @@ static void
emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
{
struct push_context *ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
uint16_t* elts = (uint16_t*)ctx->idxbuf + start;
while(count)
@@ -64,7 +68,7 @@ emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
unsigned push = MIN2(count, ctx->max_vertices_per_packet);
unsigned length = push * ctx->vertex_length;
- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur);
ctx->chan->cur += length;
@@ -77,6 +81,7 @@ static void
emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
{
struct push_context *ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
uint32_t* elts = (uint32_t*)ctx->idxbuf + start;
while(count)
@@ -84,7 +89,7 @@ emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
unsigned push = MIN2(count, ctx->max_vertices_per_packet);
unsigned length = push * ctx->vertex_length;
- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur);
ctx->chan->cur += length;
@@ -97,13 +102,14 @@ static void
emit_vertices(void *priv, unsigned start, unsigned count)
{
struct push_context *ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
while(count)
{
unsigned push = MIN2(count, ctx->max_vertices_per_packet);
unsigned length = push * ctx->vertex_length;
- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur);
ctx->chan->cur += length;
@@ -116,10 +122,11 @@ static void
emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
{
struct push_context* ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
struct nouveau_channel *chan = ctx->chan;
unsigned nr = (vc & 0xff);
if (nr) {
- OUT_RING(chan, RING_3D(reg, 1));
+ BEGIN_RING(chan, eng3d, reg, 1);
OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -130,7 +137,7 @@ emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
nr -= push;
- OUT_RING(chan, RING_3D_NI(reg, push));
+ BEGIN_RING_NI(chan, eng3d, reg, push);
while (push--) {
OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
@@ -154,12 +161,13 @@ static INLINE void
emit_elt8(void* priv, unsigned start, unsigned vc)
{
struct push_context* ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
struct nouveau_channel *chan = ctx->chan;
uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
int idxbias = ctx->idxbias;
if (vc & 1) {
- OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -168,7 +176,7 @@ emit_elt8(void* priv, unsigned start, unsigned vc)
unsigned i;
unsigned push = MIN2(vc, 2047 * 2);
- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1));
+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
@@ -181,12 +189,13 @@ static INLINE void
emit_elt16(void* priv, unsigned start, unsigned vc)
{
struct push_context* ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
struct nouveau_channel *chan = ctx->chan;
uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
int idxbias = ctx->idxbias;
if (vc & 1) {
- OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -195,7 +204,7 @@ emit_elt16(void* priv, unsigned start, unsigned vc)
unsigned i;
unsigned push = MIN2(vc, 2047 * 2);
- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1));
+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
@@ -208,6 +217,7 @@ static INLINE void
emit_elt32(void* priv, unsigned start, unsigned vc)
{
struct push_context* ctx = priv;
+ struct nouveau_grobj *eng3d = ctx->eng3d;
struct nouveau_channel *chan = ctx->chan;
uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
int idxbias = ctx->idxbias;
@@ -215,8 +225,7 @@ emit_elt32(void* priv, unsigned start, unsigned vc)
while (vc) {
unsigned push = MIN2(vc, 2047);
- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U32, push));
- assert(AVAIL_RING(chan) >= push);
+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U32, push);
if(idxbias)
{
for(unsigned i = 0; i < push; ++i)
@@ -235,6 +244,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct push_context ctx;
struct util_split_prim s;
unsigned instances_left = info->instance_count;
@@ -251,6 +261,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+ 4; /* potential edgeflag enable/disable */
ctx.chan = nvfx->screen->base.channel;
+ ctx.eng3d = nvfx->screen->eng3d;
ctx.translate = nvfx->vtxelt->translate;
ctx.idxbuf = NULL;
ctx.vertex_length = nvfx->vtxelt->vertex_length;
@@ -333,8 +344,9 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
- WAIT_RING(chan, 5);
- nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+ nvfx_emit_vtx_attr(chan, eng3d,
+ nvfx->vtxelt->per_instance[i].base.idx, v,
+ nvfx->vtxelt->per_instance[i].base.ncomp);
}
/* per-instance loop */
@@ -374,15 +386,18 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
int i;
for(i = 0; i < 32; ++i)
{
- OUT_RING(chan, RING_3D(0x1dac, 1));
+ BEGIN_RING(chan, eng3d,
+ 0x1dac, 1);
OUT_RING(chan, 0);
}
}
- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
+ BEGIN_RING(chan, eng3d,
+ NV30_3D_VERTEX_BEGIN_END, 1);
OUT_RING(chan, hw_mode);
done = util_split_prim_next(&s, max_verts);
- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
+ BEGIN_RING(chan, eng3d,
+ NV30_3D_VERTEX_BEGIN_END, 1);
OUT_RING(chan, 0);
if(done)
@@ -406,8 +421,10 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
per_instance[i].step = 0;
nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
- WAIT_RING(chan, 5);
- nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+ nvfx_emit_vtx_attr(chan, eng3d,
+ nvfx->vtxelt->per_instance[i].base.idx,
+ v,
+ nvfx->vtxelt->per_instance[i].base.ncomp);
}
}
}
diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c
index 3935ffd7f9..3cd6bf1e47 100644
--- a/src/gallium/drivers/nvfx/nvfx_query.c
+++ b/src/gallium/drivers/nvfx/nvfx_query.c
@@ -49,6 +49,7 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nvfx_query *q = nvfx_query(pq);
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
uint64_t tmp;
assert(!nvfx->query);
@@ -72,10 +73,9 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
nouveau_notifier_reset(nvfx->screen->query, q->object->start);
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_RESET, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_RESET, 1);
OUT_RING(chan, 1);
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
OUT_RING(chan, 1);
q->ready = FALSE;
@@ -88,15 +88,15 @@ nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct nvfx_query *q = nvfx_query(pq);
assert(nvfx->query == pq);
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_GET, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_GET, 1);
OUT_RING (chan, (0x01 << NV30_3D_QUERY_GET_UNK24__SHIFT) |
((q->object->start * 32) << NV30_3D_QUERY_GET_OFFSET__SHIFT));
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
OUT_RING(chan, 0);
FIRE_RING(chan);
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c
index 39ae893f1b..c60a7bb8b9 100644
--- a/src/gallium/drivers/nvfx/nvfx_resource.c
+++ b/src/gallium/drivers/nvfx/nvfx_resource.c
@@ -7,7 +7,7 @@
static unsigned int
nvfx_resource_is_referenced(struct pipe_context *pipe,
struct pipe_resource *pr,
- unsigned face, unsigned level)
+ unsigned level, int layer)
{
return !!nouveau_reference_flags(nvfx_resource(pr)->bo);
}
@@ -59,6 +59,9 @@ void
nvfx_init_resource_functions(struct pipe_context *pipe)
{
pipe->is_resource_referenced = nvfx_resource_is_referenced;
+
+ pipe->create_surface = nvfx_miptree_surface_new;
+ pipe->surface_destroy = nvfx_miptree_surface_del;
}
void
@@ -69,7 +72,4 @@ nvfx_screen_init_resource_functions(struct pipe_screen *pscreen)
pscreen->resource_get_handle = nvfx_resource_get_handle;
pscreen->resource_destroy = nvfx_resource_destroy;
pscreen->user_buffer_create = nvfx_user_buffer_create;
-
- pscreen->get_tex_surface = nvfx_miptree_surface_new;
- pscreen->tex_surface_destroy = nvfx_miptree_surface_del;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h
index 583be4de2a..070f897944 100644
--- a/src/gallium/drivers/nvfx/nvfx_resource.h
+++ b/src/gallium/drivers/nvfx/nvfx_resource.h
@@ -74,6 +74,7 @@ struct nvfx_miptree {
struct nvfx_surface {
struct util_dirty_surface base;
unsigned pitch;
+ unsigned offset;
struct nvfx_miptree* temp;
};
@@ -116,12 +117,11 @@ nvfx_miptree_from_handle(struct pipe_screen *pscreen,
struct winsys_handle *whandle);
void
-nvfx_miptree_surface_del(struct pipe_surface *ps);
+nvfx_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps);
struct pipe_surface *
-nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned flags);
+nvfx_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl);
/* only for miptrees, don't use for buffers */
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 92e1d33090..aa1e9567d3 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -301,98 +301,100 @@ nvfx_screen_destroy(struct pipe_screen *pscreen)
static void nv30_screen_init(struct nvfx_screen *screen)
{
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
int i;
/* TODO: perhaps we should do some of this on nv40 too? */
for (i=1; i<8; i++) {
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1);
OUT_RING(chan, 0);
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_VERT(i), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_VERT(i), 1);
OUT_RING(chan, 0);
}
- OUT_RING(chan, RING_3D(0x220, 1));
+ BEGIN_RING(chan, eng3d, 0x220, 1);
OUT_RING(chan, 1);
- OUT_RING(chan, RING_3D(0x03b0, 1));
+ BEGIN_RING(chan, eng3d, 0x03b0, 1);
OUT_RING(chan, 0x00100000);
- OUT_RING(chan, RING_3D(0x1454, 1));
+ BEGIN_RING(chan, eng3d, 0x1454, 1);
OUT_RING(chan, 0);
- OUT_RING(chan, RING_3D(0x1d80, 1));
+ BEGIN_RING(chan, eng3d, 0x1d80, 1);
OUT_RING(chan, 3);
- OUT_RING(chan, RING_3D(0x1450, 1));
+ BEGIN_RING(chan, eng3d, 0x1450, 1);
OUT_RING(chan, 0x00030004);
/* NEW */
- OUT_RING(chan, RING_3D(0x1e98, 1));
+ BEGIN_RING(chan, eng3d, 0x1e98, 1);
OUT_RING(chan, 0);
- OUT_RING(chan, RING_3D(0x17e0, 3));
+ BEGIN_RING(chan, eng3d, 0x17e0, 3);
OUT_RING(chan, fui(0.0));
OUT_RING(chan, fui(0.0));
OUT_RING(chan, fui(1.0));
- OUT_RING(chan, RING_3D(0x1f80, 16));
+ BEGIN_RING(chan, eng3d, 0x1f80, 16);
for (i=0; i<16; i++) {
OUT_RING(chan, (i==8) ? 0x0000ffff : 0);
}
- OUT_RING(chan, RING_3D(0x120, 3));
+ BEGIN_RING(chan, eng3d, 0x120, 3);
OUT_RING(chan, 0);
OUT_RING(chan, 1);
OUT_RING(chan, 2);
- OUT_RING(chan, RING_3D(0x1d88, 1));
+ BEGIN_RING(chan, eng3d, 0x1d88, 1);
OUT_RING(chan, 0x00001200);
- OUT_RING(chan, RING_3D(NV30_3D_RC_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_RC_ENABLE, 1);
OUT_RING(chan, 0);
- OUT_RING(chan, RING_3D(NV30_3D_DEPTH_RANGE_NEAR, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_RANGE_NEAR, 2);
OUT_RING(chan, fui(0.0));
OUT_RING(chan, fui(1.0));
- OUT_RING(chan, RING_3D(NV30_3D_MULTISAMPLE_CONTROL, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_MULTISAMPLE_CONTROL, 1);
OUT_RING(chan, 0xffff0000);
/* enables use of vp rather than fixed-function somehow */
- OUT_RING(chan, RING_3D(0x1e94, 1));
+ BEGIN_RING(chan, eng3d, 0x1e94, 1);
OUT_RING(chan, 0x13);
}
static void nv40_screen_init(struct nvfx_screen *screen)
{
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 2));
+ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 2);
OUT_RING(chan, screen->base.channel->vram->handle);
OUT_RING(chan, screen->base.channel->vram->handle);
- OUT_RING(chan, RING_3D(0x1450, 1));
+ BEGIN_RING(chan, eng3d, 0x1450, 1);
OUT_RING(chan, 0x00000004);
- OUT_RING(chan, RING_3D(0x1ea4, 3));
+ BEGIN_RING(chan, eng3d, 0x1ea4, 3);
OUT_RING(chan, 0x00000010);
OUT_RING(chan, 0x01000100);
OUT_RING(chan, 0xff800006);
/* vtxprog output routing */
- OUT_RING(chan, RING_3D(0x1fc4, 1));
+ BEGIN_RING(chan, eng3d, 0x1fc4, 1);
OUT_RING(chan, 0x06144321);
- OUT_RING(chan, RING_3D(0x1fc8, 2));
+ BEGIN_RING(chan, eng3d, 0x1fc8, 2);
OUT_RING(chan, 0xedcba987);
OUT_RING(chan, 0x0000006f);
- OUT_RING(chan, RING_3D(0x1fd0, 1));
+ BEGIN_RING(chan, eng3d, 0x1fd0, 1);
OUT_RING(chan, 0x00171615);
- OUT_RING(chan, RING_3D(0x1fd4, 1));
+ BEGIN_RING(chan, eng3d, 0x1fd4, 1);
OUT_RING(chan, 0x001b1a19);
- OUT_RING(chan, RING_3D(0x1ef8, 1));
+ BEGIN_RING(chan, eng3d, 0x1ef8, 1);
OUT_RING(chan, 0x0020ffff);
- OUT_RING(chan, RING_3D(0x1d64, 1));
+ BEGIN_RING(chan, eng3d, 0x1d64, 1);
OUT_RING(chan, 0x01d300d4);
- OUT_RING(chan, RING_3D(0x1e94, 1));
+ BEGIN_RING(chan, eng3d, 0x1e94, 1);
OUT_RING(chan, 0x00000001);
- OUT_RING(chan, RING_3D(NV40_3D_MIPMAP_ROUNDING, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_MIPMAP_ROUNDING, 1);
OUT_RING(chan, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN);
}
@@ -571,25 +573,25 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
/* Static eng3d initialisation */
/* note that we just started using the channel, so we must have space in the pushbuffer */
- OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_NOTIFY, 1);
OUT_RING(chan, screen->sync->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_TEXTURE0, 2);
OUT_RING(chan, chan->vram->handle);
OUT_RING(chan, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR1, 1);
OUT_RING(chan, chan->vram->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR0, 2);
OUT_RING(chan, chan->vram->handle);
OUT_RING(chan, chan->vram->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_VTXBUF0, 2);
OUT_RING(chan, chan->vram->handle);
OUT_RING(chan, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_FENCE, 2);
OUT_RING(chan, 0);
OUT_RING(chan, screen->query->handle);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2));
+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_UNK1AC, 2);
OUT_RING(chan, chan->vram->handle);
OUT_RING(chan, chan->vram->handle);
diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c
index 54619037d8..f3dcb205c6 100644
--- a/src/gallium/drivers/nvfx/nvfx_state.c
+++ b/src/gallium/drivers/nvfx/nvfx_state.c
@@ -304,7 +304,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- nvfx->constbuf[shader] = buf;
+ pipe_resource_reference(&nvfx->constbuf[shader], buf);
nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0;
if (shader == PIPE_SHADER_VERTEX) {
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
index 501fdd4430..40ae4f5bd2 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -7,11 +7,11 @@ void
nvfx_state_viewport_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct pipe_viewport_state *vpt = &nvfx->viewport;
- WAIT_RING(chan, 11);
if(nvfx->render_mode == HW) {
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8);
OUT_RINGf(chan, vpt->translate[0]);
OUT_RINGf(chan, vpt->translate[1]);
OUT_RINGf(chan, vpt->translate[2]);
@@ -20,10 +20,10 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx)
OUT_RINGf(chan, vpt->scale[1]);
OUT_RINGf(chan, vpt->scale[2]);
OUT_RINGf(chan, vpt->scale[3]);
- OUT_RING(chan, RING_3D(0x1d78, 1));
+ BEGIN_RING(chan, eng3d, 0x1d78, 1);
OUT_RING(chan, 1);
} else {
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8);
OUT_RINGf(chan, 0.0f);
OUT_RINGf(chan, 0.0f);
OUT_RINGf(chan, 0.0f);
@@ -32,7 +32,7 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx)
OUT_RINGf(chan, 1.0f);
OUT_RINGf(chan, 1.0f);
OUT_RINGf(chan, 1.0f);
- OUT_RING(chan, RING_3D(0x1d78, 1));
+ BEGIN_RING(chan, eng3d, 0x1d78, 1);
OUT_RING(chan, nvfx->is_nv4x ? 0x110 : 1);
}
}
@@ -41,6 +41,7 @@ void
nvfx_state_scissor_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe;
struct pipe_scissor_state *s = &nvfx->scissor;
@@ -48,8 +49,7 @@ nvfx_state_scissor_validate(struct nvfx_context *nvfx)
return;
nvfx->state.scissor_enabled = rast->scissor;
- WAIT_RING(chan, 3);
- OUT_RING(chan, RING_3D(NV30_3D_SCISSOR_HORIZ, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_SCISSOR_HORIZ, 2);
if (nvfx->state.scissor_enabled) {
OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx);
OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny);
@@ -63,12 +63,12 @@ void
nvfx_state_sr_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct pipe_stencil_ref *sr = &nvfx->stencil_ref;
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(0), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(0), 1);
OUT_RING(chan, sr->ref_value[0]);
- OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(1), 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(1), 1);
OUT_RING(chan, sr->ref_value[1]);
}
@@ -76,10 +76,10 @@ void
nvfx_state_blend_colour_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
struct pipe_blend_color *bcol = &nvfx->blend_colour;
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_BLEND_COLOR, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_BLEND_COLOR, 1);
OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) |
(float_to_ubyte(bcol->color[0]) << 16) |
(float_to_ubyte(bcol->color[1]) << 8) |
@@ -90,9 +90,9 @@ void
nvfx_state_stipple_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
- WAIT_RING(chan, 33);
- OUT_RING(chan, RING_3D(NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32));
+ BEGIN_RING(chan, eng3d, NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32);
OUT_RINGp(chan, nvfx->stipple, 32);
}
@@ -100,12 +100,12 @@ static void
nvfx_coord_conventions_validate(struct nvfx_context* nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned value = nvfx->hw_fragprog->coord_conventions;
if(value & NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED)
value |= nvfx->framebuffer.height << NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT;
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_COORD_CONVENTIONS, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_COORD_CONVENTIONS, 1);
OUT_RING(chan, value);
}
@@ -113,6 +113,7 @@ static void
nvfx_ucp_validate(struct nvfx_context* nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned enables[7] =
{
0,
@@ -126,17 +127,15 @@ nvfx_ucp_validate(struct nvfx_context* nvfx)
if(!nvfx->use_vp_clipping)
{
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1);
OUT_RING(chan, 0);
- WAIT_RING(chan, 6 * 4 + 1);
- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANE(0, 0), nvfx->clip.nr * 4));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANE(0, 0),
+ nvfx->clip.nr * 4);
OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4);
}
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1);
OUT_RING(chan, enables[nvfx->clip.nr]);
}
@@ -144,38 +143,37 @@ static void
nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned i;
struct nvfx_vertex_program* vp = nvfx->hw_vertprog;
if(nvfx->clip.nr != vp->clip_nr)
{
unsigned idx;
- WAIT_RING(chan, 14);
/* remove last instruction bit */
if(vp->clip_nr >= 0)
{
idx = vp->nr_insns - 7 + vp->clip_nr;
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
OUT_RING(chan, vp->exec->start + idx);
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
OUT_RINGp (chan, vp->insns[idx].data, 4);
}
/* set last instruction bit */
idx = vp->nr_insns - 7 + nvfx->clip.nr;
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
OUT_RING(chan, vp->exec->start + idx);
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
OUT_RINGp(chan, vp->insns[idx].data, 3);
OUT_RING(chan, vp->insns[idx].data[3] | 1);
vp->clip_nr = nvfx->clip.nr;
}
// TODO: only do this for the ones changed
- WAIT_RING(chan, 6 * 6);
for(i = 0; i < nvfx->clip.nr; ++i)
{
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5);
OUT_RING(chan, vp->data->start + i);
OUT_RINGp (chan, nvfx->clip.ucp[i], 4);
}
@@ -185,6 +183,7 @@ static boolean
nvfx_state_validate_common(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned dirty;
unsigned still_dirty = 0;
int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */
@@ -287,8 +286,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
if(vp_output != nvfx->hw_vp_output)
{
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV40_3D_VP_RESULT_EN, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_VP_RESULT_EN, 1);
OUT_RING(chan, vp_output);
nvfx->hw_vp_output = vp_output;
}
@@ -320,8 +318,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0))
{
- WAIT_RING(chan, 3);
- OUT_RING(chan, RING_3D(NV30_3D_DEPTH_WRITE_ENABLE, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_WRITE_ENABLE, 2);
OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask);
OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled);
}
@@ -334,10 +331,9 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
// TODO: what about nv30?
if(nvfx->is_nv4x)
{
- WAIT_RING(chan, 4);
- OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1);
OUT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1);
OUT_RING(chan, 1);
}
}
diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c
index 30e48c8073..f9fed94044 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_fb.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
@@ -7,7 +7,7 @@ nvfx_surface_linear_renderable(struct pipe_surface* surf)
{
/* TODO: precompute this in nvfx_surface creation */
return (surf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)
- && !(surf->offset & 63)
+ && !(((struct nvfx_surface*)surf)->offset & 63)
&& !(((struct nvfx_surface*)surf)->pitch & 63);
}
@@ -16,8 +16,8 @@ nvfx_surface_swizzled_renderable(struct pipe_framebuffer_state* fb, struct pipe_
{
/* TODO: precompute this in nvfx_surface creation */
return !((struct nvfx_miptree*)surf->texture)->linear_pitch
- && (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->level) <= 1)
- && !(surf->offset & 127)
+ && (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->u.tex.level) <= 1)
+ && !(((struct nvfx_surface*)surf)->offset & 127)
&& (surf->width == fb->width)
&& (surf->height == fb->height)
&& !((struct nvfx_surface*)surf)->temp
@@ -31,7 +31,7 @@ nvfx_surface_get_render_target(struct pipe_surface* surf, int all_swizzled, stru
if(!ns->temp)
{
target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo;
- target->offset = surf->offset;
+ target->offset = ns->offset;
target->pitch = align(ns->pitch, 64);
assert(target->pitch);
return FALSE;
@@ -54,7 +54,7 @@ nvfx_framebuffer_prepare(struct nvfx_context *nvfx)
int all_swizzled = 1;
if(!nvfx->is_nv4x)
- assert(fb->nr_cbufs <= 2);
+ assert(fb->nr_cbufs <= 1);
else
assert(fb->nr_cbufs <= 4);
@@ -96,6 +96,7 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
{
struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
uint32_t rt_enable, rt_format;
int i;
unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
@@ -113,7 +114,9 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->cbufs[i], prepare_result, &nvfx->hw_rt[i]) << i;
for(; i < 4; ++i)
- nvfx->hw_rt[i].bo = 0;
+ nvfx->hw_rt[i].bo = NULL;
+
+ nvfx->hw_zeta.bo = NULL;
if (fb->zsbuf) {
nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->zsbuf, prepare_result, &nvfx->hw_zeta) << 7;
@@ -202,11 +205,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
//printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch);
- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR0, 1);
OUT_RELOC(chan, rt0->bo, 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV30_3D_COLOR0_PITCH, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_COLOR0_PITCH, 2);
OUT_RING(chan, pitch);
OUT_RELOC(chan, rt0->bo,
rt0->offset, rt_flags | NOUVEAU_BO_LOW,
@@ -214,11 +217,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
}
if (rt_enable & NV30_3D_RT_ENABLE_COLOR1) {
- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR1, 1);
OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV30_3D_COLOR1_OFFSET, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_COLOR1_OFFSET, 2);
OUT_RELOC(chan, nvfx->hw_rt[1].bo,
nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
@@ -228,68 +231,68 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
if(nvfx->is_nv4x)
{
if (rt_enable & NV40_3D_RT_ENABLE_COLOR2) {
- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 1);
OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV40_3D_COLOR2_OFFSET, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_OFFSET, 1);
OUT_RELOC(chan, nvfx->hw_rt[2].bo,
nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
- OUT_RING(chan, RING_3D(NV40_3D_COLOR2_PITCH, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_PITCH, 1);
OUT_RING(chan, nvfx->hw_rt[2].pitch);
}
if (rt_enable & NV40_3D_RT_ENABLE_COLOR3) {
- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR3, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR3, 1);
OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV40_3D_COLOR3_OFFSET, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_OFFSET, 1);
OUT_RELOC(chan, nvfx->hw_rt[3].bo,
nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW,
0, 0);
- OUT_RING(chan, RING_3D(NV40_3D_COLOR3_PITCH, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_PITCH, 1);
OUT_RING(chan, nvfx->hw_rt[3].pitch);
}
}
if (fb->zsbuf) {
- OUT_RING(chan, RING_3D(NV30_3D_DMA_ZETA, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_DMA_ZETA, 1);
OUT_RELOC(chan, nvfx->hw_zeta.bo, 0,
rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
- OUT_RING(chan, RING_3D(NV30_3D_ZETA_OFFSET, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_ZETA_OFFSET, 1);
/* TODO: reverse engineer LMA */
OUT_RELOC(chan, nvfx->hw_zeta.bo,
nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0);
if(nvfx->is_nv4x) {
- OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1);
OUT_RING(chan, nvfx->hw_zeta.pitch);
}
}
else if(nvfx->is_nv4x) {
- OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1);
OUT_RING(chan, 64);
}
- OUT_RING(chan, RING_3D(NV30_3D_RT_ENABLE, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_RT_ENABLE, 1);
OUT_RING(chan, rt_enable);
- OUT_RING(chan, RING_3D(NV30_3D_RT_HORIZ, 3));
+ BEGIN_RING(chan, eng3d, NV30_3D_RT_HORIZ, 3);
OUT_RING(chan, (w << 16) | 0);
OUT_RING(chan, (h << 16) | 0);
OUT_RING(chan, rt_format);
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_HORIZ, 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_HORIZ, 2);
OUT_RING(chan, (w << 16) | 0);
OUT_RING(chan, (h << 16) | 0);
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2);
OUT_RING(chan, ((w - 1) << 16) | 0);
OUT_RING(chan, ((h - 1) << 16) | 0);
if(!nvfx->is_nv4x) {
/* Wonder why this is needed, context should all be set to zero on init */
/* TODO: we can most likely remove this, after putting it in context init */
- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TX_ORIGIN, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TX_ORIGIN, 1);
OUT_RING(chan, 0);
}
nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER;
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
index 70adebc1be..be31853d71 100644
--- a/src/gallium/drivers/nvfx/nvfx_surface.c
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -99,17 +99,17 @@ nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf,
util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base);
} else {
rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo;
- rgn->offset = surf->base.base.offset;
+ rgn->offset = surf->offset;
if(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR)
rgn->pitch = surf->pitch;
else
{
rgn->pitch = 0;
- rgn->z = surf->base.base.zslice;
+ rgn->z = surf->base.base.u.tex.first_layer;
rgn->w = surf->base.base.width;
rgn->h = surf->base.base.height;
- rgn->d = u_minify(surf->base.base.texture->depth0, surf->base.base.level);
+ rgn->d = u_minify(surf->base.base.texture->depth0, surf->base.base.u.tex.level);
}
}
@@ -119,11 +119,11 @@ nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf,
}
static INLINE void
-nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write)
+nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, unsigned level, unsigned x, unsigned y, unsigned z, bool for_write)
{
if(pt->target != PIPE_BUFFER)
{
- struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z);
+ struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, level, z);
if(ns && util_dirty_surface_is_dirty(&ns->base))
{
nvfx_region_init_for_surface(rgn, ns, x, y, for_write);
@@ -132,22 +132,22 @@ nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource*
}
rgn->bo = ((struct nvfx_resource*)pt)->bo;
- rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z);
+ rgn->offset = nvfx_subresource_offset(pt, z, level, z);
rgn->x = x;
rgn->y = y;
if(pt->flags & NVFX_RESOURCE_FLAG_LINEAR)
{
- rgn->pitch = nvfx_subresource_pitch(pt, sub.level);
+ rgn->pitch = nvfx_subresource_pitch(pt, level);
rgn->z = 0;
}
else
{
rgn->pitch = 0;
rgn->z = z;
- rgn->w = u_minify(pt->width0, sub.level);
- rgn->h = u_minify(pt->height0, sub.level);
- rgn->d = u_minify(pt->depth0, sub.level);
+ rgn->w = u_minify(pt->width0, level);
+ rgn->h = u_minify(pt->height0, level);
+ rgn->d = u_minify(pt->depth0, level);
}
nvfx_region_set_format(rgn, pt->format);
@@ -168,8 +168,8 @@ nvfx_get_blitter(struct pipe_context* pipe, int copy)
if(nvfx->query && !nvfx->blitters_in_use)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
OUT_RING(chan, 0);
}
@@ -209,8 +209,8 @@ nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter)
if(nvfx->query && !nvfx->blitters_in_use)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
- WAIT_RING(chan, 2);
- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
OUT_RING(chan, 1);
}
}
@@ -234,11 +234,10 @@ nvfx_region_clone(struct nv04_2d_context* ctx, struct nv04_region* rgn, unsigned
static void
nvfx_resource_copy_region(struct pipe_context *pipe,
- struct pipe_resource *dstr, struct pipe_subresource subdst,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *srcr, struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned w, unsigned h)
+ struct pipe_resource *dstr, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *srcr, unsigned src_level,
+ const struct pipe_box *src_box)
{
static int copy_threshold = -1;
struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
@@ -247,6 +246,8 @@ nvfx_resource_copy_region(struct pipe_context *pipe,
int src_on_gpu;
boolean small;
int ret;
+ unsigned w = src_box->width;
+ unsigned h = src_box->height;
if(!w || !h)
return;
@@ -257,8 +258,8 @@ nvfx_resource_copy_region(struct pipe_context *pipe,
dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;
src_on_gpu = nvfx_resource_on_gpu(srcr);
- nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE);
- nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE);
+ nvfx_region_init_for_subresource(&dst, dstr, dst_level, dstx, dsty, dstz, TRUE);
+ nvfx_region_init_for_subresource(&src, srcr, src_level, src_box->x, src_box->y, src_box->z, FALSE);
w = util_format_get_stride(dstr->format, w) >> dst.bpps;
h = util_format_get_nblocksy(dstr->format, h);
@@ -279,7 +280,7 @@ nvfx_resource_copy_region(struct pipe_context *pipe,
* TODO: perhaps support reinterpreting the formats
*/
struct blitter_context* blitter = nvfx_get_blitter(pipe, 1);
- util_blitter_copy_region(blitter, dstr, subdst, dstx, dsty, dstz, srcr, subsrc, srcx, srcy, srcz, w, h, TRUE);
+ util_blitter_copy_region(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE);
nvfx_put_blitter(pipe, blitter);
}
else
@@ -371,7 +372,7 @@ static void
nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp)
{
struct nvfx_surface* ns = (struct nvfx_surface*)surf;
- struct pipe_subresource tempsr, surfsr;
+ struct pipe_box box;
struct nvfx_context* nvfx = nvfx_context(pipe);
struct nvfx_miptree* temp;
unsigned use_vertex_buffers;
@@ -387,15 +388,20 @@ nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int
use_index_buffer = nvfx->use_index_buffer;
base_vertex = nvfx->base_vertex;
- tempsr.face = 0;
- tempsr.level = 0;
- surfsr.face = surf->face;
- surfsr.level = surf->level;
+ box.x = box.y = 0;
+ assert(surf->u.tex.first_layer == surf->u.tex.last_layer);
+ box.width = surf->width;
+ box.height = surf->height;
+ box.depth = 1;
- if(to_temp)
- nvfx_resource_copy_region(pipe, &temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height);
- else
- nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height);
+ if(to_temp) {
+ box.z = surf->u.tex.first_layer;
+ nvfx_resource_copy_region(pipe, &temp->base.base, 0, 0, 0, 0, surf->texture, surf->u.tex.level, &box);
+ }
+ else {
+ box.z = 0;
+ nvfx_resource_copy_region(pipe, surf->texture, surf->u.tex.level, 0, 0, surf->u.tex.first_layer, &temp->base.base, 0, &box);
+ }
/* If this triggers, it probably means we attempted to use the blitter
* but failed due to non-renderability of the target.
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c
index 7cb47a20f6..2debcb6eb8 100644
--- a/src/gallium/drivers/nvfx/nvfx_transfer.c
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.c
@@ -21,10 +21,10 @@ struct nvfx_staging_transfer
struct pipe_transfer *
nvfx_transfer_new(struct pipe_context *pipe,
- struct pipe_resource *pt,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box)
+ struct pipe_resource *pt,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
if((usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK)) == PIPE_TRANSFER_DONTBLOCK)
{
@@ -44,11 +44,11 @@ nvfx_transfer_new(struct pipe_context *pipe,
return NULL;
pipe_resource_reference(&tx->resource, pt);
- tx->sr = sr;
+ tx->level = level;
tx->usage = usage;
tx->box = *box;
- tx->slice_stride = tx->stride = util_format_get_stride(pt->format, box->width);
+ tx->layer_stride = tx->stride = util_format_get_stride(pt->format, box->width);
tx->data = buffer->data + util_format_get_stride(pt->format, box->x);
return tx;
@@ -62,20 +62,20 @@ nvfx_transfer_new(struct pipe_context *pipe,
if(!tx)
return NULL;
- util_staging_transfer_init(pipe, pt, sr, usage, box, direct, &tx->base);
+ util_staging_transfer_init(pipe, pt, level, usage, box, direct, &tx->base);
if(direct)
{
- tx->base.base.stride = nvfx_subresource_pitch(pt, sr.level);
- tx->base.base.slice_stride = tx->base.base.stride * u_minify(pt->height0, sr.level);
- tx->offset = nvfx_subresource_offset(pt, sr.face, sr.level, box->z)
+ tx->base.base.stride = nvfx_subresource_pitch(pt, level);
+ tx->base.base.layer_stride = tx->base.base.stride * u_minify(pt->height0, level);
+ tx->offset = nvfx_subresource_offset(pt, box->z, level, box->z)
+ util_format_get_2d_size(pt->format, tx->base.base.stride, box->y)
+ util_format_get_stride(pt->format, box->x);
}
else
{
tx->base.base.stride = nvfx_subresource_pitch(tx->base.staging_resource, 0);
- tx->base.base.slice_stride = tx->base.base.stride * tx->base.staging_resource->height0;
+ tx->base.base.layer_stride = tx->base.base.stride * tx->base.staging_resource->height0;
tx->offset = 0;
}
@@ -187,7 +187,7 @@ nvfx_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx)
static void nvfx_transfer_inline_write( struct pipe_context *pipe,
struct pipe_resource *pr,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box,
const void *data,
@@ -196,7 +196,7 @@ static void nvfx_transfer_inline_write( struct pipe_context *pipe,
{
if(pr->target != PIPE_BUFFER)
{
- u_default_transfer_inline_write(pipe, pr, sr, usage, box, data, stride, slice_stride);
+ u_default_transfer_inline_write(pipe, pr, level, usage, box, data, stride, slice_stride);
}
else
{
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.h b/src/gallium/drivers/nvfx/nvfx_transfer.h
index 20f20d5b0b..682f428b79 100644
--- a/src/gallium/drivers/nvfx/nvfx_transfer.h
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.h
@@ -9,7 +9,7 @@
struct pipe_transfer *
nvfx_transfer_new(struct pipe_context *pcontext,
struct pipe_resource *pt,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box);
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
index 597664e771..01dacb43da 100644
--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -9,8 +9,7 @@
#include "nvfx_resource.h"
#include "nouveau/nouveau_channel.h"
-
-#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nv04_pushbuf.h"
static inline unsigned
util_guess_unique_indices_count(unsigned mode, unsigned indices)
@@ -247,6 +246,7 @@ boolean
nvfx_vbo_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
int i;
int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
@@ -262,11 +262,11 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
float v[4];
ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
- nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
+ nvfx_emit_vtx_attr(chan, eng3d, ve->idx, v, ve->ncomp);
}
- OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements);
if(nvfx->use_vertex_buffers)
{
unsigned idx = 0;
@@ -297,12 +297,12 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
unsigned i;
/* seems to be some kind of cache flushing */
for(i = 0; i < 3; ++i) {
- OUT_RING(chan, RING_3D(0x1718, 1));
+ BEGIN_RING(chan, eng3d, 0x1718, 1);
OUT_RING(chan, 0);
}
}
- OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements);
if(nvfx->use_vertex_buffers)
{
unsigned idx = 0;
@@ -330,7 +330,7 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
OUT_RING(chan, 0);
}
- OUT_RING(chan, RING_3D(0x1710, 1));
+ BEGIN_RING(chan, eng3d, 0x1710, 1);
OUT_RING(chan, 0);
nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
@@ -342,15 +342,14 @@ void
nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
unsigned num_outputs = nvfx->vertprog->draw_elements;
int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr);
if (!elements)
return;
- WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2);
-
- OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements);
for(unsigned i = 0; i < num_outputs; ++i)
OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT);
for(unsigned i = num_outputs; i < elements; ++i)
@@ -360,16 +359,16 @@ nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
unsigned i;
/* seems to be some kind of cache flushing */
for(i = 0; i < 3; ++i) {
- OUT_RING(chan, RING_3D(0x1718, 1));
+ BEGIN_RING(chan, eng3d, 0x1718, 1);
OUT_RING(chan, 0);
}
}
- OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
+ BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements);
for (unsigned i = 0; i < elements; i++)
OUT_RING(chan, 0);
- OUT_RING(chan, RING_3D(0x1710, 1));
+ BEGIN_RING(chan, eng3d, 0x1710, 1);
OUT_RING(chan, 0);
nvfx->hw_vtxelt_nr = num_outputs;
@@ -592,18 +591,10 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- for(unsigned i = 0; i < count; ++i)
- {
- pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
- nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
- nvfx->vtxbuf[i].max_index = vb[i].max_index;
- nvfx->vtxbuf[i].stride = vb[i].stride;
- }
-
- for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
- pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
+ util_copy_vertex_buffers(nvfx->vtxbuf,
+ &nvfx->vtxbuf_nr,
+ vb, count);
- nvfx->vtxbuf_nr = count;
nvfx->use_vertex_buffers = -1;
nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
index e543fda50e..a11941f3d5 100644
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -1182,6 +1182,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
{
struct nvfx_screen *screen = nvfx->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog;
struct nvfx_vertex_program* vp;
struct pipe_resource *constbuf;
@@ -1341,7 +1342,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
}
*/
- WAIT_RING(chan, 6 * vp->nr_consts);
for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) {
struct nvfx_vertex_program_data *vpd = &vp->consts[i];
@@ -1356,7 +1356,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
//printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]);
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5);
OUT_RING(chan, i + vp->data->start);
OUT_RINGp(chan, (uint32_t *)vpd->value, 4);
}
@@ -1364,11 +1364,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
/* Upload vtxprog */
if (upload_code) {
- WAIT_RING(chan, 2 + 5 * vp->nr_insns);
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
OUT_RING(chan, vp->exec->start);
for (i = 0; i < vp->nr_insns; i++) {
- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
//printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
OUT_RINGp(chan, vp->insns[i].data, 4);
}
@@ -1377,11 +1376,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
if(nvfx->dirty & (NVFX_NEW_VERTPROG))
{
- WAIT_RING(chan, 6);
- OUT_RING(chan, RING_3D(NV30_3D_VP_START_FROM_ID, 1));
+ BEGIN_RING(chan, eng3d, NV30_3D_VP_START_FROM_ID, 1);
OUT_RING(chan, vp->exec->start);
if(nvfx->is_nv4x) {
- OUT_RING(chan, RING_3D(NV40_3D_VP_ATTRIB_EN, 1));
+ BEGIN_RING(chan, eng3d, NV40_3D_VP_ATTRIB_EN, 1);
OUT_RING(chan, vp->ir);
}
}
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 0ac4e4c6f1..a43e83c0d3 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -179,6 +179,12 @@ static void r300_clear(struct pipe_context* pipe,
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
+ /* Decompress zbuffers that are bound as textures. If we didn't flush here,
+ * it would happen inside the blitter when updating derived state,
+ * causing a blitter operation to be called from inside the blitter,
+ * which would overwrite saved states and they would never get restored. */
+ r300_flush_depth_textures(r300);
+
/* Enable fast Z clear.
* The zbuffer must be in micro-tiled mode, otherwise it locks up. */
if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) {
@@ -186,12 +192,12 @@ static void r300_clear(struct pipe_context* pipe,
r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG);
- if (zstex->zmask_mem[fb->zsbuf->level]) {
- r300->zmask_clear.dirty = TRUE;
+ if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) {
+ r300_mark_atom_dirty(r300, &r300->zmask_clear);
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
}
- if (zstex->hiz_mem[fb->zsbuf->level])
- r300->hiz_clear.dirty = TRUE;
+ if (zstex->hiz_mem[fb->zsbuf->u.tex.level])
+ r300_mark_atom_dirty(r300, &r300->hiz_clear);
}
/* Enable CBZB clear. */
@@ -230,7 +236,7 @@ static void r300_clear(struct pipe_context* pipe,
r300_get_num_cs_end_dwords(r300);
/* Reserve CS space. */
- if (dwords > (r300->cs->ndw - r300->cs->cdw)) {
+ if (dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
r300->context.flush(&r300->context, 0, NULL);
}
@@ -259,9 +265,9 @@ static void r300_clear(struct pipe_context* pipe,
* If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
* looks if zmask/hiz is in use and enables fastfill accordingly. */
if (zstex &&
- (zstex->zmask_in_use[fb->zsbuf->level] ||
- zstex->hiz_in_use[fb->zsbuf->level])) {
- r300->hyperz_state.dirty = TRUE;
+ (zstex->zmask_in_use[fb->zsbuf->u.tex.level] ||
+ zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
@@ -274,6 +280,12 @@ static void r300_clear_render_target(struct pipe_context *pipe,
{
struct r300_context *r300 = r300_context(pipe);
+ /* Decompress zbuffers that are bound as textures. If we didn't flush here,
+ * it would happen inside the blitter when updating derived state,
+ * causing a blitter operation to be called from inside the blitter,
+ * which would overwrite saved states and they would never get restored. */
+ r300_flush_depth_textures(r300);
+
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_clear_render_target(r300->blitter, dst, rgba,
dstx, dsty, width, height);
@@ -291,6 +303,12 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
{
struct r300_context *r300 = r300_context(pipe);
+ /* Decompress zbuffers that are bound as textures. If we didn't flush here,
+ * it would happen inside the blitter when updating derived state,
+ * causing a blitter operation to be called from inside the blitter,
+ * which would overwrite saved states and they would never get restored. */
+ r300_flush_depth_textures(r300);
+
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil,
dstx, dsty, width, height);
@@ -298,64 +316,103 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
}
/* Flush a depth stencil buffer. */
-void r300_flush_depth_stencil(struct pipe_context *pipe,
- struct pipe_resource *dst,
- struct pipe_subresource subdst,
- unsigned zslice)
+static void r300_flush_depth_stencil(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ unsigned level,
+ unsigned layer)
{
struct r300_context *r300 = r300_context(pipe);
- struct pipe_surface *dstsurf;
+ struct pipe_surface *dstsurf, surf_tmpl;
struct r300_texture *tex = r300_texture(dst);
- if (!tex->zmask_mem[subdst.level])
+ if (!tex->zmask_mem[level])
return;
- if (!tex->zmask_in_use[subdst.level])
+ if (!tex->zmask_in_use[level])
return;
- dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst,
- subdst.face, subdst.level, zslice,
- PIPE_BIND_DEPTH_STENCIL);
+ surf_tmpl.format = dst->format;
+ surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
+ surf_tmpl.u.tex.level = level;
+ surf_tmpl.u.tex.first_layer = layer;
+ surf_tmpl.u.tex.last_layer = layer;
+ dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl);
+
r300->z_decomp_rd = TRUE;
+
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_flush_depth_stencil(r300->blitter, dstsurf);
r300_blitter_end(r300);
r300->z_decomp_rd = FALSE;
- tex->zmask_in_use[subdst.level] = FALSE;
+ tex->zmask_in_use[level] = FALSE;
+ pipe_surface_reference(&dstsurf, NULL);
+}
+
+/* We can't use compressed zbuffers as samplers. */
+void r300_flush_depth_textures(struct r300_context *r300)
+{
+ struct r300_textures_state *state =
+ (struct r300_textures_state*)r300->textures_state.state;
+ unsigned i, level;
+ unsigned count = MIN2(state->sampler_view_count,
+ state->sampler_state_count);
+
+ if (r300->z_decomp_rd)
+ return;
+
+ for (i = 0; i < count; i++)
+ if (state->sampler_views[i] && state->sampler_states[i]) {
+ struct pipe_resource *tex = state->sampler_views[i]->base.texture;
+
+ if (tex->target == PIPE_TEXTURE_3D ||
+ tex->target == PIPE_TEXTURE_CUBE)
+ continue;
+
+ /* Ignore non-depth textures.
+ * Also ignore reinterpreted depth textures, e.g. resource_copy. */
+ if (!util_format_is_depth_or_stencil(tex->format))
+ continue;
+
+ for (level = 0; level <= tex->last_level; level++)
+ if (r300_texture(tex)->zmask_in_use[level]) {
+ /* We don't handle 3D textures and cubemaps yet. */
+ r300_flush_depth_stencil(&r300->context, tex, level, 0);
+ }
+ }
}
/* Copy a block of pixels from one surface to another using HW. */
static void r300_hw_copy_region(struct pipe_context* pipe,
struct pipe_resource *dst,
- struct pipe_subresource subdst,
+ unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
- struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
struct r300_context* r300 = r300_context(pipe);
r300_blitter_begin(r300, R300_COPY);
- util_blitter_copy_region(r300->blitter, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height,
- TRUE);
+
+ /* Do a copy */
+ util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box, TRUE);
r300_blitter_end(r300);
}
/* Copy a block of pixels from one surface to another. */
static void r300_resource_copy_region(struct pipe_context *pipe,
struct pipe_resource *dst,
- struct pipe_subresource subdst,
+ unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
- struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
enum pipe_format old_format = dst->format;
enum pipe_format new_format = old_format;
boolean is_depth;
+
if (!pipe->screen->is_format_supported(pipe->screen,
old_format, src->target,
src->nr_samples,
@@ -384,8 +441,9 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
if (is_depth) {
- r300_flush_depth_stencil(pipe, src, subsrc, srcz);
+ r300_flush_depth_stencil(pipe, src, src_level, src_box->z);
}
+
if (old_format != new_format) {
r300_texture_reinterpret_format(pipe->screen,
dst, new_format);
@@ -393,8 +451,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
src, new_format);
}
- r300_hw_copy_region(pipe, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height);
+ r300_hw_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
if (old_format != new_format) {
r300_texture_reinterpret_format(pipe->screen,
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 48c2409211..2b183f62c5 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -366,7 +366,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RV530;
caps->num_vert_fpus = 5;
caps->is_r500 = TRUE;
- /*caps->hiz_ram = RV530_HIZ_LIMIT;*/
+ caps->hiz_ram = RV530_HIZ_LIMIT;
caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
@@ -424,4 +424,5 @@ void r300_parse_chipset(struct r300_capabilities* caps)
}
caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350;
+ caps->dxtc_swizzle = caps->is_r400 || caps->is_r500;
}
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index e7ca642b4f..f2035d2009 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -79,6 +79,10 @@ struct r300_capabilities {
boolean is_r500;
/* Whether or not the second pixel pipe is accessed with the high bit */
boolean high_second_pipe;
+ /* DXTC texture swizzling. */
+ boolean dxtc_swizzle;
+ /* Index bias (AKA index offset). */
+ boolean index_bias_supported;
};
/* Enumerations for legibility and telling which card we're running on. */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index e8c09b214a..91263ad7bc 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -35,7 +35,9 @@
#include "r300_screen_buffer.h"
#include "r300_winsys.h"
-#include <inttypes.h>
+#ifdef HAVE_LLVM
+#include "gallivm/lp_bld_init.h"
+#endif
static void r300_update_num_contexts(struct r300_screen *r300screen,
int diff)
@@ -44,14 +46,14 @@ static void r300_update_num_contexts(struct r300_screen *r300screen,
p_atomic_inc(&r300screen->num_contexts);
if (r300screen->num_contexts > 1)
- util_mempool_set_thread_safety(&r300screen->pool_buffers,
- UTIL_MEMPOOL_MULTITHREADED);
+ util_slab_set_thread_safety(&r300screen->pool_buffers,
+ UTIL_SLAB_MULTITHREADED);
} else {
p_atomic_dec(&r300screen->num_contexts);
if (r300screen->num_contexts <= 1)
- util_mempool_set_thread_safety(&r300screen->pool_buffers,
- UTIL_MEMPOOL_SINGLETHREADED);
+ util_slab_set_thread_safety(&r300screen->pool_buffers,
+ UTIL_SLAB_SINGLETHREADED);
}
}
@@ -88,6 +90,7 @@ static void r300_release_referenced_objects(struct r300_context *r300)
/* Vertex buffers. */
for (i = 0; i < r300->vertex_buffer_count; i++) {
pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
+ pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL);
}
/* If there are any queries pending or not destroyed, remove them now. */
@@ -100,21 +103,15 @@ static void r300_release_referenced_objects(struct r300_context *r300)
static void r300_destroy_context(struct pipe_context* context)
{
struct r300_context* r300 = r300_context(context);
- struct r300_atom *atom;
if (r300->blitter)
util_blitter_destroy(r300->blitter);
- if (r300->draw)
+ if (r300->draw) {
draw_destroy(r300->draw);
- /* Print stats, if enabled. */
- if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
- fprintf(stderr, "r300: Stats for context %p:\n", r300);
- fprintf(stderr, " : Flushes: %" PRIu64 "\n", r300->flush_counter);
- foreach(atom, &r300->atom_list) {
- fprintf(stderr, " : %s: %" PRIu64 " emits\n",
- atom->name, atom->counter);
- }
+#ifdef HAVE_LLVM
+ gallivm_destroy(r300->gallivm);
+#endif
}
if (r300->upload_vb)
@@ -135,7 +132,7 @@ static void r300_destroy_context(struct pipe_context* context)
r300->rws->cs_destroy(r300->cs);
/* XXX: No way to tell if this was initialized or not? */
- util_mempool_destroy(&r300->pool_transfers);
+ util_slab_destroy(&r300->pool_transfers);
r300_update_num_contexts(r300->screen, -1);
@@ -177,10 +174,16 @@ void r300_flush_cb(void *data)
r300->atomname.size = atomsize; \
r300->atomname.emit = r300_emit_##atomname; \
r300->atomname.dirty = FALSE; \
- insert_at_tail(&r300->atom_list, &r300->atomname); \
} while (0)
-static void r300_setup_atoms(struct r300_context* r300)
+#define R300_ALLOC_ATOM(atomname, statetype) \
+do { \
+ r300->atomname.state = CALLOC_STRUCT(statetype); \
+ if (r300->atomname.state == NULL) \
+ return FALSE; \
+} while (0)
+
+static boolean r300_setup_atoms(struct r300_context* r300)
{
boolean is_rv350 = r300->screen->caps.is_rv350;
boolean is_r500 = r300->screen->caps.is_r500;
@@ -192,9 +195,6 @@ static void r300_setup_atoms(struct r300_context* r300)
/* Create the actual atom list.
*
- * Each atom is examined and emitted in the order it appears here, which
- * can affect performance and conformance if not handled with care.
- *
* Some atoms never change size, others change every emit - those have
* the size of 0 here.
*
@@ -206,7 +206,6 @@ static void r300_setup_atoms(struct r300_context* r300)
* - fb_state_pipelined (pipelined regs)
* The motivation behind this is to be able to emit a strict
* subset of the regs, and to have reasonable register ordering. */
- make_empty_list(&r300->atom_list);
/* SC, GB (unpipelined), RB3D (unpipelined), ZB (unpipelined). */
R300_INIT_ATOM(gpu_flush, 9);
R300_INIT_ATOM(aa_state, 4);
@@ -261,23 +260,23 @@ static void r300_setup_atoms(struct r300_context* r300)
}
/* Some non-CSO atoms need explicit space to store the state locally. */
- r300->aa_state.state = CALLOC_STRUCT(r300_aa_state);
- r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state);
- r300->clip_state.state = CALLOC_STRUCT(r300_clip_state);
- r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state);
- r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state);
- r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state);
- r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state);
- r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block);
- r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
- r300->textures_state.state = CALLOC_STRUCT(r300_textures_state);
- r300->vap_invariant_state.state = CALLOC_STRUCT(r300_vap_invariant_state);
- r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
- r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
- r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer);
- r300->vs_constants.state = CALLOC_STRUCT(r300_constant_buffer);
+ R300_ALLOC_ATOM(aa_state, r300_aa_state);
+ R300_ALLOC_ATOM(blend_color_state, r300_blend_color_state);
+ R300_ALLOC_ATOM(clip_state, r300_clip_state);
+ R300_ALLOC_ATOM(hyperz_state, r300_hyperz_state);
+ R300_ALLOC_ATOM(invariant_state, r300_invariant_state);
+ R300_ALLOC_ATOM(textures_state, r300_textures_state);
+ R300_ALLOC_ATOM(vap_invariant_state, r300_vap_invariant_state);
+ R300_ALLOC_ATOM(viewport_state, r300_viewport_state);
+ R300_ALLOC_ATOM(ztop_state, r300_ztop_state);
+ R300_ALLOC_ATOM(fb_state, pipe_framebuffer_state);
+ R300_ALLOC_ATOM(gpu_flush, pipe_framebuffer_state);
+ R300_ALLOC_ATOM(scissor_state, pipe_scissor_state);
+ R300_ALLOC_ATOM(rs_block_state, r300_rs_block);
+ R300_ALLOC_ATOM(fs_constants, r300_constant_buffer);
+ R300_ALLOC_ATOM(vs_constants, r300_constant_buffer);
if (!r300->screen->caps.has_tcl) {
- r300->vertex_stream_state.state = CALLOC_STRUCT(r300_vertex_stream_state);
+ R300_ALLOC_ATOM(vertex_stream_state, r300_vertex_stream_state);
}
/* Some non-CSO atoms don't use the state pointer. */
@@ -289,11 +288,13 @@ static void r300_setup_atoms(struct r300_context* r300)
/* Some states must be marked as dirty here to properly set up
* hardware in the first command stream. */
- r300->invariant_state.dirty = TRUE;
- r300->pvs_flush.dirty = TRUE;
- r300->vap_invariant_state.dirty = TRUE;
- r300->texture_cache_inval.dirty = TRUE;
- r300->textures_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->invariant_state);
+ r300_mark_atom_dirty(r300, &r300->pvs_flush);
+ r300_mark_atom_dirty(r300, &r300->vap_invariant_state);
+ r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
+ r300_mark_atom_dirty(r300, &r300->textures_state);
+
+ return TRUE;
}
/* Not every state tracker calls every driver function before the first draw
@@ -319,7 +320,7 @@ static void r300_init_states(struct pipe_context *pipe)
pipe->set_scissor_state(pipe, &ss);
/* Initialize the clip state. */
- if (r300_context(pipe)->screen->caps.has_tcl) {
+ if (r300->screen->caps.has_tcl) {
pipe->set_clip_state(pipe, &cs);
} else {
BEGIN_CB(clip->cb, 2);
@@ -421,9 +422,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
make_empty_list(&r300->query_list);
- util_mempool_create(&r300->pool_transfers,
- sizeof(struct pipe_transfer), 64,
- UTIL_MEMPOOL_SINGLETHREADED);
+ util_slab_create(&r300->pool_transfers,
+ sizeof(struct pipe_transfer), 64,
+ UTIL_SLAB_SINGLETHREADED);
r300->cs = rws->cs_create(rws);
if (r300->cs == NULL)
@@ -431,7 +432,14 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
if (!r300screen->caps.has_tcl) {
/* Create a Draw. This is used for SW TCL. */
+#ifdef HAVE_LLVM
+ r300->gallivm = gallivm_create();
+ r300->draw = draw_create_gallivm(&r300->context, r300->gallivm);
+#else
r300->draw = draw_create(&r300->context);
+#endif
+ if (r300->draw == NULL)
+ goto fail;
/* Enable our renderer. */
draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
/* Disable converting points/lines to triangles. */
@@ -439,7 +447,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
draw_wide_point_threshold(r300->draw, 10000000.f);
}
- r300_setup_atoms(r300);
+ if (!r300_setup_atoms(r300))
+ goto fail;
r300_init_blit_functions(r300);
r300_init_flush_functions(r300);
@@ -462,14 +471,14 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
goto fail;
r300->upload_ib = u_upload_create(&r300->context,
- 32 * 1024, 16,
+ 64 * 1024, 16,
PIPE_BIND_INDEX_BUFFER);
if (r300->upload_ib == NULL)
goto fail;
r300->upload_vb = u_upload_create(&r300->context,
- 128 * 1024, 16,
+ 1024 * 1024, 16,
PIPE_BIND_VERTEX_BUFFER);
if (r300->upload_vb == NULL)
goto fail;
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 7217c51b95..e09cf87f73 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -43,12 +43,8 @@ struct r300_vertex_shader;
struct r300_stencilref_context;
struct r300_atom {
- /* List pointers. */
- struct r300_atom *prev, *next;
/* Name, for debugging. */
const char* name;
- /* Stat counter. */
- uint64_t counter;
/* Opaque state. */
void* state;
/* Emit the state to the context. */
@@ -258,6 +254,8 @@ struct r300_constant_buffer {
uint32_t *ptr;
/* Remapping table. */
unsigned *remap_table;
+ /* const buffer base */
+ uint32_t buffer_base;
};
/* Query object.
@@ -282,6 +280,7 @@ struct r300_query {
/* The buffer where query results are stored. */
struct r300_winsys_buffer *buffer;
+ struct r300_winsys_cs_buffer *cs_buffer;
/* The size of the buffer. */
unsigned buffer_size;
/* The domain of the buffer. */
@@ -313,6 +312,7 @@ struct r300_surface {
/* Winsys buffer backing the texture. */
struct r300_winsys_buffer *buffer;
+ struct r300_winsys_cs_buffer *cs_buffer;
enum r300_buffer_domain domain;
@@ -396,6 +396,7 @@ struct r300_texture {
/* Pipe buffer backing this texture. */
struct r300_winsys_buffer *buffer;
+ struct r300_winsys_cs_buffer *cs_buffer;
/* Registers carrying texture format data. */
/* Only format-independent bits should be filled in. */
@@ -439,9 +440,6 @@ struct r300_translate_context {
/* Translate cache for incompatible vertex offset/stride/format fallback. */
struct translate_cache *translate_cache;
- /* The vertex buffer slot containing the translated buffer. */
- unsigned vb_slot;
-
/* Saved and new vertex element state. */
void *saved_velems, *new_velems;
};
@@ -458,6 +456,7 @@ struct r300_context {
struct r300_screen *screen;
/* Draw module. Used mostly for SW TCL. */
+ struct gallivm_state *gallivm;
struct draw_context* draw;
/* Vertex buffer for SW TCL. */
struct pipe_resource* vbo;
@@ -492,73 +491,79 @@ struct r300_context {
struct r300_query query_list;
/* Various CSO state objects. */
- /* Beginning of atom list. */
- struct r300_atom atom_list;
+
+ /* Each atom is emitted in the order it appears here, which can affect
+ * performance and stability if not handled with care. */
+ /* GPU flush. */
+ struct r300_atom gpu_flush;
/* Anti-aliasing (MSAA) state. */
struct r300_atom aa_state;
+ /* Framebuffer state. */
+ struct r300_atom fb_state;
+ /* HyperZ state (various SC/ZB bits). */
+ struct r300_atom hyperz_state;
+ /* ZTOP state. */
+ struct r300_atom ztop_state;
+ /* Depth, stencil, and alpha state. */
+ struct r300_atom dsa_state;
/* Blend state. */
struct r300_atom blend_state;
/* Blend color state. */
struct r300_atom blend_color_state;
+ /* Scissor state. */
+ struct r300_atom scissor_state;
+ /* Invariant state. This must be emitted to get the engine started. */
+ struct r300_atom invariant_state;
+ /* Viewport state. */
+ struct r300_atom viewport_state;
+ /* PVS flush. */
+ struct r300_atom pvs_flush;
+ /* VAP invariant state. */
+ struct r300_atom vap_invariant_state;
+ /* Vertex stream formatting state. */
+ struct r300_atom vertex_stream_state;
+ /* Vertex shader. */
+ struct r300_atom vs_state;
/* User clip planes. */
struct r300_atom clip_state;
- /* Depth, stencil, and alpha state. */
- struct r300_atom dsa_state;
+ /* RS block state + VAP (vertex shader) output mapping state. */
+ struct r300_atom rs_block_state;
+ /* Rasterizer state. */
+ struct r300_atom rs_state;
+ /* Framebuffer state (pipelined regs). */
+ struct r300_atom fb_state_pipelined;
/* Fragment shader. */
struct r300_atom fs;
/* Fragment shader RC_CONSTANT_STATE variables. */
struct r300_atom fs_rc_constant_state;
/* Fragment shader constant buffer. */
struct r300_atom fs_constants;
- /* Framebuffer state. */
- struct r300_atom fb_state;
- /* Framebuffer state (pipelined regs). */
- struct r300_atom fb_state_pipelined;
- /* HyperZ state (various SC/ZB bits). */
- struct r300_atom hyperz_state;
- /* Occlusion query. */
- struct r300_atom query_start;
- /* Rasterizer state. */
- struct r300_atom rs_state;
- /* RS block state + VAP (vertex shader) output mapping state. */
- struct r300_atom rs_block_state;
- /* Scissor state. */
- struct r300_atom scissor_state;
- /* Textures state. */
- struct r300_atom textures_state;
- /* Vertex stream formatting state. */
- struct r300_atom vertex_stream_state;
- /* Vertex shader. */
- struct r300_atom vs_state;
/* Vertex shader constant buffer. */
struct r300_atom vs_constants;
- /* Viewport state. */
- struct r300_atom viewport_state;
- /* ZTOP state. */
- struct r300_atom ztop_state;
- /* PVS flush. */
- struct r300_atom pvs_flush;
- /* VAP invariant state. */
- struct r300_atom vap_invariant_state;
/* Texture cache invalidate. */
struct r300_atom texture_cache_inval;
- /* GPU flush. */
- struct r300_atom gpu_flush;
+ /* Textures state. */
+ struct r300_atom textures_state;
/* HiZ clear */
struct r300_atom hiz_clear;
/* zmask clear */
struct r300_atom zmask_clear;
+ /* Occlusion query. */
+ struct r300_atom query_start;
- /* Invariant state. This must be emitted to get the engine started. */
- struct r300_atom invariant_state;
+ /* The pointers to the first and the last atom. */
+ struct r300_atom *first_dirty, *last_dirty;
/* Vertex buffers for Gallium. */
+ /* May contain user buffers. */
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ /* Contains only non-user buffers. */
+ struct pipe_resource *valid_vertex_buffer[PIPE_MAX_ATTRIBS];
int vertex_buffer_count;
int vertex_buffer_max_index;
+ boolean any_user_vbs;
/* Vertex elements for Gallium. */
struct r300_vertex_element_state *velems;
- bool any_user_vbs;
struct pipe_index_buffer index_buffer;
@@ -599,12 +604,32 @@ struct r300_context {
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
- struct util_mempool pool_transfers;
+ struct util_slab_mempool pool_transfers;
/* Stat counter. */
uint64_t flush_counter;
+
+ /* const tracking for VS */
+ int vs_const_base;
+
+ /* AOS (PACKET3_3D_LOAD_VBPNTR) command buffer for the case offset=0. */
+ uint32_t vertex_arrays_cb[(16 * 3 + 1) / 2];
+ boolean vertex_arrays_dirty;
+
+ /* Whether any buffer (FB, textures, VBOs) has been set, but buffers
+ * haven't been validated yet. */
+ boolean validate_buffers;
+ /* Whether user buffers have been validated. */
+ boolean upload_vb_validated;
+ boolean upload_ib_validated;
};
+#define foreach_atom(r300, atom) \
+ for (atom = &r300->gpu_flush; atom != (&r300->query_start)+1; atom++)
+
+#define foreach_dirty_atom(r300, atom) \
+ for (atom = r300->first_dirty; atom != r300->last_dirty; atom++)
+
/* Convenience cast wrappers. */
static INLINE struct r300_query* r300_query(struct pipe_query* q)
{
@@ -631,6 +656,22 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300)
return (struct r300_fragment_shader*)r300->fs.state;
}
+static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
+ struct r300_atom *atom)
+{
+ atom->dirty = TRUE;
+
+ if (!r300->first_dirty) {
+ r300->first_dirty = atom;
+ r300->last_dirty = atom+1;
+ } else {
+ if (atom < r300->first_dirty)
+ r300->first_dirty = atom;
+ else if (atom+1 > r300->last_dirty)
+ r300->last_dirty = atom+1;
+ }
+}
+
struct pipe_context* r300_create_context(struct pipe_screen* screen,
void *priv);
@@ -647,10 +688,7 @@ void r300_init_state_functions(struct r300_context* r300);
void r300_init_resource_functions(struct r300_context* r300);
/* r300_blit.c */
-void r300_flush_depth_stencil(struct pipe_context *pipe,
- struct pipe_resource *dst,
- struct pipe_subresource subdst,
- unsigned zslice);
+void r300_flush_depth_textures(struct r300_context *r300);
/* r300_query.c */
void r300_resume_query(struct r300_context *r300,
@@ -658,7 +696,8 @@ void r300_resume_query(struct r300_context *r300,
void r300_stop_query(struct r300_context *r300);
/* r300_render_translate.c */
-void r300_begin_vertex_translate(struct r300_context *r300);
+void r300_begin_vertex_translate(struct r300_context *r300,
+ int min_index, int max_index);
void r300_end_vertex_translate(struct r300_context *r300);
void r300_translate_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
@@ -670,20 +709,23 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
/* r300_render.c */
void r300_draw_flush_vbuf(struct r300_context *r300);
-boolean r500_index_bias_supported(struct r300_context *r300);
void r500_emit_index_bias(struct r300_context *r300, int index_bias);
/* r300_state.c */
enum r300_fb_state_change {
R300_CHANGED_FB_STATE = 0,
R300_CHANGED_CBZB_FLAG,
- R300_CHANGED_ZCLEAR_FLAG
+ R300_CHANGED_ZCLEAR_FLAG,
+ R300_CHANGED_MULTIWRITE
};
void r300_mark_fb_state_dirty(struct r300_context *r300,
enum r300_fb_state_change change);
void r300_mark_fs_code_dirty(struct r300_context *r300);
+/* r300_state_derived.c */
+void r300_update_derived_state(struct r300_context* r300);
+
/* r300_debug.c */
void r500_dump_rs_block(struct r300_rs_block *rs);
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index c194d6a1b0..6726f100e1 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -51,7 +51,7 @@
int cs_count = 0; (void) cs_count; (void) cs_winsys;
#define BEGIN_CS(size) do { \
- assert(size <= (cs_copy->ndw - cs_copy->cdw)); \
+ assert(size <= (R300_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \
CS_DEBUG(cs_count = size;) \
} while (0)
@@ -72,7 +72,7 @@
*/
#define OUT_CS(value) do { \
- cs_copy->ptr[cs_copy->cdw++] = (value); \
+ cs_copy->buf[cs_copy->cdw++] = (value); \
CS_DEBUG(cs_count--;) \
} while (0)
@@ -96,7 +96,7 @@
OUT_CS(CP_PACKET3(op, count))
#define OUT_CS_TABLE(values, count) do { \
- memcpy(cs_copy->ptr + cs_copy->cdw, values, count * 4); \
+ memcpy(cs_copy->buf + cs_copy->cdw, values, count * 4); \
cs_copy->cdw += count; \
CS_DEBUG(cs_count -= count;) \
} while (0)
@@ -106,26 +106,26 @@
* Writing relocations.
*/
-#define OUT_CS_RELOC(bo, offset, rd, wd) do { \
+#define OUT_CS_RELOC(bo, offset) do { \
assert(bo); \
OUT_CS(offset); \
- cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \
+ cs_winsys->cs_write_reloc(cs_copy, bo); \
CS_DEBUG(cs_count -= 2;) \
} while (0)
-#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \
+#define OUT_CS_BUF_RELOC(bo, offset) do { \
assert(bo); \
- OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd); \
+ OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset); \
} while (0)
-#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \
+#define OUT_CS_TEX_RELOC(tex, offset) do { \
assert(tex); \
- OUT_CS_RELOC(tex->buffer, offset, rd, wd); \
+ OUT_CS_RELOC(tex->cs_buffer, offset); \
} while (0)
-#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \
+#define OUT_CS_BUF_RELOC_NO_OFFSET(bo) do { \
assert(bo); \
- cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->buf, rd, wd); \
+ cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf); \
CS_DEBUG(cs_count -= 2;) \
} while (0)
@@ -136,7 +136,7 @@
#define WRITE_CS_TABLE(values, count) do { \
CS_DEBUG(assert(cs_count == 0);) \
- memcpy(cs_copy->ptr + cs_copy->cdw, (values), (count) * 4); \
+ memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \
cs_copy->cdw += (count); \
} while (0)
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index f78fe34790..d6aa90bd05 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -40,8 +40,8 @@ static const struct debug_named_value debug_options[] = {
{ "rs", DBG_RS, "Log rasterizer" },
{ "fb", DBG_FB, "Log framebuffer" },
{ "cbzb", DBG_CBZB, "Log fast color clear info" },
- { "stats", DBG_STATS, "Log emission statistics" },
{ "hyperz", DBG_HYPERZ, "Log HyperZ info" },
+ { "upload", DBG_UPLOAD, "Log user buffer upload info" },
{ "scissor", DBG_SCISSOR, "Log scissor info" },
{ "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" },
{ "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" },
diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h
index 896aeef395..2d111f9158 100644
--- a/src/gallium/drivers/r300/r300_defines.h
+++ b/src/gallium/drivers/r300/r300_defines.h
@@ -43,8 +43,8 @@ enum r300_buffer_tiling {
};
enum r300_buffer_domain { /* bitfield */
- R300_DOMAIN_GTT = 1,
- R300_DOMAIN_VRAM = 2
+ R300_DOMAIN_GTT = 2,
+ R300_DOMAIN_VRAM = 4
};
#endif
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 5b6f82cd89..d14cdcbbaf 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -26,9 +26,9 @@
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_mm.h"
-#include "util/u_simple_list.h"
#include "r300_context.h"
+#include "r300_cb.h"
#include "r300_cs.h"
#include "r300_emit.h"
#include "r300_fs.h"
@@ -353,10 +353,10 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state)
if (aa->dest) {
OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1);
- OUT_CS_RELOC(aa->dest->buffer, aa->dest->offset, 0, aa->dest->domain);
+ OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset);
OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1);
- OUT_CS_RELOC(aa->dest->buffer, aa->dest->pitch, 0, aa->dest->domain);
+ OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch);
}
OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl);
@@ -369,6 +369,8 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
struct r300_surface* surf;
unsigned i;
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
+ uint32_t rb3d_cctl = 0;
+
CS_LOCALS(r300);
BEGIN_CS(size);
@@ -376,21 +378,24 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
/* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not
* what we usually want. */
if (r300->screen->caps.is_r500) {
- OUT_CS_REG(R300_RB3D_CCTL,
- R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE);
- } else {
- OUT_CS_REG(R300_RB3D_CCTL, 0);
+ rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE;
+ }
+ if (fb->nr_cbufs &&
+ r300_fragment_shader_writes_all(r300_fs(r300))) {
+ rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs);
}
+ OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl);
+
/* Set up colorbuffers. */
for (i = 0; i < fb->nr_cbufs; i++) {
surf = r300_surface(fb->cbufs[i]);
OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
- OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain);
+ OUT_CS_RELOC(surf->cs_buffer, surf->offset);
OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
- OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain);
+ OUT_CS_RELOC(surf->cs_buffer, surf->pitch);
}
/* Set up the ZB part of the CBZB clear. */
@@ -400,10 +405,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
- OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain);
+ OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset);
OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain);
+ OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch);
DBG(r300, DBG_CBZB,
"CBZB clearing cbuf %08x %08x\n", surf->cbzb_format,
@@ -416,15 +421,15 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG(R300_ZB_FORMAT, surf->format);
OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
- OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain);
+ OUT_CS_RELOC(surf->cs_buffer, surf->offset);
OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain);
+ OUT_CS_RELOC(surf->cs_buffer, surf->pitch);
if (can_hyperz) {
uint32_t surf_pitch;
struct r300_texture *tex;
- int level = surf->base.level;
+ int level = surf->base.u.tex.level;
tex = r300_texture(surf->base.texture);
surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK;
@@ -482,15 +487,21 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
{
struct pipe_framebuffer_state* fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- unsigned i;
+ unsigned i, num_cbufs = fb->nr_cbufs;
CS_LOCALS(r300);
+ /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be
+ * marked as UNUSED in the US block. */
+ if (r300_fragment_shader_writes_all(r300_fs(r300))) {
+ num_cbufs = MIN2(num_cbufs, 1);
+ }
+
BEGIN_CS(size);
/* Colorbuffer format in the US block.
* (must be written after unpipelined regs) */
OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
- for (i = 0; i < fb->nr_cbufs; i++) {
+ for (i = 0; i < num_cbufs; i++) {
OUT_CS(r300_surface(fb->cbufs[i])->format);
}
for (; i < 4; i++) {
@@ -559,7 +570,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
struct r300_query *query)
{
struct r300_capabilities* caps = &r300->screen->caps;
- struct r300_winsys_buffer *buf = r300->query_current->buffer;
+ struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer;
CS_LOCALS(r300);
assert(caps->num_frag_pipes);
@@ -578,28 +589,24 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
/* pipe 3 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 3) * 4,
- 0, query->domain);
+ OUT_CS_RELOC(buf, (query->num_results + 3) * 4);
case 3:
/* pipe 2 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 2);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 2) * 4,
- 0, query->domain);
+ OUT_CS_RELOC(buf, (query->num_results + 2) * 4);
case 2:
/* pipe 1 only */
/* As mentioned above, accomodate RV380 and older. */
OUT_CS_REG(R300_SU_REG_DEST,
1 << (caps->high_second_pipe ? 3 : 1));
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 1) * 4,
- 0, query->domain);
+ OUT_CS_RELOC(buf, (query->num_results + 1) * 4);
case 1:
/* pipe 0 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 0) * 4,
- 0, query->domain);
+ OUT_CS_RELOC(buf, (query->num_results + 0) * 4);
break;
default:
fprintf(stderr, "r300: Implementation error: Chipset reports %d"
@@ -615,13 +622,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
static void rv530_emit_query_end_single_z(struct r300_context *r300,
struct r300_query *query)
{
- struct r300_winsys_buffer *buf = r300->query_current->buffer;
+ struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer;
CS_LOCALS(r300);
BEGIN_CS(8);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain);
+ OUT_CS_RELOC(buf, query->num_results * 4);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -629,16 +636,16 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300,
static void rv530_emit_query_end_double_z(struct r300_context *r300,
struct r300_query *query)
{
- struct r300_winsys_buffer *buf = r300->query_current->buffer;
+ struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer;
CS_LOCALS(r300);
BEGIN_CS(14);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain);
+ OUT_CS_RELOC(buf, (query->num_results + 0) * 4);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain);
+ OUT_CS_RELOC(buf, (query->num_results + 1) * 4);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -799,56 +806,100 @@ void r300_emit_textures_state(struct r300_context *r300,
OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2);
OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1);
- OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain,
- 0);
+ OUT_CS_TEX_RELOC(tex, texstate->format.tile_config);
}
}
END_CS;
}
-void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed)
+static void r300_update_vertex_arrays_cb(struct r300_context *r300, unsigned packet_size)
{
struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
struct pipe_vertex_element *velem = r300->velems->velem;
- struct r300_buffer *buf;
- int i;
unsigned *hw_format_size = r300->velems->hw_format_size;
- unsigned size1, size2, aos_count = r300->velems->count;
- unsigned packet_size = (aos_count * 3 + 1) / 2;
- CS_LOCALS(r300);
-
- BEGIN_CS(2 + packet_size + aos_count * 2);
- OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
- OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
+ unsigned size1, size2, vertex_array_count = r300->velems->count;
+ int i;
+ CB_LOCALS;
- for (i = 0; i < aos_count - 1; i += 2) {
+ BEGIN_CB(r300->vertex_arrays_cb, packet_size);
+ for (i = 0; i < vertex_array_count - 1; i += 2) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
vb2 = &vbuf[velem[i+1].vertex_buffer_index];
size1 = hw_format_size[i];
size2 = hw_format_size[i+1];
- OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
+ OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
- OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
- OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
+ OUT_CB(vb1->buffer_offset + velem[i].src_offset);
+ OUT_CB(vb2->buffer_offset + velem[i+1].src_offset);
}
- if (aos_count & 1) {
+ if (vertex_array_count & 1) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
size1 = hw_format_size[i];
- OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
- OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
+ OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
+ OUT_CB(vb1->buffer_offset + velem[i].src_offset);
}
+ END_CB;
+
+ r300->vertex_arrays_dirty = FALSE;
+}
+
+void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed)
+{
+ struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
+ struct pipe_resource **valid_vbuf = r300->valid_vertex_buffer;
+ struct pipe_vertex_element *velem = r300->velems->velem;
+ struct r300_buffer *buf;
+ int i;
+ unsigned vertex_array_count = r300->velems->count;
+ unsigned packet_size = (vertex_array_count * 3 + 1) / 2;
+ CS_LOCALS(r300);
- for (i = 0; i < aos_count; i++) {
- buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer);
- OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0);
+ BEGIN_CS(2 + packet_size + vertex_array_count * 2);
+ OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
+ OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
+
+ if (!offset) {
+ if (r300->vertex_arrays_dirty) {
+ r300_update_vertex_arrays_cb(r300, packet_size);
+ }
+ OUT_CS_TABLE(r300->vertex_arrays_cb, packet_size);
+ } else {
+ struct pipe_vertex_buffer *vb1, *vb2;
+ unsigned *hw_format_size = r300->velems->hw_format_size;
+ unsigned size1, size2;
+
+ for (i = 0; i < vertex_array_count - 1; i += 2) {
+ vb1 = &vbuf[velem[i].vertex_buffer_index];
+ vb2 = &vbuf[velem[i+1].vertex_buffer_index];
+ size1 = hw_format_size[i];
+ size2 = hw_format_size[i+1];
+
+ OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
+ R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
+ OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
+ OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
+ }
+
+ if (vertex_array_count & 1) {
+ vb1 = &vbuf[velem[i].vertex_buffer_index];
+ size1 = hw_format_size[i];
+
+ OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
+ OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
+ }
+ }
+
+ for (i = 0; i < vertex_array_count; i++) {
+ buf = r300_buffer(valid_vbuf[velem[i].vertex_buffer_index]);
+ OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b);
}
END_CS;
}
-void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed)
+void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
{
CS_LOCALS(r300);
@@ -868,7 +919,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed)
OUT_CS(r300->vertex_info.size |
(r300->vertex_info.size << 8));
OUT_CS(r300->draw_vbo_offset);
- OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0);
+ OUT_CS_BUF_RELOC(r300->vbo, 0);
END_CS;
}
@@ -924,7 +975,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
struct r300_vertex_program_code* code = &vs->code;
struct r300_screen* r300screen = r300->screen;
unsigned instruction_count = code->length / 4;
- unsigned i;
unsigned vtx_mem_size = r300screen->caps.is_r500 ? 128 : 72;
unsigned input_count = MAX2(util_bitcount(code->InputsRead), 1);
@@ -935,10 +985,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
vtx_mem_size / output_count, 10);
unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 5);
- unsigned imm_first = vs->externals_count;
- unsigned imm_end = vs->code.constants.Count;
- unsigned imm_count = vs->immediates_count;
-
CS_LOCALS(r300);
BEGIN_CS(size);
@@ -947,12 +993,10 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
* R300_VAP_PVS_CONST_CNTL
* R300_VAP_PVS_CODE_CNTL_1
* See the r5xx docs for instructions on how to use these. */
- OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
- OUT_CS(R300_PVS_FIRST_INST(0) |
- R300_PVS_XYZW_VALID_INST(instruction_count - 1) |
- R300_PVS_LAST_INST(instruction_count - 1));
- OUT_CS(R300_PVS_MAX_CONST_ADDR(code->constants.Count - 1));
- OUT_CS(instruction_count - 1);
+ OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, R300_PVS_FIRST_INST(0) |
+ R300_PVS_XYZW_VALID_INST(instruction_count - 1) |
+ R300_PVS_LAST_INST(instruction_count - 1));
+ OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, instruction_count - 1);
OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length);
@@ -964,19 +1008,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
R300_PVS_VF_MAX_VTX_NUM(12) |
(r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));
- /* Emit immediates. */
- if (imm_count) {
- OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
- (r300->screen->caps.is_r500 ?
- R500_PVS_CONST_START : R300_PVS_CONST_START) +
- imm_first);
- OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4);
- for (i = imm_first; i < imm_end; i++) {
- const float *data = vs->code.constants.Constants[i].u.Immediate;
- OUT_CS_TABLE(data, 4);
- }
- }
-
/* Emit flow control instructions. */
if (code->num_fc_ops) {
@@ -1001,24 +1032,43 @@ void r300_emit_vs_constants(struct r300_context* r300,
unsigned count =
((struct r300_vertex_shader*)r300->vs_state.state)->externals_count;
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
+ struct r300_vertex_shader *vs = (struct r300_vertex_shader*)r300->vs_state.state;
unsigned i;
+ int imm_first = vs->externals_count;
+ int imm_end = vs->code.constants.Count;
+ int imm_count = vs->immediates_count;
CS_LOCALS(r300);
- if (!count)
- return;
-
BEGIN_CS(size);
- OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
- (r300->screen->caps.is_r500 ?
- R500_PVS_CONST_START : R300_PVS_CONST_START));
- OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);
- if (buf->remap_table){
- for (i = 0; i < count; i++) {
- uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
+ OUT_CS_REG(R300_VAP_PVS_CONST_CNTL,
+ R300_PVS_CONST_BASE_OFFSET(buf->buffer_base) |
+ R300_PVS_MAX_CONST_ADDR(MAX2(imm_end - 1, 0)));
+ if (vs->externals_count) {
+ OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
+ (r300->screen->caps.is_r500 ?
+ R500_PVS_CONST_START : R300_PVS_CONST_START) + buf->buffer_base);
+ OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);
+ if (buf->remap_table){
+ for (i = 0; i < count; i++) {
+ uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
+ OUT_CS_TABLE(data, 4);
+ }
+ } else {
+ OUT_CS_TABLE(buf->ptr, count * 4);
+ }
+ }
+
+ /* Emit immediates. */
+ if (imm_count) {
+ OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
+ (r300->screen->caps.is_r500 ?
+ R500_PVS_CONST_START : R300_PVS_CONST_START) +
+ buf->buffer_base + imm_first);
+ OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4);
+ for (i = imm_first; i < imm_end; i++) {
+ const float *data = vs->code.constants.Constants[i].u.Immediate;
OUT_CS_TABLE(data, 4);
}
- } else {
- OUT_CS_TABLE(buf->ptr, count * 4);
}
END_CS;
}
@@ -1073,8 +1123,8 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
tex = r300_texture(fb->zsbuf->texture);
- offset = tex->hiz_mem[fb->zsbuf->level]->ofs;
- stride = tex->desc.stride_in_pixels[fb->zsbuf->level];
+ offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs;
+ stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level];
/* convert from pixels to 4x4 blocks */
stride = ALIGN_DIVUP(stride, 4);
@@ -1096,7 +1146,7 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
z->current_func = -1;
/* Mark the current zbuffer's hiz ram as in use. */
- tex->hiz_in_use[fb->zsbuf->level] = TRUE;
+ tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE;
}
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state)
@@ -1110,9 +1160,9 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state
int mult, offset_shift;
tex = r300_texture(fb->zsbuf->texture);
- stride = tex->desc.stride_in_pixels[fb->zsbuf->level];
+ stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level];
- offset = tex->zmask_mem[fb->zsbuf->level]->ofs;
+ offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs;
if (r300->z_compression == RV350_Z_COMPRESS_88)
mult = 8;
@@ -1138,7 +1188,7 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state
}
/* Mark the current zbuffer's zmask as in use. */
- tex->zmask_in_use[fb->zsbuf->level] = TRUE;
+ tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE;
}
void r300_emit_ztop_state(struct r300_context* r300,
@@ -1165,72 +1215,77 @@ boolean r300_emit_buffer_validate(struct r300_context *r300,
boolean do_validate_vertex_buffers,
struct pipe_resource *index_buffer)
{
- struct pipe_framebuffer_state* fb =
+ struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_textures_state *texstate =
(struct r300_textures_state*)r300->textures_state.state;
- struct r300_texture* tex;
- struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
- struct pipe_vertex_element *velem = r300->velems->velem;
- struct pipe_resource *pbuf;
+ struct r300_texture *tex;
unsigned i;
-
- /* upload buffers first */
- if (r300->screen->caps.has_tcl && r300->any_user_vbs) {
- r300_upload_user_buffers(r300);
- r300->any_user_vbs = false;
+ boolean flushed = FALSE;
+
+validate:
+ if (r300->fb_state.dirty) {
+ /* Color buffers... */
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ tex = r300_texture(fb->cbufs[i]->texture);
+ assert(tex && tex->buffer && "cbuf is marked, but NULL!");
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0,
+ r300_surface(fb->cbufs[i])->domain);
+ }
+ /* ...depth buffer... */
+ if (fb->zsbuf) {
+ tex = r300_texture(fb->zsbuf->texture);
+ assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0,
+ r300_surface(fb->zsbuf)->domain);
+ }
}
+ if (r300->textures_state.dirty) {
+ /* ...textures... */
+ for (i = 0; i < texstate->count; i++) {
+ if (!(texstate->tx_enable & (1 << i))) {
+ continue;
+ }
- /* Clean out BOs. */
- r300->rws->cs_reset_buffers(r300->cs);
-
- /* Color buffers... */
- for (i = 0; i < fb->nr_cbufs; i++) {
- tex = r300_texture(fb->cbufs[i]->texture);
- assert(tex && tex->buffer && "cbuf is marked, but NULL!");
- r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0,
- r300_surface(fb->cbufs[i])->domain);
- }
- /* ...depth buffer... */
- if (fb->zsbuf) {
- tex = r300_texture(fb->zsbuf->texture);
- assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
- r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0,
- r300_surface(fb->zsbuf)->domain);
- }
- /* ...textures... */
- for (i = 0; i < texstate->count; i++) {
- if (!(texstate->tx_enable & (1 << i))) {
- continue;
+ tex = r300_texture(texstate->sampler_views[i]->base.texture);
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, tex->domain, 0);
}
-
- tex = r300_texture(texstate->sampler_views[i]->base.texture);
- r300->rws->cs_add_buffer(r300->cs, tex->buffer, tex->domain, 0);
}
/* ...occlusion query buffer... */
if (r300->query_current)
- r300->rws->cs_add_buffer(r300->cs, r300->query_current->buffer,
- 0, r300->query_current->domain);
+ r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buffer,
+ 0, r300->query_current->domain);
/* ...vertex buffer for SWTCL path... */
if (r300->vbo)
- r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->buf,
- r300_buffer(r300->vbo)->domain, 0);
+ r300->rws->cs_add_reloc(r300->cs, r300_buffer(r300->vbo)->cs_buf,
+ r300_buffer(r300->vbo)->domain, 0);
/* ...vertex buffers for HWTCL path... */
if (do_validate_vertex_buffers) {
- for (i = 0; i < r300->velems->count; i++) {
- pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
-
- r300->rws->cs_add_buffer(r300->cs, r300_buffer(pbuf)->buf,
- r300_buffer(pbuf)->domain, 0);
+ struct pipe_resource **buf = r300->valid_vertex_buffer;
+ struct pipe_resource **last = r300->valid_vertex_buffer +
+ r300->vertex_buffer_count;
+ for (; buf != last; buf++) {
+ if (!*buf)
+ continue;
+
+ r300->rws->cs_add_reloc(r300->cs, r300_buffer(*buf)->cs_buf,
+ r300_buffer(*buf)->domain, 0);
}
}
/* ...and index buffer for HWTCL path. */
if (index_buffer)
- r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->buf,
- r300_buffer(index_buffer)->domain, 0);
+ r300->rws->cs_add_reloc(r300->cs, r300_buffer(index_buffer)->cs_buf,
+ r300_buffer(index_buffer)->domain, 0);
+ /* Now do the validation. */
if (!r300->rws->cs_validate(r300->cs)) {
- return FALSE;
+ /* Ooops, an infinite loop, give up. */
+ if (flushed)
+ return FALSE;
+
+ r300->context.flush(&r300->context, 0, NULL);
+ flushed = TRUE;
+ goto validate;
}
return TRUE;
@@ -1241,7 +1296,7 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300)
struct r300_atom* atom;
unsigned dwords = 0;
- foreach(atom, &r300->atom_list) {
+ foreach_dirty_atom(r300, atom) {
if (atom->dirty) {
dwords += atom->size;
}
@@ -1260,7 +1315,7 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)
/* Emitted in flush. */
dwords += 26; /* emit_query_end */
dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */
- if (r500_index_bias_supported(r300))
+ if (r300->screen->caps.index_bias_supported)
dwords += 2;
return dwords;
@@ -1269,17 +1324,16 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)
/* Emit all dirty state. */
void r300_emit_dirty_state(struct r300_context* r300)
{
- struct r300_atom* atom;
+ struct r300_atom *atom;
- foreach(atom, &r300->atom_list) {
+ foreach_dirty_atom(r300, atom) {
if (atom->dirty) {
atom->emit(r300, atom->size, atom->state);
- if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
- atom->counter++;
- }
atom->dirty = FALSE;
}
}
+ r300->first_dirty = NULL;
+ r300->last_dirty = NULL;
r300->dirty_hw++;
}
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 278dbcb4c7..acea51d942 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -31,7 +31,7 @@ struct r300_vertex_program_code;
uint32_t pack_float24(float f);
-void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed);
+void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed);
void r300_emit_blend_state(struct r300_context* r300,
unsigned size, void* state);
@@ -86,7 +86,7 @@ void r300_emit_scissor_state(struct r300_context* r300,
void r300_emit_textures_state(struct r300_context *r300,
unsigned size, void *state);
-void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed);
+void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed);
void r300_emit_vap_invariant_state(struct r300_context *r300,
unsigned size, void *state);
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 1afd27f093..b250532ba9 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -49,7 +49,7 @@ static void r300_flush(struct pipe_context* pipe,
if (r300->dirty_hw) {
r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
- if (r500_index_bias_supported(r300))
+ if (r300->screen->caps.index_bias_supported)
r500_emit_index_bias(r300, 0);
r300->flush_counter++;
@@ -57,9 +57,9 @@ static void r300_flush(struct pipe_context* pipe,
r300->dirty_hw = 0;
/* New kitchen sink, baby. */
- foreach(atom, &r300->atom_list) {
+ foreach_atom(r300, atom) {
if (atom->state || atom->allow_null_state) {
- atom->dirty = TRUE;
+ r300_mark_atom_dirty(r300, atom);
}
}
@@ -68,6 +68,14 @@ static void r300_flush(struct pipe_context* pipe,
r300->vs_state.dirty = FALSE;
r300->vs_constants.dirty = FALSE;
}
+
+ r300->validate_buffers = TRUE;
+ r300->upload_vb_validated = FALSE;
+ r300->upload_ib_validated = FALSE;
+ } else {
+ /* Even if hw is not dirty, we should at least reset the CS in case
+ * the space checking failed for the first draw operation. */
+ r300->rws->cs_flush(r300->cs);
}
/* reset flushed query */
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index c91532eb7b..6d4091dc87 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -390,12 +390,18 @@ static void r300_translate_fragment_shader(
compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
- compiler.Base.remove_unused_constants = TRUE;
compiler.AllocateHwInputs = &allocate_hardware_inputs;
compiler.UserData = &shader->inputs;
find_output_registers(&compiler, shader);
+ shader->write_all = FALSE;
+ for (i = 0; i < shader->info.num_properties; i++) {
+ if (shader->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
+ shader->write_all = TRUE;
+ }
+ }
+
if (compiler.Base.Debug & RC_DBG_LOG) {
DBG(r300, DBG_FP, "r300: Initial fragment program\n");
tgsi_dump(tokens, 0);
@@ -408,6 +414,11 @@ static void r300_translate_fragment_shader(
r300_tgsi_to_rc(&ttr, tokens);
+ if (!r300->screen->caps.is_r500 ||
+ compiler.Base.Program.Constants.Count > 200) {
+ compiler.Base.remove_unused_constants = TRUE;
+ }
+
/**
* Transform the program to support WPOS.
*
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 51bfa88c5e..c86a90b85a 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -54,6 +54,9 @@ struct r300_fragment_shader_code {
uint32_t *cb_code;
struct r300_fragment_shader_code* next;
+
+ boolean write_all;
+
};
struct r300_fragment_shader {
@@ -81,4 +84,10 @@ static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_sha
return (fs->shader->code.writes_depth) ? TRUE : FALSE;
}
+static INLINE boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs)
+{
+ if (!fs)
+ return FALSE;
+ return (fs->shader->write_all) ? TRUE : FALSE;
+}
#endif /* R300_FS_H */
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index 79f7f8abe9..c22e307c67 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -158,8 +158,8 @@ static void r300_update_hyperz(struct r300_context* r300)
if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
return;
- zmask_in_use = zstex->zmask_in_use[fb->zsbuf->level];
- hiz_in_use = zstex->hiz_in_use[fb->zsbuf->level];
+ zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level];
+ hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level];
/* Z fastfill. */
if (zmask_in_use) {
@@ -282,7 +282,7 @@ static void r300_update_ztop(struct r300_context* r300)
ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
}
if (ztop_state->z_buffer_top != old_ztop)
- r300->ztop_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->ztop_state);
}
#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
@@ -333,7 +333,7 @@ void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf)
{
struct r300_texture *tex;
uint32_t zsize, ndw;
- int level = surf->base.level;
+ int level = surf->base.u.tex.level;
tex = r300_texture(surf->base.texture);
@@ -352,7 +352,7 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf
{
int bsize = 256;
uint32_t zsize, ndw;
- int level = surf->base.level;
+ int level = surf->base.u.tex.level;
struct r300_texture *tex;
tex = r300_texture(surf->base.texture);
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 5f34fcb274..6223e04321 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -60,6 +60,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096,
PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM,
q->domain);
+ q->cs_buffer = r300->rws->buffer_get_cs_handle(r300->rws, q->buffer);
return (struct pipe_query*)q;
}
@@ -79,7 +80,7 @@ void r300_resume_query(struct r300_context *r300,
struct r300_query *query)
{
r300->query_current = query;
- r300->query_start.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->query_start);
}
static void r300_begin_query(struct pipe_context* pipe,
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 6bea783f69..d1154dee40 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -427,7 +427,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_PVS_CONST_START 512
# define R500_PVS_CONST_START 1024
# define R300_MAX_PVS_CONST_VECS 256
-# define R500_MAX_PVS_CONST_VECS 1024
+# define R500_MAX_PVS_CONST_VECS 256
# define R300_PVS_UCP_START 1024
# define R500_PVS_UCP_START 1536
# define R300_POINT_VPORT_SCALE_OFFSET 1030
@@ -553,6 +553,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* Addresses are relative to the vertex program parameters area. */
#define R300_VAP_PVS_CONST_CNTL 0x22D4
# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0
+# define R300_PVS_CONST_BASE_OFFSET(x) (x)
# define R300_PVS_MAX_CONST_ADDR_SHIFT 16
# define R300_PVS_MAX_CONST_ADDR(x) ((x) << 16)
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
@@ -1520,11 +1521,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_TRI_PERF_3_8 (3<<15)
# define R300_ANISO_THRESHOLD_MASK (7<<17)
+# define R400_DXTC_SWIZZLE_ENABLE (1<<21)
# define R500_MACRO_SWITCH (1<<22)
# define R500_TX_MAX_ANISO(x) ((x) << 23)
# define R500_TX_MAX_ANISO_MASK (63 << 23)
# define R500_TX_ANISO_HIGH_QUALITY (1 << 30)
-
# define R500_BORDER_FIX (1<<31)
#define R300_TX_FORMAT0_0 0x4480
@@ -2630,8 +2631,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_ZB_BW_CNTL 0x4f1c
# define R300_HIZ_DISABLE (0 << 0)
# define R300_HIZ_ENABLE (1 << 0)
-# define R300_HIZ_MIN (0 << 1)
-# define R300_HIZ_MAX (1 << 1)
+# define R300_HIZ_MAX (0 << 1)
+# define R300_HIZ_MIN (1 << 1)
# define R300_FAST_FILL_DISABLE (0 << 2)
# define R300_FAST_FILL_ENABLE (1 << 2)
# define R300_RD_COMP_DISABLE (0 << 3)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 60700cf303..b35822c82f 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -39,7 +39,6 @@
#include "r300_screen_buffer.h"
#include "r300_emit.h"
#include "r300_reg.h"
-#include "r300_state_derived.h"
#include <limits.h>
@@ -118,12 +117,6 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
return color_control;
}
-boolean r500_index_bias_supported(struct r300_context *r300)
-{
- return r300->screen->caps.is_r500 &&
- r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
-}
-
void r500_emit_index_bias(struct r300_context *r300, int index_bias)
{
CS_LOCALS(r300);
@@ -136,7 +129,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias)
/* This function splits the index bias value into two parts:
* - buffer_offset: the value that can be safely added to buffer offsets
- * in r300_emit_aos (it must yield a positive offset when added to
+ * in r300_emit_vertex_arrays (it must yield a positive offset when added to
* a vertex buffer offset)
* - index_offset: the value that must be manually subtracted from indices
* in an index buffer to achieve negative offsets. */
@@ -172,8 +165,8 @@ static void r300_split_index_bias(struct r300_context *r300, int index_bias,
enum r300_prepare_flags {
PREP_FIRST_DRAW = (1 << 0), /* call emit_dirty_state and friends? */
PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */
- PREP_EMIT_AOS = (1 << 2), /* call emit_aos? */
- PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_aos_swtcl? */
+ PREP_EMIT_AOS = (1 << 2), /* call emit_vertex_arrays? */
+ PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */
PREP_INDEXED = (1 << 4) /* is this draw_elements? */
};
@@ -191,28 +184,27 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300,
{
boolean flushed = FALSE;
boolean first_draw = flags & PREP_FIRST_DRAW;
- boolean emit_aos = flags & PREP_EMIT_AOS;
- boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL;
- boolean hw_index_bias = r500_index_bias_supported(r300);
+ boolean emit_vertex_arrays = flags & PREP_EMIT_AOS;
+ boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL;
/* Add dirty state, index offset, and AOS. */
if (first_draw) {
cs_dwords += r300_get_num_dirty_dwords(r300);
- if (hw_index_bias)
+ if (r300->screen->caps.index_bias_supported)
cs_dwords += 2; /* emit_index_offset */
- if (emit_aos)
- cs_dwords += 55; /* emit_aos */
+ if (emit_vertex_arrays)
+ cs_dwords += 55; /* emit_vertex_arrays */
- if (emit_aos_swtcl)
- cs_dwords += 7; /* emit_aos_swtcl */
+ if (emit_vertex_arrays_swtcl)
+ cs_dwords += 7; /* emit_vertex_arrays_swtcl */
}
cs_dwords += r300_get_num_cs_end_dwords(r300);
/* Reserve requested CS space. */
- if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) {
+ if (cs_dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
r300->context.flush(&r300->context, 0, NULL);
flushed = TRUE;
}
@@ -225,44 +217,57 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300,
* \param r300 The context.
* \param flags See r300_prepare_flags.
* \param index_buffer The index buffer to validate. The parameter may be NULL.
- * \param aos_offset The offset passed to emit_aos.
+ * \param buffer_offset The offset passed to emit_vertex_arrays.
* \param index_bias The index bias to emit.
* \return TRUE if rendering should be skipped
*/
static boolean r300_emit_states(struct r300_context *r300,
enum r300_prepare_flags flags,
struct pipe_resource *index_buffer,
- int aos_offset,
+ int buffer_offset,
int index_bias)
{
boolean first_draw = flags & PREP_FIRST_DRAW;
- boolean emit_aos = flags & PREP_EMIT_AOS;
- boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL;
+ boolean emit_vertex_arrays = flags & PREP_EMIT_AOS;
+ boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL;
boolean indexed = flags & PREP_INDEXED;
- boolean hw_index_bias = r500_index_bias_supported(r300);
+ boolean validate_vbos = flags & PREP_VALIDATE_VBOS;
/* Validate buffers and emit dirty state if needed. */
if (first_draw) {
- if (!r300_emit_buffer_validate(r300, flags & PREP_VALIDATE_VBOS,
- index_buffer)) {
- fprintf(stderr, "r300: CS space validation failed. "
- "(not enough memory?) Skipping rendering.\n");
- return FALSE;
+ if (r300->validate_buffers) {
+ if (!r300_emit_buffer_validate(r300, validate_vbos,
+ index_buffer)) {
+ fprintf(stderr, "r300: CS space validation failed. "
+ "(not enough memory?) Skipping rendering.\n");
+ return FALSE;
+ }
+
+ /* Consider the validation done only if everything was validated. */
+ if (validate_vbos) {
+ r300->validate_buffers = FALSE;
+ if (r300->any_user_vbs)
+ r300->upload_vb_validated = TRUE;
+ if (r300->index_buffer.buffer &&
+ r300_is_user_buffer(r300->index_buffer.buffer)) {
+ r300->upload_ib_validated = TRUE;
+ }
+ }
}
r300_emit_dirty_state(r300);
- if (hw_index_bias) {
+ if (r300->screen->caps.index_bias_supported) {
if (r300->screen->caps.has_tcl)
r500_emit_index_bias(r300, index_bias);
else
r500_emit_index_bias(r300, 0);
}
- if (emit_aos)
- r300_emit_aos(r300, aos_offset, indexed);
+ if (emit_vertex_arrays)
+ r300_emit_vertex_arrays(r300, buffer_offset, indexed);
- if (emit_aos_swtcl)
- r300_emit_aos_swtcl(r300, indexed);
+ if (emit_vertex_arrays_swtcl)
+ r300_emit_vertex_arrays_swtcl(r300, indexed);
}
return TRUE;
@@ -275,7 +280,7 @@ static boolean r300_emit_states(struct r300_context *r300,
* \param flags See r300_prepare_flags.
* \param index_buffer The index buffer to validate. The parameter may be NULL.
* \param cs_dwords The number of dwords to reserve in CS.
- * \param aos_offset The offset passed to emit_aos.
+ * \param buffer_offset The offset passed to emit_vertex_arrays.
* \param index_bias The index bias to emit.
* \return TRUE if rendering should be skipped
*/
@@ -283,20 +288,20 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300,
enum r300_prepare_flags flags,
struct pipe_resource *index_buffer,
unsigned cs_dwords,
- int aos_offset,
+ int buffer_offset,
int index_bias)
{
if (r300_reserve_cs_dwords(r300, flags, cs_dwords))
flags |= PREP_FIRST_DRAW;
- return r300_emit_states(r300, flags, index_buffer, aos_offset, index_bias);
+ return r300_emit_states(r300, flags, index_buffer, buffer_offset, index_bias);
}
static boolean immd_is_good_idea(struct r300_context *r300,
unsigned count)
{
struct pipe_vertex_element* velem;
- struct pipe_vertex_buffer* vbuf;
+ struct pipe_resource *buf;
boolean checked[PIPE_MAX_ATTRIBS] = {0};
unsigned vertex_element_count = r300->velems->count;
unsigned i, vbi;
@@ -320,14 +325,13 @@ static boolean immd_is_good_idea(struct r300_context *r300,
vbi = velem->vertex_buffer_index;
if (!checked[vbi]) {
- vbuf = &r300->vertex_buffer[vbi];
+ buf = r300->valid_vertex_buffer[vbi];
- if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) {
+ if (!(r300_buffer(buf)->domain & R300_DOMAIN_GTT)) {
return FALSE;
}
- if (r300_buffer_is_referenced(&r300->context,
- vbuf->buffer,
+ if (r300_buffer_is_referenced(&r300->context, buf,
R300_REF_CS | R300_REF_HW)) {
/* It's a very bad idea to map it... */
return FALSE;
@@ -386,7 +390,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
/* Map the buffer. */
if (!transfer[vbi]) {
map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context,
- vbuf->buffer,
+ r300->valid_vertex_buffer[vbi],
PIPE_TRANSFER_READ,
&transfer[vbi]);
map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start;
@@ -418,8 +422,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
vbi = r300->velems->velem[i].vertex_buffer_index;
if (transfer[vbi]) {
- vbuf = &r300->vertex_buffer[vbi];
- pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]);
+ pipe_buffer_unmap(&r300->context, transfer[vbi]);
transfer[vbi] = NULL;
}
}
@@ -461,10 +464,10 @@ static void r300_emit_draw_elements(struct r300_context *r300,
unsigned maxIndex,
unsigned mode,
unsigned start,
- unsigned count)
+ unsigned count,
+ uint16_t *imm_indices3)
{
- uint32_t count_dwords;
- uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
+ uint32_t count_dwords, offset_dwords;
boolean alt_num_verts = count > 65535;
CS_LOCALS(r300);
@@ -474,20 +477,42 @@ static void r300_emit_draw_elements(struct r300_context *r300,
return;
}
- maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);
-
DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n",
count, minIndex, maxIndex);
- BEGIN_CS(13 + (alt_num_verts ? 2 : 0));
- if (alt_num_verts) {
- OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
- }
+ BEGIN_CS(5);
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, mode));
OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
OUT_CS(maxIndex);
OUT_CS(minIndex);
+ END_CS;
+
+ /* If start is odd, render the first triangle with indices embedded
+ * in the command stream. This will increase start by 3 and make it
+ * even. We can then proceed without a fallback. */
+ if (indexSize == 2 && (start & 1) &&
+ mode == PIPE_PRIM_TRIANGLES) {
+ BEGIN_CS(4);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 2);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (3 << 16) |
+ R300_VAP_VF_CNTL__PRIM_TRIANGLES);
+ OUT_CS(imm_indices3[1] << 16 | imm_indices3[0]);
+ OUT_CS(imm_indices3[2]);
+ END_CS;
+
+ start += 3;
+ count -= 3;
+ if (!count)
+ return;
+ }
+
+ offset_dwords = indexSize * start / sizeof(uint32_t);
+
+ BEGIN_CS(8 + (alt_num_verts ? 2 : 0));
+ if (alt_num_verts) {
+ OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
+ }
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
if (indexSize == 4) {
count_dwords = count;
@@ -511,16 +536,13 @@ static void r300_emit_draw_elements(struct r300_context *r300,
OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
(0 << R300_INDX_BUFFER_SKIP_SHIFT));
OUT_CS(offset_dwords << 2);
- OUT_CS_BUF_RELOC(indexBuffer, count_dwords,
- r300_buffer(indexBuffer)->domain, 0);
+ OUT_CS_BUF_RELOC(indexBuffer, count_dwords);
END_CS;
}
/* This is the fast-path drawing & emission for HW TCL. */
static void r300_draw_range_elements(struct pipe_context* pipe,
- struct pipe_resource* indexBuffer,
- unsigned indexSize,
int indexBias,
unsigned minIndex,
unsigned maxIndex,
@@ -529,41 +551,66 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
unsigned count)
{
struct r300_context* r300 = r300_context(pipe);
+ struct pipe_resource *indexBuffer = r300->index_buffer.buffer;
+ unsigned indexSize = r300->index_buffer.index_size;
struct pipe_resource* orgIndexBuffer = indexBuffer;
boolean alt_num_verts = r300->screen->caps.is_r500 &&
count > 65536 &&
r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
unsigned short_count;
int buffer_offset = 0, index_offset = 0; /* for index bias emulation */
- unsigned new_offset;
+ uint16_t indices3[3];
- if (indexBias && !r500_index_bias_supported(r300)) {
+ if (indexBias && !r300->screen->caps.index_bias_supported) {
r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset);
}
r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset,
&start, count);
- r300_update_derived_state(r300);
- r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count, &new_offset);
+ /* Fallback for misaligned ushort indices. */
+ if (indexSize == 2 && (start & 1) &&
+ !r300_is_user_buffer(indexBuffer)) {
+ struct pipe_transfer *transfer;
+ struct pipe_resource *userbuf;
- start = new_offset;
+ uint16_t *ptr = pipe_buffer_map(pipe, indexBuffer,
+ PIPE_TRANSFER_READ, &transfer);
- /* 15 dwords for emit_draw_elements. Give up if the function fails. */
+ if (mode == PIPE_PRIM_TRIANGLES) {
+ memcpy(indices3, ptr + start, 6);
+ } else {
+ /* Copy the mapped index buffer directly to the upload buffer.
+ * The start index will be aligned simply from the fact that
+ * every sub-buffer in u_upload_mgr is aligned. */
+ userbuf = pipe->screen->user_buffer_create(pipe->screen,
+ ptr, 0,
+ PIPE_BIND_INDEX_BUFFER);
+ indexBuffer = userbuf;
+ r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count);
+ pipe_resource_reference(&userbuf, NULL);
+ }
+ pipe_buffer_unmap(pipe, transfer);
+ } else {
+ if (r300_is_user_buffer(indexBuffer))
+ r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count);
+ }
+
+ /* 19 dwords for emit_draw_elements. Give up if the function fails. */
if (!r300_prepare_for_rendering(r300,
PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS |
- PREP_INDEXED, indexBuffer, 15, buffer_offset, indexBias))
+ PREP_INDEXED, indexBuffer, 19, buffer_offset, indexBias))
goto done;
if (alt_num_verts || count <= 65535) {
r300_emit_draw_elements(r300, indexBuffer, indexSize,
- minIndex, maxIndex, mode, start, count);
+ minIndex, maxIndex, mode, start, count, indices3);
} else {
do {
short_count = MIN2(count, 65534);
r300_emit_draw_elements(r300, indexBuffer, indexSize,
minIndex, maxIndex,
- mode, start, short_count);
+ mode, start, short_count, indices3);
start += short_count;
count -= short_count;
@@ -572,7 +619,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
if (count) {
if (!r300_prepare_for_rendering(r300,
PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
- indexBuffer, 15, buffer_offset, indexBias))
+ indexBuffer, 19, buffer_offset, indexBias))
goto done;
}
} while (count);
@@ -593,8 +640,6 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
unsigned short_count;
- r300_update_derived_state(r300);
-
if (immd_is_good_idea(r300, count)) {
r300_emit_draw_arrays_immediate(r300, mode, start, count);
} else {
@@ -633,7 +678,8 @@ static void r300_draw_vbo(struct pipe_context* pipe,
unsigned count = info->count;
boolean translate = FALSE;
boolean indexed = info->indexed && r300->index_buffer.buffer;
- unsigned start_indexed = 0;
+ unsigned min_index = 0;
+ unsigned max_index = r300->vertex_buffer_max_index;
if (r300->skip_rendering) {
return;
@@ -643,43 +689,65 @@ static void r300_draw_vbo(struct pipe_context* pipe,
return;
}
- /* Index buffer range checking. */
if (indexed) {
- assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0);
-
- /* Compute start for draw_elements, taking the offset into account. */
- start_indexed =
+ int real_min_index, real_max_index;
+ /* Compute the start for draw_elements, taking the offset into account. */
+ unsigned start_indexed =
info->start +
(r300->index_buffer.offset / r300->index_buffer.index_size);
+ assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0);
+
+ /* Index buffer range checking. */
if ((start_indexed + count) * r300->index_buffer.index_size >
r300->index_buffer.buffer->width0) {
fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n");
return;
}
- }
- /* Set up fallback for incompatible vertex layout if needed. */
- if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
- r300_begin_vertex_translate(r300);
- translate = TRUE;
- }
+ min_index = MAX2(min_index, info->min_index);
+ max_index = MIN2(max_index, info->max_index);
+ real_min_index = (int)min_index - info->index_bias;
+ real_max_index = (int)max_index - info->index_bias;
- if (indexed) {
- r300_draw_range_elements(pipe,
- r300->index_buffer.buffer,
- r300->index_buffer.index_size,
- info->index_bias,
- info->min_index,
- info->max_index,
- info->mode,
- start_indexed,
- count);
+ if (max_index >= (1 << 24) - 1) {
+ fprintf(stderr, "r300: Invalid max_index: %i. Skipping rendering...\n", max_index);
+ return;
+ }
+
+ r300_update_derived_state(r300);
+
+ /* Set up the fallback for an incompatible vertex layout if needed. */
+ if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
+ r300_begin_vertex_translate(r300, real_min_index, real_max_index);
+ translate = TRUE;
+ }
+
+ /* Upload vertex buffers. */
+ if (r300->any_user_vbs) {
+ r300_upload_user_buffers(r300, real_min_index, real_max_index);
+ }
+
+ r300_draw_range_elements(pipe, info->index_bias, min_index, max_index,
+ info->mode, start_indexed, count);
} else {
- r300_draw_arrays(pipe,
- info->mode,
- info->start,
- count);
+ min_index = MAX2(min_index, info->start);
+ max_index = MIN2(max_index, info->start + count - 1);
+
+ r300_update_derived_state(r300);
+
+ /* Set up the fallback for an incompatible vertex layout if needed. */
+ if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
+ r300_begin_vertex_translate(r300, min_index, max_index);
+ translate = TRUE;
+ }
+
+ /* Upload vertex buffers. */
+ if (r300->any_user_vbs) {
+ r300_upload_user_buffers(r300, min_index, max_index);
+ }
+
+ r300_draw_arrays(pipe, info->mode, info->start, count);
}
if (translate) {
@@ -744,14 +812,13 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
for (i = 0; i < r300->vertex_buffer_count; i++) {
if (r300->vertex_buffer[i].buffer) {
- pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
- vb_transfer[i]);
+ pipe_buffer_unmap(pipe, vb_transfer[i]);
draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
}
}
if (indexed) {
- pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer);
+ pipe_buffer_unmap(pipe, ib_transfer);
draw_set_mapped_index_buffer(r300->draw, NULL);
}
}
@@ -810,6 +877,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
R300_MAX_DRAW_VBO_SIZE);
r300->draw_vbo_offset = 0;
r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE;
+ r300->validate_buffers = TRUE;
}
r300render->vertex_size = vertex_size;
@@ -850,7 +918,7 @@ static void r300_render_unmap_vertices(struct vbuf_render* render,
r300render->vbo_max_used = MAX2(r300render->vbo_max_used,
r300render->vertex_size * (max + 1));
- pipe_buffer_unmap(context, r300->vbo, r300render->vbo_transfer);
+ pipe_buffer_unmap(context, r300render->vbo_transfer);
r300render->vbo_transfer = NULL;
}
@@ -967,7 +1035,7 @@ static void r300_render_draw_elements(struct vbuf_render* render,
end_cs_dwords = r300_get_num_cs_end_dwords(r300);
while (count) {
- free_dwords = r300->cs->ndw - r300->cs->cdw;
+ free_dwords = R300_MAX_CMDBUF_DWORDS - r300->cs->cdw;
short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2);
@@ -1015,8 +1083,7 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300)
r300render->r300 = r300;
- /* XXX find real numbers plz */
- r300render->base.max_vertex_buffer_bytes = 128 * 1024;
+ r300render->base.max_vertex_buffer_bytes = 1024 * 1024;
r300render->base.max_indices = 16 * 1024;
r300render->base.get_vertex_info = r300_render_get_vertex_info;
@@ -1088,6 +1155,8 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
const float zeros[4] = {0, 0, 0, 0};
CS_LOCALS(r300);
+ r300->context.set_vertex_buffers(&r300->context, 0, NULL);
+
if (type == UTIL_BLITTER_ATTRIB_TEXCOORD)
r300->sprite_coord_enable = 1;
@@ -1144,37 +1213,45 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
done:
/* Restore the state. */
- r300->clip_state.dirty = TRUE;
- r300->rs_state.dirty = TRUE;
- r300->viewport_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->clip_state);
+ r300_mark_atom_dirty(r300, &r300->rs_state);
+ r300_mark_atom_dirty(r300, &r300->viewport_state);
r300->sprite_coord_enable = last_sprite_coord_enable;
}
static void r300_resource_resolve(struct pipe_context* pipe,
struct pipe_resource* dest,
- struct pipe_subresource subdest,
+ unsigned dst_layer,
struct pipe_resource* src,
- struct pipe_subresource subsrc)
+ unsigned src_layer)
{
struct r300_context* r300 = r300_context(pipe);
+ struct pipe_surface* srcsurf, surf_tmpl;
struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
- struct pipe_surface* srcsurf = src->screen->get_tex_surface(src->screen,
- src, subsrc.face, subsrc.level, 0, 0);
float color[] = {0, 0, 0, 0};
+ memset(&surf_tmpl, 0, sizeof(surf_tmpl));
+ surf_tmpl.format = src->format;
+ surf_tmpl.usage = 0; /* not really a surface hence no bind flags */
+ surf_tmpl.u.tex.level = 0; /* msaa resources cannot have mipmaps */
+ surf_tmpl.u.tex.first_layer = src_layer;
+ surf_tmpl.u.tex.last_layer = src_layer;
+ srcsurf = pipe->create_surface(pipe, src, &surf_tmpl);
+ surf_tmpl.format = dest->format;
+ surf_tmpl.u.tex.first_layer = dst_layer;
+ surf_tmpl.u.tex.last_layer = dst_layer;
+
DBG(r300, DBG_DRAW, "r300: Resolving resource...\n");
/* Enable AA resolve. */
- aa->dest = r300_surface(
- dest->screen->get_tex_surface(dest->screen, dest, subdest.face,
- subdest.level, 0, 0));
+ aa->dest = r300_surface(pipe->create_surface(pipe, dest, &surf_tmpl));
aa->aaresolve_ctl =
R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE;
r300->aa_state.size = 12;
- r300->aa_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->aa_state);
/* Resolve the surface. */
r300->context.clear_render_target(pipe,
@@ -1183,7 +1260,7 @@ static void r300_resource_resolve(struct pipe_context* pipe,
/* Disable AA resolve. */
aa->aaresolve_ctl = 0;
r300->aa_state.size = 4;
- r300->aa_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->aa_state);
pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL);
pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL);
diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c
index 1f035d64a2..747594afaf 100644
--- a/src/gallium/drivers/r300/r300_render_stencilref.c
+++ b/src/gallium/drivers/r300/r300_render_stencilref.c
@@ -66,7 +66,7 @@ static void r300_stencilref_begin(struct r300_context *r300)
/* We *cull* pixels, therefore no need to mask out the bits. */
rs->cb_main[rs->cull_mode_index] |= R300_CULL_BACK;
- r300->rs_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->rs_state);
}
/* Set drawing for back faces. */
@@ -80,8 +80,8 @@ static void r300_stencilref_switch_side(struct r300_context *r300)
dsa->stencil_ref_mask = dsa->stencil_ref_bf;
r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1];
- r300->rs_state.dirty = TRUE;
- r300->dsa_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->rs_state);
+ r300_mark_atom_dirty(r300, &r300->dsa_state);
}
/* Restore the original state. */
@@ -96,8 +96,8 @@ static void r300_stencilref_end(struct r300_context *r300)
dsa->stencil_ref_mask = sr->zb_stencilrefmask;
r300->stencil_ref.ref_value[0] = sr->ref_value_front;
- r300->rs_state.dirty = TRUE;
- r300->dsa_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->rs_state);
+ r300_mark_atom_dirty(r300, &r300->dsa_state);
}
static void r300_stencilref_draw_vbo(struct pipe_context *pipe,
diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c
index 9247064508..c48062c808 100644
--- a/src/gallium/drivers/r300/r300_render_translate.c
+++ b/src/gallium/drivers/r300/r300_render_translate.c
@@ -31,7 +31,10 @@
#include "translate/translate.h"
#include "util/u_index_modify.h"
-void r300_begin_vertex_translate(struct r300_context *r300)
+/* XXX Optimization: use min_index and translate only that range. */
+/* XXX Use the uploader. */
+void r300_begin_vertex_translate(struct r300_context *r300,
+ int min_index, int max_index)
{
struct pipe_context *pipe = &r300->context;
struct translate_key key = {0};
@@ -44,6 +47,7 @@ void r300_begin_vertex_translate(struct r300_context *r300)
struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
struct pipe_resource *out_buffer;
unsigned i, num_verts;
+ unsigned slot;
/* Initialize the translate key, i.e. the recipe how vertices should be
* translated. */
@@ -108,12 +112,12 @@ void r300_begin_vertex_translate(struct r300_context *r300)
vb_map[i] = pipe_buffer_map(pipe, vb->buffer,
PIPE_TRANSFER_READ, &vb_transfer[i]);
- tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index);
+ tr->set_buffer(tr, i, vb_map[i], vb->stride, max_index);
}
}
/* Create and map the output buffer. */
- num_verts = r300->vertex_buffer_max_index + 1;
+ num_verts = max_index + 1;
out_buffer = pipe_buffer_create(&r300->screen->screen,
PIPE_BIND_VERTEX_BUFFER,
@@ -128,26 +132,30 @@ void r300_begin_vertex_translate(struct r300_context *r300)
/* Unmap all buffers. */
for (i = 0; i < r300->vertex_buffer_count; i++) {
if (vb_translated[i]) {
- pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
- vb_transfer[i]);
+ pipe_buffer_unmap(pipe, vb_transfer[i]);
}
}
- pipe_buffer_unmap(pipe, out_buffer, out_transfer);
+ pipe_buffer_unmap(pipe, out_transfer);
/* Setup the new vertex buffer in the first free slot. */
+ slot = ~0;
for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i];
if (!vb->buffer) {
- pipe_resource_reference(&vb->buffer, out_buffer);
+ pipe_resource_reference(&r300->valid_vertex_buffer[i], out_buffer);
vb->buffer_offset = 0;
- vb->max_index = num_verts - 1;
vb->stride = key.output_stride;
- r300->tran.vb_slot = i;
+ slot = i;
+ /* XXX probably need to preserve the real count for u_blitter_save_*. */
+ r300->vertex_buffer_count = MAX2(r300->vertex_buffer_count, i+1);
+ r300->validate_buffers = TRUE;
break;
}
}
+ /* XXX This may fail. */
+ assert(slot != ~0);
/* Save and replace vertex elements. */
{
@@ -161,7 +169,7 @@ void r300_begin_vertex_translate(struct r300_context *r300)
new_velems[i].instance_divisor = ve->velem[i].instance_divisor;
new_velems[i].src_format = te->output_format;
new_velems[i].src_offset = te->output_offset;
- new_velems[i].vertex_buffer_index = r300->tran.vb_slot;
+ new_velems[i].vertex_buffer_index = slot;
} else {
memcpy(&new_velems[i], &ve->velem[i],
sizeof(struct pipe_vertex_element));
@@ -183,12 +191,9 @@ void r300_end_vertex_translate(struct r300_context *r300)
/* Restore vertex elements. */
pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems);
pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems);
-
- /* Delete the now-unused VBO. */
- pipe_resource_reference(&r300->vertex_buffer[r300->tran.vb_slot].buffer,
- NULL);
}
+/* XXX Use the uploader. */
void r300_translate_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned *index_size, unsigned index_offset,
@@ -199,12 +204,14 @@ void r300_translate_index_buffer(struct r300_context *r300,
util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count);
*index_size = 2;
*start = 0;
+ r300->validate_buffers = TRUE;
break;
case 2:
- if (*start % 2 != 0 || index_offset) {
+ if (index_offset) {
util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count);
*start = 0;
+ r300->validate_buffers = TRUE;
}
break;
@@ -212,6 +219,7 @@ void r300_translate_index_buffer(struct r300_context *r300,
if (index_offset) {
util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count);
*start = 0;
+ r300->validate_buffers = TRUE;
}
break;
}
diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c
index f6f33028dc..dd1df97059 100644
--- a/src/gallium/drivers/r300/r300_resource.c
+++ b/src/gallium/drivers/r300/r300_resource.c
@@ -58,6 +58,8 @@ void r300_init_resource_functions(struct r300_context *r300)
r300->context.transfer_destroy = u_transfer_destroy_vtbl;
r300->context.transfer_inline_write = u_transfer_inline_write_vtbl;
r300->context.is_resource_referenced = u_is_resource_referenced_vtbl;
+ r300->context.create_surface = r300_create_surface;
+ r300->context.surface_destroy = r300_surface_destroy;
}
void r300_init_screen_resource_functions(struct r300_screen *r300screen)
@@ -67,7 +69,4 @@ void r300_init_screen_resource_functions(struct r300_screen *r300screen)
r300screen->screen.resource_get_handle = u_resource_get_handle_vtbl;
r300screen->screen.resource_destroy = u_resource_destroy_vtbl;
r300screen->screen.user_buffer_create = r300_user_buffer_create;
-
- r300screen->screen.get_tex_surface = r300_get_tex_surface;
- r300screen->screen.tex_surface_destroy = r300_tex_surface_destroy;
}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 5332866188..c75aeaa10a 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -34,6 +34,10 @@
#include "draw/draw_context.h"
+#ifdef HAVE_LLVM
+#include "gallivm/lp_bld_init.h"
+#endif
+
/* Return the identifier behind whom the brave coders responsible for this
* amalgamation of code, sweat, and duct tape, routinely obscure their names.
*
@@ -116,8 +120,9 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- case PIPE_CAP_TEXTURE_SWIZZLE:
return 1;
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1;
/* Unsupported features (boolean caps). */
case PIPE_CAP_TIMER_QUERY:
@@ -308,7 +313,9 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
unsigned usage,
unsigned geom_flags)
{
+ struct r300_winsys_screen *rws = r300_screen(screen)->rws;
uint32_t retval = 0;
+ boolean drm_2_8_0 = rws->get_value(rws, R300_VID_DRM_2_8_0);
boolean is_r500 = r300_screen(screen)->caps.is_r500;
boolean is_r400 = r300_screen(screen)->caps.is_r400;
boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM ||
@@ -362,7 +369,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED)) &&
/* 2101010 cannot be rendered to on non-r5xx. */
- (is_r500 || !is_color2101010) &&
+ (!is_color2101010 || (is_r500 && drm_2_8_0)) &&
r300_is_colorbuffer_format_supported(format)) {
retval |= usage &
(PIPE_BIND_RENDER_TARGET |
@@ -399,7 +406,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen)
struct r300_screen* r300screen = r300_screen(pscreen);
struct r300_winsys_screen *rws = r300_winsys_screen(pscreen);
- util_mempool_destroy(&r300screen->pool_buffers);
+ util_slab_destroy(&r300screen->pool_buffers);
if (rws)
rws->destroy(rws);
@@ -456,9 +463,13 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws)
r300_init_debug(r300screen);
r300_parse_chipset(&r300screen->caps);
- util_mempool_create(&r300screen->pool_buffers,
- sizeof(struct r300_buffer), 64,
- UTIL_MEMPOOL_SINGLETHREADED);
+ r300screen->caps.index_bias_supported =
+ r300screen->caps.is_r500 &&
+ rws->get_value(rws, R300_VID_DRM_2_3_0);
+
+ util_slab_create(&r300screen->pool_buffers,
+ sizeof(struct r300_buffer), 64,
+ UTIL_SLAB_SINGLETHREADED);
r300screen->rws = rws;
r300screen->screen.winsys = (struct pipe_winsys*)rws;
@@ -479,5 +490,9 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws)
util_format_s3tc_init();
+#ifdef HAVE_LLVM
+ lp_build_init();
+#endif
+
return &r300screen->screen;
}
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 8b7f1fab61..752f53b757 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -28,7 +28,7 @@
#include "r300_chipset.h"
-#include "util/u_mempool.h"
+#include "util/u_slab.h"
#include <stdio.h>
@@ -44,7 +44,7 @@ struct r300_screen {
struct r300_capabilities caps;
/* Memory pools. */
- struct util_mempool pool_buffers;
+ struct util_slab_mempool pool_buffers;
/** Combination of DBG_xxx flags */
unsigned debug;
@@ -93,6 +93,7 @@ r300_winsys_screen(struct pipe_screen *screen) {
#define DBG_CBZB (1 << 11)
#define DBG_HYPERZ (1 << 12)
#define DBG_SCISSOR (1 << 13)
+#define DBG_UPLOAD (1 << 14)
/* Features. */
#define DBG_ANISOHQ (1 << 16)
#define DBG_NO_TILING (1 << 17)
@@ -101,7 +102,6 @@ r300_winsys_screen(struct pipe_screen *screen) {
#define DBG_NO_OPT (1 << 20)
#define DBG_NO_CBZB (1 << 21)
/* Statistics. */
-#define DBG_STATS (1 << 24)
#define DBG_P_STAT (1 << 25)
/*@}*/
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index 37a080ba48..cc3c1d7687 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -40,10 +40,10 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context,
struct r300_context *r300 = r300_context(context);
struct r300_buffer *rbuf = r300_buffer(buf);
- if (r300_buffer_is_user_buffer(buf))
+ if (r300_is_user_buffer(buf))
return PIPE_UNREFERENCED;
- if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->buf, domain))
+ if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, domain))
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
return PIPE_UNREFERENCED;
@@ -51,76 +51,86 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context,
static unsigned r300_buffer_is_referenced_by_cs(struct pipe_context *context,
struct pipe_resource *buf,
- unsigned face, unsigned level)
+ unsigned level, int layer)
{
return r300_buffer_is_referenced(context, buf, R300_REF_CS);
}
-/* External helper, not required to implent u_resource_vtbl:
- */
-int r300_upload_index_buffer(struct r300_context *r300,
- struct pipe_resource **index_buffer,
- unsigned index_size,
- unsigned start,
- unsigned count,
- unsigned *out_offset)
+void r300_upload_index_buffer(struct r300_context *r300,
+ struct pipe_resource **index_buffer,
+ unsigned index_size, unsigned *start,
+ unsigned count)
{
- struct pipe_resource *upload_buffer = NULL;
- unsigned index_offset = start * index_size;
- int ret = 0;
-
- if (r300_buffer_is_user_buffer(*index_buffer)) {
- ret = u_upload_buffer(r300->upload_ib,
- index_offset,
- count * index_size,
- *index_buffer,
- &index_offset,
- &upload_buffer);
- if (ret) {
- goto done;
- }
- *index_buffer = upload_buffer;
- *out_offset = index_offset / index_size;
- } else
- *out_offset = start;
-
- done:
- // if (upload_buffer)
- // pipe_resource_reference(&upload_buffer, NULL);
- return ret;
+ unsigned index_offset;
+ uint8_t *ptr = r300_buffer(*index_buffer)->user_buffer;
+ boolean flushed;
+
+ *index_buffer = NULL;
+
+ u_upload_data(r300->upload_ib,
+ 0, count * index_size,
+ ptr + (*start * index_size),
+ &index_offset,
+ index_buffer, &flushed);
+
+ *start = index_offset / index_size;
+
+ if (flushed || !r300->upload_ib_validated) {
+ r300->upload_ib_validated = FALSE;
+ r300->validate_buffers = TRUE;
+ }
}
-/* External helper, not required to implement u_resource_vtbl:
- */
-int r300_upload_user_buffers(struct r300_context *r300)
+void r300_upload_user_buffers(struct r300_context *r300,
+ int min_index, int max_index)
{
- enum pipe_error ret = PIPE_OK;
- int i, nr;
-
- nr = r300->velems->count;
+ int i, nr = r300->velems->count;
+ unsigned count = max_index + 1 - min_index;
+ boolean flushed;
+ boolean uploaded[16] = {0};
for (i = 0; i < nr; i++) {
- struct pipe_vertex_buffer *vb =
- &r300->vertex_buffer[r300->velems->velem[i].vertex_buffer_index];
-
- if (r300_buffer_is_user_buffer(vb->buffer)) {
- struct pipe_resource *upload_buffer = NULL;
- unsigned offset = 0; /*vb->buffer_offset * 4;*/
- unsigned size = vb->buffer->width0;
- unsigned upload_offset;
- ret = u_upload_buffer(r300->upload_vb,
- offset, size,
- vb->buffer,
- &upload_offset, &upload_buffer);
- if (ret)
- return ret;
-
- pipe_resource_reference(&vb->buffer, NULL);
- vb->buffer = upload_buffer;
- vb->buffer_offset = upload_offset;
+ unsigned index = r300->velems->velem[i].vertex_buffer_index;
+ struct pipe_vertex_buffer *vb = &r300->vertex_buffer[index];
+ struct r300_buffer *userbuf = r300_buffer(vb->buffer);
+
+ if (userbuf && userbuf->user_buffer && !uploaded[index]) {
+ unsigned first, size;
+
+ if (vb->stride) {
+ first = vb->stride * min_index;
+ size = vb->stride * count;
+ } else {
+ first = 0;
+ size = r300->velems->hw_format_size[i];
+ }
+
+ DBG(r300, DBG_UPLOAD,
+ "Uploading %i bytes, index: %i, buffer: %p, userptr: %p "
+ "offset: %i, stride: %i.\n",
+ size, index, userbuf, userbuf->user_buffer,
+ vb->buffer_offset, vb->stride);
+
+ u_upload_data(r300->upload_vb, first, size,
+ userbuf->user_buffer + first,
+ &vb->buffer_offset,
+ &r300->valid_vertex_buffer[index],
+ &flushed);
+
+ vb->buffer_offset -= first;
+
+ r300->vertex_arrays_dirty = TRUE;
+
+ if (flushed || !r300->upload_vb_validated) {
+ r300->upload_vb_validated = FALSE;
+ r300->validate_buffers = TRUE;
+ }
+ uploaded[index] = TRUE;
+ } else {
+ assert(r300->valid_vertex_buffer[index]);
}
}
- return ret;
+ DBG(r300, DBG_UPLOAD, "-------\n");
}
static void r300_buffer_destroy(struct pipe_screen *screen,
@@ -136,26 +146,26 @@ static void r300_buffer_destroy(struct pipe_screen *screen,
if (rbuf->buf)
rws->buffer_reference(rws, &rbuf->buf, NULL);
- util_mempool_free(&r300screen->pool_buffers, rbuf);
+ util_slab_free(&r300screen->pool_buffers, rbuf);
}
static struct pipe_transfer*
-r300_default_get_transfer(struct pipe_context *context,
- struct pipe_resource *resource,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box)
+r300_buffer_get_transfer(struct pipe_context *context,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
struct r300_context *r300 = r300_context(context);
struct pipe_transfer *transfer =
- util_mempool_malloc(&r300->pool_transfers);
+ util_slab_alloc(&r300->pool_transfers);
transfer->resource = resource;
- transfer->sr = sr;
+ transfer->level = level;
transfer->usage = usage;
transfer->box = *box;
transfer->stride = 0;
- transfer->slice_stride = 0;
+ transfer->layer_stride = 0;
transfer->data = NULL;
/* Note strides are zero, this is ok for buffers, but not for
@@ -164,11 +174,11 @@ r300_default_get_transfer(struct pipe_context *context,
return transfer;
}
-static void r300_default_transfer_destroy(struct pipe_context *pipe,
- struct pipe_transfer *transfer)
+static void r300_buffer_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
{
struct r300_context *r300 = r300_context(pipe);
- util_mempool_free(&r300->pool_transfers, transfer);
+ util_slab_free(&r300->pool_transfers, transfer);
}
static void *
@@ -180,46 +190,17 @@ r300_buffer_transfer_map( struct pipe_context *pipe,
struct r300_winsys_screen *rws = r300screen->rws;
struct r300_buffer *rbuf = r300_buffer(transfer->resource);
uint8_t *map;
- boolean flush = FALSE;
- unsigned i;
if (rbuf->user_buffer)
return (uint8_t *) rbuf->user_buffer + transfer->box.x;
if (rbuf->constant_buffer)
return (uint8_t *) rbuf->constant_buffer + transfer->box.x;
- /* check if the mapping is to a range we already flushed */
- if (transfer->usage & PIPE_TRANSFER_DISCARD) {
- for (i = 0; i < rbuf->num_ranges; i++) {
- if ((transfer->box.x >= rbuf->ranges[i].start) &&
- (transfer->box.x < rbuf->ranges[i].end))
- flush = TRUE;
-
- if (flush) {
- /* unreference this hw buffer and allocate a new one */
- rws->buffer_reference(rws, &rbuf->buf, NULL);
-
- rbuf->num_ranges = 0;
- rbuf->buf =
- r300screen->rws->buffer_create(r300screen->rws,
- rbuf->b.b.width0, 16,
- rbuf->b.b.bind,
- rbuf->b.b.usage,
- rbuf->domain);
- break;
- }
- }
- }
-
map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage);
if (map == NULL)
return NULL;
- /* map_buffer() returned a pointer to the beginning of the buffer,
- * but transfers are expected to return a pointer to just the
- * region specified in the box.
- */
return map + transfer->box.x;
}
@@ -227,30 +208,7 @@ static void r300_buffer_transfer_flush_region( struct pipe_context *pipe,
struct pipe_transfer *transfer,
const struct pipe_box *box)
{
- struct r300_buffer *rbuf = r300_buffer(transfer->resource);
- unsigned i;
- unsigned offset = transfer->box.x + box->x;
- unsigned length = box->width;
-
- assert(box->x + box->width <= transfer->box.width);
-
- if (rbuf->user_buffer)
- return;
- if (rbuf->constant_buffer)
- return;
-
- /* mark the range as used */
- for(i = 0; i < rbuf->num_ranges; ++i) {
- if(offset <= rbuf->ranges[i].end && rbuf->ranges[i].start <= (offset+box->width)) {
- rbuf->ranges[i].start = MIN2(rbuf->ranges[i].start, offset);
- rbuf->ranges[i].end = MAX2(rbuf->ranges[i].end, (offset+length));
- return;
- }
- }
-
- rbuf->ranges[rbuf->num_ranges].start = offset;
- rbuf->ranges[rbuf->num_ranges].end = offset+length;
- rbuf->num_ranges++;
+ /* no-op */
}
static void r300_buffer_transfer_unmap( struct pipe_context *pipe,
@@ -265,17 +223,45 @@ static void r300_buffer_transfer_unmap( struct pipe_context *pipe,
}
}
-struct u_resource_vtbl r300_buffer_vtbl =
+static void r300_buffer_transfer_inline_write(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_winsys_screen *rws = r300->screen->rws;
+ struct r300_buffer *rbuf = r300_buffer(resource);
+ uint8_t *map = NULL;
+
+ if (rbuf->constant_buffer) {
+ memcpy(rbuf->constant_buffer + box->x, data, box->width);
+ return;
+ }
+ assert(rbuf->user_buffer == NULL);
+
+ map = rws->buffer_map(rws, rbuf->buf, r300->cs,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage);
+
+ memcpy(map + box->x, data, box->width);
+
+ rws->buffer_unmap(rws, rbuf->buf);
+}
+
+struct u_resource_vtbl r300_buffer_vtbl =
{
u_default_resource_get_handle, /* get_handle */
r300_buffer_destroy, /* resource_destroy */
r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */
- r300_default_get_transfer, /* get_transfer */
- r300_default_transfer_destroy, /* transfer_destroy */
+ r300_buffer_get_transfer, /* get_transfer */
+ r300_buffer_transfer_destroy, /* transfer_destroy */
r300_buffer_transfer_map, /* transfer_map */
r300_buffer_transfer_flush_region, /* transfer_flush_region */
r300_buffer_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
+ r300_buffer_transfer_inline_write /* transfer_inline_write */
};
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
@@ -285,7 +271,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
struct r300_buffer *rbuf;
unsigned alignment = 16;
- rbuf = util_mempool_malloc(&r300screen->pool_buffers);
+ rbuf = util_slab_alloc(&r300screen->pool_buffers);
rbuf->magic = R300_BUFFER_MAGIC;
@@ -294,7 +280,6 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
pipe_reference_init(&rbuf->b.b.reference, 1);
rbuf->b.b.screen = screen;
rbuf->domain = R300_DOMAIN_GTT;
- rbuf->num_ranges = 0;
rbuf->buf = NULL;
rbuf->constant_buffer = NULL;
rbuf->user_buffer = NULL;
@@ -310,9 +295,11 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
rbuf->b.b.width0, alignment,
rbuf->b.b.bind, rbuf->b.b.usage,
rbuf->domain);
+ rbuf->cs_buf =
+ r300screen->rws->buffer_get_cs_handle(r300screen->rws, rbuf->buf);
if (!rbuf->buf) {
- util_mempool_free(&r300screen->pool_buffers, rbuf);
+ util_slab_free(&r300screen->pool_buffers, rbuf);
return NULL;
}
@@ -320,14 +307,13 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
}
struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
- void *ptr,
- unsigned bytes,
+ void *ptr, unsigned size,
unsigned bind)
{
struct r300_screen *r300screen = r300_screen(screen);
struct r300_buffer *rbuf;
- rbuf = util_mempool_malloc(&r300screen->pool_buffers);
+ rbuf = util_slab_alloc(&r300screen->pool_buffers);
rbuf->magic = R300_BUFFER_MAGIC;
@@ -338,12 +324,12 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
rbuf->b.b.format = PIPE_FORMAT_R8_UNORM;
rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE;
rbuf->b.b.bind = bind;
- rbuf->b.b.width0 = bytes;
+ rbuf->b.b.width0 = ~0;
rbuf->b.b.height0 = 1;
rbuf->b.b.depth0 = 1;
+ rbuf->b.b.array_size = 1;
rbuf->b.b.flags = 0;
rbuf->domain = R300_DOMAIN_GTT;
- rbuf->num_ranges = 0;
rbuf->buf = NULL;
rbuf->constant_buffer = NULL;
rbuf->user_buffer = ptr;
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h
index cafa9f96f2..58dec8539b 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.h
+++ b/src/gallium/drivers/r300/r300_screen_buffer.h
@@ -51,32 +51,30 @@ struct r300_buffer
uint32_t magic;
struct r300_winsys_buffer *buf;
+ struct r300_winsys_cs_buffer *cs_buf;
enum r300_buffer_domain domain;
- void *user_buffer;
- void *constant_buffer;
- struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES];
- unsigned num_ranges;
+ uint8_t *user_buffer;
+ uint8_t *constant_buffer;
};
/* Functions. */
-int r300_upload_user_buffers(struct r300_context *r300);
+void r300_upload_user_buffers(struct r300_context *r300,
+ int min_index, int max_index);
-int r300_upload_index_buffer(struct r300_context *r300,
- struct pipe_resource **index_buffer,
- unsigned index_size,
- unsigned start,
- unsigned count, unsigned *out_offset);
+void r300_upload_index_buffer(struct r300_context *r300,
+ struct pipe_resource **index_buffer,
+ unsigned index_size, unsigned *start,
+ unsigned count);
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
- void *ptr,
- unsigned bytes,
- unsigned usage);
+ void *ptr, unsigned size,
+ unsigned bind);
unsigned r300_buffer_is_referenced(struct pipe_context *context,
struct pipe_resource *buf,
@@ -86,14 +84,10 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context,
static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
{
- if (buffer) {
- assert(((struct r300_buffer *)buffer)->magic == R300_BUFFER_MAGIC);
- return (struct r300_buffer *)buffer;
- }
- return NULL;
+ return (struct r300_buffer *)buffer;
}
-static INLINE boolean r300_buffer_is_user_buffer(struct pipe_resource *buffer)
+static INLINE boolean r300_is_user_buffer(struct pipe_resource *buffer)
{
return r300_buffer(buffer)->user_buffer ? true : false;
}
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 247c22216e..3a97b76a4c 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -52,7 +52,7 @@
#define UPDATE_STATE(cso, atom) \
if (cso != atom.state) { \
atom.state = cso; \
- atom.dirty = TRUE; \
+ r300_mark_atom_dirty(r300, &(atom)); \
}
static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA,
@@ -417,7 +417,7 @@ static void r300_set_blend_color(struct pipe_context* pipe,
END_CB;
}
- r300->blend_color_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->blend_color_state);
}
static void r300_set_clip_state(struct pipe_context* pipe,
@@ -446,7 +446,7 @@ static void r300_set_clip_state(struct pipe_context* pipe,
(state->depth_clamp ? R300_CLIP_DISABLE : 0));
END_CB;
- r300->clip_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->clip_state);
} else {
draw_set_clip_state(r300->draw, state);
}
@@ -594,7 +594,7 @@ static void r300_bind_dsa_state(struct pipe_context* pipe,
UPDATE_STATE(state, r300->dsa_state);
- r300->hyperz_state.dirty = TRUE; /* Will be updated before the emission. */
+ r300_mark_atom_dirty(r300, &r300->hyperz_state); /* Will be updated before the emission. */
r300_dsa_inject_stencilref(r300);
}
@@ -613,7 +613,7 @@ static void r300_set_stencil_ref(struct pipe_context* pipe,
r300->stencil_ref = *sr;
r300_dsa_inject_stencilref(r300);
- r300->dsa_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->dsa_state);
}
static void r300_tex_set_tiling_flags(struct r300_context *r300,
@@ -626,7 +626,7 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300,
/* Tiling determines how DRM treats the buffer data.
* We must flush CS when changing it if the buffer is referenced. */
if (r300->rws->cs_is_buffer_referenced(r300->cs,
- tex->buffer, R300_REF_CS))
+ tex->cs_buffer, R300_REF_CS))
r300->context.flush(&r300->context, 0, NULL);
r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
@@ -647,12 +647,12 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300,
for (i = 0; i < state->nr_cbufs; i++) {
r300_tex_set_tiling_flags(r300,
r300_texture(state->cbufs[i]->texture),
- state->cbufs[i]->level);
+ state->cbufs[i]->u.tex.level);
}
if (state->zsbuf) {
r300_tex_set_tiling_flags(r300,
r300_texture(state->zsbuf->texture),
- state->zsbuf->level);
+ state->zsbuf->u.tex.level);
}
}
@@ -663,14 +663,14 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
struct r300_texture *rtex = r300_texture(tex);
fprintf(stderr,
- "r300: %s[%i] Dim: %ix%i, Offset: %i, ZSlice: %i, "
- "Face: %i, Level: %i, Format: %s\n"
+ "r300: %s[%i] Dim: %ix%i, Firstlayer: %i, "
+ "Lastlayer: %i, Level: %i, Format: %s\n"
"r300: TEX: Macro: %s, Micro: %s, Pitch: %i, "
"Dim: %ix%ix%i, LastLevel: %i, Format: %s\n",
- binding, index, surf->width, surf->height, surf->offset,
- surf->zslice, surf->face, surf->level,
+ binding, index, surf->width, surf->height,
+ surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level,
util_format_short_name(surf->format),
rtex->desc.macrotile[0] ? "YES" : " NO",
@@ -686,14 +686,23 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
struct pipe_framebuffer_state *state = r300->fb_state.state;
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
- /* What is marked as dirty depends on the enum r300_fb_state_change. */
- r300->gpu_flush.dirty = TRUE;
- r300->fb_state.dirty = TRUE;
- r300->hyperz_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->gpu_flush);
+ r300_mark_atom_dirty(r300, &r300->fb_state);
+ /* What is marked as dirty depends on the enum r300_fb_state_change. */
if (change == R300_CHANGED_FB_STATE) {
- r300->aa_state.dirty = TRUE;
- r300->fb_state_pipelined.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->aa_state);
+ }
+
+ if (change == R300_CHANGED_FB_STATE ||
+ change == R300_CHANGED_CBZB_FLAG ||
+ change == R300_CHANGED_ZCLEAR_FLAG) {
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
+
+ if (change == R300_CHANGED_FB_STATE ||
+ change == R300_CHANGED_MULTIWRITE) {
+ r300_mark_atom_dirty(r300, &r300->fb_state_pipelined);
}
/* Now compute the fb_state atom size. */
@@ -738,11 +747,11 @@ static void
/* If nr_cbufs is changed from zero to non-zero or vice versa... */
if (!!old_state->nr_cbufs != !!state->nr_cbufs) {
- r300->blend_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->blend_state);
}
/* If zsbuf is set from NULL to non-NULL or vice versa.. */
if (!!old_state->zsbuf != !!state->zsbuf) {
- r300->dsa_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->dsa_state);
}
/* The tiling flags are dependent on the surface miplevel, unfortunately. */
@@ -751,6 +760,7 @@ static void
util_copy_framebuffer_state(r300->fb_state.state, state);
r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
+ r300->validate_buffers = TRUE;
r300->z_compression = false;
@@ -768,7 +778,7 @@ static void
struct r300_surface *zs_surf = r300_surface(state->zsbuf);
struct r300_texture *tex;
int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44;
- int level = zs_surf->base.level;
+ int level = zs_surf->base.u.tex.level;
tex = r300_texture(zs_surf->base.texture);
@@ -795,7 +805,7 @@ static void
r300->zbuffer_bpp = zbuffer_bpp;
if (r300->polygon_offset_enabled)
- r300->rs_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->rs_state);
}
}
@@ -853,9 +863,9 @@ void r300_mark_fs_code_dirty(struct r300_context *r300)
{
struct r300_fragment_shader* fs = r300_fs(r300);
- r300->fs.dirty = TRUE;
- r300->fs_rc_constant_state.dirty = TRUE;
- r300->fs_constants.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->fs);
+ r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
+ r300_mark_atom_dirty(r300, &r300->fs_constants);
r300->fs.size = fs->shader->cb_code_size;
if (r300->screen->caps.is_r500) {
@@ -875,17 +885,26 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
{
struct r300_context* r300 = r300_context(pipe);
struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
+ struct pipe_framebuffer_state *fb = r300->fb_state.state;
+ boolean last_multi_write;
if (fs == NULL) {
r300->fs.state = NULL;
return;
}
+ last_multi_write = r300_fragment_shader_writes_all(r300_fs(r300));
+
r300->fs.state = fs;
r300_pick_fragment_shader(r300);
r300_mark_fs_code_dirty(r300);
- r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
+ if (fb->nr_cbufs > 1 &&
+ last_multi_write != r300_fragment_shader_writes_all(fs)) {
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_MULTIWRITE);
+ }
+
+ r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */
}
/* Delete fragment shader state. */
@@ -1137,7 +1156,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
if (last_sprite_coord_enable != r300->sprite_coord_enable ||
last_two_sided_color != r300->two_sided_color) {
- r300->rs_block_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->rs_block_state);
}
}
@@ -1235,7 +1254,7 @@ static void r300_bind_sampler_states(struct pipe_context* pipe,
memcpy(state->sampler_states, states, sizeof(void*) * count);
state->sampler_state_count = count;
- r300->textures_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->textures_state);
}
static void r300_lacks_vertex_textures(struct pipe_context* pipe,
@@ -1297,29 +1316,27 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
}
for (i = 0; i < count; i++) {
- if (&state->sampler_views[i]->base != views[i]) {
- pipe_sampler_view_reference(
- (struct pipe_sampler_view**)&state->sampler_views[i],
- views[i]);
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view**)&state->sampler_views[i],
+ views[i]);
- if (!views[i]) {
- continue;
- }
+ if (!views[i]) {
+ continue;
+ }
- /* A new sampler view (= texture)... */
- dirty_tex = TRUE;
+ /* A new sampler view (= texture)... */
+ dirty_tex = TRUE;
- /* Set the texrect factor in the fragment shader.
+ /* Set the texrect factor in the fragment shader.
* Needed for RECT and NPOT fallback. */
- texture = r300_texture(views[i]->texture);
- if (texture->desc.is_npot) {
- r300->fs_rc_constant_state.dirty = TRUE;
- }
+ texture = r300_texture(views[i]->texture);
+ if (texture->desc.is_npot) {
+ r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
+ }
- state->sampler_views[i]->texcache_region =
+ state->sampler_views[i]->texcache_region =
r300_assign_texture_cache_region(view_index, real_num_views);
- view_index++;
- }
+ view_index++;
}
for (i = count; i < tex_units; i++) {
@@ -1332,10 +1349,11 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
state->sampler_view_count = count;
- r300->textures_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->textures_state);
+ r300->validate_buffers = TRUE;
if (dirty_tex) {
- r300->texture_cache_inval.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
}
}
@@ -1347,6 +1365,7 @@ r300_create_sampler_view(struct pipe_context *pipe,
struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
struct r300_texture *tex = r300_texture(texture);
boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
+ boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle;
if (view) {
view->base = *templ;
@@ -1363,7 +1382,8 @@ r300_create_sampler_view(struct pipe_context *pipe,
view->format = tex->tx_format;
view->format.format1 |= r300_translate_texformat(templ->format,
view->swizzle,
- is_r500);
+ is_r500,
+ dxtc_swizzle);
if (is_r500) {
view->format.format2 |= r500_tx_format_msb_bit(templ->format);
}
@@ -1388,7 +1408,7 @@ static void r300_set_scissor_state(struct pipe_context* pipe,
memcpy(r300->scissor_state.state, state,
sizeof(struct pipe_scissor_state));
- r300->scissor_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->scissor_state);
}
static void r300_set_viewport_state(struct pipe_context* pipe,
@@ -1434,9 +1454,9 @@ static void r300_set_viewport_state(struct pipe_context* pipe,
viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA;
}
- r300->viewport_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->viewport_state);
if (r300->fs.state && r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) {
- r300->fs_rc_constant_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
}
}
@@ -1445,15 +1465,15 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
const struct pipe_vertex_buffer* buffers)
{
struct r300_context* r300 = r300_context(pipe);
- struct pipe_vertex_buffer *vbo;
+ const struct pipe_vertex_buffer *vbo;
unsigned i, max_index = (1 << 24) - 1;
boolean any_user_buffer = FALSE;
+ boolean any_nonuser_buffer = FALSE;
struct pipe_vertex_buffer dummy_vb = {0};
/* There must be at least one vertex buffer set, otherwise it locks up. */
if (!count) {
dummy_vb.buffer = r300->dummy_vb;
- dummy_vb.max_index = r300->dummy_vb->width0 / 4;
buffers = &dummy_vb;
count = 1;
}
@@ -1480,34 +1500,41 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
}
for (i = 0; i < count; i++) {
- /* Why, yes, I AM casting away constness. How did you know? */
- vbo = (struct pipe_vertex_buffer*)&buffers[i];
+ vbo = &buffers[i];
/* Skip NULL buffers */
- if (!buffers[i].buffer) {
+ if (!vbo->buffer) {
continue;
}
- if (r300_buffer_is_user_buffer(vbo->buffer)) {
+ /* User buffers have no info about maximum index,
+ * we will have to compute it in draw_vbo. */
+ if (r300_is_user_buffer(vbo->buffer)) {
any_user_buffer = TRUE;
+ continue;
}
+ any_nonuser_buffer = TRUE;
- if (vbo->max_index == ~0) {
- /* if no VBO stride then only one vertex value so max index is 1 */
- /* should think about converting to VS constants like svga does */
- if (!vbo->stride)
- vbo->max_index = 1;
- else
- vbo->max_index =
- (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
- }
+ /* The stride of zero means we will be fetching only the first
+ * vertex, so don't care about max_index. */
+ if (!vbo->stride)
+ continue;
- max_index = MIN2(vbo->max_index, max_index);
+ /* Update the maximum index. */
+ {
+ unsigned vbo_max_index =
+ (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
+ max_index = MIN2(max_index, vbo_max_index);
+ }
}
r300->any_user_vbs = any_user_buffer;
r300->vertex_buffer_max_index = max_index;
-
+ r300->vertex_arrays_dirty = TRUE;
+ if (any_nonuser_buffer)
+ r300->validate_buffers = TRUE;
+ if (!any_user_buffer)
+ r300->upload_vb_validated = FALSE;
} else {
/* SW TCL. */
draw_set_vertex_buffers(r300->draw, count, buffers);
@@ -1515,16 +1542,25 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
/* Common code. */
for (i = 0; i < count; i++) {
+ vbo = &buffers[i];
+
/* Reference our buffer. */
- pipe_resource_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer);
+ pipe_resource_reference(&r300->vertex_buffer[i].buffer, vbo->buffer);
+ if (vbo->buffer && r300_is_user_buffer(vbo->buffer)) {
+ pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL);
+ } else {
+ pipe_resource_reference(&r300->valid_vertex_buffer[i], vbo->buffer);
+ }
}
for (; i < r300->vertex_buffer_count; i++) {
/* Dereference any old buffers. */
pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
+ pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL);
}
memcpy(r300->vertex_buffer, buffers,
- sizeof(struct pipe_vertex_buffer) * count);
+ sizeof(struct pipe_vertex_buffer) * count);
+
r300->vertex_buffer_count = count;
}
@@ -1533,20 +1569,23 @@ static void r300_set_index_buffer(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
- if (ib) {
+ if (ib && ib->buffer) {
pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer);
memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer));
+
+ if (r300->screen->caps.has_tcl &&
+ !r300_is_user_buffer(ib->buffer)) {
+ r300->validate_buffers = TRUE;
+ r300->upload_ib_validated = FALSE;
+ }
}
else {
pipe_resource_reference(&r300->index_buffer.buffer, NULL);
memset(&r300->index_buffer, 0, sizeof(r300->index_buffer));
}
- if (r300->screen->caps.has_tcl) {
- /* TODO make this more like a state */
- }
- else {
- draw_set_index_buffer(r300->draw, ib);
+ if (!r300->screen->caps.has_tcl) {
+ draw_set_index_buffer(r300->draw, ib);
}
}
@@ -1714,6 +1753,7 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
+ r300->vertex_arrays_dirty = TRUE;
}
static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
@@ -1756,27 +1796,25 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
r300->vs_state.state = vs;
/* The majority of the RS block bits is dependent on the vertex shader. */
- r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
+ r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */
if (r300->screen->caps.has_tcl) {
unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2;
- r300->vs_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->vs_state);
r300->vs_state.size =
vs->code.length + 9 +
- (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0) +
(vs->code.num_fc_ops ? vs->code.num_fc_ops * fc_op_dwords + 4 : 0);
- if (vs->externals_count) {
- r300->vs_constants.dirty = TRUE;
- r300->vs_constants.size = vs->externals_count * 4 + 3;
- } else {
- r300->vs_constants.size = 0;
- }
+ r300_mark_atom_dirty(r300, &r300->vs_constants);
+ r300->vs_constants.size =
+ 2 +
+ (vs->externals_count ? vs->externals_count * 4 + 3 : 0) +
+ (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0);
((struct r300_constant_buffer*)r300->vs_constants.state)->remap_table =
vs->code.constants_remap_table;
- r300->pvs_flush.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->pvs_flush);
} else {
draw_bind_vertex_shader(r300->draw,
(struct draw_vertex_shader*)vs->draw_vs);
@@ -1807,6 +1845,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
{
struct r300_context* r300 = r300_context(pipe);
struct r300_constant_buffer *cbuf;
+ struct r300_buffer *rbuf = r300_buffer(buf);
uint32_t *mapped;
switch (shader) {
@@ -1817,33 +1856,48 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
cbuf = (struct r300_constant_buffer*)r300->fs_constants.state;
break;
default:
- assert(0);
return;
}
- if (buf == NULL || buf->width0 == 0 ||
- (mapped = r300_buffer(buf)->constant_buffer) == NULL) {
+ if (buf == NULL || buf->width0 == 0)
+ return;
+
+ if (rbuf->user_buffer)
+ mapped = (uint32_t*)rbuf->user_buffer;
+ else if (rbuf->constant_buffer)
+ mapped = (uint32_t*)rbuf->constant_buffer;
+ else
return;
- }
if (shader == PIPE_SHADER_FRAGMENT ||
(shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) {
- assert((buf->width0 % (4 * sizeof(float))) == 0);
- cbuf->ptr = mapped + index*4;
+ cbuf->ptr = mapped;
}
if (shader == PIPE_SHADER_VERTEX) {
if (r300->screen->caps.has_tcl) {
- if (r300->vs_constants.size) {
- r300->vs_constants.dirty = TRUE;
+ struct r300_vertex_shader *vs =
+ (struct r300_vertex_shader*)r300->vs_state.state;
+
+ if (!vs) {
+ cbuf->buffer_base = 0;
+ return;
+ }
+
+ cbuf->buffer_base = r300->vs_const_base;
+ r300->vs_const_base += vs->code.constants.Count;
+ if (r300->vs_const_base > R500_MAX_PVS_CONST_VECS) {
+ r300->vs_const_base = vs->code.constants.Count;
+ cbuf->buffer_base = 0;
+ r300_mark_atom_dirty(r300, &r300->pvs_flush);
}
- r300->pvs_flush.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->vs_constants);
} else if (r300->draw) {
draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX,
0, mapped, buf->width0);
}
} else if (shader == PIPE_SHADER_FRAGMENT) {
- r300->fs_constants.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->fs_constants);
}
}
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 1cff3483b5..95be7849f8 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -32,7 +32,6 @@
#include "r300_hyperz.h"
#include "r300_screen.h"
#include "r300_shader_semantics.h"
-#include "r300_state_derived.h"
#include "r300_state_inlines.h"
#include "r300_texture.h"
#include "r300_vs.h"
@@ -193,7 +192,7 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300)
(R300_LAST_VEC << (i & 1 ? 16 : 0));
vstream->count = (i >> 1) + 1;
- r300->vertex_stream_state.dirty = TRUE;
+ r300_mark_atom_dirty(r300, &r300->vertex_stream_state);
r300->vertex_stream_state.size = (1 + vstream->count) * 2;
}
@@ -592,7 +591,8 @@ static void r300_update_rs_block(struct r300_context *r300)
}
static uint32_t r300_get_border_color(enum pipe_format format,
- const float border[4])
+ const float border[4],
+ boolean is_r500)
{
const struct util_format_description *desc;
float border_swizzled[4] = {0};
@@ -601,6 +601,24 @@ static uint32_t r300_get_border_color(enum pipe_format format,
desc = util_format_description(format);
+ /* Do depth formats first. */
+ if (util_format_is_depth_or_stencil(format)) {
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return util_pack_z(PIPE_FORMAT_Z16_UNORM, border[0]);
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ if (is_r500) {
+ return util_pack_z(PIPE_FORMAT_X8Z24_UNORM, border[0]);
+ } else {
+ return util_pack_z(PIPE_FORMAT_Z16_UNORM, border[0]) << 16;
+ }
+ default:
+ assert(0);
+ return 0;
+ }
+ }
+
/* Apply inverse swizzle of the format. */
for (i = 0; i < 4; i++) {
switch (desc->swizzle[i]) {
@@ -619,7 +637,17 @@ static uint32_t r300_get_border_color(enum pipe_format format,
}
}
+ /* Compressed formats. */
+ if (util_format_is_compressed(format)) {
+ util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+ return uc.ui;
+ }
+
switch (desc->channel[0].size) {
+ case 2:
+ util_pack_color(border_swizzled, PIPE_FORMAT_B2G3R3_UNORM, &uc);
+ break;
+
case 4:
util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc);
break;
@@ -642,6 +670,15 @@ static uint32_t r300_get_border_color(enum pipe_format format,
case 10:
util_pack_color(border_swizzled, PIPE_FORMAT_B10G10R10A2_UNORM, &uc);
break;
+
+ case 16:
+ if (desc->nr_channels <= 2) {
+ border_swizzled[0] = border_swizzled[2];
+ util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc);
+ } else {
+ util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ }
+ break;
}
return uc.ui;
@@ -683,12 +720,13 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
/* Set the border color. */
texstate->border_color =
r300_get_border_color(view->base.format,
- sampler->state.border_color);
+ sampler->state.border_color,
+ r300->screen->caps.is_r500);
/* determine min/max levels */
- max_level = MIN3(sampler->max_lod + view->base.first_level,
- tex->desc.b.b.last_level, view->base.last_level);
- min_level = MIN2(sampler->min_lod + view->base.first_level,
+ max_level = MIN3(sampler->max_lod + view->base.u.tex.first_level,
+ tex->desc.b.b.last_level, view->base.u.tex.last_level);
+ min_level = MIN2(sampler->min_lod + view->base.u.tex.first_level,
max_level);
if (tex->desc.is_npot && min_level > 0) {
@@ -729,13 +767,18 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) {
texstate->format.format1 |=
r300_get_swizzle_combined(depth_swizzle,
- view->swizzle);
+ view->swizzle, FALSE);
} else {
texstate->format.format1 |=
- r300_get_swizzle_combined(depth_swizzle, 0);
+ r300_get_swizzle_combined(depth_swizzle, 0, FALSE);
}
}
+ if (r300->screen->caps.dxtc_swizzle &&
+ util_format_is_compressed(tex->desc.b.b.format)) {
+ texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE;
+ }
+
/* to emulate 1D textures through 2D ones correctly */
if (tex->desc.b.b.target == PIPE_TEXTURE_1D) {
texstate->filter0 &= ~R300_TX_WRAP_T_MASK;
@@ -819,40 +862,6 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
}
-/* We can't use compressed zbuffers as samplers. */
-static void r300_flush_depth_textures(struct r300_context *r300)
-{
- struct r300_textures_state *state =
- (struct r300_textures_state*)r300->textures_state.state;
- unsigned i, level;
- unsigned count = MIN2(state->sampler_view_count,
- state->sampler_state_count);
-
- if (r300->z_decomp_rd)
- return;
-
- for (i = 0; i < count; i++)
- if (state->sampler_views[i] && state->sampler_states[i]) {
- struct pipe_resource *tex = state->sampler_views[i]->base.texture;
-
- if (tex->target == PIPE_TEXTURE_3D ||
- tex->target == PIPE_TEXTURE_CUBE)
- continue;
-
- /* Ignore non-depth textures.
- * Also ignore reinterpreted depth textures, e.g. resource_copy. */
- if (!util_format_is_depth_or_stencil(tex->format))
- continue;
-
- for (level = 0; level <= tex->last_level; level++)
- if (r300_texture(tex)->zmask_in_use[level]) {
- /* We don't handle 3D textures and cubemaps yet. */
- r300_flush_depth_stencil(&r300->context, tex,
- u_subresource(0, level), 0);
- }
- }
-}
-
void r300_update_derived_state(struct r300_context* r300)
{
r300_flush_depth_textures(r300);
diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h
deleted file mode 100644
index 71a4a47b00..0000000000
--- a/src/gallium/drivers/r300/r300_state_derived.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_STATE_DERIVED_H
-#define R300_STATE_DERIVED_H
-
-struct r300_context;
-
-void r300_update_derived_state(struct r300_context* r300);
-
-#endif /* R300_STATE_DERIVED_H */
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index cee56bccdc..6fdc504ed5 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -40,7 +40,8 @@
#include "pipe/p_screen.h"
unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
- const unsigned char *swizzle_view)
+ const unsigned char *swizzle_view,
+ boolean dxtc_swizzle)
{
unsigned i;
unsigned char swizzle[4];
@@ -51,10 +52,10 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
R300_TX_FORMAT_B_SHIFT,
R300_TX_FORMAT_A_SHIFT
};
- const uint32_t swizzle_bit[4] = {
- R300_TX_FORMAT_X,
+ uint32_t swizzle_bit[4] = {
+ dxtc_swizzle ? R300_TX_FORMAT_Z : R300_TX_FORMAT_X,
R300_TX_FORMAT_Y,
- R300_TX_FORMAT_Z,
+ dxtc_swizzle ? R300_TX_FORMAT_X : R300_TX_FORMAT_Z,
R300_TX_FORMAT_W
};
@@ -107,7 +108,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
* makes available X, Y, Z, W, ZERO, and ONE for swizzling. */
uint32_t r300_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
- boolean is_r500)
+ boolean is_r500,
+ boolean dxtc_swizzle)
{
uint32_t result = 0;
const struct util_format_description *desc;
@@ -169,7 +171,8 @@ uint32_t r300_translate_texformat(enum pipe_format format,
}
}
- result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view);
+ result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
+ util_format_is_compressed(format) && dxtc_swizzle);
/* S3TC formats. */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
@@ -241,6 +244,11 @@ uint32_t r300_translate_texformat(enum pipe_format format,
desc->channel[2].size == 6) {
return R300_TX_FORMAT_Z6Y5X5 | result;
}
+ if (desc->channel[0].size == 2 &&
+ desc->channel[1].size == 3 &&
+ desc->channel[2].size == 3) {
+ return R300_TX_FORMAT_Z3Y3X2 | result;
+ }
return ~0; /* Unsupported/unknown. */
case 4:
@@ -478,6 +486,8 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
} else {
if (desc->channel[i].size == 16) {
modifier |= R300_US_OUT_FMT_C4_16;
+ } else if (desc->channel[i].size == 10) {
+ modifier |= R300_US_OUT_FMT_C4_10;
} else {
/* C4_8 seems to be used for the formats whose pixel size
* is <= 32 bits. */
@@ -571,7 +581,7 @@ boolean r300_is_zs_format_supported(enum pipe_format format)
boolean r300_is_sampler_format_supported(enum pipe_format format)
{
- return r300_translate_texformat(format, 0, TRUE) != ~0;
+ return r300_translate_texformat(format, 0, TRUE, FALSE) != ~0;
}
void r300_texture_setup_format_state(struct r300_screen *screen,
@@ -665,21 +675,21 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen,
}
static unsigned r300_texture_is_referenced(struct pipe_context *context,
- struct pipe_resource *texture,
- unsigned face, unsigned level)
+ struct pipe_resource *texture,
+ unsigned level, int layer)
{
struct r300_context *r300 = r300_context(context);
struct r300_texture *rtex = (struct r300_texture *)texture;
if (r300->rws->cs_is_buffer_referenced(r300->cs,
- rtex->buffer, R300_REF_CS))
+ rtex->cs_buffer, R300_REF_CS))
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
return PIPE_UNREFERENCED;
}
static void r300_texture_destroy(struct pipe_screen *screen,
- struct pipe_resource* texture)
+ struct pipe_resource* texture)
{
struct r300_texture* tex = (struct r300_texture*)texture;
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys;
@@ -774,6 +784,8 @@ r300_texture_create_object(struct r300_screen *rscreen,
}
}
+ tex->cs_buffer = rws->buffer_get_cs_handle(rws, tex->buffer);
+
rws->buffer_set_tiling(rws, tex->buffer,
tex->desc.microtile, tex->desc.macrotile[0],
tex->desc.stride_in_bytes[0]);
@@ -848,38 +860,40 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
/* Not required to implement u_resource_vtbl, consider moving to another file:
*/
-struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
- struct pipe_resource* texture,
- unsigned face,
- unsigned level,
- unsigned zslice,
- unsigned flags)
+struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
+ struct pipe_resource* texture,
+ const struct pipe_surface *surf_tmpl)
{
struct r300_texture* tex = r300_texture(texture);
struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
+ unsigned level = surf_tmpl->u.tex.level;
+
+ assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
if (surface) {
uint32_t offset, tile_height;
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
- surface->base.format = texture->format;
+ surface->base.context = ctx;
+ surface->base.format = surf_tmpl->format;
surface->base.width = u_minify(texture->width0, level);
surface->base.height = u_minify(texture->height0, level);
- surface->base.usage = flags;
- surface->base.zslice = zslice;
- surface->base.face = face;
- surface->base.level = level;
+ surface->base.usage = surf_tmpl->usage;
+ surface->base.u.tex.level = level;
+ surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
surface->buffer = tex->buffer;
+ surface->cs_buffer = tex->cs_buffer;
/* Prefer VRAM if there are multiple domains to choose from. */
surface->domain = tex->domain;
if (surface->domain & R300_DOMAIN_VRAM)
surface->domain &= ~R300_DOMAIN_GTT;
- surface->offset = r300_texture_get_offset(&tex->desc,
- level, zslice, face);
+ surface->offset = r300_texture_get_offset(&tex->desc, level,
+ surf_tmpl->u.tex.first_layer);
surface->pitch = tex->fb_state.pitch[level];
surface->format = tex->fb_state.format;
@@ -892,7 +906,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
tex->desc.b.b.nr_samples,
tex->desc.microtile,
tex->desc.macrotile[level],
- DIM_HEIGHT);
+ DIM_HEIGHT, 0);
surface->cbzb_height = align((surface->base.height + 1) / 2,
tile_height);
@@ -910,13 +924,13 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
else
surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z;
- SCREEN_DBG(r300_screen(screen), DBG_CBZB,
- "CBZB Allowed: %s, Dim: %ix%i, Misalignment: %i, Micro: %s, Macro: %s\n",
- surface->cbzb_allowed ? "YES" : " NO",
- surface->cbzb_width, surface->cbzb_height,
- offset & 2047,
- tex->desc.microtile ? "YES" : " NO",
- tex->desc.macrotile[level] ? "YES" : " NO");
+ DBG(r300_context(ctx), DBG_CBZB,
+ "CBZB Allowed: %s, Dim: %ix%i, Misalignment: %i, Micro: %s, Macro: %s\n",
+ surface->cbzb_allowed ? "YES" : " NO",
+ surface->cbzb_width, surface->cbzb_height,
+ offset & 2047,
+ tex->desc.microtile ? "YES" : " NO",
+ tex->desc.macrotile[level] ? "YES" : " NO");
}
return &surface->base;
@@ -924,7 +938,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
/* Not required to implement u_resource_vtbl, consider moving to another file:
*/
-void r300_tex_surface_destroy(struct pipe_surface* s)
+void r300_surface_destroy(struct pipe_context *ctx, struct pipe_surface* s)
{
pipe_resource_reference(&s->texture, NULL);
FREE(s);
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index c4588a0c90..0ab22f747e 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -27,6 +27,7 @@
#include "pipe/p_format.h"
struct pipe_screen;
+struct pipe_context;
struct pipe_resource;
struct winsys_handle;
struct r300_texture_format_state;
@@ -35,11 +36,13 @@ struct r300_texture;
struct r300_screen;
unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
- const unsigned char *swizzle_view);
+ const unsigned char *swizzle_view,
+ boolean dxtc_swizzle);
uint32_t r300_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
- boolean is_r500);
+ boolean is_r500,
+ boolean dxtc_swizzle);
uint32_t r500_tx_format_msb_bit(enum pipe_format format);
@@ -68,13 +71,10 @@ r300_texture_create(struct pipe_screen* screen,
const struct pipe_resource* templ);
-struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
- struct pipe_resource* texture,
- unsigned face,
- unsigned level,
- unsigned zslice,
- unsigned flags);
+struct pipe_surface* r300_create_surface(struct pipe_context *ctx,
+ struct pipe_resource* texture,
+ const struct pipe_surface *surf_tmpl);
-void r300_tex_surface_destroy(struct pipe_surface* s);
+void r300_surface_destroy(struct pipe_context *ctx, struct pipe_surface* s);
#endif /* R300_TEXTURE_H */
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index 543d0fdc15..7b1739142d 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -34,7 +34,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
unsigned num_samples,
enum r300_buffer_tiling microtile,
enum r300_buffer_tiling macrotile,
- enum r300_dim dim)
+ enum r300_dim dim, boolean is_rs690)
{
static const unsigned table[2][5][3][2] =
{
@@ -57,6 +57,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
{{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
}
};
+
static const unsigned aa_block[2] = {4, 8};
unsigned tile = 0;
unsigned pixsize = util_format_get_blocksize(format);
@@ -74,6 +75,14 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
} else {
/* Standard alignment. */
tile = table[macrotile][util_logbase2(pixsize)][microtile][dim];
+ if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) {
+ int align;
+ int h_tile;
+ h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT];
+ align = 64 / (pixsize * h_tile);
+ if (tile < align)
+ tile = align;
+ }
}
assert(tile);
@@ -89,7 +98,7 @@ static boolean r300_texture_macro_switch(struct r300_texture_desc *desc,
unsigned tile, texdim;
tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples,
- desc->microtile, R300_BUFFER_TILED, dim);
+ desc->microtile, R300_BUFFER_TILED, dim, 0);
if (dim == DIM_WIDTH) {
texdim = u_minify(desc->width0, level);
} else {
@@ -113,6 +122,9 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen,
unsigned level)
{
unsigned tile_width, width, stride;
+ boolean is_rs690 = (screen->caps.family == CHIP_FAMILY_RS600 ||
+ screen->caps.family == CHIP_FAMILY_RS690 ||
+ screen->caps.family == CHIP_FAMILY_RS740);
if (desc->stride_in_bytes_override)
return desc->stride_in_bytes_override;
@@ -131,38 +143,14 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen,
desc->b.b.nr_samples,
desc->microtile,
desc->macrotile[level],
- DIM_WIDTH);
+ DIM_WIDTH, is_rs690);
width = align(width, tile_width);
stride = util_format_get_stride(desc->b.b.format, width);
-
- /* Some IGPs need a minimum stride of 64 bytes, hmm... */
- if (!desc->macrotile[level] &&
- (screen->caps.family == CHIP_FAMILY_RS600 ||
- screen->caps.family == CHIP_FAMILY_RS690 ||
- screen->caps.family == CHIP_FAMILY_RS740)) {
- unsigned min_stride;
-
- if (desc->microtile) {
- unsigned tile_height =
- r300_get_pixel_alignment(desc->b.b.format,
- desc->b.b.nr_samples,
- desc->microtile,
- desc->macrotile[level],
- DIM_HEIGHT);
-
- min_stride = 64 / tile_height;
- } else {
- min_stride = 64;
- }
-
- return stride < min_stride ? min_stride : stride;
- }
-
/* The alignment to 32 bytes is sort of implied by the layout... */
return stride;
} else {
- return align(util_format_get_stride(desc->b.b.format, width), 32);
+ return align(util_format_get_stride(desc->b.b.format, width), is_rs690 ? 64 : 32);
}
}
@@ -179,7 +167,7 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
desc->b.b.nr_samples,
desc->microtile,
desc->macrotile[level],
- DIM_HEIGHT);
+ DIM_HEIGHT, 0);
height = align(height, tile_height);
/* This is needed for the kernel checker, unfortunately. */
@@ -474,22 +462,17 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen,
}
unsigned r300_texture_get_offset(struct r300_texture_desc *desc,
- unsigned level, unsigned zslice,
- unsigned face)
+ unsigned level, unsigned layer)
{
unsigned offset = desc->offset_in_bytes[level];
switch (desc->b.b.target) {
case PIPE_TEXTURE_3D:
- assert(face == 0);
- return offset + zslice * desc->layer_size_in_bytes[level];
-
case PIPE_TEXTURE_CUBE:
- assert(zslice == 0);
- return offset + face * desc->layer_size_in_bytes[level];
+ return offset + layer * desc->layer_size_in_bytes[level];
default:
- assert(zslice == 0 && face == 0);
+ assert(layer == 0);
return offset;
}
}
diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h
index 3d7fe1fb47..121d215b4c 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.h
+++ b/src/gallium/drivers/r300/r300_texture_desc.h
@@ -41,7 +41,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
unsigned num_samples,
enum r300_buffer_tiling microtile,
enum r300_buffer_tiling macrotile,
- enum r300_dim dim);
+ enum r300_dim dim, boolean is_rs690);
boolean r300_texture_desc_init(struct r300_screen *rscreen,
struct r300_texture_desc *desc,
@@ -52,7 +52,6 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen,
unsigned max_buffer_size);
unsigned r300_texture_get_offset(struct r300_texture_desc *desc,
- unsigned level, unsigned zslice,
- unsigned face);
+ unsigned level, unsigned layer);
#endif
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 33448bf0de..15a323989b 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */
/* gap */
case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
- /* case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; */
+ case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP;
case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
/* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */
case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
index e9333b35ef..ae93fab554 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -27,6 +27,7 @@
#include "util/u_memory.h"
#include "util/u_format.h"
+#include "util/u_box.h"
struct r300_transfer {
/* Parent class */
@@ -52,16 +53,10 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx,
{
struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
struct pipe_resource *tex = transfer->resource;
- struct pipe_subresource subdst;
- subdst.face = 0;
- subdst.level = 0;
-
- ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, subdst,
- 0, 0, 0,
- tex, transfer->sr,
- transfer->box.x, transfer->box.y, transfer->box.z,
- transfer->box.width, transfer->box.height);
+ ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, 0,
+ 0, 0, 0,
+ tex, transfer->level, &transfer->box);
}
/* Copy a detiled texture to a tiled one. */
@@ -70,26 +65,22 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx,
{
struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
struct pipe_resource *tex = transfer->resource;
- struct pipe_subresource subsrc;
-
- subsrc.face = 0;
- subsrc.level = 0;
+ struct pipe_box src_box;
+ u_box_origin_2d(transfer->box.width, transfer->box.height, &src_box);
- ctx->resource_copy_region(ctx, tex, transfer->sr,
- transfer->box.x, transfer->box.y, transfer->box.z,
- &r300transfer->linear_texture->desc.b.b, subsrc,
- 0, 0, 0,
- transfer->box.width, transfer->box.height);
+ ctx->resource_copy_region(ctx, tex, transfer->level,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ &r300transfer->linear_texture->desc.b.b, 0, &src_box);
ctx->flush(ctx, 0, NULL);
}
struct pipe_transfer*
r300_texture_get_transfer(struct pipe_context *ctx,
- struct pipe_resource *texture,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box)
+ struct pipe_resource *texture,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
struct r300_context *r300 = r300_context(ctx);
struct r300_texture *tex = r300_texture(texture);
@@ -99,13 +90,13 @@ r300_texture_get_transfer(struct pipe_context *ctx,
referenced_cs =
r300->rws->cs_is_buffer_referenced(r300->cs,
- tex->buffer, R300_REF_CS);
+ tex->cs_buffer, R300_REF_CS);
if (referenced_cs) {
referenced_hw = TRUE;
} else {
referenced_hw =
r300->rws->cs_is_buffer_referenced(r300->cs,
- tex->buffer, R300_REF_HW);
+ tex->cs_buffer, R300_REF_HW);
}
blittable = ctx->screen->is_format_supported(
@@ -116,25 +107,26 @@ r300_texture_get_transfer(struct pipe_context *ctx,
if (trans) {
/* Initialize the transfer object. */
pipe_resource_reference(&trans->transfer.resource, texture);
- trans->transfer.sr = sr;
+ trans->transfer.level = level;
trans->transfer.usage = usage;
trans->transfer.box = *box;
/* If the texture is tiled, we must create a temporary detiled texture
* for this transfer.
* Also make write transfers pipelined. */
- if (tex->desc.microtile || tex->desc.macrotile[sr.level] ||
+ if (tex->desc.microtile || tex->desc.macrotile[level] ||
((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) {
base.target = PIPE_TEXTURE_2D;
base.format = texture->format;
base.width0 = box->width;
base.height0 = box->height;
- base.depth0 = 0;
+ base.depth0 = 1;
+ base.array_size = 1;
base.last_level = 0;
base.nr_samples = 0;
base.usage = PIPE_USAGE_DYNAMIC;
base.bind = 0;
- base.flags = R300_RESOURCE_FLAG_TRANSFER;
+ base.flags = R300_RESOURCE_FLAG_TRANSFER;
/* For texture reading, the temporary (detiled) texture is used as
* a render target when blitting from a tiled texture. */
@@ -164,7 +156,7 @@ r300_texture_get_transfer(struct pipe_context *ctx,
if (!trans->linear_texture) {
/* For linear textures, it's safe to fallback to
* an unpipelined transfer. */
- if (!tex->desc.microtile && !tex->desc.macrotile[sr.level]) {
+ if (!tex->desc.microtile && !tex->desc.macrotile[level]) {
goto unpipelined;
}
@@ -182,7 +174,7 @@ r300_texture_get_transfer(struct pipe_context *ctx,
/* Set the stride.
*
* Even though we are using an internal texture for this,
- * the transfer sr, box and usage parameters still reflect
+ * the transfer level, box and usage parameters still reflect
* the arguments received to get_transfer. We just do the
* right thing internally.
*/
@@ -202,9 +194,8 @@ r300_texture_get_transfer(struct pipe_context *ctx,
unpipelined:
/* Unpipelined transfer. */
- trans->transfer.stride = tex->desc.stride_in_bytes[sr.level];
- trans->offset = r300_texture_get_offset(&tex->desc,
- sr.level, box->z, sr.face);
+ trans->transfer.stride = tex->desc.stride_in_bytes[level];
+ trans->offset = r300_texture_get_offset(&tex->desc, level, box->z);
if (referenced_cs)
ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
diff --git a/src/gallium/drivers/r300/r300_transfer.h b/src/gallium/drivers/r300/r300_transfer.h
index 0d32a68d1f..7977ef516f 100644
--- a/src/gallium/drivers/r300/r300_transfer.h
+++ b/src/gallium/drivers/r300/r300_transfer.h
@@ -30,22 +30,22 @@ struct r300_context;
struct pipe_transfer*
r300_texture_get_transfer(struct pipe_context *ctx,
- struct pipe_resource *texture,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box);
+ struct pipe_resource *texture,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box);
void
r300_texture_transfer_destroy(struct pipe_context *ctx,
- struct pipe_transfer *trans);
+ struct pipe_transfer *trans);
void*
r300_texture_transfer_map(struct pipe_context *ctx,
- struct pipe_transfer *transfer);
+ struct pipe_transfer *transfer);
void
r300_texture_transfer_unmap(struct pipe_context *ctx,
- struct pipe_transfer *transfer);
+ struct pipe_transfer *transfer);
#endif
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 65696555ac..78021e2c5d 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -213,7 +213,6 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.Base.max_temp_regs = 32;
compiler.Base.max_constants = 256;
compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;
- compiler.Base.remove_unused_constants = TRUE;
if (compiler.Base.Debug & RC_DBG_LOG) {
DBG(r300, DBG_VP, "r300: Initial vertex program\n");
@@ -227,6 +226,10 @@ void r300_translate_vertex_shader(struct r300_context *r300,
r300_tgsi_to_rc(&ttr, vs->state.tokens);
+ if (compiler.Base.Program.Constants.Count > 200) {
+ compiler.Base.remove_unused_constants = TRUE;
+ }
+
compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs + 1));
compiler.SetHwInputOutput = &set_vertex_inputs_outputs;
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index 4597332399..460da77a4f 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -33,15 +33,17 @@
#include "r300_defines.h"
+#define R300_MAX_CMDBUF_DWORDS (16 * 1024)
+
struct winsys_handle;
struct r300_winsys_screen;
-struct r300_winsys_buffer;
+struct r300_winsys_buffer; /* for map/unmap etc. */
+struct r300_winsys_cs_buffer; /* for write_reloc etc. */
struct r300_winsys_cs {
- uint32_t *ptr; /* Pointer to the beginning of the CS. */
- unsigned cdw; /* Number of used dwords. */
- unsigned ndw; /* Size of the CS in dwords. */
+ unsigned cdw; /* Number of used dwords. */
+ uint32_t buf[R300_MAX_CMDBUF_DWORDS]; /* The command buffer. */
};
enum r300_value_id {
@@ -49,8 +51,9 @@ enum r300_value_id {
R300_VID_GB_PIPES,
R300_VID_Z_PIPES,
R300_VID_SQUARE_TILING_SUPPORT,
- R300_VID_DRM_2_3_0,
- R300_VID_DRM_2_6_0,
+ R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */
+ R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */
+ R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer */
R300_CAN_HYPERZ,
};
@@ -102,6 +105,10 @@ struct r300_winsys_screen {
unsigned usage,
enum r300_buffer_domain domain);
+ struct r300_winsys_cs_buffer *(*buffer_get_cs_handle)(
+ struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf);
+
/**
* Reference a buffer object (assign with reference counting).
*
@@ -232,23 +239,22 @@ struct r300_winsys_screen {
void (*cs_destroy)(struct r300_winsys_cs *cs);
/**
- * Add a buffer object to the list of buffers to validate.
+ * Add a new buffer relocation. Every relocation must first be added
+ * before it can be written.
*
- * \param cs A command stream to add buffer for validation against.
- * \param buf A winsys buffer to validate.
- * \param rd A read domain containing a bitmask
- * of the R300_DOMAIN_* flags.
- * \param wd A write domain containing a bitmask
- * of the R300_DOMAIN_* flags.
+ * \param cs A command stream to add buffer for validation against.
+ * \param buf A winsys buffer to validate.
+ * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags.
+ * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags.
*/
- void (*cs_add_buffer)(struct r300_winsys_cs *cs,
- struct r300_winsys_buffer *buf,
- enum r300_buffer_domain rd,
- enum r300_buffer_domain wd);
+ void (*cs_add_reloc)(struct r300_winsys_cs *cs,
+ struct r300_winsys_cs_buffer *buf,
+ enum r300_buffer_domain rd,
+ enum r300_buffer_domain wd);
/**
- * Revalidate all currently set up winsys buffers.
- * Returns TRUE if a flush is required.
+ * Return TRUE if there is enough memory in VRAM and GTT for the relocs
+ * added so far.
*
* \param cs A command stream to validate.
*/
@@ -263,9 +269,7 @@ struct r300_winsys_screen {
* \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags.
*/
void (*cs_write_reloc)(struct r300_winsys_cs *cs,
- struct r300_winsys_buffer *buf,
- enum r300_buffer_domain rd,
- enum r300_buffer_domain wd);
+ struct r300_winsys_cs_buffer *buf);
/**
* Flush a command stream.
@@ -287,14 +291,6 @@ struct r300_winsys_screen {
void *user);
/**
- * Reset the list of buffer objects to validate, usually called
- * prior to adding buffer objects for validation.
- *
- * \param cs A command stream to reset buffers for.
- */
- void (*cs_reset_buffers)(struct r300_winsys_cs *cs);
-
- /**
* Return TRUE if a buffer is referenced by a command stream or by hardware
* (i.e. is busy), based on the domain parameter.
*
@@ -303,7 +299,7 @@ struct r300_winsys_screen {
* \param domain A bitmask of the R300_REF_* enums.
*/
boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs,
- struct r300_winsys_buffer *buf,
+ struct r300_winsys_cs_buffer *buf,
enum r300_reference_domain domain);
};
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index a484f38e9f..b476b9af3b 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -21,6 +21,7 @@ C_SOURCES = \
evergreen_state.c \
eg_asm.c \
r600_translate.c \
- r600_state_common.c
+ r600_state_common.c \
+ r600_upload.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index 3fc1fa94c2..6498014096 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -28,6 +28,7 @@ r600 = env.ConvenienceLibrary(
'r600_state_common.c',
'r600_texture.c',
'r600_translate.c',
+ 'r600_upload.c',
'r700_asm.c',
'evergreen_state.c',
'eg_asm.c',
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 21d66fa956..67d742b376 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -27,6 +27,7 @@
#include "r600_asm.h"
#include "eg_sq.h"
#include "r600_opcodes.h"
+#include "evergreend.h"
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
@@ -34,15 +35,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
switch (cf->inst) {
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
- S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
+ S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
- S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
break;
@@ -89,3 +92,37 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
}
return 0;
}
+
+void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 8 - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
+ (r600_bo_offset(ve->fetch_shader)) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index 698299ec13..ecea1db4f1 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -290,6 +290,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
switch (format) {
/* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
+ return V_028C70_SWAP_ALT_REV;
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_R8_UNORM:
@@ -312,6 +313,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_028C70_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ return V_028C70_SWAP_ALT;
case PIPE_FORMAT_R8G8_UNORM:
return V_028C70_SWAP_STD;
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 0509522d81..94eef77945 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -410,9 +410,9 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) |
S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) |
- S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL);
+ S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
- S_030014_LAST_LEVEL(state->last_level) |
+ S_030014_LAST_LEVEL(state->u.tex.last_level) |
S_030014_BASE_ARRAY(0) |
S_030014_LAST_ARRAY(0), 0xffffffff, NULL);
r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x0, 0xFFFFFFFF, NULL);
@@ -633,10 +633,11 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
struct r600_surface *surf;
- unsigned level = state->cbufs[cb]->level;
+ unsigned level = state->cbufs[cb]->u.tex.level;
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype;
+ unsigned offset;
const struct util_format_description *desc;
struct r600_bo *bo[3];
@@ -647,6 +648,9 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
bo[1] = rbuffer->bo;
bo[2] = rbuffer->bo;
+ /* XXX quite sure for dx10+ hw don't need any offset hacks */
+ offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture,
+ level, state->cbufs[cb]->u.tex.first_layer);
pitch = rtex->pitch_in_pixels[level] / 8 - 1;
slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
ntype = 0;
@@ -666,7 +670,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
/* FIXME handle enabling of CB beyond BASE8 which has different offset */
r600_pipe_state_add_reg(rstate,
R_028C60_CB_COLOR0_BASE + cb * 0x3C,
- (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+ (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
r600_pipe_state_add_reg(rstate,
R_028C78_CB_COLOR0_DIM + cb * 0x3C,
0x0, 0xFFFFFFFF, NULL);
@@ -698,11 +702,12 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
struct r600_surface *surf;
unsigned level;
unsigned pitch, slice, format, stencil_format;
+ unsigned offset;
if (state->zsbuf == NULL)
return;
- level = state->zsbuf->level;
+ level = state->zsbuf->u.tex.level;
surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
@@ -712,24 +717,27 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
rtex->depth = 1;
rbuffer = &rtex->resource;
+ /* XXX quite sure for dx10+ hw don't need any offset hacks */
+ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
+ level, state->zsbuf->u.tex.first_layer);
pitch = rtex->pitch_in_pixels[level] / 8 - 1;
slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
- (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
- (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
if (stencil_format) {
uint32_t stencil_offset;
stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255;
r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
- (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
- (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
}
r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
@@ -762,8 +770,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&rctx->framebuffer, state);
- rctx->pframebuffer = &rctx->framebuffer;
-
/* build states */
for (int i = 0; i < state->nr_cbufs; i++) {
evergreen_cb(rctx, rstate, state, i);
@@ -825,6 +831,10 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate;
r600_context_pipe_state_set(&rctx->ctx, rstate);
+
+ if (state->zsbuf) {
+ evergreen_polygon_offset_update(rctx);
+ }
}
static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
@@ -832,6 +842,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader,
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_resource *rbuffer = (struct r600_resource*)buffer;
+ uint32_t offset;
/* Note that the state tracker can unbind constant buffers by
* passing NULL here.
@@ -840,6 +851,8 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader,
return;
}
+ r600_upload_const_buffer(rctx, buffer, &offset);
+
switch (shader) {
case PIPE_SHADER_VERTEX:
rctx->vs_const_buffer.nregs = 0;
@@ -849,7 +862,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader,
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&rctx->vs_const_buffer,
R_028980_ALU_CONST_CACHE_VS_0,
- (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
break;
case PIPE_SHADER_FRAGMENT:
@@ -860,7 +873,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader,
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&rctx->ps_const_buffer,
R_028940_ALU_CONST_CACHE_PS_0,
- (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
break;
default:
@@ -1057,12 +1070,76 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
+ case CHIP_BARTS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 20;
+ num_gs_threads = 20;
+ num_es_threads = 20;
+ num_hs_threads = 20;
+ num_ls_threads = 20;
+ num_ps_stack_entries = 85;
+ num_vs_stack_entries = 85;
+ num_gs_stack_entries = 85;
+ num_es_stack_entries = 85;
+ num_hs_stack_entries = 85;
+ num_ls_stack_entries = 85;
+ break;
+ case CHIP_TURKS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 20;
+ num_gs_threads = 20;
+ num_es_threads = 20;
+ num_hs_threads = 20;
+ num_ls_threads = 20;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
+ case CHIP_CAICOS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 10;
+ num_gs_threads = 10;
+ num_es_threads = 10;
+ num_hs_threads = 10;
+ num_ls_threads = 10;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
}
tmp = 0x00000000;
switch (family) {
case CHIP_CEDAR:
case CHIP_PALM:
+ case CHIP_CAICOS:
break;
default:
tmp |= S_008C00_VC_ENABLE(1);
@@ -1194,29 +1271,101 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL);
-r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL,
- 0x0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL);
r600_context_pipe_state_set(&rctx->ctx, rstate);
}
-int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
-void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
+void evergreen_polygon_offset_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state state;
+
+ state.id = R600_PIPE_STATE_POLYGON_OFFSET;
+ state.nregs = 0;
+ if (rctx->rasterizer && rctx->framebuffer.zsbuf) {
+ float offset_units = rctx->rasterizer->offset_units;
+ unsigned offset_db_fmt_cntl = 0, depth;
+
+ switch (rctx->framebuffer.zsbuf->texture->format) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ depth = -24;
+ offset_units *= 2.0f;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ depth = -23;
+ offset_units *= 1.0f;
+ offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ depth = -16;
+ offset_units *= 4.0f;
+ break;
+ default:
+ return;
+ }
+ /* FIXME some of those reg can be computed with cso */
+ offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
+ r600_pipe_state_add_reg(&state,
+ R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
+ fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
+ fui(offset_units), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
+ fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
+ fui(offset_units), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+ offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set(&rctx->ctx, &state);
+ }
+}
+
+static void evergreen_spi_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_shader *shader = rctx->ps_shader;
+ struct r600_pipe_state rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned i, tmp;
+
+ rstate.nregs = 0;
+ for (i = 0; i < rshader->ninput; i++) {
+ tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
+ if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
+ tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
+ }
+ if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
+ rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
+ tmp |= S_028644_PT_SPRITE_TEX(1);
+ }
+ r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
+ }
+ r600_context_pipe_state_set(&rctx->ctx, &rstate);
+}
+
+void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx)
{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate;
struct r600_resource *rbuffer;
- unsigned i, j, offset, prim;
- u32 vgt_dma_index_type, vgt_draw_initiator, mask;
struct pipe_vertex_buffer *vertex_buffer;
- struct r600_draw rdraw;
- struct r600_pipe_state vgt;
- struct r600_drawl draw;
- boolean translate = FALSE;
+ unsigned i, offset;
+
+ /* we don't update until we know vertex elements */
+ if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
+ return;
if (rctx->vertex_elements->incompatible_layout) {
+ /* translate rebind new vertex elements so
+ * return once translated
+ */
r600_begin_vertex_translate(rctx);
- translate = TRUE;
+ return;
}
if (rctx->any_user_vbs) {
@@ -1224,6 +1373,72 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
rctx->any_user_vbs = FALSE;
}
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ rctx->nvs_resource = rctx->vertex_elements->count;
+ } else {
+ /* bind vertex buffer once */
+ rctx->nvs_resource = rctx->nvertex_buffer;
+ }
+
+ for (i = 0 ; i < rctx->nvs_resource; i++) {
+ rstate = &rctx->vs_resource[i];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ unsigned vbuffer_index;
+ vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
+ vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = rctx->vertex_elements->vbuffer_offset[i];
+ } else {
+ /* bind vertex buffer once */
+ vertex_buffer = &rctx->vertex_buffer[i];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = 0;
+ }
+ if (vertex_buffer == NULL || rbuffer == NULL)
+ continue;
+ offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
+
+ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
+ rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
+ S_030008_STRIDE(vertex_buffer->stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
+ S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+ S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+ S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+ S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
+ }
+}
+
+int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
+void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_resource *rbuffer;
+ u32 vgt_dma_index_type, vgt_draw_initiator, mask;
+ struct r600_draw rdraw;
+ struct r600_pipe_state vgt;
+ struct r600_drawl draw;
+ unsigned prim;
+
memset(&draw, 0, sizeof(struct r600_drawl));
draw.ctx = ctx;
draw.mode = info->mode;
@@ -1272,48 +1487,23 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
if (r600_conv_pipe_prim(draw.mode, &prim))
return;
-
- /* rebuild vertex shader if input format changed */
- if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader))
+ if (unlikely(rctx->ps_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
return;
- if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader))
+ }
+ if (unlikely(rctx->vs_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
+ return;
+ }
+ /* there should be enough input */
+ if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
+ R600_ERR("%d resources provided, expecting %d\n",
+ rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
return;
-
- for (i = 0 ; i < rctx->vertex_elements->count; i++) {
- uint32_t word3, word2;
- uint32_t format;
- rstate = &rctx->vs_resource[i];
-
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- j = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[j];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->elements[i].src_offset +
- vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
-
- format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]);
-
- word2 = format | S_030008_STRIDE(vertex_buffer->stride);
-
- word3 = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
- S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
- S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
- S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W);
-
- r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, word3, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL);
- evergreen_fs_resource_set(&rctx->ctx, rstate, i);
}
+ evergreen_spi_update(rctx);
+
mask = 0;
for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
mask |= (0xF << (i * 4));
@@ -1328,46 +1518,6 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
-
- if (rctx->rasterizer && rctx->framebuffer.zsbuf) {
- float offset_units = rctx->rasterizer->offset_units;
- unsigned offset_db_fmt_cntl = 0, depth;
-
- switch (rctx->framebuffer.zsbuf->texture->format) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- depth = -24;
- offset_units *= 2.0f;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- depth = -23;
- offset_units *= 1.0f;
- offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- depth = -16;
- offset_units *= 4.0f;
- break;
- default:
- return;
- }
- offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
- r600_pipe_state_add_reg(&vgt,
- R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
- fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
- fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
- fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
- fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
- offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
- }
r600_context_pipe_state_set(&rctx->ctx, &vgt);
rdraw.vgt_num_indices = draw.count;
@@ -1382,28 +1532,22 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
evergreen_context_draw(&rctx->ctx, &rdraw);
- if (translate)
- r600_end_vertex_translate(rctx);
-
pipe_resource_reference(&draw.index_buffer, NULL);
}
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
+ unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
int pos_index = -1, face_index = -1;
int ninterp = 0;
boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
unsigned spi_baryc_cntl;
- /* clear previous register */
rstate->nregs = 0;
for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
/* evergreen NUM_INTERP only contains values interpolated into the LDS,
POSITION goes via GPRs from the SC so isn't counted */
if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
@@ -1421,16 +1565,6 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
if (rshader->input[i].centroid)
have_centroid = TRUE;
}
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
- }
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
}
for (i = 0; i < rshader->noutput; i++) {
if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
@@ -1569,14 +1703,8 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
R_028864_SQ_PGM_RESOURCES_2_VS,
0x0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
- R_0288A8_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
R_02885C_SQ_PGM_START_VS,
(r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate,
- R_0288A4_SQ_PGM_START_FS,
- (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo_fetch);
r600_pipe_state_add_reg(rstate,
R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 2ab60f3086..a852bef615 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -35,7 +35,7 @@
#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4)
#define R600_ERR(fmt, args...) \
- fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args)
+ fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
typedef uint64_t u64;
typedef uint32_t u32;
@@ -92,6 +92,9 @@ enum radeon_family {
CHIP_CYPRESS,
CHIP_HEMLOCK,
CHIP_PALM,
+ CHIP_BARTS,
+ CHIP_TURKS,
+ CHIP_CAICOS,
CHIP_LAST,
};
@@ -245,10 +248,7 @@ struct r600_context {
u32 *pm4;
struct list_head query_list;
unsigned num_query_running;
- unsigned fence;
struct list_head fenced_bo;
- unsigned *cfence;
- struct r600_bo *fence_bo;
};
struct r600_draw {
@@ -284,14 +284,12 @@ void r600_context_queries_resume(struct r600_context *ctx);
int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon);
void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
-void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-
void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
+struct radeon *radeon_decref(struct radeon *radeon);
+
#endif
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index edadedff25..beefd17ae2 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -22,59 +22,122 @@
*/
#include <stdio.h>
#include <errno.h>
+#include "util/u_format.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "r600_pipe.h"
#include "r600_sq.h"
#include "r600_opcodes.h"
#include "r600_asm.h"
+#include "r600_formats.h"
+#include "r600d.h"
-static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
+#define NUM_OF_CYCLES 3
+#define NUM_OF_COMPONENTS 4
+
+static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu)
{
if(alu->is_op3)
return 3;
- switch (alu->inst) {
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
- return 0;
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
- return 2;
-
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
- return 1;
- default: R600_ERR(
- "Need instruction operand number for 0x%x.\n", alu->inst);
- };
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ switch (alu->inst) {
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
+ return 0;
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
+ return 2;
+
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+ return 1;
+ default: R600_ERR(
+ "Need instruction operand number for 0x%x.\n", alu->inst);
+ }
+ break;
+ case CHIPREV_EVERGREEN:
+ switch (alu->inst) {
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
+ return 0;
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW:
+ return 2;
+
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+ return 1;
+ default: R600_ERR(
+ "Need instruction operand number for 0x%x.\n", alu->inst);
+ }
+ break;
+ }
return 3;
}
@@ -101,7 +164,6 @@ static struct r600_bc_alu *r600_bc_alu(void)
if (alu == NULL)
return NULL;
LIST_INITHEAD(&alu->list);
- LIST_INITHEAD(&alu->bs_list);
return alu;
}
@@ -152,6 +214,9 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
+ case CHIP_BARTS:
+ case CHIP_TURKS:
+ case CHIP_CAICOS:
bc->chiprev = CHIPREV_EVERGREEN;
break;
default:
@@ -189,221 +254,813 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
return 0;
}
-const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000
- SQ_ALU_VEC_120, //001
- SQ_ALU_VEC_102, //010
+/* alu instructions that can ony exits once per group */
+static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+ }
+}
- SQ_ALU_VEC_201, //011
- SQ_ALU_VEC_012, //100
- SQ_ALU_VEC_021, //101
+static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
+ }
+}
- SQ_ALU_VEC_012, //110
- SQ_ALU_VEC_012}; //111
+static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+ }
+}
-const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000
- SQ_ALU_SCL_122, //001
- SQ_ALU_SCL_122, //010
+/* alu instructions that can only execute on the vector unit */
+static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ return is_alu_reduction_inst(bc, alu) ||
+ is_alu_mova_inst(bc, alu);
+}
- SQ_ALU_SCL_221, //011
- SQ_ALU_SCL_212, //100
- SQ_ALU_SCL_122, //101
+/* alu instructions that can only execute on the trans unit */
+static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ if (!alu->is_op3)
+ return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+ else
+ return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4;
+ case CHIPREV_EVERGREEN:
+ default:
+ if (!alu->is_op3)
+ return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+ else
+ return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
+ }
+}
+
+/* alu instructions that can execute on any unit */
+static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ return !is_alu_vec_unit_inst(bc, alu) &&
+ !is_alu_trans_unit_inst(bc, alu);
+}
+
+static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first,
+ struct r600_bc_alu *assignment[5])
+{
+ struct r600_bc_alu *alu;
+ unsigned i, chan, trans;
+
+ for (i = 0; i < 5; i++)
+ assignment[i] = NULL;
+
+ for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
+ chan = alu->dst.chan;
+ if (is_alu_trans_unit_inst(bc, alu))
+ trans = 1;
+ else if (is_alu_vec_unit_inst(bc, alu))
+ trans = 0;
+ else if (assignment[chan])
+ trans = 1; // assume ALU_INST_PREFER_VECTOR
+ else
+ trans = 0;
- SQ_ALU_SCL_122, //110
- SQ_ALU_SCL_122}; //111
+ if (trans) {
+ if (assignment[4]) {
+ assert(0); //ALU.Trans has already been allocated
+ return -1;
+ }
+ assignment[4] = alu;
+ } else {
+ if (assignment[chan]) {
+ assert(0); //ALU.chan has already been allocated
+ return -1;
+ }
+ assignment[chan] = alu;
+ }
-static int init_gpr(struct r600_bc_alu *alu)
+ if (alu->last)
+ break;
+ }
+ return 0;
+}
+
+struct alu_bank_swizzle {
+ int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+ int hw_cfile_addr[4];
+ int hw_cfile_elem[4];
+};
+
+const unsigned cycle_for_bank_swizzle_vec[][3] = {
+ [SQ_ALU_VEC_012] = { 0, 1, 2 },
+ [SQ_ALU_VEC_021] = { 0, 2, 1 },
+ [SQ_ALU_VEC_120] = { 1, 2, 0 },
+ [SQ_ALU_VEC_102] = { 1, 0, 2 },
+ [SQ_ALU_VEC_201] = { 2, 0, 1 },
+ [SQ_ALU_VEC_210] = { 2, 1, 0 }
+};
+
+const unsigned cycle_for_bank_swizzle_scl[][3] = {
+ [SQ_ALU_SCL_210] = { 2, 1, 0 },
+ [SQ_ALU_SCL_122] = { 1, 2, 2 },
+ [SQ_ALU_SCL_212] = { 2, 1, 2 },
+ [SQ_ALU_SCL_221] = { 2, 2, 1 }
+};
+
+static void init_bank_swizzle(struct alu_bank_swizzle *bs)
{
- int cycle, component;
+ int i, cycle, component;
/* set up gpr use */
for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
for (component = 0; component < NUM_OF_COMPONENTS; component++)
- alu->hw_gpr[cycle][component] = -1;
- return 0;
+ bs->hw_gpr[cycle][component] = -1;
+ for (i = 0; i < 4; i++)
+ bs->hw_cfile_addr[i] = -1;
+ for (i = 0; i < 4; i++)
+ bs->hw_cfile_elem[i] = -1;
}
-
-#if 0
-static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle)
+
+static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
{
- if (alu->hw_gpr[cycle][chan] < 0)
- alu->hw_gpr[cycle][chan] = sel;
- else if (alu->hw_gpr[cycle][chan] != (int)sel) {
- R600_ERR("Another scalar operation has already used GPR read port for channel\n");
+ if (bs->hw_gpr[cycle][chan] == -1)
+ bs->hw_gpr[cycle][chan] = sel;
+ else if (bs->hw_gpr[cycle][chan] != (int)sel) {
+ // Another scalar operation has already used GPR read port for channel
return -1;
}
return 0;
}
-
-static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
-{
- int table[3];
- int ret = 0;
- switch (swiz) {
- case SQ_ALU_SCL_210:
- table[0] = 2; table[1] = 1; table[2] = 0;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_122:
- table[0] = 1; table[1] = 2; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_212:
- table[0] = 2; table[1] = 1; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_221:
- table[0] = 2; table[1] = 2; table[2] = 1;
- *p_cycle = table[sel];
- break;
- break;
- default:
- R600_ERR("bad scalar bank swizzle value\n");
- ret = -1;
- break;
+
+static int reserve_cfile(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
+{
+ int res, resmatch = -1, resempty = -1;
+ for (res = 3; res >= 0; --res) {
+ if (bs->hw_cfile_addr[res] == -1)
+ resempty = res;
+ else if (bs->hw_cfile_addr[res] == sel &&
+ bs->hw_cfile_elem[res] == chan)
+ resmatch = res;
}
- return ret;
-}
-
-static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
-{
- int table[3];
- int ret;
-
- switch (swiz) {
- case SQ_ALU_VEC_012:
- table[0] = 0; table[1] = 1; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_021:
- table[0] = 0; table[1] = 2; table[2] = 1;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_120:
- table[0] = 1; table[1] = 2; table[2] = 0;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_102:
- table[0] = 1; table[1] = 0; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_201:
- table[0] = 2; table[1] = 0; table[2] = 1;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_210:
- table[0] = 2; table[1] = 1; table[2] = 0;
- *p_cycle = table[sel];
- break;
- default:
- R600_ERR("bad vector bank swizzle value\n");
- ret = -1;
- break;
+ if (resmatch != -1)
+ return 0; // Read for this scalar element already reserved, nothing to do here.
+ else if (resempty != -1) {
+ bs->hw_cfile_addr[resempty] = sel;
+ bs->hw_cfile_elem[resempty] = chan;
+ } else {
+ // All cfile read ports are used, cannot reference vector element
+ return -1;
}
- return ret;
+ return 0;
+}
+
+static int is_gpr(unsigned sel)
+{
+ return (sel >= 0 && sel <= 127);
}
+/* CB constants start at 512, and get translated to a kcache index when ALU
+ * clauses are constructed. Note that we handle kcache constants the same way
+ * as (the now gone) cfile constants, is that really required? */
+static int is_cfile(unsigned sel)
+{
+ return (sel > 255 && sel < 512) ||
+ (sel > 511 && sel < 4607) || // Kcache before translate
+ (sel > 127 && sel < 192); // Kcache after translate
+}
+
+static int is_const(int sel)
+{
+ return is_cfile(sel) ||
+ (sel >= V_SQ_ALU_SRC_0 &&
+ sel <= V_SQ_ALU_SRC_LITERAL);
+}
+
+static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu,
+ struct alu_bank_swizzle *bs, int bank_swizzle)
+{
+ int r, src, num_src, sel, elem, cycle;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; src++) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_gpr(sel)) {
+ cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
+ if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
+ // Nothing to do; special-case optimization,
+ // second source uses first source’s reservation
+ continue;
+ else {
+ r = reserve_gpr(bs, sel, elem, cycle);
+ if (r)
+ return r;
+ }
+ } else if (is_cfile(sel)) {
+ r = reserve_cfile(bs, sel, elem);
+ if (r)
+ return r;
+ }
+ // No restrictions on PV, PS, literal or special constants
+ }
+ return 0;
+}
+
+static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu,
+ struct alu_bank_swizzle *bs, int bank_swizzle)
+{
+ int r, src, num_src, const_count, sel, elem, cycle;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (const_count = 0, src = 0; src < num_src; ++src) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_const(sel)) { // Any constant, including literal and inline constants
+ if (const_count >= 2)
+ // More than two references to a constant in
+ // transcendental operation.
+ return -1;
+ else
+ const_count++;
+ }
+ if (is_cfile(sel)) {
+ r = reserve_cfile(bs, sel, elem);
+ if (r)
+ return r;
+ }
+ }
+ for (src = 0; src < num_src; ++src) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_gpr(sel)) {
+ cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
+ if (cycle < const_count)
+ // Cycle for GPR load conflicts with
+ // constant load in transcendental operation.
+ return -1;
+ r = reserve_gpr(bs, sel, elem, cycle);
+ if (r)
+ return r;
+ }
+ // Constants already processed
+ // No restrictions on PV, PS
+ }
+ return 0;
+}
-
-static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter)
+static int check_and_set_bank_swizzle(struct r600_bc *bc,
+ struct r600_bc_alu *slots[5])
{
- int num_src;
- int i;
- int channel_swizzle;
+ struct alu_bank_swizzle bs;
+ int bank_swizzle[5];
+ int i, r = 0, forced = 0;
+
+ for (i = 0; i < 5; i++)
+ if (slots[i] && slots[i]->bank_swizzle_force) {
+ slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+ forced = 1;
+ }
- num_src = r600_bc_get_num_operands(alu);
+ if (forced)
+ return 0;
- for (i = 0; i < num_src; i++) {
- channel_swizzle = alu->src[i].chan;
- if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3)
- chan_counter[channel_swizzle]++;
+ // just check every possible combination of bank swizzle
+ // not very efficent, but works on the first try in most of the cases
+ for (i = 0; i < 4; i++)
+ bank_swizzle[i] = SQ_ALU_VEC_012;
+ bank_swizzle[4] = SQ_ALU_SCL_210;
+ while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
+ init_bank_swizzle(&bs);
+ for (i = 0; i < 4; i++) {
+ if (slots[i]) {
+ r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
+ if (r)
+ break;
+ }
+ }
+ if (!r && slots[4]) {
+ r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
+ }
+ if (!r) {
+ for (i = 0; i < 5; i++) {
+ if (slots[i])
+ slots[i]->bank_swizzle = bank_swizzle[i];
+ }
+ return 0;
+ }
+
+ for (i = 0; i < 5; i++) {
+ bank_swizzle[i]++;
+ if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+ break;
+ else
+ bank_swizzle[i] = SQ_ALU_VEC_012;
+ }
}
+
+ // couldn't find a working swizzle
+ return -1;
}
-/* we need something like this I think - but this is bogus */
-int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+static int replace_gpr_with_pv_ps(struct r600_bc *bc,
+ struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
{
- struct r600_bc_alu *alu;
- int chan_counter[4] = { 0 };
+ struct r600_bc_alu *prev[5];
+ int gpr[5], chan[5];
+ int i, j, r, src, num_src;
- update_chan_counter(alu_first, chan_counter);
+ r = assign_alu_units(bc, alu_prev, prev);
+ if (r)
+ return r;
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- update_chan_counter(alu, chan_counter);
+ for (i = 0; i < 5; ++i) {
+ if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
+ gpr[i] = prev[i]->dst.sel;
+ if (is_alu_reduction_inst(bc, prev[i]))
+ chan[i] = 0;
+ else
+ chan[i] = prev[i]->dst.chan;
+ } else
+ gpr[i] = -1;
}
- if (chan_counter[0] > 3 ||
- chan_counter[1] > 3 ||
- chan_counter[2] > 3 ||
- chan_counter[3] > 3) {
- R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
- alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]);
- return -1;
+ for (i = 0; i < 5; ++i) {
+ struct r600_bc_alu *alu = slots[i];
+ if(!alu)
+ continue;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; ++src) {
+ if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
+ continue;
+
+ if (alu->src[src].sel == gpr[4] &&
+ alu->src[src].chan == chan[4]) {
+ alu->src[src].sel = V_SQ_ALU_SRC_PS;
+ alu->src[src].chan = 0;
+ continue;
+ }
+
+ for (j = 0; j < 4; ++j) {
+ if (alu->src[src].sel == gpr[j] &&
+ alu->src[src].chan == j) {
+ alu->src[src].sel = V_SQ_ALU_SRC_PV;
+ alu->src[src].chan = chan[j];
+ break;
+ }
+ }
+ }
}
+
return 0;
}
-#endif
-static int is_const(int sel)
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
{
- if (sel > 255 && sel < 512)
- return 1;
- if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL)
- return 1;
- return 0;
+ switch(value) {
+ case 0:
+ *sel = V_SQ_ALU_SRC_0;
+ break;
+ case 1:
+ *sel = V_SQ_ALU_SRC_1_INT;
+ break;
+ case -1:
+ *sel = V_SQ_ALU_SRC_M_1_INT;
+ break;
+ case 0x3F800000: // 1.0f
+ *sel = V_SQ_ALU_SRC_1;
+ break;
+ case 0x3F000000: // 0.5f
+ *sel = V_SQ_ALU_SRC_0_5;
+ break;
+ case 0xBF800000: // -1.0f
+ *sel = V_SQ_ALU_SRC_1;
+ *neg ^= 1;
+ break;
+ case 0xBF000000: // -0.5f
+ *sel = V_SQ_ALU_SRC_0_5;
+ *neg ^= 1;
+ break;
+ default:
+ *sel = V_SQ_ALU_SRC_LITERAL;
+ break;
+ }
}
-static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu)
+/* compute how many literal are needed */
+static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu,
+ uint32_t literal[4], unsigned *nliteral)
{
- unsigned swizzle_key;
-
- if (alu->bank_swizzle_force) {
- alu->bank_swizzle = alu->bank_swizzle_force;
- return 0;
+ unsigned num_src = r600_bc_get_num_operands(bc, alu);
+ unsigned i, j;
+
+ for (i = 0; i < num_src; ++i) {
+ if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ uint32_t value = alu->src[i].value[alu->src[i].chan];
+ unsigned found = 0;
+ for (j = 0; j < *nliteral; ++j) {
+ if (literal[j] == value) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ if (*nliteral >= 4)
+ return -EINVAL;
+ literal[(*nliteral)++] = value;
+ }
+ }
}
- swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) +
- (is_const(alu->src[1].sel) ? 2 : 0 ) +
- (is_const(alu->src[2].sel) ? 1 : 0 );
-
- alu->bank_swizzle = bank_swizzle_scl[swizzle_key];
return 0;
}
-static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu)
+static void r600_bc_alu_adjust_literals(struct r600_bc *bc,
+ struct r600_bc_alu *alu,
+ uint32_t literal[4], unsigned nliteral)
+{
+ unsigned num_src = r600_bc_get_num_operands(bc, alu);
+ unsigned i, j;
+
+ for (i = 0; i < num_src; ++i) {
+ if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ uint32_t value = alu->src[i].value[alu->src[i].chan];
+ for (j = 0; j < nliteral; ++j) {
+ if (literal[j] == value) {
+ alu->src[i].chan = j;
+ break;
+ }
+ }
+ }
+ }
+}
+
+static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
+ struct r600_bc_alu *alu_prev)
{
- unsigned swizzle_key;
+ struct r600_bc_alu *prev[5];
+ struct r600_bc_alu *result[5] = { NULL };
+
+ uint32_t literal[4];
+ unsigned nliteral = 0;
+
+ int i, j, r, src, num_src;
+ int num_once_inst = 0;
+
+ r = assign_alu_units(bc, alu_prev, prev);
+ if (r)
+ return r;
- if (alu->bank_swizzle_force) {
- alu->bank_swizzle = alu->bank_swizzle_force;
+ for (i = 0; i < 5; ++i) {
+ struct r600_bc_alu *alu;
+
+ /* check number of literals */
+ if (prev[i] && r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral))
+ return 0;
+ if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral))
+ return 0;
+
+ // let's check used slots
+ if (prev[i] && !slots[i]) {
+ result[i] = prev[i];
+ num_once_inst += is_alu_once_inst(bc, prev[i]);
+ continue;
+ } else if (prev[i] && slots[i]) {
+ if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
+ // trans unit is still free try to use it
+ if (is_alu_any_unit_inst(bc, slots[i])) {
+ result[i] = prev[i];
+ result[4] = slots[i];
+ } else if (is_alu_any_unit_inst(bc, prev[i])) {
+ result[i] = slots[i];
+ result[4] = prev[i];
+ } else
+ return 0;
+ } else
+ return 0;
+ } else if(!slots[i]) {
+ continue;
+ } else
+ result[i] = slots[i];
+
+ // let's check source gprs
+ alu = slots[i];
+ num_once_inst += is_alu_once_inst(bc, alu);
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; ++src) {
+ // constants doesn't matter
+ if (!is_gpr(alu->src[src].sel))
+ continue;
+
+ for (j = 0; j < 5; ++j) {
+ if (!prev[j] || !prev[j]->dst.write)
+ continue;
+
+ // if it's relative then we can't determin which gpr is really used
+ if (prev[j]->dst.chan == alu->src[src].chan &&
+ (prev[j]->dst.sel == alu->src[src].sel ||
+ prev[j]->dst.rel || alu->src[src].rel))
+ return 0;
+ }
+ }
+ }
+
+ /* more than one PRED_ or KILL_ ? */
+ if (num_once_inst > 1)
return 0;
+
+ /* check if the result can still be swizzlet */
+ r = check_and_set_bank_swizzle(bc, result);
+ if (r)
+ return 0;
+
+ /* looks like everything worked out right, apply the changes */
+
+ /* sort instructions */
+ for (i = 0; i < 5; ++i) {
+ slots[i] = result[i];
+ if (result[i]) {
+ LIST_DEL(&result[i]->list);
+ result[i]->last = 0;
+ LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
+ }
+ }
+
+ /* determine new last instruction */
+ LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;
+
+ /* determine new first instruction */
+ for (i = 0; i < 5; ++i) {
+ if (result[i]) {
+ bc->cf_last->curr_bs_head = result[i];
+ break;
+ }
}
- swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) +
- (is_const(alu->src[1].sel) ? 2 : 0 ) +
- (is_const(alu->src[2].sel) ? 1 : 0 );
- alu->bank_swizzle = bank_swizzle_vec[swizzle_key];
+ bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
+ bc->cf_last->prev2_bs_head = NULL;
+
return 0;
}
-static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+/* This code handles kcache lines as single blocks of 32 constants. We could
+ * probably do slightly better by recognizing that we actually have two
+ * consecutive lines of 16 constants, but the resulting code would also be
+ * somewhat more complicated. */
+static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type)
{
- struct r600_bc_alu *alu = NULL;
- int num_instr = 1;
+ struct r600_bc_kcache *kcache = bc->cf_last->kcache;
+ unsigned int required_lines;
+ unsigned int free_lines = 0;
+ unsigned int cache_line[3];
+ unsigned int count = 0;
+ unsigned int i, j;
+ int r;
+
+ /* Collect required cache lines. */
+ for (i = 0; i < 3; ++i) {
+ bool found = false;
+ unsigned int line;
+
+ if (alu->src[i].sel < 512)
+ continue;
+
+ line = ((alu->src[i].sel - 512) / 32) * 2;
+
+ for (j = 0; j < count; ++j) {
+ if (cache_line[j] == line) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ cache_line[count++] = line;
+ }
- init_gpr(alu_first);
+ /* This should never actually happen. */
+ if (count >= 3) return -ENOMEM;
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- num_instr++;
+ for (i = 0; i < 2; ++i) {
+ if (kcache[i].mode == V_SQ_CF_KCACHE_NOP) {
+ ++free_lines;
+ }
}
- if (num_instr == 1) {
- check_scalar(bc, alu_first);
-
- } else {
-/* check_read_slots(bc, bc->cf_last->curr_bs_head);*/
- check_vector(bc, alu_first);
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- check_vector(bc, alu);
+ /* Filter lines pulled in by previous intructions. Note that this is
+ * only for the required_lines count, we can't remove these from the
+ * cache_line array since we may have to start a new ALU clause. */
+ for (i = 0, required_lines = count; i < count; ++i) {
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == cache_line[i]) {
+ --required_lines;
+ break;
+ }
+ }
+ }
+
+ /* Start a new ALU clause if needed. */
+ if (required_lines > free_lines) {
+ if ((r = r600_bc_add_cf(bc))) {
+ return r;
+ }
+ bc->cf_last->inst = (type << 3);
+ kcache = bc->cf_last->kcache;
+ }
+
+ /* Setup the kcache lines. */
+ for (i = 0; i < count; ++i) {
+ bool found = false;
+
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == cache_line[i]) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) continue;
+
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_NOP) {
+ kcache[j].bank = 0;
+ kcache[j].addr = cache_line[i];
+ kcache[j].mode = V_SQ_CF_KCACHE_LOCK_2;
+ break;
+ }
+ }
+ }
+
+ /* Alter the src operands to refer to the kcache. */
+ for (i = 0; i < 3; ++i) {
+ static const unsigned int base[] = {128, 160, 256, 288};
+ unsigned int line;
+
+ if (alu->src[i].sel < 512)
+ continue;
+
+ alu->src[i].sel -= 512;
+ line = (alu->src[i].sel / 32) * 2;
+
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == line) {
+ alu->src[i].sel &= 0x1f;
+ alu->src[i].sel += base[j];
+ break;
+ }
}
}
+
return 0;
}
@@ -416,62 +1073,89 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
if (nalu == NULL)
return -ENOMEM;
memcpy(nalu, alu, sizeof(struct r600_bc_alu));
- nalu->nliteral = 0;
+
+ if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
+ /* check if we could add it anyway */
+ if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) &&
+ type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) {
+ LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
+ if (lalu->predicate) {
+ bc->force_add_cf = 1;
+ break;
+ }
+ }
+ } else
+ bc->force_add_cf = 1;
+ }
/* cf can contains only alu or only vtx or only tex */
- if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
- bc->force_add_cf) {
+ if (bc->cf_last == NULL || bc->force_add_cf) {
r = r600_bc_add_cf(bc);
if (r) {
free(nalu);
return r;
}
- bc->cf_last->inst = (type << 3);
}
+ bc->cf_last->inst = (type << 3);
+
+ /* Setup the kcache for this ALU instruction. This will start a new
+ * ALU clause if needed. */
+ if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) {
+ free(nalu);
+ return r;
+ }
+
if (!bc->cf_last->curr_bs_head) {
bc->cf_last->curr_bs_head = nalu;
- LIST_INITHEAD(&nalu->bs_list);
- } else {
- LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list);
}
- /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
+ /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
* worst case */
- if (alu->last && (bc->cf_last->ndw >> 1) >= 120) {
+ if (nalu->last && (bc->cf_last->ndw >> 1) >= 120) {
bc->force_add_cf = 1;
}
/* number of gpr == the last gpr used in any alu */
for (i = 0; i < 3; i++) {
- if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
- bc->ngpr = alu->src[i].sel + 1;
- }
- /* compute how many literal are needed
- * either 2 or 4 literals
- */
- if (alu->src[i].sel == 253) {
- if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
- nalu->nliteral = (alu->src[i].chan + 2) & 0x6;
- }
- }
- }
- if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
- lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
- if (!lalu->last && lalu->nliteral > nalu->nliteral) {
- nalu->nliteral = lalu->nliteral;
+ if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
+ bc->ngpr = nalu->src[i].sel + 1;
}
+ if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
+ r600_bc_special_constants(
+ nalu->src[i].value[nalu->src[i].chan],
+ &nalu->src[i].sel, &nalu->src[i].neg);
}
- if (alu->dst.sel >= bc->ngpr) {
- bc->ngpr = alu->dst.sel + 1;
+ if (nalu->dst.sel >= bc->ngpr) {
+ bc->ngpr = nalu->dst.sel + 1;
}
LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
/* each alu use 2 dwords */
bc->cf_last->ndw += 2;
bc->ndw += 2;
- bc->cf_last->kcache0_mode = 2;
-
/* process cur ALU instructions for bank swizzle */
- if (alu->last) {
- check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
+ if (nalu->last) {
+ struct r600_bc_alu *slots[5];
+ r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
+ if (r)
+ return r;
+
+ if (bc->cf_last->prev_bs_head) {
+ r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head);
+ if (r)
+ return r;
+ }
+
+ if (bc->cf_last->prev_bs_head) {
+ r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
+ if (r)
+ return r;
+ }
+
+ r = check_and_set_bank_swizzle(bc, slots);
+ if (r)
+ return r;
+
+ bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
+ bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
bc->cf_last->curr_bs_head = NULL;
}
return 0;
@@ -482,44 +1166,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
}
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
-{
- struct r600_bc_alu *alu;
-
- if (bc->cf_last == NULL) {
- return 0;
- }
- if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
- return 0;
- }
- /* all same on EG */
- if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
- return 0;
- }
- /* same on EG */
- if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
- (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
- LIST_IS_EMPTY(&bc->cf_last->alu)) {
- R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
- return -EINVAL;
- }
- alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
- if (!alu->last || !alu->nliteral || alu->literal_added) {
- return 0;
- }
- memcpy(alu->value, value, 4 * 4);
- bc->cf_last->ndw += alu->nliteral;
- bc->ndw += alu->nliteral;
- alu->literal_added = 1;
- return 0;
-}
-
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
{
struct r600_bc_vtx *nvtx = r600_bc_vtx();
@@ -545,7 +1191,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
/* each fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
- if ((bc->ndw / 4) > 7)
+ if ((bc->cf_last->ndw / 4) > 7)
bc->force_add_cf = 1;
return 0;
}
@@ -570,11 +1216,17 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
}
bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX;
}
+ if (ntex->src_gpr >= bc->ngpr) {
+ bc->ngpr = ntex->src_gpr + 1;
+ }
+ if (ntex->dst_gpr >= bc->ngpr) {
+ bc->ngpr = ntex->dst_gpr + 1;
+ }
LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex);
/* each texture fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
- if ((bc->ndw / 4) > 7)
+ if ((bc->cf_last->ndw / 4) > 7)
bc->force_add_cf = 1;
return 0;
}
@@ -670,8 +1322,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
/* r600 only, r700/eg bits in r700_asm.c */
static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
- unsigned i;
-
/* don't replace gpr by pv or ps for destination register */
bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
@@ -702,19 +1352,12 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+ S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
}
- if (alu->last) {
- if (alu->nliteral && !alu->literal_added) {
- R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
- }
- for (i = 0; i < alu->nliteral; i++) {
- bc->bytecode[id++] = alu->value[i];
- }
- }
return 0;
}
@@ -726,15 +1369,17 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
- S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
+ S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
- S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
@@ -790,8 +1435,10 @@ int r600_bc_build(struct r600_bc *bc)
struct r600_bc_alu *alu;
struct r600_bc_vtx *vtx;
struct r600_bc_tex *tex;
+ uint32_t literal[4];
+ unsigned nliteral;
unsigned addr;
- int r;
+ int i, r;
if (bc->callstack[0].max > 0)
bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
@@ -805,7 +1452,19 @@ int r600_bc_build(struct r600_bc *bc)
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ nliteral = 0;
+ LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+ if (r)
+ return r;
+ if (alu->last) {
+ cf->ndw += align(nliteral, 2);
+ nliteral = 0;
+ }
+ }
break;
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
@@ -851,8 +1510,15 @@ int r600_bc_build(struct r600_bc *bc)
return r;
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ nliteral = 0;
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+ if (r)
+ return r;
+ r600_bc_alu_adjust_literals(bc, alu, literal, nliteral);
switch(bc->chiprev) {
case CHIPREV_R600:
r = r600_bc_alu_build(bc, alu, addr);
@@ -869,7 +1535,10 @@ int r600_bc_build(struct r600_bc *bc)
return r;
addr += 2;
if (alu->last) {
- addr += alu->nliteral;
+ for (i = 0; i < align(nliteral, 2); ++i) {
+ bc->bytecode[addr++] = literal[i];
+ }
+ nliteral = 0;
}
}
break;
@@ -947,3 +1616,447 @@ void r600_bc_clear(struct r600_bc *bc)
LIST_INITHEAD(&cf->list);
}
+
+void r600_bc_dump(struct r600_bc *bc)
+{
+ struct r600_bc_cf *cf = NULL;
+ struct r600_bc_alu *alu = NULL;
+ struct r600_bc_vtx *vtx = NULL;
+ struct r600_bc_tex *tex = NULL;
+
+ unsigned i, id;
+ uint32_t literal[4];
+ unsigned nliteral;
+ char chip = '6';
+
+ switch (bc->chiprev) {
+ case 1:
+ chip = '7';
+ break;
+ case 2:
+ chip = 'E';
+ break;
+ case 0:
+ default:
+ chip = '6';
+ break;
+ }
+ fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n", bc->ndw, bc->ngpr);
+ fprintf(stderr, " %c\n", chip);
+
+ LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+ id = cf->id;
+
+ switch (cf->inst) {
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d ", cf->addr);
+ fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode);
+ fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank);
+ fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank);
+ id++;
+ fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode);
+ fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr);
+ fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr);
+ fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
+ break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+ case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+ case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+ fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d\n", cf->addr);
+ id++;
+ fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
+ break;
+ case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+ case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+ fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+ fprintf(stderr, "GPR:%X ", cf->output.gpr);
+ fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
+ fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
+ fprintf(stderr, "TYPE:%X\n", cf->output.type);
+ id++;
+ fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+ fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x);
+ fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
+ fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
+ fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
+ fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
+ fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
+ fprintf(stderr, "INST:%d ", cf->output.inst);
+ fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
+ break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+ case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+ case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+ fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
+ id++;
+ fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "COND:%X ", cf->cond);
+ fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
+ break;
+ }
+
+ id = cf->addr;
+ nliteral = 0;
+ LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
+ fprintf(stderr, "REL:%d ", alu->src[0].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[0].chan);
+ fprintf(stderr, "NEG:%d) ", alu->src[0].neg);
+ fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
+ fprintf(stderr, "REL:%d ", alu->src[1].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
+ fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
+ fprintf(stderr, "LAST:%d)\n", alu->last);
+ id++;
+ fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' ');
+ fprintf(stderr, "INST:%d ", alu->inst);
+ fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
+ fprintf(stderr, "CHAN:%d ", alu->dst.chan);
+ fprintf(stderr, "REL:%d ", alu->dst.rel);
+ fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
+ fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle);
+ if (alu->is_op3) {
+ fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel);
+ fprintf(stderr, "REL:%d ", alu->src[2].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[2].chan);
+ fprintf(stderr, "NEG:%d)\n", alu->src[2].neg);
+ } else {
+ fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs);
+ fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs);
+ fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write);
+ fprintf(stderr, "OMOD:%d ", alu->omod);
+ fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate);
+ fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate);
+ }
+
+ id++;
+ if (alu->last) {
+ for (i = 0; i < nliteral; i++, id++) {
+ float *f = (float*)(bc->bytecode + id);
+ fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f);
+ }
+ id += nliteral & 1;
+ nliteral = 0;
+ }
+ }
+
+ LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+ //TODO
+ }
+
+ LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+ //TODO
+ }
+ }
+
+ fprintf(stderr, "--------------------------------------\n");
+}
+
+void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 8 - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(ve->fetch_shader) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
+void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT((count - 8) - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(ve->fetch_shader) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
+static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
+ unsigned *num_format, unsigned *format_comp)
+{
+ const struct util_format_description *desc;
+ unsigned i;
+
+ *format = 0;
+ *num_format = 0;
+ *format_comp = 0;
+
+ desc = util_format_description(pformat);
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ goto out_unknown;
+ }
+
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ switch (desc->channel[i].type) {
+ /* Half-floats, floats, doubles */
+ case UTIL_FORMAT_TYPE_FLOAT:
+ switch (desc->channel[i].size) {
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_16_FLOAT;
+ break;
+ case 2:
+ *format = FMT_16_16_FLOAT;
+ break;
+ case 3:
+ *format = FMT_16_16_16_FLOAT;
+ break;
+ case 4:
+ *format = FMT_16_16_16_16_FLOAT;
+ break;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_32_FLOAT;
+ break;
+ case 2:
+ *format = FMT_32_32_FLOAT;
+ break;
+ case 3:
+ *format = FMT_32_32_32_FLOAT;
+ break;
+ case 4:
+ *format = FMT_32_32_32_32_FLOAT;
+ break;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+ break;
+ /* Unsigned ints */
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ /* Signed ints */
+ case UTIL_FORMAT_TYPE_SIGNED:
+ switch (desc->channel[i].size) {
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_8;
+ break;
+ case 2:
+ *format = FMT_8_8;
+ break;
+ case 3:
+ // *format = FMT_8_8_8; /* fails piglit draw-vertices test */
+ // break;
+ case 4:
+ *format = FMT_8_8_8_8;
+ break;
+ }
+ break;
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_16;
+ break;
+ case 2:
+ *format = FMT_16_16;
+ break;
+ case 3:
+ // *format = FMT_16_16_16; /* fails piglit draw-vertices test */
+ // break;
+ case 4:
+ *format = FMT_16_16_16_16;
+ break;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_32;
+ break;
+ case 2:
+ *format = FMT_32_32;
+ break;
+ case 3:
+ *format = FMT_32_32_32;
+ break;
+ case 4:
+ *format = FMT_32_32_32_32;
+ break;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ *format_comp = 1;
+ }
+ if (desc->channel[i].normalized) {
+ *num_format = 0;
+ } else {
+ *num_format = 2;
+ }
+ return;
+out_unknown:
+ R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
+}
+
+int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
+{
+ unsigned ndw, i;
+ u32 *bytecode;
+ unsigned fetch_resource_start = 0, format, num_format, format_comp;
+ struct pipe_vertex_element *elements = ve->elements;
+ const struct util_format_description *desc;
+
+ /* 2 dwords for cf aligned to 4 + 4 dwords per input */
+ ndw = 8 + ve->count * 4;
+ ve->fs_size = ndw * 4;
+
+ /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
+ ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
+ if (ve->fetch_shader == NULL) {
+ return -ENOMEM;
+ }
+
+ bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+ if (bytecode == NULL) {
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -ENOMEM;
+ }
+
+ if (rctx->family >= CHIP_CEDAR) {
+ eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
+ } else {
+ r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
+ fetch_resource_start = 160;
+ }
+
+ /* vertex elements offset need special handling, if offset is bigger
+ * than what we can put in fetch instruction then we need to alterate
+ * the vertex resource offset. In such case in order to simplify code
+ * we will bound one resource per elements. It's a worst case scenario.
+ */
+ for (i = 0; i < ve->count; i++) {
+ ve->vbuffer_offset[i] = C_SQ_VTX_WORD2_OFFSET & elements[i].src_offset;
+ if (ve->vbuffer_offset[i]) {
+ ve->vbuffer_need_offset = 1;
+ }
+ }
+
+ for (i = 0; i < ve->count; i++) {
+ unsigned vbuffer_index;
+ r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp);
+ desc = util_format_description(ve->hw_format[i]);
+ if (desc == NULL) {
+ R600_ERR("unknown format %d\n", ve->hw_format[i]);
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -EINVAL;
+ }
+
+ /* see above for vbuffer_need_offset explanation */
+ vbuffer_index = elements[i].vertex_buffer_index;
+ if (ve->vbuffer_need_offset) {
+ bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start);
+ } else {
+ bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start);
+ }
+ bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
+ S_SQ_VTX_WORD0_SRC_SEL_X(0) |
+ S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
+ bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) |
+ S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) |
+ S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) |
+ S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) |
+ S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
+ S_SQ_VTX_WORD1_DATA_FORMAT(format) |
+ S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) |
+ S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) |
+ S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
+ S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1);
+ bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) |
+ S_SQ_VTX_WORD2_MEGA_FETCH(1);
+ bytecode[8 + i * 4 + 3] = 0;
+ }
+ r600_bo_unmap(rctx->radeon, ve->fetch_shader);
+ return 0;
+}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index f2016af3e7..278b4466cb 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -25,8 +25,8 @@
#include "util/u_double_list.h"
-#define NUM_OF_CYCLES 3
-#define NUM_OF_COMPONENTS 4
+struct r600_vertex_element;
+struct r600_pipe_context;
struct r600_bc_alu_src {
unsigned sel;
@@ -34,6 +34,7 @@ struct r600_bc_alu_src {
unsigned neg;
unsigned abs;
unsigned rel;
+ u32 *value;
};
struct r600_bc_alu_dst {
@@ -46,19 +47,15 @@ struct r600_bc_alu_dst {
struct r600_bc_alu {
struct list_head list;
- struct list_head bs_list; /* bank swizzle list */
struct r600_bc_alu_src src[3];
struct r600_bc_alu_dst dst;
unsigned inst;
unsigned last;
unsigned is_op3;
unsigned predicate;
- unsigned nliteral;
- unsigned literal_added;
unsigned bank_swizzle;
unsigned bank_swizzle_force;
- u32 value[4];
- int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+ unsigned omod;
};
struct r600_bc_tex {
@@ -122,6 +119,12 @@ struct r600_bc_output {
unsigned barrier;
};
+struct r600_bc_kcache {
+ unsigned bank;
+ unsigned mode;
+ unsigned addr;
+};
+
struct r600_bc_cf {
struct list_head list;
unsigned inst;
@@ -131,18 +134,15 @@ struct r600_bc_cf {
unsigned cond;
unsigned pop_count;
unsigned cf_addr; /* control flow addr */
- unsigned kcache0_mode;
- unsigned kcache1_mode;
- unsigned kcache0_addr;
- unsigned kcache1_addr;
- unsigned kcache0_bank;
- unsigned kcache1_bank;
+ struct r600_bc_kcache kcache[2];
unsigned r6xx_uses_waterfall;
struct list_head alu;
struct list_head tex;
struct list_head vtx;
struct r600_bc_output output;
struct r600_bc_alu *curr_bs_head;
+ struct r600_bc_alu *prev_bs_head;
+ struct r600_bc_alu *prev2_bs_head;
};
#define FC_NONE 0
@@ -188,18 +188,24 @@ struct r600_bc {
/* eg_asm.c */
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
+void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
/* r600_asm.c */
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
void r600_bc_clear(struct r600_bc *bc);
int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
int r600_bc_build(struct r600_bc *bc);
int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
+void r600_bc_dump(struct r600_bc *bc);
+void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
+void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
+
+int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
/* r700_asm.c */
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 74cf968799..b9ec9592e3 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -78,19 +78,24 @@ static void r600_blitter_end(struct pipe_context *ctx)
r600_context_queries_resume(&rctx->ctx);
}
-int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
+void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_surface *zsurf, *cbsurf;
+ struct pipe_surface *zsurf, *cbsurf, surf_tmpl;
int level = 0;
float depth = 1.0f;
+ surf_tmpl.format = texture->resource.base.b.format;
+ surf_tmpl.u.tex.level = level;
+ surf_tmpl.u.tex.first_layer = 0;
+ surf_tmpl.u.tex.last_layer = 0;
+ surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
- zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0,
- PIPE_BIND_DEPTH_STENCIL);
+ zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl);
- cbsurf = ctx->screen->get_tex_surface(ctx->screen,
- (struct pipe_resource*)texture->flushed_depth_texture,
- 0, level, 0, PIPE_BIND_RENDER_TARGET);
+ surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format;
+ surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
+ cbsurf = ctx->create_surface(ctx,
+ (struct pipe_resource*)texture->flushed_depth_texture, &surf_tmpl);
if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 ||
rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635)
@@ -102,9 +107,6 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te
pipe_surface_reference(&zsurf, NULL);
pipe_surface_reference(&cbsurf, NULL);
-
-
- return 0;
}
static void r600_clear(struct pipe_context *ctx, unsigned buffers,
@@ -154,41 +156,38 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx,
/* Copy a block of pixels from one surface to another using HW. */
static void r600_hw_copy_region(struct pipe_context *ctx,
- struct pipe_resource *dst,
- struct pipe_subresource subdst,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src,
- struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
r600_blitter_begin(ctx, R600_COPY);
- util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height,
- TRUE);
+ util_blitter_copy_region(rctx->blitter, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box, TRUE);
r600_blitter_end(ctx);
}
static void r600_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
- struct pipe_subresource subdst,
+ unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
- struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
boolean is_depth;
/* there is something wrong with depth resource copies at the moment so avoid them for now */
is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
if (is_depth)
- util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height);
+ util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
else
- r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height);
+ r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
}
void r600_init_blit_functions(struct r600_pipe_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index a432271b82..469c8195fe 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -29,7 +29,6 @@
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
-#include <util/u_upload_mgr.h>
#include "state_tracker/drm_driver.h"
#include <xf86drm.h>
#include "radeon_drm.h"
@@ -53,12 +52,13 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
rbuffer->magic = R600_BUFFER_MAGIC;
rbuffer->user_buffer = NULL;
- rbuffer->num_ranges = 0;
rbuffer->r.base.b = *templ;
pipe_reference_init(&rbuffer->r.base.b.reference, 1);
rbuffer->r.base.b.screen = screen;
rbuffer->r.base.vtbl = &r600_buffer_vtbl;
rbuffer->r.size = rbuffer->r.base.b.width0;
+ rbuffer->r.bo_size = rbuffer->r.size;
+ rbuffer->uploaded = FALSE;
bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage);
if (bo == NULL) {
FREE(rbuffer);
@@ -89,10 +89,12 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
rbuffer->r.base.b.width0 = bytes;
rbuffer->r.base.b.height0 = 1;
rbuffer->r.base.b.depth0 = 1;
+ rbuffer->r.base.b.array_size = 1;
rbuffer->r.base.b.flags = 0;
- rbuffer->num_ranges = 0;
rbuffer->r.bo = NULL;
+ rbuffer->r.bo_size = 0;
rbuffer->user_buffer = ptr;
+ rbuffer->uploaded = FALSE;
return &rbuffer->r.base.b;
}
@@ -104,6 +106,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen,
if (rbuffer->r.bo) {
r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL);
}
+ rbuffer->r.bo = NULL;
FREE(rbuffer);
}
@@ -113,29 +116,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
int write = 0;
uint8_t *data;
- int i;
- boolean flush = FALSE;
if (rbuffer->user_buffer)
return (uint8_t*)rbuffer->user_buffer + transfer->box.x;
- if (transfer->usage & PIPE_TRANSFER_DISCARD) {
- for (i = 0; i < rbuffer->num_ranges; i++) {
- if ((transfer->box.x >= rbuffer->ranges[i].start) &&
- (transfer->box.x < rbuffer->ranges[i].end))
- flush = TRUE;
-
- if (flush) {
- r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL);
- rbuffer->num_ranges = 0;
- rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys,
- rbuffer->r.base.b.width0, 0,
- rbuffer->r.base.b.bind,
- rbuffer->r.base.b.usage);
- break;
- }
- }
- }
if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) {
/* FIXME */
}
@@ -154,41 +138,22 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
{
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
+ if (rbuffer->user_buffer)
+ return;
+
if (rbuffer->r.bo)
r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo);
}
static void r600_buffer_transfer_flush_region(struct pipe_context *pipe,
- struct pipe_transfer *transfer,
- const struct pipe_box *box)
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
{
- struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
- unsigned i;
- unsigned offset = transfer->box.x + box->x;
- unsigned length = box->width;
-
- assert(box->x + box->width <= transfer->box.width);
-
- if (rbuffer->user_buffer)
- return;
-
- /* mark the range as used */
- for(i = 0; i < rbuffer->num_ranges; ++i) {
- if(offset <= rbuffer->ranges[i].end && rbuffer->ranges[i].start <= (offset+box->width)) {
- rbuffer->ranges[i].start = MIN2(rbuffer->ranges[i].start, offset);
- rbuffer->ranges[i].end = MAX2(rbuffer->ranges[i].end, (offset+length));
- return;
- }
- }
-
- rbuffer->ranges[rbuffer->num_ranges].start = offset;
- rbuffer->ranges[rbuffer->num_ranges].end = offset+length;
- rbuffer->num_ranges++;
}
unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
struct pipe_resource *buf,
- unsigned face, unsigned level)
+ unsigned level, int layer)
{
/* FIXME */
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
@@ -235,29 +200,25 @@ struct u_resource_vtbl r600_buffer_vtbl =
int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw)
{
- struct pipe_resource *upload_buffer = NULL;
- unsigned index_offset = draw->index_buffer_offset;
- int ret = 0;
-
if (r600_buffer_is_user_buffer(draw->index_buffer)) {
- ret = u_upload_buffer(rctx->upload_ib,
- index_offset,
- draw->count * draw->index_size,
- draw->index_buffer,
- &index_offset,
- &upload_buffer);
- if (ret) {
- goto done;
- }
- draw->index_buffer_offset = index_offset;
-
- /* Transfer ownership. */
- pipe_resource_reference(&draw->index_buffer, upload_buffer);
- pipe_resource_reference(&upload_buffer, NULL);
+ struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer);
+ unsigned upload_offset;
+ int ret = 0;
+
+ ret = r600_upload_buffer(rctx->rupload_vb,
+ draw->index_buffer_offset,
+ draw->count * draw->index_size,
+ rbuffer,
+ &upload_offset,
+ &rbuffer->r.bo_size,
+ &rbuffer->r.bo);
+ if (ret)
+ return ret;
+ rbuffer->uploaded = TRUE;
+ draw->index_buffer_offset = upload_offset;
}
-done:
- return ret;
+ return 0;
}
int r600_upload_user_buffers(struct r600_pipe_context *rctx)
@@ -266,27 +227,52 @@ int r600_upload_user_buffers(struct r600_pipe_context *rctx)
int i, nr;
nr = rctx->vertex_elements->count;
+ nr = rctx->nvertex_buffer;
for (i = 0; i < nr; i++) {
- struct pipe_vertex_buffer *vb =
- &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index];
+ struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
if (r600_buffer_is_user_buffer(vb->buffer)) {
- struct pipe_resource *upload_buffer = NULL;
- unsigned offset = 0; /*vb->buffer_offset * 4;*/
- unsigned size = vb->buffer->width0;
+ struct r600_resource_buffer *rbuffer = r600_buffer(vb->buffer);
unsigned upload_offset;
- ret = u_upload_buffer(rctx->upload_vb,
- offset, size,
- vb->buffer,
- &upload_offset, &upload_buffer);
+
+ ret = r600_upload_buffer(rctx->rupload_vb,
+ 0, vb->buffer->width0,
+ rbuffer,
+ &upload_offset,
+ &rbuffer->r.bo_size,
+ &rbuffer->r.bo);
if (ret)
return ret;
-
- pipe_resource_reference(&vb->buffer, NULL);
- vb->buffer = upload_buffer;
+ rbuffer->uploaded = TRUE;
vb->buffer_offset = upload_offset;
}
}
return ret;
}
+
+
+int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer,
+ uint32_t *const_offset)
+{
+ if (r600_buffer_is_user_buffer(cbuffer)) {
+ struct r600_resource_buffer *rbuffer = r600_buffer(cbuffer);
+ unsigned upload_offset;
+ int ret = 0;
+
+ ret = r600_upload_buffer(rctx->rupload_const,
+ 0, cbuffer->width0,
+ rbuffer,
+ &upload_offset,
+ &rbuffer->r.bo_size,
+ &rbuffer->r.bo);
+ if (ret)
+ return ret;
+ rbuffer->uploaded = TRUE;
+ *const_offset = upload_offset;
+ return 0;
+ }
+
+ *const_offset = 0;
+ return 0;
+}
diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h
index 2ee0c83e5d..a85d0bbf1e 100644
--- a/src/gallium/drivers/r600/r600_opcodes.h
+++ b/src/gallium/drivers/r600/r600_opcodes.h
@@ -330,10 +330,14 @@
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099
/* TODO Fill in more ALU */
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT 0x0000009B
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT 0x0000009C
+/* TODO Fill in more ALU */
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4 0x000000C1
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 511e52fbce..20838e4d98 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -35,7 +35,6 @@
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
-#include <util/u_upload_mgr.h>
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "r600d.h"
@@ -59,9 +58,6 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
if (!rctx->ctx.pm4_cdwords)
return;
- u_upload_flush(rctx->upload_vb);
- u_upload_flush(rctx->upload_ib);
-
#if 0
sprintf(dname, "gallium-%08d.bof", dc);
if (dc < 20) {
@@ -71,6 +67,9 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
dc++;
#endif
r600_context_flush(&rctx->ctx);
+
+ r600_upload_flush(rctx->rupload_vb);
+ r600_upload_flush(rctx->rupload_const);
}
static void r600_destroy_context(struct pipe_context *context)
@@ -79,6 +78,8 @@ static void r600_destroy_context(struct pipe_context *context)
rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush);
+ r600_end_vertex_translate(rctx);
+
r600_context_fini(&rctx->ctx);
util_blitter_destroy(rctx->blitter);
@@ -87,8 +88,8 @@ static void r600_destroy_context(struct pipe_context *context)
free(rctx->states[i]);
}
- u_upload_destroy(rctx->upload_vb);
- u_upload_destroy(rctx->upload_ib);
+ r600_upload_destroy(rctx->rupload_vb);
+ r600_upload_destroy(rctx->rupload_const);
if (rctx->tran.translate_cache)
translate_cache_destroy(rctx->tran.translate_cache);
@@ -120,6 +121,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
r600_init_blit_functions(rctx);
r600_init_query_functions(rctx);
r600_init_context_resource_functions(rctx);
+ r600_init_surface_functions(rctx);
switch (r600_get_family(rctx->radeon)) {
case CHIP_R600:
@@ -148,6 +150,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
+ case CHIP_BARTS:
+ case CHIP_TURKS:
+ case CHIP_CAICOS:
rctx->context.draw_vbo = evergreen_draw;
evergreen_init_state_functions(rctx);
if (evergreen_context_init(&rctx->ctx, rctx->radeon)) {
@@ -162,16 +167,14 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
return NULL;
}
- rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16,
- PIPE_BIND_INDEX_BUFFER);
- if (rctx->upload_ib == NULL) {
+ rctx->rupload_vb = r600_upload_create(rctx, 128 * 1024, 16);
+ if (rctx->rupload_vb == NULL) {
r600_destroy_context(&rctx->context);
return NULL;
}
- rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16,
- PIPE_BIND_VERTEX_BUFFER);
- if (rctx->upload_vb == NULL) {
+ rctx->rupload_const = r600_upload_create(rctx, 128 * 1024, 256);
+ if (rctx->rupload_const == NULL) {
r600_destroy_context(&rctx->context);
return NULL;
}
@@ -206,8 +209,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
else
rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
- r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth;
-
return &rctx->context;
}
@@ -240,6 +241,9 @@ static const char *r600_get_family_name(enum radeon_family family)
case CHIP_CYPRESS: return "AMD CYPRESS";
case CHIP_HEMLOCK: return "AMD HEMLOCK";
case CHIP_PALM: return "AMD PALM";
+ case CHIP_BARTS: return "AMD BARTS";
+ case CHIP_TURKS: return "AMD TURKS";
+ case CHIP_CAICOS: return "AMD CAICOS";
default: return "AMD unknown";
}
}
@@ -254,6 +258,9 @@ static const char* r600_get_name(struct pipe_screen* pscreen)
static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
{
+ struct r600_screen *rscreen = (struct r600_screen *)pscreen;
+ enum radeon_family family = r600_get_family(rscreen->radeon);
+
switch (param) {
/* Supported features (boolean caps). */
case PIPE_CAP_NPOT_TEXTURES:
@@ -286,7 +293,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- return 14;
+ if (family >= CHIP_CEDAR)
+ return 15;
+ else
+ return 14;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
/* FIXME allow this once infrastructure is there */
return 16;
@@ -315,12 +325,18 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
{
+ struct r600_screen *rscreen = (struct r600_screen *)pscreen;
+ enum radeon_family family = r600_get_family(rscreen->radeon);
+
switch (param) {
case PIPE_CAP_MAX_LINE_WIDTH:
case PIPE_CAP_MAX_LINE_WIDTH_AA:
case PIPE_CAP_MAX_POINT_WIDTH:
case PIPE_CAP_MAX_POINT_WIDTH_AA:
- return 8192.0f;
+ if (family >= CHIP_CEDAR)
+ return 16384.0f;
+ else
+ return 8192.0f;
case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
return 16.0f;
case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
@@ -407,9 +423,9 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
}
if ((usage & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) &&
+ PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT |
+ PIPE_BIND_SHARED)) &&
r600_is_colorbuffer_format_supported(format)) {
retval |= usage &
(PIPE_BIND_RENDER_TARGET |
@@ -467,7 +483,6 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon)
rscreen->screen.get_paramf = r600_get_paramf;
rscreen->screen.is_format_supported = r600_is_format_supported;
rscreen->screen.context_create = r600_create_context;
- r600_init_screen_texture_functions(&rscreen->screen);
r600_init_screen_resource_functions(&rscreen->screen);
rscreen->tiling_info = r600_get_tiling_info(radeon);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index ba9fedf0b6..2112a40f69 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -53,6 +53,8 @@ enum r600_pipe_state_id {
R600_PIPE_STATE_CONSTANT,
R600_PIPE_STATE_SAMPLER,
R600_PIPE_STATE_RESOURCE,
+ R600_PIPE_STATE_POLYGON_OFFSET,
+ R600_PIPE_STATE_FETCH_SHADER,
R600_PIPE_NSTATES
};
@@ -86,7 +88,15 @@ struct r600_vertex_element
struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS];
enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
unsigned hw_format_size[PIPE_MAX_ATTRIBS];
- boolean incompatible_layout;
+ boolean incompatible_layout;
+ struct r600_bo *fetch_shader;
+ unsigned fs_size;
+ struct r600_pipe_state rstate;
+ /* if offset is to big for fetch instructio we need to alterate
+ * offset of vertex buffer, record here the offset need to add
+ */
+ unsigned vbuffer_need_offset;
+ unsigned vbuffer_offset[PIPE_MAX_ATTRIBS];
};
struct r600_pipe_shader {
@@ -101,29 +111,31 @@ struct r600_pipe_shader {
#define NUM_TEX_UNITS 16
struct r600_textures_info {
- struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
- unsigned n_views;
+ struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
+ unsigned n_views;
void *samplers[NUM_TEX_UNITS];
- unsigned n_samplers;
+ unsigned n_samplers;
};
+/* vertex buffer translation context, used to translate vertex input that
+ * hw doesn't natively support, so far only FLOAT64 is unsupported.
+ */
struct r600_translate_context {
/* Translate cache for incompatible vertex offset/stride/format fallback. */
- struct translate_cache *translate_cache;
-
+ struct translate_cache *translate_cache;
/* The vertex buffer slot containing the translated buffer. */
- unsigned vb_slot;
- /* Saved and new vertex element state. */
- void *saved_velems, *new_velems;
+ unsigned vb_slot;
+ void *new_velems;
};
#define R600_CONSTANT_ARRAY_SIZE 256
#define R600_RESOURCE_ARRAY_SIZE 160
+struct r600_upload;
+
struct r600_pipe_context {
struct pipe_context context;
struct blitter_context *blitter;
- struct pipe_framebuffer_state *pframebuffer;
unsigned family;
void *custom_dsa_flush;
struct r600_screen *screen;
@@ -140,6 +152,7 @@ struct r600_pipe_context {
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
+ unsigned nvs_resource;
struct r600_pipe_state *vs_resource;
struct r600_pipe_state *ps_resource;
struct r600_pipe_state config;
@@ -151,14 +164,12 @@ struct r600_pipe_context {
/* shader information */
unsigned sprite_coord_enable;
bool flatshade;
- struct u_upload_mgr *upload_vb;
- struct u_upload_mgr *upload_ib;
+ struct r600_upload *rupload_vb;
unsigned any_user_vbs;
- struct r600_textures_info ps_samplers;
-
- unsigned vb_max_index;
- struct r600_translate_context tran;
-
+ struct r600_textures_info ps_samplers;
+ unsigned vb_max_index;
+ struct r600_translate_context tran;
+ struct r600_upload *rupload_const;
};
struct r600_drawl {
@@ -181,10 +192,12 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
+void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
+void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx);
/* r600_blit.c */
void r600_init_blit_functions(struct r600_pipe_context *rctx);
-int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
/* r600_buffer.c */
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
@@ -194,7 +207,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
unsigned bind);
unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
struct pipe_resource *buf,
- unsigned face, unsigned level);
+ unsigned level, int layer);
struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
struct winsys_handle *whandle);
int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
@@ -207,7 +220,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx);
void r600_init_context_resource_functions(struct r600_pipe_context *r600);
/* r600_shader.c */
-int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens);
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_find_vs_semantic_index(struct r600_shader *vs,
@@ -218,14 +231,20 @@ void r600_init_state_functions(struct r600_pipe_context *rctx);
void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
void r600_init_config(struct r600_pipe_context *rctx);
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
+void r600_polygon_offset_update(struct r600_pipe_context *rctx);
+void r600_vertex_buffer_update(struct r600_pipe_context *rctx);
+
/* r600_helper.h */
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
/* r600_texture.c */
void r600_init_screen_texture_functions(struct pipe_screen *screen);
+void r600_init_surface_functions(struct r600_pipe_context *r600);
uint32_t r600_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p);
+unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
+ unsigned level, unsigned layer);
/* r600_translate.c */
void r600_begin_vertex_translate(struct r600_pipe_context *rctx);
@@ -252,13 +271,13 @@ void r600_sampler_view_destroy(struct pipe_context *ctx,
void r600_bind_state(struct pipe_context *ctx, void *state);
void r600_delete_state(struct pipe_context *ctx, void *state);
void r600_bind_vertex_elements(struct pipe_context *ctx, void *state);
-
void *r600_create_shader_state(struct pipe_context *ctx,
const struct pipe_shader_state *state);
void r600_bind_ps_shader(struct pipe_context *ctx, void *state);
void r600_bind_vs_shader(struct pipe_context *ctx, void *state);
void r600_delete_ps_shader(struct pipe_context *ctx, void *state);
void r600_delete_vs_shader(struct pipe_context *ctx, void *state);
+
/*
* common helpers
*/
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 7a2d1f4412..28b3e1e5e4 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -46,6 +46,7 @@ struct r600_resource {
struct u_resource base;
struct r600_bo *bo;
u32 size;
+ unsigned bo_size;
};
struct r600_resource_texture {
@@ -61,7 +62,21 @@ struct r600_resource_texture {
unsigned tile_type;
unsigned depth;
unsigned dirty;
- struct r600_resource_texture *flushed_depth_texture;
+ struct r600_resource_texture *flushed_depth_texture;
+};
+
+#define R600_BUFFER_MAGIC 0xabcd1600
+
+struct r600_resource_buffer {
+ struct r600_resource r;
+ uint32_t magic;
+ void *user_buffer;
+ bool uploaded;
+};
+
+struct r600_surface {
+ struct pipe_surface base;
+ unsigned aligned_height;
};
void r600_init_screen_resource_functions(struct pipe_screen *screen);
@@ -73,46 +88,29 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
const struct pipe_resource *base,
struct winsys_handle *whandle);
-#define R600_BUFFER_MAGIC 0xabcd1600
-#define R600_BUFFER_MAX_RANGES 32
-
-struct r600_buffer_range {
- uint32_t start;
- uint32_t end;
-};
-
-struct r600_resource_buffer {
- struct r600_resource r;
- uint32_t magic;
- void *user_buffer;
- struct r600_buffer_range ranges[R600_BUFFER_MAX_RANGES];
- unsigned num_ranges;
-};
-
/* r600_buffer */
static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buffer)
{
if (buffer) {
assert(((struct r600_resource_buffer *)buffer)->magic == R600_BUFFER_MAGIC);
return (struct r600_resource_buffer *)buffer;
- }
- return NULL;
+ }
+ return NULL;
}
static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer)
{
- return r600_buffer(buffer)->user_buffer ? TRUE : FALSE;
+ if (r600_buffer(buffer)->uploaded)
+ return FALSE;
+ return r600_buffer(buffer)->user_buffer ? TRUE : FALSE;
}
-int r600_texture_depth_flush(struct pipe_context *ctx,
- struct pipe_resource *texture);
-
-extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture);
+int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture);
/* r600_texture.c texture transfer functions. */
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
struct pipe_resource *texture,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box);
void r600_texture_transfer_destroy(struct pipe_context *ctx,
@@ -122,9 +120,16 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer);
-struct r600_surface {
- struct pipe_surface base;
- unsigned aligned_height;
-};
-
+struct r600_pipe_context;
+struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx,
+ unsigned default_size,
+ unsigned alignment);
+void r600_upload_flush(struct r600_upload *upload);
+void r600_upload_destroy(struct r600_upload *upload);
+int r600_upload_buffer(struct r600_upload *upload, unsigned offset,
+ unsigned size, struct r600_resource_buffer *in_buffer,
+ unsigned *out_offset, unsigned *out_size,
+ struct r600_bo **out_buffer);
+
+int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, uint32_t *offset);
#endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 77b180984d..e85e829bad 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -44,6 +44,9 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade
rstate->nregs = 0;
/* so far never got proper semantic id from tgsi */
+ /* FIXME better to move this in config things so they get emited
+ * only one time per cs
+ */
for (i = 0; i < 10; i++) {
spi_vs_out_id[i] = 0;
}
@@ -67,20 +70,11 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade
S_028868_STACK_SIZE(rshader->bc.nstack),
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
- R_0288A4_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
R_0288D0_SQ_PGM_CF_OFFSET_VS,
0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
- R_0288DC_SQ_PGM_CF_OFFSET_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
R_028858_SQ_PGM_START_VS,
r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate,
- R_028894_SQ_PGM_START_FS,
- r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch);
r600_pipe_state_add_reg(rstate,
R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
@@ -104,37 +98,20 @@ int r600_find_vs_semantic_index(struct r600_shader *vs,
static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
+ unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
int pos_index = -1, face_index = -1;
- /* clear previous register */
rstate->nregs = 0;
for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
- if (rshader->input[i].centroid)
- tmp |= S_028644_SEL_CENTROID(1);
- if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
- tmp |= S_028644_SEL_LINEAR(1);
-
if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
pos_index = i;
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
- }
if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
face_index = i;
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
}
+
for (i = 0; i < rshader->noutput; i++) {
if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
r600_pipe_state_add_reg(rstate,
@@ -210,22 +187,13 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade
0xFFFFFFFF, NULL);
}
-static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_shader *rshader = &shader->shader;
void *ptr;
/* copy new shader */
- if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) {
- shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0);
- if (shader->bo_fetch == NULL) {
- return -ENOMEM;
- }
- ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL);
- memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4);
- r600_bo_unmap(rctx->radeon, shader->bo_fetch);
- }
if (shader->bo == NULL) {
shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
if (shader->bo == NULL) {
@@ -236,7 +204,6 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
r600_bo_unmap(rctx->radeon, shader->bo);
}
/* build state */
- rshader->flat_shade = rctx->flatshade;
switch (rshader->processor_type) {
case TGSI_PROCESSOR_VERTEX:
if (rshader->family >= CHIP_CEDAR) {
@@ -255,116 +222,51 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
default:
return -EINVAL;
}
- r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
return 0;
}
-static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_shader *shader = &rshader->shader;
- const struct util_format_description *desc;
- enum pipe_format resource_format[160];
- unsigned i, nresources = 0;
- struct r600_bc *bc = &shader->bc_fetch;
- struct r600_bc_cf *cf;
- struct r600_bc_vtx *vtx;
-
- if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
- return 0;
- /* doing a full memcmp fell over the refcount */
- if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
- (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements,
- rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) {
- return 0;
- }
- rshader->vertex_elements = *rctx->vertex_elements;
- for (i = 0; i < rctx->vertex_elements->count; i++) {
- resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
- }
- r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL);
- LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
- switch (cf->inst) {
- case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
- case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
- LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
- desc = util_format_description(resource_format[vtx->buffer_id]);
- if (desc == NULL) {
- R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
- return -EINVAL;
- }
- vtx->dst_sel_x = desc->swizzle[0];
- vtx->dst_sel_y = desc->swizzle[1];
- vtx->dst_sel_z = desc->swizzle[2];
- vtx->dst_sel_w = desc->swizzle[3];
- }
- break;
- default:
- break;
- }
- }
- return r600_bc_build(&shader->bc_fetch);
-}
-
-int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- int r;
-
- if (shader == NULL)
- return -EINVAL;
- /* there should be enough input */
- if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, shader->shader.bc.nresource);
- return -EINVAL;
- }
- r = r600_shader_update(ctx, shader);
- if (r)
- return r;
- return r600_pipe_shader(ctx, shader);
-}
-
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
{
+ static int dump_shaders = -1;
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ u32 *literals;
int r;
-//fprintf(stderr, "--------------------------------------------------------------\n");
-//tgsi_dump(tokens, 0);
+ /* Would like some magic "get_bool_option_once" routine.
+ */
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+
+ if (dump_shaders) {
+ fprintf(stderr, "--------------------------------------------------------------\n");
+ tgsi_dump(tokens, 0);
+ }
shader->shader.family = r600_get_family(rctx->radeon);
- r = r600_shader_from_tgsi(tokens, &shader->shader);
+ r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);
if (r) {
R600_ERR("translation from TGSI failed !\n");
return r;
}
r = r600_bc_build(&shader->shader.bc);
+ free(literals);
if (r) {
R600_ERR("building bytecode failed !\n");
return r;
}
- if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
- r = r600_bc_build(&shader->shader.bc_fetch);
- if (r) {
- R600_ERR("building bytecode failed !\n");
- return r;
- }
+ if (dump_shaders) {
+ r600_bc_dump(&shader->shader.bc);
+ fprintf(stderr, "______________________________________________________________\n");
}
-//fprintf(stderr, "______________________________________________________________\n");
- return 0;
+ return r600_pipe_shader(ctx, shader);
}
-void
-r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
r600_bo_reference(rctx->radeon, &shader->bo, NULL);
-
r600_bc_clear(&shader->shader.bc);
-
- /* FIXME: is there more stuff to free? */
}
/*
@@ -381,9 +283,7 @@ struct r600_shader_ctx {
unsigned temp_reg;
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bc *bc;
- struct r600_bc *bc_fetch;
struct r600_shader *shader;
- u32 value[4];
u32 *literals;
u32 nliterals;
u32 max_driver_temp_used;
@@ -501,9 +401,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
- struct r600_bc_vtx vtx;
unsigned i;
- int r;
switch (d->Declaration.File) {
case TGSI_FILE_INPUT:
@@ -513,26 +411,6 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
ctx->shader->input[i].centroid = d->Declaration.Centroid;
ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
- if (ctx->type == TGSI_PROCESSOR_VERTEX) {
- /* turn input into fetch */
- memset(&vtx, 0, sizeof(struct r600_bc_vtx));
- vtx.inst = 0;
- vtx.fetch_type = 0;
- vtx.buffer_id = i;
- /* register containing the index into the buffer */
- vtx.src_gpr = 0;
- vtx.src_sel_x = 0;
- vtx.mega_fetch_count = 0x1F;
- vtx.dst_gpr = ctx->shader->input[i].gpr;
- vtx.dst_sel_x = 0;
- vtx.dst_sel_y = 1;
- vtx.dst_sel_z = 2;
- vtx.dst_sel_w = 3;
- vtx.use_const_fields = 1;
- r = r600_bc_add_vtx(ctx->bc_fetch, &vtx);
- if (r)
- return r;
- }
if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
/* turn input into interpolate on EG */
if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
@@ -614,7 +492,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
return ctx->num_interp_gpr;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)
{
struct tgsi_full_immediate *immediate;
struct r600_shader_ctx ctx;
@@ -624,7 +502,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
int i, r = 0, pos0;
ctx.bc = &shader->bc;
- ctx.bc_fetch = &shader->bc_fetch;
ctx.shader = shader;
r = r600_bc_init(ctx.bc, shader->family);
if (r)
@@ -634,19 +511,15 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
tgsi_parse_init(&ctx.parse, tokens);
ctx.type = ctx.parse.FullHeader.Processor.Processor;
shader->processor_type = ctx.type;
- if (shader->processor_type == TGSI_PROCESSOR_VERTEX) {
- r = r600_bc_init(ctx.bc_fetch, shader->family);
- if (r)
- return r;
- ctx.bc_fetch->type = -1;
- }
ctx.bc->type = shader->processor_type;
/* register allocations */
/* Values [0,127] correspond to GPR[0..127].
* Values [128,159] correspond to constant buffer bank 0
* Values [160,191] correspond to constant buffer bank 1
- * Values [256,511] correspond to cfile constants c[0..255].
+ * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
+ * Values [256,287] correspond to constant buffer bank 2 (EG)
+ * Values [288,319] correspond to constant buffer bank 3 (EG)
* Other special values are shown in the list below.
* 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
* 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
@@ -680,9 +553,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
ctx.info.file_count[TGSI_FILE_OUTPUT];
- ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
+ /* Outside the GPR range. This will be translated to one of the
+ * kcache banks later. */
+ ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
- ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
+ ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_count[TGSI_FILE_TEMPORARY];
@@ -725,9 +600,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
r = ctx.inst_info->process(&ctx);
if (r)
goto out_err;
- r = r600_bc_add_literal(ctx.bc, ctx.value);
- if (r)
- goto out_err;
+ break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
break;
default:
R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
@@ -840,21 +714,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
}
}
- /* add return to fetch shader */
- if (ctx.type == TGSI_PROCESSOR_VERTEX) {
- if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
- r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
- } else {
- r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
- }
- }
/* add output to bytecode */
for (i = 0; i < noutput; i++) {
r = r600_bc_add_output(ctx.bc, &output[i]);
if (r)
goto out_err;
}
- free(ctx.literals);
+ *literals = ctx.literals;
tgsi_parse_free(&ctx.parse);
return 0;
out_err:
@@ -878,22 +744,29 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
const struct tgsi_full_src_register *tgsi_src,
struct r600_bc_alu_src *r600_src)
{
- int index;
memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
- r600_src->sel = tgsi_src->Register.Index;
- if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
- r600_src->sel = 0;
- index = tgsi_src->Register.Index;
- ctx->value[0] = ctx->literals[index * 4 + 0];
- ctx->value[1] = ctx->literals[index * 4 + 1];
- ctx->value[2] = ctx->literals[index * 4 + 2];
- ctx->value[3] = ctx->literals[index * 4 + 3];
- }
- if (tgsi_src->Register.Indirect)
- r600_src->rel = V_SQ_REL_RELATIVE;
r600_src->neg = tgsi_src->Register.Negate;
r600_src->abs = tgsi_src->Register.Absolute;
- r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
+ if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
+ int index;
+ if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
+
+ index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
+ r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+ if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
+ return 0;
+ }
+ index = tgsi_src->Register.Index;
+ r600_src->sel = V_SQ_ALU_SRC_LITERAL;
+ r600_src->value = ctx->literals + index * 4;
+ } else {
+ if (tgsi_src->Register.Indirect)
+ r600_src->rel = V_SQ_REL_RELATIVE;
+ r600_src->sel = tgsi_src->Register.Index;
+ r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
+ }
return 0;
}
@@ -981,18 +854,19 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
int i, j, k, nliteral, r;
for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
+ if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
nliteral++;
}
}
for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
+ if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
int treg = r600_get_temp(ctx);
for (k = 0; k < 4; k++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = r600_src[i].sel;
alu.src[0].chan = k;
+ alu.src[0].value = r600_src[i].value;
alu.dst.sel = treg;
alu.dst.chan = k;
alu.dst.write = 1;
@@ -1002,9 +876,6 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
- if (r)
- return r;
r600_src[i].sel = treg;
j--;
}
@@ -1012,19 +883,25 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
return 0;
}
-static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
+static int tgsi_last_instruction(unsigned writemask)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
- struct r600_bc_alu alu;
- int i, j, r;
- int lasti = 0;
+ int i, lasti = 0;
for (i = 0; i < 4; i++) {
- if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ if (writemask & (1 << i)) {
lasti = i;
}
}
+ return lasti;
+}
+
+static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu_src r600_src[3];
+ struct r600_bc_alu alu;
+ int i, j, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
r = tgsi_split_constant(ctx, r600_src);
if (r)
@@ -1093,12 +970,14 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
struct r600_bc_alu_src r600_src[3])
{
+ static float half_inv_pi = 1.0 /(3.1415926535 * 2);
+ static float double_pi = 3.1415926535 * 2;
+ static float neg_pi = -3.1415926535;
+
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int r;
- uint32_t lit_vals[4];
struct r600_bc_alu alu;
- memset(lit_vals, 0, 4*4);
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
@@ -1106,9 +985,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
if (r)
return r;
- lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
- lit_vals[1] = fui(0.5f);
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -1122,15 +998,13 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].chan = 0;
- alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = (uint32_t *)&half_inv_pi;
+ alu.src[2].sel = V_SQ_ALU_SRC_0_5;
alu.src[2].chan = 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
@@ -1146,14 +1020,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
if (r)
return r;
- if (ctx->bc->chiprev == CHIPREV_R600) {
- lit_vals[0] = fui(3.1415926535897f * 2.0f);
- lit_vals[1] = fui(-3.1415926535897f);
- } else {
- lit_vals[0] = fui(1.0f);
- lit_vals[1] = fui(-0.5f);
- }
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -1169,13 +1035,20 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.src[1].chan = 0;
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 1;
+
+ if (ctx->bc->chiprev == CHIPREV_R600) {
+ alu.src[1].value = (uint32_t *)&double_pi;
+ alu.src[2].value = (uint32_t *)&neg_pi;
+ } else {
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ alu.src[2].sel = V_SQ_ALU_SRC_0_5;
+ alu.src[2].neg = 1;
+ }
+
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
return 0;
}
@@ -1185,7 +1058,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
- int lasti = 0;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
r = tgsi_setup_trig(ctx, r600_src);
if (r)
@@ -1205,10 +1078,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
return r;
/* replicate result */
- for (i = 0; i < 4; i++) {
- if (inst->Dst[0].Register.WriteMask & (1 << i))
- lasti = i;
- }
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
@@ -1295,10 +1164,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* dst.w = 1.0; */
@@ -1319,10 +1184,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return 0;
@@ -1358,9 +1219,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* kill must be last in ALU */
ctx->bc->force_add_cf = 1;
@@ -1423,10 +1281,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
if (inst->Dst[0].Register.WriteMask & (1 << 2))
{
int chan;
@@ -1445,10 +1299,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
chan = alu.dst.chan;
sel = alu.dst.sel;
@@ -1471,9 +1321,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* dst.z = exp(tmp.x) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -1517,9 +1364,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* replicate result */
return tgsi_helper_tempx_replicate(ctx);
}
@@ -1568,9 +1412,6 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* replicate result */
return tgsi_helper_tempx_replicate(ctx);
}
@@ -1594,12 +1435,9 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
/* b * LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
if (r)
return r;
@@ -1611,9 +1449,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
/* POW(a,b) = EXP2(b * LOG2(a))*/
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -1624,9 +1459,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
return tgsi_helper_tempx_replicate(ctx);
}
@@ -1666,9 +1498,6 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* dst = (-tmp > 0 ? -1 : tmp) */
for (i = 0; i < 4; i++) {
@@ -1703,9 +1532,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
struct r600_bc_alu alu;
int i, r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
@@ -1735,6 +1561,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
r = tgsi_split_constant(ctx, r600_src);
if (r)
@@ -1742,26 +1569,32 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
r = tgsi_split_literal_constant(ctx, r600_src);
if (r)
return r;
- /* do it in 2 step as op3 doesn't support writemask */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
alu.src[j] = r600_src[j];
alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
}
- alu.dst.sel = ctx->temp_reg;
+
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_dp(struct r600_shader_ctx *ctx)
@@ -1784,9 +1617,13 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
alu.src[j] = r600_src[j];
alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
}
- alu.dst.sel = ctx->temp_reg;
+
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
alu.dst.chan = i;
- alu.dst.write = 1;
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
case TGSI_OPCODE_DP2:
@@ -1818,19 +1655,21 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
+ static float one_point_five = 1.5f;
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_tex tex;
struct r600_bc_alu alu;
unsigned src_gpr;
int r, i;
int opcode;
- boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
- uint32_t lit_vals[4];
+ boolean src_not_temp =
+ inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
+ inst->Src[0].Register.File != TGSI_FILE_INPUT;
src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
@@ -1959,6 +1798,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
+ alu.src[2].value = (u32*)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -1979,6 +1819,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
+ alu.src[2].value = (u32*)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
@@ -1989,11 +1830,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
if (r)
return r;
- lit_vals[0] = fui(1.5f);
-
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
src_not_temp = FALSE;
src_gpr = ctx->temp_reg;
}
@@ -2066,6 +1902,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
unsigned i;
int r;
@@ -2075,8 +1912,40 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
r = tgsi_split_literal_constant(ctx, r600_src);
if (r)
return r;
+
+ /* optimize if it's just an equal balance */
+ if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
+ alu.src[0] = r600_src[1];
+ alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
+ alu.src[1] = r600_src[2];
+ alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+ alu.omod = 3;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
+ alu.dst.chan = i;
+ if (i == lasti) {
+ alu.last = 1;
+ }
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+ }
+
/* 1 - src0 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
alu.src[0].sel = V_SQ_ALU_SRC_1;
@@ -2086,7 +1955,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
alu.src[1].neg = 1;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
alu.dst.write = 1;
@@ -2094,12 +1963,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* (1 - src0) * src2 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
@@ -2108,7 +1977,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
alu.dst.write = 1;
@@ -2116,12 +1985,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* src0 * src1 + (1 - src0) * src2 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -2131,16 +2000,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
alu.src[2].sel = ctx->temp_reg;
alu.src[2].chan = i;
- alu.dst.sel = ctx->temp_reg;
+
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_cmp(struct r600_shader_ctx *ctx)
@@ -2148,8 +2021,8 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
- int use_temp = 0;
int i, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
r = tgsi_split_constant(ctx, r600_src);
if (r)
@@ -2158,10 +2031,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
if (r)
return r;
- if (inst->Dst[0].Register.WriteMask != 0xf)
- use_temp = 1;
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
- for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
alu.src[0] = r600_src[0];
@@ -2173,24 +2046,19 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
alu.src[2] = r600_src[1];
alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
- if (use_temp)
- alu.dst.sel = ctx->temp_reg;
- else {
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
- if (i == 3)
+ if (i == lasti)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- if (use_temp)
- return tgsi_helper_copy(ctx, inst);
return 0;
}
@@ -2257,10 +2125,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
for (i = 0; i < 4; i++) {
@@ -2318,10 +2182,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
if (use_temp)
return tgsi_helper_copy(ctx, inst);
@@ -2354,10 +2214,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -2369,10 +2225,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.y = tmp - floor(tmp); */
@@ -2398,9 +2250,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.z = RoughApprox2ToX(tmp);*/
@@ -2421,9 +2270,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.w = 1.0;*/
@@ -2441,9 +2287,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return tgsi_helper_copy(ctx, inst);
}
@@ -2473,10 +2316,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -2489,10 +2328,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.y = src.x / (2 ^ floor(log2(src.x))); */
@@ -2515,10 +2350,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
@@ -2534,10 +2365,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -2553,10 +2380,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
@@ -2572,10 +2395,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
@@ -2597,10 +2416,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.z = log2(src);*/
@@ -2622,10 +2437,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.w = 1.0; */
@@ -2644,10 +2455,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return tgsi_helper_copy(ctx, inst);
@@ -2802,8 +2609,25 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
static int pops(struct r600_shader_ctx *ctx, int pops)
{
- r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
- ctx->bc->cf_last->pop_count = pops;
+ int alu_pop = 3;
+ if (ctx->bc->cf_last) {
+ if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
+ alu_pop = 0;
+ else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
+ alu_pop = 1;
+ }
+ alu_pop += pops;
+ if (alu_pop == 1) {
+ ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
+ ctx->bc->force_add_cf = 1;
+ } else if (alu_pop == 2) {
+ ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
+ ctx->bc->force_add_cf = 1;
+ } else {
+ r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
+ ctx->bc->cf_last->pop_count = pops;
+ ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+ }
return 0;
}
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index cd108da491..935dd6fe3a 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -38,7 +38,6 @@ struct r600_shader_io {
struct r600_shader {
unsigned processor_type;
struct r600_bc bc;
- boolean flat_shade;
unsigned ninput;
unsigned noutput;
unsigned nlds;
@@ -46,9 +45,8 @@ struct r600_shader {
struct r600_shader_io output[32];
enum radeon_family family;
boolean uses_kill;
- struct r600_bc bc_fetch;
};
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
#endif
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 0573e63dc8..56ed35e8b3 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -74,6 +74,10 @@
#define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 0x3) << 30)
#define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3)
#define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF
+#define V_SQ_CF_KCACHE_NOP 0x00000000
+#define V_SQ_CF_KCACHE_LOCK_1 0x00000001
+#define V_SQ_CF_KCACHE_LOCK_2 0x00000002
+#define V_SQ_CF_KCACHE_LOCK_LOOP_INDEX 0x00000003
#define P_SQ_CF_ALU_WORD1
#define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0)
#define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3)
@@ -187,6 +191,8 @@
#define V_SQ_ALU_SRC_M_1_INT 0x000000FB
#define V_SQ_ALU_SRC_0_5 0x000000FC
#define V_SQ_ALU_SRC_LITERAL 0x000000FD
+#define V_SQ_ALU_SRC_PV 0x000000FE
+#define V_SQ_ALU_SRC_PS 0x000000FF
#define V_SQ_ALU_SRC_PARAM_BASE 0x000001C0
#define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9)
#define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index bf4ca057d2..96b02d72b9 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -36,7 +36,6 @@
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
-#include <util/u_upload_mgr.h>
#include <util/u_framebuffer.h>
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
@@ -46,14 +45,164 @@
#include "r600_pipe.h"
#include "r600_state_inlines.h"
+void r600_polygon_offset_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state state;
+
+ state.id = R600_PIPE_STATE_POLYGON_OFFSET;
+ state.nregs = 0;
+ if (rctx->rasterizer && rctx->framebuffer.zsbuf) {
+ float offset_units = rctx->rasterizer->offset_units;
+ unsigned offset_db_fmt_cntl = 0, depth;
+
+ switch (rctx->framebuffer.zsbuf->texture->format) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ depth = -24;
+ offset_units *= 2.0f;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ depth = -23;
+ offset_units *= 1.0f;
+ offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ depth = -16;
+ offset_units *= 4.0f;
+ break;
+ default:
+ return;
+ }
+ /* FIXME some of those reg can be computed with cso */
+ offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
+ r600_pipe_state_add_reg(&state,
+ R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE,
+ fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET,
+ fui(offset_units), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE,
+ fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET,
+ fui(offset_units), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&state,
+ R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+ offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set(&rctx->ctx, &state);
+ }
+}
+
+/* FIXME optimize away spi update when it's not needed */
+static void r600_spi_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_shader *shader = rctx->ps_shader;
+ struct r600_pipe_state rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned i, tmp;
+
+ rstate.nregs = 0;
+ for (i = 0; i < rshader->ninput; i++) {
+ tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
+ if (rshader->input[i].centroid)
+ tmp |= S_028644_SEL_CENTROID(1);
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+ tmp |= S_028644_SEL_LINEAR(1);
+
+ if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
+ tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
+ }
+ if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
+ rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
+ tmp |= S_028644_PT_SPRITE_TEX(1);
+ }
+ r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
+ }
+ r600_context_pipe_state_set(&rctx->ctx, &rstate);
+}
+
+void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state *rstate;
+ struct r600_resource *rbuffer;
+ struct pipe_vertex_buffer *vertex_buffer;
+ unsigned i, offset;
+
+ /* we don't update until we know vertex elements */
+ if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
+ return;
+
+ if (rctx->vertex_elements->incompatible_layout) {
+ /* translate rebind new vertex elements so
+ * return once translated
+ */
+ r600_begin_vertex_translate(rctx);
+ return;
+ }
+
+ if (rctx->any_user_vbs) {
+ r600_upload_user_buffers(rctx);
+ rctx->any_user_vbs = FALSE;
+ }
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ rctx->nvs_resource = rctx->vertex_elements->count;
+ } else {
+ /* bind vertex buffer once */
+ rctx->nvs_resource = rctx->nvertex_buffer;
+ }
+
+ for (i = 0 ; i < rctx->nvs_resource; i++) {
+ rstate = &rctx->vs_resource[i];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ unsigned vbuffer_index;
+ vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
+ vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = rctx->vertex_elements->vbuffer_offset[i];
+ } else {
+ /* bind vertex buffer once */
+ vertex_buffer = &rctx->vertex_buffer[i];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = 0;
+ }
+ if (vertex_buffer == NULL || rbuffer == NULL)
+ continue;
+ offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
+
+ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
+ rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
+ S_038008_STRIDE(vertex_buffer->stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
+ }
+}
+
static void r600_draw_common(struct r600_drawl *draw)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx;
- struct r600_pipe_state *rstate;
struct r600_resource *rbuffer;
- unsigned i, j, offset, prim;
+ unsigned prim;
u32 vgt_dma_index_type, vgt_draw_initiator, mask;
- struct pipe_vertex_buffer *vertex_buffer;
struct r600_draw rdraw;
struct r600_pipe_state vgt;
@@ -76,42 +225,23 @@ static void r600_draw_common(struct r600_drawl *draw)
}
if (r600_conv_pipe_prim(draw->mode, &prim))
return;
-
-
- /* rebuild vertex shader if input format changed */
- if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader))
+ if (unlikely(rctx->ps_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
return;
- if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader))
+ }
+ if (unlikely(rctx->vs_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
+ return;
+ }
+ /* there should be enough input */
+ if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
+ R600_ERR("%d resources provided, expecting %d\n",
+ rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
return;
-
- for (i = 0 ; i < rctx->vertex_elements->count; i++) {
- uint32_t word2, format;
-
- rstate = &rctx->vs_resource[i];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- j = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[j];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->elements[i].src_offset +
- vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
-
- format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]);
-
- word2 = format | S_038008_STRIDE(vertex_buffer->stride);
-
- r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
}
+ r600_spi_update(rctx);
+
mask = 0;
for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
mask |= (0xF << (i * 4));
@@ -126,46 +256,6 @@ static void r600_draw_common(struct r600_drawl *draw)
r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
- /* build late state */
- if (rctx->rasterizer && rctx->framebuffer.zsbuf) {
- float offset_units = rctx->rasterizer->offset_units;
- unsigned offset_db_fmt_cntl = 0, depth;
-
- switch (rctx->framebuffer.zsbuf->texture->format) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- depth = -24;
- offset_units *= 2.0f;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- depth = -23;
- offset_units *= 1.0f;
- offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- depth = -16;
- offset_units *= 4.0f;
- break;
- default:
- return;
- }
- offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
- r600_pipe_state_add_reg(&vgt,
- R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE,
- fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET,
- fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE,
- fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET,
- fui(offset_units), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt,
- R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
- offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
- }
r600_context_pipe_state_set(&rctx->ctx, &vgt);
rdraw.vgt_num_indices = draw->count;
@@ -185,17 +275,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_drawl draw;
- boolean translate = FALSE;
- if (rctx->vertex_elements->incompatible_layout) {
- r600_begin_vertex_translate(rctx);
- translate = TRUE;
- }
-
- if (rctx->any_user_vbs) {
- r600_upload_user_buffers(rctx);
- rctx->any_user_vbs = FALSE;
- }
memset(&draw, 0, sizeof(struct r600_drawl));
draw.ctx = ctx;
draw.mode = info->mode;
@@ -226,9 +306,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
r600_draw_common(&draw);
- if (translate)
- r600_end_vertex_translate(rctx);
-
pipe_resource_reference(&draw.index_buffer, NULL);
}
@@ -603,9 +680,9 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) |
S_038010_REQUEST_SIZE(1) |
- S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL);
+ S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
- S_038014_LAST_LEVEL(state->last_level) |
+ S_038014_LAST_LEVEL(state->u.tex.last_level) |
S_038014_BASE_ARRAY(0) |
S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
@@ -824,10 +901,11 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
struct r600_surface *surf;
- unsigned level = state->cbufs[cb]->level;
+ unsigned level = state->cbufs[cb]->u.tex.level;
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype;
+ unsigned offset;
const struct util_format_description *desc;
struct r600_bo *bo[3];
@@ -838,6 +916,9 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
bo[1] = rbuffer->bo;
bo[2] = rbuffer->bo;
+ /* XXX quite sure for dx10+ hw don't need any offset hacks */
+ offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture,
+ level, state->cbufs[cb]->u.tex.first_layer);
pitch = rtex->pitch_in_pixels[level] / 8 - 1;
slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
ntype = 0;
@@ -857,7 +938,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
r600_pipe_state_add_reg(rstate,
R_028040_CB_COLOR0_BASE + cb * 4,
- (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+ (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
r600_pipe_state_add_reg(rstate,
R_0280A0_CB_COLOR0_INFO + cb * 4,
color_info, 0xFFFFFFFF, bo[0]);
@@ -888,11 +969,12 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
struct r600_surface *surf;
unsigned level;
unsigned pitch, slice, format;
+ unsigned offset;
if (state->zsbuf == NULL)
return;
- level = state->zsbuf->level;
+ level = state->zsbuf->u.tex.level;
surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
@@ -902,12 +984,15 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
rtex->depth = 1;
rbuffer = &rtex->resource;
+ /* XXX quite sure for dx10+ hw don't need any offset hacks */
+ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
+ level, state->zsbuf->u.tex.first_layer);
pitch = rtex->pitch_in_pixels[level] / 8 - 1;
slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
- (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE,
S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice),
0xFFFFFFFF, NULL);
@@ -934,8 +1019,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&rctx->framebuffer, state);
- rctx->pframebuffer = &rctx->framebuffer;
-
/* build states */
for (int i = 0; i < state->nr_cbufs; i++) {
r600_cb(rctx, rstate, state, i);
@@ -1015,6 +1098,10 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate;
r600_context_pipe_state_set(&rctx->ctx, rstate);
+
+ if (state->zsbuf) {
+ r600_polygon_offset_update(rctx);
+ }
}
static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
@@ -1022,6 +1109,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_resource *rbuffer = (struct r600_resource*)buffer;
+ uint32_t offset;
/* Note that the state tracker can unbind constant buffers by
* passing NULL here.
@@ -1030,6 +1118,8 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
return;
}
+ r600_upload_const_buffer(rctx, buffer, &offset);
+
switch (shader) {
case PIPE_SHADER_VERTEX:
rctx->vs_const_buffer.nregs = 0;
@@ -1039,7 +1129,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&rctx->vs_const_buffer,
R_028980_ALU_CONST_CACHE_VS_0,
- r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
break;
case PIPE_SHADER_FRAGMENT:
@@ -1050,7 +1140,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(&rctx->ps_const_buffer,
R_028940_ALU_CONST_CACHE_PS_0,
- r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo);
+ (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
break;
default:
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 55bc5d0d22..3603376f73 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -58,6 +58,12 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state)
rctx->states[rs->rstate.id] = &rs->rstate;
r600_context_pipe_state_set(&rctx->ctx, &rs->rstate);
+
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_polygon_offset_update(rctx);
+ } else {
+ r600_polygon_offset_update(rctx);
+ }
}
void r600_delete_rs_state(struct pipe_context *ctx, void *state)
@@ -113,8 +119,23 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_vertex_element *v = (struct r600_vertex_element*)state;
+ /* delete previous translated vertex elements */
+ if (rctx->tran.new_velems) {
+ r600_end_vertex_translate(rctx);
+ }
+
rctx->vertex_elements = v;
if (v) {
+ rctx->states[v->rstate.id] = &v->rstate;
+ r600_context_pipe_state_set(&rctx->ctx, &v->rstate);
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_vertex_buffer_update(rctx);
+ } else {
+ r600_vertex_buffer_update(rctx);
+ }
+ }
+
+ if (v) {
// rctx->vs_rebuild = TRUE;
}
}
@@ -122,11 +143,16 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_vertex_element *v = (struct r600_vertex_element*)state;
- FREE(state);
-
+ if (rctx->states[v->rstate.id] == &v->rstate) {
+ rctx->states[v->rstate.id] = NULL;
+ }
if (rctx->vertex_elements == state)
rctx->vertex_elements = NULL;
+
+ r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL);
+ FREE(state);
}
@@ -153,8 +179,16 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
struct pipe_vertex_buffer *vbo;
unsigned max_index = (unsigned)-1;
- for (int i = 0; i < rctx->nvertex_buffer; i++) {
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
+ if (rctx->family >= CHIP_CEDAR) {
+ for (int i = 0; i < rctx->nvertex_buffer; i++) {
+ pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ }
+ } else {
+ for (int i = 0; i < rctx->nvertex_buffer; i++) {
+ pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ }
}
memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count);
@@ -162,20 +196,29 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
vbo = (struct pipe_vertex_buffer*)&buffers[i];
rctx->vertex_buffer[i].buffer = NULL;
+ if (buffers[i].buffer == NULL)
+ continue;
if (r600_buffer_is_user_buffer(buffers[i].buffer))
rctx->any_user_vbs = TRUE;
pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer);
+ /* The stride of zero means we will be fetching only the first
+ * vertex, so don't care about max_index. */
+ if (!vbo->stride)
+ continue;
+
if (vbo->max_index == ~0) {
- if (!vbo->stride)
- vbo->max_index = 1;
- else
- vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
+ vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
}
max_index = MIN2(vbo->max_index, max_index);
}
rctx->nvertex_buffer = count;
rctx->vb_max_index = max_index;
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_vertex_buffer_update(rctx);
+ } else {
+ r600_vertex_buffer_update(rctx);
+ }
}
@@ -186,9 +229,10 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
unsigned count,
const struct pipe_vertex_element *elements)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
- int i;
enum pipe_format *format;
+ int i;
assert(count < 32);
if (!v)
@@ -210,12 +254,16 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
}
v->incompatible_layout =
v->incompatible_layout ||
- v->elements[i].src_format != v->hw_format[i] ||
- v->elements[i].src_offset % 4 != 0;
+ v->elements[i].src_format != v->hw_format[i];
v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4);
}
+ if (r600_vertex_elements_build_fetch_shader(rctx, v)) {
+ FREE(v);
+ return NULL;
+ }
+
return v;
}
@@ -238,6 +286,9 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
/* TODO delete old shader */
rctx->ps_shader = (struct r600_pipe_shader *)state;
+ if (state) {
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_shader->rstate);
+ }
}
void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
@@ -246,6 +297,9 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
/* TODO delete old shader */
rctx->vs_shader = (struct r600_pipe_shader *)state;
+ if (state) {
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_shader->rstate);
+ }
}
void r600_delete_ps_shader(struct pipe_context *ctx, void *state)
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 781612af57..d994196e19 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -305,6 +305,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ return V_0280A0_SWAP_ALT;
case PIPE_FORMAT_R8G8_UNORM:
return V_0280A0_SWAP_STD;
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 8ecd434a43..e274562457 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -45,14 +45,10 @@ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_t
{
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
struct pipe_resource *texture = transfer->resource;
- struct pipe_subresource subdst;
- subdst.face = 0;
- subdst.level = 0;
ctx->resource_copy_region(ctx, rtransfer->staging_texture,
- subdst, 0, 0, 0, texture, transfer->sr,
- transfer->box.x, transfer->box.y, transfer->box.z,
- transfer->box.width, transfer->box.height);
+ 0, 0, 0, 0, texture, transfer->level,
+ &transfer->box);
}
@@ -61,34 +57,32 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600
{
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
struct pipe_resource *texture = transfer->resource;
- struct pipe_subresource subsrc;
-
- subsrc.face = 0;
- subsrc.level = 0;
- ctx->resource_copy_region(ctx, texture, transfer->sr,
+ struct pipe_box sbox;
+
+ sbox.x = sbox.y = sbox.z = 0;
+ sbox.width = transfer->box.width;
+ sbox.height = transfer->box.height;
+ /* XXX that might be wrong */
+ sbox.depth = 1;
+ ctx->resource_copy_region(ctx, texture, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
- rtransfer->staging_texture, subsrc,
- 0, 0, 0,
- transfer->box.width, transfer->box.height);
+ rtransfer->staging_texture,
+ 0, &sbox);
ctx->flush(ctx, 0, NULL);
}
-static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
- unsigned level, unsigned zslice,
- unsigned face)
+unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
+ unsigned level, unsigned layer)
{
unsigned offset = rtex->offset[level];
switch (rtex->resource.base.b.target) {
case PIPE_TEXTURE_3D:
- assert(face == 0);
- return offset + zslice * rtex->layer_size[level];
case PIPE_TEXTURE_CUBE:
- assert(zslice == 0);
- return offset + face * rtex->layer_size[level];
+ return offset + layer * rtex->layer_size[level];
default:
- assert(zslice == 0 && face == 0);
+ assert(layer == 0);
return offset;
}
}
@@ -175,10 +169,7 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned level)
{
- struct r600_screen* rscreen = (struct r600_screen *)screen;
struct pipe_resource *ptex = &rtex->resource.base.b;
- struct radeon *radeon = (struct radeon *)screen->winsys;
- enum chip_class chipc = r600_get_family_class(radeon);
unsigned width, stride, tile_width;
if (rtex->pitch_override)
@@ -212,11 +203,10 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen,
}
/* Get a width in pixels from a stride in bytes. */
-static unsigned pitch_to_width(enum pipe_format format,
- unsigned pitch_in_bytes)
+static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes)
{
- return (pitch_in_bytes / util_format_get_blocksize(format)) *
- util_format_get_blockwidth(format);
+ return (pitch_in_bytes / util_format_get_blocksize(format)) *
+ util_format_get_blockwidth(format);
}
static void r600_texture_set_array_mode(struct pipe_screen *screen,
@@ -335,12 +325,12 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
unsigned array_mode = 0;
- static int force_tiling = -1;
+ static int force_tiling = -1;
- /* Would like some magic "get_bool_option_once" routine.
+ /* Would like some magic "get_bool_option_once" routine.
*/
if (force_tiling == -1)
- force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE);
+ force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE);
if (force_tiling) {
if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
@@ -371,8 +361,8 @@ static void r600_texture_destroy(struct pipe_screen *screen,
}
static boolean r600_texture_get_handle(struct pipe_screen* screen,
- struct pipe_resource *ptex,
- struct winsys_handle *whandle)
+ struct pipe_resource *ptex,
+ struct winsys_handle *whandle)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
struct r600_resource *resource = &rtex->resource;
@@ -382,36 +372,39 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
rtex->pitch_in_bytes[0], whandle);
}
-static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen,
+static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
struct pipe_resource *texture,
- unsigned face, unsigned level,
- unsigned zslice, unsigned flags)
+ const struct pipe_surface *surf_tmpl)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
- unsigned offset, tile_height;
+ unsigned tile_height;
+ unsigned level = surf_tmpl->u.tex.level;
+ assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
if (surface == NULL)
return NULL;
- offset = r600_texture_get_offset(rtex, level, zslice, face);
+ /* XXX no offset */
+/* offset = r600_texture_get_offset(rtex, level, surf_tmpl->u.tex.first_layer);*/
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
- surface->base.format = texture->format;
+ surface->base.context = pipe;
+ surface->base.format = surf_tmpl->format;
surface->base.width = mip_minify(texture->width0, level);
surface->base.height = mip_minify(texture->height0, level);
- surface->base.offset = offset;
- surface->base.usage = flags;
- surface->base.zslice = zslice;
+ surface->base.usage = surf_tmpl->usage;
surface->base.texture = texture;
- surface->base.face = face;
- surface->base.level = level;
+ surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+ surface->base.u.tex.level = level;
- tile_height = r600_get_height_alignment(screen, rtex->array_mode[level]);
+ tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]);
surface->aligned_height = align(surface->base.height, tile_height);
return &surface->base;
}
-static void r600_tex_surface_destroy(struct pipe_surface *surface)
+static void r600_surface_destroy(struct pipe_context *pipe,
+ struct pipe_surface *surface)
{
pipe_resource_reference(&surface->texture, NULL);
FREE(surface);
@@ -444,14 +437,12 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
static unsigned int r600_texture_is_referenced(struct pipe_context *context,
struct pipe_resource *texture,
- unsigned face, unsigned level)
+ unsigned level, int layer)
{
/* FIXME */
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
}
-int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture);
-
int r600_texture_depth_flush(struct pipe_context *ctx,
struct pipe_resource *texture)
{
@@ -483,7 +474,7 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
out:
/* XXX: only do this if the depth texture has actually changed:
*/
- r600_blit_uncompress_depth_ptr(ctx, rtex);
+ r600_blit_uncompress_depth(ctx, rtex);
return 0;
}
@@ -491,7 +482,7 @@ out:
*/
static INLINE unsigned u_box_volume( const struct pipe_box *box )
{
- return box->width * box->depth * box->height;
+ return box->width * box->depth * box->height;
};
@@ -499,44 +490,44 @@ static INLINE unsigned u_box_volume( const struct pipe_box *box )
* If so, don't use a staging resource.
*/
static boolean permit_hardware_blit(struct pipe_screen *screen,
- struct pipe_resource *res)
+ struct pipe_resource *res)
{
- unsigned bind;
+ unsigned bind;
- if (util_format_is_depth_or_stencil(res->format))
- bind = PIPE_BIND_DEPTH_STENCIL;
- else
- bind = PIPE_BIND_RENDER_TARGET;
+ if (util_format_is_depth_or_stencil(res->format))
+ bind = PIPE_BIND_DEPTH_STENCIL;
+ else
+ bind = PIPE_BIND_RENDER_TARGET;
/* See r600_resource_copy_region: there is something wrong
- * with depth resource copies at the moment so avoid them for
- * now.
- */
+ * with depth resource copies at the moment so avoid them for
+ * now.
+ */
if (util_format_get_component_bits(res->format,
- UTIL_FORMAT_COLORSPACE_ZS,
- 0) != 0)
- return FALSE;
-
- if (!screen->is_format_supported(screen,
- res->format,
- res->target,
- res->nr_samples,
- bind, 0))
- return FALSE;
-
- if (!screen->is_format_supported(screen,
- res->format,
- res->target,
- res->nr_samples,
- PIPE_BIND_SAMPLER_VIEW, 0))
- return FALSE;
-
- return TRUE;
+ UTIL_FORMAT_COLORSPACE_ZS,
+ 0) != 0)
+ return FALSE;
+
+ if (!screen->is_format_supported(screen,
+ res->format,
+ res->target,
+ res->nr_samples,
+ bind, 0))
+ return FALSE;
+
+ if (!screen->is_format_supported(screen,
+ res->format,
+ res->target,
+ res->nr_samples,
+ PIPE_BIND_SAMPLER_VIEW, 0))
+ return FALSE;
+
+ return TRUE;
}
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
struct pipe_resource *texture,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box)
{
@@ -556,37 +547,36 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
if (rtex->tiled)
use_staging_texture = TRUE;
- if ((usage & PIPE_TRANSFER_READ) &&
- u_box_volume(box) > 1024)
- use_staging_texture = TRUE;
-
- /* XXX: Use a staging texture for uploads if the underlying BO
- * is busy. No interface for checking that currently? so do
- * it eagerly whenever the transfer doesn't require a readback
- * and might block.
- */
- if ((usage & PIPE_TRANSFER_WRITE) &&
- !(usage & (PIPE_TRANSFER_READ |
- PIPE_TRANSFER_DONTBLOCK |
- PIPE_TRANSFER_UNSYNCHRONIZED)))
- use_staging_texture = TRUE;
-
- if (!permit_hardware_blit(ctx->screen, texture) ||
- (texture->flags & R600_RESOURCE_FLAG_TRANSFER))
- use_staging_texture = FALSE;
+ if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024)
+ use_staging_texture = TRUE;
+
+ /* XXX: Use a staging texture for uploads if the underlying BO
+ * is busy. No interface for checking that currently? so do
+ * it eagerly whenever the transfer doesn't require a readback
+ * and might block.
+ */
+ if ((usage & PIPE_TRANSFER_WRITE) &&
+ !(usage & (PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_DONTBLOCK |
+ PIPE_TRANSFER_UNSYNCHRONIZED)))
+ use_staging_texture = TRUE;
+
+ if (!permit_hardware_blit(ctx->screen, texture) ||
+ (texture->flags & R600_RESOURCE_FLAG_TRANSFER))
+ use_staging_texture = FALSE;
trans = CALLOC_STRUCT(r600_transfer);
if (trans == NULL)
return NULL;
pipe_resource_reference(&trans->transfer.resource, texture);
- trans->transfer.sr = sr;
+ trans->transfer.level = level;
trans->transfer.usage = usage;
trans->transfer.box = *box;
if (rtex->depth) {
- /* XXX: only readback the rectangle which is being mapped?
- */
- /* XXX: when discard is true, no need to read back from depth texture
- */
+ /* XXX: only readback the rectangle which is being mapped?
+ */
+ /* XXX: when discard is true, no need to read back from depth texture
+ */
r = r600_texture_depth_flush(ctx, texture);
if (r < 0) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
@@ -600,6 +590,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
resource.width0 = box->width;
resource.height0 = box->height;
resource.depth0 = 1;
+ resource.array_size = 1;
resource.last_level = 0;
resource.nr_samples = 0;
resource.usage = PIPE_USAGE_STAGING;
@@ -625,7 +616,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
}
trans->transfer.stride =
- ((struct r600_resource_texture *)trans->staging_texture)->pitch_in_bytes[0];
+ ((struct r600_resource_texture *)trans->staging_texture)->pitch_in_bytes[0];
if (usage & PIPE_TRANSFER_READ) {
r600_copy_to_staging_texture(ctx, trans);
/* Always referenced in the blit. */
@@ -633,8 +624,8 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
}
return &trans->transfer;
}
- trans->transfer.stride = rtex->pitch_in_bytes[sr.level];
- trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face);
+ trans->transfer.stride = rtex->pitch_in_bytes[level];
+ trans->offset = r600_texture_get_offset(rtex, level, box->z);
return &trans->transfer;
}
@@ -747,10 +738,10 @@ struct u_resource_vtbl r600_texture_vtbl =
u_default_transfer_inline_write /* transfer_inline_write */
};
-void r600_init_screen_texture_functions(struct pipe_screen *screen)
+void r600_init_surface_functions(struct r600_pipe_context *r600)
{
- screen->get_tex_surface = r600_get_tex_surface;
- screen->tex_surface_destroy = r600_tex_surface_destroy;
+ r600->context.create_surface = r600_create_surface;
+ r600->context.surface_destroy = r600_surface_destroy;
}
static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
@@ -851,8 +842,8 @@ uint32_t r600_translate_texformat(enum pipe_format format,
case UTIL_FORMAT_COLORSPACE_YUV:
yuv_format |= (1 << 30);
switch (format) {
- case PIPE_FORMAT_UYVY:
- case PIPE_FORMAT_YUYV:
+ case PIPE_FORMAT_UYVY:
+ case PIPE_FORMAT_YUYV:
default:
break;
}
@@ -870,29 +861,29 @@ uint32_t r600_translate_texformat(enum pipe_format format,
/* S3TC formats. TODO */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- static int r600_enable_s3tc = -1;
+ static int r600_enable_s3tc = -1;
- if (r600_enable_s3tc == -1)
- r600_enable_s3tc =
- debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
+ if (r600_enable_s3tc == -1)
+ r600_enable_s3tc =
+ debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
- if (!r600_enable_s3tc)
- goto out_unknown;
+ if (!r600_enable_s3tc)
+ goto out_unknown;
switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
- result = FMT_BC1;
- goto out_word4;
+ result = FMT_BC1;
+ goto out_word4;
case PIPE_FORMAT_DXT3_RGBA:
- result = FMT_BC2;
- goto out_word4;
+ result = FMT_BC2;
+ goto out_word4;
case PIPE_FORMAT_DXT5_RGBA:
- result = FMT_BC3;
- goto out_word4;
- default:
- goto out_unknown;
- }
+ result = FMT_BC3;
+ goto out_word4;
+ default:
+ goto out_unknown;
+ }
}
diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c
index 9a07cf2073..f80fa7af94 100644
--- a/src/gallium/drivers/r600/r600_translate.c
+++ b/src/gallium/drivers/r600/r600_translate.c
@@ -41,6 +41,8 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
struct pipe_resource *out_buffer;
unsigned i, num_verts;
+ struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
+ void *tmp;
/* Initialize the translate key, i.e. the recipe how vertices should be
* translated. */
@@ -51,9 +53,7 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
unsigned output_format_size = ve->hw_format_size[i];
/* Check for support. */
- if (ve->elements[i].src_format == ve->hw_format[i] &&
- (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 &&
- vb->stride % 4 == 0) {
+ if (ve->elements[i].src_format == ve->hw_format[i]) {
continue;
}
@@ -125,12 +125,11 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
/* Unmap all buffers. */
for (i = 0; i < rctx->nvertex_buffer; i++) {
if (vb_translated[i]) {
- pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer,
- vb_transfer[i]);
+ pipe_buffer_unmap(pipe, vb_transfer[i]);
}
}
- pipe_buffer_unmap(pipe, out_buffer, out_transfer);
+ pipe_buffer_unmap(pipe, out_transfer);
/* Setup the new vertex buffer in the first free slot. */
for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
@@ -147,29 +146,23 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
}
/* Save and replace vertex elements. */
- {
- struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
-
- rctx->tran.saved_velems = rctx->vertex_elements;
-
- for (i = 0; i < ve->count; i++) {
- if (vb_translated[ve->elements[i].vertex_buffer_index]) {
- te = &key.element[tr_elem_index[i]];
- new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
- new_velems[i].src_format = te->output_format;
- new_velems[i].src_offset = te->output_offset;
- new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
- } else {
- memcpy(&new_velems[i], &ve->elements[i],
- sizeof(struct pipe_vertex_element));
- }
+ for (i = 0; i < ve->count; i++) {
+ if (vb_translated[ve->elements[i].vertex_buffer_index]) {
+ te = &key.element[tr_elem_index[i]];
+ new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
+ new_velems[i].src_format = te->output_format;
+ new_velems[i].src_offset = te->output_offset;
+ new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
+ } else {
+ memcpy(&new_velems[i], &ve->elements[i],
+ sizeof(struct pipe_vertex_element));
}
-
- rctx->tran.new_velems =
- pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
- pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems);
}
+ tmp = pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
+ pipe->bind_vertex_elements_state(pipe, tmp);
+ rctx->tran.new_velems = tmp;
+
pipe_resource_reference(&out_buffer, NULL);
}
@@ -177,13 +170,15 @@ void r600_end_vertex_translate(struct r600_pipe_context *rctx)
{
struct pipe_context *pipe = &rctx->context;
+ if (rctx->tran.new_velems == NULL) {
+ return;
+ }
/* Restore vertex elements. */
- pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems);
pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems);
+ rctx->tran.new_velems = NULL;
/* Delete the now-unused VBO. */
- pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer,
- NULL);
+ pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, NULL);
}
void r600_translate_index_buffer(struct r600_pipe_context *r600,
@@ -197,14 +192,7 @@ void r600_translate_index_buffer(struct r600_pipe_context *r600,
*index_size = 2;
*start = 0;
break;
-
case 2:
- if (*start % 2 != 0) {
- util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count);
- *start = 0;
- }
- break;
-
case 4:
break;
}
diff --git a/src/gallium/drivers/r600/r600_upload.c b/src/gallium/drivers/r600/r600_upload.c
new file mode 100644
index 0000000000..44102ff55b
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_upload.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse <jglisse@redhat.com>
+ */
+#include <errno.h>
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "r600.h"
+#include "r600_pipe.h"
+#include "r600_resource.h"
+
+struct r600_upload {
+ struct r600_pipe_context *rctx;
+ struct r600_bo *buffer;
+ char *ptr;
+ unsigned size;
+ unsigned default_size;
+ unsigned total_alloc_size;
+ unsigned offset;
+ unsigned alignment;
+};
+
+struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx,
+ unsigned default_size,
+ unsigned alignment)
+{
+ struct r600_upload *upload = CALLOC_STRUCT(r600_upload);
+
+ if (upload == NULL)
+ return NULL;
+
+ upload->rctx = rctx;
+ upload->size = 0;
+ upload->default_size = default_size;
+ upload->alignment = alignment;
+ upload->ptr = NULL;
+ upload->buffer = NULL;
+ upload->total_alloc_size = 0;
+
+ return upload;
+}
+
+void r600_upload_flush(struct r600_upload *upload)
+{
+ if (upload->buffer) {
+ r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL);
+ }
+ upload->default_size = MAX2(upload->total_alloc_size, upload->default_size);
+ upload->total_alloc_size = 0;
+ upload->size = 0;
+ upload->offset = 0;
+ upload->ptr = NULL;
+ upload->buffer = NULL;
+}
+
+void r600_upload_destroy(struct r600_upload *upload)
+{
+ r600_upload_flush(upload);
+ FREE(upload);
+}
+
+int r600_upload_buffer(struct r600_upload *upload, unsigned offset,
+ unsigned size, struct r600_resource_buffer *in_buffer,
+ unsigned *out_offset, unsigned *out_size,
+ struct r600_bo **out_buffer)
+{
+ unsigned alloc_size = align(size, upload->alignment);
+ const void *in_ptr = NULL;
+
+ if (upload->offset + alloc_size > upload->size) {
+ if (upload->size) {
+ r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL);
+ }
+ upload->size = align(MAX2(upload->default_size, alloc_size), 4096);
+ upload->total_alloc_size += upload->size;
+ upload->offset = 0;
+ upload->buffer = r600_bo(upload->rctx->radeon, upload->size, 4096, PIPE_BIND_VERTEX_BUFFER, 0);
+ if (upload->buffer == NULL) {
+ return -ENOMEM;
+ }
+ upload->ptr = r600_bo_map(upload->rctx->radeon, upload->buffer, 0, NULL);
+ }
+
+ in_ptr = in_buffer->user_buffer;
+ memcpy(upload->ptr + upload->offset, (uint8_t *) in_ptr + offset, size);
+ *out_offset = upload->offset;
+ *out_size = upload->size;
+ *out_buffer = NULL;
+ r600_bo_reference(upload->rctx->radeon, out_buffer, upload->buffer);
+ upload->offset += alloc_size;
+
+ return 0;
+}
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 892dee86ba..a7f2f54736 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -29,8 +29,6 @@
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
- unsigned i;
-
bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
@@ -61,18 +59,11 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+ S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
}
- if (alu->last) {
- if (alu->nliteral && !alu->literal_added) {
- R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
- }
- for (i = 0; i < alu->nliteral; i++) {
- bc->bytecode[id++] = alu->value[i];
- }
- }
return 0;
}
diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c
index 413da59e55..94e57e40f8 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -707,17 +707,13 @@ rbug_set_sample_mask(struct pipe_context *_pipe,
static void
rbug_resource_copy_region(struct pipe_context *_pipe,
struct pipe_resource *_dst,
- struct pipe_subresource subdst,
+ unsigned dst_level,
unsigned dstx,
unsigned dsty,
unsigned dstz,
struct pipe_resource *_src,
- struct pipe_subresource subsrc,
- unsigned srcx,
- unsigned srcy,
- unsigned srcz,
- unsigned width,
- unsigned height)
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct rbug_resource *rb_resource_dst = rbug_resource(_dst);
@@ -728,17 +724,13 @@ rbug_resource_copy_region(struct pipe_context *_pipe,
pipe->resource_copy_region(pipe,
dst,
- subdst,
+ dst_level,
dstx,
dsty,
dstz,
src,
- subsrc,
- srcx,
- srcy,
- srcz,
- width,
- height);
+ src_level,
+ src_box);
}
static void
@@ -820,8 +812,8 @@ rbug_flush(struct pipe_context *_pipe,
static unsigned int
rbug_is_resource_referenced(struct pipe_context *_pipe,
struct pipe_resource *_resource,
- unsigned face,
- unsigned level)
+ unsigned level,
+ int layer)
{
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct rbug_resource *rb_resource = rbug_resource(_resource);
@@ -830,8 +822,8 @@ rbug_is_resource_referenced(struct pipe_context *_pipe,
return pipe->is_resource_referenced(pipe,
resource,
- face,
- level);
+ level,
+ layer);
}
static struct pipe_sampler_view *
@@ -862,10 +854,40 @@ rbug_context_sampler_view_destroy(struct pipe_context *_pipe,
rbug_sampler_view(_view));
}
+static struct pipe_surface *
+rbug_context_create_surface(struct pipe_context *_pipe,
+ struct pipe_resource *_resource,
+ const struct pipe_surface *surf_tmpl)
+{
+ struct rbug_context *rb_pipe = rbug_context(_pipe);
+ struct rbug_resource *rb_resource = rbug_resource(_resource);
+ struct pipe_context *pipe = rb_pipe->pipe;
+ struct pipe_resource *resource = rb_resource->resource;
+ struct pipe_surface *result;
+
+ result = pipe->create_surface(pipe,
+ resource,
+ surf_tmpl);
+
+ if (result)
+ return rbug_surface_create(rb_pipe, rb_resource, result);
+ return NULL;
+}
+
+static void
+rbug_context_surface_destroy(struct pipe_context *_pipe,
+ struct pipe_surface *_surface)
+{
+ rbug_surface_destroy(rbug_context(_pipe),
+ rbug_surface(_surface));
+}
+
+
+
static struct pipe_transfer *
rbug_context_get_transfer(struct pipe_context *_context,
struct pipe_resource *_resource,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box)
{
@@ -877,7 +899,7 @@ rbug_context_get_transfer(struct pipe_context *_context,
result = context->get_transfer(context,
resource,
- sr,
+ level,
usage,
box);
@@ -942,12 +964,12 @@ rbug_context_transfer_unmap(struct pipe_context *_context,
static void
rbug_context_transfer_inline_write(struct pipe_context *_context,
struct pipe_resource *_resource,
- struct pipe_subresource sr,
+ unsigned level,
unsigned usage,
const struct pipe_box *box,
const void *data,
unsigned stride,
- unsigned slice_stride)
+ unsigned layer_stride)
{
struct rbug_context *rb_pipe = rbug_context(_context);
struct rbug_resource *rb_resource = rbug_resource(_resource);
@@ -956,12 +978,12 @@ rbug_context_transfer_inline_write(struct pipe_context *_context,
context->transfer_inline_write(context,
resource,
- sr,
+ level,
usage,
box,
data,
stride,
- slice_stride);
+ layer_stride);
}
@@ -1042,6 +1064,8 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
rb_pipe->base.is_resource_referenced = rbug_is_resource_referenced;
rb_pipe->base.create_sampler_view = rbug_context_create_sampler_view;
rb_pipe->base.sampler_view_destroy = rbug_context_sampler_view_destroy;
+ rb_pipe->base.create_surface = rbug_context_create_surface;
+ rb_pipe->base.surface_destroy = rbug_context_surface_destroy;
rb_pipe->base.get_transfer = rbug_context_get_transfer;
rb_pipe->base.transfer_destroy = rbug_context_transfer_destroy;
rb_pipe->base.transfer_map = rbug_context_transfer_map;
diff --git a/src/gallium/drivers/rbug/rbug_core.c b/src/gallium/drivers/rbug/rbug_core.c
index 9dc663b079..eb772d19d0 100644
--- a/src/gallium/drivers/rbug/rbug_core.c
+++ b/src/gallium/drivers/rbug/rbug_core.c
@@ -266,9 +266,9 @@ rbug_texture_read(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32_
tex = tr_tex->resource;
t = pipe_get_transfer(context, tex,
- gptr->face, gptr->level, gptr->zslice,
- PIPE_TRANSFER_READ,
- gptr->x, gptr->y, gptr->w, gptr->h);
+ gptr->level, gptr->face + gptr->zslice,
+ PIPE_TRANSFER_READ,
+ gptr->x, gptr->y, gptr->w, gptr->h);
map = context->transfer_map(context, t);
@@ -279,7 +279,7 @@ rbug_texture_read(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32_
util_format_get_blocksize(t->resource->format),
(uint8_t*)map,
t->stride * util_format_get_nblocksy(t->resource->format,
- t->box.height),
+ t->box.height),
t->stride,
NULL);
diff --git a/src/gallium/drivers/rbug/rbug_objects.c b/src/gallium/drivers/rbug/rbug_objects.c
index 0979fcff95..7d7cc482ae 100644
--- a/src/gallium/drivers/rbug/rbug_objects.c
+++ b/src/gallium/drivers/rbug/rbug_objects.c
@@ -79,7 +79,8 @@ rbug_resource_destroy(struct rbug_resource *rb_resource)
struct pipe_surface *
-rbug_surface_create(struct rbug_resource *rb_resource,
+rbug_surface_create(struct rbug_context *rb_context,
+ struct rbug_resource *rb_resource,
struct pipe_surface *surface)
{
struct rbug_surface *rb_surface;
@@ -108,10 +109,12 @@ error:
}
void
-rbug_surface_destroy(struct rbug_surface *rb_surface)
+rbug_surface_destroy(struct rbug_context *rb_context,
+ struct rbug_surface *rb_surface)
{
pipe_resource_reference(&rb_surface->base.texture, NULL);
- pipe_surface_reference(&rb_surface->surface, NULL);
+ rb_context->pipe->surface_destroy(rb_context->pipe,
+ rb_surface->surface);
FREE(rb_surface);
}
diff --git a/src/gallium/drivers/rbug/rbug_objects.h b/src/gallium/drivers/rbug/rbug_objects.h
index 49c128d3d1..3fba333422 100644
--- a/src/gallium/drivers/rbug/rbug_objects.h
+++ b/src/gallium/drivers/rbug/rbug_objects.h
@@ -189,11 +189,13 @@ void
rbug_resource_destroy(struct rbug_resource *rb_resource);
struct pipe_surface *
-rbug_surface_create(struct rbug_resource *rb_resource,
+rbug_surface_create(struct rbug_context *rb_context,
+ struct rbug_resource *rb_resource,
struct pipe_surface *surface);
void
-rbug_surface_destroy(struct rbug_surface *rb_surface);
+rbug_surface_destroy(struct rbug_context *rb_context,
+ struct rbug_surface *rb_surface);
struct pipe_sampler_view *
rbug_sampler_view_create(struct rbug_context *rb_context,
diff --git a/src/gallium/drivers/rbug/rbug_screen.c b/src/gallium/drivers/rbug/rbug_screen.c
index 961df482c2..d635ce575c 100644
--- a/src/gallium/drivers/rbug/rbug_screen.c
+++ b/src/gallium/drivers/rbug/rbug_screen.c
@@ -188,40 +188,6 @@ rbug_screen_resource_destroy(struct pipe_screen *screen,
rbug_resource_destroy(rbug_resource(_resource));
}
-static struct pipe_surface *
-rbug_screen_get_tex_surface(struct pipe_screen *_screen,
- struct pipe_resource *_resource,
- unsigned face,
- unsigned level,
- unsigned zslice,
- unsigned usage)
-{
- struct rbug_screen *rb_screen = rbug_screen(_screen);
- struct rbug_resource *rb_resource = rbug_resource(_resource);
- struct pipe_screen *screen = rb_screen->screen;
- struct pipe_resource *resource = rb_resource->resource;
- struct pipe_surface *result;
-
- result = screen->get_tex_surface(screen,
- resource,
- face,
- level,
- zslice,
- usage);
-
- if (result)
- return rbug_surface_create(rb_resource, result);
- return NULL;
-}
-
-static void
-rbug_screen_tex_surface_destroy(struct pipe_surface *_surface)
-{
- rbug_surface_destroy(rbug_surface(_surface));
-}
-
-
-
static struct pipe_resource *
rbug_screen_user_buffer_create(struct pipe_screen *_screen,
void *ptr,
@@ -246,16 +212,18 @@ rbug_screen_user_buffer_create(struct pipe_screen *_screen,
static void
rbug_screen_flush_frontbuffer(struct pipe_screen *_screen,
- struct pipe_surface *_surface,
+ struct pipe_resource *_resource,
+ unsigned level, unsigned layer,
void *context_private)
{
struct rbug_screen *rb_screen = rbug_screen(_screen);
- struct rbug_surface *rb_surface = rbug_surface(_surface);
+ struct rbug_resource *rb_resource = rbug_resource(_resource);
struct pipe_screen *screen = rb_screen->screen;
- struct pipe_surface *surface = rb_surface->surface;
+ struct pipe_resource *resource = rb_resource->resource;
screen->flush_frontbuffer(screen,
- surface,
+ resource,
+ level, layer,
context_private);
}
@@ -336,8 +304,6 @@ rbug_screen_create(struct pipe_screen *screen)
rb_screen->base.resource_from_handle = rbug_screen_resource_from_handle;
rb_screen->base.resource_get_handle = rbug_screen_resource_get_handle;
rb_screen->base.resource_destroy = rbug_screen_resource_destroy;
- rb_screen->base.get_tex_surface = rbug_screen_get_tex_surface;
- rb_screen->base.tex_surface_destroy = rbug_screen_tex_surface_destroy;
rb_screen->base.user_buffer_create = rbug_screen_user_buffer_create;
rb_screen->base.flush_frontbuffer = rbug_screen_flush_frontbuffer;
rb_screen->base.fence_reference = rbug_screen_fence_reference;
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index b5d30bc6fc..f3489c1c79 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -129,6 +129,10 @@ softpipe_destroy( struct pipe_context *pipe )
}
}
+ for (i = 0; i < softpipe->num_vertex_buffers; i++) {
+ pipe_resource_reference(&softpipe->vertex_buffer[i].buffer, NULL);
+ }
+
tgsi_exec_machine_destroy(softpipe->fs_machine);
FREE( softpipe );
@@ -145,15 +149,15 @@ softpipe_destroy( struct pipe_context *pipe )
*/
static unsigned int
softpipe_is_resource_referenced( struct pipe_context *pipe,
- struct pipe_resource *texture,
- unsigned face, unsigned level)
+ struct pipe_resource *texture,
+ unsigned level, int layer)
{
struct softpipe_context *softpipe = softpipe_context( pipe );
unsigned i;
if (texture->target == PIPE_BUFFER)
return PIPE_UNREFERENCED;
-
+
/* check if any of the bound drawing surfaces are this texture */
if (softpipe->dirty_render_cache) {
for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 9361a3df09..903574b7e1 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -154,9 +154,9 @@ struct softpipe_context {
/** TGSI exec things */
struct {
- struct sp_sampler_varient *geom_samplers_list[PIPE_MAX_GEOMETRY_SAMPLERS];
- struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS];
- struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS];
+ struct sp_sampler_variant *geom_samplers_list[PIPE_MAX_GEOMETRY_SAMPLERS];
+ struct sp_sampler_variant *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS];
+ struct sp_sampler_variant *frag_samplers_list[PIPE_MAX_SAMPLERS];
} tgsi;
struct tgsi_exec_machine *fs_machine;
@@ -192,7 +192,7 @@ softpipe_context( struct pipe_context *pipe )
}
void
-softpipe_reset_sampler_varients(struct softpipe_context *softpipe);
+softpipe_reset_sampler_variants(struct softpipe_context *softpipe);
struct pipe_context *
softpipe_create_context( struct pipe_screen *, void *priv );
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index 1071011db0..4258395063 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -120,8 +120,8 @@ softpipe_flush( struct pipe_context *pipe,
boolean
softpipe_flush_resource(struct pipe_context *pipe,
struct pipe_resource *texture,
- unsigned face,
unsigned level,
+ int layer,
unsigned flush_flags,
boolean read_only,
boolean cpu_access,
@@ -129,7 +129,7 @@ softpipe_flush_resource(struct pipe_context *pipe,
{
unsigned referenced;
- referenced = pipe->is_resource_referenced(pipe, texture, face, level);
+ referenced = pipe->is_resource_referenced(pipe, texture, level, layer);
if ((referenced & PIPE_REFERENCED_FOR_WRITE) ||
((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) {
diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h
index cb97482a71..22a5ceeb9e 100644
--- a/src/gallium/drivers/softpipe/sp_flush.h
+++ b/src/gallium/drivers/softpipe/sp_flush.h
@@ -40,8 +40,8 @@ softpipe_flush(struct pipe_context *pipe, unsigned flags,
boolean
softpipe_flush_resource(struct pipe_context *pipe,
struct pipe_resource *texture,
- unsigned face,
unsigned level,
+ int layer,
unsigned flush_flags,
boolean read_only,
boolean cpu_access,
diff --git a/src/gallium/drivers/softpipe/sp_limits.h b/src/gallium/drivers/softpipe/sp_limits.h
new file mode 100644
index 0000000000..a7a24c98d5
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_limits.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SP_LIMITS_H
+#define SP_LIMITS_H
+
+
+
+#define SP_MAX_TEXTURE_2D_LEVELS 15 /* 16K x 16K */
+#define SP_MAX_TEXTURE_3D_LEVELS 9 /* 512 x 512 x 512 */
+
+
+/** Max surface size */
+#define MAX_WIDTH (1 << (SP_MAX_TEXTURE_2D_LEVELS - 1))
+#define MAX_HEIGHT (1 << (SP_MAX_TEXTURE_2D_LEVELS - 1))
+
+
+#endif /* SP_LIMITS_H */
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 6af1b2d061..76cfc0bf51 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -35,6 +35,7 @@
#include "util/u_memory.h"
#include "util/u_format.h"
#include "sp_context.h"
+#include "sp_state.h"
#include "sp_quad.h"
#include "sp_tile_cache.h"
#include "sp_quad_pipe.h"
@@ -794,6 +795,9 @@ blend_fallback(struct quad_stage *qs,
struct softpipe_context *softpipe = qs->softpipe;
const struct pipe_blend_state *blend = softpipe->blend;
unsigned cbuf;
+ boolean write_all;
+
+ write_all = softpipe->fs->color0_writes_all_cbufs;
for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++)
{
@@ -806,15 +810,19 @@ blend_fallback(struct quad_stage *qs,
quads[0]->input.y0);
boolean has_dst_alpha
= util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format);
- uint q, i, j;
+ uint q, i, j, qbuf;
+
+ qbuf = write_all ? 0 : cbuf;
for (q = 0; q < nr; q++) {
struct quad_header *quad = quads[q];
- float (*quadColor)[4] = quad->output.color[cbuf];
+ float (*quadColor)[4];
const int itx = (quad->input.x0 & (TILE_SIZE-1));
const int ity = (quad->input.y0 & (TILE_SIZE-1));
- /* get/swizzle dest colors
+ quadColor = quad->output.color[qbuf];
+
+ /* get/swizzle dest colors
*/
for (j = 0; j < QUAD_SIZE; j++) {
int x = itx + (j & 1);
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 9b54babdfb..e19f2e6fc7 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -268,12 +268,13 @@ softpipe_destroy_screen( struct pipe_screen *screen )
*/
static void
softpipe_flush_frontbuffer(struct pipe_screen *_screen,
- struct pipe_surface *surface,
+ struct pipe_resource *resource,
+ unsigned level, unsigned layer,
void *context_private)
{
struct softpipe_screen *screen = softpipe_screen(_screen);
struct sw_winsys *winsys = screen->winsys;
- struct softpipe_resource *texture = softpipe_resource(surface->texture);
+ struct softpipe_resource *texture = softpipe_resource(resource);
assert(texture->dt);
if (texture->dt)
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 525bf23734..bb19f8cff2 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -74,7 +74,7 @@ struct sp_fragment_shader {
boolean origin_lower_left; /**< fragment shader uses lower left position origin? */
boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */
-
+ boolean color0_writes_all_cbufs; /**< fragment shader writes color0 to all bound cbufs */
void (*prepare)( const struct sp_fragment_shader *shader,
struct tgsi_exec_machine *machine,
struct tgsi_sampler **samplers);
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 3ba4d934fd..bf4c12701a 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -197,7 +197,7 @@ update_tgsi_samplers( struct softpipe_context *softpipe )
{
unsigned i;
- softpipe_reset_sampler_varients( softpipe );
+ softpipe_reset_sampler_variants( softpipe );
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
struct softpipe_tex_tile_cache *tc = softpipe->tex_cache[i];
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index b59fbc33ed..cfa211b60a 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -43,8 +43,8 @@
struct sp_sampler {
struct pipe_sampler_state base;
- struct sp_sampler_varient *varients;
- struct sp_sampler_varient *current;
+ struct sp_sampler_variant *variants;
+ struct sp_sampler_variant *current;
};
static struct sp_sampler *sp_sampler( struct pipe_sampler_state *sampler )
@@ -60,7 +60,7 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
struct sp_sampler *sp_sampler = CALLOC_STRUCT(sp_sampler);
sp_sampler->base = *sampler;
- sp_sampler->varients = NULL;
+ sp_sampler->variants = NULL;
return (void *)sp_sampler;
}
@@ -277,23 +277,24 @@ softpipe_set_geometry_sampler_views(struct pipe_context *pipe,
/**
- * Find/create an sp_sampler_varient object for sampling the given texture,
+ * Find/create an sp_sampler_variant object for sampling the given texture,
* sampler and tex unit.
*
* Note that the tex unit is significant. We can't re-use a sampler
- * varient for multiple texture units because the sampler varient contains
+ * variant for multiple texture units because the sampler variant contains
* the texture object pointer. If the texture object pointer were stored
- * somewhere outside the sampler varient, we could re-use samplers for
+ * somewhere outside the sampler variant, we could re-use samplers for
* multiple texture units.
*/
-static struct sp_sampler_varient *
-get_sampler_varient( unsigned unit,
+static struct sp_sampler_variant *
+get_sampler_variant( unsigned unit,
struct sp_sampler *sampler,
+ struct pipe_sampler_view *view,
struct pipe_resource *resource,
unsigned processor )
{
struct softpipe_resource *sp_texture = softpipe_resource(resource);
- struct sp_sampler_varient *v = NULL;
+ struct sp_sampler_variant *v = NULL;
union sp_sampler_key key;
/* if this fails, widen the key.unit field and update this assertion */
@@ -303,6 +304,10 @@ get_sampler_varient( unsigned unit,
key.bits.is_pot = sp_texture->pot;
key.bits.processor = processor;
key.bits.unit = unit;
+ key.bits.swizzle_r = view->swizzle_r;
+ key.bits.swizzle_g = view->swizzle_g;
+ key.bits.swizzle_b = view->swizzle_b;
+ key.bits.swizzle_a = view->swizzle_a;
key.bits.pad = 0;
if (sampler->current &&
@@ -311,14 +316,14 @@ get_sampler_varient( unsigned unit,
}
if (v == NULL) {
- for (v = sampler->varients; v; v = v->next)
+ for (v = sampler->variants; v; v = v->next)
if (v->key.value == key.value)
break;
if (v == NULL) {
- v = sp_create_sampler_varient( &sampler->base, key );
- v->next = sampler->varients;
- sampler->varients = v;
+ v = sp_create_sampler_variant( &sampler->base, key );
+ v->next = sampler->variants;
+ sampler->variants = v;
}
}
@@ -328,7 +333,7 @@ get_sampler_varient( unsigned unit,
void
-softpipe_reset_sampler_varients(struct softpipe_context *softpipe)
+softpipe_reset_sampler_variants(struct softpipe_context *softpipe)
{
int i;
@@ -345,12 +350,13 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe)
}
softpipe->tgsi.vert_samplers_list[i] =
- get_sampler_varient( i,
+ get_sampler_variant( i,
sp_sampler(softpipe->vertex_samplers[i]),
+ softpipe->vertex_sampler_views[i],
texture,
TGSI_PROCESSOR_VERTEX );
- sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i],
+ sp_sampler_variant_bind_texture( softpipe->tgsi.vert_samplers_list[i],
softpipe->vertex_tex_cache[i],
texture );
}
@@ -366,13 +372,14 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe)
}
softpipe->tgsi.geom_samplers_list[i] =
- get_sampler_varient(
+ get_sampler_variant(
i,
sp_sampler(softpipe->geometry_samplers[i]),
+ softpipe->geometry_sampler_views[i],
texture,
TGSI_PROCESSOR_GEOMETRY );
- sp_sampler_varient_bind_texture(
+ sp_sampler_variant_bind_texture(
softpipe->tgsi.geom_samplers_list[i],
softpipe->geometry_tex_cache[i],
texture );
@@ -389,12 +396,13 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe)
}
softpipe->tgsi.frag_samplers_list[i] =
- get_sampler_varient( i,
+ get_sampler_variant( i,
sp_sampler(softpipe->sampler[i]),
+ softpipe->sampler_views[i],
texture,
TGSI_PROCESSOR_FRAGMENT );
- sp_sampler_varient_bind_texture( softpipe->tgsi.frag_samplers_list[i],
+ sp_sampler_variant_bind_texture( softpipe->tgsi.frag_samplers_list[i],
softpipe->tex_cache[i],
texture );
}
@@ -406,11 +414,11 @@ softpipe_delete_sampler_state(struct pipe_context *pipe,
void *sampler)
{
struct sp_sampler *sp_sampler = (struct sp_sampler *)sampler;
- struct sp_sampler_varient *v, *tmp;
+ struct sp_sampler_variant *v, *tmp;
- for (v = sp_sampler->varients; v; v = tmp) {
+ for (v = sp_sampler->variants; v; v = tmp) {
tmp = v->next;
- sp_sampler_varient_destroy(v);
+ sp_sampler_variant_destroy(v);
}
FREE( sampler );
diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c
index 7fff338cce..66ddc56572 100644
--- a/src/gallium/drivers/softpipe/sp_state_shader.c
+++ b/src/gallium/drivers/softpipe/sp_state_shader.c
@@ -78,6 +78,8 @@ softpipe_create_fs_state(struct pipe_context *pipe,
state->origin_lower_left = state->info.properties[i].data[0];
else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER)
state->pixel_center_integer = state->info.properties[i].data[0];
+ else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS)
+ state->color0_writes_all_cbufs = state->info.properties[i].data[0];
}
return state;
diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c
index 7d8055f2ba..5f4d661abd 100644
--- a/src/gallium/drivers/softpipe/sp_state_vertex.c
+++ b/src/gallium/drivers/softpipe/sp_state_vertex.c
@@ -33,6 +33,7 @@
#include "sp_state.h"
#include "util/u_memory.h"
+#include "util/u_inlines.h"
#include "draw/draw_context.h"
@@ -84,8 +85,9 @@ softpipe_set_vertex_buffers(struct pipe_context *pipe,
assert(count <= PIPE_MAX_ATTRIBS);
- memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0]));
- softpipe->num_vertex_buffers = count;
+ util_copy_vertex_buffers(softpipe->vertex_buffer,
+ &softpipe->num_vertex_buffers,
+ buffers, count);
softpipe->dirty |= SP_NEW_VERTEX;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 2eac4c7a82..cbc40d4b44 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -545,7 +545,7 @@ wrap_linear_unorm_clamp_to_edge(const float s[4], unsigned size,
* derivatives w.r.t X and Y, then compute lambda (level of detail).
*/
static float
-compute_lambda_1d(const struct sp_sampler_varient *samp,
+compute_lambda_1d(const struct sp_sampler_variant *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE])
@@ -560,7 +560,7 @@ compute_lambda_1d(const struct sp_sampler_varient *samp,
static float
-compute_lambda_2d(const struct sp_sampler_varient *samp,
+compute_lambda_2d(const struct sp_sampler_variant *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE])
@@ -579,7 +579,7 @@ compute_lambda_2d(const struct sp_sampler_varient *samp,
static float
-compute_lambda_3d(const struct sp_sampler_varient *samp,
+compute_lambda_3d(const struct sp_sampler_variant *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE])
@@ -608,7 +608,7 @@ compute_lambda_3d(const struct sp_sampler_varient *samp,
* Since there aren't derivatives to use, just return 0.
*/
static float
-compute_lambda_vert(const struct sp_sampler_varient *samp,
+compute_lambda_vert(const struct sp_sampler_variant *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE])
@@ -634,7 +634,7 @@ compute_lambda_vert(const struct sp_sampler_varient *samp,
static INLINE const float *
-get_texel_2d_no_border(const struct sp_sampler_varient *samp,
+get_texel_2d_no_border(const struct sp_sampler_variant *samp,
union tex_tile_address addr, int x, int y)
{
const struct softpipe_tex_cached_tile *tile;
@@ -651,7 +651,7 @@ get_texel_2d_no_border(const struct sp_sampler_varient *samp,
static INLINE const float *
-get_texel_2d(const struct sp_sampler_varient *samp,
+get_texel_2d(const struct sp_sampler_variant *samp,
union tex_tile_address addr, int x, int y)
{
const struct pipe_resource *texture = samp->texture;
@@ -659,8 +659,7 @@ get_texel_2d(const struct sp_sampler_varient *samp,
if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
y < 0 || y >= (int) u_minify(texture->height0, level)) {
- return sp_tex_tile_cache_border_color(samp->cache,
- samp->sampler->border_color);
+ return samp->sampler->border_color;
}
else {
return get_texel_2d_no_border( samp, addr, x, y );
@@ -671,7 +670,7 @@ get_texel_2d(const struct sp_sampler_varient *samp,
/* Gather a quad of adjacent texels within a tile:
*/
static INLINE void
-get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp,
+get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant *samp,
union tex_tile_address addr,
unsigned x, unsigned y,
const float *out[4])
@@ -695,7 +694,7 @@ get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp,
/* Gather a quad of potentially non-adjacent texels:
*/
static INLINE void
-get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp,
+get_texel_quad_2d_no_border(const struct sp_sampler_variant *samp,
union tex_tile_address addr,
int x0, int y0,
int x1, int y1,
@@ -710,7 +709,7 @@ get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp,
/* Can involve a lot of unnecessary checks for border color:
*/
static INLINE void
-get_texel_quad_2d(const struct sp_sampler_varient *samp,
+get_texel_quad_2d(const struct sp_sampler_variant *samp,
union tex_tile_address addr,
int x0, int y0,
int x1, int y1,
@@ -724,10 +723,10 @@ get_texel_quad_2d(const struct sp_sampler_varient *samp,
-/* 3d varients:
+/* 3d variants:
*/
static INLINE const float *
-get_texel_3d_no_border(const struct sp_sampler_varient *samp,
+get_texel_3d_no_border(const struct sp_sampler_variant *samp,
union tex_tile_address addr, int x, int y, int z)
{
const struct softpipe_tex_cached_tile *tile;
@@ -745,7 +744,7 @@ get_texel_3d_no_border(const struct sp_sampler_varient *samp,
static INLINE const float *
-get_texel_3d(const struct sp_sampler_varient *samp,
+get_texel_3d(const struct sp_sampler_variant *samp,
union tex_tile_address addr, int x, int y, int z)
{
const struct pipe_resource *texture = samp->texture;
@@ -754,8 +753,7 @@ get_texel_3d(const struct sp_sampler_varient *samp,
if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
y < 0 || y >= (int) u_minify(texture->height0, level) ||
z < 0 || z >= (int) u_minify(texture->depth0, level)) {
- return sp_tex_tile_cache_border_color(samp->cache,
- samp->sampler->border_color);
+ return samp->sampler->border_color;
}
else {
return get_texel_3d_no_border( samp, addr, x, y, z );
@@ -800,7 +798,7 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
unsigned j;
unsigned level = samp->level;
unsigned xpot = pot_level_size(samp->xpot, level);
@@ -863,7 +861,7 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
unsigned j;
unsigned level = samp->level;
unsigned xpot = pot_level_size(samp->xpot, level);
@@ -907,7 +905,7 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
unsigned j;
unsigned level = samp->level;
unsigned xpot = pot_level_size(samp->xpot, level);
@@ -960,7 +958,7 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
unsigned level0, j;
int width;
@@ -1000,7 +998,7 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
unsigned level0, j;
int width, height;
@@ -1052,7 +1050,7 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
const unsigned *faces = samp->faces; /* zero when not cube-mapping */
unsigned level0, j;
@@ -1096,7 +1094,7 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
unsigned level0, j;
int width, height, depth;
@@ -1138,7 +1136,7 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
unsigned level0, j;
int width;
@@ -1178,7 +1176,7 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
unsigned level0, j;
int width, height;
@@ -1225,7 +1223,7 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
const unsigned *faces = samp->faces; /* zero when not cube-mapping */
unsigned level0, j;
@@ -1274,7 +1272,7 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
unsigned level0, j;
int width, height, depth;
@@ -1350,7 +1348,7 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
int level0;
float lambda;
@@ -1417,7 +1415,7 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
float lambda;
float lod[QUAD_SIZE];
@@ -1460,7 +1458,7 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
float lambda;
float lod[QUAD_SIZE];
@@ -1501,7 +1499,7 @@ mip_filter_linear_2d_linear_repeat_POT(
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_resource *texture = samp->texture;
int level0;
float lambda;
@@ -1569,7 +1567,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
const struct pipe_sampler_state *sampler = samp->sampler;
int j, k0, k1, k2, k3;
float val;
@@ -1656,7 +1654,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
unsigned j;
float ssss[4], tttt[4];
@@ -1731,6 +1729,86 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
}
+static void
+sample_swizzle(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ float rgba_temp[NUM_CHANNELS][QUAD_SIZE];
+ const unsigned swizzle_r = samp->key.bits.swizzle_r;
+ const unsigned swizzle_g = samp->key.bits.swizzle_g;
+ const unsigned swizzle_b = samp->key.bits.swizzle_b;
+ const unsigned swizzle_a = samp->key.bits.swizzle_a;
+ unsigned j;
+
+ samp->sample_target(tgsi_sampler, s, t, p, c0, control, rgba_temp);
+
+ switch (swizzle_r) {
+ case PIPE_SWIZZLE_ZERO:
+ for (j = 0; j < 4; j++)
+ rgba[0][j] = 0.0f;
+ break;
+ case PIPE_SWIZZLE_ONE:
+ for (j = 0; j < 4; j++)
+ rgba[0][j] = 1.0f;
+ break;
+ default:
+ assert(swizzle_r < 4);
+ for (j = 0; j < 4; j++)
+ rgba[0][j] = rgba_temp[swizzle_r][j];
+ }
+
+ switch (swizzle_g) {
+ case PIPE_SWIZZLE_ZERO:
+ for (j = 0; j < 4; j++)
+ rgba[1][j] = 0.0f;
+ break;
+ case PIPE_SWIZZLE_ONE:
+ for (j = 0; j < 4; j++)
+ rgba[1][j] = 1.0f;
+ break;
+ default:
+ assert(swizzle_g < 4);
+ for (j = 0; j < 4; j++)
+ rgba[1][j] = rgba_temp[swizzle_g][j];
+ }
+
+ switch (swizzle_b) {
+ case PIPE_SWIZZLE_ZERO:
+ for (j = 0; j < 4; j++)
+ rgba[2][j] = 0.0f;
+ break;
+ case PIPE_SWIZZLE_ONE:
+ for (j = 0; j < 4; j++)
+ rgba[2][j] = 1.0f;
+ break;
+ default:
+ assert(swizzle_b < 4);
+ for (j = 0; j < 4; j++)
+ rgba[2][j] = rgba_temp[swizzle_b][j];
+ }
+
+ switch (swizzle_a) {
+ case PIPE_SWIZZLE_ZERO:
+ for (j = 0; j < 4; j++)
+ rgba[3][j] = 0.0f;
+ break;
+ case PIPE_SWIZZLE_ONE:
+ for (j = 0; j < 4; j++)
+ rgba[3][j] = 1.0f;
+ break;
+ default:
+ assert(swizzle_a < 4);
+ for (j = 0; j < 4; j++)
+ rgba[3][j] = rgba_temp[swizzle_a][j];
+ }
+}
+
static wrap_nearest_func
get_nearest_unorm_wrap(unsigned mode)
@@ -1909,10 +1987,10 @@ get_img_filter(const union sp_sampler_key key,
/**
- * Bind the given texture object and texture cache to the sampler varient.
+ * Bind the given texture object and texture cache to the sampler variant.
*/
void
-sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
+sp_sampler_variant_bind_texture( struct sp_sampler_variant *samp,
struct softpipe_tex_tile_cache *tex_cache,
const struct pipe_resource *texture )
{
@@ -1927,20 +2005,20 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
void
-sp_sampler_varient_destroy( struct sp_sampler_varient *samp )
+sp_sampler_variant_destroy( struct sp_sampler_variant *samp )
{
FREE(samp);
}
/**
- * Create a sampler varient for a given set of non-orthogonal state.
+ * Create a sampler variant for a given set of non-orthogonal state.
*/
-struct sp_sampler_varient *
-sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
+struct sp_sampler_variant *
+sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
const union sp_sampler_key key )
{
- struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient);
+ struct sp_sampler_variant *samp = CALLOC_STRUCT(sp_sampler_variant);
if (!samp)
return NULL;
@@ -2015,7 +2093,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
}
if (key.bits.target == PIPE_TEXTURE_CUBE) {
- samp->base.get_samples = sample_cube;
+ samp->sample_target = sample_cube;
}
else {
samp->faces[0] = 0;
@@ -2026,7 +2104,17 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
/* Skip cube face determination by promoting the compare
* function pointer:
*/
- samp->base.get_samples = samp->compare;
+ samp->sample_target = samp->compare;
+ }
+
+ if (key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
+ key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
+ key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
+ key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA) {
+ samp->base.get_samples = sample_swizzle;
+ }
+ else {
+ samp->base.get_samples = samp->sample_target;
}
return samp;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 6114acf737..ed99006ab0 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -32,7 +32,7 @@
#include "tgsi/tgsi_exec.h"
-struct sp_sampler_varient;
+struct sp_sampler_variant;
typedef void (*wrap_nearest_func)(const float s[4],
unsigned size,
@@ -44,7 +44,7 @@ typedef void (*wrap_linear_func)(const float s[4],
int icoord1[4],
float w[4]);
-typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler,
+typedef float (*compute_lambda_func)(const struct sp_sampler_variant *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE]);
@@ -64,7 +64,11 @@ union sp_sampler_key {
unsigned is_pot:1;
unsigned processor:2;
unsigned unit:4;
- unsigned pad:22;
+ unsigned swizzle_r:3;
+ unsigned swizzle_g:3;
+ unsigned swizzle_b:3;
+ unsigned swizzle_a:3;
+ unsigned pad:10;
} bits;
unsigned value;
};
@@ -72,7 +76,7 @@ union sp_sampler_key {
/**
* Subclass of tgsi_sampler
*/
-struct sp_sampler_varient
+struct sp_sampler_variant
{
struct tgsi_sampler base; /**< base class */
@@ -113,32 +117,33 @@ struct sp_sampler_varient
filter_func mip_filter;
filter_func compare;
+ filter_func sample_target;
/* Linked list:
*/
- struct sp_sampler_varient *next;
+ struct sp_sampler_variant *next;
};
struct sp_sampler;
-/* Create a sampler varient for a given set of non-orthogonal state. Currently the
+/* Create a sampler variant for a given set of non-orthogonal state. Currently the
*/
-struct sp_sampler_varient *
-sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
+struct sp_sampler_variant *
+sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
const union sp_sampler_key key );
-void sp_sampler_varient_bind_texture( struct sp_sampler_varient *varient,
+void sp_sampler_variant_bind_texture( struct sp_sampler_variant *variant,
struct softpipe_tex_tile_cache *tex_cache,
const struct pipe_resource *tex );
-void sp_sampler_varient_destroy( struct sp_sampler_varient * );
+void sp_sampler_variant_destroy( struct sp_sampler_variant * );
-static INLINE struct sp_sampler_varient *
-sp_sampler_varient(const struct tgsi_sampler *sampler)
+static INLINE struct sp_sampler_variant *
+sp_sampler_variant(const struct tgsi_sampler *sampler)
{
- return (struct sp_sampler_varient *) sampler;
+ return (struct sp_sampler_variant *) sampler;
}
extern void
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
index e817c0c8cf..e42015ad49 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -48,6 +48,9 @@ sp_create_tex_tile_cache( struct pipe_context *pipe )
struct softpipe_tex_tile_cache *tc;
uint pos;
+ /* make sure max texture size works */
+ assert((TILE_SIZE << TEX_ADDR_BITS) >= (1 << (SP_MAX_TEXTURE_2D_LEVELS-1)));
+
tc = CALLOC_STRUCT( softpipe_tex_tile_cache );
if (tc) {
tc->pipe = pipe;
@@ -260,15 +263,14 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
}
tc->tex_trans =
- pipe_get_transfer(tc->pipe, tc->texture,
- addr.bits.face,
- addr.bits.level,
- addr.bits.z,
- PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED,
- 0, 0,
- u_minify(tc->texture->width0, addr.bits.level),
- u_minify(tc->texture->height0, addr.bits.level));
-
+ pipe_get_transfer(tc->pipe, tc->texture,
+ addr.bits.level,
+ addr.bits.face + addr.bits.z,
+ PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED,
+ 0, 0,
+ u_minify(tc->texture->width0, addr.bits.level),
+ u_minify(tc->texture->height0, addr.bits.level));
+
tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans);
tc->tex_face = addr.bits.face;
@@ -276,45 +278,26 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
tc->tex_z = addr.bits.z;
}
- /* get tile from the transfer (view into texture) */
+ /* get tile from the transfer (view into texture)
+ * Note we're using the swizzle version of this fuction only because
+ * we need to pass the texture cache's format explicitly.
+ */
pipe_get_tile_swizzle(tc->pipe,
tc->tex_trans,
addr.bits.x * TILE_SIZE,
addr.bits.y * TILE_SIZE,
TILE_SIZE,
TILE_SIZE,
- tc->swizzle_r,
- tc->swizzle_g,
- tc->swizzle_b,
- tc->swizzle_a,
+ PIPE_SWIZZLE_RED,
+ PIPE_SWIZZLE_GREEN,
+ PIPE_SWIZZLE_BLUE,
+ PIPE_SWIZZLE_ALPHA,
tc->format,
(float *) tile->data.color);
+
tile->addr = addr;
}
tc->last_tile = tile;
return tile;
}
-
-
-
-/**
- * Return the swizzled border color.
- */
-const float *
-sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc,
- const float border_color[4])
-{
- float rgba01[6];
-
- COPY_4V(rgba01, border_color);
- rgba01[PIPE_SWIZZLE_ZERO] = 0.0f;
- rgba01[PIPE_SWIZZLE_ONE] = 1.0f;
-
- tc->swz_border_color[0] = rgba01[tc->swizzle_r];
- tc->swz_border_color[1] = rgba01[tc->swizzle_g];
- tc->swz_border_color[2] = rgba01[tc->swizzle_b];
- tc->swz_border_color[3] = rgba01[tc->swizzle_a];
-
- return tc->swz_border_color;
-}
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
index 05f25133da..2220955b71 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
@@ -30,6 +30,7 @@
#include "pipe/p_compiler.h"
+#include "sp_limits.h"
struct softpipe_context;
@@ -39,22 +40,26 @@ struct softpipe_tex_tile_cache;
/**
* Cache tile size (width and height). This needs to be a power of two.
*/
-#define TILE_SIZE 64
+#define TILE_SIZE_LOG2 6
+#define TILE_SIZE (1 << TILE_SIZE_LOG2)
-/* If we need to support > 4096, just expand this to be a 64 bit
- * union, or consider tiling in Z as well.
+#define TEX_ADDR_BITS (SP_MAX_TEXTURE_2D_LEVELS - 1 - TILE_SIZE_LOG2)
+#define TEX_Z_BITS (SP_MAX_TEXTURE_2D_LEVELS - 1)
+
+/**
+ * Texture tile address as a union for fast compares.
*/
union tex_tile_address {
struct {
- unsigned x:6; /* 4096 / TILE_SIZE */
- unsigned y:6; /* 4096 / TILE_SIZE */
- unsigned z:12; /* 4096 -- z not tiled */
+ unsigned x:TEX_ADDR_BITS; /* 16K / TILE_SIZE */
+ unsigned y:TEX_ADDR_BITS; /* 16K / TILE_SIZE */
+ unsigned z:TEX_Z_BITS; /* 16K -- z not tiled */
unsigned face:3;
unsigned level:4;
unsigned invalid:1;
} bits;
- unsigned value;
+ uint64_t value;
};
@@ -90,8 +95,6 @@ struct softpipe_tex_tile_cache
unsigned format;
struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */
-
- float swz_border_color[4]; /**< swizzled border color */
};
@@ -126,10 +129,10 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
static INLINE union tex_tile_address
tex_tile_address( unsigned x,
- unsigned y,
- unsigned z,
- unsigned face,
- unsigned level )
+ unsigned y,
+ unsigned z,
+ unsigned face,
+ unsigned level )
{
union tex_tile_address addr;
@@ -139,7 +142,7 @@ tex_tile_address( unsigned x,
addr.bits.z = z;
addr.bits.face = face;
addr.bits.level = level;
-
+
return addr;
}
@@ -156,10 +159,5 @@ sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
}
-const float *
-sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc,
- const float border_color[4]);
-
-
#endif /* SP_TEX_TILE_CACHE_H */
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 4e6123fbd0..509d9982b1 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -220,23 +220,18 @@ softpipe_resource_get_handle(struct pipe_screen *screen,
*/
static unsigned
sp_get_tex_image_offset(const struct softpipe_resource *spr,
- unsigned level, unsigned face, unsigned zslice)
+ unsigned level, unsigned layer)
{
const unsigned hgt = u_minify(spr->base.height0, level);
const unsigned nblocksy = util_format_get_nblocksy(spr->base.format, hgt);
unsigned offset = spr->level_offset[level];
- if (spr->base.target == PIPE_TEXTURE_CUBE) {
- assert(zslice == 0);
- offset += face * nblocksy * spr->stride[level];
- }
- else if (spr->base.target == PIPE_TEXTURE_3D) {
- assert(face == 0);
- offset += zslice * nblocksy * spr->stride[level];
+ if (spr->base.target == PIPE_TEXTURE_CUBE ||
+ spr->base.target == PIPE_TEXTURE_3D) {
+ offset += layer * nblocksy * spr->stride[level];
}
else {
- assert(face == 0);
- assert(zslice == 0);
+ assert(layer == 0);
}
return offset;
@@ -247,39 +242,40 @@ sp_get_tex_image_offset(const struct softpipe_resource *spr,
* Get a pipe_surface "view" into a texture resource.
*/
static struct pipe_surface *
-softpipe_get_tex_surface(struct pipe_screen *screen,
- struct pipe_resource *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned usage)
+softpipe_create_surface(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl)
{
- struct softpipe_resource *spr = softpipe_resource(pt);
struct pipe_surface *ps;
+ unsigned level = surf_tmpl->u.tex.level;
assert(level <= pt->last_level);
+ assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
ps = CALLOC_STRUCT(pipe_surface);
if (ps) {
pipe_reference_init(&ps->reference, 1);
pipe_resource_reference(&ps->texture, pt);
- ps->format = pt->format;
+ ps->context = pipe;
+ ps->format = surf_tmpl->format;
ps->width = u_minify(pt->width0, level);
ps->height = u_minify(pt->height0, level);
- ps->offset = sp_get_tex_image_offset(spr, level, face, zslice);
- ps->usage = usage;
+ ps->usage = surf_tmpl->usage;
- ps->face = face;
- ps->level = level;
- ps->zslice = zslice;
+ ps->u.tex.level = level;
+ ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
}
return ps;
}
/**
- * Free a pipe_surface which was created with softpipe_get_tex_surface().
+ * Free a pipe_surface which was created with softpipe_create_surface().
*/
static void
-softpipe_tex_surface_destroy(struct pipe_surface *surf)
+softpipe_surface_destroy(struct pipe_context *pipe,
+ struct pipe_surface *surf)
{
/* Effectively do the texture_update work here - if texture images
* needed post-processing to put them into hardware layout, this is
@@ -302,21 +298,21 @@ softpipe_tex_surface_destroy(struct pipe_surface *surf)
*/
static struct pipe_transfer *
softpipe_get_transfer(struct pipe_context *pipe,
- struct pipe_resource *resource,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box)
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
struct softpipe_resource *spr = softpipe_resource(resource);
struct softpipe_transfer *spt;
assert(resource);
- assert(sr.level <= resource->last_level);
+ assert(level <= resource->last_level);
/* make sure the requested region is in the image bounds */
- assert(box->x + box->width <= u_minify(resource->width0, sr.level));
- assert(box->y + box->height <= u_minify(resource->height0, sr.level));
- assert(box->z + box->depth <= u_minify(resource->depth0, sr.level));
+ assert(box->x + box->width <= u_minify(resource->width0, level));
+ assert(box->y + box->height <= u_minify(resource->height0, level));
+ assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1));
/*
* Transfers, like other pipe operations, must happen in order, so flush the
@@ -326,7 +322,7 @@ softpipe_get_transfer(struct pipe_context *pipe,
boolean read_only = !(usage & PIPE_TRANSFER_WRITE);
boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK);
if (!softpipe_flush_resource(pipe, resource,
- sr.face, sr.level,
+ level, box->depth > 1 ? -1 : box->z,
0, /* flush_flags */
read_only,
TRUE, /* cpu_access */
@@ -343,21 +339,21 @@ softpipe_get_transfer(struct pipe_context *pipe,
if (spt) {
struct pipe_transfer *pt = &spt->base;
enum pipe_format format = resource->format;
- const unsigned hgt = u_minify(spr->base.height0, sr.level);
+ const unsigned hgt = u_minify(spr->base.height0, level);
const unsigned nblocksy = util_format_get_nblocksy(format, hgt);
pipe_resource_reference(&pt->resource, resource);
- pt->sr = sr;
+ pt->level = level;
pt->usage = usage;
pt->box = *box;
- pt->stride = spr->stride[sr.level];
- pt->slice_stride = pt->stride * nblocksy;
+ pt->stride = spr->stride[level];
+ pt->layer_stride = pt->stride * nblocksy;
- spt->offset = sp_get_tex_image_offset(spr, sr.level, sr.face, box->z);
+ spt->offset = sp_get_tex_image_offset(spr, level, box->z);
spt->offset +=
- box->y / util_format_get_blockheight(format) * spt->base.stride +
- box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+ box->y / util_format_get_blockheight(format) * spt->base.stride +
+ box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
return pt;
}
@@ -454,6 +450,7 @@ softpipe_user_buffer_create(struct pipe_screen *screen,
spr->base.width0 = bytes;
spr->base.height0 = 1;
spr->base.depth0 = 1;
+ spr->base.array_size = 1;
spr->userBuffer = TRUE;
spr->data = ptr;
@@ -471,6 +468,9 @@ softpipe_init_texture_funcs(struct pipe_context *pipe)
pipe->transfer_flush_region = u_default_transfer_flush_region;
pipe->transfer_inline_write = u_default_transfer_inline_write;
+
+ pipe->create_surface = softpipe_create_surface;
+ pipe->surface_destroy = softpipe_surface_destroy;
}
@@ -483,6 +483,4 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
screen->resource_get_handle = softpipe_resource_get_handle;
screen->user_buffer_create = softpipe_user_buffer_create;
- screen->get_tex_surface = softpipe_get_tex_surface;
- screen->tex_surface_destroy = softpipe_tex_surface_destroy;
}
diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h
index 6b205dc532..5603110eeb 100644
--- a/src/gallium/drivers/softpipe/sp_texture.h
+++ b/src/gallium/drivers/softpipe/sp_texture.h
@@ -30,10 +30,7 @@
#include "pipe/p_state.h"
-
-
-#define SP_MAX_TEXTURE_2D_LEVELS 13 /* 4K x 4K */
-#define SP_MAX_TEXTURE_3D_LEVELS 9 /* 512 x 512 x 512 */
+#include "sp_limits.h"
struct pipe_context;
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index aa76b8aa1e..480860af63 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -92,6 +92,10 @@ sp_create_tile_cache( struct pipe_context *pipe )
maxTexSize = 1 << (maxLevels - 1);
assert(MAX_WIDTH >= maxTexSize);
+ assert(sizeof(union tile_address) == 4);
+
+ assert((TILE_SIZE << TILE_ADDR_BITS) >= MAX_WIDTH);
+
tc = CALLOC_STRUCT( softpipe_tile_cache );
if (tc) {
tc->pipe = pipe;
@@ -170,11 +174,11 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc,
tc->surface = ps;
if (ps) {
- tc->transfer = pipe_get_transfer(pipe, ps->texture, ps->face,
- ps->level, ps->zslice,
- PIPE_TRANSFER_READ_WRITE |
- PIPE_TRANSFER_UNSYNCHRONIZED,
- 0, 0, ps->width, ps->height);
+ tc->transfer = pipe_get_transfer(pipe, ps->texture,
+ ps->u.tex.level, ps->u.tex.first_layer,
+ PIPE_TRANSFER_READ_WRITE |
+ PIPE_TRANSFER_UNSYNCHRONIZED,
+ 0, 0, ps->width, ps->height);
tc->depth_stencil = (ps->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
ps->format == PIPE_FORMAT_Z24X8_UNORM ||
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h
index 4151a47c32..68140b1d2f 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.h
@@ -30,6 +30,7 @@
#include "pipe/p_compiler.h"
+#include "sp_texture.h"
struct softpipe_tile_cache;
@@ -38,18 +39,22 @@ struct softpipe_tile_cache;
/**
* Cache tile size (width and height). This needs to be a power of two.
*/
-#define TILE_SIZE 64
+#define TILE_SIZE_LOG2 6
+#define TILE_SIZE (1 << TILE_SIZE_LOG2)
-/* If we need to support > 4096, just expand this to be a 64 bit
- * union, or consider tiling in Z as well.
+#define TILE_ADDR_BITS (SP_MAX_TEXTURE_2D_LEVELS - 1 - TILE_SIZE_LOG2)
+
+
+/**
+ * Surface tile address as a union for fast compares.
*/
union tile_address {
struct {
- unsigned x:6; /* 4096 / TILE_SIZE */
- unsigned y:6; /* 4096 / TILE_SIZE */
+ unsigned x:TILE_ADDR_BITS; /* 16K / TILE_SIZE */
+ unsigned y:TILE_ADDR_BITS; /* 16K / TILE_SIZE */
unsigned invalid:1;
- unsigned pad:19;
+ unsigned pad:15;
} bits;
unsigned value;
};
@@ -70,11 +75,6 @@ struct softpipe_cached_tile
#define NUM_ENTRIES 50
-/** XXX move these */
-#define MAX_WIDTH 4096
-#define MAX_HEIGHT 4096
-
-
struct softpipe_tile_cache
{
struct pipe_context *pipe;
diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript
index 12ce4732d1..5455ed0757 100644
--- a/src/gallium/drivers/svga/SConscript
+++ b/src/gallium/drivers/svga/SConscript
@@ -73,4 +73,6 @@ svga = env.ConvenienceLibrary(
source = sources,
)
+env.Alias('svga', svga)
+
Export('svga')
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index e975f3b02f..05eab8a517 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -455,8 +455,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
cmd->guest.pitch = st->base.stride;
swc->surface_relocation(swc, &cmd->host.sid, texture->handle, surface_flags);
- cmd->host.face = st->base.sr.face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
- cmd->host.mipmap = st->base.sr.level;
+ cmd->host.face = st->face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
+ cmd->host.mipmap = st->base.level;
cmd->transfer = transfer;
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index cd3f6b8982..1e513f1039 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -109,6 +109,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
svga_init_vertex_functions(svga);
svga_init_constbuffer_functions(svga);
svga_init_query_functions(svga);
+ svga_init_surface_functions(svga);
/* debug */
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 1fb5a04887..d4970908b1 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -288,6 +288,11 @@ struct svga_sw_state
boolean need_swvfetch;
boolean need_pipeline;
boolean need_swtnl;
+
+ /* Flag to make sure that need sw is on while
+ * updating state within a swtnl call.
+ */
+ boolean in_swtnl_draw;
};
@@ -422,6 +427,7 @@ void svga_init_vertex_functions( struct svga_context *svga );
void svga_init_constbuffer_functions( struct svga_context *svga );
void svga_init_draw_functions( struct svga_context *svga );
void svga_init_query_functions( struct svga_context *svga );
+void svga_init_surface_functions(struct svga_context *svga);
void svga_cleanup_vertex_state( struct svga_context *svga );
void svga_cleanup_tss_binding( struct svga_context *svga );
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 81dd4778d0..97cbac447d 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -315,7 +315,6 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
break;
}
- assert(!stride || width <= stride);
if (max_index != ~0) {
assert(offset + (index_bias + max_index) * stride + width <= size);
}
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
index da33fae62f..be0e7abe21 100644
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -65,14 +65,14 @@ static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl,
generate( nr,
dst_map );
- pipe_buffer_unmap( pipe, dst, transfer );
+ pipe_buffer_unmap( pipe, transfer );
*out_buf = dst;
return PIPE_OK;
fail:
if (dst_map)
- pipe_buffer_unmap( pipe, dst, transfer );
+ pipe_buffer_unmap( pipe, transfer );
if (dst)
pipe->screen->resource_destroy( pipe->screen, dst );
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c
index c4579177b7..83527c6ef4 100644
--- a/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -72,18 +72,18 @@ translate_indices( struct svga_hwtnl *hwtnl,
nr,
dst_map );
- pipe_buffer_unmap( pipe, src, src_transfer );
- pipe_buffer_unmap( pipe, dst, dst_transfer );
+ pipe_buffer_unmap( pipe, src_transfer );
+ pipe_buffer_unmap( pipe, dst_transfer );
*out_buf = dst;
return PIPE_OK;
fail:
if (src_map)
- pipe_buffer_unmap( pipe, src, src_transfer );
+ pipe_buffer_unmap( pipe, src_transfer );
if (dst_map)
- pipe_buffer_unmap( pipe, dst, dst_transfer );
+ pipe_buffer_unmap( pipe, dst_transfer );
if (dst)
pipe->screen->resource_destroy( pipe->screen, dst );
@@ -120,14 +120,17 @@ svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
if (index_buffer &&
svga_buffer_is_user_buffer(index_buffer))
{
+ boolean flushed;
assert( index_buffer->width0 >= index_offset + count * index_size );
ret = u_upload_buffer( hwtnl->upload_ib,
+ 0,
index_offset,
count * index_size,
index_buffer,
&index_offset,
- &upload_buffer );
+ &upload_buffer,
+ &flushed );
if (ret)
goto done;
diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c
index ca036a6463..426698806c 100644
--- a/src/gallium/drivers/svga/svga_pipe_blit.c
+++ b/src/gallium/drivers/svga/svga_pipe_blit.c
@@ -32,37 +32,40 @@
#define FILE_DEBUG_FLAG DEBUG_BLIT
-/* XXX I got my doubts about this, should maybe use svga_texture_copy_handle directly? */
+/* XXX still have doubts about this... */
static void svga_surface_copy(struct pipe_context *pipe,
struct pipe_resource* dst_tex,
- struct pipe_subresource subdst,
+ unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource* src_tex,
- struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
-{
+ unsigned src_level,
+ const struct pipe_box *src_box)
+ {
struct svga_context *svga = svga_context(pipe);
- struct pipe_screen *screen = pipe->screen;
+ struct svga_texture *stex = svga_texture(src_tex);
+ struct svga_texture *dtex = svga_texture(dst_tex);
+/* struct pipe_screen *screen = pipe->screen;
SVGA3dCopyBox *box;
enum pipe_error ret;
- struct pipe_surface *srcsurf, *dstsurf;
+ struct pipe_surface *srcsurf, *dstsurf;*/
+ unsigned dst_face, dst_z, src_face, src_z;
svga_hwtnl_flush_retry( svga );
+#if 0
srcsurf = screen->get_tex_surface(screen, src_tex,
- subsrc.face, subsrc.level, srcz,
+ src_level, src_box->z, src_box->z,
PIPE_BIND_SAMPLER_VIEW);
dstsurf = screen->get_tex_surface(screen, dst_tex,
- subdst.face, subdst.level, dstz,
+ dst_level, dst_box->z, dst_box->z,
PIPE_BIND_RENDER_TARGET);
SVGA_DBG(DEBUG_DMA, "blit to sid %p (%d,%d), from sid %p (%d,%d) sz %dx%d\n",
svga_surface(dstsurf)->handle,
dstx, dsty,
svga_surface(srcsurf)->handle,
- srcx, srcy,
+ src_box->x, src_box->y,
width, height);
ret = SVGA3D_BeginSurfaceCopy(svga->swc,
@@ -88,8 +91,8 @@ static void svga_surface_copy(struct pipe_context *pipe,
box->w = width;
box->h = height;
box->d = 1;
- box->srcx = srcx;
- box->srcy = srcy;
+ box->srcx = src_box->x;
+ box->srcy = src_box->y;
box->srcz = 0;
SVGA_FIFOCommitAll(svga->swc);
@@ -100,6 +103,37 @@ static void svga_surface_copy(struct pipe_context *pipe,
pipe_surface_reference(&srcsurf, NULL);
pipe_surface_reference(&dstsurf, NULL);
+#else
+ if (src_tex->target == PIPE_TEXTURE_CUBE) {
+ src_face = src_box->z;
+ src_z = 0;
+ assert(src_box->depth == 1);
+ }
+ else {
+ src_face = 0;
+ src_z = src_box->z;
+ }
+ /* different src/dst type???*/
+ if (dst_tex->target == PIPE_TEXTURE_CUBE) {
+ dst_face = dstz;
+ dst_z = 0;
+ assert(src_box->depth == 1);
+ }
+ else {
+ dst_face = 0;
+ dst_z = dstz;
+ }
+ svga_texture_copy_handle(svga,
+ stex->handle,
+ src_box->x, src_box->y, src_z,
+ src_level, src_face,
+ dtex->handle,
+ dstx, dsty, dst_z,
+ dst_level, dst_face,
+ src_box->width, src_box->height, src_box->depth);
+
+#endif
+
}
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
index 660eb0757a..e97b4e5741 100644
--- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -68,7 +68,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
/* need this for draw module. */
rast->templ = *templ;
- /* light_twoside - XXX: need fragment shader varient */
+ /* light_twoside - XXX: need fragment shader variant */
/* poly_smooth - XXX: no fallback available */
/* poly_stipple_enable - draw module */
/* sprite_coord_enable - ? */
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index 198d401332..f12e2b6862 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -53,8 +53,8 @@ svga_buffer_needs_hw_storage(unsigned usage)
static unsigned int
svga_buffer_is_referenced( struct pipe_context *pipe,
- struct pipe_resource *buf,
- unsigned face, unsigned level)
+ struct pipe_resource *buf,
+ unsigned level, int layer)
{
struct svga_screen *ss = svga_screen(pipe->screen);
struct svga_buffer *sbuf = svga_buffer(buf);
@@ -337,6 +337,7 @@ svga_user_buffer_create(struct pipe_screen *screen,
sbuf->b.b.width0 = bytes;
sbuf->b.b.height0 = 1;
sbuf->b.b.depth0 = 1;
+ sbuf->b.b.array_size = 1;
sbuf->swbuf = ptr;
sbuf->user = TRUE;
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index 26eb03a895..7c9e600b9f 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -50,8 +50,8 @@
static unsigned int
svga_texture_is_referenced( struct pipe_context *pipe,
- struct pipe_resource *texture,
- unsigned face, unsigned level)
+ struct pipe_resource *texture,
+ unsigned level, int layer)
{
struct svga_texture *tex = svga_texture(texture);
struct svga_screen *ss = svga_screen(pipe->screen);
@@ -171,20 +171,7 @@ svga_transfer_dma_band(struct svga_context *svga,
struct svga_texture *texture = svga_texture(st->base.resource);
SVGA3dCopyBox box;
enum pipe_error ret;
-
- SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
- transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from",
- texture->handle,
- st->base.sr.face,
- st->base.box.x,
- y,
- st->base.box.z,
- st->base.box.x + st->base.box.width,
- y + h,
- st->base.box.z + 1,
- util_format_get_blocksize(texture->b.b.format) * 8 /
- (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format)));
-
+
box.x = st->base.box.x;
box.y = y;
box.z = st->base.box.z;
@@ -195,6 +182,26 @@ svga_transfer_dma_band(struct svga_context *svga,
box.srcy = srcy;
box.srcz = 0;
+ if (st->base.resource->target == PIPE_TEXTURE_CUBE) {
+ st->face = st->base.box.z;
+ box.z = 0;
+ }
+ else
+ st->face = 0;
+
+ SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
+ transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from",
+ texture->handle,
+ st->face,
+ st->base.box.x,
+ y,
+ box.z,
+ st->base.box.x + st->base.box.width,
+ y + h,
+ box.z + 1,
+ util_format_get_blocksize(texture->b.b.format) * 8 /
+ (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format)));
+
ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1);
if(ret != PIPE_OK) {
svga->swc->flush(svga->swc, NULL);
@@ -213,7 +220,7 @@ svga_transfer_dma(struct svga_context *svga,
struct svga_screen *screen = svga_screen(texture->b.b.screen);
struct svga_winsys_screen *sws = screen->sws;
struct pipe_fence_handle *fence = NULL;
-
+
if (transfer == SVGA3D_READ_HOST_VRAM) {
SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__);
}
@@ -221,7 +228,7 @@ svga_transfer_dma(struct svga_context *svga,
if(!st->swbuf) {
/* Do the DMA transfer in a single go */
-
+
svga_transfer_dma_band(svga, st, transfer, st->base.box.y, st->base.box.height, 0);
if(transfer == SVGA3D_READ_HOST_VRAM) {
@@ -245,12 +252,12 @@ svga_transfer_dma(struct svga_context *svga,
/* Transfer band must be aligned to pixel block boundaries */
assert(y % blockheight == 0);
assert(h % blockheight == 0);
-
+
offset = y * st->base.stride / blockheight;
length = h * st->base.stride / blockheight;
sw = (uint8_t *)st->swbuf + offset;
-
+
if(transfer == SVGA3D_WRITE_HOST_VRAM) {
/* Wait for the previous DMAs to complete */
/* TODO: keep one DMA (at half the size) in the background */
@@ -267,9 +274,9 @@ svga_transfer_dma(struct svga_context *svga,
sws->buffer_unmap(sws, st->hwbuf);
}
}
-
+
svga_transfer_dma_band(svga, st, transfer, y, h, srcy);
-
+
if(transfer == SVGA3D_READ_HOST_VRAM) {
svga_context_flush(svga, &fence);
sws->fence_finish(sws, fence, 0);
@@ -336,10 +343,10 @@ svga_texture_destroy(struct pipe_screen *screen,
*/
static struct pipe_transfer *
svga_texture_get_transfer(struct pipe_context *pipe,
- struct pipe_resource *texture,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box)
+ struct pipe_resource *texture,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
struct svga_context *svga = svga_context(pipe);
struct svga_screen *ss = svga_screen(pipe->screen);
@@ -352,19 +359,20 @@ svga_texture_get_transfer(struct pipe_context *pipe,
if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
return NULL;
+ assert(box->depth == 1);
st = CALLOC_STRUCT(svga_transfer);
if (!st)
return NULL;
-
+
pipe_resource_reference(&st->base.resource, texture);
- st->base.sr = sr;
+ st->base.level = level;
st->base.usage = usage;
st->base.box = *box;
st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
- st->base.slice_stride = 0;
+ st->base.layer_stride = 0;
st->hw_nblocksy = nblocksy;
-
+
st->hwbuf = svga_winsys_buffer_create(svga,
1,
0,
@@ -391,7 +399,7 @@ svga_texture_get_transfer(struct pipe_context *pipe,
if(!st->swbuf)
goto no_swbuf;
}
-
+
if (usage & PIPE_TRANSFER_READ)
svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM);
@@ -454,8 +462,11 @@ svga_texture_transfer_destroy(struct pipe_context *pipe,
if (st->base.usage & PIPE_TRANSFER_WRITE) {
svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM);
ss->texture_timestamp++;
- tex->view_age[transfer->sr.level] = ++(tex->age);
- tex->defined[transfer->sr.face][transfer->sr.level] = TRUE;
+ tex->view_age[transfer->level] = ++(tex->age);
+ if (transfer->resource->target == PIPE_TEXTURE_CUBE)
+ tex->defined[transfer->box.z][transfer->level] = TRUE;
+ else
+ tex->defined[0][transfer->level] = TRUE;
}
pipe_resource_reference(&st->base.resource, NULL);
@@ -490,7 +501,7 @@ svga_texture_create(struct pipe_screen *screen,
{
struct svga_screen *svgascreen = svga_screen(screen);
struct svga_texture *tex = CALLOC_STRUCT(svga_texture);
-
+
if (!tex)
goto error1;
@@ -507,7 +518,7 @@ svga_texture_create(struct pipe_screen *screen,
tex->key.size.width = template->width0;
tex->key.size.height = template->height0;
tex->key.size.depth = template->depth0;
-
+
if(template->target == PIPE_TEXTURE_CUBE) {
tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
tex->key.numFaces = 6;
diff --git a/src/gallium/drivers/svga/svga_resource_texture.h b/src/gallium/drivers/svga/svga_resource_texture.h
index 631937f2eb..9a2911c2a9 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.h
+++ b/src/gallium/drivers/svga/svga_resource_texture.h
@@ -85,6 +85,8 @@ struct svga_transfer
{
struct pipe_transfer base;
+ unsigned face;
+
struct svga_winsys_buffer *hwbuf;
/* Height of the hardware buffer in pixel blocks */
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 666b498d14..d0f42c614c 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -35,7 +35,6 @@
#include "svga_resource_texture.h"
#include "svga_resource.h"
#include "svga_debug.h"
-#include "svga_surface.h"
#include "svga3d_shaderdefs.h"
@@ -499,7 +498,6 @@ svga_screen_create(struct svga_winsys_screen *sws)
screen->fence_finish = svga_fence_finish;
svgascreen->sws = sws;
- svga_screen_init_surface_functions(screen);
svga_init_screen_resource_functions(svgascreen);
svgascreen->use_ps30 =
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index 97c818cd37..daf1024fd0 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -110,7 +110,7 @@ static int emit_consts( struct svga_context *svga,
done:
if (data)
- pipe_buffer_unmap(&svga->pipe, svga->curr.cb[unit], transfer);
+ pipe_buffer_unmap(&svga->pipe, transfer);
return ret;
}
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index ad6f294713..9c04adec8e 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -136,7 +136,7 @@ static int make_fs_key( const struct svga_context *svga,
/* The blend workaround for simulating logicop xor behaviour
* requires that the incoming fragment color be white. This change
- * achieves that by creating a varient of the current fragment
+ * achieves that by creating a variant of the current fragment
* shader that overrides all output colors with 1,1,1,1
*
* This will work for most shaders, including those containing
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
index d34d68f535..8ba5ac8cdb 100644
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -114,7 +114,7 @@ static int update_need_pipeline( struct svga_context *svga,
/* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE
*/
if (svga->curr.rast->need_pipeline & (1 << svga->curr.reduced_prim)) {
- SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline (%d) & prim (%x)\n",
+ SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline (0x%x) & prim (0x%x)\n",
__FUNCTION__,
svga->curr.rast->need_pipeline,
(1 << svga->curr.reduced_prim) );
@@ -175,9 +175,17 @@ static int update_need_swtnl( struct svga_context *svga,
need_swtnl = 1;
}
+ /*
+ * Some state changes the draw module does makes us belive we
+ * we don't need swtnl. This causes the vdecl code to pickup
+ * the wrong buffers and vertex formats. Try trivial/line-wide.
+ */
+ if (svga->state.sw.in_swtnl_draw)
+ need_swtnl = 1;
+
if (need_swtnl != svga->state.sw.need_swtnl) {
SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF,
- "%s need_swvfetch: %s, need_pipeline %s\n",
+ "%s: need_swvfetch %s, need_pipeline %s\n",
__FUNCTION__,
svga->state.sw.need_swvfetch ? "true" : "false",
svga->state.sw.need_pipeline ? "true" : "false");
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
index 4a50b19474..f8b269a101 100644
--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -238,7 +238,6 @@ update_tss(struct svga_context *svga,
// TEXCOORDINDEX -- hopefully not needed
if (svga->curr.tex_flags.flag_1d & (1 << i)) {
- debug_printf("wrap 1d tex %d\n", i);
EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail);
}
else
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
index 3af7bf2b35..958d00393f 100644
--- a/src/gallium/drivers/svga/svga_state_vdecl.c
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -57,12 +57,14 @@ upload_user_buffers( struct svga_context *svga )
struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer);
if (!buffer->uploaded.buffer) {
+ boolean flushed;
ret = u_upload_buffer( svga->upload_vb,
- 0,
+ 0, 0,
buffer->b.b.width0,
&buffer->b.b,
&buffer->uploaded.offset,
- &buffer->uploaded.buffer );
+ &buffer->uploaded.buffer,
+ &flushed);
if (ret)
return ret;
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 5133c70593..6682a1efe6 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -233,9 +233,7 @@ static int update_zero_stride( struct svga_context *svga,
translate->run(translate, 0, 1, 0,
svga->curr.zero_stride_constants);
- pipe_buffer_unmap(&svga->pipe,
- vbuffer->buffer,
- transfer);
+ pipe_buffer_unmap(&svga->pipe, transfer);
translate->release(translate);
}
diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c
index b21dc5fd9a..3e4bed76c0 100644
--- a/src/gallium/drivers/svga/svga_surface.c
+++ b/src/gallium/drivers/svga/svga_surface.c
@@ -179,36 +179,50 @@ svga_texture_view_surface(struct pipe_context *pipe,
static struct pipe_surface *
-svga_get_tex_surface(struct pipe_screen *screen,
- struct pipe_resource *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned flags)
+svga_create_surface(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl)
{
struct svga_texture *tex = svga_texture(pt);
+ struct pipe_screen *screen = pipe->screen;
struct svga_surface *s;
- boolean render = (flags & (PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_DEPTH_STENCIL)) ? TRUE : FALSE;
+ unsigned face, zslice;
+ /* XXX surfaces should only be used for rendering purposes nowadays */
+ boolean render = (surf_tmpl->usage & (PIPE_BIND_RENDER_TARGET |
+ PIPE_BIND_DEPTH_STENCIL)) ? TRUE : FALSE;
boolean view = FALSE;
SVGA3dSurfaceFormat format;
+ assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+
s = CALLOC_STRUCT(svga_surface);
if (!s)
return NULL;
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ face = surf_tmpl->u.tex.first_layer;
+ zslice = 0;
+ }
+ else {
+ face = 0;
+ zslice = surf_tmpl->u.tex.first_layer;
+ }
+
pipe_reference_init(&s->base.reference, 1);
pipe_resource_reference(&s->base.texture, pt);
- s->base.format = pt->format;
- s->base.width = u_minify(pt->width0, level);
- s->base.height = u_minify(pt->height0, level);
- s->base.usage = flags;
- s->base.level = level;
- s->base.face = face;
- s->base.zslice = zslice;
+ s->base.context = pipe;
+ s->base.format = surf_tmpl->format;
+ s->base.width = u_minify(pt->width0, surf_tmpl->u.tex.level);
+ s->base.height = u_minify(pt->height0, surf_tmpl->u.tex.level);
+ s->base.usage = surf_tmpl->usage;
+ s->base.u.tex.level = surf_tmpl->u.tex.level;
+ s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
if (!render)
- format = svga_translate_format(pt->format);
+ format = svga_translate_format(surf_tmpl->format);
else
- format = svga_translate_format_render(pt->format);
+ format = svga_translate_format_render(surf_tmpl->format);
assert(format != SVGA3D_FORMAT_INVALID);
@@ -217,11 +231,11 @@ svga_get_tex_surface(struct pipe_screen *screen,
/* Currently only used for compressed textures */
if (render &&
- format != svga_translate_format(pt->format)) {
+ format != svga_translate_format(surf_tmpl->format)) {
view = TRUE;
}
- if (level != 0 &&
+ if (surf_tmpl->u.tex.level != 0 &&
svga_screen(screen)->debug.force_level_surface_view)
view = TRUE;
@@ -233,22 +247,22 @@ svga_get_tex_surface(struct pipe_screen *screen,
if (view) {
SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
- pt, level, face, zslice, s);
+ pt, surf_tmpl->u.tex.level, face, zslice, s);
- s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice,
- &s->key);
+ s->handle = svga_texture_view_surface(NULL, tex, format, surf_tmpl->u.tex.level,
+ 1, face, zslice, &s->key);
s->real_face = 0;
s->real_level = 0;
s->real_zslice = 0;
} else {
SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
- pt, level, face, zslice, s);
+ pt, surf_tmpl->u.tex.level, face, zslice, s);
memset(&s->key, 0, sizeof s->key);
s->handle = tex->handle;
s->real_face = face;
- s->real_level = level;
s->real_zslice = zslice;
+ s->real_level = surf_tmpl->u.tex.level;
}
return &s->base;
@@ -256,7 +270,8 @@ svga_get_tex_surface(struct pipe_screen *screen,
static void
-svga_tex_surface_destroy(struct pipe_surface *surf)
+svga_surface_destroy(struct pipe_context *pipe,
+ struct pipe_surface *surf)
{
struct svga_surface *s = svga_surface(surf);
struct svga_texture *t = svga_texture(surf->texture);
@@ -282,8 +297,13 @@ svga_mark_surface_dirty(struct pipe_surface *surf)
s->dirty = TRUE;
- if (s->handle == tex->handle)
- tex->defined[surf->face][surf->level] = TRUE;
+ if (s->handle == tex->handle) {
+ /* hmm so 3d textures always have all their slices marked ? */
+ if (surf->texture->target == PIPE_TEXTURE_CUBE)
+ tex->defined[surf->u.tex.first_layer][surf->u.tex.level] = TRUE;
+ else
+ tex->defined[0][surf->u.tex.level] = TRUE;
+ }
else {
/* this will happen later in svga_propagate_surface */
}
@@ -314,22 +334,32 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf)
struct svga_surface *s = svga_surface(surf);
struct svga_texture *tex = svga_texture(surf->texture);
struct svga_screen *ss = svga_screen(surf->texture->screen);
+ unsigned zslice, face;
if (!s->dirty)
return;
+ if (surf->texture->target == PIPE_TEXTURE_CUBE) {
+ zslice = 0;
+ face = surf->u.tex.first_layer;
+ }
+ else {
+ zslice = surf->u.tex.first_layer;
+ face = 0;
+ }
+
s->dirty = FALSE;
ss->texture_timestamp++;
- tex->view_age[surf->level] = ++(tex->age);
+ tex->view_age[surf->u.tex.level] = ++(tex->age);
if (s->handle != tex->handle) {
- SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->level, surf);
+ SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->u.tex.level, surf);
svga_texture_copy_handle(svga_context(pipe),
s->handle, 0, 0, 0, s->real_level, s->real_face,
- tex->handle, 0, 0, surf->zslice, surf->level, surf->face,
- u_minify(tex->b.b.width0, surf->level),
- u_minify(tex->b.b.height0, surf->level), 1);
- tex->defined[surf->face][surf->level] = TRUE;
+ tex->handle, 0, 0, zslice, surf->u.tex.level, face,
+ u_minify(tex->b.b.width0, surf->u.tex.level),
+ u_minify(tex->b.b.height0, surf->u.tex.level), 1);
+ tex->defined[face][surf->u.tex.level] = TRUE;
}
}
@@ -351,9 +381,9 @@ svga_surface_needs_propagation(struct pipe_surface *surf)
void
-svga_screen_init_surface_functions(struct pipe_screen *screen)
+svga_init_surface_functions(struct svga_context *svga)
{
- screen->get_tex_surface = svga_get_tex_surface;
- screen->tex_surface_destroy = svga_tex_surface_destroy;
+ svga->pipe.create_surface = svga_create_surface;
+ svga->pipe.surface_destroy = svga_surface_destroy;
}
diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h
index 13bd5b19b6..afb8326e1f 100644
--- a/src/gallium/drivers/svga/svga_surface.h
+++ b/src/gallium/drivers/svga/svga_surface.h
@@ -90,7 +90,4 @@ svga_surface(struct pipe_surface *surface)
return (struct svga_surface *)surface;
}
-void
-svga_screen_init_surface_functions(struct pipe_screen *screen);
-
#endif
diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c
index ff3da84272..d5db6bf641 100644
--- a/src/gallium/drivers/svga/svga_swtnl_backend.c
+++ b/src/gallium/drivers/svga/svga_swtnl_backend.c
@@ -141,7 +141,7 @@ svga_vbuf_render_unmap_vertices( struct vbuf_render *render,
pipe_buffer_flush_mapped_range(&svga->pipe,
svga_render->vbuf_transfer,
offset, length);
- pipe_buffer_unmap(&svga->pipe, svga_render->vbuf, svga_render->vbuf_transfer);
+ pipe_buffer_unmap(&svga->pipe, svga_render->vbuf_transfer);
svga_render->min_index = min_index;
svga_render->max_index = max_index;
svga_render->vbuf_used = MAX2(svga_render->vbuf_used, used);
@@ -158,7 +158,7 @@ svga_vbuf_render_set_primitive( struct vbuf_render *render,
}
static void
-svga_vbuf_sumbit_state( struct svga_vbuf_render *svga_render )
+svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
{
struct svga_context *svga = svga_render->svga;
SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
@@ -221,7 +221,8 @@ svga_vbuf_render_draw_arrays( struct vbuf_render *render,
unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
enum pipe_error ret = 0;
- svga_vbuf_sumbit_state(svga_render);
+ /* off to hardware */
+ svga_vbuf_submit_state(svga_render);
/* Need to call update_state() again as the draw module may have
* altered some of our state behind our backs. Testcase:
@@ -267,9 +268,8 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
pipe_buffer_write_nooverlap(&svga->pipe, svga_render->ibuf,
svga_render->ibuf_offset, 2 * nr_indices, indices);
-
/* off to hardware */
- svga_vbuf_sumbit_state(svga_render);
+ svga_vbuf_submit_state(svga_render);
/* Need to call update_state() again as the draw module may have
* altered some of our state behind our backs. Testcase:
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
index 814e8edd70..05d86e1fb1 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -51,6 +51,9 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
assert(svga->state.sw.need_swtnl);
assert(draw);
+ /* Make sure that the need_swtnl flag does not go away */
+ svga->state.sw.in_swtnl_draw = TRUE;
+
ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW);
if (ret) {
svga_context_flush(svga, NULL);
@@ -106,22 +109,22 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
* unmap vertex/index buffers
*/
for (i = 0; i < svga->curr.num_vertex_buffers; i++) {
- pipe_buffer_unmap(&svga->pipe, svga->curr.vb[i].buffer,
- vb_transfer[i]);
+ pipe_buffer_unmap(&svga->pipe, vb_transfer[i]);
draw_set_mapped_vertex_buffer(draw, i, NULL);
}
if (ib_transfer) {
- pipe_buffer_unmap(&svga->pipe, svga->curr.ib.buffer, ib_transfer);
+ pipe_buffer_unmap(&svga->pipe, ib_transfer);
draw_set_mapped_index_buffer(draw, NULL);
}
if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
- pipe_buffer_unmap(&svga->pipe,
- svga->curr.cb[PIPE_SHADER_VERTEX],
- cb_transfer);
+ pipe_buffer_unmap(&svga->pipe, cb_transfer);
}
+ /* Now safe to remove the need_swtnl flag in any update_state call */
+ svga->state.sw.in_swtnl_draw = FALSE;
+
return ret;
}
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 04f30f82c3..eaabae8ce4 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -314,6 +314,9 @@ trace_context_bind_vertex_sampler_states(struct pipe_context *_pipe,
struct trace_context *tr_ctx = trace_context(_pipe);
struct pipe_context *pipe = tr_ctx->pipe;
+ if (!pipe->bind_vertex_sampler_states)
+ return;
+
trace_dump_call_begin("pipe_context", "bind_vertex_sampler_states");
trace_dump_arg(ptr, pipe);
@@ -885,6 +888,60 @@ trace_sampler_view_destroy(struct pipe_context *_pipe,
FREE(_view);
}
+/********************************************************************
+ * surface
+ */
+
+
+static struct pipe_surface *
+trace_create_surface(struct pipe_context *_pipe,
+ struct pipe_resource *_texture,
+ const struct pipe_surface *surf_tmpl)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct trace_resource *tr_tex = trace_resource(_texture);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ struct pipe_resource *texture = tr_tex->resource;
+ struct pipe_surface *result = NULL;
+
+ trace_dump_call_begin("pipe_context", "create_surface");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, texture);
+ /* hmm some values unitialized there */
+ trace_dump_arg(surface, surf_tmpl);
+
+ result = pipe->create_surface(pipe, texture, surf_tmpl);
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ result = trace_surf_create(tr_tex, result);
+
+ return result;
+}
+
+
+static void
+trace_surface_destroy(struct pipe_context *_pipe,
+ struct pipe_surface *_surface)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ struct trace_surface *tr_surf = trace_surface(_surface);
+ struct pipe_surface *surface = tr_surf->surface;
+
+ trace_dump_call_begin("pipe_context", "surface_destroy");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, surface);
+
+ trace_dump_call_end();
+
+ trace_surf_destroy(tr_surf);
+}
+
static INLINE void
trace_context_set_fragment_sampler_views(struct pipe_context *_pipe,
@@ -926,6 +983,9 @@ trace_context_set_vertex_sampler_views(struct pipe_context *_pipe,
struct pipe_sampler_view *unwrapped_views[PIPE_MAX_VERTEX_SAMPLERS];
unsigned i;
+ if (!pipe->set_vertex_sampler_views)
+ return;
+
for(i = 0; i < num; ++i) {
tr_view = trace_sampler_view(views[i]);
unwrapped_views[i] = tr_view ? tr_view->sampler_view : NULL;
@@ -1004,12 +1064,11 @@ trace_context_set_index_buffer(struct pipe_context *_pipe,
static INLINE void
trace_context_resource_copy_region(struct pipe_context *_pipe,
struct pipe_resource *dst,
- struct pipe_subresource subdst,
+ unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
- struct pipe_subresource subsrc,
- unsigned srcx, unsigned srcy, unsigned srcz,
- unsigned width, unsigned height)
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct pipe_context *pipe = tr_ctx->pipe;
@@ -1021,21 +1080,17 @@ trace_context_resource_copy_region(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, dst);
- trace_dump_arg_struct(subresource, subdst);
+ trace_dump_arg(uint, dst_level);
trace_dump_arg(uint, dstx);
trace_dump_arg(uint, dsty);
trace_dump_arg(uint, dstz);
trace_dump_arg(ptr, src);
- trace_dump_arg_struct(subresource, subsrc);
- trace_dump_arg(uint, srcx);
- trace_dump_arg(uint, srcy);
- trace_dump_arg(uint, srcz);
- trace_dump_arg(uint, width);
- trace_dump_arg(uint, height);
+ trace_dump_arg(uint, src_level);
+ trace_dump_arg(box, src_box);
pipe->resource_copy_region(pipe,
- dst, subdst, dstx, dsty, dstz,
- src, subsrc, srcx, srcy, srcz, width, height);
+ dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
trace_dump_call_end();
}
@@ -1166,8 +1221,8 @@ trace_context_destroy(struct pipe_context *_pipe)
static unsigned int
trace_is_resource_referenced( struct pipe_context *_pipe,
- struct pipe_resource *_resource,
- unsigned face, unsigned level)
+ struct pipe_resource *_resource,
+ unsigned level, int layer)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct trace_resource *tr_tex = trace_resource(_resource);
@@ -1178,10 +1233,10 @@ trace_is_resource_referenced( struct pipe_context *_pipe,
trace_dump_call_begin("pipe_context", "is_resource_referenced");
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, texture);
- trace_dump_arg(uint, face);
trace_dump_arg(uint, level);
+ trace_dump_arg(int, layer);
- referenced = pipe->is_resource_referenced(pipe, texture, face, level);
+ referenced = pipe->is_resource_referenced(pipe, texture, level, layer);
trace_dump_ret(uint, referenced);
trace_dump_call_end();
@@ -1197,10 +1252,10 @@ trace_is_resource_referenced( struct pipe_context *_pipe,
static struct pipe_transfer *
trace_context_get_transfer(struct pipe_context *_context,
- struct pipe_resource *_resource,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box)
+ struct pipe_resource *_resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
{
struct trace_context *tr_context = trace_context(_context);
struct trace_resource *tr_tex = trace_resource(_resource);
@@ -1215,7 +1270,7 @@ trace_context_get_transfer(struct pipe_context *_context,
* to transfer_inline_write and ignore read transfers.
*/
- result = context->get_transfer(context, texture, sr, usage, box);
+ result = context->get_transfer(context, texture, level, usage, box);
if (result)
result = trace_transfer_create(tr_context, tr_tex, result);
@@ -1226,7 +1281,7 @@ trace_context_get_transfer(struct pipe_context *_context,
static void
trace_context_transfer_destroy(struct pipe_context *_context,
- struct pipe_transfer *_transfer)
+ struct pipe_transfer *_transfer)
{
struct trace_context *tr_context = trace_context(_context);
struct trace_transfer *tr_trans = trace_transfer(_transfer);
@@ -1274,7 +1329,7 @@ trace_context_transfer_flush_region( struct pipe_context *_context,
static void
trace_context_transfer_unmap(struct pipe_context *_context,
- struct pipe_transfer *_transfer)
+ struct pipe_transfer *_transfer)
{
struct trace_context *tr_ctx = trace_context(_context);
struct trace_transfer *tr_trans = trace_transfer(_transfer);
@@ -1287,17 +1342,17 @@ trace_context_transfer_unmap(struct pipe_context *_context,
*/
struct pipe_resource *resource = transfer->resource;
- struct pipe_subresource sr = transfer->sr;
+ unsigned level = transfer->level;
unsigned usage = transfer->usage;
const struct pipe_box *box = &transfer->box;
unsigned stride = transfer->stride;
- unsigned slice_stride = transfer->slice_stride;
+ unsigned layer_stride = transfer->layer_stride;
trace_dump_call_begin("pipe_context", "transfer_inline_write");
trace_dump_arg(ptr, context);
trace_dump_arg(ptr, resource);
- trace_dump_arg_struct(subresource, sr);
+ trace_dump_arg(uint, level);
trace_dump_arg(uint, usage);
trace_dump_arg(box, box);
@@ -1306,11 +1361,11 @@ trace_context_transfer_unmap(struct pipe_context *_context,
resource->format,
box,
stride,
- slice_stride);
+ layer_stride);
trace_dump_arg_end();
trace_dump_arg(uint, stride);
- trace_dump_arg(uint, slice_stride);
+ trace_dump_arg(uint, layer_stride);
trace_dump_call_end();
@@ -1323,13 +1378,13 @@ trace_context_transfer_unmap(struct pipe_context *_context,
static void
trace_context_transfer_inline_write(struct pipe_context *_context,
- struct pipe_resource *_resource,
- struct pipe_subresource sr,
- unsigned usage,
- const struct pipe_box *box,
- const void *data,
- unsigned stride,
- unsigned slice_stride)
+ struct pipe_resource *_resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
{
struct trace_context *tr_context = trace_context(_context);
struct trace_resource *tr_tex = trace_resource(_resource);
@@ -1342,7 +1397,7 @@ trace_context_transfer_inline_write(struct pipe_context *_context,
trace_dump_arg(ptr, context);
trace_dump_arg(ptr, resource);
- trace_dump_arg_struct(subresource, sr);
+ trace_dump_arg(uint, level);
trace_dump_arg(uint, usage);
trace_dump_arg(box, box);
@@ -1351,16 +1406,16 @@ trace_context_transfer_inline_write(struct pipe_context *_context,
resource->format,
box,
stride,
- slice_stride);
+ layer_stride);
trace_dump_arg_end();
trace_dump_arg(uint, stride);
- trace_dump_arg(uint, slice_stride);
+ trace_dump_arg(uint, layer_stride);
trace_dump_call_end();
context->transfer_inline_write(context, resource,
- sr, usage, box, data, stride, slice_stride);
+ level, usage, box, data, stride, layer_stride);
}
@@ -1434,6 +1489,8 @@ trace_context_create(struct trace_screen *tr_scr,
tr_ctx->base.set_vertex_sampler_views = trace_context_set_vertex_sampler_views;
tr_ctx->base.create_sampler_view = trace_create_sampler_view;
tr_ctx->base.sampler_view_destroy = trace_sampler_view_destroy;
+ tr_ctx->base.create_surface = trace_create_surface;
+ tr_ctx->base.surface_destroy = trace_surface_destroy;
tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers;
tr_ctx->base.set_index_buffer = trace_context_set_index_buffer;
tr_ctx->base.resource_copy_region = trace_context_resource_copy_region;
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 8f81606032..155c869fbd 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -71,6 +71,10 @@ void trace_dump_resource_template(const struct pipe_resource *templat)
trace_dump_uint(templat->depth0);
trace_dump_member_end();
+ trace_dump_member_begin("array_size");
+ trace_dump_uint(templat->array_size);
+ trace_dump_member_end();
+
trace_dump_member(uint, templat, last_level);
trace_dump_member(uint, templat, usage);
trace_dump_member(uint, templat, bind);
@@ -80,25 +84,6 @@ void trace_dump_resource_template(const struct pipe_resource *templat)
}
-void trace_dump_subresource(const struct pipe_subresource *subresource)
-{
- if (!trace_dumping_enabled_locked())
- return;
-
- if(!subresource) {
- trace_dump_null();
- return;
- }
-
- trace_dump_struct_begin("pipe_subresource");
-
- trace_dump_member(uint, subresource, face);
- trace_dump_member(uint, subresource, level);
-
- trace_dump_struct_end();
-}
-
-
void trace_dump_box(const struct pipe_box *box)
{
if (!trace_dumping_enabled_locked())
@@ -445,8 +430,13 @@ void trace_dump_sampler_view_template(const struct pipe_sampler_view *state)
trace_dump_struct_begin("pipe_sampler_view");
trace_dump_member(format, state, format);
- trace_dump_member(uint, state, first_level);
- trace_dump_member(uint, state, last_level);
+ /* XXX */
+ trace_dump_member(uint, state, u.tex.first_level);
+ trace_dump_member(uint, state, u.tex.last_level);
+ trace_dump_member(uint, state, u.tex.first_layer);
+ trace_dump_member(uint, state, u.tex.last_layer);
+ trace_dump_member(uint, state, u.buf.first_element);
+ trace_dump_member(uint, state, u.buf.last_element);
trace_dump_member(uint, state, swizzle_r);
trace_dump_member(uint, state, swizzle_g);
trace_dump_member(uint, state, swizzle_b);
@@ -472,14 +462,14 @@ void trace_dump_surface(const struct pipe_surface *state)
trace_dump_member(uint, state, width);
trace_dump_member(uint, state, height);
- trace_dump_member(uint, state, layout);
- trace_dump_member(uint, state, offset);
trace_dump_member(uint, state, usage);
trace_dump_member(ptr, state, texture);
- trace_dump_member(uint, state, face);
- trace_dump_member(uint, state, level);
- trace_dump_member(uint, state, zslice);
+ trace_dump_member(uint, state, u.tex.level);
+ trace_dump_member(uint, state, u.tex.first_layer);
+ trace_dump_member(uint, state, u.tex.last_layer);
+ trace_dump_member(uint, state, u.buf.first_element);
+ trace_dump_member(uint, state, u.buf.last_element);
trace_dump_struct_end();
}
@@ -497,16 +487,18 @@ void trace_dump_transfer(const struct pipe_transfer *state)
trace_dump_struct_begin("pipe_transfer");
+ trace_dump_member(uint, state, box.x);
+ trace_dump_member(uint, state, box.y);
+ trace_dump_member(uint, state, box.z);
trace_dump_member(uint, state, box.width);
trace_dump_member(uint, state, box.height);
+ trace_dump_member(uint, state, box.depth);
trace_dump_member(uint, state, stride);
+ trace_dump_member(uint, state, layer_stride);
trace_dump_member(uint, state, usage);
trace_dump_member(ptr, state, resource);
- trace_dump_member(uint, state, sr.face);
- trace_dump_member(uint, state, sr.level);
- trace_dump_member(uint, state, box.z);
trace_dump_struct_end();
}
diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h
index 078d208610..fe8ece78d4 100644
--- a/src/gallium/drivers/trace/tr_dump_state.h
+++ b/src/gallium/drivers/trace/tr_dump_state.h
@@ -37,8 +37,6 @@ void trace_dump_format(enum pipe_format format);
void trace_dump_resource_template(const struct pipe_resource *templat);
-void trace_dump_subresource(const struct pipe_subresource *subresource);
-
void trace_dump_box(const struct pipe_box *box);
void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state);
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index 935831071e..c2de2daa88 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -210,23 +210,26 @@ trace_screen_context_create(struct pipe_screen *_screen, void *priv)
static void
trace_screen_flush_frontbuffer(struct pipe_screen *_screen,
- struct pipe_surface *_surface,
+ struct pipe_resource *_resource,
+ unsigned level, unsigned layer,
void *context_private)
{
struct trace_screen *tr_scr = trace_screen(_screen);
- struct trace_surface *tr_surf = trace_surface(_surface);
+ struct trace_resource *tr_res = trace_resource(_resource);
struct pipe_screen *screen = tr_scr->screen;
- struct pipe_surface *surface = tr_surf->surface;
+ struct pipe_resource *resource = tr_res->resource;
trace_dump_call_begin("pipe_screen", "flush_frontbuffer");
trace_dump_arg(ptr, screen);
- trace_dump_arg(ptr, surface);
+ trace_dump_arg(ptr, resource);
+ trace_dump_arg(uint, level);
+ trace_dump_arg(uint, layer);
/* XXX: hide, as there is nothing we can do with this
trace_dump_arg(ptr, context_private);
*/
- screen->flush_frontbuffer(screen, surface, context_private);
+ screen->flush_frontbuffer(screen, resource, level, layer, context_private);
trace_dump_call_end();
}
@@ -318,68 +321,6 @@ trace_screen_resource_destroy(struct pipe_screen *_screen,
}
-/********************************************************************
- * surface
- */
-
-
-static struct pipe_surface *
-trace_screen_get_tex_surface(struct pipe_screen *_screen,
- struct pipe_resource *_texture,
- unsigned face, unsigned level,
- unsigned zslice,
- unsigned usage)
-{
- struct trace_screen *tr_scr = trace_screen(_screen);
- struct trace_resource *tr_tex = trace_resource(_texture);
- struct pipe_screen *screen = tr_scr->screen;
- struct pipe_resource *texture = tr_tex->resource;
- struct pipe_surface *result = NULL;
-
- assert(texture->screen == screen);
-
- trace_dump_call_begin("pipe_screen", "get_tex_surface");
-
- trace_dump_arg(ptr, screen);
- trace_dump_arg(ptr, texture);
- trace_dump_arg(uint, face);
- trace_dump_arg(uint, level);
- trace_dump_arg(uint, zslice);
- trace_dump_arg(uint, usage);
-
- result = screen->get_tex_surface(screen, texture, face, level, zslice, usage);
-
- trace_dump_ret(ptr, result);
-
- trace_dump_call_end();
-
- result = trace_surface_create(tr_tex, result);
-
- return result;
-}
-
-
-static void
-trace_screen_tex_surface_destroy(struct pipe_surface *_surface)
-{
- struct trace_screen *tr_scr = trace_screen(_surface->texture->screen);
- struct trace_surface *tr_surf = trace_surface(_surface);
- struct pipe_screen *screen = tr_scr->screen;
- struct pipe_surface *surface = tr_surf->surface;
-
- trace_dump_call_begin("pipe_screen", "tex_surface_destroy");
-
- trace_dump_arg(ptr, screen);
- trace_dump_arg(ptr, surface);
-
- trace_dump_call_end();
-
- trace_surface_destroy(tr_surf);
-}
-
-
-
-
/********************************************************************
* buffer
@@ -580,8 +521,6 @@ trace_screen_create(struct pipe_screen *screen)
tr_scr->base.resource_from_handle = trace_screen_resource_from_handle;
tr_scr->base.resource_get_handle = trace_screen_resource_get_handle;
tr_scr->base.resource_destroy = trace_screen_resource_destroy;
- tr_scr->base.get_tex_surface = trace_screen_get_tex_surface;
- tr_scr->base.tex_surface_destroy = trace_screen_tex_surface_destroy;
tr_scr->base.user_buffer_create = trace_screen_user_buffer_create;
tr_scr->base.fence_reference = trace_screen_fence_reference;
tr_scr->base.fence_signalled = trace_screen_fence_signalled;
diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c
index 9914b98b39..2799734647 100644
--- a/src/gallium/drivers/trace/tr_texture.c
+++ b/src/gallium/drivers/trace/tr_texture.c
@@ -74,8 +74,8 @@ trace_resource_destroy(struct trace_screen *tr_scr,
struct pipe_surface *
-trace_surface_create(struct trace_resource *tr_tex,
- struct pipe_surface *surface)
+trace_surf_create(struct trace_resource *tr_tex,
+ struct pipe_surface *surface)
{
struct trace_surface *tr_surf;
@@ -104,7 +104,7 @@ error:
void
-trace_surface_destroy(struct trace_surface *tr_surf)
+trace_surf_destroy(struct trace_surface *tr_surf)
{
pipe_resource_reference(&tr_surf->base.texture, NULL);
pipe_surface_reference(&tr_surf->surface, NULL);
diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h
index 6513995d50..3352c96e59 100644
--- a/src/gallium/drivers/trace/tr_texture.h
+++ b/src/gallium/drivers/trace/tr_texture.h
@@ -125,11 +125,11 @@ trace_resource_destroy(struct trace_screen *tr_scr,
struct trace_resource *tr_tex);
struct pipe_surface *
-trace_surface_create(struct trace_resource *tr_tex,
+trace_surf_create(struct trace_resource *tr_tex,
struct pipe_surface *surface);
void
-trace_surface_destroy(struct trace_surface *tr_surf);
+trace_surf_destroy(struct trace_surface *tr_surf);
struct pipe_transfer *
trace_transfer_create(struct trace_context *tr_ctx,