summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/Makefile21
-rw-r--r--src/gallium/Makefile.template76
-rw-r--r--src/gallium/SConscript5
-rw-r--r--src/gallium/auxiliary/Makefile14
-rw-r--r--src/gallium/auxiliary/cso_cache/Makefile3
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.c2
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c2
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_hash.c8
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_hash.h6
-rw-r--r--src/gallium/auxiliary/draw/Makefile5
-rw-r--r--src/gallium/auxiliary/draw/SConscript1
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c20
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h8
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c14
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c19
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c98
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h5
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c69
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c76
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c105
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_vbuf.h21
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c7
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c42
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c15
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c244
-rw-r--r--src/gallium/auxiliary/gallivm/Makefile4
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm.cpp2
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_builtins.cpp2
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_cpu.cpp13
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_p.h4
-rw-r--r--src/gallium/auxiliary/gallivm/instructions.cpp2
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.cpp254
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.h1
-rw-r--r--src/gallium/auxiliary/gallivm/storage.cpp2
-rw-r--r--src/gallium/auxiliary/gallivm/storagesoa.cpp99
-rw-r--r--src/gallium/auxiliary/gallivm/storagesoa.h17
-rw-r--r--src/gallium/auxiliary/gallivm/tgsitollvm.cpp23
-rw-r--r--src/gallium/auxiliary/indices/Makefile12
-rw-r--r--src/gallium/auxiliary/indices/SConscript17
-rw-r--r--src/gallium/auxiliary/indices/u_indices.c253
-rw-r--r--src/gallium/auxiliary/indices/u_indices.h83
-rw-r--r--src/gallium/auxiliary/indices/u_indices_gen.c5129
-rw-r--r--src/gallium/auxiliary/indices/u_indices_gen.py319
-rw-r--r--src/gallium/auxiliary/indices/u_indices_priv.h43
-rw-r--r--src/gallium/auxiliary/pipebuffer/Makefile7
-rw-r--r--src/gallium/auxiliary/pipebuffer/SConscript2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer.h67
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c187
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h51
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c22
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr.h21
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c26
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c29
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c11
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c41
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c303
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c27
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c23
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_validate.c86
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_validate.h10
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_winsys.c170
-rw-r--r--src/gallium/auxiliary/rtasm/Makefile4
-rw-r--r--src/gallium/auxiliary/rtasm/SConscript1
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_cpu.c2
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_execmem.c19
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc.c1077
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc.h342
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c725
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h513
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c68
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h11
-rw-r--r--src/gallium/auxiliary/sct/Makefile3
-rw-r--r--src/gallium/auxiliary/tgsi/Makefile4
-rw-r--r--src/gallium/auxiliary/tgsi/SConscript1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c46
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump_c.c7
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c123
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h21
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_iterate.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c60
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.c1363
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.h51
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c17
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c374
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_transform.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_util.c2
-rw-r--r--src/gallium/auxiliary/translate/Makefile3
-rw-r--r--src/gallium/auxiliary/util/Makefile10
-rw-r--r--src/gallium/auxiliary/util/SConscript14
-rw-r--r--src/gallium/auxiliary/util/u_blit.c7
-rw-r--r--src/gallium/auxiliary/util/u_cache.c2
-rw-r--r--src/gallium/auxiliary/util/u_debug.c (renamed from src/gallium/auxiliary/util/p_debug.c)158
-rw-r--r--src/gallium/auxiliary/util/u_debug.h (renamed from src/gallium/include/pipe/p_debug.h)13
-rw-r--r--src/gallium/auxiliary/util/u_debug_memory.c (renamed from src/gallium/auxiliary/util/p_debug_mem.c)9
-rw-r--r--src/gallium/auxiliary/util/u_debug_profile.c (renamed from src/gallium/auxiliary/util/p_debug_prof.c)2
-rw-r--r--src/gallium/auxiliary/util/u_debug_stack.c97
-rw-r--r--src/gallium/auxiliary/util/u_debug_stack.h65
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.c3
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c664
-rw-r--r--src/gallium/auxiliary/util/u_handle_table.c2
-rw-r--r--src/gallium/auxiliary/util/u_hash_table.c2
-rw-r--r--src/gallium/auxiliary/util/u_keymap.c309
-rw-r--r--src/gallium/auxiliary/util/u_keymap.h68
-rw-r--r--src/gallium/auxiliary/util/u_linear.c70
-rw-r--r--src/gallium/auxiliary/util/u_linear.h61
-rw-r--r--src/gallium/auxiliary/util/u_math.h8
-rw-r--r--src/gallium/auxiliary/util/u_memory.h6
-rw-r--r--src/gallium/auxiliary/util/u_mm.c14
-rw-r--r--src/gallium/auxiliary/util/u_mm.h12
-rw-r--r--src/gallium/auxiliary/util/u_prim.h16
-rw-r--r--src/gallium/auxiliary/util/u_rect.c112
-rw-r--r--src/gallium/auxiliary/util/u_simple_screen.c143
-rw-r--r--src/gallium/auxiliary/util/u_simple_screen.h47
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.c11
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.h4
-rw-r--r--src/gallium/auxiliary/util/u_sse.h77
-rw-r--r--src/gallium/auxiliary/util/u_tile.c137
-rw-r--r--src/gallium/auxiliary/util/u_tile.h30
-rw-r--r--src/gallium/auxiliary/util/u_time.c2
-rw-r--r--src/gallium/auxiliary/util/u_timed_winsys.c50
-rw-r--r--src/gallium/drivers/Makefile16
-rw-r--r--src/gallium/drivers/cell/common.h156
-rw-r--r--src/gallium/drivers/cell/ppu/Makefile7
-rw-r--r--src/gallium/drivers/cell/ppu/cell_batch.c123
-rw-r--r--src/gallium/drivers/cell/ppu/cell_batch.h9
-rw-r--r--src/gallium/drivers/cell/ppu/cell_clear.c24
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.c40
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h56
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.c10
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.c168
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.h57
-rw-r--r--src/gallium/drivers/cell/ppu/cell_flush.c15
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c1841
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.c1873
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.h2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_pipe_state.c194
-rw-r--r--src/gallium/drivers/cell/ppu/cell_render.c1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_screen.c22
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.c46
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.h13
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state.h5
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c259
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_per_fragment.c16
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_shader.c19
-rw-r--r--src/gallium/drivers/cell/ppu/cell_surface.c1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.c587
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.h35
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vbuf.c55
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_fetch.c33
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_shader.c2
-rw-r--r--src/gallium/drivers/cell/spu/.gitignore1
-rw-r--r--src/gallium/drivers/cell/spu/Makefile9
-rw-r--r--src/gallium/drivers/cell/spu/spu_colorpack.h49
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c815
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.h35
-rw-r--r--src/gallium/drivers/cell/spu/spu_dcache.c4
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.c173
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.h (renamed from src/gallium/drivers/cell/ppu/cell_winsys.c)17
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c626
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h87
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c288
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c38
-rw-r--r--src/gallium/drivers/cell/spu/spu_shuffle.h186
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.c635
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.h28
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.c37
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.h6
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c789
-rw-r--r--src/gallium/drivers/cell/spu/spu_util.c2
-rw-r--r--src/gallium/drivers/failover/Makefile3
-rw-r--r--src/gallium/drivers/failover/fo_context.c2
-rw-r--r--src/gallium/drivers/failover/fo_context.h7
-rw-r--r--src/gallium/drivers/i915simple/Makefile3
-rw-r--r--src/gallium/drivers/i915simple/i915_batch.h2
-rw-r--r--src/gallium/drivers/i915simple/i915_blit.c32
-rw-r--r--src/gallium/drivers/i915simple/i915_blit.h6
-rw-r--r--src/gallium/drivers/i915simple/i915_context.c2
-rw-r--r--src/gallium/drivers/i915simple/i915_debug.c18
-rw-r--r--src/gallium/drivers/i915simple/i915_debug.h7
-rw-r--r--src/gallium/drivers/i915simple/i915_debug_fp.c2
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc_translate.c15
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_vbuf.c65
-rw-r--r--src/gallium/drivers/i915simple/i915_screen.c94
-rw-r--r--src/gallium/drivers/i915simple/i915_screen.h19
-rw-r--r--src/gallium/drivers/i915simple/i915_state.c20
-rw-r--r--src/gallium/drivers/i915simple/i915_state_emit.c28
-rw-r--r--src/gallium/drivers/i915simple/i915_surface.c78
-rw-r--r--src/gallium/drivers/i915simple/i915_texture.c21
-rw-r--r--src/gallium/drivers/i965simple/Makefile2
-rw-r--r--src/gallium/drivers/i965simple/brw_blit.c2
-rw-r--r--src/gallium/drivers/i965simple/brw_cc.c14
-rw-r--r--src/gallium/drivers/i965simple/brw_context.c2
-rw-r--r--src/gallium/drivers/i965simple/brw_curbe.c8
-rw-r--r--src/gallium/drivers/i965simple/brw_draw.c2
-rw-r--r--src/gallium/drivers/i965simple/brw_draw_upload.c2
-rw-r--r--src/gallium/drivers/i965simple/brw_eu_debug.c2
-rw-r--r--src/gallium/drivers/i965simple/brw_misc_state.c4
-rw-r--r--src/gallium/drivers/i965simple/brw_screen.c4
-rw-r--r--src/gallium/drivers/i965simple/brw_state.c2
-rw-r--r--src/gallium/drivers/i965simple/brw_state_pool.c2
-rw-r--r--src/gallium/drivers/i965simple/brw_surface.c11
-rw-r--r--src/gallium/drivers/i965simple/brw_tex_layout.c20
-rw-r--r--src/gallium/drivers/i965simple/brw_vs_emit.c2
-rw-r--r--src/gallium/drivers/i965simple/brw_wm.c4
-rw-r--r--src/gallium/drivers/i965simple/brw_wm_surface_state.c3
-rw-r--r--src/gallium/drivers/nouveau/nouveau_gldefs.h196
-rw-r--r--src/gallium/drivers/nouveau/nouveau_push.h82
-rw-r--r--src/gallium/drivers/nouveau/nouveau_stateobj.h159
-rw-r--r--src/gallium/drivers/nouveau/nouveau_util.h91
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h101
-rw-r--r--src/gallium/drivers/nv04/Makefile20
-rw-r--r--src/gallium/drivers/nv04/nv04_clear.c12
-rw-r--r--src/gallium/drivers/nv04/nv04_context.c107
-rw-r--r--src/gallium/drivers/nv04/nv04_context.h151
-rw-r--r--src/gallium/drivers/nv04/nv04_fragprog.c21
-rw-r--r--src/gallium/drivers/nv04/nv04_fragtex.c73
-rw-r--r--src/gallium/drivers/nv04/nv04_miptree.c177
-rw-r--r--src/gallium/drivers/nv04/nv04_prim_vbuf.c321
-rw-r--r--src/gallium/drivers/nv04/nv04_screen.c237
-rw-r--r--src/gallium/drivers/nv04/nv04_screen.h27
-rw-r--r--src/gallium/drivers/nv04/nv04_state.c458
-rw-r--r--src/gallium/drivers/nv04/nv04_state.h74
-rw-r--r--src/gallium/drivers/nv04/nv04_state_emit.c223
-rw-r--r--src/gallium/drivers/nv04/nv04_surface.c72
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.c448
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.h29
-rw-r--r--src/gallium/drivers/nv04/nv04_vbo.c78
-rw-r--r--src/gallium/drivers/nv10/Makefile19
-rw-r--r--src/gallium/drivers/nv10/nv10_clear.c12
-rw-r--r--src/gallium/drivers/nv10/nv10_context.c296
-rw-r--r--src/gallium/drivers/nv10/nv10_context.h153
-rw-r--r--src/gallium/drivers/nv10/nv10_fragprog.c21
-rw-r--r--src/gallium/drivers/nv10/nv10_fragtex.c124
-rw-r--r--src/gallium/drivers/nv10/nv10_miptree.c174
-rw-r--r--src/gallium/drivers/nv10/nv10_prim_vbuf.c265
-rw-r--r--src/gallium/drivers/nv10/nv10_screen.c226
-rw-r--r--src/gallium/drivers/nv10/nv10_screen.h24
-rw-r--r--src/gallium/drivers/nv10/nv10_state.c589
-rw-r--r--src/gallium/drivers/nv10/nv10_state.h139
-rw-r--r--src/gallium/drivers/nv10/nv10_state_emit.c306
-rw-r--r--src/gallium/drivers/nv10/nv10_surface.c72
-rw-r--r--src/gallium/drivers/nv10/nv10_vbo.c77
-rw-r--r--src/gallium/drivers/nv20/Makefile20
-rw-r--r--src/gallium/drivers/nv20/nv20_clear.c13
-rw-r--r--src/gallium/drivers/nv20/nv20_context.c419
-rw-r--r--src/gallium/drivers/nv20/nv20_context.h153
-rw-r--r--src/gallium/drivers/nv20/nv20_fragprog.c21
-rw-r--r--src/gallium/drivers/nv20/nv20_fragtex.c124
-rw-r--r--src/gallium/drivers/nv20/nv20_miptree.c206
-rw-r--r--src/gallium/drivers/nv20/nv20_prim_vbuf.c430
-rw-r--r--src/gallium/drivers/nv20/nv20_screen.c222
-rw-r--r--src/gallium/drivers/nv20/nv20_screen.h24
-rw-r--r--src/gallium/drivers/nv20/nv20_state.c582
-rw-r--r--src/gallium/drivers/nv20/nv20_state.h139
-rw-r--r--src/gallium/drivers/nv20/nv20_state_emit.c396
-rw-r--r--src/gallium/drivers/nv20/nv20_surface.c72
-rw-r--r--src/gallium/drivers/nv20/nv20_vbo.c78
-rw-r--r--src/gallium/drivers/nv20/nv20_vertprog.c838
-rw-r--r--src/gallium/drivers/nv30/Makefile28
-rw-r--r--src/gallium/drivers/nv30/nv30_clear.c13
-rw-r--r--src/gallium/drivers/nv30/nv30_context.c72
-rw-r--r--src/gallium/drivers/nv30/nv30_context.h212
-rw-r--r--src/gallium/drivers/nv30/nv30_draw.c61
-rw-r--r--src/gallium/drivers/nv30/nv30_fragprog.c911
-rw-r--r--src/gallium/drivers/nv30/nv30_fragtex.c163
-rw-r--r--src/gallium/drivers/nv30/nv30_miptree.c235
-rw-r--r--src/gallium/drivers/nv30/nv30_query.c122
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.c401
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.h37
-rw-r--r--src/gallium/drivers/nv30/nv30_shader.h490
-rw-r--r--src/gallium/drivers/nv30/nv30_state.c725
-rw-r--r--src/gallium/drivers/nv30/nv30_state.h88
-rw-r--r--src/gallium/drivers/nv30/nv30_state_blend.c40
-rw-r--r--src/gallium/drivers/nv30/nv30_state_emit.c118
-rw-r--r--src/gallium/drivers/nv30/nv30_state_fb.c144
-rw-r--r--src/gallium/drivers/nv30/nv30_state_rasterizer.c17
-rw-r--r--src/gallium/drivers/nv30/nv30_state_scissor.c35
-rw-r--r--src/gallium/drivers/nv30/nv30_state_stipple.c39
-rw-r--r--src/gallium/drivers/nv30/nv30_state_viewport.c70
-rw-r--r--src/gallium/drivers/nv30/nv30_state_zsa.c17
-rw-r--r--src/gallium/drivers/nv30/nv30_surface.c72
-rw-r--r--src/gallium/drivers/nv30/nv30_vbo.c556
-rw-r--r--src/gallium/drivers/nv30/nv30_vertprog.c838
-rw-r--r--src/gallium/drivers/nv40/Makefile28
-rw-r--r--src/gallium/drivers/nv40/nv40_clear.c13
-rw-r--r--src/gallium/drivers/nv40/nv40_context.c72
-rw-r--r--src/gallium/drivers/nv40/nv40_context.h233
-rw-r--r--src/gallium/drivers/nv40/nv40_draw.c349
-rw-r--r--src/gallium/drivers/nv40/nv40_fragprog.c991
-rw-r--r--src/gallium/drivers/nv40/nv40_fragtex.c168
-rw-r--r--src/gallium/drivers/nv40/nv40_miptree.c238
-rw-r--r--src/gallium/drivers/nv40/nv40_query.c122
-rw-r--r--src/gallium/drivers/nv40/nv40_screen.c383
-rw-r--r--src/gallium/drivers/nv40/nv40_screen.h37
-rw-r--r--src/gallium/drivers/nv40/nv40_shader.h556
-rw-r--r--src/gallium/drivers/nv40/nv40_state.c740
-rw-r--r--src/gallium/drivers/nv40/nv40_state.h91
-rw-r--r--src/gallium/drivers/nv40/nv40_state_blend.c40
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c184
-rw-r--r--src/gallium/drivers/nv40/nv40_state_fb.c162
-rw-r--r--src/gallium/drivers/nv40/nv40_state_rasterizer.c17
-rw-r--r--src/gallium/drivers/nv40/nv40_state_scissor.c35
-rw-r--r--src/gallium/drivers/nv40/nv40_state_stipple.c39
-rw-r--r--src/gallium/drivers/nv40/nv40_state_viewport.c67
-rw-r--r--src/gallium/drivers/nv40/nv40_state_zsa.c17
-rw-r--r--src/gallium/drivers/nv40/nv40_surface.c72
-rw-r--r--src/gallium/drivers/nv40/nv40_vbo.c555
-rw-r--r--src/gallium/drivers/nv40/nv40_vertprog.c1070
-rw-r--r--src/gallium/drivers/nv50/Makefile21
-rw-r--r--src/gallium/drivers/nv50/nv50_clear.c92
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c90
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h201
-rw-r--r--src/gallium/drivers/nv50/nv50_draw.c89
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c207
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c1784
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h45
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c135
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c373
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h35
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c664
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c313
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c208
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c156
-rw-r--r--src/gallium/drivers/nv50/nv50_texture.h129
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c216
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c254
-rw-r--r--src/gallium/drivers/r300/Makefile20
-rw-r--r--src/gallium/drivers/r300/SConscript17
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c348
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h79
-rw-r--r--src/gallium/drivers/r300/r300_clear.c33
-rw-r--r--src/gallium/drivers/r300/r300_clear.h27
-rw-r--r--src/gallium/drivers/r300/r300_context.c141
-rw-r--r--src/gallium/drivers/r300/r300_context.h265
-rw-r--r--src/gallium/drivers/r300/r300_cs.h123
-rw-r--r--src/gallium/drivers/r300/r300_cs_inlines.h50
-rw-r--r--src/gallium/drivers/r300/r300_emit.c303
-rw-r--r--src/gallium/drivers/r300/r300_emit.h59
-rw-r--r--src/gallium/drivers/r300/r300_flush.c42
-rw-r--r--src/gallium/drivers/r300/r300_flush.h33
-rw-r--r--src/gallium/drivers/r300/r300_reg.h3263
-rw-r--r--src/gallium/drivers/r300/r300_screen.c371
-rw-r--r--src/gallium/drivers/r300/r300_screen.h67
-rw-r--r--src/gallium/drivers/r300/r300_state.c854
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c196
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.h33
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h83
-rw-r--r--src/gallium/drivers/r300/r300_state_shader.c53
-rw-r--r--src/gallium/drivers/r300/r300_state_shader.h90
-rw-r--r--src/gallium/drivers/r300/r300_surface.c326
-rw-r--r--src/gallium/drivers/r300/r300_surface.h60
-rw-r--r--src/gallium/drivers/r300/r300_swtcl_emit.c327
-rw-r--r--src/gallium/drivers/r300/r300_texture.c187
-rw-r--r--src/gallium/drivers/r300/r300_texture.h34
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h94
-rw-r--r--src/gallium/drivers/softpipe/Makefile5
-rw-r--r--src/gallium/drivers/softpipe/SConscript2
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c46
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h42
-rw-r--r--src/gallium/drivers/softpipe/sp_draw_arrays.c18
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_exec.c28
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_sse.c6
-rw-r--r--src/gallium/drivers/softpipe/sp_headers.h95
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.c74
-rw-r--r--src/gallium/drivers/softpipe/sp_quad.h81
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_alpha_test.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_blend.c8
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_bufloop.c8
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_colormask.c6
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_coverage.c11
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_depth_test.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_earlyz.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_fs.c26
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_occlusion.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_output.c6
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_pipe.c (renamed from src/gallium/drivers/softpipe/sp_quad.c)0
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_pipe.h74
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_stencil.c8
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_stipple.c27
-rw-r--r--src/gallium/drivers/softpipe/sp_query.c6
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c8
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c40
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h6
-rw-r--r--src/gallium/drivers/softpipe/sp_state_fs.c7
-rw-r--r--src/gallium/drivers/softpipe/sp_state_surface.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c991
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.h70
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c174
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.h17
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c182
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.h6
-rw-r--r--src/gallium/drivers/trace/Makefile14
-rw-r--r--src/gallium/drivers/trace/tr_context.c8
-rw-r--r--src/gallium/drivers/trace/tr_context.h2
-rw-r--r--src/gallium/drivers/trace/tr_dump.c2
-rw-r--r--src/gallium/drivers/trace/tr_screen.c131
-rw-r--r--src/gallium/drivers/trace/tr_state.c43
-rw-r--r--src/gallium/drivers/trace/tr_state.h2
-rw-r--r--src/gallium/drivers/trace/tr_texture.c41
-rw-r--r--src/gallium/drivers/trace/tr_texture.h27
-rw-r--r--src/gallium/drivers/trace/tr_winsys.c81
-rw-r--r--src/gallium/drivers/trace/tr_winsys.h4
-rw-r--r--src/gallium/include/pipe/internal/p_winsys_screen.h (renamed from src/gallium/include/pipe/p_winsys.h)39
-rw-r--r--src/gallium/include/pipe/p_compiler.h33
-rw-r--r--src/gallium/include/pipe/p_config.h13
-rw-r--r--src/gallium/include/pipe/p_context.h3
-rw-r--r--src/gallium/include/pipe/p_defines.h16
-rw-r--r--src/gallium/include/pipe/p_format.h38
-rw-r--r--src/gallium/include/pipe/p_inlines.h123
-rw-r--r--src/gallium/include/pipe/p_screen.h137
-rw-r--r--src/gallium/include/pipe/p_shader_tokens.h85
-rw-r--r--src/gallium/include/pipe/p_state.h60
-rw-r--r--src/gallium/include/pipe/p_thread.h2
-rw-r--r--src/gallium/include/state_tracker/drm_api.h33
-rw-r--r--src/gallium/state_trackers/Makefile25
-rw-r--r--src/gallium/state_trackers/egl/Makefile28
-rw-r--r--src/gallium/state_trackers/egl/egl_context.c193
-rw-r--r--src/gallium/state_trackers/egl/egl_surface.c409
-rw-r--r--src/gallium/state_trackers/egl/egl_tracker.c217
-rw-r--r--src/gallium/state_trackers/egl/egl_tracker.h191
-rw-r--r--src/gallium/state_trackers/egl/egl_visual.c85
-rw-r--r--src/gallium/state_trackers/g3dvl/Makefile21
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_basic_csc.c715
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_basic_csc.h13
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_context.c208
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_context.h73
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_csc.h53
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_defs.h11
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_display.c48
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_display.h29
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c1157
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h18
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc1185
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_render.h38
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_screen.c115
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_screen.h63
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_shader_build.c215
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_shader_build.h61
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_surface.c243
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_surface.h86
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_types.h115
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_util.c16
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_util.h6
-rw-r--r--src/gallium/state_trackers/glx/Makefile25
-rw-r--r--src/gallium/state_trackers/glx/dri/dri_context.c168
-rw-r--r--src/gallium/state_trackers/glx/dri/dri_context.h95
-rw-r--r--src/gallium/state_trackers/glx/dri/dri_drawable.c363
-rw-r--r--src/gallium/state_trackers/glx/dri/dri_drawable.h (renamed from src/gallium/winsys/drm/intel/egl/intel_reg.h)54
-rw-r--r--src/gallium/state_trackers/glx/dri/dri_extensions.c108
-rw-r--r--src/gallium/state_trackers/glx/dri/dri_lock.c (renamed from src/gallium/winsys/drm/intel/dri/intel_lock.c)76
-rw-r--r--src/gallium/state_trackers/glx/dri/dri_screen.c255
-rw-r--r--src/gallium/state_trackers/glx/dri/dri_screen.h (renamed from src/gallium/winsys/drm/intel/dri/intel_screen.h)69
-rw-r--r--src/gallium/state_trackers/glx/xlib/Makefile17
-rw-r--r--src/gallium/state_trackers/glx/xlib/SConscript25
-rw-r--r--src/gallium/state_trackers/glx/xlib/fakeglx.c (renamed from src/gallium/winsys/xlib/fakeglx.c)670
-rw-r--r--src/gallium/state_trackers/glx/xlib/fakeglx.h (renamed from src/gallium/winsys/xlib/xfonts.h)10
-rw-r--r--src/gallium/state_trackers/glx/xlib/fakeglx_fonts.c (renamed from src/gallium/winsys/xlib/xfonts.c)8
-rw-r--r--src/gallium/state_trackers/glx/xlib/glxapi.c (renamed from src/gallium/winsys/xlib/glxapi.c)152
-rw-r--r--src/gallium/state_trackers/glx/xlib/glxapi.h (renamed from src/gallium/winsys/xlib/glxapi.h)15
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_api.c (renamed from src/gallium/winsys/xlib/xm_api.c)382
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_api.h393
-rw-r--r--src/gallium/state_trackers/glx/xlib/xm_winsys.h (renamed from src/gallium/winsys/xlib/xm_winsys_aub.h)45
-rw-r--r--src/gallium/state_trackers/python/p_format.i12
-rw-r--r--src/gallium/state_trackers/python/st_device.c2
-rw-r--r--src/gallium/state_trackers/python/st_softpipe_winsys.c72
-rw-r--r--src/gallium/state_trackers/wgl/SConscript40
-rw-r--r--src/gallium/state_trackers/wgl/icd/stw_icd.c594
-rw-r--r--src/gallium/state_trackers/wgl/icd/stw_icd.h489
-rw-r--r--src/gallium/state_trackers/wgl/opengl32.def388
-rw-r--r--src/gallium/state_trackers/wgl/opengl32.mingw.def387
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.c (renamed from src/gallium/winsys/drm/intel/egl/intel_device.h)42
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.h35
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.c519
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.h61
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_context.c349
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_context.h (renamed from src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.h)23
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_device.c152
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_device.h (renamed from src/gallium/winsys/drm/intel/dri/intel_swapbuffers.h)40
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_framebuffer.c212
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_framebuffer.h (renamed from src/gallium/winsys/drm/intel/dri/intel_reg.h)68
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_getprocaddress.c71
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_pixelformat.c286
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_pixelformat.h83
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_public.h68
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_quirks.c113
-rw-r--r--src/gallium/state_trackers/wgl/shared/stw_winsys.h59
-rw-r--r--src/gallium/state_trackers/wgl/wgl/stw_wgl.c337
-rw-r--r--src/gallium/state_trackers/wgl/wgl/stw_wgl.h63
-rw-r--r--src/gallium/winsys/Makefile18
-rw-r--r--src/gallium/winsys/drm/Makefile36
-rw-r--r--src/gallium/winsys/drm/Makefile.template27
-rw-r--r--src/gallium/winsys/drm/intel/Makefile21
-rw-r--r--src/gallium/winsys/drm/intel/common/Makefile23
-rw-r--r--src/gallium/winsys/drm/intel/common/Makefile.template64
-rw-r--r--src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.c429
-rw-r--r--src/gallium/winsys/drm/intel/common/intel_be_context.c107
-rw-r--r--src/gallium/winsys/drm/intel/common/intel_be_context.h40
-rw-r--r--src/gallium/winsys/drm/intel/common/intel_be_device.c308
-rw-r--r--src/gallium/winsys/drm/intel/common/intel_be_device.h72
-rw-r--r--src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c949
-rw-r--r--src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.h138
-rw-r--r--src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h102
-rw-r--r--src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c268
-rw-r--r--src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c377
-rw-r--r--src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.h115
-rw-r--r--src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c161
-rw-r--r--src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c968
-rw-r--r--src/gallium/winsys/drm/intel/dri/Makefile33
-rw-r--r--src/gallium/winsys/drm/intel/dri/SConscript41
-rw-r--r--src/gallium/winsys/drm/intel/dri/intel_batchbuffer.h24
-rw-r--r--src/gallium/winsys/drm/intel/dri/intel_context.c337
-rw-r--r--src/gallium/winsys/drm/intel/dri/intel_context.h164
-rw-r--r--src/gallium/winsys/drm/intel/dri/intel_screen.c607
-rw-r--r--src/gallium/winsys/drm/intel/dri/intel_swapbuffers.c260
-rw-r--r--src/gallium/winsys/drm/intel/dri/server/i830_common.h255
-rw-r--r--src/gallium/winsys/drm/intel/dri/server/i830_dri.h62
-rw-r--r--src/gallium/winsys/drm/intel/egl/Makefile16
-rw-r--r--src/gallium/winsys/drm/intel/egl/SConscript39
-rw-r--r--src/gallium/winsys/drm/intel/egl/intel_batchbuffer.h24
-rw-r--r--src/gallium/winsys/drm/intel/egl/intel_context.c242
-rw-r--r--src/gallium/winsys/drm/intel/egl/intel_context.h118
-rw-r--r--src/gallium/winsys/drm/intel/egl/intel_device.c137
-rw-r--r--src/gallium/winsys/drm/intel/egl/intel_egl.c796
-rw-r--r--src/gallium/winsys/drm/intel/egl/intel_egl.h53
-rw-r--r--src/gallium/winsys/drm/intel/egl/intel_swapbuffers.c111
-rw-r--r--src/gallium/winsys/drm/intel/gem/Makefile16
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_be_api.c12
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_be_api.h14
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.c140
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.h (renamed from src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.h)38
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_be_context.c118
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_be_context.h31
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_be_device.c283
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_be_device.h79
-rw-r--r--src/gallium/winsys/drm/intel/gem/intel_be_fence.h38
-rw-r--r--src/gallium/winsys/drm/nouveau/Makefile12
-rw-r--r--src/gallium/winsys/drm/nouveau/common/Makefile22
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_context.c206
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_context.h59
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_dri.h28
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_local.h19
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_lock.c72
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_screen.c31
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_screen.h27
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_winsys.c141
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c245
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h44
-rw-r--r--src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c101
-rw-r--r--src/gallium/winsys/drm/nouveau/dri/Makefile34
-rw-r--r--src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.c124
-rw-r--r--src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.h47
-rw-r--r--src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.c259
-rw-r--r--src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.h13
-rw-r--r--src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c113
-rw-r--r--src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h10
-rw-r--r--src/gallium/winsys/drm/radeon/Makefile32
-rw-r--r--src/gallium/winsys/drm/radeon/SConscript29
-rw-r--r--src/gallium/winsys/drm/radeon/radeon_buffer.c239
-rw-r--r--src/gallium/winsys/drm/radeon/radeon_buffer.h54
-rw-r--r--src/gallium/winsys/drm/radeon/radeon_context.c272
-rw-r--r--src/gallium/winsys/drm/radeon/radeon_context.h70
-rw-r--r--src/gallium/winsys/drm/radeon/radeon_r300.c96
-rw-r--r--src/gallium/winsys/drm/radeon/radeon_r300.h34
-rw-r--r--src/gallium/winsys/drm/radeon/radeon_screen.c288
-rw-r--r--src/gallium/winsys/drm/radeon/radeon_screen.h41
-rw-r--r--src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.c (renamed from src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c)81
-rw-r--r--src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.h37
-rw-r--r--src/gallium/winsys/egl_xlib/Makefile2
-rw-r--r--src/gallium/winsys/egl_xlib/egl_xlib.c18
-rw-r--r--src/gallium/winsys/egl_xlib/sw_winsys.c72
-rw-r--r--src/gallium/winsys/g3dvl/nouveau/Makefile50
-rw-r--r--src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c172
-rw-r--r--src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h39
-rw-r--r--src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c88
-rw-r--r--src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h20
-rw-r--r--src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c94
-rw-r--r--src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h10
-rw-r--r--src/gallium/winsys/g3dvl/vl_winsys.h14
-rw-r--r--src/gallium/winsys/g3dvl/xsp_winsys.c295
-rw-r--r--src/gallium/winsys/gdi/SConscript18
-rw-r--r--src/gallium/winsys/gdi/gdi_softpipe_winsys.c84
-rw-r--r--src/gallium/winsys/xlib/Makefile41
-rw-r--r--src/gallium/winsys/xlib/SConscript43
-rw-r--r--src/gallium/winsys/xlib/glxheader.h62
-rw-r--r--src/gallium/winsys/xlib/realglx.c180
-rw-r--r--src/gallium/winsys/xlib/realglx.h326
-rw-r--r--src/gallium/winsys/xlib/xlib.c113
-rw-r--r--src/gallium/winsys/xlib/xlib.h14
-rw-r--r--src/gallium/winsys/xlib/xlib_brw.h30
-rw-r--r--src/gallium/winsys/xlib/xlib_brw_aub.c (renamed from src/gallium/winsys/xlib/brw_aub.c)16
-rw-r--r--src/gallium/winsys/xlib/xlib_brw_aub.h (renamed from src/gallium/winsys/xlib/brw_aub.h)0
-rw-r--r--src/gallium/winsys/xlib/xlib_brw_context.c209
-rw-r--r--src/gallium/winsys/xlib/xlib_brw_screen.c (renamed from src/gallium/winsys/xlib/xm_winsys_aub.c)334
-rw-r--r--src/gallium/winsys/xlib/xlib_cell.c437
-rw-r--r--src/gallium/winsys/xlib/xlib_softpipe.c (renamed from src/gallium/winsys/xlib/xm_winsys.c)417
-rw-r--r--src/gallium/winsys/xlib/xlib_trace.c107
-rw-r--r--src/gallium/winsys/xlib/xm_image.c133
-rw-r--r--src/gallium/winsys/xlib/xm_image.h77
-rw-r--r--src/gallium/winsys/xlib/xmesa.h424
-rw-r--r--src/gallium/winsys/xlib/xmesaP.h180
-rw-r--r--src/gallium/winsys/xlib/xmesa_x.h86
616 files changed, 74860 insertions, 15864 deletions
diff --git a/src/gallium/Makefile b/src/gallium/Makefile
index aa77021daf..79ca767f7e 100644
--- a/src/gallium/Makefile
+++ b/src/gallium/Makefile
@@ -1,25 +1,12 @@
+# src/gallium/Makefile
TOP = ../..
include $(TOP)/configs/current
+SUBDIRS = $(GALLIUM_DIRS)
-SUBDIRS = auxiliary drivers
-
-
-default: subdirs
-
-
-subdirs:
+default install clean:
@for dir in $(SUBDIRS) ; do \
if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE)) || exit 1 ; \
+ (cd $$dir && $(MAKE) $@) || exit 1; \
fi \
done
-
-
-clean:
- rm -f `find . -name \*.[oa]`
- rm -f `find . -name depend`
-
-
-# Dummy install target
-install:
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template
index 4e462b5c97..98487d43bd 100644
--- a/src/gallium/Makefile.template
+++ b/src/gallium/Makefile.template
@@ -1,64 +1,66 @@
-# -*-makefile-*-
+# src/gallium/Makefile.template
-
-# We still have a dependency on the "dri" buffer manager. Most likely
-# the interface can be reused in non-dri environments, and also as a
-# frontend to simpler memory managers.
+# Template makefile for gallium libraries.
+#
+# Usage:
+# The minimum that the including makefile needs to define
+# is TOP, LIBNAME and one of the *_SOURCES.
#
-COMMON_SOURCES =
+# Optional defines:
+# LIBRARY_INCLUDES are appended to the list of include directories.
+# LIBRARY_DEFINES is not used for makedepend, but for compilation.
-OBJECTS = $(C_SOURCES:.c=.o) \
- $(CPP_SOURCES:.cpp=.o) \
- $(ASM_SOURCES:.S=.o)
+### Basic defines ###
+OBJECTS = $(C_SOURCES:.c=.o) \
+ $(CPP_SOURCES:.cpp=.o) \
+ $(ASM_SOURCES:.S=.o)
-### Include directories
INCLUDES = \
-I. \
-I$(TOP)/src/gallium/include \
-I$(TOP)/src/gallium/auxiliary \
-I$(TOP)/src/gallium/drivers \
- -I$(TOP)/include \
- $(DRIVER_INCLUDES)
-
-
-##### RULES #####
-
-.c.o:
- $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-.cpp.o:
- $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-.S.o:
- $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+ -I$(GALLIUM)/src/gallium/include \
+ -I$(GALLIUM)/src/gallium/auxiliary \
+ -I$(GALLIUM)/src/gallium/drivers \
+ $(LIBRARY_INCLUDES)
##### TARGETS #####
-default: depend symlinks $(LIBNAME)
-
-
-$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/gallium/Makefile.template
- $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS)
+default: depend lib$(LIBNAME).a
+lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/src/gallium/Makefile.template
+ $(MKLIB) -o $(LIBNAME) -static $(OBJECTS)
depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
rm -f depend
touch depend
- $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \
- $(ASM_SOURCES) 2> /dev/null
-
+ $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null
# Emacs tags
tags:
etags `find . -name \*.[ch]` `find ../include`
-
# Remove .o and backup files
-clean::
- -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS)
- -rm -f depend depend.bak
+clean:
+ rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak
+
+# Dummy target
+install:
+ @echo -n ""
+
+##### RULES #####
+
+.c.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.cpp.o:
+ $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.S.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
-include depend
+sinclude depend
diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index 6a3e7e77ed..0c632ac2b8 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -21,9 +21,14 @@ SConscript([
'auxiliary/translate/SConscript',
'auxiliary/draw/SConscript',
'auxiliary/pipebuffer/SConscript',
+ 'auxiliary/indices/SConscript',
])
for driver in env['drivers']:
SConscript(os.path.join('drivers', driver, 'SConscript'))
SConscript('state_trackers/python/SConscript')
+SConscript('state_trackers/glx/xlib/SConscript')
+
+if platform == 'windows':
+ SConscript('state_trackers/wgl/SConscript')
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index eaa0f2fe4e..5446eb68a9 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -1,20 +1,12 @@
+# src/gallium/auxiliary/Makefile
TOP = ../../..
include $(TOP)/configs/current
-
SUBDIRS = $(GALLIUM_AUXILIARY_DIRS)
-
-default: subdirs
-
-
-subdirs:
+default install clean:
@for dir in $(SUBDIRS) ; do \
if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE)) || exit 1 ; \
+ (cd $$dir && $(MAKE) $@) || exit 1; \
fi \
done
-
-
-clean:
- rm -f `find . -name \*.[oa]`
diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile
index 6bd6602088..8726afcd94 100644
--- a/src/gallium/auxiliary/cso_cache/Makefile
+++ b/src/gallium/auxiliary/cso_cache/Makefile
@@ -9,6 +9,3 @@ C_SOURCES = \
cso_hash.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index 6b1754ea00..0bc77a5728 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -28,7 +28,7 @@
/* Authors: Zack Rusin <zack@tungstengraphics.com>
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 68508f24de..a9157aad71 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -783,7 +783,7 @@ copy_framebuffer_state(struct pipe_framebuffer_state *dst,
dst->width = src->width;
dst->height = src->height;
- dst->num_cbufs = src->num_cbufs;
+ dst->nr_cbufs = src->nr_cbufs;
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c
index 7f0044c5a7..288cef7b6f 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.c
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.c
@@ -30,7 +30,7 @@
* Zack Rusin <zack@tungstengraphics.com>
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "cso_hash.h"
@@ -431,3 +431,9 @@ struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter
--hash->data.d->size;
return ret;
}
+
+boolean cso_hash_contains(struct cso_hash *hash, unsigned key)
+{
+ struct cso_node **node = cso_hash_find_node(hash, key);
+ return (*node != hash->data.e);
+}
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h
index 85f3e276c6..5891c325fa 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.h
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.h
@@ -44,6 +44,7 @@
#ifndef CSO_HASH_H
#define CSO_HASH_H
+#include "pipe/p_compiler.h"
#ifdef __cplusplus
extern "C" {
@@ -95,6 +96,11 @@ struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash);
*/
struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key);
+/**
+ * Returns true if a value with the given key exists in the hash
+ */
+boolean cso_hash_contains(struct cso_hash *hash, unsigned key);
+
int cso_hash_iter_is_null(struct cso_hash_iter iter);
unsigned cso_hash_iter_key(struct cso_hash_iter iter);
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index f2e36a89e9..5041dcc072 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -40,10 +40,7 @@ C_SOURCES = \
draw_vs_aos_machine.c \
draw_vs_exec.c \
draw_vs_llvm.c \
+ draw_vs_ppc.c \
draw_vs_sse.c
-
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 544a04918b..5f05aa324a 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -38,6 +38,7 @@ draw = env.ConvenienceLibrary(
'draw_vs_aos_machine.c',
'draw_vs_exec.c',
'draw_vs_llvm.c',
+ 'draw_vs_ppc.c',
'draw_vs_sse.c',
'draw_vs_varient.c'
])
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index fab8fc95fc..581532c1c9 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -138,8 +138,9 @@ void draw_set_driver_clipping( struct draw_context *draw,
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
draw->driver.bypass_clipping = bypass_clipping;
- draw->bypass_clipping = (draw->rasterizer->bypass_clipping ||
- draw->driver.bypass_clipping);
+ draw->bypass_clipping =
+ ((draw->rasterizer && draw->rasterizer->bypass_clipping) ||
+ draw->driver.bypass_clipping);
}
@@ -343,6 +344,21 @@ draw_num_vs_outputs(const struct draw_context *draw)
}
+/**
+ * Provide TGSI sampler objects for vertex shaders that use texture fetches.
+ * This might only be used by software drivers for the time being.
+ */
+void
+draw_texture_samplers(struct draw_context *draw,
+ uint num_samplers,
+ struct tgsi_sampler **samplers)
+{
+ draw->vs.num_samplers = num_samplers;
+ draw->vs.samplers = samplers;
+}
+
+
+
void draw_set_render( struct draw_context *draw,
struct vbuf_render *render )
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index a29bb01d81..d529e4e9a2 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -45,7 +45,7 @@ struct pipe_context;
struct draw_context;
struct draw_stage;
struct draw_vertex_shader;
-
+struct tgsi_sampler;
struct draw_context *draw_create( void );
@@ -92,6 +92,12 @@ uint
draw_num_vs_outputs(const struct draw_context *draw);
+void
+draw_texture_samplers(struct draw_context *draw,
+ uint num_samplers,
+ struct tgsi_sampler **samplers);
+
+
/*
* Vertex shader functions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 20841bb5d6..80c9c918a9 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -410,7 +410,7 @@ aaline_create_texture(struct aaline_stage *aaline)
* texels which are zero. Special case the 1x1 and 2x2 levels.
*/
for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
- struct pipe_surface *surface;
+ struct pipe_transfer *transfer;
const uint size = aaline->texture->width[level];
ubyte *data;
uint i, j;
@@ -419,9 +419,9 @@ aaline_create_texture(struct aaline_stage *aaline)
/* This texture is new, no need to flush.
*/
- surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- data = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_WRITE);
+ transfer = screen->get_tex_transfer(screen, aaline->texture, 0, level, 0,
+ PIPE_TRANSFER_WRITE, 0, 0, size, size);
+ data = screen->transfer_map(screen, transfer);
if (data == NULL)
return FALSE;
@@ -440,13 +440,13 @@ aaline_create_texture(struct aaline_stage *aaline)
else {
d = 255;
}
- data[i * surface->stride + j] = d;
+ data[i * transfer->stride + j] = d;
}
}
/* unmap */
- screen->surface_unmap(screen, surface);
- screen->tex_surface_release(screen, &surface);
+ screen->transfer_unmap(screen, transfer);
+ screen->tex_transfer_release(screen, &transfer);
}
return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index b764d9c518..e68c824c86 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -256,7 +256,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
struct tgsi_full_immediate immed;
uint size = 4;
immed = tgsi_default_full_immediate();
- immed.Immediate.Size = 1 + size; /* one for the token itself */
+ immed.Immediate.NrTokens = 1 + size; /* one for the token itself */
immed.u.Pointer = (void *) value;
ctx->emit_immediate(ctx, &immed);
}
@@ -372,7 +372,7 @@ pstip_update_texture(struct pstip_stage *pstip)
static const uint bit31 = 1 << 31;
struct pipe_context *pipe = pstip->pipe;
struct pipe_screen *screen = pipe->screen;
- struct pipe_surface *surface;
+ struct pipe_transfer *transfer;
const uint *stipple = pstip->state.stipple->stipple;
uint i, j;
ubyte *data;
@@ -381,10 +381,9 @@ pstip_update_texture(struct pstip_stage *pstip)
*/
pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL );
- surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- data = screen->surface_map(screen, surface,
- PIPE_BUFFER_USAGE_CPU_WRITE);
+ transfer = screen->get_tex_transfer(screen, pstip->texture, 0, 0, 0,
+ PIPE_TRANSFER_WRITE, 0, 0, 32, 32);
+ data = screen->transfer_map(screen, transfer);
/*
* Load alpha texture.
@@ -396,18 +395,18 @@ pstip_update_texture(struct pstip_stage *pstip)
for (j = 0; j < 32; j++) {
if (stipple[i] & (bit31 >> j)) {
/* fragment "on" */
- data[i * surface->stride + j] = 0;
+ data[i * transfer->stride + j] = 0;
}
else {
/* fragment "off" */
- data[i * surface->stride + j] = 255;
+ data[i * transfer->stride + j] = 255;
}
}
}
/* unmap */
- screen->surface_unmap(screen, surface);
- screen->tex_surface_release(screen, &surface);
+ screen->transfer_unmap(screen, transfer);
+ screen->tex_transfer_release(screen, &transfer);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 9825e116c3..12325d30d6 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -29,12 +29,12 @@
* \file
* Vertex buffer drawing stage.
*
- * \author José Fonseca <jrfonsec@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonsec@tungstengraphics.com>
* \author Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -93,7 +93,6 @@ vbuf_stage( struct draw_stage *stage )
}
-static void vbuf_flush_indices( struct vbuf_stage *vbuf );
static void vbuf_flush_vertices( struct vbuf_stage *vbuf );
static void vbuf_alloc_vertices( struct vbuf_stage *vbuf );
@@ -109,13 +108,12 @@ overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
static INLINE void
check_space( struct vbuf_stage *vbuf, unsigned nr )
{
- if (vbuf->nr_vertices + nr > vbuf->max_vertices ) {
- vbuf_flush_vertices(vbuf);
- vbuf_alloc_vertices(vbuf);
+ if (vbuf->nr_vertices + nr > vbuf->max_vertices ||
+ vbuf->nr_indices + nr > vbuf->max_indices)
+ {
+ vbuf_flush_vertices( vbuf );
+ vbuf_alloc_vertices( vbuf );
}
-
- if (vbuf->nr_indices + nr > vbuf->max_indices )
- vbuf_flush_indices(vbuf);
}
@@ -202,7 +200,7 @@ vbuf_point( struct draw_stage *stage,
* will be flushed if needed and a new one allocated.
*/
static void
-vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
+vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
{
struct translate_key hw_key;
unsigned dst_offset;
@@ -217,11 +215,7 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
* state change.
*/
vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render);
-
- if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) {
- vbuf_flush_vertices(vbuf);
- vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
- }
+ vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
/* Translate from pipeline vertices to hw vertices.
*/
@@ -294,8 +288,8 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
/* Allocate new buffer?
*/
- if (!vbuf->vertices)
- vbuf_alloc_vertices(vbuf);
+ assert(vbuf->vertices == NULL);
+ vbuf_alloc_vertices(vbuf);
}
@@ -305,9 +299,9 @@ vbuf_first_tri( struct draw_stage *stage,
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices( vbuf );
+ vbuf_start_prim(vbuf, PIPE_PRIM_TRIANGLES);
stage->tri = vbuf_tri;
- vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES);
stage->tri( stage, prim );
}
@@ -318,9 +312,9 @@ vbuf_first_line( struct draw_stage *stage,
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices( vbuf );
+ vbuf_start_prim(vbuf, PIPE_PRIM_LINES);
stage->line = vbuf_line;
- vbuf_set_prim(vbuf, PIPE_PRIM_LINES);
stage->line( stage, prim );
}
@@ -331,53 +325,42 @@ vbuf_first_point( struct draw_stage *stage,
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices(vbuf);
+ vbuf_start_prim(vbuf, PIPE_PRIM_POINTS);
stage->point = vbuf_point;
- vbuf_set_prim(vbuf, PIPE_PRIM_POINTS);
stage->point( stage, prim );
}
-static void
-vbuf_flush_indices( struct vbuf_stage *vbuf )
-{
- if(!vbuf->nr_indices)
- return;
-
- assert((uint) (vbuf->vertex_ptr - vbuf->vertices) ==
- vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned));
-
- vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices);
-
- vbuf->nr_indices = 0;
-}
-
/**
* Flush existing vertex buffer and allocate a new one.
- *
- * XXX: We separate flush-on-index-full and flush-on-vb-full, but may
- * raise issues uploading vertices if the hardware wants to flush when
- * we flush.
*/
static void
vbuf_flush_vertices( struct vbuf_stage *vbuf )
{
- if(vbuf->vertices) {
- vbuf_flush_indices(vbuf);
-
+ if(vbuf->vertices) {
+
+ vbuf->render->unmap_vertices( vbuf->render, 0, vbuf->nr_vertices - 1 );
+
+ if (vbuf->nr_indices)
+ {
+ vbuf->render->draw(vbuf->render,
+ vbuf->indices,
+ vbuf->nr_indices );
+
+ vbuf->nr_indices = 0;
+ }
+
/* Reset temporary vertices ids */
if(vbuf->nr_vertices)
draw_reset_vertex_ids( vbuf->stage.draw );
/* Free the vertex buffer */
- vbuf->render->release_vertices(vbuf->render,
- vbuf->vertices,
- vbuf->vertex_size,
- vbuf->nr_vertices);
+ vbuf->render->release_vertices( vbuf->render );
+
vbuf->max_vertices = vbuf->nr_vertices = 0;
vbuf->vertex_ptr = vbuf->vertices = NULL;
-
}
}
@@ -394,14 +377,20 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* even number */
vbuf->max_vertices = vbuf->max_vertices & ~1;
+ if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
+ vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
+
/* Must always succeed -- driver gives us a
* 'max_vertex_buffer_bytes' which it guarantees it can allocate,
* and it will flush itself if necessary to do so. If this does
* fail, we are basically without usable hardware.
*/
- vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render,
- (ushort) vbuf->vertex_size,
- (ushort) vbuf->max_vertices);
+ vbuf->render->allocate_vertices(vbuf->render,
+ (ushort) vbuf->vertex_size,
+ (ushort) vbuf->max_vertices);
+
+ vbuf->vertices = (uint *) vbuf->render->map_vertices( vbuf->render );
+
vbuf->vertex_ptr = vbuf->vertices;
}
@@ -412,14 +401,11 @@ vbuf_flush( struct draw_stage *stage, unsigned flags )
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices( vbuf );
stage->point = vbuf_first_point;
stage->line = vbuf_first_line;
stage->tri = vbuf_first_tri;
-
- if (flags & DRAW_FLUSH_BACKEND)
- vbuf_flush_vertices( vbuf );
}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index a16b45d340..81e4eae401 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -187,6 +187,9 @@ struct draw_context
/** TGSI program interpreter runtime state */
struct tgsi_exec_machine machine;
+ uint num_samplers;
+ struct tgsi_sampler **samplers;
+
/* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
*/
struct gallivm_cpu_engine *engine;
@@ -198,7 +201,7 @@ struct draw_context
const float (*aligned_constants)[4];
- float (*aligned_constant_storage)[4];
+ const float (*aligned_constant_storage)[4];
unsigned const_storage_size;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 18f24e5980..4e5ffa0930 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -228,7 +228,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
for (j = 0; j < draw->pt.nr_vertex_elements; j++) {
uint buf = draw->pt.vertex_element[j].vertex_buffer_index;
ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf];
- ptr += draw->pt.vertex_buffer[buf].pitch * ii;
+ ptr += draw->pt.vertex_buffer[buf].stride * ii;
ptr += draw->pt.vertex_element[j].src_offset;
debug_printf(" Attr %u: ", j);
@@ -301,8 +301,8 @@ draw_arrays(struct draw_context *draw, unsigned prim,
}
debug_printf("Buffers:\n");
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
- debug_printf(" pitch=%u offset=%u ptr=%p\n",
- draw->pt.vertex_buffer[i].pitch,
+ debug_printf(" stride=%u offset=%u ptr=%p\n",
+ draw->pt.vertex_buffer[i].stride,
draw->pt.vertex_buffer[i].buffer_offset,
draw->pt.user.vbuffer[i]);
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index c02f229110..aecaeee5b9 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -173,9 +173,7 @@ void draw_pt_emit( struct pt_emit *emit,
void draw_pt_emit_linear( struct pt_emit *emit,
const float (*vertex_data)[4],
- unsigned vertex_count,
unsigned stride,
- unsigned start,
unsigned count );
void draw_pt_emit_destroy( struct pt_emit *emit );
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index d520b05869..064e16c295 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -165,6 +165,14 @@ void draw_pt_emit( struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ if (vertex_count == 0)
+ return;
+
+ if (vertex_count >= UNDEFINED_VERTEX_ID) {
+ assert(0);
+ return;
+ }
+
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -173,9 +181,11 @@ void draw_pt_emit( struct pt_emit *emit,
return;
}
- hw_verts = render->allocate_vertices(render,
- (ushort)translate->key.output_stride,
- (ushort)vertex_count);
+ render->allocate_vertices(render,
+ (ushort)translate->key.output_stride,
+ (ushort)vertex_count);
+
+ hw_verts = render->map_vertices( render );
if (!hw_verts) {
assert(0);
return;
@@ -196,22 +206,21 @@ void draw_pt_emit( struct pt_emit *emit,
vertex_count,
hw_verts );
+ render->unmap_vertices( render,
+ 0,
+ vertex_count - 1 );
+
render->draw(render,
elts,
count);
- render->release_vertices(render,
- hw_verts,
- translate->key.output_stride,
- vertex_count);
+ render->release_vertices(render);
}
void draw_pt_emit_linear(struct pt_emit *emit,
const float (*vertex_data)[4],
- unsigned vertex_count,
unsigned stride,
- unsigned start,
unsigned count)
{
struct draw_context *draw = emit->draw;
@@ -226,21 +235,23 @@ void draw_pt_emit_linear(struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
- if (!draw->render->set_primitive(draw->render, emit->prim)) {
- assert(0);
- return;
- }
+ if (!draw->render->set_primitive(draw->render, emit->prim))
+ goto fail;
- hw_verts = render->allocate_vertices(render,
- (ushort)translate->key.output_stride,
- (ushort)count);
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (!render->allocate_vertices(render,
+ (ushort)translate->key.output_stride,
+ (ushort)count))
+ goto fail;
+
+ hw_verts = render->map_vertices( render );
+ if (!hw_verts)
+ goto fail;
translate->set_buffer(translate, 0,
vertex_data, stride);
@@ -251,12 +262,12 @@ void draw_pt_emit_linear(struct pt_emit *emit,
translate->run(translate,
0,
- vertex_count,
+ count,
hw_verts);
if (0) {
unsigned i;
- for (i = 0; i < vertex_count; i++) {
+ for (i = 0; i < count; i++) {
debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i);
draw_dump_emitted_vertex( emit->vinfo,
(const uint8_t *)hw_verts +
@@ -264,13 +275,17 @@ void draw_pt_emit_linear(struct pt_emit *emit,
}
}
+ render->unmap_vertices( render, 0, count - 1 );
+
+ render->draw_arrays(render, 0, count);
+
+ render->release_vertices(render);
- render->draw_arrays(render, start, count);
+ return;
- render->release_vertices(render,
- hw_verts,
- translate->key.output_stride,
- vertex_count);
+fail:
+ assert(0);
+ return;
}
struct pt_emit *draw_pt_emit_create( struct draw_context *draw )
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 6377f896fb..058caf7dcc 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -144,7 +144,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
translate->run_elts( translate,
@@ -180,7 +180,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
translate->run( translate,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 3966ad48ba..6b7d02a19b 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -195,7 +195,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
@@ -229,9 +229,16 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->translate->key.output_stride,
- (ushort)fetch_count );
+ if (fetch_count >= UNDEFINED_VERTEX_ID) {
+ assert(0);
+ return;
+ }
+
+ draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)fetch_count );
+
+ hw_verts = draw->render->map_vertices( draw->render );
if (!hw_verts) {
assert(0);
return;
@@ -254,6 +261,10 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
}
}
+ draw->render->unmap_vertices( draw->render,
+ 0,
+ (ushort)(fetch_count - 1) );
+
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
@@ -263,10 +274,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
/* Done -- that was easy, wasn't it:
*/
- draw->render->release_vertices( draw->render,
- hw_verts,
- feme->translate->key.output_stride,
- fetch_count );
+ draw->render->release_vertices( draw->render );
}
@@ -283,13 +291,17 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->translate->key.output_stride,
- (ushort)count );
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
/* Single routine to fetch vertices and emit HW verts.
*/
@@ -307,20 +319,21 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
}
}
+ draw->render->unmap_vertices( draw->render, 0, count - 1 );
+
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
- draw->render->draw_arrays( draw->render,
- 0, /*start*/
- count );
+ draw->render->draw_arrays( draw->render, 0, count );
/* Done -- that was easy, wasn't it:
*/
- draw->render->release_vertices( draw->render,
- hw_verts,
- feme->translate->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
+ return;
+fail:
+ assert(0);
+ return;
}
@@ -338,9 +351,15 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->translate->key.output_stride,
- (ushort)count );
+ if (count >= UNDEFINED_VERTEX_ID)
+ return FALSE;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)count ))
+ return FALSE;
+
+ hw_verts = draw->render->map_vertices( draw->render );
if (!hw_verts)
return FALSE;
@@ -351,6 +370,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
count,
hw_verts );
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
@@ -360,10 +381,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
/* Done -- that was easy, wasn't it:
*/
- draw->render->release_vertices( draw->render,
- hw_verts,
- feme->translate->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index f7e6a1a8ee..cd9cd4b53f 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -121,7 +121,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
}
for (i = 0; i < 5 && i < nr_vbs; i++) {
- if (draw->pt.vertex_buffer[i].pitch == 0)
+ if (draw->pt.vertex_buffer[i].stride == 0)
fse->key.const_vbuffers |= (1<<i);
}
@@ -189,7 +189,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
i,
((const ubyte *) draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
@@ -234,14 +234,17 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)fse->key.output_stride,
- (ushort)count );
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
/* Single routine to fetch vertices, run shader and emit HW verts.
* Clipping is done elsewhere -- either by the API or on hardware,
@@ -251,13 +254,7 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
start, count,
hw_verts );
- /* Draw arrays path to avoid re-emitting index list again and
- * again.
- */
- draw->render->draw_arrays( draw->render,
- 0,
- count );
-
+
if (0) {
unsigned i;
for (i = 0; i < count; i++) {
@@ -269,12 +266,24 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
(const uint8_t *)hw_verts + fse->key.output_stride * i );
}
}
+
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+ /* Draw arrays path to avoid re-emitting index list again and
+ * again.
+ */
+ draw->render->draw_arrays( draw->render,
+ 0,
+ count );
+
- draw->render->release_vertices( draw->render,
- hw_verts,
- fse->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
+
+ return;
+
+fail:
+ assert(0);
+ return;
}
@@ -293,13 +302,17 @@ fse_run(struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)fse->key.output_stride,
- (ushort)fetch_count );
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (fetch_count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)fetch_count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
/* Single routine to fetch vertices, run shader and emit HW verts.
@@ -309,9 +322,6 @@ fse_run(struct draw_pt_middle_end *middle,
fetch_count,
hw_verts );
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
if (0) {
unsigned i;
@@ -323,12 +333,19 @@ fse_run(struct draw_pt_middle_end *middle,
}
}
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(fetch_count - 1) );
+
+ draw->render->draw( draw->render,
+ draw_elts,
+ draw_count );
+
- draw->render->release_vertices( draw->render,
- hw_verts,
- fse->key.output_stride,
- fetch_count );
+ draw->render->release_vertices( draw->render );
+ return;
+fail:
+ assert(0);
+ return;
}
@@ -347,13 +364,17 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)fse->key.output_stride,
- (ushort)count );
+ if (count >= UNDEFINED_VERTEX_ID)
+ return FALSE;
- if (!hw_verts) {
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)count ))
+ return FALSE;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
return FALSE;
- }
/* Single routine to fetch vertices, run shader and emit HW verts.
* Clipping is done elsewhere -- either by the API or on hardware,
@@ -369,11 +390,9 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
draw_count );
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
- draw->render->release_vertices( draw->render,
- hw_verts,
- fse->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index ec3b41c320..38f9b604d3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -251,9 +251,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
else {
draw_pt_emit_linear( fpme->emit,
(const float (*)[4])pipeline_verts->data,
- count,
fpme->vertex_size,
- 0, /*start*/
count );
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index c15afe65f1..d0e16c9bc3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -67,7 +67,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray,
unsigned segment_count,
boolean end )
{
- assert(segment_count+1 < varray->fetch_max);
+ assert(segment_count < varray->fetch_max);
if (segment_count >= 1) {
unsigned nr = 0, i;
@@ -77,7 +77,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray,
if (end)
varray->fetch_elts[nr++] = start;
- assert(nr < FETCH_MAX);
+ assert(nr <= FETCH_MAX);
varray->middle->run(varray->middle,
varray->fetch_elts,
@@ -94,7 +94,7 @@ static void varray_fan_segment(struct varray_frontend *varray,
unsigned segment_start,
unsigned segment_count )
{
- assert(segment_count+1 < varray->fetch_max);
+ assert(segment_count < varray->fetch_max);
if (segment_count >= 2) {
unsigned nr = 0, i;
@@ -104,7 +104,7 @@ static void varray_fan_segment(struct varray_frontend *varray,
for (i = 0 ; i < segment_count; i++)
varray->fetch_elts[nr++] = start + segment_start + i;
- assert(nr < FETCH_MAX);
+ assert(nr <= FETCH_MAX);
varray->middle->run(varray->middle,
varray->fetch_elts,
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 80d7200ca6..5d268a2226 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -324,7 +324,7 @@ vcache_check_run( struct draw_pt_front_end *frontend,
unsigned fetch_count = max_index + 1 - min_index;
const ushort *transformed_elts;
ushort *storage = NULL;
- boolean ok;
+ boolean ok = FALSE;
if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
@@ -413,11 +413,12 @@ vcache_check_run( struct draw_pt_front_end *frontend,
transformed_elts = storage;
}
- ok = vcache->middle->run_linear_elts( vcache->middle,
- min_index, /* start */
- fetch_count,
- transformed_elts,
- draw_count );
+ if (fetch_count < UNDEFINED_VERTEX_ID)
+ ok = vcache->middle->run_linear_elts( vcache->middle,
+ min_index, /* start */
+ fetch_count,
+ transformed_elts,
+ draw_count );
FREE(storage);
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
index 9ac068c47b..cccd3bf435 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -30,14 +30,17 @@
* Vertex buffer drawing stage.
*
* \author Keith Whitwell <keith@tungstengraphics.com>
- * \author José Fonseca <jrfonsec@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonsec@tungstengraphics.com>
*/
#ifndef DRAW_VBUF_H_
#define DRAW_VBUF_H_
+#include "pipe/p_compiler.h"
+
+struct pipe_rasterizer_state;
struct draw_context;
struct vertex_info;
@@ -77,9 +80,14 @@ struct vbuf_render {
* Hardware renderers will use ttm memory, others will just malloc
* something.
*/
- void *(*allocate_vertices)( struct vbuf_render *,
- ushort vertex_size,
- ushort nr_vertices );
+ boolean (*allocate_vertices)( struct vbuf_render *,
+ ushort vertex_size,
+ ushort nr_vertices );
+
+ void *(*map_vertices)( struct vbuf_render * );
+ void (*unmap_vertices)( struct vbuf_render *,
+ ushort min_index,
+ ushort max_index );
/**
* Notify the renderer of the current primitive when it changes.
@@ -106,10 +114,7 @@ struct vbuf_render {
/**
* Called when vbuf is done with this set of vertices:
*/
- void (*release_vertices)( struct vbuf_render *,
- void *vertices,
- unsigned vertex_size,
- unsigned vertices_used );
+ void (*release_vertices)( struct vbuf_render * );
void (*destroy)( struct vbuf_render * );
};
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index a943607d7e..c143cf2372 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -81,9 +81,9 @@ struct vertex_info
* memcmp() comparisons.
*/
struct {
- ubyte interp_mode:4; /**< INTERP_x */
- ubyte emit:4; /**< EMIT_x */
- ubyte src_index; /**< map to post-xform attribs */
+ unsigned interp_mode:4; /**< INTERP_x */
+ unsigned emit:4; /**< EMIT_x */
+ unsigned src_index:8; /**< map to post-xform attribs */
} attrib[PIPE_MAX_SHADER_INPUTS];
};
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 34adbd49b0..c057cd67fd 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -50,7 +50,7 @@ void draw_vs_set_constants( struct draw_context *draw,
const float (*constants)[4],
unsigned size )
{
- if (((unsigned)constants) & 0xf) {
+ if (((uintptr_t)constants) & 0xf) {
if (size > draw->vs.const_storage_size) {
if (draw->vs.aligned_constant_storage)
align_free((void *)draw->vs.aligned_constant_storage);
@@ -85,7 +85,10 @@ draw_create_vertex_shader(struct draw_context *draw,
if (!vs) {
vs = draw_create_vs_sse( draw, shader );
if (!vs) {
- vs = draw_create_vs_exec( draw, shader );
+ vs = draw_create_vs_ppc( draw, shader );
+ if (!vs) {
+ vs = draw_create_vs_exec( draw, shader );
+ }
}
}
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 68c24abad3..89ae158751 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -158,6 +158,10 @@ draw_create_vs_sse(struct draw_context *draw,
const struct pipe_shader_state *templ);
struct draw_vertex_shader *
+draw_create_vs_ppc(struct draw_context *draw,
+ const struct pipe_shader_state *templ);
+
+struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 875ecb92db..1fb69ef81a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -32,7 +32,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "pipe/p_shader_tokens.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
@@ -884,7 +884,7 @@ static void set_fpu_round_nearest( struct aos_compilation *cp )
}
}
-
+#if 0
static void x87_emit_ex2( struct aos_compilation *cp )
{
struct x86_reg st0 = x86_make_reg(file_x87, 0);
@@ -907,13 +907,17 @@ static void x87_emit_ex2( struct aos_compilation *cp )
assert( stack == cp->func->x87_stack);
}
+#endif
+#if 0
static void PIPE_CDECL print_reg( const char *msg,
const float *reg )
{
debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]);
}
+#endif
+#if 0
static void emit_print( struct aos_compilation *cp,
const char *message, /* must point to a static string! */
unsigned file,
@@ -965,6 +969,7 @@ static void emit_print( struct aos_compilation *cp,
/* Done...
*/
}
+#endif
/**
* The traditional instructions. All operate on internal registers
@@ -1103,7 +1108,7 @@ static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-
+#if 0
static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
@@ -1111,6 +1116,7 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst
x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
return TRUE;
}
+#endif
static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
@@ -1566,7 +1572,6 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst
*/
static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
-
if (0) {
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg r = aos_get_xmm_reg(cp);
@@ -1575,21 +1580,30 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
else {
- struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
- struct x86_reg r = aos_get_xmm_reg(cp);
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg r = aos_get_xmm_reg(cp);
struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ );
struct x86_reg one_point_five = x86_make_disp( neg_half, 4 );
struct x86_reg src = get_xmm_writable( cp, arg0 );
-
- sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */
- sse_mulss( cp->func, src, neg_half ); /* -.5 * a */
- sse_mulss( cp->func, src, r ); /* -.5 * a * r */
- sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */
- sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */
- sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movaps(cp->func, tmp, src);
+ sse_mulps(cp->func, tmp, neg);
+ sse_maxps(cp->func, tmp, src);
+
+ sse_rsqrtss( cp->func, r, tmp ); /* rsqrtss(a) */
+ sse_mulss( cp->func, tmp, neg_half ); /* -.5 * a */
+ sse_mulss( cp->func, tmp, r ); /* -.5 * a * r */
+ sse_mulss( cp->func, tmp, r ); /* -.5 * a * r * r */
+ sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */
+ sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */
store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+
return TRUE;
}
}
@@ -1877,7 +1891,7 @@ static boolean note_immediate( struct aos_compilation *cp,
unsigned pos = cp->num_immediates++;
unsigned j;
- for (j = 0; j < imm->Immediate.Size; j++) {
+ for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 82d27d4493..b3200df811 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -62,12 +62,15 @@ vs_exec_prepare( struct draw_vertex_shader *shader,
{
struct exec_vertex_shader *evs = exec_vertex_shader(shader);
- /* specify the vertex program to interpret/execute */
- tgsi_exec_machine_bind_shader(evs->machine,
- shader->state.tokens,
- PIPE_MAX_SAMPLERS,
- NULL /*samplers*/ );
-
+ /* Specify the vertex program to interpret/execute.
+ * Avoid rebinding when possible.
+ */
+ if (evs->machine->Tokens != shader->state.tokens) {
+ tgsi_exec_machine_bind_shader(evs->machine,
+ shader->state.tokens,
+ draw->vs.num_samplers,
+ draw->vs.samplers);
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
new file mode 100644
index 0000000000..d35db57d57
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -0,0 +1,244 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_config.h"
+
+#include "draw_vs.h"
+
+#if defined(PIPE_ARCH_PPC)
+
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+
+#include "rtasm/rtasm_cpu.h"
+#include "rtasm/rtasm_ppc.h"
+#include "tgsi/tgsi_ppc.h"
+#include "tgsi/tgsi_parse.h"
+
+
+
+typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], /* SoA inputs, indexed [attr][component][vertex] */
+ float (*outputs)[4][4], /* SoA results, indexed the same way as inputs */
+ float (*temps)[4][4], /* temporaries storage (caller passes temps_soa) */
+ float (*immeds)[4], /* immediates buffer, the one handed to tgsi_emit_ppc() at creation */
+ float (*consts)[4], /* shader constants supplied by the caller of run_linear */
+ const float *builtins); /* caller passes ppc_builtin_constants */
+
+
+struct draw_ppc_vertex_shader {
+ struct draw_vertex_shader base; /* kept first: this file casts draw_vertex_shader* to this struct */
+ struct ppc_function ppc_program; /* passed to ppc_init_func(), tgsi_emit_ppc() and ppc_release_func() */
+
+ codegen_function func; /* entry point returned by ppc_get_func() for ppc_program */
+};
+
+
+static void
+vs_ppc_prepare( struct draw_vertex_shader *base,
+ struct draw_context *draw )
+{
+ /* nothing to do here; the function only fills the base.prepare slot set in draw_create_vs_ppc() */
+}
+
+
+/**
+ * Run the compiled PPC shader over 'count' vertices, four at a time:
+ * each group is packed from AoS 'input' into SoA arrays, passed to
+ * shader->func, and the SoA results are unpacked into AoS 'output'.
+ */
+static void
+vs_ppc_run_linear( struct draw_vertex_shader *base,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride, /* bytes between consecutive input vertices */
+ unsigned output_stride ) /* bytes between consecutive output vertices */
+{
+ struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base;
+ unsigned int i;
+
+#define MAX_VERTICES 4 /* vertices per shader call; equals the last [4] of the SoA arrays; not #undef'd */
+
+ /* loop over verts, one group of up to MAX_VERTICES per iteration */
+ for (i = 0; i < count; i += MAX_VERTICES) {
+ const uint max_vertices = MIN2(MAX_VERTICES, count - i); /* < MAX_VERTICES only in a final partial group */
+ float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB;
+ float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB;
+ float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB;
+ uint attr;
+
+ /* convert (up to) four input verts to SoA format: [attr][component][vertex] */
+ for (attr = 0; attr < base->info.num_inputs; attr++) {
+ const float *vIn = (const float *) input;
+ uint vert;
+ for (vert = 0; vert < max_vertices; vert++) {
+#if 0
+ if (attr==0)
+ printf("Input v%d a%d: %f %f %f %f\n",
+ vert, attr, vIn[0], vIn[1], vIn[2], vIn[3]);
+#endif
+ inputs_soa[attr][0][vert] = vIn[attr * 4 + 0];
+ inputs_soa[attr][1][vert] = vIn[attr * 4 + 1];
+ inputs_soa[attr][2][vert] = vIn[attr * 4 + 2];
+ inputs_soa[attr][3][vert] = vIn[attr * 4 + 3];
+ vIn += input_stride / 4; /* byte stride -> float count; assumes 4-byte floats */
+ }
+ }
+
+ /* run compiled shader; SoA lanes at index >= max_vertices were never
+ * written above and their outputs are not copied back below */
+ shader->func(inputs_soa, outputs_soa, temps_soa,
+ (float (*)[4]) shader->base.immediates,
+ (float (*)[4]) constants,
+ ppc_builtin_constants);
+
+ /* convert (up to) four output verts from SoA back to AoS format */
+ for (attr = 0; attr < base->info.num_outputs; attr++) {
+ float *vOut = (float *) output;
+ uint vert;
+ for (vert = 0; vert < max_vertices; vert++) {
+ vOut[attr * 4 + 0] = outputs_soa[attr][0][vert];
+ vOut[attr * 4 + 1] = outputs_soa[attr][1][vert];
+ vOut[attr * 4 + 2] = outputs_soa[attr][2][vert];
+ vOut[attr * 4 + 3] = outputs_soa[attr][3][vert];
+#if 0
+ if (attr==0)
+ printf("Output v%d a%d: %f %f %f %f\n",
+ vert, attr, vOut[0], vOut[1], vOut[2], vOut[3]);
+#endif
+ vOut += output_stride / 4; /* byte stride -> float count; assumes 4-byte floats */
+ }
+ }
+
+ /* advance to next group of four input/output verts (strides are applied as byte offsets) */
+ input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
+ output = (float (*)[4])((char *)output + output_stride * max_vertices);
+ }
+}
+
+
+static void
+vs_ppc_delete( struct draw_vertex_shader *base )
+{
+ struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base;
+
+ ppc_release_func( &shader->ppc_program ); /* pairs with ppc_init_func() in draw_create_vs_ppc() */
+
+ align_free( (void *) shader->base.immediates ); /* allocated with align_malloc() in draw_create_vs_ppc() */
+
+ FREE( (void*) shader->base.state.tokens ); /* private copy made by tgsi_dup_tokens() */
+ FREE( shader );
+}
+
+
+/**
+ * Create a vertex shader that runs as generated PPC machine code.
+ * Returns NULL on any failure; the caller then tries the interpreter.
+ */
+struct draw_vertex_shader *
+draw_create_vs_ppc(struct draw_context *draw,
+                   const struct pipe_shader_state *templ)
+{
+   struct draw_ppc_vertex_shader *vs;
+
+   vs = CALLOC_STRUCT( draw_ppc_vertex_shader );
+   if (vs == NULL)
+      return NULL;
+
+   /* we make a private copy of the tokens */
+   vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
+   if (!vs->base.state.tokens)
+      goto fail;
+
+   tgsi_scan_shader(templ->tokens, &vs->base.info);
+   vs->base.draw = draw;
+#if 0
+   if (1)
+      vs->base.create_varient = draw_vs_varient_aos_ppc;
+   else
+#endif
+   vs->base.create_varient = draw_vs_varient_generic;
+   vs->base.prepare = vs_ppc_prepare;
+   vs->base.run_linear = vs_ppc_run_linear;
+   vs->base.delete = vs_ppc_delete;
+
+   vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
+                                      sizeof(float), 16);
+   if (!vs->base.immediates)
+      goto fail;
+
+   ppc_init_func( &vs->ppc_program );
+#if 0
+   ppc_print_code(&vs->ppc_program, TRUE);
+   ppc_indent(&vs->ppc_program, 8);
+#endif
+   if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens,
+                       &vs->ppc_program,
+                       (float (*)[4]) vs->base.immediates,
+                       TRUE ))
+      goto fail;
+
+   vs->func = (codegen_function) ppc_get_func( &vs->ppc_program );
+   if (!vs->func)
+      goto fail;
+   return &vs->base;
+
+fail:
+   /* Free whatever was acquired; CALLOC_STRUCT zero-fills, so unset ptrs are NULL. */
+   ppc_release_func( &vs->ppc_program );
+   if (vs->base.immediates)
+      align_free( (void *) vs->base.immediates );
+   FREE( (void *) vs->base.state.tokens );
+   FREE(vs);
+   return NULL;
+}
+
+
+
+#else /* PIPE_ARCH_PPC */
+
+
+struct draw_vertex_shader *
+draw_create_vs_ppc( struct draw_context *draw,
+ const struct pipe_shader_state *templ )
+{
+ return (void *) 0; /* built without PIPE_ARCH_PPC: no shader is created, the arguments are unused */
+}
+
+
+#endif /* PIPE_ARCH_PPC */
diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile
index c3f7bfba93..5a96d94ec3 100644
--- a/src/gallium/auxiliary/gallivm/Makefile
+++ b/src/gallium/auxiliary/gallivm/Makefile
@@ -66,12 +66,12 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES)
gallivm_builtins.cpp: llvm_builtins.c
clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin
- (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@
+ (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@
rm temp1.bin
gallivmsoabuiltins.cpp: soabuiltins.c
clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin
- (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@
+ (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@
rm temp2.bin
# Emacs tags
diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp
index 29adeea47d..f4af5cc8ad 100644
--- a/src/gallium/auxiliary/gallivm/gallivm.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm.cpp
@@ -53,7 +53,7 @@
#include <llvm/ModuleProvider.h>
#include <llvm/Pass.h>
#include <llvm/PassManager.h>
-#include <llvm/ParameterAttributes.h>
+#include <llvm/Attributes.h>
#include <llvm/Support/PatternMatch.h>
#include <llvm/ExecutionEngine/JIT.h>
#include <llvm/ExecutionEngine/Interpreter.h>
diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
index fcc5c05794..634bac0150 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
@@ -137,4 +137,4 @@ static const unsigned char llvm_builtins_data[] = {
0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c,
0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84,
0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84,
-0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
index 3a4a41e544..1bd00a0c2a 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
@@ -56,7 +56,7 @@
#include <llvm/ModuleProvider.h>
#include <llvm/Pass.h>
#include <llvm/PassManager.h>
-#include <llvm/ParameterAttributes.h>
+#include <llvm/Attributes.h>
#include <llvm/Support/PatternMatch.h>
#include <llvm/ExecutionEngine/JIT.h>
#include <llvm/ExecutionEngine/Interpreter.h>
@@ -158,8 +158,8 @@ void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog
llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
llvm::ExecutionEngine *ee = cpu->engine;
assert(ee);
- /*FIXME : remove */
- ee->DisableLazyCompilation();
+ /*FIXME : why was this disabled ? we need it for pow/sqrt/... */
+ ee->DisableLazyCompilation(false);
ee->addModuleProvider(mp);
llvm::Function *func = func_for_shader(prog);
@@ -179,8 +179,7 @@ struct gallivm_cpu_engine * gallivm_global_cpu_engine()
typedef void (*vertex_shader_runner)(void *ainputs,
void *dests,
- float (*aconsts)[4],
- void *temps);
+ float (*aconsts)[4]);
#define MAX_TGSI_VERTICES 4
/*!
@@ -202,7 +201,6 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
unsigned int i, j;
unsigned slot;
vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
-
assert(runner);
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
@@ -224,8 +222,7 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
/* run shader */
runner(machine->Inputs,
machine->Outputs,
- (float (*)[4]) constants,
- machine->Temps);
+ (float (*)[4]) constants);
/* Unswizzle all output results
*/
diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h
index ebf3e11cd5..d2c5852bdf 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_p.h
+++ b/src/gallium/auxiliary/gallivm/gallivm_p.h
@@ -101,10 +101,10 @@ static INLINE int gallivm_w_swizzle(int swizzle)
return w;
}
-#endif /* MESA_LLVM */
-
#if defined __cplusplus
}
#endif
+#endif /* MESA_LLVM */
+
#endif
diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp
index 599975d5ad..ee8162efce 100644
--- a/src/gallium/auxiliary/gallivm/instructions.cpp
+++ b/src/gallium/auxiliary/gallivm/instructions.cpp
@@ -43,7 +43,7 @@
#include <llvm/Function.h>
#include <llvm/InstrTypes.h>
#include <llvm/Instructions.h>
-#include <llvm/ParameterAttributes.h>
+#include <llvm/Attributes.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Bitcode/ReaderWriter.h>
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
index a658072551..925e948763 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
@@ -37,7 +37,7 @@
#include <llvm/Function.h>
#include <llvm/Instructions.h>
#include <llvm/Transforms/Utils/Cloning.h>
-#include <llvm/ParameterAttributes.h>
+#include <llvm/Attributes.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Bitcode/ReaderWriter.h>
@@ -90,68 +90,11 @@ llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y,
return res;
}
-std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in)
-{
- std::vector<llvm::Value*> res(4);
-
- //Extract x's
- llvm::Value *x1 = m_builder.CreateExtractElement(in[0],
- m_storage->constantInt(0),
- name("extractX"));
- //cast it to an unsigned int
- x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast"));
-
- res[0] = x1;//vectorFromVals(x1, x2, x3, x4);
- //only x is valid. the others shouldn't be necessary
- /*
- res[1] = Constant::getNullValue(m_floatVecType);
- res[2] = Constant::getNullValue(m_floatVecType);
- res[3] = Constant::getNullValue(m_floatVecType);
- */
-
- return res;
-}
-
-
-std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx"));
- res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy"));
- res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz"));
- res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw"));
-
- return res;
-}
-
-std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx"));
- res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly"));
- res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz"));
- res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw"));
-
- return res;
-}
-
void InstructionsSoa::end()
{
m_builder.CreateRetVoid();
}
-std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2,
- const std::vector<llvm::Value*> in3)
-{
- std::vector<llvm::Value*> res = mul(in1, in2);
- return add(res, in3);
-}
-
std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector)
{
std::vector<llvm::Value*> res(4);
@@ -171,6 +114,11 @@ std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector)
return res;
}
+llvm::IRBuilder<>* InstructionsSoa::getIRBuilder()
+{
+ return &m_builder; // pointer to the member builder itself, not a copy
+}
+
void InstructionsSoa::createFunctionMap()
{
m_functionsMap[TGSI_OPCODE_ABS] = "abs";
@@ -258,11 +206,12 @@ llvm::Module * InstructionsSoa::currentModule() const
void InstructionsSoa::createBuiltins()
{
+ std::string ErrMsg;
MemoryBuffer *buffer = MemoryBuffer::getMemBuffer(
(const char*)&soabuiltins_data[0],
- (const char*)&soabuiltins_data[Elements(soabuiltins_data)]);
- m_builtins = ParseBitcodeFile(buffer);
- std::cout<<"Builtins created at "<<m_builtins<<std::endl;
+ (const char*)&soabuiltins_data[Elements(soabuiltins_data) - 1]);
+ m_builtins = ParseBitcodeFile(buffer, &ErrMsg);
+ std::cout<<"Builtins created at "<<m_builtins<<" ("<<ErrMsg<<")"<<std::endl;
assert(m_builtins);
createDependencies();
}
@@ -274,6 +223,41 @@ std::vector<llvm::Value*> InstructionsSoa::abs(const std::vector<llvm::Value*> i
return callBuiltin(func, in1);
}
+/* Emits one add instruction per entry of the two four-element operand
+ * lists and returns the four results in the same order. */
+std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1,
+                                               const std::vector<llvm::Value*> in2)
+{
+   static const char *const labels[4] = { "addx", "addy", "addz", "addw" };
+   std::vector<llvm::Value*> sum(4);
+
+   for (unsigned chan = 0; chan < 4; ++chan)
+      sum[chan] = m_builder.CreateAdd(in1[chan], in2[chan], name(labels[chan]));
+
+   return sum;
+}
+
+/* Extracts element 0 of in[0] and converts it from floating point to an
+ * unsigned 32-bit integer.  Only slot 0 of the returned four-entry list is
+ * filled in; the remaining three slots keep their default (null) value. */
+std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in)
+{
+   std::vector<llvm::Value*> res(4);
+
+   llvm::Value *firstLane = m_builder.CreateExtractElement(in[0],
+                                                           m_storage->constantInt(0),
+                                                           name("extractX"));
+   res[0] = m_builder.CreateFPToUI(firstLane, IntegerType::get(32),
+                                   name("x1IntCast"));
+
+   return res;
+}
+
std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2)
{
@@ -281,6 +265,59 @@ std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> i
return callBuiltin(func, in1, in2);
}
+/* Forwards to the builtin registered for TGSI_OPCODE_LIT. */
+std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in)
+{
+   return callBuiltin(function(TGSI_OPCODE_LIT), in);
+}
+
+/* Multiply-add: emits mul(in1, in2) first, then adds in3 to that product. */
+std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1,
+                                                const std::vector<llvm::Value*> in2,
+                                                const std::vector<llvm::Value*> in3)
+{
+   const std::vector<llvm::Value*> product = mul(in1, in2);
+   return add(product, in3);
+}
+
+/* Forwards to the builtin registered for TGSI_OPCODE_MAX. */
+std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1,
+                                               const std::vector<llvm::Value*> in2)
+{
+   return callBuiltin(function(TGSI_OPCODE_MAX), in1, in2);
+}
+
+/* Forwards to the builtin registered for TGSI_OPCODE_MIN. */
+std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1,
+                                               const std::vector<llvm::Value*> in2)
+{
+   return callBuiltin(function(TGSI_OPCODE_MIN), in1, in2);
+}
+
+/* Emits one multiply instruction per entry of the two four-element operand
+ * lists and returns the four results in the same order. */
+std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1,
+                                               const std::vector<llvm::Value*> in2)
+{
+   static const char *const labels[4] = { "mulx", "muly", "mulz", "mulw" };
+   std::vector<llvm::Value*> product(4);
+
+   for (unsigned chan = 0; chan < 4; ++chan)
+      product[chan] = m_builder.CreateMul(in1[chan], in2[chan], name(labels[chan]));
+
+   return product;
+}
+
+/* Forwards to the builtin registered for TGSI_OPCODE_POWER. */
+std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1,
+                                               const std::vector<llvm::Value*> in2)
+{
+   return callBuiltin(function(TGSI_OPCODE_POWER), in1, in2);
+}
+
+/* Forwards to the builtin registered for TGSI_OPCODE_RSQ. */
+std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in)
+{
+   return callBuiltin(function(TGSI_OPCODE_RSQ), in);
+}
std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2)
@@ -289,6 +326,37 @@ std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> i
return callBuiltin(func, in1, in2);
}
+/* Emits one subtract instruction (in1 - in2) per entry of the two
+ * four-element operand lists and returns the four results in order. */
+std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1,
+                                               const std::vector<llvm::Value*> in2)
+{
+   static const char *const labels[4] = { "subx", "suby", "subz", "subw" };
+   std::vector<llvm::Value*> diff(4);
+
+   for (unsigned chan = 0; chan < 4; ++chan)
+      diff[chan] = m_builder.CreateSub(in1[chan], in2[chan], name(labels[chan]));
+
+   return diff;
+}
+
+/* Debug aid: walks every basic block of func and prints each instruction
+ * to std::cout, followed by one line per operand giving the operand's
+ * pointer value and its position. */
+void checkFunction(Function *func)
+{
+   Function::const_iterator block = func->begin();
+   const Function::const_iterator blockEnd = func->end();
+
+   for (; block != blockEnd; ++block) {
+      BasicBlock::const_iterator inst = block->begin();
+      const BasicBlock::const_iterator instEnd = block->end();
+
+      for (; inst != instEnd; ++inst) {
+         std::cout<< "Instr = "<<*inst;
+
+         const unsigned numOperands = inst->getNumOperands();
+         for (unsigned idx = 0; idx != numOperands; ++idx) {
+            const Value *operand = inst->getOperand(idx);
+            std::cout<< "\top = "<<operand<<"("<<idx<<")"<<std::endl;
+         }
+      }
+   }
+}
+
llvm::Value * InstructionsSoa::allocaTemp()
{
VectorType *vector = VectorType::get(Type::FloatTy, 4);
@@ -408,46 +476,6 @@ std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std
return allocaToResult(allocaPtr);
}
-std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- llvm::Function *func = function(TGSI_OPCODE_POWER);
- return callBuiltin(func, in1, in2);
-}
-
-std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- llvm::Function *func = function(TGSI_OPCODE_MIN);
- return callBuiltin(func, in1, in2);
-}
-
-
-std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- llvm::Function *func = function(TGSI_OPCODE_MAX);
- return callBuiltin(func, in1, in2);
-}
-
-void checkFunction(Function *func)
-{
- for (Function::const_iterator BI = func->begin(), BE = func->end();
- BI != BE; ++BI) {
- const BasicBlock &BB = *BI;
- for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end();
- II != IE; ++II) {
- const Instruction &I = *II;
- std::cout<< "Instr = "<<I;
- for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) {
- const Value *Op = I.getOperand(op);
- std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl;
- //I->setOperand(op, V);
- }
- }
- }
-}
-
void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op)
{
assert(originalFunc);
@@ -492,28 +520,4 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op)
}
}
-std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx"));
- res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby"));
- res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz"));
- res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw"));
-
- return res;
-}
-
-std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in)
-{
- llvm::Function *func = function(TGSI_OPCODE_LIT);
- return callBuiltin(func, in);
-}
-
-std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in)
-{
- llvm::Function *func = function(TGSI_OPCODE_RSQ);
- return callBuiltin(func, in);
-}
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h
index 3817fdc904..d6831e0a6b 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.h
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.h
@@ -76,6 +76,7 @@ public:
void end();
std::vector<llvm::Value*> extractVector(llvm::Value *vector);
+ llvm::IRBuilder<>* getIRBuilder();
private:
const char * name(const char *prefix) const;
llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y,
diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp
index 6f373f6dd5..73df24c976 100644
--- a/src/gallium/auxiliary/gallivm/storage.cpp
+++ b/src/gallium/auxiliary/gallivm/storage.cpp
@@ -323,7 +323,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx,
if (indIdx) {
getElem = GetElementPtrInst::Create(ptr,
- BinaryOperator::create(Instruction::Add,
+ BinaryOperator::Create(Instruction::Add,
indIdx,
constantInt(idx),
name("add"),
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp
index 78d754371f..4984ce985c 100644
--- a/src/gallium/auxiliary/gallivm/storagesoa.cpp
+++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp
@@ -30,7 +30,7 @@
#include "gallivm_p.h"
#include "pipe/p_shader_tokens.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include <llvm/BasicBlock.h>
#include <llvm/Module.h>
@@ -48,13 +48,11 @@ using namespace llvm;
StorageSoa::StorageSoa(llvm::BasicBlock *block,
llvm::Value *input,
llvm::Value *output,
- llvm::Value *consts,
- llvm::Value *temps)
+ llvm::Value *consts)
: m_block(block),
m_input(input),
m_output(output),
m_consts(consts),
- m_temps(temps),
m_immediates(0),
m_idx(0)
{
@@ -93,7 +91,7 @@ void StorageSoa::declareImmediates()
std::vector<float> vals(4);
std::vector<Constant*> channelArray;
- vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0];
+ vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3];
llvm::Constant *xChannel = createConstGlobalVector(vals);
vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1];
@@ -144,22 +142,43 @@ std::vector<llvm::Value*> StorageSoa::inputElement(llvm::Value *idx)
return res;
}
-std::vector<llvm::Value*> StorageSoa::constElement(llvm::Value *idx)
+/* Broadcasts one element of a loaded constant vector to all four lanes.
+ *
+ * m_builder  builder used to emit the extract/insert instructions
+ * vector     the loaded vector value to read from
+ * cc         index of the element to replicate
+ *
+ * Returns a new <4 x float> value whose four lanes all hold element cc of
+ * vector.  The extracted scalar is kept in a plain local; the previous
+ * four-entry std::vector only ever used its first slot and cost a heap
+ * allocation on every call.
+ */
+llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc)
+{
+   llvm::Value *scalar = m_builder->CreateExtractElement(vector,
+                                                         constantInt(cc),
+                                                         name("x"));
+
+   VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+   Constant *constVector = Constant::getNullValue(vectorType);
+   Value *res = m_builder->CreateInsertElement(constVector, scalar,
+                                               constantInt(0),
+                                               name("vecx"));
+   res = m_builder->CreateInsertElement(res, scalar, constantInt(1),
+                                        name("vecxx"));
+   res = m_builder->CreateInsertElement(res, scalar, constantInt(2),
+                                        name("vecxxx"));
+   res = m_builder->CreateInsertElement(res, scalar, constantInt(3),
+                                        name("vecxxxx"));
+   return res;
+}
+
+
+std::vector<llvm::Value*> StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx)
+{
+ llvm::Value* res;
+ std::vector<llvm::Value*> res2(4);
+ llvm::Value *xChannel;
xChannel = elementPointer(m_consts, idx, 0);
- yChannel = elementPointer(m_consts, idx, 1);
- zChannel = elementPointer(m_consts, idx, 2);
- wChannel = elementPointer(m_consts, idx, 3);
- res[0] = alignedArrayLoad(xChannel);
- res[1] = alignedArrayLoad(yChannel);
- res[2] = alignedArrayLoad(zChannel);
- res[3] = alignedArrayLoad(wChannel);
+ res = alignedArrayLoad(xChannel);
- return res;
+ res2[0]=unpackConstElement(m_builder, res,0);
+ res2[1]=unpackConstElement(m_builder, res,1);
+ res2[2]=unpackConstElement(m_builder, res,2);
+ res2[3]=unpackConstElement(m_builder, res,3);
+
+ return res2;
}
std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx)
@@ -174,14 +193,15 @@ std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx)
return res;
}
-std::vector<llvm::Value*> StorageSoa::tempElement(llvm::Value *idx)
+std::vector<llvm::Value*> StorageSoa::tempElement(llvm::IRBuilder<>* m_builder, int idx)
{
std::vector<llvm::Value*> res(4);
+ llvm::Value *temp = m_temps[idx];
- res[0] = element(m_temps, idx, 0);
- res[1] = element(m_temps, idx, 1);
- res[2] = element(m_temps, idx, 2);
- res[3] = element(m_temps, idx, 3);
+ res[0] = element(temp, constantInt(0), 0);
+ res[1] = element(temp, constantInt(0), 1);
+ res[2] = element(temp, constantInt(0), 2);
+ res[3] = element(temp, constantInt(0), 3);
return res;
}
@@ -260,6 +280,12 @@ llvm::Module * StorageSoa::currentModule() const
return m_block->getParent()->getParent();
}
+/* Wraps a host float in an LLVM floating-point constant. */
+llvm::Constant * StorageSoa::createConstGlobalFloat(const float val)
+{
+   return ConstantFP::get(APFloat(val));
+}
+
llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec)
{
VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
@@ -278,7 +304,7 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &v
}
std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle,
- llvm::Value *indIdx)
+ llvm::IRBuilder<>* m_builder,llvm::Value *indIdx)
{
std::vector<llvm::Value*> val(4);
@@ -299,10 +325,10 @@ std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, in
val = outputElement(realIndex);
break;
case TGSI_FILE_TEMPORARY:
- val = tempElement(realIndex);
+ val = tempElement(m_builder, idx);
break;
case TGSI_FILE_CONSTANT:
- val = constElement(realIndex);
+ val = constElement(m_builder, realIndex);
break;
case TGSI_FILE_IMMEDIATE:
val = immediateElement(realIndex);
@@ -328,19 +354,39 @@ std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, in
return res;
}
+/* Emits an alloca named "temp" for a [4 x <4 x float>] array into the
+ * builder's current insert block and returns the resulting instruction. */
+llvm::Value * StorageSoa::allocaTemp(llvm::IRBuilder<>* m_builder)
+{
+   VectorType *vec4 = VectorType::get(Type::FloatTy, 4);
+   ArrayType *slotType = ArrayType::get(vec4, 4);
+   AllocaInst *slot = new AllocaInst(slotType, "temp",
+                                     m_builder->GetInsertBlock());
+
+   return slot;
+}
+
+
void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
- int mask)
+ int mask, llvm::IRBuilder<>* m_builder)
{
llvm::Value *out = 0;
+ llvm::Value *realIndex = 0;
switch(type) {
case TGSI_FILE_OUTPUT:
out = m_output;
+ realIndex = constantInt(idx);
break;
case TGSI_FILE_TEMPORARY:
- out = m_temps;
+ // if that temp doesn't already exist, alloca it
+ if (m_temps.find(idx) == m_temps.end())
+ m_temps[idx] = allocaTemp(m_builder);
+
+ out = m_temps[idx];
+
+ realIndex = constantInt(0);
break;
case TGSI_FILE_INPUT:
out = m_input;
+ realIndex = constantInt(idx);
break;
case TGSI_FILE_ADDRESS: {
llvm::Value *addr = m_addresses[idx];
@@ -358,7 +404,6 @@ void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm
assert(0);
break;
}
- llvm::Value *realIndex = constantInt(idx);
if ((mask & TGSI_WRITEMASK_X)) {
llvm::Value *xChannel = elementPointer(out, realIndex, 0);
new StoreInst(val[0], xChannel, false, m_block);
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h
index ae2fc7c6ae..56886f85e7 100644
--- a/src/gallium/auxiliary/gallivm/storagesoa.h
+++ b/src/gallium/auxiliary/gallivm/storagesoa.h
@@ -29,6 +29,7 @@
#define STORAGESOA_H
#include <pipe/p_shader_tokens.h>
+#include <llvm/Support/IRBuilder.h>
#include <vector>
#include <list>
@@ -51,14 +52,13 @@ public:
StorageSoa(llvm::BasicBlock *block,
llvm::Value *input,
llvm::Value *output,
- llvm::Value *consts,
- llvm::Value *temps);
+ llvm::Value *consts);
std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle,
- llvm::Value *indIdx =0);
+ llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0);
void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
- int mask);
+ int mask, llvm::IRBuilder<>* m_builder);
void addImmediate(float *vec);
void declareImmediates();
@@ -76,12 +76,14 @@ private:
const char *name(const char *prefix) const;
llvm::Value *alignedArrayLoad(llvm::Value *val);
llvm::Module *currentModule() const;
+ llvm::Constant *createConstGlobalFloat(const float val);
llvm::Constant *createConstGlobalVector(const std::vector<float> &vec);
std::vector<llvm::Value*> inputElement(llvm::Value *indIdx);
- std::vector<llvm::Value*> constElement(llvm::Value *indIdx);
+ llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc);
+ std::vector<llvm::Value*> constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx);
std::vector<llvm::Value*> outputElement(llvm::Value *indIdx);
- std::vector<llvm::Value*> tempElement(llvm::Value *indIdx);
+ std::vector<llvm::Value*> tempElement(llvm::IRBuilder<>* m_builder, int idx);
std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx);
private:
llvm::BasicBlock *m_block;
@@ -89,12 +91,13 @@ private:
llvm::Value *m_input;
llvm::Value *m_output;
llvm::Value *m_consts;
- llvm::Value *m_temps;
+ std::map<int, llvm::Value*> m_temps;
llvm::GlobalVariable *m_immediates;
std::map<int, llvm::Value*> m_addresses;
std::vector<std::vector<float> > m_immediatesToFlush;
+ llvm::Value * allocaTemp(llvm::IRBuilder<>* m_builder);
mutable std::map<int, llvm::ConstantInt*> m_constInts;
mutable char m_name[32];
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 7292c0e366..5b08200d14 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -25,7 +25,7 @@
#include <llvm/ModuleProvider.h>
#include <llvm/Pass.h>
#include <llvm/PassManager.h>
-#include <llvm/ParameterAttributes.h>
+#include <llvm/Attributes.h>
#include <llvm/Support/PatternMatch.h>
#include <llvm/ExecutionEngine/JIT.h>
#include <llvm/ExecutionEngine/Interpreter.h>
@@ -52,8 +52,7 @@ static inline FunctionType *vertexShaderFunctionType()
// pass are castable to the following:
// [4 x <4 x float>] inputs,
// [4 x <4 x float>] output,
- // [4 x [4 x float]] consts,
- // [4 x <4 x float>] temps
+ // [4 x [1 x float]] consts,
std::vector<const Type*> funcArgs;
VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
@@ -61,13 +60,12 @@ static inline FunctionType *vertexShaderFunctionType()
PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0);
ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4);
- ArrayType *constsArray = ArrayType::get(floatArray, 4);
+ ArrayType *constsArray = ArrayType::get(floatArray, 1);
PointerType *constsArrayPtr = PointerType::get(constsArray, 0);
funcArgs.push_back(vectorArrayPtr);//inputs
funcArgs.push_back(vectorArrayPtr);//output
funcArgs.push_back(constsArrayPtr);//consts
- funcArgs.push_back(vectorArrayPtr);//temps
FunctionType *functionType = FunctionType::get(
/*Result=*/Type::VoidTy,
@@ -162,7 +160,7 @@ translate_immediate(Storage *storage,
{
float vec[4];
int i;
- for (i = 0; i < imm->Immediate.Size - 1; ++i) {
+ for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
vec[i] = imm->u.ImmediateFloat32[i].Float;
@@ -181,7 +179,7 @@ translate_immediateir(StorageSoa *storage,
{
float vec[4];
int i;
- for (i = 0; i < imm->Immediate.Size - 1; ++i) {
+ for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
vec[i] = imm->u.ImmediateFloat32[i].Float;
@@ -707,9 +705,8 @@ translate_instructionir(llvm::Module *module,
if (src->SrcRegister.Indirect) {
indIdx = storage->addrElement(src->SrcRegisterInd.Index);
}
-
val = storage->load((enum tgsi_file_type)src->SrcRegister.File,
- src->SrcRegister.Index, swizzle, indIdx);
+ src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx);
inputs[i] = val;
}
@@ -1025,9 +1022,9 @@ translate_instructionir(llvm::Module *module,
/* store results */
for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-
storage->store((enum tgsi_file_type)dst->DstRegister.File,
- dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ dst->DstRegister.Index, out, dst->DstRegister.WriteMask,
+ instr->getIRBuilder() );
}
}
@@ -1122,8 +1119,6 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
output->setName("outputs");
Value *consts = args++;
consts->setName("consts");
- Value *temps = args++;
- temps->setName("temps");
BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0);
@@ -1132,7 +1127,7 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
fi = tgsi_default_full_instruction();
fd = tgsi_default_full_declaration();
- StorageSoa storage(label_entry, input, output, consts, temps);
+ StorageSoa storage(label_entry, input, output, consts);
InstructionsSoa instr(mod, shader, label_entry, &storage);
while(!tgsi_parse_end_of_tokens(&parse)) {
diff --git a/src/gallium/auxiliary/indices/Makefile b/src/gallium/auxiliary/indices/Makefile
new file mode 100644
index 0000000000..25ee899c40
--- /dev/null
+++ b/src/gallium/auxiliary/indices/Makefile
@@ -0,0 +1,12 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+# Name of the library built from this directory.
+LIBNAME = indices
+# Only the generated file is listed as a C source.
+C_SOURCES = \
+ u_indices_gen.c
+# Pull in the rules shared with the other gallium directories (template not shown here).
+include ../../Makefile.template
+# Regenerate the C file from the script's standard output whenever the script changes.
+u_indices_gen.c: u_indices_gen.py
+ python $< > $@
diff --git a/src/gallium/auxiliary/indices/SConscript b/src/gallium/auxiliary/indices/SConscript
new file mode 100644
index 0000000000..65a43a9f64
--- /dev/null
+++ b/src/gallium/auxiliary/indices/SConscript
@@ -0,0 +1,17 @@
+Import('*')
+# Produce u_indices_gen.c by running u_indices_gen.py and capturing its stdout.
+env.CodeGenerate(
+ target = 'u_indices_gen.c',
+ script = 'u_indices_gen.py',
+ source = [],
+ command = 'python $SCRIPT > $TARGET'
+)
+# Build the library from the generated file only; u_indices.c is commented out of the list.
+indices = env.ConvenienceLibrary(
+ target = 'indices',
+ source = [
+# 'u_indices.c',
+ 'u_indices_gen.c',
+ ])
+# Insert the library at the front of the imported auxiliaries list.
+auxiliaries.insert(0, indices)
diff --git a/src/gallium/auxiliary/indices/u_indices.c b/src/gallium/auxiliary/indices/u_indices.c
new file mode 100644
index 0000000000..0cf7d88653
--- /dev/null
+++ b/src/gallium/auxiliary/indices/u_indices.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "u_indices.h"
+#include "u_indices_priv.h"
+
+/* Pass-through translator for 16-bit indices: copies nr index values
+ * from in to out without modifying them. */
+static void translate_memcpy_ushort( const void *in,
+                                     unsigned nr,
+                                     void *out )
+{
+   const size_t nbytes = nr * sizeof(short);
+
+   memcpy(out, in, nbytes);
+}
+
+/* Pass-through translator for 32-bit indices: copies nr index values
+ * from in to out without modifying them. */
+static void translate_memcpy_uint( const void *in,
+                                   unsigned nr,
+                                   void *out )
+{
+   const size_t nbytes = nr * sizeof(int);
+
+   memcpy(out, in, nbytes);
+}
+
+
+/**
+ * Choose a routine that rewrites an index buffer into a form the hardware
+ * can draw, and report what the rewritten buffer will look like.
+ *
+ * \param hw_mask         bitmask with bit (1 << prim) set for each primitive
+ *                        type the hardware draws directly
+ * \param prim            PIPE_PRIM_x type of the incoming indices
+ * \param in_index_size   size in bytes of one incoming index (1, 2 or 4)
+ * \param nr              number of incoming indices
+ * \param in_pv           PV_x setting of the incoming data
+ * \param out_pv          PV_x setting required for the output
+ * \param out_prim        receives the primitive type of the output
+ * \param out_index_size  receives the output index size: 4 when the input
+ *                        is 4 bytes wide, otherwise 2
+ * \param out_nr          receives the number of output indices
+ * \param out_translate   receives the routine that performs the rewrite
+ *
+ * \return U_TRANSLATE_MEMCPY when the indices can be copied unchanged,
+ *         U_TRANSLATE_NORMAL when a real translation is selected, or
+ *         U_TRANSLATE_ERROR for an unrecognised primitive type.
+ *
+ * NOTE: nr is unsigned, so the strip/fan/polygon counts below wrap around
+ * when fewer than two indices (one for line strips) are passed in.
+ */
+int u_index_translator( unsigned hw_mask,
+                        unsigned prim,
+                        unsigned in_index_size,
+                        unsigned nr,
+                        unsigned in_pv,
+                        unsigned out_pv,
+                        unsigned *out_prim,
+                        unsigned *out_index_size,
+                        unsigned *out_nr,
+                        u_translate_func *out_translate )
+{
+   unsigned in_idx;
+   unsigned out_idx;
+
+   u_index_init();
+
+   in_idx = in_size_idx(in_index_size);
+   *out_index_size = (in_index_size == 4) ? 4 : 2;
+   out_idx = out_size_idx(*out_index_size);
+
+   /* Hardware handles this primitive as-is and neither the index width nor
+    * the PV setting changes: a plain copy is enough.
+    */
+   if ((hw_mask & (1u << prim)) &&
+       in_index_size == *out_index_size &&
+       in_pv == out_pv)
+   {
+      if (in_index_size == 4)
+         *out_translate = translate_memcpy_uint;
+      else
+         *out_translate = translate_memcpy_ushort;
+
+      *out_prim = prim;
+      *out_nr = nr;
+
+      return U_TRANSLATE_MEMCPY;
+   }
+
+   /* Otherwise work out which list primitive the input decomposes into
+    * and how many indices that list will contain.
+    */
+   switch (prim) {
+   case PIPE_PRIM_POINTS:
+      *out_prim = PIPE_PRIM_POINTS;
+      *out_nr = nr;
+      break;
+
+   case PIPE_PRIM_LINES:
+      *out_prim = PIPE_PRIM_LINES;
+      *out_nr = nr;
+      break;
+
+   case PIPE_PRIM_LINE_STRIP:
+      *out_prim = PIPE_PRIM_LINES;
+      *out_nr = (nr - 1) * 2;
+      break;
+
+   case PIPE_PRIM_LINE_LOOP:
+      *out_prim = PIPE_PRIM_LINES;
+      *out_nr = nr * 2;
+      break;
+
+   case PIPE_PRIM_TRIANGLES:
+      *out_prim = PIPE_PRIM_TRIANGLES;
+      *out_nr = nr;
+      break;
+
+   case PIPE_PRIM_TRIANGLE_STRIP:
+   case PIPE_PRIM_TRIANGLE_FAN:
+   case PIPE_PRIM_QUAD_STRIP:
+   case PIPE_PRIM_POLYGON:
+      *out_prim = PIPE_PRIM_TRIANGLES;
+      *out_nr = (nr - 2) * 3;
+      break;
+
+   case PIPE_PRIM_QUADS:
+      *out_prim = PIPE_PRIM_TRIANGLES;
+      *out_nr = (nr / 4) * 6;
+      break;
+
+   default:
+      assert(0);
+      /* prim is not a known primitive here, so it must not be used as a
+       * table index; fall back to the POINTS entry instead.
+       */
+      *out_translate = translate[in_idx][out_idx][in_pv][out_pv][PIPE_PRIM_POINTS];
+      *out_prim = PIPE_PRIM_POINTS;
+      *out_nr = nr;
+      return U_TRANSLATE_ERROR;
+   }
+
+   *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+   return U_TRANSLATE_NORMAL;
+}
+
+
+
+
+
+/**
+ * Choose a routine that produces an index buffer for a non-indexed draw,
+ * and report what the generated buffer will look like.
+ *
+ * \param hw_mask         bitmask with bit (1 << prim) set for each primitive
+ *                        type the hardware draws directly
+ * \param prim            PIPE_PRIM_x type of the draw
+ * \param start           first vertex of the draw; only used together with
+ *                        nr to pick the output index size
+ * \param nr              number of vertices in the draw
+ * \param in_pv           PV_x setting of the incoming data
+ * \param out_pv          PV_x setting required for the output
+ * \param out_prim        receives the primitive type of the output
+ * \param out_index_size  receives 4 when start + nr exceeds 0xfffe,
+ *                        otherwise 2
+ * \param out_nr          receives the number of generated indices
+ * \param out_generate    receives the routine that writes the indices
+ *
+ * \return U_GENERATE_LINEAR when the hardware draws prim directly and the
+ *         PV settings match, U_GENERATE_ONE_OFF for line loops,
+ *         U_GENERATE_REUSABLE for the other known primitives, or
+ *         U_TRANSLATE_ERROR for an unrecognised primitive type.
+ *
+ * NOTE: nr is unsigned, so the strip/fan/polygon counts below wrap around
+ * when fewer than two vertices (one for line strips) are passed in.
+ */
+int u_index_generator( unsigned hw_mask,
+                       unsigned prim,
+                       unsigned start,
+                       unsigned nr,
+                       unsigned in_pv,
+                       unsigned out_pv,
+                       unsigned *out_prim,
+                       unsigned *out_index_size,
+                       unsigned *out_nr,
+                       u_generate_func *out_generate )
+
+{
+   unsigned out_idx;
+   int ret = U_GENERATE_REUSABLE;
+
+   u_index_init();
+
+   *out_index_size = ((start + nr) > 0xfffe) ? 4 : 2;
+   out_idx = out_size_idx(*out_index_size);
+
+   /* Hardware handles this primitive and the PV settings agree: the POINTS
+    * generator entry is used while the primitive type is passed through.
+    */
+   if ((hw_mask & (1u << prim)) &&
+       (in_pv == out_pv)) {
+
+      *out_generate = generate[out_idx][in_pv][out_pv][PIPE_PRIM_POINTS];
+      *out_prim = prim;
+      *out_nr = nr;
+      return U_GENERATE_LINEAR;
+   }
+
+   switch (prim) {
+   case PIPE_PRIM_POINTS:
+      *out_prim = PIPE_PRIM_POINTS;
+      *out_nr = nr;
+      break;
+
+   case PIPE_PRIM_LINES:
+      *out_prim = PIPE_PRIM_LINES;
+      *out_nr = nr;
+      break;
+
+   case PIPE_PRIM_LINE_STRIP:
+      *out_prim = PIPE_PRIM_LINES;
+      *out_nr = (nr - 1) * 2;
+      break;
+
+   case PIPE_PRIM_LINE_LOOP:
+      *out_prim = PIPE_PRIM_LINES;
+      *out_nr = nr * 2;
+      /* The only primitive reported as one-off rather than reusable. */
+      ret = U_GENERATE_ONE_OFF;
+      break;
+
+   case PIPE_PRIM_TRIANGLES:
+      *out_prim = PIPE_PRIM_TRIANGLES;
+      *out_nr = nr;
+      break;
+
+   case PIPE_PRIM_TRIANGLE_STRIP:
+   case PIPE_PRIM_TRIANGLE_FAN:
+   case PIPE_PRIM_QUAD_STRIP:
+   case PIPE_PRIM_POLYGON:
+      *out_prim = PIPE_PRIM_TRIANGLES;
+      *out_nr = (nr - 2) * 3;
+      break;
+
+   case PIPE_PRIM_QUADS:
+      *out_prim = PIPE_PRIM_TRIANGLES;
+      *out_nr = (nr / 4) * 6;
+      break;
+
+   default:
+      assert(0);
+      /* prim is not a known primitive here, so it must not be used as a
+       * table index; fall back to the POINTS entry instead.
+       */
+      *out_generate = generate[out_idx][in_pv][out_pv][PIPE_PRIM_POINTS];
+      *out_prim = PIPE_PRIM_POINTS;
+      *out_nr = nr;
+      return U_TRANSLATE_ERROR;
+   }
+
+   *out_generate = generate[out_idx][in_pv][out_pv][prim];
+   return ret;
+}
diff --git a/src/gallium/auxiliary/indices/u_indices.h b/src/gallium/auxiliary/indices/u_indices.h
new file mode 100644
index 0000000000..abf5a3037d
--- /dev/null
+++ b/src/gallium/auxiliary/indices/u_indices.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef U_INDICES_H
+#define U_INDICES_H
+
+#include "pipe/p_compiler.h"
+
+#define PV_FIRST 0
+#define PV_LAST 1
+#define PV_COUNT 2
+/* Rewrites indices read from 'in' into 'out'; 'nr' is an index count (presumably the number to produce — confirm against the generated translators). */
+typedef void (*u_translate_func)( const void *in,
+ unsigned nr,
+ void *out );
+/* Writes generated indices into 'out'; 'nr' is an index count (presumably the number to produce — confirm against the generated generators). */
+typedef void (*u_generate_func)( unsigned nr,
+ void *out );
+
+
+/* Return codes describe the translate/generate operation. Caller may
+ * be able to reuse translated indices under some circumstances.
+ */
+#define U_TRANSLATE_ERROR -1
+#define U_TRANSLATE_NORMAL 1
+#define U_TRANSLATE_MEMCPY 2
+#define U_GENERATE_LINEAR 3
+#define U_GENERATE_REUSABLE 4
+#define U_GENERATE_ONE_OFF 5
+
+
+void u_index_init( void );
+/* Selects a routine for rewriting an existing index buffer; the out_* arguments receive the resulting primitive type, index size, index count and the routine itself. Returns one of the U_TRANSLATE_x codes above. */
+int u_index_translator( unsigned hw_mask,
+ unsigned prim,
+ unsigned in_index_size,
+ unsigned nr,
+ unsigned in_pv, /* API */
+ unsigned out_pv, /* hardware */
+ unsigned *out_prim,
+ unsigned *out_index_size,
+ unsigned *out_nr,
+ u_translate_func *out_translate );
+
+/* Note that even when generating it is necessary to know what the
+ * API's PV is, as the indices generated will depend on whether it is
+ * the same as hardware or not, and in the case of triangle strips,
+ * whether it is first or last.
+ */
+int u_index_generator( unsigned hw_mask,
+ unsigned prim,
+ unsigned start,
+ unsigned nr,
+ unsigned in_pv, /* API */
+ unsigned out_pv, /* hardware */
+ unsigned *out_prim,
+ unsigned *out_index_size,
+ unsigned *out_nr,
+ u_generate_func *out_generate );
+
+
+#endif
diff --git a/src/gallium/auxiliary/indices/u_indices_gen.c b/src/gallium/auxiliary/indices/u_indices_gen.c
new file mode 100644
index 0000000000..3c981e5d7f
--- /dev/null
+++ b/src/gallium/auxiliary/indices/u_indices_gen.c
@@ -0,0 +1,5129 @@
+/* File automatically generated by indices.py */
+
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+/**
+ * @file
+ * Functions to translate and generate index lists
+ */
+
+#include "indices/u_indices.h"
+#include "indices/u_indices_priv.h"
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+
+
+/* Map an output index size in bytes (4 or 2) to its OUT_* slot. */
+static unsigned out_size_idx( unsigned index_size )
+{
+   if (index_size == 4) return OUT_UINT;
+   if (index_size == 2) return OUT_USHORT;
+   assert(0);   /* any other size is unexpected */
+   return OUT_USHORT;
+}
+
+/* Map an input index size in bytes (4, 2 or 1) to its IN_* slot. */
+static unsigned in_size_idx( unsigned index_size )
+{
+   if (index_size == 4) return IN_UINT;
+   if (index_size == 2) return IN_USHORT;
+   if (index_size == 1) return IN_UBYTE;
+   assert(0);   /* any other size is unexpected */
+   return IN_UBYTE;
+}
+
+/* Routine tables; per the dimension names: (input size,) output size, two PV selectors, primitive. */
+static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
+static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
+
+
+
+/* Points: fill the output with the identity sequence 0..nr-1. */
+static void generate_points_ushort_first2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (ushort)v;
+   }
+}
+/* Line list: every index pair is written as (v, v+1). */
+static void generate_lines_ushort_first2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (ushort)v;
+      dst[v + 1] = (ushort)(v + 1);
+   }
+}
+/* Line strip -> line list: segment v is written as (v, v+1). */
+static void generate_linestrip_ushort_first2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 2, v++) {
+      dst[o] = (ushort)v;
+      dst[o + 1] = (ushort)(v + 1);
+   }
+}
+/* Line loop -> line list; nr counts output indices. Writes nothing when nr < 2. */
+static void generate_lineloop_ushort_first2first(
+   unsigned nr, void *_out )
+{
+   ushort *out = (ushort*)_out;
+   unsigned i, j;
+   if (nr < 2) return;   /* unsigned nr - 2 would wrap and overrun 'out' */
+   for (j = i = 0; j < nr - 2; j+=2, i++) {
+      (out+j)[0] = (ushort)(i);
+      (out+j)[1] = (ushort)(i+1);
+   }
+   (out+j)[0] = (ushort)(i);   /* closing segment back to vertex 0 */
+   (out+j)[1] = (ushort)(0);
+}
+/* Triangle list: every triple is written as (v, v+1, v+2). */
+static void generate_tris_ushort_first2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 3) {
+      dst[v] = (ushort)v;
+      dst[v + 1] = (ushort)(v + 1);
+      dst[v + 2] = (ushort)(v + 2);
+   }
+}
+/* Triangle strip -> list: (v, v+1, v+2), last two swapped when v is odd. */
+static void generate_tristrip_ushort_first2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)v;
+      dst[o + 1] = (ushort)(v + 1 + (v & 1));
+      dst[o + 2] = (ushort)(v + 2 - (v & 1));
+   }
+}
+/* Triangle fan -> list: triangle v is written as (0, v+1, v+2). */
+static void generate_trifan_ushort_first2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)0;
+      dst[o + 1] = (ushort)(v + 1);
+      dst[o + 2] = (ushort)(v + 2);
+   }
+}
+/* Quad list -> two triangles per quad: (v, v+1, v+3) and (v+1, v+2, v+3). */
+static void generate_quads_ushort_first2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 4) {
+      dst[o] = (ushort)v;
+      dst[o + 1] = (ushort)(v + 1);
+      dst[o + 2] = (ushort)(v + 3);
+      dst[o + 3] = (ushort)(v + 1);
+      dst[o + 4] = (ushort)(v + 2);
+      dst[o + 5] = (ushort)(v + 3);
+   }
+}
+/* Quad strip -> two triangles per quad: (v+2, v, v+3) and (v, v+1, v+3). */
+static void generate_quadstrip_ushort_first2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 2) {
+      dst[o] = (ushort)(v + 2);
+      dst[o + 1] = (ushort)v;
+      dst[o + 2] = (ushort)(v + 3);
+      dst[o + 3] = (ushort)v;
+      dst[o + 4] = (ushort)(v + 1);
+      dst[o + 5] = (ushort)(v + 3);
+   }
+}
+/* Polygon -> triangle list around vertex 0: (0, v+1, v+2). */
+static void generate_polygon_ushort_first2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)0;
+      dst[o + 1] = (ushort)(v + 1);
+      dst[o + 2] = (ushort)(v + 2);
+   }
+}
+/* Points: fill the output with the identity sequence 0..nr-1. */
+static void generate_points_ushort_first2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (ushort)v;
+   }
+}
+/* Line list: every index pair is written swapped, as (v+1, v). */
+static void generate_lines_ushort_first2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (ushort)(v + 1);
+      dst[v + 1] = (ushort)v;
+   }
+}
+/* Line strip -> line list: segment v is written swapped, as (v+1, v). */
+static void generate_linestrip_ushort_first2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 2, v++) {
+      dst[o] = (ushort)(v + 1);
+      dst[o + 1] = (ushort)v;
+   }
+}
+/* Line loop -> line list, pairs swapped; nr counts output indices. Writes nothing when nr < 2. */
+static void generate_lineloop_ushort_first2last(
+   unsigned nr, void *_out )
+{
+   ushort *out = (ushort*)_out;
+   unsigned i, j;
+   if (nr < 2) return;   /* unsigned nr - 2 would wrap and overrun 'out' */
+   for (j = i = 0; j < nr - 2; j+=2, i++) {
+      (out+j)[0] = (ushort)(i+1);
+      (out+j)[1] = (ushort)(i);
+   }
+   (out+j)[0] = (ushort)(0);   /* closing segment joins vertex 0 and the last vertex */
+   (out+j)[1] = (ushort)(i);
+}
+/* Triangle list: every triple is rotated to (v+1, v+2, v). */
+static void generate_tris_ushort_first2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 3) {
+      dst[v] = (ushort)(v + 1);
+      dst[v + 1] = (ushort)(v + 2);
+      dst[v + 2] = (ushort)v;
+   }
+}
+/* Triangle strip -> list: (v+1, v+2, v), first two swapped when v is odd. */
+static void generate_tristrip_ushort_first2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)(v + 1 + (v & 1));
+      dst[o + 1] = (ushort)(v + 2 - (v & 1));
+      dst[o + 2] = (ushort)v;
+   }
+}
+/* Triangle fan -> list: triangle v is written as (v+1, v+2, 0). */
+static void generate_trifan_ushort_first2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)(v + 1);
+      dst[o + 1] = (ushort)(v + 2);
+      dst[o + 2] = (ushort)0;
+   }
+}
+/* Quad list -> two triangles per quad: (v+1, v+3, v) and (v+2, v+3, v+1). */
+static void generate_quads_ushort_first2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 4) {
+      dst[o] = (ushort)(v + 1);
+      dst[o + 1] = (ushort)(v + 3);
+      dst[o + 2] = (ushort)v;
+      dst[o + 3] = (ushort)(v + 2);
+      dst[o + 4] = (ushort)(v + 3);
+      dst[o + 5] = (ushort)(v + 1);
+   }
+}
+/* Quad strip -> two triangles per quad: (v, v+3, v+2) and (v+1, v+3, v). */
+static void generate_quadstrip_ushort_first2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 2) {
+      dst[o] = (ushort)v;
+      dst[o + 1] = (ushort)(v + 3);
+      dst[o + 2] = (ushort)(v + 2);
+      dst[o + 3] = (ushort)(v + 1);
+      dst[o + 4] = (ushort)(v + 3);
+      dst[o + 5] = (ushort)v;
+   }
+}
+/* Polygon -> triangle list around vertex 0: (v+1, v+2, 0). */
+static void generate_polygon_ushort_first2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)(v + 1);
+      dst[o + 1] = (ushort)(v + 2);
+      dst[o + 2] = (ushort)0;
+   }
+}
+/* Points: fill the output with the identity sequence 0..nr-1. */
+static void generate_points_ushort_last2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (ushort)v;
+   }
+}
+/* Line list: every index pair is written swapped, as (v+1, v). */
+static void generate_lines_ushort_last2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (ushort)(v + 1);
+      dst[v + 1] = (ushort)v;
+   }
+}
+/* Line strip -> line list: segment v is written swapped, as (v+1, v). */
+static void generate_linestrip_ushort_last2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 2, v++) {
+      dst[o] = (ushort)(v + 1);
+      dst[o + 1] = (ushort)v;
+   }
+}
+/* Line loop -> line list, pairs swapped; nr counts output indices. Writes nothing when nr < 2. */
+static void generate_lineloop_ushort_last2first(
+   unsigned nr, void *_out )
+{
+   ushort *out = (ushort*)_out;
+   unsigned i, j;
+   if (nr < 2) return;   /* unsigned nr - 2 would wrap and overrun 'out' */
+   for (j = i = 0; j < nr - 2; j+=2, i++) {
+      (out+j)[0] = (ushort)(i+1);
+      (out+j)[1] = (ushort)(i);
+   }
+   (out+j)[0] = (ushort)(0);   /* closing segment joins vertex 0 and the last vertex */
+   (out+j)[1] = (ushort)(i);
+}
+/* Triangle list: every triple is rotated to (v+2, v, v+1). */
+static void generate_tris_ushort_last2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 3) {
+      dst[v] = (ushort)(v + 2);
+      dst[v + 1] = (ushort)v;
+      dst[v + 2] = (ushort)(v + 1);
+   }
+}
+/* Triangle strip -> list: (v+2, v, v+1), last two swapped when v is odd. */
+static void generate_tristrip_ushort_last2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)(v + 2);
+      dst[o + 1] = (ushort)(v + (v & 1));
+      dst[o + 2] = (ushort)(v + 1 - (v & 1));
+   }
+}
+/* Triangle fan -> list: triangle v is written as (v+2, 0, v+1). */
+static void generate_trifan_ushort_last2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)(v + 2);
+      dst[o + 1] = (ushort)0;
+      dst[o + 2] = (ushort)(v + 1);
+   }
+}
+/* Quad list -> two triangles per quad: (v+3, v, v+1) and (v+3, v+1, v+2). */
+static void generate_quads_ushort_last2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 4) {
+      dst[o] = (ushort)(v + 3);
+      dst[o + 1] = (ushort)v;
+      dst[o + 2] = (ushort)(v + 1);
+      dst[o + 3] = (ushort)(v + 3);
+      dst[o + 4] = (ushort)(v + 1);
+      dst[o + 5] = (ushort)(v + 2);
+   }
+}
+/* Quad strip -> two triangles per quad: (v+3, v+2, v) and (v+3, v, v+1). */
+static void generate_quadstrip_ushort_last2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 2) {
+      dst[o] = (ushort)(v + 3);
+      dst[o + 1] = (ushort)(v + 2);
+      dst[o + 2] = (ushort)v;
+      dst[o + 3] = (ushort)(v + 3);
+      dst[o + 4] = (ushort)v;
+      dst[o + 5] = (ushort)(v + 1);
+   }
+}
+/* Polygon -> triangle list around vertex 0: (0, v+1, v+2). */
+static void generate_polygon_ushort_last2first(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)0;
+      dst[o + 1] = (ushort)(v + 1);
+      dst[o + 2] = (ushort)(v + 2);
+   }
+}
+/* Points: fill the output with the identity sequence 0..nr-1. */
+static void generate_points_ushort_last2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (ushort)v;
+   }
+}
+/* Line list: every index pair is written as (v, v+1). */
+static void generate_lines_ushort_last2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (ushort)v;
+      dst[v + 1] = (ushort)(v + 1);
+   }
+}
+/* Line strip -> line list: segment v is written as (v, v+1). */
+static void generate_linestrip_ushort_last2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 2, v++) {
+      dst[o] = (ushort)v;
+      dst[o + 1] = (ushort)(v + 1);
+   }
+}
+/* Line loop -> line list; nr counts output indices. Writes nothing when nr < 2. */
+static void generate_lineloop_ushort_last2last(
+   unsigned nr, void *_out )
+{
+   ushort *out = (ushort*)_out;
+   unsigned i, j;
+   if (nr < 2) return;   /* unsigned nr - 2 would wrap and overrun 'out' */
+   for (j = i = 0; j < nr - 2; j+=2, i++) {
+      (out+j)[0] = (ushort)(i);
+      (out+j)[1] = (ushort)(i+1);
+   }
+   (out+j)[0] = (ushort)(i);   /* closing segment back to vertex 0 */
+   (out+j)[1] = (ushort)(0);
+}
+/* Triangle list: every triple is written as (v, v+1, v+2). */
+static void generate_tris_ushort_last2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 3) {
+      dst[v] = (ushort)v;
+      dst[v + 1] = (ushort)(v + 1);
+      dst[v + 2] = (ushort)(v + 2);
+   }
+}
+/* Triangle strip -> list: (v, v+1, v+2), first two swapped when v is odd. */
+static void generate_tristrip_ushort_last2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)(v + (v & 1));
+      dst[o + 1] = (ushort)(v + 1 - (v & 1));
+      dst[o + 2] = (ushort)(v + 2);
+   }
+}
+/* Triangle fan -> list: triangle v is written as (0, v+1, v+2). */
+static void generate_trifan_ushort_last2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)0;
+      dst[o + 1] = (ushort)(v + 1);
+      dst[o + 2] = (ushort)(v + 2);
+   }
+}
+/* Quad list -> two triangles per quad: (v, v+1, v+3) and (v+1, v+2, v+3). */
+static void generate_quads_ushort_last2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 4) {
+      dst[o] = (ushort)v;
+      dst[o + 1] = (ushort)(v + 1);
+      dst[o + 2] = (ushort)(v + 3);
+      dst[o + 3] = (ushort)(v + 1);
+      dst[o + 4] = (ushort)(v + 2);
+      dst[o + 5] = (ushort)(v + 3);
+   }
+}
+/* Quad strip -> two triangles per quad: (v+2, v, v+3) and (v, v+1, v+3). */
+static void generate_quadstrip_ushort_last2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 2) {
+      dst[o] = (ushort)(v + 2);
+      dst[o + 1] = (ushort)v;
+      dst[o + 2] = (ushort)(v + 3);
+      dst[o + 3] = (ushort)v;
+      dst[o + 4] = (ushort)(v + 1);
+      dst[o + 5] = (ushort)(v + 3);
+   }
+}
+/* Polygon -> triangle list around vertex 0: (v+1, v+2, 0). */
+static void generate_polygon_ushort_last2last(
+   unsigned nr,
+   void *_out )
+{
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)(v + 1);
+      dst[o + 1] = (ushort)(v + 2);
+      dst[o + 2] = (ushort)0;
+   }
+}
+/* Points: fill the output with the identity sequence 0..nr-1. */
+static void generate_points_uint_first2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (uint)v;
+   }
+}
+/* Line list: every index pair is written as (v, v+1). */
+static void generate_lines_uint_first2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (uint)v;
+      dst[v + 1] = (uint)(v + 1);
+   }
+}
+/* Line strip -> line list: segment v is written as (v, v+1). */
+static void generate_linestrip_uint_first2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 2, v++) {
+      dst[o] = (uint)v;
+      dst[o + 1] = (uint)(v + 1);
+   }
+}
+/* Line loop -> line list; nr counts output indices. Writes nothing when nr < 2. */
+static void generate_lineloop_uint_first2first(
+   unsigned nr, void *_out )
+{
+   uint *out = (uint*)_out;
+   unsigned i, j;
+   if (nr < 2) return;   /* unsigned nr - 2 would wrap and overrun 'out' */
+   for (j = i = 0; j < nr - 2; j+=2, i++) {
+      (out+j)[0] = (uint)(i);
+      (out+j)[1] = (uint)(i+1);
+   }
+   (out+j)[0] = (uint)(i);   /* closing segment back to vertex 0 */
+   (out+j)[1] = (uint)(0);
+}
+/* Triangle list: every triple is written as (v, v+1, v+2). */
+static void generate_tris_uint_first2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 3) {
+      dst[v] = (uint)v;
+      dst[v + 1] = (uint)(v + 1);
+      dst[v + 2] = (uint)(v + 2);
+   }
+}
+/* Triangle strip -> list: (v, v+1, v+2), last two swapped when v is odd. */
+static void generate_tristrip_uint_first2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)v;
+      dst[o + 1] = (uint)(v + 1 + (v & 1));
+      dst[o + 2] = (uint)(v + 2 - (v & 1));
+   }
+}
+/* Triangle fan -> list: triangle v is written as (0, v+1, v+2). */
+static void generate_trifan_uint_first2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)0;
+      dst[o + 1] = (uint)(v + 1);
+      dst[o + 2] = (uint)(v + 2);
+   }
+}
+/* Quad list -> two triangles per quad: (v, v+1, v+3) and (v+1, v+2, v+3). */
+static void generate_quads_uint_first2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 4) {
+      dst[o] = (uint)v;
+      dst[o + 1] = (uint)(v + 1);
+      dst[o + 2] = (uint)(v + 3);
+      dst[o + 3] = (uint)(v + 1);
+      dst[o + 4] = (uint)(v + 2);
+      dst[o + 5] = (uint)(v + 3);
+   }
+}
+/* Quad strip -> two triangles per quad: (v+2, v, v+3) and (v, v+1, v+3). */
+static void generate_quadstrip_uint_first2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 2) {
+      dst[o] = (uint)(v + 2);
+      dst[o + 1] = (uint)v;
+      dst[o + 2] = (uint)(v + 3);
+      dst[o + 3] = (uint)v;
+      dst[o + 4] = (uint)(v + 1);
+      dst[o + 5] = (uint)(v + 3);
+   }
+}
+/* Polygon -> triangle list around vertex 0: (0, v+1, v+2). */
+static void generate_polygon_uint_first2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)0;
+      dst[o + 1] = (uint)(v + 1);
+      dst[o + 2] = (uint)(v + 2);
+   }
+}
+/* Points: fill the output with the identity sequence 0..nr-1. */
+static void generate_points_uint_first2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (uint)v;
+   }
+}
+/* Line list: every index pair is written swapped, as (v+1, v). */
+static void generate_lines_uint_first2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (uint)(v + 1);
+      dst[v + 1] = (uint)v;
+   }
+}
+/* Line strip -> line list: segment v is written swapped, as (v+1, v). */
+static void generate_linestrip_uint_first2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 2, v++) {
+      dst[o] = (uint)(v + 1);
+      dst[o + 1] = (uint)v;
+   }
+}
+/* Line loop -> line list, pairs swapped; nr counts output indices. Writes nothing when nr < 2. */
+static void generate_lineloop_uint_first2last(
+   unsigned nr, void *_out )
+{
+   uint *out = (uint*)_out;
+   unsigned i, j;
+   if (nr < 2) return;   /* unsigned nr - 2 would wrap and overrun 'out' */
+   for (j = i = 0; j < nr - 2; j+=2, i++) {
+      (out+j)[0] = (uint)(i+1);
+      (out+j)[1] = (uint)(i);
+   }
+   (out+j)[0] = (uint)(0);   /* closing segment joins vertex 0 and the last vertex */
+   (out+j)[1] = (uint)(i);
+}
+/* Triangle list: every triple is rotated to (v+1, v+2, v). */
+static void generate_tris_uint_first2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 3) {
+      dst[v] = (uint)(v + 1);
+      dst[v + 1] = (uint)(v + 2);
+      dst[v + 2] = (uint)v;
+   }
+}
+/* Triangle strip -> list: (v+1, v+2, v), first two swapped when v is odd. */
+static void generate_tristrip_uint_first2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)(v + 1 + (v & 1));
+      dst[o + 1] = (uint)(v + 2 - (v & 1));
+      dst[o + 2] = (uint)v;
+   }
+}
+/* Triangle fan -> list: triangle v is written as (v+1, v+2, 0). */
+static void generate_trifan_uint_first2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)(v + 1);
+      dst[o + 1] = (uint)(v + 2);
+      dst[o + 2] = (uint)0;
+   }
+}
+/* Quad list -> two triangles per quad: (v+1, v+3, v) and (v+2, v+3, v+1). */
+static void generate_quads_uint_first2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 4) {
+      dst[o] = (uint)(v + 1);
+      dst[o + 1] = (uint)(v + 3);
+      dst[o + 2] = (uint)v;
+      dst[o + 3] = (uint)(v + 2);
+      dst[o + 4] = (uint)(v + 3);
+      dst[o + 5] = (uint)(v + 1);
+   }
+}
+/* Quad strip -> two triangles per quad: (v, v+3, v+2) and (v+1, v+3, v). */
+static void generate_quadstrip_uint_first2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 2) {
+      dst[o] = (uint)v;
+      dst[o + 1] = (uint)(v + 3);
+      dst[o + 2] = (uint)(v + 2);
+      dst[o + 3] = (uint)(v + 1);
+      dst[o + 4] = (uint)(v + 3);
+      dst[o + 5] = (uint)v;
+   }
+}
+/* Polygon -> triangle list around vertex 0: (v+1, v+2, 0). */
+static void generate_polygon_uint_first2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)(v + 1);
+      dst[o + 1] = (uint)(v + 2);
+      dst[o + 2] = (uint)0;
+   }
+}
+/* Points: fill the output with the identity sequence 0..nr-1. */
+static void generate_points_uint_last2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (uint)v;
+   }
+}
+/* Line list: every index pair is written swapped, as (v+1, v). */
+static void generate_lines_uint_last2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (uint)(v + 1);
+      dst[v + 1] = (uint)v;
+   }
+}
+/* Line strip -> line list: segment v is written swapped, as (v+1, v). */
+static void generate_linestrip_uint_last2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 2, v++) {
+      dst[o] = (uint)(v + 1);
+      dst[o + 1] = (uint)v;
+   }
+}
+/* Line loop -> line list, pairs swapped; nr counts output indices. Writes nothing when nr < 2. */
+static void generate_lineloop_uint_last2first(
+   unsigned nr, void *_out )
+{
+   uint *out = (uint*)_out;
+   unsigned i, j;
+   if (nr < 2) return;   /* unsigned nr - 2 would wrap and overrun 'out' */
+   for (j = i = 0; j < nr - 2; j+=2, i++) {
+      (out+j)[0] = (uint)(i+1);
+      (out+j)[1] = (uint)(i);
+   }
+   (out+j)[0] = (uint)(0);   /* closing segment joins vertex 0 and the last vertex */
+   (out+j)[1] = (uint)(i);
+}
+/* Triangle list: every triple is rotated to (v+2, v, v+1). */
+static void generate_tris_uint_last2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 3) {
+      dst[v] = (uint)(v + 2);
+      dst[v + 1] = (uint)v;
+      dst[v + 2] = (uint)(v + 1);
+   }
+}
+/* Triangle strip -> list: (v+2, v, v+1), last two swapped when v is odd. */
+static void generate_tristrip_uint_last2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)(v + 2);
+      dst[o + 1] = (uint)(v + (v & 1));
+      dst[o + 2] = (uint)(v + 1 - (v & 1));
+   }
+}
+/* Triangle fan -> list: triangle v is written as (v+2, 0, v+1). */
+static void generate_trifan_uint_last2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)(v + 2);
+      dst[o + 1] = (uint)0;
+      dst[o + 2] = (uint)(v + 1);
+   }
+}
+/* Quad list -> two triangles per quad: (v+3, v, v+1) and (v+3, v+1, v+2). */
+static void generate_quads_uint_last2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 4) {
+      dst[o] = (uint)(v + 3);
+      dst[o + 1] = (uint)v;
+      dst[o + 2] = (uint)(v + 1);
+      dst[o + 3] = (uint)(v + 3);
+      dst[o + 4] = (uint)(v + 1);
+      dst[o + 5] = (uint)(v + 2);
+   }
+}
+/* Quad strip -> two triangles per quad: (v+3, v+2, v) and (v+3, v, v+1). */
+static void generate_quadstrip_uint_last2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 2) {
+      dst[o] = (uint)(v + 3);
+      dst[o + 1] = (uint)(v + 2);
+      dst[o + 2] = (uint)v;
+      dst[o + 3] = (uint)(v + 3);
+      dst[o + 4] = (uint)v;
+      dst[o + 5] = (uint)(v + 1);
+   }
+}
+/* Polygon -> triangle list around vertex 0: (0, v+1, v+2). */
+static void generate_polygon_uint_last2first(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)0;
+      dst[o + 1] = (uint)(v + 1);
+      dst[o + 2] = (uint)(v + 2);
+   }
+}
+/* Points: fill the output with the identity sequence 0..nr-1. */
+static void generate_points_uint_last2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (uint)v;
+   }
+}
+/* Line list: every index pair is written as (v, v+1). */
+static void generate_lines_uint_last2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (uint)v;
+      dst[v + 1] = (uint)(v + 1);
+   }
+}
+/* Line strip -> line list: segment v is written as (v, v+1). */
+static void generate_linestrip_uint_last2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 2, v++) {
+      dst[o] = (uint)v;
+      dst[o + 1] = (uint)(v + 1);
+   }
+}
+/* Line loop -> line list; nr counts output indices. Writes nothing when nr < 2. */
+static void generate_lineloop_uint_last2last(
+   unsigned nr, void *_out )
+{
+   uint *out = (uint*)_out;
+   unsigned i, j;
+   if (nr < 2) return;   /* unsigned nr - 2 would wrap and overrun 'out' */
+   for (j = i = 0; j < nr - 2; j+=2, i++) {
+      (out+j)[0] = (uint)(i);
+      (out+j)[1] = (uint)(i+1);
+   }
+   (out+j)[0] = (uint)(i);   /* closing segment back to vertex 0 */
+   (out+j)[1] = (uint)(0);
+}
+/* Triangle list: every triple is written as (v, v+1, v+2). */
+static void generate_tris_uint_last2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 3) {
+      dst[v] = (uint)v;
+      dst[v + 1] = (uint)(v + 1);
+      dst[v + 2] = (uint)(v + 2);
+   }
+}
+/* Triangle strip -> list: (v, v+1, v+2), first two swapped when v is odd. */
+static void generate_tristrip_uint_last2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)(v + (v & 1));
+      dst[o + 1] = (uint)(v + 1 - (v & 1));
+      dst[o + 2] = (uint)(v + 2);
+   }
+}
+/* Triangle fan -> list: triangle v is written as (0, v+1, v+2). */
+static void generate_trifan_uint_last2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)0;
+      dst[o + 1] = (uint)(v + 1);
+      dst[o + 2] = (uint)(v + 2);
+   }
+}
+/* Quad list -> two triangles per quad: (v, v+1, v+3) and (v+1, v+2, v+3). */
+static void generate_quads_uint_last2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 4) {
+      dst[o] = (uint)v;
+      dst[o + 1] = (uint)(v + 1);
+      dst[o + 2] = (uint)(v + 3);
+      dst[o + 3] = (uint)(v + 1);
+      dst[o + 4] = (uint)(v + 2);
+      dst[o + 5] = (uint)(v + 3);
+   }
+}
+/* Quad strip -> two triangles per quad: (v+2, v, v+3) and (v, v+1, v+3). */
+static void generate_quadstrip_uint_last2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 2) {
+      dst[o] = (uint)(v + 2);
+      dst[o + 1] = (uint)v;
+      dst[o + 2] = (uint)(v + 3);
+      dst[o + 3] = (uint)v;
+      dst[o + 4] = (uint)(v + 1);
+      dst[o + 5] = (uint)(v + 3);
+   }
+}
+/* Polygon -> triangle list around vertex 0: (v+1, v+2, 0). */
+static void generate_polygon_uint_last2last(
+   unsigned nr,
+   void *_out )
+{
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (uint)(v + 1);
+      dst[o + 1] = (uint)(v + 2);
+      dst[o + 2] = (uint)0;
+   }
+}
+/* Points: widen nr ubyte indices to ushort, order unchanged. */
+static void translate_points_ubyte2ushort_first2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (ushort)src[v];
+   }
+}
+/* Line list: widen ubyte indices to ushort, each pair kept as (v, v+1). */
+static void translate_lines_ubyte2ushort_first2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (ushort)src[v];
+      dst[v + 1] = (ushort)src[v + 1];
+   }
+}
+/* Line strip -> line list: segment v reads src[v], src[v+1] in that order. */
+static void translate_linestrip_ubyte2ushort_first2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 2, v++) {
+      dst[o] = (ushort)src[v];
+      dst[o + 1] = (ushort)src[v + 1];
+   }
+}
+/* Line loop -> line list (ubyte in, ushort out); nr counts output indices. No-op when nr < 2. */
+static void translate_lineloop_ubyte2ushort_first2first(
+   const void * _in,
+   unsigned nr, void *_out )
+{
+   const ubyte*in = (const ubyte*)_in;
+   ushort *out = (ushort*)_out;
+   unsigned i, j;
+   if (nr < 2) return;   /* unsigned nr - 2 would wrap and overrun both buffers */
+   for (j = i = 0; j < nr - 2; j+=2, i++) {
+      (out+j)[0] = (ushort)in[i];
+      (out+j)[1] = (ushort)in[i+1];
+   }
+   (out+j)[0] = (ushort)in[i];   /* closing segment back to in[0] */
+   (out+j)[1] = (ushort)in[0];
+}
+/* Triangle list: widen ubyte indices to ushort, triples kept as (v, v+1, v+2). */
+static void translate_tris_ubyte2ushort_first2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 3) {
+      dst[v] = (ushort)src[v];
+      dst[v + 1] = (ushort)src[v + 1];
+      dst[v + 2] = (ushort)src[v + 2];
+   }
+}
+/* Triangle strip -> list: reads (v, v+1, v+2), last two swapped when v is odd. */
+static void translate_tristrip_ubyte2ushort_first2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)src[v];
+      dst[o + 1] = (ushort)src[v + 1 + (v & 1)];
+      dst[o + 2] = (ushort)src[v + 2 - (v & 1)];
+   }
+}
+/* Triangle fan -> list: triangle v reads src[0], src[v+1], src[v+2]. */
+static void translate_trifan_ubyte2ushort_first2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)src[0];
+      dst[o + 1] = (ushort)src[v + 1];
+      dst[o + 2] = (ushort)src[v + 2];
+   }
+}
+/* Quad list -> two triangles per quad: reads (v, v+1, v+3) and (v+1, v+2, v+3). */
+static void translate_quads_ubyte2ushort_first2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 4) {
+      dst[o] = (ushort)src[v];
+      dst[o + 1] = (ushort)src[v + 1];
+      dst[o + 2] = (ushort)src[v + 3];
+      dst[o + 3] = (ushort)src[v + 1];
+      dst[o + 4] = (ushort)src[v + 2];
+      dst[o + 5] = (ushort)src[v + 3];
+   }
+}
+/* Quad strip -> two triangles per quad: reads (v+2, v, v+3) and (v, v+1, v+3). */
+static void translate_quadstrip_ubyte2ushort_first2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 2) {
+      dst[o] = (ushort)src[v + 2];
+      dst[o + 1] = (ushort)src[v];
+      dst[o + 2] = (ushort)src[v + 3];
+      dst[o + 3] = (ushort)src[v];
+      dst[o + 4] = (ushort)src[v + 1];
+      dst[o + 5] = (ushort)src[v + 3];
+   }
+}
+/* Polygon -> triangle list around src[0]: reads (0, v+1, v+2). */
+static void translate_polygon_ubyte2ushort_first2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)src[0];
+      dst[o + 1] = (ushort)src[v + 1];
+      dst[o + 2] = (ushort)src[v + 2];
+   }
+}
+/* Points: widen nr ubyte indices to ushort, order unchanged. */
+static void translate_points_ubyte2ushort_first2last(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (ushort)src[v];
+   }
+}
+/* Line list: widen ubyte indices to ushort, each pair swapped to (v+1, v). */
+static void translate_lines_ubyte2ushort_first2last(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (ushort)src[v + 1];
+      dst[v + 1] = (ushort)src[v];
+   }
+}
+/* Line strip -> line list: segment v reads src[v+1], src[v] in that order. */
+static void translate_linestrip_ubyte2ushort_first2last(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 2, v++) {
+      dst[o] = (ushort)src[v + 1];
+      dst[o + 1] = (ushort)src[v];
+   }
+}
+/* Line loop -> line list, pairs swapped (ubyte in, ushort out); nr counts output indices. No-op when nr < 2. */
+static void translate_lineloop_ubyte2ushort_first2last(
+   const void * _in,
+   unsigned nr, void *_out )
+{
+   const ubyte*in = (const ubyte*)_in;
+   ushort *out = (ushort*)_out;
+   unsigned i, j;
+   if (nr < 2) return;   /* unsigned nr - 2 would wrap and overrun both buffers */
+   for (j = i = 0; j < nr - 2; j+=2, i++) {
+      (out+j)[0] = (ushort)in[i+1];
+      (out+j)[1] = (ushort)in[i];
+   }
+   (out+j)[0] = (ushort)in[0];   /* closing segment joins in[0] and the last index */
+   (out+j)[1] = (ushort)in[i];
+}
+/* Triangle list: widen ubyte indices to ushort, triples rotated to (v+1, v+2, v). */
+static void translate_tris_ubyte2ushort_first2last(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 3) {
+      dst[v] = (ushort)src[v + 1];
+      dst[v + 1] = (ushort)src[v + 2];
+      dst[v + 2] = (ushort)src[v];
+   }
+}
+/* Triangle strip -> list: reads (v+1, v+2, v), first two swapped when v is odd. */
+static void translate_tristrip_ubyte2ushort_first2last(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)src[v + 1 + (v & 1)];
+      dst[o + 1] = (ushort)src[v + 2 - (v & 1)];
+      dst[o + 2] = (ushort)src[v];
+   }
+}
+/* Triangle fan -> list: triangle v reads src[v+1], src[v+2], src[0]. */
+static void translate_trifan_ubyte2ushort_first2last(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)src[v + 1];
+      dst[o + 1] = (ushort)src[v + 2];
+      dst[o + 2] = (ushort)src[0];
+   }
+}
+/* Quad list -> two triangles per quad: reads (v+1, v+3, v) and (v+2, v+3, v+1). */
+static void translate_quads_ubyte2ushort_first2last(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 4) {
+      dst[o] = (ushort)src[v + 1];
+      dst[o + 1] = (ushort)src[v + 3];
+      dst[o + 2] = (ushort)src[v];
+      dst[o + 3] = (ushort)src[v + 2];
+      dst[o + 4] = (ushort)src[v + 3];
+      dst[o + 5] = (ushort)src[v + 1];
+   }
+}
+/* Quad strip -> two triangles per quad: reads (v, v+3, v+2) and (v+1, v+3, v). */
+static void translate_quadstrip_ubyte2ushort_first2last(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 6, v += 2) {
+      dst[o] = (ushort)src[v];
+      dst[o + 1] = (ushort)src[v + 3];
+      dst[o + 2] = (ushort)src[v + 2];
+      dst[o + 3] = (ushort)src[v + 1];
+      dst[o + 4] = (ushort)src[v + 3];
+      dst[o + 5] = (ushort)src[v];
+   }
+}
+/* Polygon -> triangle list around src[0]: reads (v+1, v+2, 0). */
+static void translate_polygon_ubyte2ushort_first2last(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+   for (o = 0, v = 0; o < nr; o += 3, v++) {
+      dst[o] = (ushort)src[v + 1];
+      dst[o + 1] = (ushort)src[v + 2];
+      dst[o + 2] = (ushort)src[0];
+   }
+}
+/* Points: widen nr ubyte indices to ushort, order unchanged. */
+static void translate_points_ubyte2ushort_last2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v++) {
+      dst[v] = (ushort)src[v];
+   }
+}
+/* Line list: widen ubyte indices to ushort, each pair swapped to (v+1, v). */
+static void translate_lines_ubyte2ushort_last2first(
+   const void * _in,
+   unsigned nr,
+   void *_out )
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v;
+   for (v = 0; v < nr; v += 2) {
+      dst[v] = (ushort)src[v + 1];
+      dst[v + 1] = (ushort)src[v];
+   }
+}
+/**
+ * For each input position v write the pair (v+1, v).  The output cursor
+ * advances by 2 per pair and the loop runs while it is below nr.
+ * Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_linestrip_ubyte2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v];
+      o += 2;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the pair (v+1, v) while the output
+ * cursor is below nr - 2, then write one final pair (0, v) that refers
+ * back to input index 0.  Values are read as ubyte, stored as ushort.
+ *
+ * nr is unsigned, so for nr < 2 the bound nr - 2 would wrap around to a
+ * huge value and the loop would run far past both buffers; such calls
+ * return without writing anything.
+ */
+static void
+translate_lineloop_ubyte2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+
+   if (nr < 2) {
+      return;
+   }
+
+   for (v = 0, o = 0; o < nr - 2; o += 2, v++) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v];
+   }
+   /* final pair, back to input index 0 */
+   dst[o + 0] = src[0];
+   dst[o + 1] = src[v];
+}
+/**
+ * Copy indices three at a time, reordering each group as (2,0,1).  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_tris_ubyte2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k + 2];
+      dst[k + 1] = src[k];
+      dst[k + 2] = src[k + 1];
+      k += 3;
+   }
+}
+/**
+ * For each input position v write a triple: (v+2, v, v+1) when v is even,
+ * (v+2, v+1, v) when v is odd.  Runs while the output cursor is below nr.
+ * Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_tristrip_ubyte2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      const unsigned odd = v & 1;
+
+      dst[o + 0] = src[v + 2];
+      dst[o + 1] = src[v + odd];
+      dst[o + 2] = src[v + 1 - odd];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the triple (v+2, 0, v+1); input index 0
+ * is repeated as the middle entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_trifan_ubyte2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 2];
+      dst[o + 1] = src[0];
+      dst[o + 2] = src[v + 1];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For every 4 input indices write 6 output indices: the triples (3,0,1)
+ * and (3,1,2), taken relative to the current input position.  Runs while
+ * the output cursor is below nr.  Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_quads_ubyte2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 3];
+      dst[o + 1] = src[v + 0];
+      dst[o + 2] = src[v + 1];
+      dst[o + 3] = src[v + 3];
+      dst[o + 4] = src[v + 1];
+      dst[o + 5] = src[v + 2];
+      o += 6;
+      v += 4;
+   }
+}
+/**
+ * Advancing the input by 2 per step, write 6 output indices: the triples
+ * (3,2,0) and (3,0,1), taken relative to the current input position.  Runs
+ * while the output cursor is below nr.  Values are read as ubyte, stored
+ * as ushort.
+ */
+static void
+translate_quadstrip_ubyte2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 3];
+      dst[o + 1] = src[v + 2];
+      dst[o + 2] = src[v + 0];
+      dst[o + 3] = src[v + 3];
+      dst[o + 4] = src[v + 0];
+      dst[o + 5] = src[v + 1];
+      o += 6;
+      v += 2;
+   }
+}
+/**
+ * For each input position v write the triple (0, v+1, v+2); input index 0
+ * is repeated as the first entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_polygon_ubyte2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * Copy nr indices from _in to _out one-to-one and in order.
+ * Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_points_ubyte2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy indices two at a time, keeping the order inside each pair.  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_lines_ubyte2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k];
+      dst[k + 1] = src[k + 1];
+      k += 2;
+   }
+}
+/**
+ * For each input position v write the pair (v, v+1).  The output cursor
+ * advances by 2 per pair and the loop runs while it is below nr.
+ * Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_linestrip_ubyte2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1];
+      o += 2;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the pair (v, v+1) while the output
+ * cursor is below nr - 2, then write one final pair (v, 0) that refers
+ * back to input index 0.  Values are read as ubyte, stored as ushort.
+ *
+ * nr is unsigned, so for nr < 2 the bound nr - 2 would wrap around to a
+ * huge value and the loop would run far past both buffers; such calls
+ * return without writing anything.
+ */
+static void
+translate_lineloop_ubyte2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+
+   if (nr < 2) {
+      return;
+   }
+
+   for (v = 0, o = 0; o < nr - 2; o += 2, v++) {
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1];
+   }
+   /* final pair, back to input index 0 */
+   dst[o + 0] = src[v];
+   dst[o + 1] = src[0];
+}
+/**
+ * Copy indices three at a time, keeping the order inside each group.  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_tris_ubyte2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k];
+      dst[k + 1] = src[k + 1];
+      dst[k + 2] = src[k + 2];
+      k += 3;
+   }
+}
+/**
+ * For each input position v write a triple: (v, v+1, v+2) when v is even,
+ * (v+1, v, v+2) when v is odd.  Runs while the output cursor is below nr.
+ * Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_tristrip_ubyte2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      const unsigned odd = v & 1;
+
+      dst[o + 0] = src[v + odd];
+      dst[o + 1] = src[v + 1 - odd];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the triple (0, v+1, v+2); input index 0
+ * is repeated as the first entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_trifan_ubyte2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For every 4 input indices write 6 output indices: the triples (0,1,3)
+ * and (1,2,3), taken relative to the current input position.  Runs while
+ * the output cursor is below nr.  Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_quads_ubyte2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 3];
+      dst[o + 3] = src[v + 1];
+      dst[o + 4] = src[v + 2];
+      dst[o + 5] = src[v + 3];
+      o += 6;
+      v += 4;
+   }
+}
+/**
+ * Advancing the input by 2 per step, write 6 output indices: the triples
+ * (2,0,3) and (0,1,3), taken relative to the current input position.  Runs
+ * while the output cursor is below nr.  Values are read as ubyte, stored
+ * as ushort.
+ */
+static void
+translate_quadstrip_ubyte2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 2];
+      dst[o + 1] = src[v + 0];
+      dst[o + 2] = src[v + 3];
+      dst[o + 3] = src[v + 0];
+      dst[o + 4] = src[v + 1];
+      dst[o + 5] = src[v + 3];
+      o += 6;
+      v += 2;
+   }
+}
+/**
+ * For each input position v write the triple (v+1, v+2, 0); input index 0
+ * is repeated as the last entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as ushort.
+ */
+static void
+translate_polygon_ubyte2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v + 2];
+      dst[o + 2] = src[0];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * Copy nr indices from _in to _out one-to-one and in order.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_points_ubyte2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy indices two at a time, keeping the order inside each pair.  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_lines_ubyte2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k];
+      dst[k + 1] = src[k + 1];
+      k += 2;
+   }
+}
+/**
+ * For each input position v write the pair (v, v+1).  The output cursor
+ * advances by 2 per pair and the loop runs while it is below nr.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_linestrip_ubyte2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1];
+      o += 2;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the pair (v, v+1) while the output
+ * cursor is below nr - 2, then write one final pair (v, 0) that refers
+ * back to input index 0.  Values are read as ubyte, stored as uint.
+ *
+ * nr is unsigned, so for nr < 2 the bound nr - 2 would wrap around to a
+ * huge value and the loop would run far past both buffers; such calls
+ * return without writing anything.
+ */
+static void
+translate_lineloop_ubyte2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+
+   if (nr < 2) {
+      return;
+   }
+
+   for (v = 0, o = 0; o < nr - 2; o += 2, v++) {
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1];
+   }
+   /* final pair, back to input index 0 */
+   dst[o + 0] = src[v];
+   dst[o + 1] = src[0];
+}
+/**
+ * Copy indices three at a time, keeping the order inside each group.  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_tris_ubyte2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k];
+      dst[k + 1] = src[k + 1];
+      dst[k + 2] = src[k + 2];
+      k += 3;
+   }
+}
+/**
+ * For each input position v write a triple: (v, v+1, v+2) when v is even,
+ * (v, v+2, v+1) when v is odd.  Runs while the output cursor is below nr.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_tristrip_ubyte2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      const unsigned odd = v & 1;
+
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1 + odd];
+      dst[o + 2] = src[v + 2 - odd];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the triple (0, v+1, v+2); input index 0
+ * is repeated as the first entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_trifan_ubyte2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For every 4 input indices write 6 output indices: the triples (0,1,3)
+ * and (1,2,3), taken relative to the current input position.  Runs while
+ * the output cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_quads_ubyte2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 3];
+      dst[o + 3] = src[v + 1];
+      dst[o + 4] = src[v + 2];
+      dst[o + 5] = src[v + 3];
+      o += 6;
+      v += 4;
+   }
+}
+/**
+ * Advancing the input by 2 per step, write 6 output indices: the triples
+ * (2,0,3) and (0,1,3), taken relative to the current input position.  Runs
+ * while the output cursor is below nr.  Values are read as ubyte, stored
+ * as uint.
+ */
+static void
+translate_quadstrip_ubyte2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 2];
+      dst[o + 1] = src[v + 0];
+      dst[o + 2] = src[v + 3];
+      dst[o + 3] = src[v + 0];
+      dst[o + 4] = src[v + 1];
+      dst[o + 5] = src[v + 3];
+      o += 6;
+      v += 2;
+   }
+}
+/**
+ * For each input position v write the triple (0, v+1, v+2); input index 0
+ * is repeated as the first entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_polygon_ubyte2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * Copy nr indices from _in to _out one-to-one and in order.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_points_ubyte2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy indices two at a time, swapping the members of each pair.  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_lines_ubyte2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k + 1];
+      dst[k + 1] = src[k];
+      k += 2;
+   }
+}
+/**
+ * For each input position v write the pair (v+1, v).  The output cursor
+ * advances by 2 per pair and the loop runs while it is below nr.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_linestrip_ubyte2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v];
+      o += 2;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the pair (v+1, v) while the output
+ * cursor is below nr - 2, then write one final pair (0, v) that refers
+ * back to input index 0.  Values are read as ubyte, stored as uint.
+ *
+ * nr is unsigned, so for nr < 2 the bound nr - 2 would wrap around to a
+ * huge value and the loop would run far past both buffers; such calls
+ * return without writing anything.
+ */
+static void
+translate_lineloop_ubyte2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+
+   if (nr < 2) {
+      return;
+   }
+
+   for (v = 0, o = 0; o < nr - 2; o += 2, v++) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v];
+   }
+   /* final pair, back to input index 0 */
+   dst[o + 0] = src[0];
+   dst[o + 1] = src[v];
+}
+/**
+ * Copy indices three at a time, reordering each group as (1,2,0).  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_tris_ubyte2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k + 1];
+      dst[k + 1] = src[k + 2];
+      dst[k + 2] = src[k];
+      k += 3;
+   }
+}
+/**
+ * For each input position v write a triple: (v+1, v+2, v) when v is even,
+ * (v+2, v+1, v) when v is odd.  Runs while the output cursor is below nr.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_tristrip_ubyte2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      const unsigned odd = v & 1;
+
+      dst[o + 0] = src[v + 1 + odd];
+      dst[o + 1] = src[v + 2 - odd];
+      dst[o + 2] = src[v];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the triple (v+1, v+2, 0); input index 0
+ * is repeated as the last entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_trifan_ubyte2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v + 2];
+      dst[o + 2] = src[0];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For every 4 input indices write 6 output indices: the triples (1,3,0)
+ * and (2,3,1), taken relative to the current input position.  Runs while
+ * the output cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_quads_ubyte2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v + 3];
+      dst[o + 2] = src[v + 0];
+      dst[o + 3] = src[v + 2];
+      dst[o + 4] = src[v + 3];
+      dst[o + 5] = src[v + 1];
+      o += 6;
+      v += 4;
+   }
+}
+/**
+ * Advancing the input by 2 per step, write 6 output indices: the triples
+ * (0,3,2) and (1,3,0), taken relative to the current input position.  Runs
+ * while the output cursor is below nr.  Values are read as ubyte, stored
+ * as uint.
+ */
+static void
+translate_quadstrip_ubyte2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 0];
+      dst[o + 1] = src[v + 3];
+      dst[o + 2] = src[v + 2];
+      dst[o + 3] = src[v + 1];
+      dst[o + 4] = src[v + 3];
+      dst[o + 5] = src[v + 0];
+      o += 6;
+      v += 2;
+   }
+}
+/**
+ * For each input position v write the triple (v+1, v+2, 0); input index 0
+ * is repeated as the last entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_polygon_ubyte2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v + 2];
+      dst[o + 2] = src[0];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * Copy nr indices from _in to _out one-to-one and in order.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_points_ubyte2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy indices two at a time, swapping the members of each pair.  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_lines_ubyte2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k + 1];
+      dst[k + 1] = src[k];
+      k += 2;
+   }
+}
+/**
+ * For each input position v write the pair (v+1, v).  The output cursor
+ * advances by 2 per pair and the loop runs while it is below nr.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_linestrip_ubyte2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v];
+      o += 2;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the pair (v+1, v) while the output
+ * cursor is below nr - 2, then write one final pair (0, v) that refers
+ * back to input index 0.  Values are read as ubyte, stored as uint.
+ *
+ * nr is unsigned, so for nr < 2 the bound nr - 2 would wrap around to a
+ * huge value and the loop would run far past both buffers; such calls
+ * return without writing anything.
+ */
+static void
+translate_lineloop_ubyte2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+
+   if (nr < 2) {
+      return;
+   }
+
+   for (v = 0, o = 0; o < nr - 2; o += 2, v++) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v];
+   }
+   /* final pair, back to input index 0 */
+   dst[o + 0] = src[0];
+   dst[o + 1] = src[v];
+}
+/**
+ * Copy indices three at a time, reordering each group as (2,0,1).  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_tris_ubyte2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k + 2];
+      dst[k + 1] = src[k];
+      dst[k + 2] = src[k + 1];
+      k += 3;
+   }
+}
+/**
+ * For each input position v write a triple: (v+2, v, v+1) when v is even,
+ * (v+2, v+1, v) when v is odd.  Runs while the output cursor is below nr.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_tristrip_ubyte2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      const unsigned odd = v & 1;
+
+      dst[o + 0] = src[v + 2];
+      dst[o + 1] = src[v + odd];
+      dst[o + 2] = src[v + 1 - odd];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the triple (v+2, 0, v+1); input index 0
+ * is repeated as the middle entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_trifan_ubyte2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 2];
+      dst[o + 1] = src[0];
+      dst[o + 2] = src[v + 1];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For every 4 input indices write 6 output indices: the triples (3,0,1)
+ * and (3,1,2), taken relative to the current input position.  Runs while
+ * the output cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_quads_ubyte2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 3];
+      dst[o + 1] = src[v + 0];
+      dst[o + 2] = src[v + 1];
+      dst[o + 3] = src[v + 3];
+      dst[o + 4] = src[v + 1];
+      dst[o + 5] = src[v + 2];
+      o += 6;
+      v += 4;
+   }
+}
+/**
+ * Advancing the input by 2 per step, write 6 output indices: the triples
+ * (3,2,0) and (3,0,1), taken relative to the current input position.  Runs
+ * while the output cursor is below nr.  Values are read as ubyte, stored
+ * as uint.
+ */
+static void
+translate_quadstrip_ubyte2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 3];
+      dst[o + 1] = src[v + 2];
+      dst[o + 2] = src[v + 0];
+      dst[o + 3] = src[v + 3];
+      dst[o + 4] = src[v + 0];
+      dst[o + 5] = src[v + 1];
+      o += 6;
+      v += 2;
+   }
+}
+/**
+ * For each input position v write the triple (0, v+1, v+2); input index 0
+ * is repeated as the first entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_polygon_ubyte2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * Copy nr indices from _in to _out one-to-one and in order.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_points_ubyte2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy indices two at a time, keeping the order inside each pair.  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_lines_ubyte2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k];
+      dst[k + 1] = src[k + 1];
+      k += 2;
+   }
+}
+/**
+ * For each input position v write the pair (v, v+1).  The output cursor
+ * advances by 2 per pair and the loop runs while it is below nr.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_linestrip_ubyte2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1];
+      o += 2;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the pair (v, v+1) while the output
+ * cursor is below nr - 2, then write one final pair (v, 0) that refers
+ * back to input index 0.  Values are read as ubyte, stored as uint.
+ *
+ * nr is unsigned, so for nr < 2 the bound nr - 2 would wrap around to a
+ * huge value and the loop would run far past both buffers; such calls
+ * return without writing anything.
+ */
+static void
+translate_lineloop_ubyte2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v, o;
+
+   if (nr < 2) {
+      return;
+   }
+
+   for (v = 0, o = 0; o < nr - 2; o += 2, v++) {
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1];
+   }
+   /* final pair, back to input index 0 */
+   dst[o + 0] = src[v];
+   dst[o + 1] = src[0];
+}
+/**
+ * Copy indices three at a time, keeping the order inside each group.  Runs
+ * while the cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_tris_ubyte2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k];
+      dst[k + 1] = src[k + 1];
+      dst[k + 2] = src[k + 2];
+      k += 3;
+   }
+}
+/**
+ * For each input position v write a triple: (v, v+1, v+2) when v is even,
+ * (v+1, v, v+2) when v is odd.  Runs while the output cursor is below nr.
+ * Values are read as ubyte, stored as uint.
+ */
+static void
+translate_tristrip_ubyte2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      const unsigned odd = v & 1;
+
+      dst[o + 0] = src[v + odd];
+      dst[o + 1] = src[v + 1 - odd];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the triple (0, v+1, v+2); input index 0
+ * is repeated as the first entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_trifan_ubyte2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For every 4 input indices write 6 output indices: the triples (0,1,3)
+ * and (1,2,3), taken relative to the current input position.  Runs while
+ * the output cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_quads_ubyte2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 3];
+      dst[o + 3] = src[v + 1];
+      dst[o + 4] = src[v + 2];
+      dst[o + 5] = src[v + 3];
+      o += 6;
+      v += 4;
+   }
+}
+/**
+ * Advancing the input by 2 per step, write 6 output indices: the triples
+ * (2,0,3) and (0,1,3), taken relative to the current input position.  Runs
+ * while the output cursor is below nr.  Values are read as ubyte, stored
+ * as uint.
+ */
+static void
+translate_quadstrip_ubyte2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 2];
+      dst[o + 1] = src[v + 0];
+      dst[o + 2] = src[v + 3];
+      dst[o + 3] = src[v + 0];
+      dst[o + 4] = src[v + 1];
+      dst[o + 5] = src[v + 3];
+      o += 6;
+      v += 2;
+   }
+}
+/**
+ * For each input position v write the triple (v+1, v+2, 0); input index 0
+ * is repeated as the last entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read as ubyte, stored as uint.
+ */
+static void
+translate_polygon_ubyte2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ubyte *src = (const ubyte *)_in;
+   uint *dst = (uint *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v + 2];
+      dst[o + 2] = src[0];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * Copy nr indices from _in to _out one-to-one and in order.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_points_ushort2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy indices two at a time, keeping the order inside each pair.  Runs
+ * while the cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_lines_ushort2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k];
+      dst[k + 1] = src[k + 1];
+      k += 2;
+   }
+}
+/**
+ * For each input position v write the pair (v, v+1).  The output cursor
+ * advances by 2 per pair and the loop runs while it is below nr.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_linestrip_ushort2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1];
+      o += 2;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the pair (v, v+1) while the output
+ * cursor is below nr - 2, then write one final pair (v, 0) that refers
+ * back to input index 0.  Values are read and stored as ushort.
+ *
+ * nr is unsigned, so for nr < 2 the bound nr - 2 would wrap around to a
+ * huge value and the loop would run far past both buffers; such calls
+ * return without writing anything.
+ */
+static void
+translate_lineloop_ushort2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+
+   if (nr < 2) {
+      return;
+   }
+
+   for (v = 0, o = 0; o < nr - 2; o += 2, v++) {
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1];
+   }
+   /* final pair, back to input index 0 */
+   dst[o + 0] = src[v];
+   dst[o + 1] = src[0];
+}
+/**
+ * Copy indices three at a time, keeping the order inside each group.  Runs
+ * while the cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_tris_ushort2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k];
+      dst[k + 1] = src[k + 1];
+      dst[k + 2] = src[k + 2];
+      k += 3;
+   }
+}
+/**
+ * For each input position v write a triple: (v, v+1, v+2) when v is even,
+ * (v, v+2, v+1) when v is odd.  Runs while the output cursor is below nr.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_tristrip_ushort2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      const unsigned odd = v & 1;
+
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1 + odd];
+      dst[o + 2] = src[v + 2 - odd];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the triple (0, v+1, v+2); input index 0
+ * is repeated as the first entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_trifan_ushort2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For every 4 input indices write 6 output indices: the triples (0,1,3)
+ * and (1,2,3), taken relative to the current input position.  Runs while
+ * the output cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_quads_ushort2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 3];
+      dst[o + 3] = src[v + 1];
+      dst[o + 4] = src[v + 2];
+      dst[o + 5] = src[v + 3];
+      o += 6;
+      v += 4;
+   }
+}
+/**
+ * Advancing the input by 2 per step, write 6 output indices: the triples
+ * (2,0,3) and (0,1,3), taken relative to the current input position.  Runs
+ * while the output cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_quadstrip_ushort2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 2];
+      dst[o + 1] = src[v + 0];
+      dst[o + 2] = src[v + 3];
+      dst[o + 3] = src[v + 0];
+      dst[o + 4] = src[v + 1];
+      dst[o + 5] = src[v + 3];
+      o += 6;
+      v += 2;
+   }
+}
+/**
+ * For each input position v write the triple (0, v+1, v+2); input index 0
+ * is repeated as the first entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_polygon_ushort2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * Copy nr indices from _in to _out one-to-one and in order.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_points_ushort2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy indices two at a time, swapping the members of each pair.  Runs
+ * while the cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_lines_ushort2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k + 1];
+      dst[k + 1] = src[k];
+      k += 2;
+   }
+}
+/**
+ * For each input position v write the pair (v+1, v).  The output cursor
+ * advances by 2 per pair and the loop runs while it is below nr.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_linestrip_ushort2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v];
+      o += 2;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the pair (v+1, v) while the output
+ * cursor is below nr - 2, then write one final pair (0, v) that refers
+ * back to input index 0.  Values are read and stored as ushort.
+ *
+ * nr is unsigned, so for nr < 2 the bound nr - 2 would wrap around to a
+ * huge value and the loop would run far past both buffers; such calls
+ * return without writing anything.
+ */
+static void
+translate_lineloop_ushort2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+
+   if (nr < 2) {
+      return;
+   }
+
+   for (v = 0, o = 0; o < nr - 2; o += 2, v++) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v];
+   }
+   /* final pair, back to input index 0 */
+   dst[o + 0] = src[0];
+   dst[o + 1] = src[v];
+}
+/**
+ * Copy indices three at a time, reordering each group as (1,2,0).  Runs
+ * while the cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_tris_ushort2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k + 1];
+      dst[k + 1] = src[k + 2];
+      dst[k + 2] = src[k];
+      k += 3;
+   }
+}
+/**
+ * For each input position v write a triple: (v+1, v+2, v) when v is even,
+ * (v+2, v+1, v) when v is odd.  Runs while the output cursor is below nr.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_tristrip_ushort2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      const unsigned odd = v & 1;
+
+      dst[o + 0] = src[v + 1 + odd];
+      dst[o + 1] = src[v + 2 - odd];
+      dst[o + 2] = src[v];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the triple (v+1, v+2, 0); input index 0
+ * is repeated as the last entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_trifan_ushort2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v + 2];
+      dst[o + 2] = src[0];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For every 4 input indices write 6 output indices: the triples (1,3,0)
+ * and (2,3,1), taken relative to the current input position.  Runs while
+ * the output cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_quads_ushort2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v + 3];
+      dst[o + 2] = src[v + 0];
+      dst[o + 3] = src[v + 2];
+      dst[o + 4] = src[v + 3];
+      dst[o + 5] = src[v + 1];
+      o += 6;
+      v += 4;
+   }
+}
+/**
+ * Advancing the input by 2 per step, write 6 output indices: the triples
+ * (0,3,2) and (1,3,0), taken relative to the current input position.  Runs
+ * while the output cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_quadstrip_ushort2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 0];
+      dst[o + 1] = src[v + 3];
+      dst[o + 2] = src[v + 2];
+      dst[o + 3] = src[v + 1];
+      dst[o + 4] = src[v + 3];
+      dst[o + 5] = src[v + 0];
+      o += 6;
+      v += 2;
+   }
+}
+/**
+ * For each input position v write the triple (v+1, v+2, 0); input index 0
+ * is repeated as the last entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_polygon_ushort2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v + 2];
+      dst[o + 2] = src[0];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * Copy nr indices from _in to _out one-to-one and in order.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_points_ushort2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy indices two at a time, swapping the members of each pair.  Runs
+ * while the cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_lines_ushort2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k + 1];
+      dst[k + 1] = src[k];
+      k += 2;
+   }
+}
+/**
+ * For each input position v write the pair (v+1, v).  The output cursor
+ * advances by 2 per pair and the loop runs while it is below nr.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_linestrip_ushort2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v];
+      o += 2;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the pair (v+1, v) while the output
+ * cursor is below nr - 2, then write one final pair (0, v) that refers
+ * back to input index 0.  Values are read and stored as ushort.
+ *
+ * nr is unsigned, so for nr < 2 the bound nr - 2 would wrap around to a
+ * huge value and the loop would run far past both buffers; such calls
+ * return without writing anything.
+ */
+static void
+translate_lineloop_ushort2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v, o;
+
+   if (nr < 2) {
+      return;
+   }
+
+   for (v = 0, o = 0; o < nr - 2; o += 2, v++) {
+      dst[o + 0] = src[v + 1];
+      dst[o + 1] = src[v];
+   }
+   /* final pair, back to input index 0 */
+   dst[o + 0] = src[0];
+   dst[o + 1] = src[v];
+}
+/**
+ * Copy indices three at a time, reordering each group as (2,0,1).  Runs
+ * while the cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_tris_ushort2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k + 2];
+      dst[k + 1] = src[k];
+      dst[k + 2] = src[k + 1];
+      k += 3;
+   }
+}
+/**
+ * For each input position v write a triple: (v+2, v, v+1) when v is even,
+ * (v+2, v+1, v) when v is odd.  Runs while the output cursor is below nr.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_tristrip_ushort2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      const unsigned odd = v & 1;
+
+      dst[o + 0] = src[v + 2];
+      dst[o + 1] = src[v + odd];
+      dst[o + 2] = src[v + 1 - odd];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For each input position v write the triple (v+2, 0, v+1); input index 0
+ * is repeated as the middle entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_trifan_ushort2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 2];
+      dst[o + 1] = src[0];
+      dst[o + 2] = src[v + 1];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * For every 4 input indices write 6 output indices: the triples (3,0,1)
+ * and (3,1,2), taken relative to the current input position.  Runs while
+ * the output cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_quads_ushort2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 3];
+      dst[o + 1] = src[v + 0];
+      dst[o + 2] = src[v + 1];
+      dst[o + 3] = src[v + 3];
+      dst[o + 4] = src[v + 1];
+      dst[o + 5] = src[v + 2];
+      o += 6;
+      v += 4;
+   }
+}
+/**
+ * Advancing the input by 2 per step, write 6 output indices: the triples
+ * (3,2,0) and (3,0,1), taken relative to the current input position.  Runs
+ * while the output cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_quadstrip_ushort2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v + 3];
+      dst[o + 1] = src[v + 2];
+      dst[o + 2] = src[v + 0];
+      dst[o + 3] = src[v + 3];
+      dst[o + 4] = src[v + 0];
+      dst[o + 5] = src[v + 1];
+      o += 6;
+      v += 2;
+   }
+}
+/**
+ * For each input position v write the triple (0, v+1, v+2); input index 0
+ * is repeated as the first entry of every triple.  Runs while the output
+ * cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_polygon_ushort2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[0];
+      dst[o + 1] = src[v + 1];
+      dst[o + 2] = src[v + 2];
+      o += 3;
+      v += 1;
+   }
+}
+/**
+ * Copy nr indices from _in to _out one-to-one and in order.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_points_ushort2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy indices two at a time, keeping the order inside each pair.  Runs
+ * while the cursor is below nr.  Values are read and stored as ushort.
+ */
+static void
+translate_lines_ushort2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k = 0;   /* shared input/output cursor */
+
+   while (k < nr) {
+      dst[k + 0] = src[k];
+      dst[k + 1] = src[k + 1];
+      k += 2;
+   }
+}
+/**
+ * For each input position v write the pair (v, v+1).  The output cursor
+ * advances by 2 per pair and the loop runs while it is below nr.
+ * Values are read and stored as ushort.
+ */
+static void
+translate_linestrip_ushort2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned v = 0;   /* input cursor */
+   unsigned o = 0;   /* output cursor */
+
+   while (o < nr) {
+      dst[o + 0] = src[v];
+      dst[o + 1] = src[v + 1];
+      o += 2;
+      v += 1;
+   }
+}
+/* Writes overlapping pairs (in[s], in[s+1]) for the first nr-2 outputs, then a
+ * final pair (in[s], in[0]) that links back to the first index.
+ * nr counts output indices.  Returns without writing when nr < 2, because the
+ * unsigned expression "nr - 2" would otherwise wrap and overrun both buffers. */
+static void translate_lineloop_ushort2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   if (nr < 2)
+      return;
+
+   for (s = 0, d = 0; d < nr - 2; ++s, d += 2) {
+      dst[d] = (ushort)src[s];
+      dst[d + 1] = (ushort)src[s + 1];
+   }
+   /* Final pair: last visited input index and in[0]. */
+   dst[d] = (ushort)src[s];
+   dst[d + 1] = (ushort)src[0];
+}
+/* Copies indices three at a time, preserving the order within each triple; nr counts output indices. */
+static void translate_tris_ushort2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 3) {
+      dst[k] = (ushort)src[k];
+      dst[k + 1] = (ushort)src[k + 1];
+      dst[k + 2] = (ushort)src[k + 2];
+   }
+}
+/* One output triple per input position s: (s, s+1, s+2) when s is even,
+ * (s+1, s, s+2) when s is odd; nr counts output indices. */
+static void translate_tristrip_ushort2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      const unsigned odd = s & 1;
+
+      dst[d] = (ushort)src[s + odd];
+      dst[d + 1] = (ushort)src[s + 1 - odd];
+      dst[d + 2] = (ushort)src[s + 2];
+   }
+}
+/* One output triple per input position s: (in[0], in[s+1], in[s+2]); nr counts output indices. */
+static void translate_trifan_ushort2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (ushort)src[0];
+      dst[d + 1] = (ushort)src[s + 1];
+      dst[d + 2] = (ushort)src[s + 2];
+   }
+}
+/* Each group of 4 input indices yields 6 outputs: offsets (0,1,3) then (1,2,3); nr counts output indices. */
+static void translate_quads_ushort2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 4, d += 6) {
+      dst[d + 0] = (ushort)src[s + 0];
+      dst[d + 1] = (ushort)src[s + 1];
+      dst[d + 2] = (ushort)src[s + 3];
+      dst[d + 3] = (ushort)src[s + 1];
+      dst[d + 4] = (ushort)src[s + 2];
+      dst[d + 5] = (ushort)src[s + 3];
+   }
+}
+/* Input advances 2 per 6 outputs: offsets (2,0,3) then (0,1,3) from the current input position; nr counts output indices. */
+static void translate_quadstrip_ushort2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 2, d += 6) {
+      dst[d + 0] = (ushort)src[s + 2];
+      dst[d + 1] = (ushort)src[s + 0];
+      dst[d + 2] = (ushort)src[s + 3];
+      dst[d + 3] = (ushort)src[s + 0];
+      dst[d + 4] = (ushort)src[s + 1];
+      dst[d + 5] = (ushort)src[s + 3];
+   }
+}
+/* One output triple per input position s: (in[s+1], in[s+2], in[0]); nr counts output indices. */
+static void translate_polygon_ushort2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (ushort)src[s + 1];
+      dst[d + 1] = (ushort)src[s + 2];
+      dst[d + 2] = (ushort)src[0];
+   }
+}
+/* Copies nr indices one-to-one from _in (ushort) to _out (uint). */
+static void translate_points_ushort2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; ++k)
+      dst[k] = (uint)src[k];
+}
+/* Copies indices two at a time, preserving the order within each pair; nr counts output indices. */
+static void translate_lines_ushort2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 2) {
+      dst[k] = (uint)src[k];
+      dst[k + 1] = (uint)src[k + 1];
+   }
+}
+/* Expands the input into overlapping pairs (in[s], in[s+1]); nr counts output indices. */
+static void translate_linestrip_ushort2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 2) {
+      dst[d] = (uint)src[s];
+      dst[d + 1] = (uint)src[s + 1];
+   }
+}
+/* Writes overlapping pairs (in[s], in[s+1]) for the first nr-2 outputs, then a
+ * final pair (in[s], in[0]) that links back to the first index.
+ * nr counts output indices.  Returns without writing when nr < 2, because the
+ * unsigned expression "nr - 2" would otherwise wrap and overrun both buffers. */
+static void translate_lineloop_ushort2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   if (nr < 2)
+      return;
+
+   for (s = 0, d = 0; d < nr - 2; ++s, d += 2) {
+      dst[d] = (uint)src[s];
+      dst[d + 1] = (uint)src[s + 1];
+   }
+   /* Final pair: last visited input index and in[0]. */
+   dst[d] = (uint)src[s];
+   dst[d + 1] = (uint)src[0];
+}
+/* Copies indices three at a time, preserving the order within each triple; nr counts output indices. */
+static void translate_tris_ushort2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 3) {
+      dst[k] = (uint)src[k];
+      dst[k + 1] = (uint)src[k + 1];
+      dst[k + 2] = (uint)src[k + 2];
+   }
+}
+/* One output triple per input position s: (s, s+1, s+2) when s is even,
+ * (s, s+2, s+1) when s is odd; nr counts output indices. */
+static void translate_tristrip_ushort2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      const unsigned odd = s & 1;
+
+      dst[d] = (uint)src[s];
+      dst[d + 1] = (uint)src[s + 1 + odd];
+      dst[d + 2] = (uint)src[s + 2 - odd];
+   }
+}
+/* One output triple per input position s: (in[0], in[s+1], in[s+2]); nr counts output indices. */
+static void translate_trifan_ushort2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (uint)src[0];
+      dst[d + 1] = (uint)src[s + 1];
+      dst[d + 2] = (uint)src[s + 2];
+   }
+}
+/* Each group of 4 input indices yields 6 outputs: offsets (0,1,3) then (1,2,3); nr counts output indices. */
+static void translate_quads_ushort2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 4, d += 6) {
+      dst[d + 0] = (uint)src[s + 0];
+      dst[d + 1] = (uint)src[s + 1];
+      dst[d + 2] = (uint)src[s + 3];
+      dst[d + 3] = (uint)src[s + 1];
+      dst[d + 4] = (uint)src[s + 2];
+      dst[d + 5] = (uint)src[s + 3];
+   }
+}
+/* Input advances 2 per 6 outputs: offsets (2,0,3) then (0,1,3) from the current input position; nr counts output indices. */
+static void translate_quadstrip_ushort2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 2, d += 6) {
+      dst[d + 0] = (uint)src[s + 2];
+      dst[d + 1] = (uint)src[s + 0];
+      dst[d + 2] = (uint)src[s + 3];
+      dst[d + 3] = (uint)src[s + 0];
+      dst[d + 4] = (uint)src[s + 1];
+      dst[d + 5] = (uint)src[s + 3];
+   }
+}
+/* One output triple per input position s: (in[0], in[s+1], in[s+2]); nr counts output indices. */
+static void translate_polygon_ushort2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (uint)src[0];
+      dst[d + 1] = (uint)src[s + 1];
+      dst[d + 2] = (uint)src[s + 2];
+   }
+}
+/* Copies nr indices one-to-one from _in (ushort) to _out (uint). */
+static void translate_points_ushort2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; ++k)
+      dst[k] = (uint)src[k];
+}
+/* Copies indices two at a time, swapping the two members of each pair; nr counts output indices. */
+static void translate_lines_ushort2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 2) {
+      dst[k] = (uint)src[k + 1];
+      dst[k + 1] = (uint)src[k];
+   }
+}
+/* Expands the input into overlapping pairs written reversed, (in[s+1], in[s]); nr counts output indices. */
+static void translate_linestrip_ushort2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 2) {
+      dst[d] = (uint)src[s + 1];
+      dst[d + 1] = (uint)src[s];
+   }
+}
+/* Writes reversed overlapping pairs (in[s+1], in[s]) for the first nr-2
+ * outputs, then a final pair (in[0], in[s]) that links back to the first index.
+ * nr counts output indices.  Returns without writing when nr < 2, because the
+ * unsigned expression "nr - 2" would otherwise wrap and overrun both buffers. */
+static void translate_lineloop_ushort2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   if (nr < 2)
+      return;
+
+   for (s = 0, d = 0; d < nr - 2; ++s, d += 2) {
+      dst[d] = (uint)src[s + 1];
+      dst[d + 1] = (uint)src[s];
+   }
+   /* Final pair: in[0] and the last visited input index. */
+   dst[d] = (uint)src[0];
+   dst[d + 1] = (uint)src[s];
+}
+/* Copies indices three at a time, rotating each triple (a,b,c) to (b,c,a); nr counts output indices. */
+static void translate_tris_ushort2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 3) {
+      dst[k] = (uint)src[k + 1];
+      dst[k + 1] = (uint)src[k + 2];
+      dst[k + 2] = (uint)src[k];
+   }
+}
+/* One output triple per input position s: (s+1, s+2, s) when s is even,
+ * (s+2, s+1, s) when s is odd; nr counts output indices. */
+static void translate_tristrip_ushort2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      const unsigned odd = s & 1;
+
+      dst[d] = (uint)src[s + 1 + odd];
+      dst[d + 1] = (uint)src[s + 2 - odd];
+      dst[d + 2] = (uint)src[s];
+   }
+}
+/* One output triple per input position s: (in[s+1], in[s+2], in[0]); nr counts output indices. */
+static void translate_trifan_ushort2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (uint)src[s + 1];
+      dst[d + 1] = (uint)src[s + 2];
+      dst[d + 2] = (uint)src[0];
+   }
+}
+/* Each group of 4 input indices yields 6 outputs: offsets (1,3,0) then (2,3,1); nr counts output indices. */
+static void translate_quads_ushort2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 4, d += 6) {
+      dst[d + 0] = (uint)src[s + 1];
+      dst[d + 1] = (uint)src[s + 3];
+      dst[d + 2] = (uint)src[s + 0];
+      dst[d + 3] = (uint)src[s + 2];
+      dst[d + 4] = (uint)src[s + 3];
+      dst[d + 5] = (uint)src[s + 1];
+   }
+}
+/* Input advances 2 per 6 outputs: offsets (0,3,2) then (1,3,0) from the current input position; nr counts output indices. */
+static void translate_quadstrip_ushort2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 2, d += 6) {
+      dst[d + 0] = (uint)src[s + 0];
+      dst[d + 1] = (uint)src[s + 3];
+      dst[d + 2] = (uint)src[s + 2];
+      dst[d + 3] = (uint)src[s + 1];
+      dst[d + 4] = (uint)src[s + 3];
+      dst[d + 5] = (uint)src[s + 0];
+   }
+}
+/* One output triple per input position s: (in[s+1], in[s+2], in[0]); nr counts output indices. */
+static void translate_polygon_ushort2uint_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (uint)src[s + 1];
+      dst[d + 1] = (uint)src[s + 2];
+      dst[d + 2] = (uint)src[0];
+   }
+}
+/* Copies nr indices one-to-one from _in (ushort) to _out (uint). */
+static void translate_points_ushort2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; ++k)
+      dst[k] = (uint)src[k];
+}
+/* Copies indices two at a time, swapping the two members of each pair; nr counts output indices. */
+static void translate_lines_ushort2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 2) {
+      dst[k] = (uint)src[k + 1];
+      dst[k + 1] = (uint)src[k];
+   }
+}
+/* Expands the input into overlapping pairs written reversed, (in[s+1], in[s]); nr counts output indices. */
+static void translate_linestrip_ushort2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 2) {
+      dst[d] = (uint)src[s + 1];
+      dst[d + 1] = (uint)src[s];
+   }
+}
+/* Writes reversed overlapping pairs (in[s+1], in[s]) for the first nr-2
+ * outputs, then a final pair (in[0], in[s]) that links back to the first index.
+ * nr counts output indices.  Returns without writing when nr < 2, because the
+ * unsigned expression "nr - 2" would otherwise wrap and overrun both buffers. */
+static void translate_lineloop_ushort2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   if (nr < 2)
+      return;
+
+   for (s = 0, d = 0; d < nr - 2; ++s, d += 2) {
+      dst[d] = (uint)src[s + 1];
+      dst[d + 1] = (uint)src[s];
+   }
+   /* Final pair: in[0] and the last visited input index. */
+   dst[d] = (uint)src[0];
+   dst[d + 1] = (uint)src[s];
+}
+/* Copies indices three at a time, rotating each triple (a,b,c) to (c,a,b); nr counts output indices. */
+static void translate_tris_ushort2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 3) {
+      dst[k] = (uint)src[k + 2];
+      dst[k + 1] = (uint)src[k];
+      dst[k + 2] = (uint)src[k + 1];
+   }
+}
+/* One output triple per input position s: (s+2, s, s+1) when s is even,
+ * (s+2, s+1, s) when s is odd; nr counts output indices. */
+static void translate_tristrip_ushort2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      const unsigned odd = s & 1;
+
+      dst[d] = (uint)src[s + 2];
+      dst[d + 1] = (uint)src[s + odd];
+      dst[d + 2] = (uint)src[s + 1 - odd];
+   }
+}
+/* One output triple per input position s: (in[s+2], in[0], in[s+1]); nr counts output indices. */
+static void translate_trifan_ushort2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (uint)src[s + 2];
+      dst[d + 1] = (uint)src[0];
+      dst[d + 2] = (uint)src[s + 1];
+   }
+}
+/* Each group of 4 input indices yields 6 outputs: offsets (3,0,1) then (3,1,2); nr counts output indices. */
+static void translate_quads_ushort2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 4, d += 6) {
+      dst[d + 0] = (uint)src[s + 3];
+      dst[d + 1] = (uint)src[s + 0];
+      dst[d + 2] = (uint)src[s + 1];
+      dst[d + 3] = (uint)src[s + 3];
+      dst[d + 4] = (uint)src[s + 1];
+      dst[d + 5] = (uint)src[s + 2];
+   }
+}
+/* Input advances 2 per 6 outputs: offsets (3,2,0) then (3,0,1) from the current input position; nr counts output indices. */
+static void translate_quadstrip_ushort2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 2, d += 6) {
+      dst[d + 0] = (uint)src[s + 3];
+      dst[d + 1] = (uint)src[s + 2];
+      dst[d + 2] = (uint)src[s + 0];
+      dst[d + 3] = (uint)src[s + 3];
+      dst[d + 4] = (uint)src[s + 0];
+      dst[d + 5] = (uint)src[s + 1];
+   }
+}
+/* One output triple per input position s: (in[0], in[s+1], in[s+2]); nr counts output indices. */
+static void translate_polygon_ushort2uint_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (uint)src[0];
+      dst[d + 1] = (uint)src[s + 1];
+      dst[d + 2] = (uint)src[s + 2];
+   }
+}
+/* Copies nr indices one-to-one from _in (ushort) to _out (uint). */
+static void translate_points_ushort2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; ++k)
+      dst[k] = (uint)src[k];
+}
+/* Copies indices two at a time, preserving the order within each pair; nr counts output indices. */
+static void translate_lines_ushort2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 2) {
+      dst[k] = (uint)src[k];
+      dst[k + 1] = (uint)src[k + 1];
+   }
+}
+/* Expands the input into overlapping pairs (in[s], in[s+1]); nr counts output indices. */
+static void translate_linestrip_ushort2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 2) {
+      dst[d] = (uint)src[s];
+      dst[d + 1] = (uint)src[s + 1];
+   }
+}
+/* Writes overlapping pairs (in[s], in[s+1]) for the first nr-2 outputs, then a
+ * final pair (in[s], in[0]) that links back to the first index.
+ * nr counts output indices.  Returns without writing when nr < 2, because the
+ * unsigned expression "nr - 2" would otherwise wrap and overrun both buffers. */
+static void translate_lineloop_ushort2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   if (nr < 2)
+      return;
+
+   for (s = 0, d = 0; d < nr - 2; ++s, d += 2) {
+      dst[d] = (uint)src[s];
+      dst[d + 1] = (uint)src[s + 1];
+   }
+   /* Final pair: last visited input index and in[0]. */
+   dst[d] = (uint)src[s];
+   dst[d + 1] = (uint)src[0];
+}
+/* Copies indices three at a time, preserving the order within each triple; nr counts output indices. */
+static void translate_tris_ushort2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 3) {
+      dst[k] = (uint)src[k];
+      dst[k + 1] = (uint)src[k + 1];
+      dst[k + 2] = (uint)src[k + 2];
+   }
+}
+/* One output triple per input position s: (s, s+1, s+2) when s is even,
+ * (s+1, s, s+2) when s is odd; nr counts output indices. */
+static void translate_tristrip_ushort2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      const unsigned odd = s & 1;
+
+      dst[d] = (uint)src[s + odd];
+      dst[d + 1] = (uint)src[s + 1 - odd];
+      dst[d + 2] = (uint)src[s + 2];
+   }
+}
+/* One output triple per input position s: (in[0], in[s+1], in[s+2]); nr counts output indices. */
+static void translate_trifan_ushort2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (uint)src[0];
+      dst[d + 1] = (uint)src[s + 1];
+      dst[d + 2] = (uint)src[s + 2];
+   }
+}
+/* Each group of 4 input indices yields 6 outputs: offsets (0,1,3) then (1,2,3); nr counts output indices. */
+static void translate_quads_ushort2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 4, d += 6) {
+      dst[d + 0] = (uint)src[s + 0];
+      dst[d + 1] = (uint)src[s + 1];
+      dst[d + 2] = (uint)src[s + 3];
+      dst[d + 3] = (uint)src[s + 1];
+      dst[d + 4] = (uint)src[s + 2];
+      dst[d + 5] = (uint)src[s + 3];
+   }
+}
+/* Input advances 2 per 6 outputs: offsets (2,0,3) then (0,1,3) from the current input position; nr counts output indices. */
+static void translate_quadstrip_ushort2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 2, d += 6) {
+      dst[d + 0] = (uint)src[s + 2];
+      dst[d + 1] = (uint)src[s + 0];
+      dst[d + 2] = (uint)src[s + 3];
+      dst[d + 3] = (uint)src[s + 0];
+      dst[d + 4] = (uint)src[s + 1];
+      dst[d + 5] = (uint)src[s + 3];
+   }
+}
+/* One output triple per input position s: (in[s+1], in[s+2], in[0]); nr counts output indices. */
+static void translate_polygon_ushort2uint_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const ushort *src = (const ushort *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (uint)src[s + 1];
+      dst[d + 1] = (uint)src[s + 2];
+      dst[d + 2] = (uint)src[0];
+   }
+}
+/* Copies nr indices one-to-one from _in (uint) to _out, casting each value to ushort. */
+static void translate_points_uint2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; ++k)
+      dst[k] = (ushort)src[k];
+}
+/* Copies indices two at a time, preserving the order within each pair; nr counts output indices. */
+static void translate_lines_uint2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 2) {
+      dst[k] = (ushort)src[k];
+      dst[k + 1] = (ushort)src[k + 1];
+   }
+}
+/* Expands the input into overlapping pairs (in[s], in[s+1]); nr counts output indices. */
+static void translate_linestrip_uint2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 2) {
+      dst[d] = (ushort)src[s];
+      dst[d + 1] = (ushort)src[s + 1];
+   }
+}
+/* Writes overlapping pairs (in[s], in[s+1]) for the first nr-2 outputs, then a
+ * final pair (in[s], in[0]) that links back to the first index.
+ * nr counts output indices.  Returns without writing when nr < 2, because the
+ * unsigned expression "nr - 2" would otherwise wrap and overrun both buffers. */
+static void translate_lineloop_uint2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   if (nr < 2)
+      return;
+
+   for (s = 0, d = 0; d < nr - 2; ++s, d += 2) {
+      dst[d] = (ushort)src[s];
+      dst[d + 1] = (ushort)src[s + 1];
+   }
+   /* Final pair: last visited input index and in[0]. */
+   dst[d] = (ushort)src[s];
+   dst[d + 1] = (ushort)src[0];
+}
+/* Copies indices three at a time, preserving the order within each triple; nr counts output indices. */
+static void translate_tris_uint2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 3) {
+      dst[k] = (ushort)src[k];
+      dst[k + 1] = (ushort)src[k + 1];
+      dst[k + 2] = (ushort)src[k + 2];
+   }
+}
+/* One output triple per input position s: (s, s+1, s+2) when s is even,
+ * (s, s+2, s+1) when s is odd; nr counts output indices. */
+static void translate_tristrip_uint2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      const unsigned odd = s & 1;
+
+      dst[d] = (ushort)src[s];
+      dst[d + 1] = (ushort)src[s + 1 + odd];
+      dst[d + 2] = (ushort)src[s + 2 - odd];
+   }
+}
+/* One output triple per input position s: (in[0], in[s+1], in[s+2]); nr counts output indices. */
+static void translate_trifan_uint2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (ushort)src[0];
+      dst[d + 1] = (ushort)src[s + 1];
+      dst[d + 2] = (ushort)src[s + 2];
+   }
+}
+/* Each group of 4 input indices yields 6 outputs: offsets (0,1,3) then (1,2,3); nr counts output indices. */
+static void translate_quads_uint2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 4, d += 6) {
+      dst[d + 0] = (ushort)src[s + 0];
+      dst[d + 1] = (ushort)src[s + 1];
+      dst[d + 2] = (ushort)src[s + 3];
+      dst[d + 3] = (ushort)src[s + 1];
+      dst[d + 4] = (ushort)src[s + 2];
+      dst[d + 5] = (ushort)src[s + 3];
+   }
+}
+/* Input advances 2 per 6 outputs: offsets (2,0,3) then (0,1,3) from the current input position; nr counts output indices. */
+static void translate_quadstrip_uint2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 2, d += 6) {
+      dst[d + 0] = (ushort)src[s + 2];
+      dst[d + 1] = (ushort)src[s + 0];
+      dst[d + 2] = (ushort)src[s + 3];
+      dst[d + 3] = (ushort)src[s + 0];
+      dst[d + 4] = (ushort)src[s + 1];
+      dst[d + 5] = (ushort)src[s + 3];
+   }
+}
+/* One output triple per input position s: (in[0], in[s+1], in[s+2]); nr counts output indices. */
+static void translate_polygon_uint2ushort_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (ushort)src[0];
+      dst[d + 1] = (ushort)src[s + 1];
+      dst[d + 2] = (ushort)src[s + 2];
+   }
+}
+/* Copies nr indices one-to-one from _in (uint) to _out, casting each value to ushort. */
+static void translate_points_uint2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; ++k)
+      dst[k] = (ushort)src[k];
+}
+/* Copies indices two at a time, swapping the two members of each pair; nr counts output indices. */
+static void translate_lines_uint2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 2) {
+      dst[k] = (ushort)src[k + 1];
+      dst[k + 1] = (ushort)src[k];
+   }
+}
+/* Expands the input into overlapping pairs written reversed, (in[s+1], in[s]); nr counts output indices. */
+static void translate_linestrip_uint2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 2) {
+      dst[d] = (ushort)src[s + 1];
+      dst[d + 1] = (ushort)src[s];
+   }
+}
+/* Writes reversed overlapping pairs (in[s+1], in[s]) for the first nr-2
+ * outputs, then a final pair (in[0], in[s]) that links back to the first index.
+ * nr counts output indices.  Returns without writing when nr < 2, because the
+ * unsigned expression "nr - 2" would otherwise wrap and overrun both buffers. */
+static void translate_lineloop_uint2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   if (nr < 2)
+      return;
+
+   for (s = 0, d = 0; d < nr - 2; ++s, d += 2) {
+      dst[d] = (ushort)src[s + 1];
+      dst[d + 1] = (ushort)src[s];
+   }
+   /* Final pair: in[0] and the last visited input index. */
+   dst[d] = (ushort)src[0];
+   dst[d + 1] = (ushort)src[s];
+}
+/* Copies indices three at a time, rotating each triple (a,b,c) to (b,c,a); nr counts output indices. */
+static void translate_tris_uint2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 3) {
+      dst[k] = (ushort)src[k + 1];
+      dst[k + 1] = (ushort)src[k + 2];
+      dst[k + 2] = (ushort)src[k];
+   }
+}
+/* One output triple per input position s: (s+1, s+2, s) when s is even,
+ * (s+2, s+1, s) when s is odd; nr counts output indices. */
+static void translate_tristrip_uint2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      const unsigned odd = s & 1;
+
+      dst[d] = (ushort)src[s + 1 + odd];
+      dst[d + 1] = (ushort)src[s + 2 - odd];
+      dst[d + 2] = (ushort)src[s];
+   }
+}
+/* One output triple per input position s: (in[s+1], in[s+2], in[0]); nr counts output indices. */
+static void translate_trifan_uint2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (ushort)src[s + 1];
+      dst[d + 1] = (ushort)src[s + 2];
+      dst[d + 2] = (ushort)src[0];
+   }
+}
+/* Each group of 4 input indices yields 6 outputs: offsets (1,3,0) then (2,3,1); nr counts output indices. */
+static void translate_quads_uint2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 4, d += 6) {
+      dst[d + 0] = (ushort)src[s + 1];
+      dst[d + 1] = (ushort)src[s + 3];
+      dst[d + 2] = (ushort)src[s + 0];
+      dst[d + 3] = (ushort)src[s + 2];
+      dst[d + 4] = (ushort)src[s + 3];
+      dst[d + 5] = (ushort)src[s + 1];
+   }
+}
+/* Input advances 2 per 6 outputs: offsets (0,3,2) then (1,3,0) from the current input position; nr counts output indices. */
+static void translate_quadstrip_uint2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 2, d += 6) {
+      dst[d + 0] = (ushort)src[s + 0];
+      dst[d + 1] = (ushort)src[s + 3];
+      dst[d + 2] = (ushort)src[s + 2];
+      dst[d + 3] = (ushort)src[s + 1];
+      dst[d + 4] = (ushort)src[s + 3];
+      dst[d + 5] = (ushort)src[s + 0];
+   }
+}
+/* One output triple per input position s: (in[s+1], in[s+2], in[0]); nr counts output indices. */
+static void translate_polygon_uint2ushort_first2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (ushort)src[s + 1];
+      dst[d + 1] = (ushort)src[s + 2];
+      dst[d + 2] = (ushort)src[0];
+   }
+}
+/* Copies nr indices one-to-one from _in (uint) to _out, casting each value to ushort. */
+static void translate_points_uint2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; ++k)
+      dst[k] = (ushort)src[k];
+}
+/* Copies indices two at a time, swapping the two members of each pair; nr counts output indices. */
+static void translate_lines_uint2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 2) {
+      dst[k] = (ushort)src[k + 1];
+      dst[k + 1] = (ushort)src[k];
+   }
+}
+/* Expands the input into overlapping pairs written reversed, (in[s+1], in[s]); nr counts output indices. */
+static void translate_linestrip_uint2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 2) {
+      dst[d] = (ushort)src[s + 1];
+      dst[d + 1] = (ushort)src[s];
+   }
+}
+/* Writes reversed overlapping pairs (in[s+1], in[s]) for the first nr-2
+ * outputs, then a final pair (in[0], in[s]) that links back to the first index.
+ * nr counts output indices.  Returns without writing when nr < 2, because the
+ * unsigned expression "nr - 2" would otherwise wrap and overrun both buffers. */
+static void translate_lineloop_uint2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   if (nr < 2)
+      return;
+
+   for (s = 0, d = 0; d < nr - 2; ++s, d += 2) {
+      dst[d] = (ushort)src[s + 1];
+      dst[d + 1] = (ushort)src[s];
+   }
+   /* Final pair: in[0] and the last visited input index. */
+   dst[d] = (ushort)src[0];
+   dst[d + 1] = (ushort)src[s];
+}
+/* Copies indices three at a time, rotating each triple (a,b,c) to (c,a,b); nr counts output indices. */
+static void translate_tris_uint2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 3) {
+      dst[k] = (ushort)src[k + 2];
+      dst[k + 1] = (ushort)src[k];
+      dst[k + 2] = (ushort)src[k + 1];
+   }
+}
+/* One output triple per input position s: (s+2, s, s+1) when s is even,
+ * (s+2, s+1, s) when s is odd; nr counts output indices. */
+static void translate_tristrip_uint2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      const unsigned odd = s & 1;
+
+      dst[d] = (ushort)src[s + 2];
+      dst[d + 1] = (ushort)src[s + odd];
+      dst[d + 2] = (ushort)src[s + 1 - odd];
+   }
+}
+/* One output triple per input position s: (in[s+2], in[0], in[s+1]); nr counts output indices. */
+static void translate_trifan_uint2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (ushort)src[s + 2];
+      dst[d + 1] = (ushort)src[0];
+      dst[d + 2] = (ushort)src[s + 1];
+   }
+}
+/* Each group of 4 input indices yields 6 outputs: offsets (3,0,1) then (3,1,2); nr counts output indices. */
+static void translate_quads_uint2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 4, d += 6) {
+      dst[d + 0] = (ushort)src[s + 3];
+      dst[d + 1] = (ushort)src[s + 0];
+      dst[d + 2] = (ushort)src[s + 1];
+      dst[d + 3] = (ushort)src[s + 3];
+      dst[d + 4] = (ushort)src[s + 1];
+      dst[d + 5] = (ushort)src[s + 2];
+   }
+}
+/* Input advances 2 per 6 outputs: offsets (3,2,0) then (3,0,1) from the current input position; nr counts output indices. */
+static void translate_quadstrip_uint2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 2, d += 6) {
+      dst[d + 0] = (ushort)src[s + 3];
+      dst[d + 1] = (ushort)src[s + 2];
+      dst[d + 2] = (ushort)src[s + 0];
+      dst[d + 3] = (ushort)src[s + 3];
+      dst[d + 4] = (ushort)src[s + 0];
+      dst[d + 5] = (ushort)src[s + 1];
+   }
+}
+/* One output triple per input position s: (in[0], in[s+1], in[s+2]); nr counts output indices. */
+static void translate_polygon_uint2ushort_last2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (ushort)src[0];
+      dst[d + 1] = (ushort)src[s + 1];
+      dst[d + 2] = (ushort)src[s + 2];
+   }
+}
+/* Copies nr indices one-to-one from _in (uint) to _out, casting each value to ushort. */
+static void translate_points_uint2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; ++k)
+      dst[k] = (ushort)src[k];
+}
+/* Copies indices two at a time, preserving the order within each pair; nr counts output indices. */
+static void translate_lines_uint2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 2) {
+      dst[k] = (ushort)src[k];
+      dst[k + 1] = (ushort)src[k + 1];
+   }
+}
+/* Expands the input into overlapping pairs (in[s], in[s+1]); nr counts output indices. */
+static void translate_linestrip_uint2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 2) {
+      dst[d] = (ushort)src[s];
+      dst[d + 1] = (ushort)src[s + 1];
+   }
+}
+/* Writes overlapping pairs (in[s], in[s+1]) for the first nr-2 outputs, then a
+ * final pair (in[s], in[0]) that links back to the first index.
+ * nr counts output indices.  Returns without writing when nr < 2, because the
+ * unsigned expression "nr - 2" would otherwise wrap and overrun both buffers. */
+static void translate_lineloop_uint2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   if (nr < 2)
+      return;
+
+   for (s = 0, d = 0; d < nr - 2; ++s, d += 2) {
+      dst[d] = (ushort)src[s];
+      dst[d + 1] = (ushort)src[s + 1];
+   }
+   /* Final pair: last visited input index and in[0]. */
+   dst[d] = (ushort)src[s];
+   dst[d + 1] = (ushort)src[0];
+}
+/* Copies indices three at a time, preserving the order within each triple; nr counts output indices. */
+static void translate_tris_uint2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 3) {
+      dst[k] = (ushort)src[k];
+      dst[k + 1] = (ushort)src[k + 1];
+      dst[k + 2] = (ushort)src[k + 2];
+   }
+}
+/* One output triple per input position s: (s, s+1, s+2) when s is even,
+ * (s+1, s, s+2) when s is odd; nr counts output indices. */
+static void translate_tristrip_uint2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      const unsigned odd = s & 1;
+
+      dst[d] = (ushort)src[s + odd];
+      dst[d + 1] = (ushort)src[s + 1 - odd];
+      dst[d + 2] = (ushort)src[s + 2];
+   }
+}
+/* One output triple per input position s: (in[0], in[s+1], in[s+2]); nr counts output indices. */
+static void translate_trifan_uint2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (ushort)src[0];
+      dst[d + 1] = (ushort)src[s + 1];
+      dst[d + 2] = (ushort)src[s + 2];
+   }
+}
+/* Each group of 4 input indices yields 6 outputs: offsets (0,1,3) then (1,2,3); nr counts output indices. */
+static void translate_quads_uint2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 4, d += 6) {
+      dst[d + 0] = (ushort)src[s + 0];
+      dst[d + 1] = (ushort)src[s + 1];
+      dst[d + 2] = (ushort)src[s + 3];
+      dst[d + 3] = (ushort)src[s + 1];
+      dst[d + 4] = (ushort)src[s + 2];
+      dst[d + 5] = (ushort)src[s + 3];
+   }
+}
+/* Input advances 2 per 6 outputs: offsets (2,0,3) then (0,1,3) from the current input position; nr counts output indices. */
+static void translate_quadstrip_uint2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; s += 2, d += 6) {
+      dst[d + 0] = (ushort)src[s + 2];
+      dst[d + 1] = (ushort)src[s + 0];
+      dst[d + 2] = (ushort)src[s + 3];
+      dst[d + 3] = (ushort)src[s + 0];
+      dst[d + 4] = (ushort)src[s + 1];
+      dst[d + 5] = (ushort)src[s + 3];
+   }
+}
+/* One output triple per input position s: (in[s+1], in[s+2], in[0]); nr counts output indices. */
+static void translate_polygon_uint2ushort_last2last(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   ushort *dst = (ushort *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 3) {
+      dst[d] = (ushort)src[s + 1];
+      dst[d + 1] = (ushort)src[s + 2];
+      dst[d + 2] = (ushort)src[0];
+   }
+}
+/* Copies nr indices one-to-one from _in (uint) to _out (uint). */
+static void translate_points_uint2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; ++k)
+      dst[k] = (uint)src[k];
+}
+/* Copies indices two at a time, preserving the order within each pair; nr counts output indices. */
+static void translate_lines_uint2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k;
+
+   for (k = 0; k < nr; k += 2) {
+      dst[k] = (uint)src[k];
+      dst[k + 1] = (uint)src[k + 1];
+   }
+}
+/* Expands the input into overlapping pairs (in[s], in[s+1]); nr counts output indices. */
+static void translate_linestrip_uint2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   for (s = 0, d = 0; d < nr; ++s, d += 2) {
+      dst[d] = (uint)src[s];
+      dst[d + 1] = (uint)src[s + 1];
+   }
+}
+/* Writes overlapping pairs (in[s], in[s+1]) for the first nr-2 outputs, then a
+ * final pair (in[s], in[0]) that links back to the first index.
+ * nr counts output indices.  Returns without writing when nr < 2, because the
+ * unsigned expression "nr - 2" would otherwise wrap and overrun both buffers. */
+static void translate_lineloop_uint2uint_first2first(const void *_in, unsigned nr, void *_out)
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned s, d;
+
+   if (nr < 2)
+      return;
+
+   for (s = 0, d = 0; d < nr - 2; ++s, d += 2) {
+      dst[d] = (uint)src[s];
+      dst[d + 1] = (uint)src[s + 1];
+   }
+   /* Final pair: last visited input index and in[0]. */
+   dst[d] = (uint)src[s];
+   dst[d + 1] = (uint)src[0];
+}
+/**
+ * Copy an independent-triangle index list (uint -> uint), keeping the
+ * vertex order of each triangle: (in[k], in[k+1], in[k+2]).
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop
+ * \param _out  destination indices, written as uint
+ */
+static void translate_tris_uint2uint_first2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned base = 0;
+
+   while (base < nr) {
+      uint *tri = dst + base;
+
+      tri[0] = src[base];
+      tri[1] = src[base + 1];
+      tri[2] = src[base + 2];
+      base += 3;
+   }
+}
+/**
+ * Expand a triangle-strip index list into independent triangles
+ * (uint -> uint).  With p = t & 1 for strip position t, output triangle
+ * t is (in[t], in[t+1+p], in[t+2-p]); i.e. the last two vertices are
+ * swapped on odd positions.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_tristrip_uint2uint_first2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      const unsigned odd = t & 1;
+      uint *tri = dst + emitted;
+
+      tri[0] = src[t];
+      tri[1] = src[t + 1 + odd];
+      tri[2] = src[t + 2 - odd];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Expand a triangle-fan index list into independent triangles
+ * (uint -> uint).  Output triangle t is (in[0], in[t+1], in[t+2]).
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_trifan_uint2uint_first2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      uint *tri = dst + emitted;
+
+      tri[0] = src[0];
+      tri[1] = src[t + 1];
+      tri[2] = src[t + 2];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Split independent quads into triangles (uint -> uint), two triangles
+ * per quad.  For the quad at input position q (q advances by 4) the six
+ * outputs are in[q], in[q+1], in[q+3] and in[q+1], in[q+2], in[q+3].
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_quads_uint2uint_first2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned q = 0;
+
+   while (emitted < nr) {
+      uint *tri0 = dst + emitted;
+      uint *tri1 = tri0 + 3;
+
+      tri0[0] = src[q];
+      tri0[1] = src[q + 1];
+      tri0[2] = src[q + 3];
+      tri1[0] = src[q + 1];
+      tri1[1] = src[q + 2];
+      tri1[2] = src[q + 3];
+
+      emitted += 6;
+      q += 4;
+   }
+}
+/**
+ * Expand a quad-strip index list into triangles (uint -> uint), two
+ * triangles per quad.  For the quad at input position v (v advances by
+ * 2) the six outputs are in[v+2], in[v], in[v+3] and in[v], in[v+1],
+ * in[v+3].
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_quadstrip_uint2uint_first2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned v = 0;
+
+   while (emitted < nr) {
+      uint *tri0 = dst + emitted;
+      uint *tri1 = tri0 + 3;
+
+      tri0[0] = src[v + 2];
+      tri0[1] = src[v];
+      tri0[2] = src[v + 3];
+      tri1[0] = src[v];
+      tri1[1] = src[v + 1];
+      tri1[2] = src[v + 3];
+
+      emitted += 6;
+      v += 2;
+   }
+}
+/**
+ * Expand a polygon index list into independent triangles (uint -> uint).
+ * Output triangle t is (in[0], in[t+1], in[t+2]).
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_polygon_uint2uint_first2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      uint *tri = dst + emitted;
+
+      tri[0] = src[0];
+      tri[1] = src[t + 1];
+      tri[2] = src[t + 2];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Copy a point index list unchanged (uint -> uint): out[k] = in[k].
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of indices to copy
+ * \param _out  destination indices, written as uint
+ */
+static void translate_points_uint2uint_first2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy an independent-line index list (uint -> uint) while swapping the
+ * two vertices of every segment: (in[k+1], in[k]).
+ * The "first2last" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop
+ * \param _out  destination indices, written as uint
+ */
+static void translate_lines_uint2uint_first2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned base = 0;
+
+   while (base < nr) {
+      uint *seg = dst + base;
+
+      seg[0] = src[base + 1];
+      seg[1] = src[base];
+      base += 2;
+   }
+}
+/**
+ * Expand a line-strip index list into independent segments (uint -> uint)
+ * with the vertices of each segment swapped: segment s is (in[s+1], in[s]).
+ * The "first2last" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_linestrip_uint2uint_first2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned s = 0;
+
+   while (emitted < nr) {
+      uint *seg = dst + emitted;
+
+      seg[0] = src[s + 1];
+      seg[1] = src[s];
+
+      emitted += 2;
+      s++;
+   }
+}
+/**
+ * Expand a line-loop index list into independent segments (uint -> uint)
+ * with the vertices of each segment swapped.  Segments (in[s+1], in[s])
+ * are emitted until nr - 2 indices have been written; a final closing
+ * segment (in[0], in[s]) then joins the first vertex to the last one
+ * reached.
+ *
+ * Calls with nr < 2 write nothing: nr is unsigned, so the loop bound
+ * nr - 2 would otherwise wrap around and the loop would run far past
+ * both arrays.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices
+ * \param _out  destination indices, written as uint
+ */
+static void translate_lineloop_uint2uint_first2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *in = (const uint *)_in;
+   uint *out = (uint *)_out;
+   unsigned i, j;
+
+   /* Guard the unsigned subtraction below against wrap-around. */
+   if (nr < 2)
+      return;
+
+   for (j = i = 0; j < nr - 2; j += 2, i++) {
+      out[j]     = in[i + 1];
+      out[j + 1] = in[i];
+   }
+
+   /* Closing segment, written in the same swapped order. */
+   out[j]     = in[0];
+   out[j + 1] = in[i];
+}
+/**
+ * Copy an independent-triangle index list (uint -> uint), rotating each
+ * triangle so its first input vertex comes last: (in[k+1], in[k+2], in[k]).
+ * The "first2last" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop
+ * \param _out  destination indices, written as uint
+ */
+static void translate_tris_uint2uint_first2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned base = 0;
+
+   while (base < nr) {
+      uint *tri = dst + base;
+
+      tri[0] = src[base + 1];
+      tri[1] = src[base + 2];
+      tri[2] = src[base];
+      base += 3;
+   }
+}
+/**
+ * Expand a triangle-strip index list into independent triangles
+ * (uint -> uint).  With p = t & 1 for strip position t, output triangle
+ * t is (in[t+1+p], in[t+2-p], in[t]).
+ * The "first2last" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_tristrip_uint2uint_first2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      const unsigned odd = t & 1;
+      uint *tri = dst + emitted;
+
+      tri[0] = src[t + 1 + odd];
+      tri[1] = src[t + 2 - odd];
+      tri[2] = src[t];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Expand a triangle-fan index list into independent triangles
+ * (uint -> uint).  Output triangle t is (in[t+1], in[t+2], in[0]).
+ * The "first2last" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_trifan_uint2uint_first2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      uint *tri = dst + emitted;
+
+      tri[0] = src[t + 1];
+      tri[1] = src[t + 2];
+      tri[2] = src[0];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Split independent quads into triangles (uint -> uint), two triangles
+ * per quad.  For the quad at input position q (q advances by 4) the six
+ * outputs are in[q+1], in[q+3], in[q] and in[q+2], in[q+3], in[q+1].
+ * The "first2last" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_quads_uint2uint_first2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned q = 0;
+
+   while (emitted < nr) {
+      uint *tri0 = dst + emitted;
+      uint *tri1 = tri0 + 3;
+
+      tri0[0] = src[q + 1];
+      tri0[1] = src[q + 3];
+      tri0[2] = src[q];
+      tri1[0] = src[q + 2];
+      tri1[1] = src[q + 3];
+      tri1[2] = src[q + 1];
+
+      emitted += 6;
+      q += 4;
+   }
+}
+/**
+ * Expand a quad-strip index list into triangles (uint -> uint), two
+ * triangles per quad.  For the quad at input position v (v advances by
+ * 2) the six outputs are in[v], in[v+3], in[v+2] and in[v+1], in[v+3],
+ * in[v].
+ * The "first2last" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_quadstrip_uint2uint_first2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned v = 0;
+
+   while (emitted < nr) {
+      uint *tri0 = dst + emitted;
+      uint *tri1 = tri0 + 3;
+
+      tri0[0] = src[v];
+      tri0[1] = src[v + 3];
+      tri0[2] = src[v + 2];
+      tri1[0] = src[v + 1];
+      tri1[1] = src[v + 3];
+      tri1[2] = src[v];
+
+      emitted += 6;
+      v += 2;
+   }
+}
+/**
+ * Expand a polygon index list into independent triangles (uint -> uint).
+ * Output triangle t is (in[t+1], in[t+2], in[0]).
+ * The "first2last" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_polygon_uint2uint_first2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      uint *tri = dst + emitted;
+
+      tri[0] = src[t + 1];
+      tri[1] = src[t + 2];
+      tri[2] = src[0];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Copy a point index list unchanged (uint -> uint): out[k] = in[k].
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of indices to copy
+ * \param _out  destination indices, written as uint
+ */
+static void translate_points_uint2uint_last2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy an independent-line index list (uint -> uint) while swapping the
+ * two vertices of every segment: (in[k+1], in[k]).
+ * The "last2first" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop
+ * \param _out  destination indices, written as uint
+ */
+static void translate_lines_uint2uint_last2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned base = 0;
+
+   while (base < nr) {
+      uint *seg = dst + base;
+
+      seg[0] = src[base + 1];
+      seg[1] = src[base];
+      base += 2;
+   }
+}
+/**
+ * Expand a line-strip index list into independent segments (uint -> uint)
+ * with the vertices of each segment swapped: segment s is (in[s+1], in[s]).
+ * The "last2first" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_linestrip_uint2uint_last2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned s = 0;
+
+   while (emitted < nr) {
+      uint *seg = dst + emitted;
+
+      seg[0] = src[s + 1];
+      seg[1] = src[s];
+
+      emitted += 2;
+      s++;
+   }
+}
+/**
+ * Expand a line-loop index list into independent segments (uint -> uint)
+ * with the vertices of each segment swapped.  Segments (in[s+1], in[s])
+ * are emitted until nr - 2 indices have been written; a final closing
+ * segment (in[0], in[s]) then joins the first vertex to the last one
+ * reached.
+ *
+ * Calls with nr < 2 write nothing: nr is unsigned, so the loop bound
+ * nr - 2 would otherwise wrap around and the loop would run far past
+ * both arrays.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices
+ * \param _out  destination indices, written as uint
+ */
+static void translate_lineloop_uint2uint_last2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *in = (const uint *)_in;
+   uint *out = (uint *)_out;
+   unsigned i, j;
+
+   /* Guard the unsigned subtraction below against wrap-around. */
+   if (nr < 2)
+      return;
+
+   for (j = i = 0; j < nr - 2; j += 2, i++) {
+      out[j]     = in[i + 1];
+      out[j + 1] = in[i];
+   }
+
+   /* Closing segment, written in the same swapped order. */
+   out[j]     = in[0];
+   out[j + 1] = in[i];
+}
+/**
+ * Copy an independent-triangle index list (uint -> uint), rotating each
+ * triangle so its last input vertex comes first: (in[k+2], in[k], in[k+1]).
+ * The "last2first" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop
+ * \param _out  destination indices, written as uint
+ */
+static void translate_tris_uint2uint_last2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned base = 0;
+
+   while (base < nr) {
+      uint *tri = dst + base;
+
+      tri[0] = src[base + 2];
+      tri[1] = src[base];
+      tri[2] = src[base + 1];
+      base += 3;
+   }
+}
+/**
+ * Expand a triangle-strip index list into independent triangles
+ * (uint -> uint).  With p = t & 1 for strip position t, output triangle
+ * t is (in[t+2], in[t+p], in[t+1-p]).
+ * The "last2first" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_tristrip_uint2uint_last2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      const unsigned odd = t & 1;
+      uint *tri = dst + emitted;
+
+      tri[0] = src[t + 2];
+      tri[1] = src[t + odd];
+      tri[2] = src[t + 1 - odd];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Expand a triangle-fan index list into independent triangles
+ * (uint -> uint).  Output triangle t is (in[t+2], in[0], in[t+1]).
+ * The "last2first" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_trifan_uint2uint_last2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      uint *tri = dst + emitted;
+
+      tri[0] = src[t + 2];
+      tri[1] = src[0];
+      tri[2] = src[t + 1];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Split independent quads into triangles (uint -> uint), two triangles
+ * per quad.  For the quad at input position q (q advances by 4) the six
+ * outputs are in[q+3], in[q], in[q+1] and in[q+3], in[q+1], in[q+2].
+ * The "last2first" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_quads_uint2uint_last2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned q = 0;
+
+   while (emitted < nr) {
+      uint *tri0 = dst + emitted;
+      uint *tri1 = tri0 + 3;
+
+      tri0[0] = src[q + 3];
+      tri0[1] = src[q];
+      tri0[2] = src[q + 1];
+      tri1[0] = src[q + 3];
+      tri1[1] = src[q + 1];
+      tri1[2] = src[q + 2];
+
+      emitted += 6;
+      q += 4;
+   }
+}
+/**
+ * Expand a quad-strip index list into triangles (uint -> uint), two
+ * triangles per quad.  For the quad at input position v (v advances by
+ * 2) the six outputs are in[v+3], in[v+2], in[v] and in[v+3], in[v],
+ * in[v+1].
+ * The "last2first" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_quadstrip_uint2uint_last2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned v = 0;
+
+   while (emitted < nr) {
+      uint *tri0 = dst + emitted;
+      uint *tri1 = tri0 + 3;
+
+      tri0[0] = src[v + 3];
+      tri0[1] = src[v + 2];
+      tri0[2] = src[v];
+      tri1[0] = src[v + 3];
+      tri1[1] = src[v];
+      tri1[2] = src[v + 1];
+
+      emitted += 6;
+      v += 2;
+   }
+}
+/**
+ * Expand a polygon index list into independent triangles (uint -> uint).
+ * Output triangle t is (in[0], in[t+1], in[t+2]).
+ * The "last2first" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_polygon_uint2uint_last2first(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      uint *tri = dst + emitted;
+
+      tri[0] = src[0];
+      tri[1] = src[t + 1];
+      tri[2] = src[t + 2];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Copy a point index list unchanged (uint -> uint): out[k] = in[k].
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of indices to copy
+ * \param _out  destination indices, written as uint
+ */
+static void translate_points_uint2uint_last2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned k = 0;
+
+   while (k < nr) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+/**
+ * Copy an independent-line index list (uint -> uint), two indices per
+ * segment, keeping the vertex order of each segment: (in[k], in[k+1]).
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop
+ * \param _out  destination indices, written as uint
+ */
+static void translate_lines_uint2uint_last2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned base = 0;
+
+   while (base < nr) {
+      uint *seg = dst + base;
+
+      seg[0] = src[base];
+      seg[1] = src[base + 1];
+      base += 2;
+   }
+}
+/**
+ * Expand a line-strip index list into independent segments (uint -> uint).
+ * Output segment s is (in[s], in[s+1]).
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_linestrip_uint2uint_last2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned s = 0;
+
+   while (emitted < nr) {
+      uint *seg = dst + emitted;
+
+      seg[0] = src[s];
+      seg[1] = src[s + 1];
+
+      emitted += 2;
+      s++;
+   }
+}
+/**
+ * Expand a line-loop index list into independent segments (uint -> uint).
+ * Segments (in[s], in[s+1]) are emitted until nr - 2 indices have been
+ * written; a final closing segment (in[s], in[0]) then joins the last
+ * vertex reached back to the first one.
+ *
+ * Calls with nr < 2 write nothing: nr is unsigned, so the loop bound
+ * nr - 2 would otherwise wrap around and the loop would run far past
+ * both arrays.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices
+ * \param _out  destination indices, written as uint
+ */
+static void translate_lineloop_uint2uint_last2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *in = (const uint *)_in;
+   uint *out = (uint *)_out;
+   unsigned i, j;
+
+   /* Guard the unsigned subtraction below against wrap-around. */
+   if (nr < 2)
+      return;
+
+   for (j = i = 0; j < nr - 2; j += 2, i++) {
+      out[j]     = in[i];
+      out[j + 1] = in[i + 1];
+   }
+
+   /* Closing segment back to the first vertex. */
+   out[j]     = in[i];
+   out[j + 1] = in[0];
+}
+/**
+ * Copy an independent-triangle index list (uint -> uint), keeping the
+ * vertex order of each triangle: (in[k], in[k+1], in[k+2]).
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop
+ * \param _out  destination indices, written as uint
+ */
+static void translate_tris_uint2uint_last2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned base = 0;
+
+   while (base < nr) {
+      uint *tri = dst + base;
+
+      tri[0] = src[base];
+      tri[1] = src[base + 1];
+      tri[2] = src[base + 2];
+      base += 3;
+   }
+}
+/**
+ * Expand a triangle-strip index list into independent triangles
+ * (uint -> uint).  With p = t & 1 for strip position t, output triangle
+ * t is (in[t+p], in[t+1-p], in[t+2]); i.e. the first two vertices are
+ * swapped on odd positions.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_tristrip_uint2uint_last2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      const unsigned odd = t & 1;
+      uint *tri = dst + emitted;
+
+      tri[0] = src[t + odd];
+      tri[1] = src[t + 1 - odd];
+      tri[2] = src[t + 2];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Expand a triangle-fan index list into independent triangles
+ * (uint -> uint).  Output triangle t is (in[0], in[t+1], in[t+2]).
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_trifan_uint2uint_last2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      uint *tri = dst + emitted;
+
+      tri[0] = src[0];
+      tri[1] = src[t + 1];
+      tri[2] = src[t + 2];
+
+      emitted += 3;
+      t++;
+   }
+}
+/**
+ * Split independent quads into triangles (uint -> uint), two triangles
+ * per quad.  For the quad at input position q (q advances by 4) the six
+ * outputs are in[q], in[q+1], in[q+3] and in[q+1], in[q+2], in[q+3].
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_quads_uint2uint_last2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned q = 0;
+
+   while (emitted < nr) {
+      uint *tri0 = dst + emitted;
+      uint *tri1 = tri0 + 3;
+
+      tri0[0] = src[q];
+      tri0[1] = src[q + 1];
+      tri0[2] = src[q + 3];
+      tri1[0] = src[q + 1];
+      tri1[1] = src[q + 2];
+      tri1[2] = src[q + 3];
+
+      emitted += 6;
+      q += 4;
+   }
+}
+/**
+ * Expand a quad-strip index list into triangles (uint -> uint), two
+ * triangles per quad.  For the quad at input position v (v advances by
+ * 2) the six outputs are in[v+2], in[v], in[v+3] and in[v], in[v+1],
+ * in[v+3].
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_quadstrip_uint2uint_last2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned v = 0;
+
+   while (emitted < nr) {
+      uint *tri0 = dst + emitted;
+      uint *tri1 = tri0 + 3;
+
+      tri0[0] = src[v + 2];
+      tri0[1] = src[v];
+      tri0[2] = src[v + 3];
+      tri1[0] = src[v];
+      tri1[1] = src[v + 1];
+      tri1[2] = src[v + 3];
+
+      emitted += 6;
+      v += 2;
+   }
+}
+/**
+ * Expand a polygon index list into independent triangles (uint -> uint).
+ * Output triangle t is (in[t+1], in[t+2], in[0]).
+ * The "last2last" suffix presumably names the provoking-vertex
+ * conventions of input and output (cf. PV_FIRST/PV_LAST) - TODO confirm.
+ *
+ * \param _in   source indices, read as uint
+ * \param nr    number of output indices; bounds the loop over \p _out
+ * \param _out  destination indices, written as uint
+ */
+static void translate_polygon_uint2uint_last2last(
+    const void * _in,
+    unsigned nr,
+    void *_out )
+{
+   const uint *src = (const uint *)_in;
+   uint *dst = (uint *)_out;
+   unsigned emitted = 0;
+   unsigned t = 0;
+
+   while (emitted < nr) {
+      uint *tri = dst + emitted;
+
+      tri[0] = src[t + 1];
+      tri[1] = src[t + 2];
+      tri[2] = src[0];
+
+      emitted += 3;
+      t++;
+   }
+}
+void u_index_init( void )
+{
+ static int firsttime = 1;
+ if (!firsttime) return;
+ firsttime = 0;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_first2last;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_last2last;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_uint_first2last;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_uint_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_last2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_last2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_last2last;
+}
+#include "indices/u_indices.c"
diff --git a/src/gallium/auxiliary/indices/u_indices_gen.py b/src/gallium/auxiliary/indices/u_indices_gen.py
new file mode 100644
index 0000000000..af63d09930
--- /dev/null
+++ b/src/gallium/auxiliary/indices/u_indices_gen.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python
+copyright = '''
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+'''
+
+# Index sources. 'generate' selects the generate_* functions (no input
+# buffer); the other three are spliced into the emitted C as the element
+# type of the input index pointer.
+GENERATE, UBYTE, USHORT, UINT = 'generate', 'ubyte', 'ushort', 'uint'
+# NOTE(review): "pv" presumably stands for provoking vertex -- confirm.
+FIRST, LAST = 'first', 'last'
+
+INTYPES = (GENERATE, UBYTE, USHORT, UINT)
+OUTTYPES = (USHORT, UINT)
+PVS=(FIRST, LAST)
+# Short primitive names, used to build the emitted function names.
+PRIMS=('points',
+ 'lines',
+ 'linestrip',
+ 'lineloop',
+ 'tris',
+ 'trifan',
+ 'tristrip',
+ 'quads',
+ 'quadstrip',
+ 'polygon')
+
+# PIPE_PRIM_* identifiers, listed in the same order as PRIMS.
+LONGPRIMS=('PIPE_PRIM_POINTS',
+ 'PIPE_PRIM_LINES',
+ 'PIPE_PRIM_LINE_STRIP',
+ 'PIPE_PRIM_LINE_LOOP',
+ 'PIPE_PRIM_TRIANGLES',
+ 'PIPE_PRIM_TRIANGLE_FAN',
+ 'PIPE_PRIM_TRIANGLE_STRIP',
+ 'PIPE_PRIM_QUADS',
+ 'PIPE_PRIM_QUAD_STRIP',
+ 'PIPE_PRIM_POLYGON')
+
+# Maps from the short names above to the C identifiers that init() prints
+# as table subscripts.
+longprim = dict(zip(PRIMS, LONGPRIMS))
+# No 'generate' key: init() only consults this map for translate functions.
+intype_idx = dict(ubyte='IN_UBYTE', ushort='IN_USHORT', uint='IN_UINT')
+outtype_idx = dict(ushort='OUT_USHORT', uint='OUT_UINT')
+pv_idx = dict(first='PV_FIRST', last='PV_LAST')
+
+
+# Print the fixed head of the generated C file: the "generated by" banner,
+# the license text held in `copyright`, the #include list, the two helpers
+# that map an index size in bytes to a table slot, and the declarations of
+# the translate/generate function tables.
+#
+# The banner names this script (u_indices_gen.py) so that a reader of the
+# generated file can find the generator.
+def prolog():
+    print '''/* File automatically generated by u_indices_gen.py */'''
+    print copyright
+    print r'''
+
+/**
+ * @file
+ * Functions to translate and generate index lists
+ */
+
+#include "indices/u_indices.h"
+#include "indices/u_indices_priv.h"
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+
+
+static unsigned out_size_idx( unsigned index_size )
+{
+ switch (index_size) {
+ case 4: return OUT_UINT;
+ case 2: return OUT_USHORT;
+ default: assert(0); return OUT_USHORT;
+ }
+}
+
+static unsigned in_size_idx( unsigned index_size )
+{
+ switch (index_size) {
+ case 4: return IN_UINT;
+ case 2: return IN_USHORT;
+ case 1: return IN_UBYTE;
+ default: assert(0); return IN_UBYTE;
+ }
+}
+
+
+static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
+static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
+
+
+'''
+
+# Build the C expression for one output index, cast to `outtype`.
+# Generated indices are the vertex expression itself; translated indices
+# are read from the input array `in` at that position.
+def vert( intype, outtype, v0 ):
+    cast = '(' + outtype + ')'
+    if intype != GENERATE:
+        return cast + 'in[' + v0 + ']'
+    return cast + '(' + v0 + ')'
+
+# Print one C assignment storing vertex expression v0 at (ptr)[0].
+def point( intype, outtype, ptr, v0 ):
+ print ' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';'
+
+# Print two C assignments storing v0 and v1 at (ptr)[0] and (ptr)[1].
+def line( intype, outtype, ptr, v0, v1 ):
+ print ' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';'
+ print ' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';'
+
+# Print three C assignments storing v0, v1, v2 at (ptr)[0..2], in that order.
+def tri( intype, outtype, ptr, v0, v1, v2 ):
+ print ' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';'
+ print ' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';'
+ print ' (' + ptr + ')[2] = ' + vert( intype, outtype, v2 ) + ';'
+
+# Emit a single point; a thin wrapper over point() that takes no
+# inpv/outpv arguments, unlike do_line/do_tri.
+def do_point( intype, outtype, ptr, v0 ):
+ point( intype, outtype, ptr, v0 )
+
+# Emit one line segment. When the input and output pv conventions differ
+# the two endpoints are written in swapped order.
+def do_line( intype, outtype, ptr, v0, v1, inpv, outpv ):
+    if inpv != outpv:
+        v0, v1 = v1, v0
+    line( intype, outtype, ptr, v0, v1 )
+
+# Emit one triangle. With matching pv conventions the vertices are written
+# as given; otherwise they are rotated: first->last moves v0 to the end,
+# last->first moves v2 to the front.
+def do_tri( intype, outtype, ptr, v0, v1, v2, inpv, outpv ):
+    if inpv == outpv:
+        order = (v0, v1, v2)
+    elif inpv == FIRST:
+        order = (v1, v2, v0)
+    else:
+        order = (v2, v0, v1)
+    tri( intype, outtype, ptr, *order )
+
+# Emit a quad as two triangles, (v0, v1, v3) at ptr+0 and (v1, v2, v3) at
+# ptr+3, each passed through do_tri for pv handling.
+def do_quad( intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ):
+ do_tri( intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv );
+ do_tri( intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv );
+
+# Build the C identifier of an emitted function:
+#   generate_<prim>_<outtype>_<inpv>2<outpv>
+#   translate_<prim>_<intype>2<outtype>_<inpv>2<outpv>
+def name(intype, outtype, inpv, outpv, prim):
+    pv_suffix = '%s2%s' % (inpv, outpv)
+    if intype == GENERATE:
+        return 'generate_%s_%s_%s' % (prim, outtype, pv_suffix)
+    return 'translate_%s_%s2%s_%s' % (prim, intype, outtype, pv_suffix)
+
+# Print the opening of one emitted C function: its static signature, the
+# opening brace and the local declarations. Translate functions also get
+# a `_in` parameter and a typed `in` pointer; generate functions do not.
+def preamble(intype, outtype, inpv, outpv, prim):
+ print 'static void ' + name( intype, outtype, inpv, outpv, prim ) + '('
+ if intype != GENERATE:
+ print ' const void * _in,'
+ print ' unsigned nr,'
+ print ' void *_out )'
+ print '{'
+ if intype != GENERATE:
+ print ' const ' + intype + '*in = (const ' + intype + '*)_in;'
+ print ' ' + outtype + ' *out = (' + outtype + '*)_out;'
+ print ' unsigned i, j;'
+ # Not every emitted loop uses j; presumably this silences the
+ # unused-variable warning -- confirm.
+ print ' (void)j;'
+
+# Print the closing brace of an emitted C function.
+def postamble():
+ print '}'
+
+
+# Emit the point-list function: out[i] is written from vertex i for every
+# i below nr. inpv/outpv only affect the emitted function's name here.
+def points(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='points')
+ print ' for (i = 0; i < nr; i++) { '
+ do_point( intype, outtype, 'out+i', 'i' );
+ print ' }'
+ postamble()
+
+# Emit the line-list function: i advances by 2 and each iteration writes
+# the segment (i, i+1) at out+i, swapped when inpv and outpv differ.
+def lines(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='lines')
+ print ' for (i = 0; i < nr; i+=2) { '
+ do_line( intype, outtype, 'out+i', 'i', 'i+1', inpv, outpv );
+ print ' }'
+ postamble()
+
+# Emit the line-strip function: j (output offset) advances by 2 while i
+# (vertex) advances by 1; each iteration writes the segment (i, i+1).
+def linestrip(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='linestrip')
+ print ' for (j = i = 0; j < nr; j+=2, i++) { '
+ do_line( intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv );
+ print ' }'
+ postamble()
+
+# Emit the line-loop function: the same stepping as linestrip, but the loop
+# stops at j < nr - 2 and one extra segment from vertex i back to vertex 0
+# is written after the loop at out+j.
+# NOTE(review): nr is declared `unsigned` in the emitted C, so `nr - 2`
+# wraps around when nr < 2 -- confirm callers never pass such a count.
+def lineloop(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='lineloop')
+ print ' for (j = i = 0; j < nr - 2; j+=2, i++) { '
+ do_line( intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv );
+ print ' }'
+ do_line( intype, outtype, 'out+j', 'i', '0', inpv, outpv );
+ postamble()
+
+# Emit the triangle-list function: i advances by 3 and each iteration
+# writes the triangle (i, i+1, i+2) at out+i, rotated by do_tri when the
+# pv conventions differ.
+def tris(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='tris')
+ print ' for (i = 0; i < nr; i+=3) { '
+ do_tri( intype, outtype, 'out+i', 'i', 'i+1', 'i+2', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+# Emit the triangle-strip function: j advances by 3, i by 1. The (i&1)
+# terms swap two of the three vertices on odd triangles; with inpv FIRST
+# vertex i stays in the first slot, otherwise vertex i+2 stays in the last.
+# NOTE(review): the swap presumably keeps the winding order consistent
+# along the strip -- confirm.
+def tristrip(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='tristrip')
+ print ' for (j = i = 0; j < nr; j+=3, i++) { '
+ if inpv == FIRST:
+ do_tri( intype, outtype, 'out+j', 'i', 'i+1+(i&1)', 'i+2-(i&1)', inpv, outpv );
+ else:
+ do_tri( intype, outtype, 'out+j', 'i+(i&1)', 'i+1-(i&1)', 'i+2', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+# Emit the triangle-fan function: j advances by 3, i by 1, and each
+# iteration writes the triangle (0, i+1, i+2).
+def trifan(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='trifan')
+ print ' for (j = i = 0; j < nr; j+=3, i++) { '
+ do_tri( intype, outtype, 'out+j', '0', 'i+1', 'i+2', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+
+# Emit the polygon function: fan-style triangles around vertex 0. With
+# inpv FIRST vertex 0 is passed to do_tri in the first slot, otherwise in
+# the last slot.
+def polygon(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='polygon')
+ print ' for (j = i = 0; j < nr; j+=3, i++) { '
+ if inpv == FIRST:
+ do_tri( intype, outtype, 'out+j', '0', 'i+1', 'i+2', inpv, outpv );
+ else:
+ do_tri( intype, outtype, 'out+j', 'i+1', 'i+2', '0', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+# Emit the quad-list function: every 4 input vertices (i advances by 4)
+# become 6 output indices (j advances by 6) via do_quad.
+def quads(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='quads')
+ print ' for (j = i = 0; j < nr; j+=6, i+=4) { '
+ do_quad( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+# Emit the quad-strip function: i advances by 2, j by 6; each quad is
+# handed to do_quad with its vertices in the order i+2, i+0, i+1, i+3.
+def quadstrip(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='quadstrip')
+ print ' for (j = i = 0; j < nr; j+=6, i+=2) { '
+ do_quad( intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+# Emit the body of every generate_*/translate_* function: for each
+# combination of input type, output type, input pv and output pv, print
+# one function per primitive, in the fixed order listed below.
+def emit_funcs():
+    emitters = (points, lines, linestrip, lineloop, tris,
+                tristrip, trifan, quads, quadstrip, polygon)
+    for intype in INTYPES:
+        for outtype in OUTTYPES:
+            for inpv in PVS:
+                for outpv in PVS:
+                    for emit in emitters:
+                        emit(intype, outtype, inpv, outpv)
+
+# Print one C statement that stores an emitted function in its table slot.
+# Generate functions go into generate[out][inpv][outpv][prim]; translate
+# functions go into translate[in][out][inpv][outpv][prim].
+def init(intype, outtype, inpv, outpv, prim):
+    func = name( intype, outtype, inpv, outpv, prim )
+    if intype == GENERATE:
+        table = 'generate'
+        subscripts = ()
+    else:
+        table = 'translate'
+        subscripts = (intype_idx[intype],)
+    subscripts += (outtype_idx[outtype], pv_idx[inpv], pv_idx[outpv], longprim[prim])
+    print table + ''.join(['[' + s + ']' for s in subscripts]) + ' = ' + func + ';'
+
+
+# Print the table assignment for every (intype, outtype, inpv, outpv, prim)
+# combination, via init().
+def emit_all_inits():
+ for intype in INTYPES:
+ for outtype in OUTTYPES:
+ for inpv in PVS:
+ for outpv in PVS:
+ for prim in PRIMS:
+ init(intype, outtype, inpv, outpv, prim)
+
+# Print the C function u_index_init(): a static `firsttime` flag makes
+# every call after the first return immediately; the first call runs all
+# the table assignments printed by emit_all_inits().
+def emit_init():
+ print 'void u_index_init( void )'
+ print '{'
+ print ' static int firsttime = 1;'
+ print ' if (!firsttime) return;'
+ print ' firsttime = 0;'
+ emit_all_inits()
+ print '}'
+
+
+
+
+# Print the tail of the generated file: a textual #include of
+# indices/u_indices.c, so that file is compiled as part of the generated one.
+def epilog():
+ print '#include "indices/u_indices.c"'
+
+
+# Write the whole generated C file to stdout: header, all function bodies,
+# the u_index_init() function, then the trailing #include.
+def main():
+ prolog()
+ emit_funcs()
+ emit_init()
+ epilog()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/gallium/auxiliary/indices/u_indices_priv.h b/src/gallium/auxiliary/indices/u_indices_priv.h
new file mode 100644
index 0000000000..9c3298c24d
--- /dev/null
+++ b/src/gallium/auxiliary/indices/u_indices_priv.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef U_INDICES_PRIV_H
+#define U_INDICES_PRIV_H
+
+#include "pipe/p_compiler.h"
+#include "u_indices.h"
+
+/* Slots for the input index type; IN_COUNT is the number of slots and is
+ * used as the first dimension of the generated translate table.
+ */
+#define IN_UBYTE 0
+#define IN_USHORT 1
+#define IN_UINT 2
+#define IN_COUNT 3
+
+/* Slots for the output index type; OUT_COUNT is the number of slots. */
+#define OUT_USHORT 0
+#define OUT_UINT 1
+#define OUT_COUNT 2
+
+
+/* Table size for the primitive dimension: one slot for each value up to
+ * and including PIPE_PRIM_POLYGON.
+ */
+#define PRIM_COUNT (PIPE_PRIM_POLYGON + 1)
+
+#endif
diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile
index f9b39d9ce0..1c00ba8d98 100644
--- a/src/gallium/auxiliary/pipebuffer/Makefile
+++ b/src/gallium/auxiliary/pipebuffer/Makefile
@@ -11,12 +11,9 @@ C_SOURCES = \
pb_bufmgr_debug.c \
pb_bufmgr_fenced.c \
pb_bufmgr_mm.c \
+ pb_bufmgr_ondemand.c \
pb_bufmgr_pool.c \
pb_bufmgr_slab.c \
- pb_validate.c \
- pb_winsys.c
+ pb_validate.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript
index 56a40dda0d..8e9f06abe4 100644
--- a/src/gallium/auxiliary/pipebuffer/SConscript
+++ b/src/gallium/auxiliary/pipebuffer/SConscript
@@ -10,10 +10,10 @@ pipebuffer = env.ConvenienceLibrary(
'pb_bufmgr_debug.c',
'pb_bufmgr_fenced.c',
'pb_bufmgr_mm.c',
+ 'pb_bufmgr_ondemand.c',
'pb_bufmgr_pool.c',
'pb_bufmgr_slab.c',
'pb_validate.c',
- 'pb_winsys.c',
])
auxiliaries.insert(0, pipebuffer)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 8505d333bd..e6b0b30ff4 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -37,7 +37,7 @@
* There is no obligation of a winsys driver to use this library. And a pipe
* driver should be completly agnostic about it.
*
- * \author Jos� Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#ifndef PB_BUFFER_H_
@@ -45,7 +45,8 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
+#include "pipe/p_error.h"
#include "pipe/p_state.h"
#include "pipe/p_inlines.h"
@@ -56,6 +57,8 @@ extern "C" {
struct pb_vtbl;
+struct pb_validate;
+
/**
* Buffer description.
@@ -104,6 +107,13 @@ struct pb_vtbl
void (*unmap)( struct pb_buffer *buf );
+ enum pipe_error (*validate)( struct pb_buffer *buf,
+ struct pb_validate *vl,
+ unsigned flags );
+
+ void (*fence)( struct pb_buffer *buf,
+ struct pipe_fence_handle *fence );
+
/**
* Get the base buffer and the offset.
*
@@ -118,6 +128,7 @@ struct pb_vtbl
void (*get_base_buffer)( struct pb_buffer *buf,
struct pb_buffer **base_buf,
unsigned *offset );
+
};
@@ -148,6 +159,7 @@ pb_map(struct pb_buffer *buf,
assert(buf);
if(!buf)
return NULL;
+ assert(buf->base.refcount > 0);
return buf->vtbl->map(buf, flags);
}
@@ -158,6 +170,7 @@ pb_unmap(struct pb_buffer *buf)
assert(buf);
if(!buf)
return;
+ assert(buf->base.refcount > 0);
buf->vtbl->unmap(buf);
}
@@ -173,7 +186,33 @@ pb_get_base_buffer( struct pb_buffer *buf,
offset = 0;
return;
}
+ assert(buf->base.refcount > 0);
+ assert(buf->vtbl->get_base_buffer);
buf->vtbl->get_base_buffer(buf, base_buf, offset);
+ assert(*base_buf);
+ assert(*offset < (*base_buf)->base.size);
+}
+
+
+/**
+ * Call the buffer's vtbl validate hook with the given list and flags.
+ *
+ * Asserts that buf and the hook are non-NULL. When asserts are compiled
+ * out and buf is NULL, returns PIPE_ERROR; otherwise returns whatever the
+ * hook returns.
+ */
+static INLINE enum pipe_error
+pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags)
+{
+ assert(buf);
+ if(!buf)
+ return PIPE_ERROR;
+ assert(buf->vtbl->validate);
+ return buf->vtbl->validate(buf, vl, flags);
+}
+
+
+/**
+ * Call the buffer's vtbl fence hook with the given fence handle.
+ *
+ * Asserts that buf and the hook are non-NULL. When asserts are compiled
+ * out and buf is NULL, returns without doing anything.
+ */
+static INLINE void
+pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence)
+{
+ assert(buf);
+ if(!buf)
+ return;
+ assert(buf->vtbl->fence);
+ buf->vtbl->fence(buf, fence);
}
@@ -183,6 +222,7 @@ pb_destroy(struct pb_buffer *buf)
assert(buf);
if(!buf)
return;
+ assert(buf->base.refcount == 0);
buf->vtbl->destroy(buf);
}
@@ -193,11 +233,16 @@ static INLINE void
pb_reference(struct pb_buffer **dst,
struct pb_buffer *src)
{
- if (src)
+ if (src) {
+ assert(src->base.refcount);
src->base.refcount++;
+ }
- if (*dst && --(*dst)->base.refcount == 0)
- pb_destroy( *dst );
+ if (*dst) {
+ assert((*dst)->base.refcount);
+ if(--(*dst)->base.refcount == 0)
+ pb_destroy( *dst );
+ }
*dst = src;
}
@@ -210,7 +255,13 @@ pb_reference(struct pb_buffer **dst,
static INLINE boolean
pb_check_alignment(size_t requested, size_t provided)
{
- return requested <= provided && (provided % requested) == 0 ? TRUE : FALSE;
+ if(!requested)
+ return TRUE;
+ if(requested > provided)
+ return FALSE;
+ if(provided % requested != 0)
+ return FALSE;
+ return TRUE;
}
@@ -234,10 +285,6 @@ pb_malloc_buffer_create(size_t size,
const struct pb_desc *desc);
-void
-pb_init_winsys(struct pipe_winsys *winsys);
-
-
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index c3d747898a..272e2205e3 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -29,7 +29,7 @@
* \file
* Implementation of fenced buffers.
*
- * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com>
* \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
*/
@@ -43,8 +43,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_error.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_winsys.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
@@ -59,19 +58,12 @@
*/
#define SUPER(__derived) (&(__derived)->base)
-#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \
- ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE )
-#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \
- ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE )
-#define PIPE_BUFFER_USAGE_WRITE \
- ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE )
-
struct fenced_buffer_list
{
pipe_mutex mutex;
- struct pipe_winsys *winsys;
+ struct pb_fence_ops *ops;
size_t numDelayed;
struct list_head delayed;
@@ -101,6 +93,8 @@ struct fenced_buffer
unsigned flags;
unsigned mapcount;
+ struct pb_validate *vl;
+ unsigned validation_flags;
struct pipe_fence_handle *fence;
struct list_head head;
@@ -112,7 +106,6 @@ static INLINE struct fenced_buffer *
fenced_buffer(struct pb_buffer *buf)
{
assert(buf);
- assert(buf->vtbl == &fenced_buffer_vtbl);
return (struct fenced_buffer *)buf;
}
@@ -164,12 +157,12 @@ static INLINE void
_fenced_buffer_remove(struct fenced_buffer_list *fenced_list,
struct fenced_buffer *fenced_buf)
{
- struct pipe_winsys *winsys = fenced_list->winsys;
+ struct pb_fence_ops *ops = fenced_list->ops;
assert(fenced_buf->fence);
assert(fenced_buf->list == fenced_list);
- winsys->fence_reference(winsys, &fenced_buf->fence, NULL);
+ ops->fence_reference(ops, &fenced_buf->fence, NULL);
fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
assert(fenced_buf->head.prev);
@@ -193,7 +186,7 @@ static INLINE enum pipe_error
_fenced_buffer_finish(struct fenced_buffer *fenced_buf)
{
struct fenced_buffer_list *fenced_list = fenced_buf->list;
- struct pipe_winsys *winsys = fenced_list->winsys;
+ struct pb_fence_ops *ops = fenced_list->ops;
#if 0
debug_warning("waiting for GPU");
@@ -201,7 +194,7 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf)
assert(fenced_buf->fence);
if(fenced_buf->fence) {
- if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) {
+ if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) {
return PIPE_ERROR;
}
/* Remove from the fenced list */
@@ -221,7 +214,7 @@ static void
_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
int wait)
{
- struct pipe_winsys *winsys = fenced_list->winsys;
+ struct pb_fence_ops *ops = fenced_list->ops;
struct list_head *curr, *next;
struct fenced_buffer *fenced_buf;
struct pipe_fence_handle *prev_fence = NULL;
@@ -234,15 +227,15 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
if(fenced_buf->fence != prev_fence) {
int signaled;
if (wait)
- signaled = winsys->fence_finish(winsys, fenced_buf->fence, 0);
+ signaled = ops->fence_finish(ops, fenced_buf->fence, 0);
else
- signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0);
+ signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
if (signaled != 0)
break;
prev_fence = fenced_buf->fence;
}
else {
- assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0);
+ assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
}
_fenced_buffer_remove(fenced_list, fenced_buf);
@@ -262,14 +255,14 @@ fenced_buffer_destroy(struct pb_buffer *buf)
pipe_mutex_lock(fenced_list->mutex);
assert(fenced_buf->base.base.refcount == 0);
if (fenced_buf->fence) {
- struct pipe_winsys *winsys = fenced_list->winsys;
- if(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0) {
+ struct pb_fence_ops *ops = fenced_list->ops;
+ if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
struct list_head *curr, *prev;
curr = &fenced_buf->head;
prev = curr->prev;
do {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
- assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0);
+ assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
_fenced_buffer_remove(fenced_list, fenced_buf);
curr = prev;
prev = curr->prev;
@@ -293,6 +286,7 @@ fenced_buffer_map(struct pb_buffer *buf,
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
void *map;
+ assert(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE);
assert(!(flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE));
flags &= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
@@ -334,6 +328,93 @@ fenced_buffer_unmap(struct pb_buffer *buf)
}
+static enum pipe_error
+fenced_buffer_validate(struct pb_buffer *buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ enum pipe_error ret;
+
+ if(!vl) {
+ /* invalidate */
+ fenced_buf->vl = NULL;
+ fenced_buf->validation_flags = 0;
+ return PIPE_OK;
+ }
+
+ assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+ assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE));
+ flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+
+ /* Buffer cannot be validated in two different lists */
+ if(fenced_buf->vl && fenced_buf->vl != vl)
+ return PIPE_ERROR_RETRY;
+
+ /* Do not validate if buffer is still mapped */
+ if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) {
+ /* TODO: wait for the thread that mapped the buffer to unmap it */
+ return PIPE_ERROR_RETRY;
+ }
+
+ if(fenced_buf->vl == vl &&
+ (fenced_buf->validation_flags & flags) == flags) {
+ /* Nothing to do -- buffer already validated */
+ return PIPE_OK;
+ }
+
+ /* Final sanity checking */
+ assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE));
+ assert(!fenced_buf->mapcount);
+
+ ret = pb_validate(fenced_buf->buffer, vl, flags);
+ if (ret != PIPE_OK)
+ return ret;
+
+ fenced_buf->vl = vl;
+ fenced_buf->validation_flags |= flags;
+
+ return PIPE_OK;
+}
+
+
+static void
+fenced_buffer_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+ struct fenced_buffer *fenced_buf;
+ struct fenced_buffer_list *fenced_list;
+ struct pb_fence_ops *ops;
+
+ fenced_buf = fenced_buffer(buf);
+ fenced_list = fenced_buf->list;
+ ops = fenced_list->ops;
+
+ if(fence == fenced_buf->fence) {
+ /* Nothing to do */
+ return;
+ }
+
+ assert(fenced_buf->vl);
+ assert(fenced_buf->validation_flags);
+
+ pipe_mutex_lock(fenced_list->mutex);
+ if (fenced_buf->fence)
+ _fenced_buffer_remove(fenced_list, fenced_buf);
+ if (fence) {
+ ops->fence_reference(ops, &fenced_buf->fence, fence);
+ fenced_buf->flags |= fenced_buf->validation_flags;
+ _fenced_buffer_add(fenced_buf);
+ }
+ pipe_mutex_unlock(fenced_list->mutex);
+
+ pb_fence(fenced_buf->buffer, fence);
+
+ fenced_buf->vl = NULL;
+ fenced_buf->validation_flags = 0;
+}
+
+
static void
fenced_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
@@ -344,11 +425,13 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf,
}
-const struct pb_vtbl
+static const struct pb_vtbl
fenced_buffer_vtbl = {
fenced_buffer_destroy,
fenced_buffer_map,
fenced_buffer_unmap,
+ fenced_buffer_validate,
+ fenced_buffer_fence,
fenced_buffer_get_base_buffer
};
@@ -388,54 +471,8 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list,
}
-void
-buffer_fence(struct pb_buffer *buf,
- struct pipe_fence_handle *fence)
-{
- struct fenced_buffer *fenced_buf;
- struct fenced_buffer_list *fenced_list;
- struct pipe_winsys *winsys;
- /* FIXME: receive this as a parameter */
- unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0;
-
- /* This is a public function, so be extra cautious with the buffer passed,
- * as happens frequently to receive null buffers, or pointer to buffers
- * other than fenced buffers. */
- assert(buf);
- if(!buf)
- return;
- assert(buf->vtbl == &fenced_buffer_vtbl);
- if(buf->vtbl != &fenced_buffer_vtbl)
- return;
-
- fenced_buf = fenced_buffer(buf);
- fenced_list = fenced_buf->list;
- winsys = fenced_list->winsys;
-
- if(!fence || fence == fenced_buf->fence) {
- /* Handle the same fence case specially, not only because it is a fast
- * path, but mostly to avoid serializing two writes with the same fence,
- * as that would bring the hardware down to synchronous operation without
- * any benefit.
- */
- fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE;
- return;
- }
-
- pipe_mutex_lock(fenced_list->mutex);
- if (fenced_buf->fence)
- _fenced_buffer_remove(fenced_list, fenced_buf);
- if (fence) {
- winsys->fence_reference(winsys, &fenced_buf->fence, fence);
- fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE;
- _fenced_buffer_add(fenced_buf);
- }
- pipe_mutex_unlock(fenced_list->mutex);
-}
-
-
struct fenced_buffer_list *
-fenced_buffer_list_create(struct pipe_winsys *winsys)
+fenced_buffer_list_create(struct pb_fence_ops *ops)
{
struct fenced_buffer_list *fenced_list;
@@ -443,7 +480,7 @@ fenced_buffer_list_create(struct pipe_winsys *winsys)
if (!fenced_list)
return NULL;
- fenced_list->winsys = winsys;
+ fenced_list->ops = ops;
LIST_INITHEAD(&fenced_list->delayed);
fenced_list->numDelayed = 0;
@@ -473,7 +510,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
void
fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
{
- struct pipe_winsys *winsys = fenced_list->winsys;
+ struct pb_fence_ops *ops = fenced_list->ops;
struct list_head *curr, *next;
struct fenced_buffer *fenced_buf;
struct pipe_fence_handle *prev_fence = NULL;
@@ -500,7 +537,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
while(curr != &fenced_list->delayed) {
int signaled;
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
- signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0);
+ signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
debug_printf("%10p %7u %10p %s\n",
fenced_buf,
fenced_buf->base.base.refcount,
@@ -536,6 +573,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
pipe_mutex_unlock(fenced_list->mutex);
+ fenced_list->ops->destroy(fenced_list->ops);
+
FREE(fenced_list);
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h
index 510f456508..034ca1e024 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h
@@ -44,14 +44,14 @@
* Between the handle's destruction, and the fence signalling, the buffer is
* stored in a fenced buffer list.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#ifndef PB_BUFFER_FENCED_H_
#define PB_BUFFER_FENCED_H_
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#ifdef __cplusplus
@@ -59,7 +59,6 @@ extern "C" {
#endif
-struct pipe_winsys;
struct pipe_buffer;
struct pipe_fence_handle;
@@ -70,12 +69,33 @@ struct pipe_fence_handle;
struct fenced_buffer_list;
-/**
- * The fenced buffer's virtual function table.
- *
- * NOTE: Made public for debugging purposes.
- */
-extern const struct pb_vtbl fenced_buffer_vtbl;
+struct pb_fence_ops
+{
+ void (*destroy)( struct pb_fence_ops *ops );
+
+ /** Set ptr = fence, with reference counting */
+ void (*fence_reference)( struct pb_fence_ops *ops,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence );
+
+ /**
+ * Checks whether the fence has been signalled.
+ * \param flag driver-specific meaning
+ * \return zero on success.
+ */
+ int (*fence_signalled)( struct pb_fence_ops *ops,
+ struct pipe_fence_handle *fence,
+ unsigned flag );
+
+ /**
+ * Wait for the fence to finish.
+ * \param flag driver-specific meaning
+ * \return zero on success.
+ */
+ int (*fence_finish)( struct pb_fence_ops *ops,
+ struct pipe_fence_handle *fence,
+ unsigned flag );
+};
/**
@@ -84,7 +104,7 @@ extern const struct pb_vtbl fenced_buffer_vtbl;
* See also fenced_bufmgr_create for a more convenient way to use this.
*/
struct fenced_buffer_list *
-fenced_buffer_list_create(struct pipe_winsys *winsys);
+fenced_buffer_list_create(struct pb_fence_ops *ops);
/**
@@ -115,17 +135,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced,
struct pb_buffer *buffer);
-/**
- * Set a buffer's fence.
- *
- * NOTE: Although it takes a generic pb_buffer argument, it will fail
- * on everything but buffers returned by fenced_buffer_create.
- */
-void
-buffer_fence(struct pb_buffer *buf,
- struct pipe_fence_handle *fence);
-
-
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index 1bf22a2ec0..282802b171 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -34,7 +34,7 @@
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "pb_buffer.h"
#include "pb_bufmgr.h"
@@ -81,6 +81,24 @@ malloc_buffer_unmap(struct pb_buffer *buf)
}
+static enum pipe_error
+malloc_buffer_validate(struct pb_buffer *buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ assert(0);
+ return PIPE_ERROR;
+}
+
+
+static void
+malloc_buffer_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+ assert(0);
+}
+
+
static void
malloc_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
@@ -96,6 +114,8 @@ malloc_buffer_vtbl = {
malloc_buffer_destroy,
malloc_buffer_map,
malloc_buffer_unmap,
+ malloc_buffer_validate,
+ malloc_buffer_fence,
malloc_buffer_get_base_buffer
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index cafbee045a..fec8db91c7 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -43,7 +43,7 @@
* - the fenced buffer manager, which will delay buffer destruction until the
* moment the card finishes processing it.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#ifndef PB_BUFMGR_H_
@@ -61,7 +61,6 @@ extern "C" {
struct pb_desc;
struct pipe_buffer;
-struct pipe_winsys;
/**
@@ -163,6 +162,8 @@ pb_cache_manager_create(struct pb_manager *provider,
unsigned usecs);
+struct pb_fence_ops;
+
/**
* Fenced buffer manager.
*
@@ -174,7 +175,7 @@ pb_cache_manager_create(struct pb_manager *provider,
*/
struct pb_manager *
fenced_bufmgr_create(struct pb_manager *provider,
- struct pipe_winsys *winsys);
+ struct pb_fence_ops *ops);
struct pb_manager *
@@ -183,6 +184,20 @@ pb_alt_manager_create(struct pb_manager *provider1,
/**
+ * Ondemand buffer manager.
+ *
+ * Buffers are created in malloc'ed memory (fast and cached), and the contents
+ * are transferred to a buffer from the provider (typically in slow uncached
+ * memory) when there is an attempt to validate the buffer.
+ *
+ * Ideal for situations where one does not know beforehand whether a given
+ * buffer will effectively be used by the hardware or not.
+ */
+struct pb_manager *
+pb_ondemand_manager_create(struct pb_manager *provider);
+
+
+/**
* Debug buffer manager to detect buffer under- and overflows.
*
* Band size should be a multiple of the largest alignment
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
index c956924cc7..db67d46c56 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
@@ -34,7 +34,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "pb_buffer.h"
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 8f118874ec..29117efe9b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -29,14 +29,13 @@
* \file
* Buffer cache.
*
- * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com>
* \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
*/
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_winsys.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
@@ -183,6 +182,25 @@ pb_cache_buffer_unmap(struct pb_buffer *_buf)
}
+static enum pipe_error
+pb_cache_buffer_validate(struct pb_buffer *_buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+ return pb_validate(buf->buffer, vl, flags);
+}
+
+
+static void
+pb_cache_buffer_fence(struct pb_buffer *_buf,
+ struct pipe_fence_handle *fence)
+{
+ struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+ pb_fence(buf->buffer, fence);
+}
+
+
static void
pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
@@ -198,6 +216,8 @@ pb_cache_buffer_vtbl = {
pb_cache_buffer_destroy,
pb_cache_buffer_map,
pb_cache_buffer_unmap,
+ pb_cache_buffer_validate,
+ pb_cache_buffer_fence,
pb_cache_buffer_get_base_buffer
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index 1675e6e182..070bf3f517 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -29,13 +29,12 @@
* \file
* Debug buffer manager to detect buffer under- and overflows.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_winsys.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -255,11 +254,35 @@ pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf,
}
+static enum pipe_error
+pb_debug_buffer_validate(struct pb_buffer *_buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
+
+ pb_debug_buffer_check(buf);
+
+ return pb_validate(buf->buffer, vl, flags);
+}
+
+
+static void
+pb_debug_buffer_fence(struct pb_buffer *_buf,
+ struct pipe_fence_handle *fence)
+{
+ struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
+ pb_fence(buf->buffer, fence);
+}
+
+
const struct pb_vtbl
pb_debug_buffer_vtbl = {
pb_debug_buffer_destroy,
pb_debug_buffer_map,
pb_debug_buffer_unmap,
+ pb_debug_buffer_validate,
+ pb_debug_buffer_fence,
pb_debug_buffer_get_base_buffer
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
index 8d67efab6c..144db5669b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
@@ -30,11 +30,11 @@
* \file
* A buffer manager that wraps buffers in fenced buffers.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.dot.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.dot.com>
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "pb_buffer.h"
@@ -90,8 +90,7 @@ fenced_bufmgr_create_buffer(struct pb_manager *mgr,
fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf);
if(!fenced_buf) {
- assert(buf->base.refcount == 1);
- pb_destroy(buf);
+ pb_reference(&buf, NULL);
}
return fenced_buf;
@@ -127,7 +126,7 @@ fenced_bufmgr_destroy(struct pb_manager *mgr)
struct pb_manager *
fenced_bufmgr_create(struct pb_manager *provider,
- struct pipe_winsys *winsys)
+ struct pb_fence_ops *ops)
{
struct fenced_pb_manager *fenced_mgr;
@@ -143,7 +142,7 @@ fenced_bufmgr_create(struct pb_manager *provider,
fenced_mgr->base.flush = fenced_bufmgr_flush;
fenced_mgr->provider = provider;
- fenced_mgr->fenced_list = fenced_buffer_list_create(winsys);
+ fenced_mgr->fenced_list = fenced_buffer_list_create(ops);
if(!fenced_mgr->fenced_list) {
FREE(fenced_mgr);
return NULL;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index 37984e7b7b..85ff3a09de 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -29,12 +29,12 @@
* \file
* Buffer manager using the old texture memory manager.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#include "pipe/p_defines.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
@@ -100,7 +100,7 @@ mm_buffer_destroy(struct pb_buffer *buf)
assert(buf->base.refcount == 0);
pipe_mutex_lock(mm->mutex);
- mmFreeMem(mm_buf->block);
+ u_mmFreeMem(mm_buf->block);
FREE(buf);
pipe_mutex_unlock(mm->mutex);
}
@@ -124,6 +124,27 @@ mm_buffer_unmap(struct pb_buffer *buf)
}
+static enum pipe_error
+mm_buffer_validate(struct pb_buffer *buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct mm_buffer *mm_buf = mm_buffer(buf);
+ struct mm_pb_manager *mm = mm_buf->mgr;
+ return pb_validate(mm->buffer, vl, flags);
+}
+
+
+static void
+mm_buffer_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+ struct mm_buffer *mm_buf = mm_buffer(buf);
+ struct mm_pb_manager *mm = mm_buf->mgr;
+ pb_fence(mm->buffer, fence);
+}
+
+
static void
mm_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
@@ -141,6 +162,8 @@ mm_buffer_vtbl = {
mm_buffer_destroy,
mm_buffer_map,
mm_buffer_unmap,
+ mm_buffer_validate,
+ mm_buffer_fence,
mm_buffer_get_base_buffer
};
@@ -154,8 +177,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr,
struct mm_buffer *mm_buf;
/* We don't handle alignments larger than the one initially set up */
- assert(desc->alignment % (1 << mm->align2) == 0);
- if(desc->alignment % (1 << mm->align2))
+ assert(pb_check_alignment(desc->alignment, 1 << mm->align2));
+ if(!pb_check_alignment(desc->alignment, 1 << mm->align2))
return NULL;
pipe_mutex_lock(mm->mutex);
@@ -175,7 +198,7 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr,
mm_buf->mgr = mm;
- mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
+ mm_buf->block = u_mmAllocMem(mm->heap, size, mm->align2, 0);
if(!mm_buf->block) {
debug_printf("warning: heap full\n");
#if 0
@@ -209,7 +232,7 @@ mm_bufmgr_destroy(struct pb_manager *mgr)
pipe_mutex_lock(mm->mutex);
- mmDestroy(mm->heap);
+ u_mmDestroy(mm->heap);
pb_unmap(mm->buffer);
pb_reference(&mm->buffer, NULL);
@@ -250,7 +273,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
if(!mm->map)
goto failure;
- mm->heap = mmInit(0, size);
+ mm->heap = u_mmInit(0, size);
if (!mm->heap)
goto failure;
@@ -258,7 +281,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
failure:
if(mm->heap)
- mmDestroy(mm->heap);
+ u_mmDestroy(mm->heap);
if(mm->map)
pb_unmap(mm->buffer);
if(mm)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
new file mode 100644
index 0000000000..3d9c7bba0b
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
@@ -0,0 +1,303 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * A variation of malloc buffers which get transferred to real graphics memory
+ * when there is an attempt to validate them.
+ *
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+struct pb_ondemand_manager;
+
+
+struct pb_ondemand_buffer
+{
+ struct pb_buffer base;
+
+ struct pb_ondemand_manager *mgr;
+
+ /** Regular malloc'ed memory */
+ void *data;
+ unsigned mapcount;
+
+ /** Real buffer */
+ struct pb_buffer *buffer;
+ size_t size;
+ struct pb_desc desc;
+};
+
+
+struct pb_ondemand_manager
+{
+ struct pb_manager base;
+
+ struct pb_manager *provider;
+};
+
+
+extern const struct pb_vtbl pb_ondemand_buffer_vtbl;
+
+static INLINE struct pb_ondemand_buffer *
+pb_ondemand_buffer(struct pb_buffer *buf)
+{
+ assert(buf);
+ assert(buf->vtbl == &pb_ondemand_buffer_vtbl);
+ return (struct pb_ondemand_buffer *)buf;
+}
+
+static INLINE struct pb_ondemand_manager *
+pb_ondemand_manager(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct pb_ondemand_manager *)mgr;
+}
+
+
+static void
+pb_ondemand_buffer_destroy(struct pb_buffer *_buf)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+ pb_reference(&buf->buffer, NULL);
+
+ align_free(buf->data);
+
+ FREE(buf);
+}
+
+
+static void *
+pb_ondemand_buffer_map(struct pb_buffer *_buf,
+ unsigned flags)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+ if(buf->buffer) {
+ assert(!buf->data);
+ return pb_map(buf->buffer, flags);
+ }
+ else {
+ assert(buf->data);
+ ++buf->mapcount;
+ return buf->data;
+ }
+}
+
+
+static void
+pb_ondemand_buffer_unmap(struct pb_buffer *_buf)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+ if(buf->buffer) {
+ assert(!buf->data);
+ pb_unmap(buf->buffer);
+ }
+ else {
+ assert(buf->data);
+ assert(buf->mapcount);
+ if(buf->mapcount)
+ --buf->mapcount;
+ }
+}
+
+
+static enum pipe_error
+pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf)
+{
+ if(!buf->buffer) {
+ struct pb_manager *provider = buf->mgr->provider;
+ uint8_t *map;
+
+ assert(!buf->mapcount);
+
+ buf->buffer = provider->create_buffer(provider, buf->size, &buf->desc);
+ if(!buf->buffer)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ if(!map) {
+ pb_reference(&buf->buffer, NULL);
+ return PIPE_ERROR;
+ }
+
+ memcpy(map, buf->data, buf->size);
+
+ pb_unmap(buf->buffer);
+
+ if(!buf->mapcount) {
+ FREE(buf->data);
+ buf->data = NULL;
+ }
+ }
+
+ return PIPE_OK;
+}
+
+static enum pipe_error
+pb_ondemand_buffer_validate(struct pb_buffer *_buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+ enum pipe_error ret;
+
+ assert(!buf->mapcount);
+ if(buf->mapcount)
+ return PIPE_ERROR;
+
+ ret = pb_ondemand_buffer_instantiate(buf);
+ if(ret != PIPE_OK)
+ return ret;
+
+ return pb_validate(buf->buffer, vl, flags);
+}
+
+
+static void
+pb_ondemand_buffer_fence(struct pb_buffer *_buf,
+ struct pipe_fence_handle *fence)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+ assert(buf->buffer);
+ if(!buf->buffer)
+ return;
+
+ pb_fence(buf->buffer, fence);
+}
+
+
+static void
+pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+ if(pb_ondemand_buffer_instantiate(buf) != PIPE_OK) {
+ assert(0);
+ *base_buf = &buf->base;
+ *offset = 0;
+ return;
+ }
+
+ pb_get_base_buffer(buf->buffer, base_buf, offset);
+}
+
+
+const struct pb_vtbl
+pb_ondemand_buffer_vtbl = {
+ pb_ondemand_buffer_destroy,
+ pb_ondemand_buffer_map,
+ pb_ondemand_buffer_unmap,
+ pb_ondemand_buffer_validate,
+ pb_ondemand_buffer_fence,
+ pb_ondemand_buffer_get_base_buffer
+};
+
+
+static struct pb_buffer *
+pb_ondemand_manager_create_buffer(struct pb_manager *_mgr,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
+ struct pb_ondemand_buffer *buf;
+
+ buf = CALLOC_STRUCT(pb_ondemand_buffer);
+ if(!buf)
+ return NULL;
+
+ buf->base.base.refcount = 1;
+ buf->base.base.alignment = desc->alignment;
+ buf->base.base.usage = desc->usage;
+ buf->base.base.size = size;
+ buf->base.vtbl = &pb_ondemand_buffer_vtbl;
+
+ buf->mgr = mgr;
+
+ buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment);
+ if(!buf->data) {
+ FREE(buf);
+ return NULL;
+ }
+
+ buf->size = size;
+ buf->desc = *desc;
+
+ return &buf->base;
+}
+
+
+static void
+pb_ondemand_manager_flush(struct pb_manager *_mgr)
+{
+ struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
+
+ mgr->provider->flush(mgr->provider);
+}
+
+
+static void
+pb_ondemand_manager_destroy(struct pb_manager *_mgr)
+{
+ struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
+
+ FREE(mgr);
+}
+
+
+struct pb_manager *
+pb_ondemand_manager_create(struct pb_manager *provider)
+{
+ struct pb_ondemand_manager *mgr;
+
+ if(!provider)
+ return NULL;
+
+ mgr = CALLOC_STRUCT(pb_ondemand_manager);
+ if(!mgr)
+ return NULL;
+
+ mgr->base.destroy = pb_ondemand_manager_destroy;
+ mgr->base.create_buffer = pb_ondemand_manager_create_buffer;
+ mgr->base.flush = pb_ondemand_manager_flush;
+
+ mgr->provider = provider;
+
+ return &mgr->base;
+}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
index 61ac291ed7..12447acfd9 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
@@ -30,13 +30,13 @@
* \file
* Batch buffer pool management.
*
- * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com>
* \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
*/
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
@@ -138,6 +138,27 @@ pool_buffer_unmap(struct pb_buffer *buf)
}
+static enum pipe_error
+pool_buffer_validate(struct pb_buffer *buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct pool_buffer *pool_buf = pool_buffer(buf);
+ struct pool_pb_manager *pool = pool_buf->mgr;
+ return pb_validate(pool->buffer, vl, flags);
+}
+
+
+static void
+pool_buffer_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+ struct pool_buffer *pool_buf = pool_buffer(buf);
+ struct pool_pb_manager *pool = pool_buf->mgr;
+ pb_fence(pool->buffer, fence);
+}
+
+
static void
pool_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
@@ -155,6 +176,8 @@ pool_buffer_vtbl = {
pool_buffer_destroy,
pool_buffer_map,
pool_buffer_unmap,
+ pool_buffer_validate,
+ pool_buffer_fence,
pool_buffer_get_base_buffer
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 2a80154920..a3259351b9 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -38,7 +38,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_error.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
@@ -248,6 +248,25 @@ pb_slab_buffer_unmap(struct pb_buffer *_buf)
}
+/**
+ * Validate a slab buffer.
+ *
+ * The request is forwarded, with the same validation list and flags, to
+ * the buffer object of the slab this buffer belongs to (buf->slab->bo);
+ * the result of that call is returned unchanged.
+ */
+static enum pipe_error
+pb_slab_buffer_validate(struct pb_buffer *_buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+ return pb_validate(buf->slab->bo, vl, flags);
+}
+
+
+/**
+ * Fence a slab buffer.
+ *
+ * The fence is passed on to the buffer object of the slab this buffer
+ * belongs to (buf->slab->bo).
+ */
+static void
+pb_slab_buffer_fence(struct pb_buffer *_buf,
+ struct pipe_fence_handle *fence)
+{
+ struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+ pb_fence(buf->slab->bo, fence);
+}
+
+
static void
pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
@@ -264,6 +283,8 @@ pb_slab_buffer_vtbl = {
pb_slab_buffer_destroy,
pb_slab_buffer_map,
pb_slab_buffer_unmap,
+ pb_slab_buffer_validate,
+ pb_slab_buffer_fence,
pb_slab_buffer_get_base_buffer
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c
index 1e54fc39d4..150fd50618 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_validate.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c
@@ -36,7 +36,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_error.h"
#include "util/u_memory.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pb_buffer.h"
#include "pb_buffer_fenced.h"
@@ -46,9 +46,16 @@
#define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */
+struct pb_validate_entry
+{
+ struct pb_buffer *buf;
+ unsigned flags;
+};
+
+
struct pb_validate
{
- struct pb_buffer **buffers;
+ struct pb_validate_entry *entries;
unsigned used;
unsigned size;
};
@@ -56,43 +63,50 @@ struct pb_validate
enum pipe_error
pb_validate_add_buffer(struct pb_validate *vl,
- struct pb_buffer *buf)
+ struct pb_buffer *buf,
+ unsigned flags)
{
assert(buf);
if(!buf)
return PIPE_ERROR;
+ assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+ assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE));
+ flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+
/* We only need to store one reference for each buffer, so avoid storing
- * consecutive references for the same buffer. It might not be the more
- * common pasttern, but it is easy to implement.
+ * consecutive references for the same buffer. It might not be the most
+ * common pattern, but it is easy to implement.
*/
- if(vl->used && vl->buffers[vl->used - 1] == buf) {
+ if(vl->used && vl->entries[vl->used - 1].buf == buf) {
+ vl->entries[vl->used - 1].flags |= flags;
return PIPE_OK;
}
/* Grow the table */
if(vl->used == vl->size) {
unsigned new_size;
- struct pb_buffer **new_buffers;
+ struct pb_validate_entry *new_entries;
new_size = vl->size * 2;
if(!new_size)
return PIPE_ERROR_OUT_OF_MEMORY;
- new_buffers = (struct pb_buffer **)REALLOC(vl->buffers,
- vl->size*sizeof(struct pb_buffer *),
- new_size*sizeof(struct pb_buffer *));
- if(!new_buffers)
+ new_entries = (struct pb_validate_entry *)REALLOC(vl->entries,
+ vl->size*sizeof(struct pb_validate_entry),
+ new_size*sizeof(struct pb_validate_entry));
+ if(!new_entries)
return PIPE_ERROR_OUT_OF_MEMORY;
- memset(new_buffers + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_buffer *));
+ memset(new_entries + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_validate_entry));
vl->size = new_size;
- vl->buffers = new_buffers;
+ vl->entries = new_entries;
}
- assert(!vl->buffers[vl->used]);
- pb_reference(&vl->buffers[vl->used], buf);
+ assert(!vl->entries[vl->used].buf);
+ pb_reference(&vl->entries[vl->used].buf, buf);
+ vl->entries[vl->used].flags = flags;
++vl->used;
return PIPE_OK;
@@ -100,10 +114,36 @@ pb_validate_add_buffer(struct pb_validate *vl,
enum pipe_error
+pb_validate_foreach(struct pb_validate *vl,
+ enum pipe_error (*callback)(struct pb_buffer *buf, void *data),
+ void *data)
+{
+ unsigned i;
+ for(i = 0; i < vl->used; ++i) {
+ enum pipe_error ret;
+ ret = callback(vl->entries[i].buf, data);
+ if(ret != PIPE_OK)
+ return ret;
+ }
+ return PIPE_OK;
+}
+
+
+enum pipe_error
pb_validate_validate(struct pb_validate *vl)
{
- /* FIXME: go through each buffer, ensure its not mapped, its address is
- * available -- requires a new pb_buffer interface */
+ unsigned i;
+
+ for(i = 0; i < vl->used; ++i) {
+ enum pipe_error ret;
+ ret = pb_validate(vl->entries[i].buf, vl, vl->entries[i].flags);
+ if(ret != PIPE_OK) {
+ while(i--)
+ pb_validate(vl->entries[i].buf, NULL, 0);
+ return ret;
+ }
+ }
+
return PIPE_OK;
}
@@ -114,8 +154,8 @@ pb_validate_fence(struct pb_validate *vl,
{
unsigned i;
for(i = 0; i < vl->used; ++i) {
- buffer_fence(vl->buffers[i], fence);
- pb_reference(&vl->buffers[i], NULL);
+ pb_fence(vl->entries[i].buf, fence);
+ pb_reference(&vl->entries[i].buf, NULL);
}
vl->used = 0;
}
@@ -126,8 +166,8 @@ pb_validate_destroy(struct pb_validate *vl)
{
unsigned i;
for(i = 0; i < vl->used; ++i)
- pb_reference(&vl->buffers[i], NULL);
- FREE(vl->buffers);
+ pb_reference(&vl->entries[i].buf, NULL);
+ FREE(vl->entries);
FREE(vl);
}
@@ -142,8 +182,8 @@ pb_validate_create()
return NULL;
vl->size = PB_VALIDATE_INITIAL_SIZE;
- vl->buffers = (struct pb_buffer **)CALLOC(vl->size, sizeof(struct pb_buffer *));
- if(!vl->buffers) {
+ vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_validate_entry));
+ if(!vl->entries) {
FREE(vl);
return NULL;
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h
index 3db1d5330b..dfb84df1ce 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_validate.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h
@@ -58,7 +58,13 @@ struct pb_validate;
enum pipe_error
pb_validate_add_buffer(struct pb_validate *vl,
- struct pb_buffer *buf);
+ struct pb_buffer *buf,
+ unsigned flags);
+
+enum pipe_error
+pb_validate_foreach(struct pb_validate *vl,
+ enum pipe_error (*callback)(struct pb_buffer *buf, void *data),
+ void *data);
/**
* Validate all buffers for hardware access.
@@ -71,7 +77,7 @@ pb_validate_validate(struct pb_validate *vl);
/**
* Fence all buffers and clear the list.
*
- * Should be called right before issuing commands to the hardware.
+ * Should be called right after issuing commands to the hardware.
*/
void
pb_validate_fence(struct pb_validate *vl,
diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c
deleted file mode 100644
index 28d137dbc4..0000000000
--- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Implementation of client buffer (also designated as "user buffers"), which
- * are just state-tracker owned data masqueraded as buffers.
- *
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
- */
-
-
-#include "pipe/p_winsys.h"
-#include "util/u_memory.h"
-
-#include "pb_buffer.h"
-
-
-/**
- * User buffers are special buffers that initially reference memory
- * held by the user but which may if necessary copy that memory into
- * device memory behind the scenes, for submission to hardware.
- *
- * These are particularly useful when the referenced data is never
- * submitted to hardware at all, in the particular case of software
- * vertex processing.
- */
-struct pb_user_buffer
-{
- struct pb_buffer base;
- void *data;
-};
-
-
-extern const struct pb_vtbl pb_user_buffer_vtbl;
-
-
-static INLINE struct pb_user_buffer *
-pb_user_buffer(struct pb_buffer *buf)
-{
- assert(buf);
- assert(buf->vtbl == &pb_user_buffer_vtbl);
- return (struct pb_user_buffer *)buf;
-}
-
-
-static void
-pb_user_buffer_destroy(struct pb_buffer *buf)
-{
- assert(buf);
- FREE(buf);
-}
-
-
-static void *
-pb_user_buffer_map(struct pb_buffer *buf,
- unsigned flags)
-{
- return pb_user_buffer(buf)->data;
-}
-
-
-static void
-pb_user_buffer_unmap(struct pb_buffer *buf)
-{
- /* No-op */
-}
-
-
-static void
-pb_user_buffer_get_base_buffer(struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset)
-{
- *base_buf = buf;
- *offset = 0;
-}
-
-
-const struct pb_vtbl
-pb_user_buffer_vtbl = {
- pb_user_buffer_destroy,
- pb_user_buffer_map,
- pb_user_buffer_unmap,
- pb_user_buffer_get_base_buffer
-};
-
-
-static struct pipe_buffer *
-pb_winsys_user_buffer_create(struct pipe_winsys *winsys,
- void *data,
- unsigned bytes)
-{
- struct pb_user_buffer *buf = CALLOC_STRUCT(pb_user_buffer);
-
- if(!buf)
- return NULL;
-
- buf->base.base.refcount = 1;
- buf->base.base.size = bytes;
- buf->base.base.alignment = 0;
- buf->base.base.usage = 0;
-
- buf->base.vtbl = &pb_user_buffer_vtbl;
- buf->data = data;
-
- return &buf->base.base;
-}
-
-
-static void *
-pb_winsys_buffer_map(struct pipe_winsys *winsys,
- struct pipe_buffer *buf,
- unsigned flags)
-{
- (void)winsys;
- return pb_map(pb_buffer(buf), flags);
-}
-
-
-static void
-pb_winsys_buffer_unmap(struct pipe_winsys *winsys,
- struct pipe_buffer *buf)
-{
- (void)winsys;
- pb_unmap(pb_buffer(buf));
-}
-
-
-static void
-pb_winsys_buffer_destroy(struct pipe_winsys *winsys,
- struct pipe_buffer *buf)
-{
- (void)winsys;
- pb_destroy(pb_buffer(buf));
-}
-
-
-void
-pb_init_winsys(struct pipe_winsys *winsys)
-{
- winsys->user_buffer_create = pb_winsys_user_buffer_create;
- winsys->buffer_map = pb_winsys_buffer_map;
- winsys->buffer_unmap = pb_winsys_buffer_unmap;
- winsys->buffer_destroy = pb_winsys_buffer_destroy;
-}
diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile
index 39b8a4dbd7..ab8ea464c6 100644
--- a/src/gallium/auxiliary/rtasm/Makefile
+++ b/src/gallium/auxiliary/rtasm/Makefile
@@ -7,9 +7,7 @@ C_SOURCES = \
rtasm_cpu.c \
rtasm_execmem.c \
rtasm_x86sse.c \
+ rtasm_ppc.c \
rtasm_ppc_spe.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript
index 8ea25922aa..eb48368acc 100644
--- a/src/gallium/auxiliary/rtasm/SConscript
+++ b/src/gallium/auxiliary/rtasm/SConscript
@@ -6,6 +6,7 @@ rtasm = env.ConvenienceLibrary(
'rtasm_cpu.c',
'rtasm_execmem.c',
'rtasm_x86sse.c',
+ 'rtasm_ppc.c',
'rtasm_ppc_spe.c',
])
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index 5499018b21..03bdd47238 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -26,7 +26,7 @@
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "rtasm_cpu.h"
diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
index f16191cb61..5acc5bcb7b 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
@@ -31,19 +31,20 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "util/u_memory.h"
#include "rtasm_execmem.h"
-#if defined(__linux__)
+#if defined(PIPE_OS_LINUX)
+
/*
* Allocate a large block of memory which can hold code then dole it out
* in pieces by means of the generic memory manager code.
-*/
+ */
#include <unistd.h>
#include <sys/mman.h>
@@ -62,7 +63,7 @@ static void
init_heap(void)
{
if (!exec_heap)
- exec_heap = mmInit( 0, EXEC_HEAP_SIZE );
+ exec_heap = u_mmInit( 0, EXEC_HEAP_SIZE );
if (!exec_mem)
exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE,
@@ -83,7 +84,7 @@ rtasm_exec_malloc(size_t size)
if (exec_heap) {
size = (size + 31) & ~31; /* next multiple of 32 bytes */
- block = mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */
+ block = u_mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */
}
if (block)
@@ -103,17 +104,17 @@ rtasm_exec_free(void *addr)
pipe_mutex_lock(exec_mutex);
if (exec_heap) {
- struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem);
+ struct mem_block *block = u_mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem);
if (block)
- mmFreeMem(block);
+ u_mmFreeMem(block);
}
pipe_mutex_unlock(exec_mutex);
}
-#else
+#else /* PIPE_OS_LINUX */
/*
* Just use regular memory.
@@ -133,4 +134,4 @@ rtasm_exec_free(void *addr)
}
-#endif
+#endif /* PIPE_OS_LINUX */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
new file mode 100644
index 0000000000..e3586482db
--- /dev/null
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
@@ -0,0 +1,1077 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * PPC code generation.
+ * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf
+ * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf
+ *
+ * Other PPC refs:
+ * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
+ * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html
+ * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
+ *
+ * \author Brian Paul
+ */
+
+
+#include <stdio.h>
+#include "util/u_memory.h"
+#include "util/u_debug.h"
+#include "rtasm_execmem.h"
+#include "rtasm_ppc.h"
+
+
+/**
+ * Initialize a ppc_function for code generation.
+ *
+ * Clears the whole struct, allocates an initial instruction buffer of
+ * max_inst instructions from the executable-memory allocator, disables
+ * printing and reserves the general purpose registers that must not be
+ * handed out by the allocator.
+ *
+ * The result of rtasm_exec_malloc() is not checked here; p->store may be
+ * NULL afterwards.
+ */
+void
+ppc_init_func(struct ppc_function *p)
+{
+ uint i;
+
+ memset(p, 0, sizeof(*p));
+
+ p->num_inst = 0;
+ p->max_inst = 100; /* first guess at buffer size */
+ p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
+ p->reg_used = 0x0;
+ p->fp_used = 0x0;
+ p->vec_used = 0x0;
+
+ p->print = FALSE;
+ p->indent = 0;
+
+ /* Reserve gp registers 0..2 and 12..PPC_NUM_REGS-1, which leaves only
+ * registers 3..11 available for allocation for now.
+ * NOTE(review): this comment used to read "3..12", but the second loop
+ * starts at 12 and so reserves r12 as well -- confirm which is intended.
+ */
+ for (i = 0; i < 3; i++)
+ ppc_reserve_register(p, i);
+ for (i = 12; i < PPC_NUM_REGS; i++)
+ ppc_reserve_register(p, i);
+}
+
+
+void
+ppc_release_func(struct ppc_function *p)
+{
+ assert(p->num_inst <= p->max_inst);
+ if (p->store != NULL) {
+ rtasm_exec_free(p->store);
+ }
+ p->store = NULL;
+}
+
+
+uint
+ppc_num_instructions(const struct ppc_function *p)
+{
+ return p->num_inst;
+}
+
+
+void (*ppc_get_func(struct ppc_function *p))(void)
+{
+#if 0
+ DUMP_END();
+ if (DISASSEM && p->store)
+ debug_printf("disassemble %p %p\n", p->store, p->csr);
+
+ if (p->store == p->error_overflow)
+ return (void (*)(void)) NULL;
+ else
+#endif
+ return (void (*)(void)) p->store;
+}
+
+
+void
+ppc_dump_func(const struct ppc_function *p)
+{
+ uint i;
+ for (i = 0; i < p->num_inst; i++) {
+ debug_printf("%3u: 0x%08x\n", i, p->store[i]);
+ }
+}
+
+
+void
+ppc_print_code(struct ppc_function *p, boolean enable)
+{
+ p->print = enable;
+}
+
+
+void
+ppc_indent(struct ppc_function *p, int spaces)
+{
+ p->indent += spaces;
+}
+
+
+static void
+indent(const struct ppc_function *p)
+{
+ int i;
+ for (i = 0; i < p->indent; i++) {
+ putchar(' ');
+ }
+}
+
+
+void
+ppc_comment(struct ppc_function *p, int rel_indent, const char *s)
+{
+ if (p->print) {
+ p->indent += rel_indent;
+ indent(p);
+ p->indent -= rel_indent;
+ printf("# %s\n", s);
+ }
+}
+
+
+/**
+ * Mark a general purpose register as being unavailable.
+ * \param reg  register index, must be in [0, PPC_NUM_REGS)
+ * \return the register index that was passed in
+ */
+int
+ppc_reserve_register(struct ppc_function *p, int reg)
+{
+ assert(reg >= 0);
+ assert(reg < PPC_NUM_REGS);
+ /* Shift an unsigned 64-bit one: "1 << 31" overflows a signed int, and
+ * this is reached for the highest register from ppc_init_func().
+ */
+ p->reg_used |= ((uint64_t) 1 << reg);
+ return reg;
+}
+
+
+/**
+ * Allocate a general purpose register.
+ * Scans upward from register 0 and claims the first register whose bit
+ * is clear in p->reg_used.
+ * \return register index or -1 if none left.
+ */
+int
+ppc_allocate_register(struct ppc_function *p)
+{
+ unsigned i;
+ for (i = 0; i < PPC_NUM_REGS; i++) {
+ /* widen before shifting: "1 << i" is evaluated as int and overflows
+ * (and then sign-extends into the mask) once i reaches 31 */
+ const uint64_t mask = (uint64_t) 1 << i;
+ if ((p->reg_used & mask) == 0) {
+ p->reg_used |= mask;
+ return (int) i;
+ }
+ }
+ printf("OUT OF PPC registers!\n");
+ return -1;
+}
+
+
+/**
+ * Mark the given general purpose register as "unallocated".
+ * \param reg  index of a register that is currently marked as used,
+ *             must be in [0, PPC_NUM_REGS)
+ */
+void
+ppc_release_register(struct ppc_function *p, int reg)
+{
+ assert(reg >= 0);
+ assert(reg < PPC_NUM_REGS);
+ /* use an unsigned 64-bit one so that bit 31 can be shifted safely */
+ assert(p->reg_used & ((uint64_t) 1 << reg));
+ p->reg_used &= ~((uint64_t) 1 << reg);
+}
+
+
+/**
+ * Allocate a floating point register.
+ * Scans upward from register 0 and claims the first register whose bit
+ * is clear in p->fp_used.
+ * \return register index or -1 if none left.
+ */
+int
+ppc_allocate_fp_register(struct ppc_function *p)
+{
+ unsigned i;
+ for (i = 0; i < PPC_NUM_FP_REGS; i++) {
+ /* widen before shifting: "1 << i" is evaluated as int and overflows
+ * (and then sign-extends into the mask) once i reaches 31 */
+ const uint64_t mask = (uint64_t) 1 << i;
+ if ((p->fp_used & mask) == 0) {
+ p->fp_used |= mask;
+ return (int) i;
+ }
+ }
+ printf("OUT OF PPC FP registers!\n");
+ return -1;
+}
+
+
+/**
+ * Mark the given floating point register as "unallocated".
+ * \param reg  index of a register that is currently marked as used,
+ *             must be in [0, PPC_NUM_FP_REGS)
+ */
+void
+ppc_release_fp_register(struct ppc_function *p, int reg)
+{
+ assert(reg >= 0);
+ assert(reg < PPC_NUM_FP_REGS);
+ /* use an unsigned 64-bit one so that bit 31 can be shifted safely */
+ assert(p->fp_used & ((uint64_t) 1 << reg));
+ p->fp_used &= ~((uint64_t) 1 << reg);
+}
+
+
+/**
+ * Allocate a vector register.
+ * Scans upward from register 0 and claims the first register whose bit
+ * is clear in p->vec_used.
+ * \return register index or -1 if none left.
+ */
+int
+ppc_allocate_vec_register(struct ppc_function *p)
+{
+ unsigned i;
+ for (i = 0; i < PPC_NUM_VEC_REGS; i++) {
+ /* widen before shifting: "1 << i" is evaluated as int and overflows
+ * (and then sign-extends into the mask) once i reaches 31 */
+ const uint64_t mask = (uint64_t) 1 << i;
+ if ((p->vec_used & mask) == 0) {
+ p->vec_used |= mask;
+ return (int) i;
+ }
+ }
+ printf("OUT OF PPC VEC registers!\n");
+ return -1;
+}
+
+
+/**
+ * Mark the given vector register as "unallocated".
+ * \param reg  index of a register that is currently marked as used,
+ *             must be in [0, PPC_NUM_VEC_REGS)
+ */
+void
+ppc_release_vec_register(struct ppc_function *p, int reg)
+{
+ assert(reg >= 0);
+ assert(reg < PPC_NUM_VEC_REGS);
+ /* use an unsigned 64-bit one so that bit 31 can be shifted safely */
+ assert(p->vec_used & ((uint64_t) 1 << reg));
+ p->vec_used &= ~((uint64_t) 1 << reg);
+}
+
+
+/**
+ * Append instruction to instruction buffer. Grow buffer if out of room.
+ */
+static void
+emit_instruction(struct ppc_function *p, uint32_t inst_bits)
+{
+ if (!p->store)
+ return; /* out of memory, drop the instruction */
+
+ if (p->num_inst == p->max_inst) {
+ /* allocate larger buffer */
+ uint32_t *newbuf;
+ p->max_inst *= 2; /* 2x larger */
+ newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
+ if (newbuf) {
+ memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE);
+ }
+ rtasm_exec_free(p->store);
+ p->store = newbuf;
+ if (!p->store) {
+ /* out of memory */
+ p->num_inst = 0;
+ return;
+ }
+ }
+
+ p->store[p->num_inst++] = inst_bits;
+}
+
+
+union vx_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned vD:5;
+ unsigned vA:5;
+ unsigned vB:5;
+ unsigned op2:11;
+ } inst;
+};
+
+static INLINE void
+emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
+ const char *format, boolean transpose)
+{
+ union vx_inst inst;
+ inst.inst.op = 4;
+ inst.inst.vD = vD;
+ inst.inst.vA = vA;
+ inst.inst.vB = vB;
+ inst.inst.op2 = op2;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ if (transpose)
+ printf(format, vD, vB, vA);
+ else
+ printf(format, vD, vA, vB);
+ }
+}
+
+
+union vxr_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned vD:5;
+ unsigned vA:5;
+ unsigned vB:5;
+ unsigned rC:1;
+ unsigned op2:10;
+ } inst;
+};
+
+static INLINE void
+emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
+ const char *format)
+{
+ union vxr_inst inst;
+ inst.inst.op = 4;
+ inst.inst.vD = vD;
+ inst.inst.vA = vA;
+ inst.inst.vB = vB;
+ inst.inst.rC = 0;
+ inst.inst.op2 = op2;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf(format, vD, vA, vB);
+ }
+}
+
+
+union va_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned vD:5;
+ unsigned vA:5;
+ unsigned vB:5;
+ unsigned vC:5;
+ unsigned op2:6;
+ } inst;
+};
+
+static INLINE void
+emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC,
+ const char *format)
+{
+ union va_inst inst;
+ inst.inst.op = 4;
+ inst.inst.vD = vD;
+ inst.inst.vA = vA;
+ inst.inst.vB = vB;
+ inst.inst.vC = vC;
+ inst.inst.op2 = op2;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf(format, vD, vA, vB, vC);
+ }
+}
+
+
+union i_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned li:24;
+ unsigned aa:1;
+ unsigned lk:1;
+ } inst;
+};
+
+static INLINE void
+emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk)
+{
+ union i_inst inst;
+ inst.inst.op = op;
+ inst.inst.li = li;
+ inst.inst.aa = aa;
+ inst.inst.lk = lk;
+ emit_instruction(p, inst.bits);
+}
+
+
+union xl_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned bo:5;
+ unsigned bi:5;
+ unsigned unused:3;
+ unsigned bh:2;
+ unsigned op2:10;
+ unsigned lk:1;
+ } inst;
+};
+
+static INLINE void
+emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh,
+ uint op2, uint lk)
+{
+ union xl_inst inst;
+ inst.inst.op = op;
+ inst.inst.bo = bo;
+ inst.inst.bi = bi;
+ inst.inst.unused = 0x0;
+ inst.inst.bh = bh;
+ inst.inst.op2 = op2;
+ inst.inst.lk = lk;
+ emit_instruction(p, inst.bits);
+}
+
+static INLINE void
+dump_xl(const char *name, uint inst)
+{
+ union xl_inst i;
+
+ i.bits = inst;
+ debug_printf("%s = 0x%08x\n", name, inst);
+ debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op);
+ debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo);
+ debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi);
+ debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused);
+ debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh);
+ debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2);
+ debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk);
+}
+
+
+union x_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned vrs:5;
+ unsigned ra:5;
+ unsigned rb:5;
+ unsigned op2:10;
+ unsigned unused:1;
+ } inst;
+};
+
+static INLINE void
+emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2,
+ const char *format)
+{
+ union x_inst inst;
+ inst.inst.op = op;
+ inst.inst.vrs = vrs;
+ inst.inst.ra = ra;
+ inst.inst.rb = rb;
+ inst.inst.op2 = op2;
+ inst.inst.unused = 0x0;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf(format, vrs, ra, rb);
+ }
+}
+
+
+union d_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned rt:5;
+ unsigned ra:5;
+ unsigned si:16;
+ } inst;
+};
+
+static INLINE void
+emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si,
+ const char *format, boolean transpose)
+{
+ union d_inst inst;
+ assert(si >= -32768);
+ assert(si <= 32767);
+ inst.inst.op = op;
+ inst.inst.rt = rt;
+ inst.inst.ra = ra;
+ inst.inst.si = (unsigned) (si & 0xffff);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ if (transpose)
+ printf(format, rt, si, ra);
+ else
+ printf(format, rt, ra, si);
+ }
+}
+
+
+union a_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned frt:5;
+ unsigned fra:5;
+ unsigned frb:5;
+ unsigned unused:5;
+ unsigned op2:5;
+ unsigned rc:1;
+ } inst;
+};
+
+static INLINE void
+emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
+ uint rc, const char *format)
+{
+ union a_inst inst;
+ inst.inst.op = op;
+ inst.inst.frt = frt;
+ inst.inst.fra = fra;
+ inst.inst.frb = frb;
+ inst.inst.unused = 0x0;
+ inst.inst.op2 = op2;
+ inst.inst.rc = rc;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf(format, frt, fra, frb);
+ }
+}
+
+
+union xo_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned rt:5;
+ unsigned ra:5;
+ unsigned rb:5;
+ unsigned oe:1;
+ unsigned op2:9;
+ unsigned rc:1;
+ } inst;
+};
+
+static INLINE void
+emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe,
+ uint op2, uint rc, const char *format)
+{
+ union xo_inst inst;
+ inst.inst.op = op;
+ inst.inst.rt = rt;
+ inst.inst.ra = ra;
+ inst.inst.rb = rb;
+ inst.inst.oe = oe;
+ inst.inst.op2 = op2;
+ inst.inst.rc = rc;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf(format, rt, ra, rb);
+ }
+}
+
+
+
+
+
+/**
+ ** float vector arithmetic
+ **/
+
+/** vector float add: emits a VX-form instruction with extended opcode 10 */
+void
+ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ /* destination register is printed with the "v" prefix like the sources */
+ emit_vx(p, 10, vD, vA, vB, "vaddfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float subtract */
+void
+ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float min */
+void
+ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float max */
+void
+ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float mult add: vD = vA * vB + vC */
+void
+ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+ /* note arg order */
+ emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n");
+}
+
+/** vector float negative mult subtract: vD = vA - vB * vC */
+void
+ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+ /* note arg order */
+ emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n");
+}
+
+/** vector float compare greater than (emitted with the rC bit cleared) */
+void
+ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ /* terminate the printed line like every other emitter does */
+ emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u\n");
+}
+
+/** vector float compare greater than or equal to (rC bit cleared) */
+void
+ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ /* terminate the printed line like every other emitter does */
+ emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u\n");
+}
+
+/** vector float compare equal (emitted with the rC bit cleared) */
+void
+ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ /* terminate the printed line like every other emitter does */
+ emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u\n");
+}
+
+/** vector float 2^x */
+void
+ppc_vexptefp(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float log2(x) */
+void
+ppc_vlogefp(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float reciprocal */
+void
+ppc_vrefp(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float reciprocal sqrt estimate */
+void
+ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to negative infinity */
+void
+ppc_vrfim(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to positive infinity */
+void
+ppc_vrfip(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to nearest int */
+void
+ppc_vrfin(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to int toward zero */
+void
+ppc_vrfiz(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector store: store vR at mem[rA+rB] */
+void
+ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB)
+{
+ emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n");
+}
+
+/** vector load: vR = mem[rA+rB] */
+void
+ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB)
+{
+ emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n");
+}
+
+/** load vector element word: vR = mem_word[ra+rb] */
+void
+ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB)
+{
+ emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n");
+}
+
+
+
+
+/**
+ ** vector bitwise operations
+ **/
+
+/** vector and */
+void
+ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector and complement */
+void
+ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector or */
+void
+ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector nor */
+void
+ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector xor */
+void
+ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** Pseudo-instruction: vector move */
+void
+ppc_vmove(struct ppc_function *p, uint vD, uint vA)
+{
+ boolean print = p->print;
+ p->print = FALSE;
+ ppc_vor(p, vD, vA, vA);
+ if (print) {
+ indent(p);
+ printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA);
+ }
+ p->print = print;
+}
+
+/** Set vector register to {0,0,0,0} */
+void
+ppc_vzero(struct ppc_function *p, uint vr)
+{
+ boolean print = p->print;
+ p->print = FALSE;
+ ppc_vxor(p, vr, vr, vr);
+ if (print) {
+ indent(p);
+ printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr);
+ }
+ p->print = print;
+}
+
+
+
+
+/**
+ ** Vector shuffle / select / splat / etc
+ **/
+
+/** vector permute: emits a VA-form instruction with extended opcode 43 */
+void
+ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+ /* operands are vector registers: print them as v<N>, and end the line */
+ emit_va(p, 43, vD, vA, vB, vC, "vperm\tv%u, v%u, v%u, v%u\n");
+}
+
+/** vector select: emits a VA-form instruction with extended opcode 42 */
+void
+ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+ /* operands are vector registers: print them as v<N>, and end the line */
+ emit_va(p, 42, vD, vA, vB, vC, "vsel\tv%u, v%u, v%u, v%u\n");
+}
+
+/**
+ * vector splat byte
+ * \param imm  index of the source byte in vB; placed in the vA field
+ */
+void
+ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm)
+{
+ /* VX-form extended opcode for vspltb is 524.  The previous value, 42,
+ * is the VA-form opcode used by ppc_vsel(), so the emitted word would
+ * not have been a splat.
+ */
+ emit_vx(p, 524, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE);
+}
+
+/** vector splat half word */
+void
+ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm)
+{
+ emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE);
+}
+
+/** vector splat word */
+void
+ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm)
+{
+ emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE);
+}
+
+/**
+ * vector splat signed immediate word
+ * \param imm  signed immediate, must be in [-16, 15]; it is stored in the
+ *             5-bit vA field of the instruction
+ */
+void
+ppc_vspltisw(struct ppc_function *p, uint vD, int imm)
+{
+ assert(imm >= -16);
+ /* 15 is the largest value of a 5-bit signed field and must be accepted */
+ assert(imm <= 15);
+ emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE);
+}
+
+/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
+void
+ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE);
+}
+
+
+
+
+/**
+ ** integer arithmetic
+ **/
+
+/** rt = ra + imm */
+void
+ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+ emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra + (imm << 16) */
+void
+ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+ emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra + rb */
+void
+ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+ emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n");
+}
+
+/** rt = ra AND rb */
+void
+ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+ emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n"); /* note argument order */
+}
+
+/** rt = ra AND imm (primary opcode 28 is the record form "andi.", so the trace prints the dot) */
+void
+ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+ /* note argument order */
+ emit_d(p, 28, ra, rt, imm, "andi.\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra OR rb */
+void
+ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+ emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n"); /* note argument order */
+}
+
+/** rt = ra OR imm */
+void
+ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+ /* note argument order */
+ emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra XOR rb */
+void
+ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+ emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n"); /* note argument order */
+}
+
+/** rt = ra XOR imm */
+void
+ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+ /* note argument order */
+ emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE);
+}
+
+/** pseudo instruction: move: rt = ra */
+void
+ppc_mr(struct ppc_function *p, uint rt, uint ra)
+{
+ ppc_or(p, rt, ra, ra);
+}
+
+/** pseudo instruction: load immediate: rt = imm */
+void
+ppc_li(struct ppc_function *p, uint rt, int imm)
+{
+ boolean print = p->print;
+ p->print = FALSE;
+ ppc_addi(p, rt, 0, imm);
+ if (print) {
+ indent(p);
+ printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm);
+ }
+ p->print = print;
+}
+
+/** rt = imm << 16 */
+void
+ppc_lis(struct ppc_function *p, uint rt, int imm)
+{
+ ppc_addis(p, rt, 0, imm);
+}
+
+/** rt = imm, built in two instructions: lis for the upper half, ori for the lower half */
+void
+ppc_load_int(struct ppc_function *p, uint rt, int imm)
+{
+ ppc_lis(p, rt, (imm >> 16)); /* rt = (imm >> 16) << 16 */
+ ppc_ori(p, rt, rt, (imm & 0xffff)); /* rt = rt | (imm & 0xffff) */
+}
+
+
+
+
+/**
+ ** integer load/store
+ **/
+
+/** store rs at memory[(ra)+d],
+ * then update ra = (ra)+d
+ */
+void
+ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d)
+{
+ emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE);
+}
+
+/** store rs at memory[(ra)+d] */
+void
+ppc_stw(struct ppc_function *p, uint rs, uint ra, int d)
+{
+ emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE);
+}
+
+/** Load word and zero: rt = mem[(ra)+d], then set the high 32 bits of rt to zero. */
+void
+ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d)
+{
+ emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE);
+}
+
+
+
+/**
+ ** Float (non-vector) arithmetic
+ **/
+
+/** add: frt = fra + frb */
+void
+ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb)
+{
+ emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n");
+}
+
+/** sub: frt = fra - frb */
+void
+ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb)
+{
+ emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n");
+}
+
+/** convert to int: rt = (int) ra */
+void
+ppc_fctiwz(struct ppc_function *p, uint rt, uint fra)
+{
+ emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n");
+}
+
+/** store frs at mem[(ra)+offset]; frs is a floating point register, so it is traced as f<n> */
+void
+ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset)
+{
+ emit_d(p, 52, frs, ra, offset, "stfs\tf%u, %d(r%u)\n", TRUE);
+}
+
+/** store frs at mem[(ra)+(rb)]; frs is a floating point register, so it is traced as f<n> */
+void
+ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb)
+{
+ emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tf%u, r%u, r%u\n");
+}
+
+/** load frt = mem[(ra)+offset]; traced as "lfs" with frt shown as a floating point register */
+void
+ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset)
+{
+ emit_d(p, 48, frt, ra, offset, "lfs\tf%u, %d(r%u)\n", TRUE);
+}
+
+
+
+
+
+/**
+ ** branch instructions
+ **/
+
+/** BLR: Branch to link register (p. 35); blr is bclr 20,0 (opcode 19, XO 16), not the I-form opcode 18 */
+void
+ppc_blr(struct ppc_function *p)
+{
+ emit_xl(p, 19, BRANCH_COND_ALWAYS, 0, BRANCH_HINT_SUB_RETURN, 16, 0);
+ if (p->print) {
+ indent(p);
+ printf("blr\n");
+ }
+}
+
+/** Branch Conditional to Link Register (p. 36) */
+void
+ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg)
+{
+ emit_xl(p, 19, condOp, condReg, branchHint, 16, 0);
+ if (p->print) {
+ indent(p);
+ printf("bclr\t%u %u %u\n", condOp, branchHint, condReg);
+ }
+}
+
+/** Pseudo instruction: return from subroutine */
+void
+ppc_return(struct ppc_function *p)
+{
+ ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0);
+}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
new file mode 100644
index 0000000000..93e5f5187d
--- /dev/null
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
@@ -0,0 +1,342 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * PPC code generation.
+ * \author Brian Paul
+ */
+
+
+#ifndef RTASM_PPC_H
+#define RTASM_PPC_H
+
+
+#include "pipe/p_compiler.h"
+
+
+#define PPC_INST_SIZE 4 /**< 4 bytes / instruction */
+
+#define PPC_NUM_REGS 32
+#define PPC_NUM_FP_REGS 32
+#define PPC_NUM_VEC_REGS 32
+
+/** Stack pointer register */
+#define PPC_REG_SP 1
+
+/** Branch conditions */
+#define BRANCH_COND_ALWAYS 0x14 /* binary 1z1zz (z=ignored) */
+
+/** Branch hints */
+#define BRANCH_HINT_SUB_RETURN 0x0 /* binary 00 */
+
+
+struct ppc_function
+{
+ uint32_t *store; /**< instruction buffer */
+ uint num_inst; /**< presumably the number of instructions emitted so far -- confirm in rtasm_ppc.c */
+ uint max_inst; /**< presumably the capacity of store, in instructions -- confirm in rtasm_ppc.c */
+ uint32_t reg_used; /**< used/free general-purpose registers bitmask */
+ uint32_t fp_used; /**< used/free floating point registers bitmask */
+ uint32_t vec_used; /**< used/free vector registers bitmask */
+ int indent; /**< presumably the indentation applied to printed instructions -- confirm in rtasm_ppc.c */
+ boolean print; /**< when TRUE, emitters also printf() each instruction */
+};
+
+
+
+extern void ppc_init_func(struct ppc_function *p);
+extern void ppc_release_func(struct ppc_function *p);
+extern uint ppc_num_instructions(const struct ppc_function *p);
+extern void (*ppc_get_func( struct ppc_function *p ))( void );
+extern void ppc_dump_func(const struct ppc_function *p);
+
+extern void ppc_print_code(struct ppc_function *p, boolean enable);
+extern void ppc_indent(struct ppc_function *p, int spaces);
+extern void ppc_comment(struct ppc_function *p, int rel_indent, const char *s);
+
+extern int ppc_reserve_register(struct ppc_function *p, int reg);
+extern int ppc_allocate_register(struct ppc_function *p);
+extern void ppc_release_register(struct ppc_function *p, int reg);
+extern int ppc_allocate_fp_register(struct ppc_function *p);
+extern void ppc_release_fp_register(struct ppc_function *p, int reg);
+extern int ppc_allocate_vec_register(struct ppc_function *p);
+extern void ppc_release_vec_register(struct ppc_function *p, int reg);
+
+
+
+/**
+ ** float vector arithmetic
+ **/
+
+/** vector float add */
+extern void
+ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB);
+
+/** vector float subtract */
+extern void
+ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float min */
+extern void
+ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float max */
+extern void
+ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float mult add: vD = vA * vB + vC */
+extern void
+ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector float negative mult subtract: vD = vA - vB * vC */
+extern void
+ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector float compare greater than */
+extern void
+ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float compare greater than or equal to */
+extern void
+ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float compare equal */
+extern void
+ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float 2^x */
+extern void
+ppc_vexptefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float log2(x) */
+extern void
+ppc_vlogefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float reciprocal */
+extern void
+ppc_vrefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float reciprocal sqrt estimate */
+extern void
+ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to negative infinity */
+extern void
+ppc_vrfim(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to positive infinity */
+extern void
+ppc_vrfip(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to nearest int */
+extern void
+ppc_vrfin(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to int toward zero */
+extern void
+ppc_vrfiz(struct ppc_function *p, uint vD, uint vB);
+
+
+/** vector store: store vR at mem[vA+vB] */
+extern void
+ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB);
+
+/** vector load: vR = mem[vA+vB] */
+extern void
+ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB);
+
+/** load vector element word: vR = mem_word[vA+vB] */
+extern void
+ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB);
+
+
+
+/**
+ ** vector bitwise operations
+ **/
+
+
+/** vector and */
+extern void
+ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector and complement */
+extern void
+ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector or */
+extern void
+ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector nor */
+extern void
+ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector xor */
+extern void
+ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** Pseudo-instruction: vector move */
+extern void
+ppc_vmove(struct ppc_function *p, uint vD, uint vA);
+
+/** Set vector register to {0,0,0,0} */
+extern void
+ppc_vzero(struct ppc_function *p, uint vr);
+
+
+
+/**
+ ** Vector shuffle / select / splat / etc
+ **/
+
+/** vector permute */
+extern void
+ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector select */
+extern void
+ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector splat byte */
+extern void
+ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm);
+
+/** vector splat half word */
+extern void
+ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm);
+
+/** vector splat word */
+extern void
+ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm);
+
+/** vector splat signed immediate word */
+extern void
+ppc_vspltisw(struct ppc_function *p, uint vD, int imm);
+
+/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
+extern void
+ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+
+
+/**
+ ** scalar arithmetic
+ **/
+
+extern void
+ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+extern void
+ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
+ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
+ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+extern void
+ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
+ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+extern void
+ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
+ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+extern void
+ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
+ppc_mr(struct ppc_function *p, uint rt, uint ra);
+
+extern void
+ppc_li(struct ppc_function *p, uint rt, int imm);
+
+extern void
+ppc_lis(struct ppc_function *p, uint rt, int imm);
+
+extern void
+ppc_load_int(struct ppc_function *p, uint rt, int imm);
+
+
+
+/**
+ ** scalar load/store
+ **/
+
+extern void
+ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d);
+
+extern void
+ppc_stw(struct ppc_function *p, uint rs, uint ra, int d);
+
+extern void
+ppc_lwz(struct ppc_function *p, uint rs, uint ra, int d);
+
+
+
+/**
+ ** Float (non-vector) arithmetic
+ **/
+
+extern void
+ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb);
+
+extern void
+ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb);
+
+extern void
+ppc_fctiwz(struct ppc_function *p, uint rt, uint ra);
+
+extern void
+ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset);
+
+extern void
+ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb);
+
+extern void
+ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset);
+
+
+
+/**
+ ** branch instructions
+ **/
+
+extern void
+ppc_blr(struct ppc_function *p);
+
+void
+ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg);
+
+extern void
+ppc_return(struct ppc_function *p);
+
+
+#endif /* RTASM_PPC_H */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index a04cc6c4ff..53a0e722cf 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -27,12 +27,16 @@
* Real-time assembly generation interface for Cell B.E. SPEs.
*
* \author Ian Romanick <idr@us.ibm.com>
+ * \author Brian Paul
*/
+
+#include <stdio.h>
#include "pipe/p_compiler.h"
#include "util/u_memory.h"
#include "rtasm_ppc_spe.h"
+
#ifdef GALLIUM_CELL
/**
* SPE instruction types
@@ -143,21 +147,91 @@ union spe_inst_RI18 {
/*@}*/
-static void emit_RR(struct spe_function *p, unsigned op, unsigned rT,
- unsigned rA, unsigned rB)
+static void
+indent(const struct spe_function *p)
+{
+ int i;
+ for (i = 0; i < p->indent; i++) {
+ putchar(' ');
+ }
+}
+
+
+static const char *
+rem_prefix(const char *longname)
+{
+ return longname + 4;
+}
+
+
+static const char *
+reg_name(int reg)
+{
+ switch (reg) {
+ case SPE_REG_SP:
+ return "$sp";
+ case SPE_REG_RA:
+ return "$lr";
+ default:
+ {
+ /* cycle through four buffers to handle multiple calls per printf */
+ static char buf[4][16]; /* 16 bytes hold "$" plus any 32-bit int plus NUL */
+ static int b = 0;
+ b = (b + 1) % 4;
+ sprintf(buf[b], "$%d", reg);
+ return buf[b];
+ }
+ }
+}
+
+
+static void
+emit_instruction(struct spe_function *p, uint32_t inst_bits)
+{
+ if (!p->store)
+ return; /* out of memory, drop the instruction */
+
+ if (p->num_inst == p->max_inst) {
+ /* buffer is full: allocate a larger one and copy the code over */
+ uint32_t *newbuf;
+ p->max_inst = p->max_inst ? p->max_inst * 2 : 16; /* 2x larger; a capacity of 0 must still grow */
+ newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16);
+ if (newbuf) {
+ memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE);
+ }
+ align_free(p->store);
+ p->store = newbuf;
+ if (!p->store) {
+ /* out of memory: later calls see store == NULL and drop instructions */
+ p->num_inst = 0;
+ return;
+ }
+ }
+
+ p->store[p->num_inst++] = inst_bits;
+}
+
+
+
+static void emit_RR(struct spe_function *p, unsigned op, int rT,
+ int rA, int rB, const char *name)
{
union spe_inst_RR inst;
inst.inst.op = op;
inst.inst.rB = rB;
inst.inst.rA = rA;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, %s, %s\n",
+ rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB));
+ }
}
-static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT,
- unsigned rA, unsigned rB, unsigned rC)
+static void emit_RRR(struct spe_function *p, unsigned op, int rT,
+ int rA, int rB, int rC, const char *name)
{
union spe_inst_RRR inst;
inst.inst.op = op;
@@ -165,155 +239,212 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.rB = rB;
inst.inst.rA = rA;
inst.inst.rC = rC;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT),
+ reg_name(rA), reg_name(rB), reg_name(rC));
+ }
}
-static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT,
- unsigned rA, int imm)
+static void emit_RI7(struct spe_function *p, unsigned op, int rT,
+ int rA, int imm, const char *name)
{
union spe_inst_RI7 inst;
inst.inst.op = op;
inst.inst.i7 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, %s, 0x%x\n",
+ rem_prefix(name), reg_name(rT), reg_name(rA), imm);
+ }
}
-static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT,
- unsigned rA, int imm)
+static void emit_RI8(struct spe_function *p, unsigned op, int rT,
+ int rA, int imm, const char *name)
{
union spe_inst_RI8 inst;
inst.inst.op = op;
inst.inst.i8 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, %s, 0x%x\n",
+ rem_prefix(name), reg_name(rT), reg_name(rA), imm);
+ }
}
-static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT,
- unsigned rA, int imm)
+static void emit_RI10(struct spe_function *p, unsigned op, int rT,
+ int rA, int imm, const char *name)
{
union spe_inst_RI10 inst;
inst.inst.op = op;
inst.inst.i10 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, %s, 0x%x\n",
+ rem_prefix(name), reg_name(rT), reg_name(rA), imm);
+ }
}
-static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT,
- int imm)
+/** As above, but do range checking on signed immediate value */
+static void emit_RI10s(struct spe_function *p, unsigned op, int rT,
+ int rA, int imm, const char *name)
+{
+ assert(imm <= 511);
+ assert(imm >= -512);
+ emit_RI10(p, op, rT, rA, imm, name);
+}
+
+
+static void emit_RI16(struct spe_function *p, unsigned op, int rT,
+ int imm, const char *name)
{
union spe_inst_RI16 inst;
inst.inst.op = op;
inst.inst.i16 = imm;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm);
+ }
}
-static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT,
- int imm)
+static void emit_RI18(struct spe_function *p, unsigned op, int rT,
+ int imm, const char *name)
{
union spe_inst_RI18 inst;
inst.inst.op = op;
inst.inst.i18 = imm;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm);
+ }
}
-
+#define EMIT(_name, _op) \
+void _name (struct spe_function *p) \
+{ \
+ emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \
+}
#define EMIT_(_name, _op) \
-void _name (struct spe_function *p, unsigned rT) \
+void _name (struct spe_function *p, int rT) \
{ \
- emit_RR(p, _op, rT, 0, 0); \
+ emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \
}
#define EMIT_R(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA) \
+void _name (struct spe_function *p, int rT, int rA) \
{ \
- emit_RR(p, _op, rT, rA, 0); \
+ emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \
}
#define EMIT_RR(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \
+void _name (struct spe_function *p, int rT, int rA, int rB) \
{ \
- emit_RR(p, _op, rT, rA, rB); \
+ emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \
}
#define EMIT_RRR(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \
+void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \
{ \
- emit_RRR(p, _op, rT, rA, rB, rC); \
+ emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \
}
#define EMIT_RI7(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \
+void _name (struct spe_function *p, int rT, int rA, int imm) \
{ \
- emit_RI7(p, _op, rT, rA, imm); \
+ emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \
}
#define EMIT_RI8(_name, _op, bias) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \
+void _name (struct spe_function *p, int rT, int rA, int imm) \
{ \
- emit_RI8(p, _op, rT, rA, bias - imm); \
+ emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \
}
#define EMIT_RI10(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \
+void _name (struct spe_function *p, int rT, int rA, int imm) \
{ \
- emit_RI10(p, _op, rT, rA, imm); \
+ emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \
+}
+
+#define EMIT_RI10s(_name, _op) \
+void _name (struct spe_function *p, int rT, int rA, int imm) \
+{ \
+ emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \
}
#define EMIT_RI16(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, int imm) \
+void _name (struct spe_function *p, int rT, int imm) \
{ \
- emit_RI16(p, _op, rT, imm); \
+ emit_RI16(p, _op, rT, imm, __FUNCTION__); \
}
#define EMIT_RI18(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, int imm) \
+void _name (struct spe_function *p, int rT, int imm) \
{ \
- emit_RI18(p, _op, rT, imm); \
+ emit_RI18(p, _op, rT, imm, __FUNCTION__); \
}
#define EMIT_I16(_name, _op) \
void _name (struct spe_function *p, int imm) \
{ \
- emit_RI16(p, _op, 0, imm); \
+ emit_RI16(p, _op, 0, imm, __FUNCTION__); \
}
#include "rtasm_ppc_spe.h"
+
/**
* Initialize an spe_function.
- * \param code_size size of instruction buffer to allocate, in bytes.
+ * \param code_size initial size of instruction buffer to allocate, in bytes.
+ * If zero, use a default.
*/
void spe_init_func(struct spe_function *p, unsigned code_size)
{
- p->store = align_malloc(code_size, 16);
+ uint i;
+
+ if (!code_size)
+ code_size = 64;
+
p->num_inst = 0;
p->max_inst = code_size / SPE_INST_SIZE;
+ p->store = align_malloc(code_size, 16);
+
+ p->set_count = 0;
+ memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0]));
/* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
*/
- p->regs[0] = ~7;
- p->regs[1] = (1U << (80 - 64)) - 1;
+ p->regs[0] = p->regs[1] = p->regs[2] = 1;
+ for (i = 80; i <= 127; i++) {
+ p->regs[i] = 1;
+ }
+
+ p->print = FALSE;
+ p->indent = 0;
}
@@ -327,20 +458,23 @@ void spe_release_func(struct spe_function *p)
}
+/** Return current code size in bytes. */
+unsigned spe_code_size(const struct spe_function *p)
+{
+ return p->num_inst * SPE_INST_SIZE;
+}
+
+
/**
- * Alloate a SPE register.
+ * Allocate a SPE register.
* \return register index or -1 if none left.
*/
int spe_allocate_available_register(struct spe_function *p)
{
unsigned i;
for (i = 0; i < SPE_NUM_REGS; i++) {
- const uint64_t mask = (1ULL << (i % 64));
- const unsigned idx = i / 64;
-
- assert(idx < 2);
- if ((p->regs[idx] & mask) != 0) {
- p->regs[idx] &= ~mask;
+ if (p->regs[i] == 0) {
+ p->regs[i] = 1;
return i;
}
}
@@ -354,31 +488,161 @@ int spe_allocate_available_register(struct spe_function *p)
*/
int spe_allocate_register(struct spe_function *p, int reg)
{
- const unsigned idx = reg / 64;
- const unsigned bit = reg % 64;
-
assert(reg < SPE_NUM_REGS);
- assert((p->regs[idx] & (1ULL << bit)) != 0);
-
- p->regs[idx] &= ~(1ULL << bit);
+ assert(p->regs[reg] == 0);
+ p->regs[reg] = 1;
return reg;
}
/**
- * Mark the given SPE register as "unallocated".
+ * Mark the given SPE register as "unallocated". Note that this should
+ * only be used on registers allocated in the current register set; an
+ * assertion will fail if an attempt is made to deallocate a register
+ * allocated in an earlier register set.
*/
void spe_release_register(struct spe_function *p, int reg)
{
- const unsigned idx = reg / 64;
- const unsigned bit = reg % 64;
+ assert(reg >= 0);
+ assert(reg < SPE_NUM_REGS);
+ assert(p->regs[reg] == 1);
- assert(idx < 2);
+ p->regs[reg] = 0;
+}
- assert(reg < SPE_NUM_REGS);
- assert((p->regs[idx] & (1ULL << bit)) == 0);
+/**
+ * Start a new set of registers. This can be called if
+ * it will be difficult later to determine exactly what
+ * registers were actually allocated during a code generation
+ * sequence, and you really just want to deallocate all of them.
+ */
+void spe_allocate_register_set(struct spe_function *p)
+{
+ uint i;
+
+ /* Keep track of the set count. If it ever wraps around to 0,
+ * we're in trouble.
+ */
+ p->set_count++;
+ assert(p->set_count > 0);
+
+ /* Increment the allocation count of all registers currently
+ * allocated. Then any registers that are allocated in this set
+ * will be the only ones with a count of 1; they'll all be released
+ * when the register set is released.
+ */
+ for (i = 0; i < SPE_NUM_REGS; i++) {
+ if (p->regs[i] > 0)
+ p->regs[i]++;
+ }
+}
+
+void spe_release_register_set(struct spe_function *p)
+{
+ uint i;
+
+ /* If the set count drops below zero, we're in trouble. */
+ assert(p->set_count > 0);
+ p->set_count--;
- p->regs[idx] |= (1ULL << bit);
+ /* Drop the allocation level of all registers. Any allocated
+ * during this register set will drop to 0 and then become
+ * available.
+ */
+ for (i = 0; i < SPE_NUM_REGS; i++) {
+ if (p->regs[i] > 0)
+ p->regs[i]--;
+ }
+}
+
+
+unsigned
+spe_get_registers_used(const struct spe_function *p, ubyte used[])
+{
+ unsigned i, num = 0;
+ /* scan R2..R79 and list those marked allocated; NOTE(review): spe_init_func reserves R2, so it is always reported -- confirm intent */
+ for (i = 2; i < 80; i++) {
+ if (p->regs[i]) {
+ used[num++] = i;
+ }
+ }
+ return num;
+}
+
+
+void
+spe_print_code(struct spe_function *p, boolean enable)
+{
+ p->print = enable;
+}
+
+
+void
+spe_indent(struct spe_function *p, int spaces)
+{
+ p->indent += spaces;
+}
+
+
+void
+spe_comment(struct spe_function *p, int rel_indent, const char *s)
+{
+ if (p->print) {
+ p->indent += rel_indent;
+ indent(p);
+ p->indent -= rel_indent;
+ printf("# %s\n", s);
+ }
+}
+
+
+/**
+ * Load quad word.
+ * NOTE: offset is in bytes and the least significant 4 bits must be zero!
+ */
+void spe_lqd(struct spe_function *p, int rT, int rA, int offset)
+{
+ const boolean pSave = p->print;
+
+ /* offset must be a multiple of 16 */
+ assert(offset % 16 == 0);
+ /* offset must fit in 10-bit signed int field, after shifting */
+ assert((offset >> 4) <= 511);
+ assert((offset >> 4) >= -512);
+
+ p->print = FALSE;
+ emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd");
+ p->print = pSave;
+
+ if (p->print) {
+ indent(p);
+ printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
+ }
+}
+
+
+/**
+ * Store quad word.
+ * NOTE: offset is in bytes and the least significant 4 bits must be zero!
+ */
+void spe_stqd(struct spe_function *p, int rT, int rA, int offset)
+{
+ const boolean pSave = p->print;
+
+ /* offset must be a multiple of 16 */
+ assert(offset % 16 == 0);
+ /* offset must fit in 10-bit signed int field, after shifting */
+ assert((offset >> 4) <= 511);
+ assert((offset >> 4) >= -512);
+
+ p->print = FALSE;
+ emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd");
+ p->print = pSave;
+
+ if (p->print) {
+ indent(p);
+ printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
+ }
}
@@ -390,53 +654,53 @@ void spe_release_register(struct spe_function *p, int reg)
*/
/** Branch Indirect to address in rA */
-void spe_bi(struct spe_function *p, unsigned rA, int d, int e)
+void spe_bi(struct spe_function *p, int rA, int d, int e)
{
- emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Interupt Return */
-void spe_iret(struct spe_function *p, unsigned rA, int d, int e)
+void spe_iret(struct spe_function *p, int rA, int d, int e)
{
- emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect and set link on external data */
-void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d,
+void spe_bisled(struct spe_function *p, int rT, int rA, int d,
int e)
{
- emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect and set link. Save PC in rT, jump to rA. */
-void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d,
+void spe_bisl(struct spe_function *p, int rT, int rA, int d,
int e)
{
- emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */
-void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
+void spe_biz(struct spe_function *p, int rT, int rA, int d, int e)
{
- emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */
-void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
+void spe_binz(struct spe_function *p, int rT, int rA, int d, int e)
{
- emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */
-void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
+void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e)
{
- emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */
-void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
+void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e)
{
- emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
@@ -454,7 +718,6 @@ hbrr;
#if 0
stop;
EMIT_RR (spe_stopd, 0x140);
-EMIT_ (spe_lnop, 0x001);
EMIT_ (spe_nop, 0x201);
sync;
EMIT_ (spe_dsync, 0x003);
@@ -471,7 +734,7 @@ EMIT_R (spe_mtspr, 0x10c);
void
-spe_load_float(struct spe_function *p, unsigned rT, float x)
+spe_load_float(struct spe_function *p, int rT, float x)
{
if (x == 0.0f) {
spe_il(p, rT, 0x0);
@@ -498,45 +761,307 @@ spe_load_float(struct spe_function *p, unsigned rT, float x)
void
-spe_load_int(struct spe_function *p, unsigned rT, int i)
+spe_load_int(struct spe_function *p, int rT, int i)
{
if (-32768 <= i && i <= 32767) {
spe_il(p, rT, i);
}
else {
spe_ilhu(p, rT, i >> 16);
- spe_iohl(p, rT, i & 0xffff);
+ if (i & 0xffff)
+ spe_iohl(p, rT, i & 0xffff);
}
}
+void spe_load_uint(struct spe_function *p, int rT, uint ui)
+{
+ /* If the whole value is in the lower 18 bits, use ila, which
+ * doesn't sign-extend. Otherwise, if the two halfwords of
+ * the constant are identical, use ilh. Otherwise, if every byte of
+ * the desired value is 0x00 or 0xff, we can use Form Select Mask for
+ * Bytes Immediate (fsmbi) to load the value in a single instruction.
+ * Otherwise, in the general case, we have to use ilhu followed by iohl.
+ */
+ if ((ui & 0x0003ffff) == ui) {
+ spe_ila(p, rT, ui);
+ }
+ else if ((ui >> 16) == (ui & 0xffff)) {
+ spe_ilh(p, rT, ui & 0xffff);
+ }
+ else if (
+ ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) &&
+ ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) &&
+ ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) &&
+ ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000)
+ ) {
+ uint mask = 0;
+ /* fsmbi duplicates each bit in the given mask eight times,
+ * using a 16-bit value to initialize a 16-byte quadword.
+ * Each 4-bit nybble of the mask corresponds to a full word
+ * of the result; look at the value and figure out the mask
+ * (replicated for each word in the quadword), and then
+ * form the "select mask" to get the value.
+ */
+ if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111;
+ if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222;
+ if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444;
+ if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888;
+ spe_fsmbi(p, rT, mask);
+ }
+ else {
+ /* The general case: this usually uses two instructions, but
+ * may use only one if the low-order 16 bits of each word are 0.
+ */
+ spe_ilhu(p, rT, ui >> 16);
+ if (ui & 0xffff)
+ spe_iohl(p, rT, ui & 0xffff);
+ }
+}
+/**
+ * This function is constructed identically to spe_xor_uint() below.
+ * Changes to one should be made in the other.
+ */
void
-spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
+spe_and_uint(struct spe_function *p, int rT, int rA, uint ui)
{
- spe_ila(p, rT, 66051);
- spe_shufb(p, rT, rA, rA, rT);
+ /* If we can, emit a single instruction, either And Byte Immediate
+ * (which uses the same constant across each byte), And Halfword Immediate
+ * (which sign-extends a 10-bit immediate to 16 bits and uses that
+ * across each halfword), or And Word Immediate (which sign-extends
+ * a 10-bit immediate to 32 bits).
+ *
+ * Otherwise, we'll need to use a temporary register.
+ */
+ uint tmp;
+
+ /* If the upper 23 bits are all 0s or all 1s, sign extension
+ * will work and we can use And Word Immediate
+ */
+ tmp = ui & 0xfffffe00;
+ if (tmp == 0xfffffe00 || tmp == 0) {
+ spe_andi(p, rT, rA, ui & 0x000003ff);
+ return;
+ }
+
+ /* If the ui field is symmetric along halfword boundaries and
+ * the upper 7 bits of each halfword are all 0s or 1s, we
+ * can use And Halfword Immediate
+ */
+ tmp = ui & 0xfe00fe00;
+ if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) {
+ spe_andhi(p, rT, rA, ui & 0x000003ff);
+ return;
+ }
+
+ /* If the ui field is symmetric in each byte, then we can use
+ * the And Byte Immediate instruction.
+ */
+ tmp = ui & 0x000000ff;
+ if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) {
+ spe_andbi(p, rT, rA, tmp);
+ return;
+ }
+
+ /* Otherwise, we'll have to use a temporary register. */
+ int tmp_reg = spe_allocate_available_register(p);
+ spe_load_uint(p, tmp_reg, ui);
+ spe_and(p, rT, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
}
+/**
+ * This function is constructed identically to spe_and_uint() above.
+ * Changes to one should be made in the other.
+ */
void
-spe_complement(struct spe_function *p, unsigned rT)
+spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui)
{
- spe_nor(p, rT, rT, rT);
+ /* If we can, emit a single instruction, either Exclusive Or Byte
+ * Immediate (which uses the same constant across each byte), Exclusive
+ * Or Halfword Immediate (which sign-extends a 10-bit immediate to
+ * 16 bits and uses that across each halfword), or Exclusive Or Word
+ * Immediate (which sign-extends a 10-bit immediate to 32 bits).
+ *
+ * Otherwise, we'll need to use a temporary register.
+ */
+ uint tmp;
+
+ /* If the upper 23 bits are all 0s or all 1s, sign extension
+ * will work and we can use Exclusive Or Word Immediate
+ */
+ tmp = ui & 0xfffffe00;
+ if (tmp == 0xfffffe00 || tmp == 0) {
+ spe_xori(p, rT, rA, ui & 0x000003ff);
+ return;
+ }
+
+ /* If the ui field is symmetric along halfword boundaries and
+ * the upper 7 bits of each halfword are all 0s or 1s, we
+ * can use Exclusive Or Halfword Immediate
+ */
+ tmp = ui & 0xfe00fe00;
+ if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) {
+ spe_xorhi(p, rT, rA, ui & 0x000003ff);
+ return;
+ }
+
+ /* If the ui field is symmetric in each byte, then we can use
+ * the Exclusive Or Byte Immediate instruction.
+ */
+ tmp = ui & 0x000000ff;
+ if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) {
+ spe_xorbi(p, rT, rA, tmp);
+ return;
+ }
+
+ /* Otherwise, we'll have to use a temporary register. */
+ int tmp_reg = spe_allocate_available_register(p);
+ spe_load_uint(p, tmp_reg, ui);
+ spe_xor(p, rT, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
+}
+
+void
+spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui)
+{
+ /* If the comparison value is 9 bits or less, it fits inside a
+ * Compare Equal Word Immediate instruction.
+ */
+ if ((ui & 0x000001ff) == ui) {
+ spe_ceqi(p, rT, rA, ui);
+ }
+ /* Otherwise, we're going to have to load a word first. */
+ else {
+ int tmp_reg = spe_allocate_available_register(p);
+ spe_load_uint(p, tmp_reg, ui);
+ spe_ceq(p, rT, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
+ }
+}
+
+/**
+ * Emit code computing rT = (rA > ui) as an unsigned (logical) word compare.
+ *
+ * \param p   function being assembled
+ * \param rT  destination register, receives the per-word compare mask
+ * \param rA  source register
+ * \param ui  unsigned constant to compare against
+ */
+void
+spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui)
+{
+   /* Compare Logical Greater Than Word Immediate sign-extends its 10-bit
+    * immediate before doing the unsigned compare, so only values with
+    * bit 9 clear (9 bits or less) can be encoded directly; 0x200..0x3ff
+    * would otherwise be compared as 0xfffffe00..0xffffffff.
+    */
+   if ((ui & 0x000001ff) == ui) {
+      spe_clgti(p, rT, rA, ui);
+   }
+   /* Otherwise, we're going to have to load a word first. */
+   else {
+      int tmp_reg = spe_allocate_available_register(p);
+      spe_load_uint(p, tmp_reg, ui);
+      spe_clgt(p, rT, rA, tmp_reg);
+      spe_release_register(p, tmp_reg);
+   }
+}
+
+void
+spe_splat(struct spe_function *p, int rT, int rA)
+{
+ /* Use a temporary, just in case rT == rA */
+ int tmp_reg = spe_allocate_available_register(p);
+ /* Duplicate bytes 0, 1, 2, and 3 across the whole register */
+ spe_ila(p, tmp_reg, 0x00010203);
+ spe_shufb(p, rT, rA, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
+}
+
+
+void
+spe_complement(struct spe_function *p, int rT, int rA)
+{
+ spe_nor(p, rT, rA, rA);
}
void
-spe_move(struct spe_function *p, unsigned rT, unsigned rA)
+spe_move(struct spe_function *p, int rT, int rA)
{
- spe_ori(p, rT, rA, 0);
+ /* Use different instructions depending on the instruction address
+ * to take advantage of the dual pipelines.
+ */
+ if (p->num_inst & 1)
+ spe_shlqbyi(p, rT, rA, 0); /* odd pipe */
+ else
+ spe_ori(p, rT, rA, 0); /* even pipe */
}
void
-spe_zero(struct spe_function *p, unsigned rT)
+spe_zero(struct spe_function *p, int rT)
{
spe_xor(p, rT, rT, rT);
}
+/**
+ * Emit code replicating one 32-bit word of rA across all four words of rT.
+ *
+ * \param p     function being assembled
+ * \param rT    destination register (may be the same register as rA)
+ * \param rA    source register
+ * \param word  index of the word to replicate, 0..3
+ */
+void
+spe_splat_word(struct spe_function *p, int rT, int rA, int word)
+{
+   int shuffle_reg;
+
+   assert(word >= 0);
+   assert(word <= 3);
+
+   /* Keep the shuffle pattern in a temporary rather than in rT: loading
+    * it into rT first would destroy the source when rT == rA.
+    */
+   shuffle_reg = spe_allocate_available_register(p);
+   /* Pattern selecting bytes 0, 1, 2, and 3 for every word of the result */
+   spe_ila(p, shuffle_reg, 0x00010203);
+
+   if (word == 0) {
+      spe_shufb(p, rT, rA, rA, shuffle_reg);
+   }
+   else {
+      /* XXX review this, we may not need the rotqbyi instruction */
+      int rot_reg = spe_allocate_available_register(p);
+
+      /* Rotate the requested word into word 0, then splat word 0 */
+      spe_rotqbyi(p, rot_reg, rA, 4 * word);
+      spe_shufb(p, rT, rot_reg, rot_reg, shuffle_reg);
+
+      spe_release_register(p, rot_reg);
+   }
+
+   spe_release_register(p, shuffle_reg);
+}
+
+/**
+ * For each 32-bit float element of rA and rB, choose the smaller of the
+ * two, compositing them into the rT register.
+ *
+ * The Float Compare Greater Than (fcgt) instruction will put 1s into
+ * compare_reg where rA > rB, and 0s where rA <= rB.
+ *
+ * Then the Select Bits (selb) instruction will take bits from rA where
+ * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA
+ * where rA <= rB and from rB where rA > rB, which is exactly the
+ * "min" operation.
+ *
+ * The compare_reg could in many cases be the same as rT, unless
+ * rT == rA || rT == rB. But since this is common in constructions
+ * like "x = min(x, a)", we always allocate a new register to be safe.
+ */
+void
+spe_float_min(struct spe_function *p, int rT, int rA, int rB)
+{
+ int compare_reg = spe_allocate_available_register(p);
+ spe_fcgt(p, compare_reg, rA, rB);
+ spe_selb(p, rT, rA, rB, compare_reg);
+ spe_release_register(p, compare_reg);
+}
+
+/**
+ * For each 32-bit float element of rA and rB, choose the greater of the
+ * two, compositing them into the rT register.
+ *
+ * The logic is similar to that of spe_float_min() above; the only
+ * difference is that the registers on spe_selb() have been reversed,
+ * so that the larger of the two is selected instead of the smaller.
+ */
+void
+spe_float_max(struct spe_function *p, int rT, int rA, int rB)
+{
+ int compare_reg = spe_allocate_available_register(p);
+ spe_fcgt(p, compare_reg, rA, rB);
+ spe_selb(p, rT, rB, rA, compare_reg);
+ spe_release_register(p, compare_reg);
+}
+
#endif /* GALLIUM_CELL */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
index d95e5aace3..65d9c77415 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -28,6 +28,7 @@
* For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
*
* \author Ian Romanick <idr@us.ibm.com>
+ * \author Brian Paul
*/
#ifndef RTASM_PPC_SPE_H
@@ -39,10 +40,10 @@
/** number of general-purpose SIMD registers */
#define SPE_NUM_REGS 128
-/** Return Address register */
+/** Return Address register (aka $lr / Link Register) */
#define SPE_REG_RA 0
-/** Stack Pointer register */
+/** Stack Pointer register (aka $sp) */
#define SPE_REG_SP 1
@@ -52,308 +53,371 @@ struct spe_function
uint num_inst;
uint max_inst;
- /**
- * Mask of used / unused registers
- *
- * Each set bit corresponds to an available register. Each cleared bit
- * corresponds to an allocated register.
+ /**
+ * The "set count" reflects the number of nested register sets that
+ * are allowed. In the unlikely case that we exceed the set count,
+ * register allocation will start to be confused, which is critical
+ * enough that we check for it.
+ */
+ unsigned char set_count;
+
+ /**
+ * Flags for used and unused registers. Each byte corresponds to a
+ * register; a 0 in that byte means that the register is available.
+ * A value of 1 means that the register was allocated in the current
+ * register set. Any other value N means that the register was allocated
+ * N register sets ago.
*
* \sa
* spe_allocate_register, spe_allocate_available_register,
- * spe_release_register
+ * spe_allocate_register_set, spe_release_register_set, spe_release_register
*/
- uint64_t regs[SPE_NUM_REGS / 64];
+ unsigned char regs[SPE_NUM_REGS];
+
+ boolean print; /**< print/dump instructions as they're emitted? */
+ int indent; /**< number of spaces to indent */
};
-extern void spe_init_func(struct spe_function *p, unsigned code_size);
+
+extern void spe_init_func(struct spe_function *p, uint code_size);
extern void spe_release_func(struct spe_function *p);
+extern uint spe_code_size(const struct spe_function *p);
extern int spe_allocate_available_register(struct spe_function *p);
extern int spe_allocate_register(struct spe_function *p, int reg);
extern void spe_release_register(struct spe_function *p, int reg);
+extern void spe_allocate_register_set(struct spe_function *p);
+extern void spe_release_register_set(struct spe_function *p);
+
+extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]);
+
+extern void spe_print_code(struct spe_function *p, boolean enable);
+extern void spe_indent(struct spe_function *p, int spaces);
+extern void spe_comment(struct spe_function *p, int rel_indent, const char *s);
+
#endif /* RTASM_PPC_SPE_H */
-#ifndef EMIT_
-#define EMIT_(name, _op) \
- extern void _name (struct spe_function *p, unsigned rT)
+#ifndef EMIT
+#define EMIT(_name, _op) \
+ extern void _name (struct spe_function *p);
+#define EMIT_(_name, _op) \
+ extern void _name (struct spe_function *p, int rT);
#define EMIT_R(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA)
+ extern void _name (struct spe_function *p, int rT, int rA);
#define EMIT_RR(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- unsigned rB)
+ extern void _name (struct spe_function *p, int rT, int rA, int rB);
#define EMIT_RRR(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- unsigned rB, unsigned rC)
+ extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC);
#define EMIT_RI7(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- int imm)
+ extern void _name (struct spe_function *p, int rT, int rA, int imm);
#define EMIT_RI8(_name, _op, bias) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- int imm)
+ extern void _name (struct spe_function *p, int rT, int rA, int imm);
#define EMIT_RI10(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- int imm)
+ extern void _name (struct spe_function *p, int rT, int rA, int imm);
+#define EMIT_RI10s(_name, _op) \
+ extern void _name (struct spe_function *p, int rT, int rA, int imm);
#define EMIT_RI16(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, int imm)
+ extern void _name (struct spe_function *p, int rT, int imm);
#define EMIT_RI18(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, int imm)
+ extern void _name (struct spe_function *p, int rT, int imm);
#define EMIT_I16(_name, _op) \
- extern void _name (struct spe_function *p, int imm)
+ extern void _name (struct spe_function *p, int imm);
#define UNDEF_EMIT_MACROS
-#endif /* EMIT_ */
+#endif /* EMIT */
/* Memory load / store instructions
*/
-EMIT_RI10(spe_lqd, 0x034);
-EMIT_RR (spe_lqx, 0x1c4);
-EMIT_RI16(spe_lqa, 0x061);
-EMIT_RI16(spe_lqr, 0x067);
-EMIT_RI10(spe_stqd, 0x024);
-EMIT_RR (spe_stqx, 0x144);
-EMIT_RI16(spe_stqa, 0x041);
-EMIT_RI16(spe_stqr, 0x047);
-EMIT_RI7 (spe_cbd, 0x1f4);
-EMIT_RR (spe_cbx, 0x1d4);
-EMIT_RI7 (spe_chd, 0x1f5);
-EMIT_RI7 (spe_chx, 0x1d5);
-EMIT_RI7 (spe_cwd, 0x1f6);
-EMIT_RI7 (spe_cwx, 0x1d6);
-EMIT_RI7 (spe_cdd, 0x1f7);
-EMIT_RI7 (spe_cdx, 0x1d7);
+EMIT_RR (spe_lqx, 0x1c4)
+EMIT_RI16(spe_lqa, 0x061)
+EMIT_RI16(spe_lqr, 0x067)
+EMIT_RR (spe_stqx, 0x144)
+EMIT_RI16(spe_stqa, 0x041)
+EMIT_RI16(spe_stqr, 0x047)
+EMIT_RI7 (spe_cbd, 0x1f4)
+EMIT_RR (spe_cbx, 0x1d4)
+EMIT_RI7 (spe_chd, 0x1f5)
+EMIT_RI7 (spe_chx, 0x1d5)
+EMIT_RI7 (spe_cwd, 0x1f6)
+EMIT_RI7 (spe_cwx, 0x1d6)
+EMIT_RI7 (spe_cdd, 0x1f7)
+EMIT_RI7 (spe_cdx, 0x1d7)
/* Constant formation instructions
*/
-EMIT_RI16(spe_ilh, 0x083);
-EMIT_RI16(spe_ilhu, 0x082);
-EMIT_RI16(spe_il, 0x081);
-EMIT_RI18(spe_ila, 0x021);
-EMIT_RI16(spe_iohl, 0x0c1);
-EMIT_RI16(spe_fsmbi, 0x065);
+EMIT_RI16(spe_ilh, 0x083)
+EMIT_RI16(spe_ilhu, 0x082)
+EMIT_RI16(spe_il, 0x081)
+EMIT_RI18(spe_ila, 0x021)
+EMIT_RI16(spe_iohl, 0x0c1)
+EMIT_RI16(spe_fsmbi, 0x065)
/* Integer and logical instructions
*/
-EMIT_RR (spe_ah, 0x0c8);
-EMIT_RI10(spe_ahi, 0x01d);
-EMIT_RR (spe_a, 0x0c0);
-EMIT_RI10(spe_ai, 0x01c);
-EMIT_RR (spe_sfh, 0x048);
-EMIT_RI10(spe_sfhi, 0x00d);
-EMIT_RR (spe_sf, 0x040);
-EMIT_RI10(spe_sfi, 0x00c);
-EMIT_RR (spe_addx, 0x340);
-EMIT_RR (spe_cg, 0x0c2);
-EMIT_RR (spe_cgx, 0x342);
-EMIT_RR (spe_sfx, 0x341);
-EMIT_RR (spe_bg, 0x042);
-EMIT_RR (spe_bgx, 0x343);
-EMIT_RR (spe_mpy, 0x3c4);
-EMIT_RR (spe_mpyu, 0x3cc);
-EMIT_RI10(spe_mpyi, 0x074);
-EMIT_RI10(spe_mpyui, 0x075);
-EMIT_RRR (spe_mpya, 0x00c);
-EMIT_RR (spe_mpyh, 0x3c5);
-EMIT_RR (spe_mpys, 0x3c7);
-EMIT_RR (spe_mpyhh, 0x3c6);
-EMIT_RR (spe_mpyhha, 0x346);
-EMIT_RR (spe_mpyhhu, 0x3ce);
-EMIT_RR (spe_mpyhhau, 0x34e);
-EMIT_R (spe_clz, 0x2a5);
-EMIT_R (spe_cntb, 0x2b4);
-EMIT_R (spe_fsmb, 0x1b6);
-EMIT_R (spe_fsmh, 0x1b5);
-EMIT_R (spe_fsm, 0x1b4);
-EMIT_R (spe_gbb, 0x1b2);
-EMIT_R (spe_gbh, 0x1b1);
-EMIT_R (spe_gb, 0x1b0);
-EMIT_RR (spe_avgb, 0x0d3);
-EMIT_RR (spe_absdb, 0x053);
-EMIT_RR (spe_sumb, 0x253);
-EMIT_R (spe_xsbh, 0x2b6);
-EMIT_R (spe_xshw, 0x2ae);
-EMIT_R (spe_xswd, 0x2a6);
-EMIT_RR (spe_and, 0x0c1);
-EMIT_RR (spe_andc, 0x2c1);
-EMIT_RI10(spe_andbi, 0x016);
-EMIT_RI10(spe_andhi, 0x015);
-EMIT_RI10(spe_andi, 0x014);
-EMIT_RR (spe_or, 0x041);
-EMIT_RR (spe_orc, 0x2c9);
-EMIT_RI10(spe_orbi, 0x006);
-EMIT_RI10(spe_orhi, 0x005);
-EMIT_RI10(spe_ori, 0x004);
-EMIT_R (spe_orx, 0x1f0);
-EMIT_RR (spe_xor, 0x241);
-EMIT_RI10(spe_xorbi, 0x026);
-EMIT_RI10(spe_xorhi, 0x025);
-EMIT_RI10(spe_xori, 0x024);
-EMIT_RR (spe_nand, 0x0c9);
-EMIT_RR (spe_nor, 0x049);
-EMIT_RR (spe_eqv, 0x249);
-EMIT_RRR (spe_selb, 0x008);
-EMIT_RRR (spe_shufb, 0x00b);
+EMIT_RR (spe_ah, 0x0c8)
+EMIT_RI10(spe_ahi, 0x01d)
+EMIT_RR (spe_a, 0x0c0)
+EMIT_RI10s(spe_ai, 0x01c)
+EMIT_RR (spe_sfh, 0x048)
+EMIT_RI10(spe_sfhi, 0x00d)
+EMIT_RR (spe_sf, 0x040)
+EMIT_RI10(spe_sfi, 0x00c)
+EMIT_RR (spe_addx, 0x340)
+EMIT_RR (spe_cg, 0x0c2)
+EMIT_RR (spe_cgx, 0x342)
+EMIT_RR (spe_sfx, 0x341)
+EMIT_RR (spe_bg, 0x042)
+EMIT_RR (spe_bgx, 0x343)
+EMIT_RR (spe_mpy, 0x3c4)
+EMIT_RR (spe_mpyu, 0x3cc)
+EMIT_RI10(spe_mpyi, 0x074)
+EMIT_RI10(spe_mpyui, 0x075)
+EMIT_RRR (spe_mpya, 0x00c)
+EMIT_RR (spe_mpyh, 0x3c5)
+EMIT_RR (spe_mpys, 0x3c7)
+EMIT_RR (spe_mpyhh, 0x3c6)
+EMIT_RR (spe_mpyhha, 0x346)
+EMIT_RR (spe_mpyhhu, 0x3ce)
+EMIT_RR (spe_mpyhhau, 0x34e)
+EMIT_R (spe_clz, 0x2a5)
+EMIT_R (spe_cntb, 0x2b4)
+EMIT_R (spe_fsmb, 0x1b6)
+EMIT_R (spe_fsmh, 0x1b5)
+EMIT_R (spe_fsm, 0x1b4)
+EMIT_R (spe_gbb, 0x1b2)
+EMIT_R (spe_gbh, 0x1b1)
+EMIT_R (spe_gb, 0x1b0)
+EMIT_RR (spe_avgb, 0x0d3)
+EMIT_RR (spe_absdb, 0x053)
+EMIT_RR (spe_sumb, 0x253)
+EMIT_R (spe_xsbh, 0x2b6)
+EMIT_R (spe_xshw, 0x2ae)
+EMIT_R (spe_xswd, 0x2a6)
+EMIT_RR (spe_and, 0x0c1)
+EMIT_RR (spe_andc, 0x2c1)
+EMIT_RI10s(spe_andbi, 0x016)
+EMIT_RI10s(spe_andhi, 0x015)
+EMIT_RI10s(spe_andi, 0x014)
+EMIT_RR (spe_or, 0x041)
+EMIT_RR (spe_orc, 0x2c9)
+EMIT_RI10s(spe_orbi, 0x006)
+EMIT_RI10s(spe_orhi, 0x005)
+EMIT_RI10s(spe_ori, 0x004)
+EMIT_R (spe_orx, 0x1f0)
+EMIT_RR (spe_xor, 0x241)
+EMIT_RI10s(spe_xorbi, 0x046)
+EMIT_RI10s(spe_xorhi, 0x045)
+EMIT_RI10s(spe_xori, 0x044)
+EMIT_RR (spe_nand, 0x0c9)
+EMIT_RR (spe_nor, 0x049)
+EMIT_RR (spe_eqv, 0x249)
+EMIT_RRR (spe_selb, 0x008)
+EMIT_RRR (spe_shufb, 0x00b)
/* Shift and rotate instructions
*/
-EMIT_RR (spe_shlh, 0x05f);
-EMIT_RI7 (spe_shlhi, 0x07f);
-EMIT_RR (spe_shl, 0x05b);
-EMIT_RI7 (spe_shli, 0x07b);
-EMIT_RR (spe_shlqbi, 0x1db);
-EMIT_RI7 (spe_shlqbii, 0x1fb);
-EMIT_RR (spe_shlqby, 0x1df);
-EMIT_RI7 (spe_shlqbyi, 0x1ff);
-EMIT_RR (spe_shlqbybi, 0x1cf);
-EMIT_RR (spe_roth, 0x05c);
-EMIT_RI7 (spe_rothi, 0x07c);
-EMIT_RR (spe_rot, 0x058);
-EMIT_RI7 (spe_roti, 0x078);
-EMIT_RR (spe_rotqby, 0x1dc);
-EMIT_RI7 (spe_rotqbyi, 0x1fc);
-EMIT_RR (spe_rotqbybi, 0x1cc);
-EMIT_RR (spe_rotqbi, 0x1d8);
-EMIT_RI7 (spe_rotqbii, 0x1f8);
-EMIT_RR (spe_rothm, 0x05d);
-EMIT_RI7 (spe_rothmi, 0x07d);
-EMIT_RR (spe_rotm, 0x059);
-EMIT_RI7 (spe_rotmi, 0x079);
-EMIT_RR (spe_rotqmby, 0x1dd);
-EMIT_RI7 (spe_rotqmbyi, 0x1fd);
-EMIT_RR (spe_rotqmbybi, 0x1cd);
-EMIT_RR (spe_rotqmbi, 0x1c9);
-EMIT_RI7 (spe_rotqmbii, 0x1f9);
-EMIT_RR (spe_rotmah, 0x05e);
-EMIT_RI7 (spe_rotmahi, 0x07e);
-EMIT_RR (spe_rotma, 0x05a);
-EMIT_RI7 (spe_rotmai, 0x07a);
+EMIT_RR (spe_shlh, 0x05f)
+EMIT_RI7 (spe_shlhi, 0x07f)
+EMIT_RR (spe_shl, 0x05b)
+EMIT_RI7 (spe_shli, 0x07b)
+EMIT_RR (spe_shlqbi, 0x1db)
+EMIT_RI7 (spe_shlqbii, 0x1fb)
+EMIT_RR (spe_shlqby, 0x1df)
+EMIT_RI7 (spe_shlqbyi, 0x1ff)
+EMIT_RR (spe_shlqbybi, 0x1cf)
+EMIT_RR (spe_roth, 0x05c)
+EMIT_RI7 (spe_rothi, 0x07c)
+EMIT_RR (spe_rot, 0x058)
+EMIT_RI7 (spe_roti, 0x078)
+EMIT_RR (spe_rotqby, 0x1dc)
+EMIT_RI7 (spe_rotqbyi, 0x1fc)
+EMIT_RR (spe_rotqbybi, 0x1cc)
+EMIT_RR (spe_rotqbi, 0x1d8)
+EMIT_RI7 (spe_rotqbii, 0x1f8)
+EMIT_RR (spe_rothm, 0x05d)
+EMIT_RI7 (spe_rothmi, 0x07d)
+EMIT_RR (spe_rotm, 0x059)
+EMIT_RI7 (spe_rotmi, 0x079)
+EMIT_RR (spe_rotqmby, 0x1dd)
+EMIT_RI7 (spe_rotqmbyi, 0x1fd)
+EMIT_RR (spe_rotqmbybi, 0x1cd)
+EMIT_RR (spe_rotqmbi, 0x1c9)
+EMIT_RI7 (spe_rotqmbii, 0x1f9)
+EMIT_RR (spe_rotmah, 0x05e)
+EMIT_RI7 (spe_rotmahi, 0x07e)
+EMIT_RR (spe_rotma, 0x05a)
+EMIT_RI7 (spe_rotmai, 0x07a)
/* Compare, branch, and halt instructions
*/
-EMIT_RR (spe_heq, 0x3d8);
-EMIT_RI10(spe_heqi, 0x07f);
-EMIT_RR (spe_hgt, 0x258);
-EMIT_RI10(spe_hgti, 0x04f);
-EMIT_RR (spe_hlgt, 0x2d8);
-EMIT_RI10(spe_hlgti, 0x05f);
-EMIT_RR (spe_ceqb, 0x3d0);
-EMIT_RI10(spe_ceqbi, 0x07e);
-EMIT_RR (spe_ceqh, 0x3c8);
-EMIT_RI10(spe_ceqhi, 0x07d);
-EMIT_RR (spe_ceq, 0x3c0);
-EMIT_RI10(spe_ceqi, 0x07c);
-EMIT_RR (spe_cgtb, 0x250);
-EMIT_RI10(spe_cgtbi, 0x04e);
-EMIT_RR (spe_cgth, 0x248);
-EMIT_RI10(spe_cgthi, 0x04d);
-EMIT_RR (spe_cgt, 0x240);
-EMIT_RI10(spe_cgti, 0x04c);
-EMIT_RR (spe_clgtb, 0x2d0);
-EMIT_RI10(spe_clgtbi, 0x05e);
-EMIT_RR (spe_clgth, 0x2c8);
-EMIT_RI10(spe_clgthi, 0x05d);
-EMIT_RR (spe_clgt, 0x2c0);
-EMIT_RI10(spe_clgti, 0x05c);
-EMIT_I16 (spe_br, 0x064);
-EMIT_I16 (spe_bra, 0x060);
-EMIT_RI16(spe_brsl, 0x066);
-EMIT_RI16(spe_brasl, 0x062);
-EMIT_RI16(spe_brnz, 0x042);
-EMIT_RI16(spe_brz, 0x040);
-EMIT_RI16(spe_brhnz, 0x046);
-EMIT_RI16(spe_brhz, 0x044);
-
-extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e);
-extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e);
-extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA,
+EMIT_RR (spe_heq, 0x3d8)
+EMIT_RI10(spe_heqi, 0x07f)
+EMIT_RR (spe_hgt, 0x258)
+EMIT_RI10(spe_hgti, 0x04f)
+EMIT_RR (spe_hlgt, 0x2d8)
+EMIT_RI10(spe_hlgti, 0x05f)
+EMIT_RR (spe_ceqb, 0x3d0)
+EMIT_RI10(spe_ceqbi, 0x07e)
+EMIT_RR (spe_ceqh, 0x3c8)
+EMIT_RI10(spe_ceqhi, 0x07d)
+EMIT_RR (spe_ceq, 0x3c0)
+EMIT_RI10(spe_ceqi, 0x07c)
+EMIT_RR (spe_cgtb, 0x250)
+EMIT_RI10(spe_cgtbi, 0x04e)
+EMIT_RR (spe_cgth, 0x248)
+EMIT_RI10(spe_cgthi, 0x04d)
+EMIT_RR (spe_cgt, 0x240)
+EMIT_RI10(spe_cgti, 0x04c)
+EMIT_RR (spe_clgtb, 0x2d0)
+EMIT_RI10(spe_clgtbi, 0x05e)
+EMIT_RR (spe_clgth, 0x2c8)
+EMIT_RI10(spe_clgthi, 0x05d)
+EMIT_RR (spe_clgt, 0x2c0)
+EMIT_RI10(spe_clgti, 0x05c)
+EMIT_I16 (spe_br, 0x064)
+EMIT_I16 (spe_bra, 0x060)
+EMIT_RI16(spe_brsl, 0x066)
+EMIT_RI16(spe_brasl, 0x062)
+EMIT_RI16(spe_brnz, 0x042)
+EMIT_RI16(spe_brz, 0x040)
+EMIT_RI16(spe_brhnz, 0x046)
+EMIT_RI16(spe_brhz, 0x044)
+
+/* Control instructions
+ */
+EMIT (spe_lnop, 0x001)
+
+extern void
+spe_lqd(struct spe_function *p, int rT, int rA, int offset);
+
+extern void
+spe_stqd(struct spe_function *p, int rT, int rA, int offset);
+
+extern void spe_bi(struct spe_function *p, int rA, int d, int e);
+extern void spe_iret(struct spe_function *p, int rA, int d, int e);
+extern void spe_bisled(struct spe_function *p, int rT, int rA,
int d, int e);
-extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA,
+extern void spe_bisl(struct spe_function *p, int rT, int rA,
int d, int e);
-extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA,
+extern void spe_biz(struct spe_function *p, int rT, int rA,
int d, int e);
-extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA,
+extern void spe_binz(struct spe_function *p, int rT, int rA,
int d, int e);
-extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA,
+extern void spe_bihz(struct spe_function *p, int rT, int rA,
int d, int e);
-extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA,
+extern void spe_bihnz(struct spe_function *p, int rT, int rA,
int d, int e);
/** Load/splat immediate float into rT. */
extern void
-spe_load_float(struct spe_function *p, unsigned rT, float x);
+spe_load_float(struct spe_function *p, int rT, float x);
/** Load/splat immediate int into rT. */
extern void
-spe_load_int(struct spe_function *p, unsigned rT, int i);
+spe_load_int(struct spe_function *p, int rT, int i);
+
+/** Load/splat immediate unsigned int into rT. */
+extern void
+spe_load_uint(struct spe_function *p, int rT, uint ui);
+
+/** And immediate value into rT. */
+extern void
+spe_and_uint(struct spe_function *p, int rT, int rA, uint ui);
+
+/** Xor immediate value into rT. */
+extern void
+spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui);
+
+/** Compare equal with immediate value. */
+extern void
+spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui);
+
+/** Compare greater with immediate value. */
+extern void
+spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui);
/** Replicate word 0 of rA across rT. */
extern void
-spe_splat(struct spe_function *p, unsigned rT, unsigned rA);
+spe_splat(struct spe_function *p, int rT, int rA);
-/** Complement/invert all bits in rT. */
+/** rT = complement_all_bits(rA). */
extern void
-spe_complement(struct spe_function *p, unsigned rT);
+spe_complement(struct spe_function *p, int rT, int rA);
/** rT = rA. */
extern void
-spe_move(struct spe_function *p, unsigned rT, unsigned rA);
+spe_move(struct spe_function *p, int rT, int rA);
/** rT = {0,0,0,0}. */
extern void
-spe_zero(struct spe_function *p, unsigned rT);
+spe_zero(struct spe_function *p, int rT);
+
+/** rT = splat(rA, word) */
+extern void
+spe_splat_word(struct spe_function *p, int rT, int rA, int word);
+
+/** rT = float min(rA, rB) */
+extern void
+spe_float_min(struct spe_function *p, int rT, int rA, int rB);
+
+/** rT = float max(rA, rB) */
+extern void
+spe_float_max(struct spe_function *p, int rT, int rA, int rB);
/* Floating-point instructions
*/
-EMIT_RR (spe_fa, 0x2c4);
-EMIT_RR (spe_dfa, 0x2cc);
-EMIT_RR (spe_fs, 0x2c5);
-EMIT_RR (spe_dfs, 0x2cd);
-EMIT_RR (spe_fm, 0x2c6);
-EMIT_RR (spe_dfm, 0x2ce);
-EMIT_RRR (spe_fma, 0x00e);
-EMIT_RR (spe_dfma, 0x35c);
-EMIT_RRR (spe_fnms, 0x00d);
-EMIT_RR (spe_dfnms, 0x35e);
-EMIT_RRR (spe_fms, 0x00f);
-EMIT_RR (spe_dfms, 0x35d);
-EMIT_RR (spe_dfnma, 0x35f);
-EMIT_R (spe_frest, 0x1b8);
-EMIT_R (spe_frsqest, 0x1b9);
-EMIT_RR (spe_fi, 0x3d4);
-EMIT_RI8 (spe_csflt, 0x1da, 155);
-EMIT_RI8 (spe_cflts, 0x1d8, 173);
-EMIT_RI8 (spe_cuflt, 0x1db, 155);
-EMIT_RI8 (spe_cfltu, 0x1d9, 173);
-EMIT_R (spe_frds, 0x3b9);
-EMIT_R (spe_fesd, 0x3b8);
-EMIT_RR (spe_dfceq, 0x3c3);
-EMIT_RR (spe_dfcmeq, 0x3cb);
-EMIT_RR (spe_dfcgt, 0x2c3);
-EMIT_RR (spe_dfcmgt, 0x2cb);
-EMIT_RI7 (spe_dftsv, 0x3bf);
-EMIT_RR (spe_fceq, 0x3c2);
-EMIT_RR (spe_fcmeq, 0x3ca);
-EMIT_RR (spe_fcgt, 0x2c2);
-EMIT_RR (spe_fcmgt, 0x2ca);
-EMIT_R (spe_fscrwr, 0x3ba);
-EMIT_ (spe_fscrrd, 0x398);
+EMIT_RR (spe_fa, 0x2c4)
+EMIT_RR (spe_dfa, 0x2cc)
+EMIT_RR (spe_fs, 0x2c5)
+EMIT_RR (spe_dfs, 0x2cd)
+EMIT_RR (spe_fm, 0x2c6)
+EMIT_RR (spe_dfm, 0x2ce)
+EMIT_RRR (spe_fma, 0x00e)
+EMIT_RR (spe_dfma, 0x35c)
+EMIT_RRR (spe_fnms, 0x00d)
+EMIT_RR (spe_dfnms, 0x35e)
+EMIT_RRR (spe_fms, 0x00f)
+EMIT_RR (spe_dfms, 0x35d)
+EMIT_RR (spe_dfnma, 0x35f)
+EMIT_R (spe_frest, 0x1b8)
+EMIT_R (spe_frsqest, 0x1b9)
+EMIT_RR (spe_fi, 0x3d4)
+EMIT_RI8 (spe_csflt, 0x1da, 155)
+EMIT_RI8 (spe_cflts, 0x1d8, 173)
+EMIT_RI8 (spe_cuflt, 0x1db, 155)
+EMIT_RI8 (spe_cfltu, 0x1d9, 173)
+EMIT_R (spe_frds, 0x3b9)
+EMIT_R (spe_fesd, 0x3b8)
+EMIT_RR (spe_dfceq, 0x3c3)
+EMIT_RR (spe_dfcmeq, 0x3cb)
+EMIT_RR (spe_dfcgt, 0x2c3)
+EMIT_RR (spe_dfcmgt, 0x2cb)
+EMIT_RI7 (spe_dftsv, 0x3bf)
+EMIT_RR (spe_fceq, 0x3c2)
+EMIT_RR (spe_fcmeq, 0x3ca)
+EMIT_RR (spe_fcgt, 0x2c2)
+EMIT_RR (spe_fcmgt, 0x2ca)
+EMIT_R (spe_fscrwr, 0x3ba)
+EMIT_ (spe_fscrrd, 0x398)
/* Channel instructions
*/
-EMIT_R (spe_rdch, 0x00d);
-EMIT_R (spe_rdchcnt, 0x00f);
-EMIT_R (spe_wrch, 0x10d);
+EMIT_R (spe_rdch, 0x00d)
+EMIT_R (spe_rdchcnt, 0x00f)
+EMIT_R (spe_wrch, 0x10d)
#ifdef UNDEF_EMIT_MACROS
+#undef EMIT
#undef EMIT_
#undef EMIT_R
#undef EMIT_RR
@@ -361,6 +425,7 @@ EMIT_R (spe_wrch, 0x10d);
#undef EMIT_RI7
#undef EMIT_RI8
#undef EMIT_RI10
+#undef EMIT_RI10s
#undef EMIT_RI16
#undef EMIT_RI18
#undef EMIT_I16
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index ad9d8f8ced..57fcf6de2a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -26,7 +26,7 @@
#if defined(PIPE_ARCH_X86)
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_pointer.h"
#include "rtasm_execmem.h"
@@ -240,7 +240,8 @@ static void emit_modrm( struct x86_function *p,
/* Oh-oh we've stumbled into the SIB thing.
*/
if (regmem.file == file_REG32 &&
- regmem.idx == reg_SP) {
+ regmem.idx == reg_SP &&
+ regmem.mod != mod_REG) {
emit_1ub(p, 0x24); /* simplistic! */
}
@@ -439,25 +440,70 @@ void x86_call( struct x86_function *p, struct x86_reg reg)
}
-/* michal:
- * Temporary. As I need immediate operands, and dont want to mess with the codegen,
- * I load the immediate into general purpose register and use it.
- */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
DUMP_RI( dst, imm );
+ assert(dst.file == file_REG32);
assert(dst.mod == mod_REG);
emit_1ub(p, 0xb8 + dst.idx);
emit_1i(p, imm);
}
-void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm )
+/**
+ * Immediate group 1 instructions.
+ */
+static INLINE void
+x86_group1_imm( struct x86_function *p,
+ unsigned op, struct x86_reg dst, int imm )
{
- DUMP_RI( dst, imm );
+ assert(dst.file == file_REG32);
assert(dst.mod == mod_REG);
- emit_1ub(p, 0x80);
- emit_modrm_noreg(p, 0, dst);
- emit_1ub(p, imm);
+ if(-0x80 <= imm && imm < 0x80) {
+ emit_1ub(p, 0x83);
+ emit_modrm_noreg(p, op, dst);
+ emit_1b(p, (char)imm);
+ }
+ else {
+ emit_1ub(p, 0x81);
+ emit_modrm_noreg(p, op, dst);
+ emit_1i(p, imm);
+ }
+}
+
+void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 0, dst, imm);
+}
+
+void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 1, dst, imm);
+}
+
+void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 4, dst, imm);
+}
+
+void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 5, dst, imm);
+}
+
+void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 6, dst, imm);
+}
+
+void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 7, dst, imm);
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index af79f07dd3..1b5eaaca85 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -152,12 +152,13 @@ void x86_jmp( struct x86_function *p, int label );
/* void x86_call( struct x86_function *p, void (*label)() ); */
void x86_call( struct x86_function *p, struct x86_reg reg);
-/* michal:
- * Temporary. As I need immediate operands, and dont want to mess with the codegen,
- * I load the immediate into general purpose register and use it.
- */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
-void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm );
+void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm );
/* Macro for sse_shufps() and sse2_pshufd():
diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile
index 516d1756cf..a7d111b689 100644
--- a/src/gallium/auxiliary/sct/Makefile
+++ b/src/gallium/auxiliary/sct/Makefile
@@ -7,6 +7,3 @@ C_SOURCES = \
sct.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index c7155a9316..b4900e8dba 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -11,6 +11,7 @@ C_SOURCES = \
tgsi_info.c \
tgsi_iterate.c \
tgsi_parse.c \
+ tgsi_ppc.c \
tgsi_scan.c \
tgsi_sse2.c \
tgsi_text.c \
@@ -18,6 +19,3 @@ C_SOURCES = \
tgsi_util.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 45bf3f6d57..8200cce42f 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -12,6 +12,7 @@ tgsi = env.ConvenienceLibrary(
'tgsi_parse.c',
'tgsi_sanity.c',
'tgsi_scan.c',
+ 'tgsi_ppc.c',
'tgsi_sse2.c',
'tgsi_text.c',
'tgsi_transform.c',
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index ed8fc5ac25..a1891a140a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_build.h"
#include "tgsi_parse.h"
@@ -114,7 +114,7 @@ tgsi_default_declaration( void )
struct tgsi_declaration declaration;
declaration.Type = TGSI_TOKEN_TYPE_DECLARATION;
- declaration.Size = 1;
+ declaration.NrTokens = 1;
declaration.File = TGSI_FILE_NULL;
declaration.UsageMask = TGSI_WRITEMASK_XYZW;
declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT;
@@ -160,9 +160,9 @@ declaration_grow(
struct tgsi_declaration *declaration,
struct tgsi_header *header )
{
- assert( declaration->Size < 0xFF );
+ assert( declaration->NrTokens < 0xFF );
- declaration->Size++;
+ declaration->NrTokens++;
header_bodysize_grow( header );
}
@@ -308,7 +308,7 @@ tgsi_default_immediate( void )
struct tgsi_immediate immediate;
immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
- immediate.Size = 1;
+ immediate.NrTokens = 1;
immediate.DataType = TGSI_IMM_FLOAT32;
immediate.Padding = 0;
immediate.Extended = 0;
@@ -345,9 +345,9 @@ immediate_grow(
struct tgsi_immediate *immediate,
struct tgsi_header *header )
{
- assert( immediate->Size < 0xFF );
+ assert( immediate->NrTokens < 0xFF );
- immediate->Size++;
+ immediate->NrTokens++;
header_bodysize_grow( header );
}
@@ -384,7 +384,7 @@ tgsi_build_full_immediate(
*immediate = tgsi_build_immediate( header );
- for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) {
+ for( i = 0; i < full_imm->Immediate.NrTokens - 1; i++ ) {
struct tgsi_immediate_float32 *if32;
if( maxsize <= size )
@@ -411,7 +411,7 @@ tgsi_default_instruction( void )
struct tgsi_instruction instruction;
instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
- instruction.Size = 1;
+ instruction.NrTokens = 1;
instruction.Opcode = TGSI_OPCODE_MOV;
instruction.Saturate = TGSI_SAT_NONE;
instruction.NumDstRegs = 1;
@@ -453,9 +453,9 @@ instruction_grow(
struct tgsi_instruction *instruction,
struct tgsi_header *header )
{
- assert (instruction->Size < 0xFF);
+ assert (instruction->NrTokens < 0xFF);
- instruction->Size++;
+ instruction->NrTokens++;
header_bodysize_grow( header );
}
@@ -801,10 +801,14 @@ tgsi_default_instruction_ext_nv( void )
return instruction_ext_nv;
}
-union token_u32
+
+/** test for inequality of 32-bit values pointed to by a and b */
+static INLINE boolean
+compare32(const void *a, const void *b)
{
- unsigned u32;
-};
+ return *((uint32_t *) a) != *((uint32_t *) b);
+}
+
unsigned
tgsi_compare_instruction_ext_nv(
@@ -813,7 +817,7 @@ tgsi_compare_instruction_ext_nv(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_instruction_ext_nv
@@ -872,7 +876,7 @@ tgsi_compare_instruction_ext_label(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_instruction_ext_label
@@ -913,7 +917,7 @@ tgsi_compare_instruction_ext_texture(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_instruction_ext_texture
@@ -1035,7 +1039,7 @@ tgsi_compare_src_register_ext_swz(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_src_register_ext_swz
@@ -1103,7 +1107,7 @@ tgsi_compare_src_register_ext_mod(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_src_register_ext_mod
@@ -1249,7 +1253,7 @@ tgsi_compare_dst_register_ext_concode(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_dst_register_ext_concode
@@ -1307,7 +1311,7 @@ tgsi_compare_dst_register_ext_modulate(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_dst_register_ext_modulate
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 2ed8c2bf07..d57cb9139f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_string.h"
#include "tgsi_dump.h"
#include "tgsi_info.h"
@@ -285,7 +285,7 @@ iter_immediate(
ENM( imm->Immediate.DataType, immediate_type_names );
TXT( " { " );
- for (i = 0; i < imm->Immediate.Size - 1; i++) {
+ for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
FLT( imm->u.ImmediateFloat32[i].Float );
@@ -294,7 +294,7 @@ iter_immediate(
assert( 0 );
}
- if (i < imm->Immediate.Size - 2)
+ if (i < imm->Immediate.NrTokens - 2)
TXT( ", " );
}
TXT( " }" );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index be25cb45a0..3dc61c48ca 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_string.h"
#include "tgsi_dump_c.h"
#include "tgsi_build.h"
@@ -283,7 +283,7 @@ dump_immediate_verbose(
UIX( imm->Immediate.Padding );
}
- for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+ for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) {
EOL();
switch( imm->Immediate.DataType ) {
case TGSI_IMM_FLOAT32:
@@ -646,7 +646,6 @@ tgsi_dump_c(
struct tgsi_full_declaration fd;
uint ignored = flags & TGSI_DUMP_C_IGNORED;
uint deflt = flags & TGSI_DUMP_C_DEFAULT;
- uint instno = 0;
tgsi_parse_init( &parse, tokens );
@@ -676,7 +675,7 @@ tgsi_dump_c(
ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES );
if( ignored ) {
TXT( "\nSize : " );
- UID( parse.FullToken.Token.Size );
+ UID( parse.FullToken.Token.NrTokens );
if( deflt || parse.FullToken.Token.Extended ) {
TXT( "\nExtended : " );
UID( parse.FullToken.Token.Extended );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index f98b66dc0b..94589cf79f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -133,7 +133,7 @@ tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
uint numSamplers,
- struct tgsi_sampler *samplers)
+ struct tgsi_sampler **samplers)
{
uint k;
struct tgsi_parse_context parse;
@@ -202,7 +202,7 @@ tgsi_exec_machine_bind_shader(
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
- uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+ uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
assert( size % 4 == 0 );
assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
@@ -320,6 +320,7 @@ micro_add(
dst->f[3] = src0->f[3] + src1->f[3];
}
+#if 0
static void
micro_iadd(
union tgsi_exec_channel *dst,
@@ -331,6 +332,7 @@ micro_iadd(
dst->i[2] = src0->i[2] + src1->i[2];
dst->i[3] = src0->i[3] + src1->i[3];
}
+#endif
static void
micro_and(
@@ -408,6 +410,7 @@ micro_div(
}
}
+#if 0
static void
micro_udiv(
union tgsi_exec_channel *dst,
@@ -419,6 +422,7 @@ micro_udiv(
dst->u[2] = src0->u[2] / src1->u[2];
dst->u[3] = src0->u[3] / src1->u[3];
}
+#endif
static void
micro_eq(
@@ -434,6 +438,7 @@ micro_eq(
dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
}
+#if 0
static void
micro_ieq(
union tgsi_exec_channel *dst,
@@ -447,6 +452,7 @@ micro_ieq(
dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
}
+#endif
static void
micro_exp2(
@@ -466,6 +472,7 @@ micro_exp2(
#endif
}
+#if 0
static void
micro_f2ut(
union tgsi_exec_channel *dst,
@@ -476,6 +483,7 @@ micro_f2ut(
dst->u[2] = (uint) src->f[2];
dst->u[3] = (uint) src->f[3];
}
+#endif
static void
micro_flr(
@@ -570,6 +578,7 @@ micro_lt(
dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
}
+#if 0
static void
micro_ilt(
union tgsi_exec_channel *dst,
@@ -583,7 +592,9 @@ micro_ilt(
dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
}
+#endif
+#if 0
static void
micro_ult(
union tgsi_exec_channel *dst,
@@ -597,6 +608,7 @@ micro_ult(
dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
}
+#endif
static void
micro_max(
@@ -610,6 +622,7 @@ micro_max(
dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
}
+#if 0
static void
micro_imax(
union tgsi_exec_channel *dst,
@@ -621,7 +634,9 @@ micro_imax(
dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
}
+#endif
+#if 0
static void
micro_umax(
union tgsi_exec_channel *dst,
@@ -633,6 +648,7 @@ micro_umax(
dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
}
+#endif
static void
micro_min(
@@ -646,6 +662,7 @@ micro_min(
dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
}
+#if 0
static void
micro_imin(
union tgsi_exec_channel *dst,
@@ -657,7 +674,9 @@ micro_imin(
dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
}
+#endif
+#if 0
static void
micro_umin(
union tgsi_exec_channel *dst,
@@ -669,7 +688,9 @@ micro_umin(
dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
}
+#endif
+#if 0
static void
micro_umod(
union tgsi_exec_channel *dst,
@@ -681,6 +702,7 @@ micro_umod(
dst->u[2] = src0->u[2] % src1->u[2];
dst->u[3] = src0->u[3] % src1->u[3];
}
+#endif
static void
micro_mul(
@@ -694,6 +716,7 @@ micro_mul(
dst->f[3] = src0->f[3] * src1->f[3];
}
+#if 0
static void
micro_imul(
union tgsi_exec_channel *dst,
@@ -705,7 +728,9 @@ micro_imul(
dst->i[2] = src0->i[2] * src1->i[2];
dst->i[3] = src0->i[3] * src1->i[3];
}
+#endif
+#if 0
static void
micro_imul64(
union tgsi_exec_channel *dst0,
@@ -722,7 +747,9 @@ micro_imul64(
dst0->i[2] = 0;
dst0->i[3] = 0;
}
+#endif
+#if 0
static void
micro_umul64(
union tgsi_exec_channel *dst0,
@@ -739,7 +766,10 @@ micro_umul64(
dst0->u[2] = 0;
dst0->u[3] = 0;
}
+#endif
+
+#if 0
static void
micro_movc(
union tgsi_exec_channel *dst,
@@ -752,6 +782,7 @@ micro_movc(
dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
}
+#endif
static void
micro_neg(
@@ -764,6 +795,7 @@ micro_neg(
dst->f[3] = -src->f[3];
}
+#if 0
static void
micro_ineg(
union tgsi_exec_channel *dst,
@@ -774,6 +806,7 @@ micro_ineg(
dst->i[2] = -src->i[2];
dst->i[3] = -src->i[3];
}
+#endif
static void
micro_not(
@@ -874,6 +907,7 @@ micro_trunc(
dst->f[3] = (float) (int) src0->f[3];
}
+#if 0
static void
micro_ushr(
union tgsi_exec_channel *dst,
@@ -885,6 +919,7 @@ micro_ushr(
dst->u[2] = src0->u[2] >> src1->u[2];
dst->u[3] = src0->u[3] >> src1->u[3];
}
+#endif
static void
micro_sin(
@@ -919,6 +954,7 @@ micro_sub(
dst->f[3] = src0->f[3] - src1->f[3];
}
+#if 0
static void
micro_u2f(
union tgsi_exec_channel *dst,
@@ -929,6 +965,7 @@ micro_u2f(
dst->f[2] = (float) src->u[2];
dst->f[3] = (float) src->u[3];
}
+#endif
static void
micro_xor(
@@ -1045,11 +1082,28 @@ fetch_source(
union tgsi_exec_channel index;
uint swizzle;
+ /* We start with a direct index into a register file.
+ *
+ * file[1],
+ * where:
+ * file = SrcRegister.File
+ * [1] = SrcRegister.Index
+ */
index.i[0] =
index.i[1] =
index.i[2] =
index.i[3] = reg->SrcRegister.Index;
+ /* There is an extra source register that indirectly subscripts
+ * a register file. The direct index now becomes an offset
+ * that is being added to the indirect register.
+ *
+ * file[ind[2].x+1],
+ * where:
+ * ind = SrcRegisterInd.File
+ * [2] = SrcRegisterInd.Index
+ * .x = SrcRegisterInd.SwizzleX
+ */
if (reg->SrcRegister.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
@@ -1086,19 +1140,31 @@ fetch_source(
}
}
- if( reg->SrcRegister.Dimension ) {
- switch( reg->SrcRegister.File ) {
+ /* There is an extra source register that is a second
+ * subscript to a register file. Effectively it means that
+ * the register file is actually a 2D array of registers.
+ *
+ * file[1][3] == file[1*sizeof(file[1])+3],
+ * where:
+ * [3] = SrcRegisterDim.Index
+ */
+ if (reg->SrcRegister.Dimension) {
+ /* The size of the first-order array depends on the register file type.
+ * We need to multiply the index to the first array to get an effective,
+ * "flat" index that points to the beginning of the second-order array.
+ */
+ switch (reg->SrcRegister.File) {
case TGSI_FILE_INPUT:
- index.i[0] *= 17;
- index.i[1] *= 17;
- index.i[2] *= 17;
- index.i[3] *= 17;
+ index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
break;
case TGSI_FILE_CONSTANT:
- index.i[0] *= 4096;
- index.i[1] *= 4096;
- index.i[2] *= 4096;
- index.i[3] *= 4096;
+ index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
break;
default:
assert( 0 );
@@ -1109,6 +1175,17 @@ fetch_source(
index.i[2] += reg->SrcRegisterDim.Index;
index.i[3] += reg->SrcRegisterDim.Index;
+ /* Again, the second subscript index can be addressed indirectly
+ * identically to the first one.
+ * Nothing stops us from indirectly addressing the indirect register,
+ * but there is no need for that, so we won't exercise it.
+ *
+ * file[1][ind[4].y+3],
+ * where:
+ * ind = SrcRegisterDimInd.File
+ * [4] = SrcRegisterDimInd.Index
+ * .y = SrcRegisterDimInd.SwizzleX
+ */
if (reg->SrcRegisterDim.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
@@ -1141,6 +1218,11 @@ fetch_source(
index.i[i] = 0;
}
}
+
+ /* If by any chance there was a need for a 3D array of register
+ * files, we would have to check whether SrcRegisterDim is followed
+ * by a dimension register and continue the saga.
+ */
}
swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
@@ -1490,7 +1572,7 @@ exec_kilp(struct tgsi_exec_machine *mach,
/*
- * Fetch a texel using STR texture coordinates.
+ * Fetch four texture samples using STR texture coordinates.
*/
static void
fetch_texel( struct tgsi_sampler *sampler,
@@ -1524,7 +1606,7 @@ exec_tex(struct tgsi_exec_machine *mach,
boolean projected)
{
const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
- union tgsi_exec_channel r[8];
+ union tgsi_exec_channel r[4];
uint chan_index;
float lodBias;
@@ -1532,6 +1614,7 @@ exec_tex(struct tgsi_exec_machine *mach,
switch (inst->InstructionExtTexture.Texture) {
case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
FETCH(&r[0], 0, CHAN_X);
@@ -1547,13 +1630,15 @@ exec_tex(struct tgsi_exec_machine *mach,
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 0, CHAN_Y);
@@ -1573,7 +1658,7 @@ exec_tex(struct tgsi_exec_machine *mach,
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], &r[1], &r[2], lodBias, /* inputs */
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
@@ -1599,7 +1684,7 @@ exec_tex(struct tgsi_exec_machine *mach,
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], &r[1], &r[2], lodBias,
&r[0], &r[1], &r[2], &r[3]);
break;
@@ -1709,6 +1794,7 @@ exec_declaration(
break;
default:
+ eval = NULL;
assert( 0 );
}
@@ -1751,7 +1837,7 @@ exec_instruction(
case TGSI_OPCODE_ARL:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_trunc( &r[0], &r[0] );
+ micro_flr( &r[0], &r[0] );
STORE( &r[0], 0, chan_index );
}
break;
@@ -1806,6 +1892,7 @@ exec_instruction(
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
FETCH( &r[0], 0, CHAN_X );
+ micro_abs( &r[0], &r[0] );
micro_sqrt( &r[0], &r[0] );
micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index fc40a25e09..4ffd4efbff 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -68,17 +68,12 @@ struct tgsi_interp_coef
float dady[NUM_CHANNELS];
};
-
-struct softpipe_tile_cache; /**< Opaque to TGSI */
-
/**
* Information for sampling textures, which must be implemented
* by code outside the TGSI executor.
*/
struct tgsi_sampler
{
- const struct pipe_sampler_state *state;
- struct pipe_texture *texture;
/** Get samples for four fragments in a quad */
void (*get_samples)(struct tgsi_sampler *sampler,
const float s[QUAD_SIZE],
@@ -86,8 +81,6 @@ struct tgsi_sampler
const float p[QUAD_SIZE],
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE]);
- void *pipe; /*XXX temporary*/
- struct softpipe_tile_cache *cache;
};
/**
@@ -178,6 +171,16 @@ struct tgsi_exec_labels
#define TGSI_EXEC_MAX_LOOP_NESTING 20
#define TGSI_EXEC_MAX_CALL_NESTING 20
+/* The maximum number of input attributes per vertex. For 2D
+ * input register files, this is the stride between two 1D
+ * arrays.
+ */
+#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17
+
+/* The maximum number of constant vectors per constant buffer.
+ */
+#define TGSI_EXEC_MAX_CONST_BUFFER 4096
+
/**
* Run-time virtual machine state for executing TGSI shader.
*/
@@ -195,7 +198,7 @@ struct tgsi_exec_machine
struct tgsi_exec_vector *Temps;
struct tgsi_exec_vector *Addrs;
- struct tgsi_sampler *Samplers;
+ struct tgsi_sampler **Samplers;
float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
unsigned ImmLimit;
@@ -258,7 +261,7 @@ tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
uint numSamplers,
- struct tgsi_sampler *samplers);
+ struct tgsi_sampler **samplers);
uint
tgsi_exec_machine_run(
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 68c7a6b7f5..2b8a6f0fb1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_info.h"
static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c
index 5371a88b96..d88c2558d8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_iterate.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_iterate.h"
boolean
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 3757486ba9..22006edf3d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
#include "tgsi_build.h"
@@ -88,16 +88,33 @@ tgsi_parse_end_of_tokens(
1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize;
}
+
+/**
+ * This function is used to avoid and work around type punning/aliasing
+ * warnings. The warnings seem harmless on x86 but on PPC they cause
+ * real failures.
+ */
+static INLINE void
+copy_token(void *dst, const void *src)
+{
+ memcpy(dst, src, 4);
+}
+
+
+/**
+ * Get next 4-byte token, return it at address specified by 'token'
+ */
static void
next_token(
struct tgsi_parse_context *ctx,
void *token )
{
assert( !tgsi_parse_end_of_tokens( ctx ) );
-
- *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++];
+ copy_token(token, &ctx->Tokens[ctx->Position]);
+ ctx->Position++;
}
+
void
tgsi_parse_token(
struct tgsi_parse_context *ctx )
@@ -116,7 +133,7 @@ tgsi_parse_token(
struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration;
*decl = tgsi_default_full_declaration();
- decl->Declaration = *(struct tgsi_declaration *) &token;
+ copy_token(&decl->Declaration, &token);
next_token( ctx, &decl->DeclarationRange );
@@ -132,15 +149,14 @@ tgsi_parse_token(
struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
*imm = tgsi_default_full_immediate();
- imm->Immediate = *(struct tgsi_immediate *) &token;
-
+ copy_token(&imm->Immediate, &token);
assert( !imm->Immediate.Extended );
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
imm->u.Pointer = MALLOC(
- sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
- for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+ sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.NrTokens - 1) );
+ for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) {
next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] );
}
break;
@@ -158,8 +174,7 @@ tgsi_parse_token(
unsigned extended;
*inst = tgsi_default_full_instruction();
- inst->Instruction = *(struct tgsi_instruction *) &token;
-
+ copy_token(&inst->Instruction, &token);
extended = inst->Instruction.Extended;
while( extended ) {
@@ -169,18 +184,15 @@ tgsi_parse_token(
switch( token.Type ) {
case TGSI_INSTRUCTION_EXT_TYPE_NV:
- inst->InstructionExtNv =
- *(struct tgsi_instruction_ext_nv *) &token;
+ copy_token(&inst->InstructionExtNv, &token);
break;
case TGSI_INSTRUCTION_EXT_TYPE_LABEL:
- inst->InstructionExtLabel =
- *(struct tgsi_instruction_ext_label *) &token;
+ copy_token(&inst->InstructionExtLabel, &token);
break;
case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE:
- inst->InstructionExtTexture =
- *(struct tgsi_instruction_ext_texture *) &token;
+ copy_token(&inst->InstructionExtTexture, &token);
break;
default:
@@ -212,13 +224,13 @@ tgsi_parse_token(
switch( token.Type ) {
case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE:
- inst->FullDstRegisters[i].DstRegisterExtConcode =
- *(struct tgsi_dst_register_ext_concode *) &token;
+ copy_token(&inst->FullDstRegisters[i].DstRegisterExtConcode,
+ &token);
break;
case TGSI_DST_REGISTER_EXT_TYPE_MODULATE:
- inst->FullDstRegisters[i].DstRegisterExtModulate =
- *(struct tgsi_dst_register_ext_modulate *) &token;
+ copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate,
+ &token);
break;
default:
@@ -245,13 +257,13 @@ tgsi_parse_token(
switch( token.Type ) {
case TGSI_SRC_REGISTER_EXT_TYPE_SWZ:
- inst->FullSrcRegisters[i].SrcRegisterExtSwz =
- *(struct tgsi_src_register_ext_swz *) &token;
+ copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtSwz,
+ &token);
break;
case TGSI_SRC_REGISTER_EXT_TYPE_MOD:
- inst->FullSrcRegisters[i].SrcRegisterExtMod =
- *(struct tgsi_src_register_ext_mod *) &token;
+ copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod,
+ &token);
break;
default:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
new file mode 100644
index 0000000000..0c64ae5713
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -0,0 +1,1363 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * TGSI to PowerPC code generation.
+ */
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_PPC)
+
+#include "util/u_debug.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_sse.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi_dump.h"
+#include "tgsi_exec.h"
+#include "tgsi_ppc.h"
+#include "rtasm/rtasm_ppc.h"
+
+
+/**
+ * Since it's pretty much impossible to form PPC vector immediates, load
+ * them from memory here.
+ * load_constant_vec() searches this table for an exact match, loads the
+ * 4-byte word at byte offset (pos * 4) and splats slot (pos % 4) across
+ * the destination vector.  A value that is not listed here trips the
+ * assertion at the end of load_constant_vec().
+ */
+const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
+ 1.0f, -128.0f, 128.0, 0.0
+};
+
+
+#define FOR_EACH_CHANNEL( CHAN )\
+ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) /* CHAN = 0 .. NUM_CHANNELS-1 */
+
+#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+ ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) /* non-zero if dst 0 writes CHAN */
+
+#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) /* bare 'if': the statement that follows is its body */
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
+ FOR_EACH_CHANNEL( CHAN )\
+ IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) /* loop whose body only runs for write-enabled channels */
+
+#define CHAN_X 0 /* channel indices accepted by the macros above */
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+
+/**
+ * How many TGSI temps should be implemented with real PPC vector registers
+ * rather than memory.
+ * Each such temp takes four PPC vector registers (one per channel), all
+ * allocated up front by init_gen_context().  Temps with an index of
+ * MAX_PPC_TEMPS or more are loaded from / stored to the memory addressed
+ * through temps_reg instead.
+ */
+#define MAX_PPC_TEMPS 3
+
+
+/**
+ * Context/state used during code gen.
+ * Initialized by init_gen_context(); the *_vec and offset_reg members are
+ * filled in lazily the first time they are needed (-1 means "not yet").
+ */
+struct gen_context
+{
+ struct ppc_function *f; /**< function that instructions are emitted into */
+ int inputs_reg; /**< GP register pointing to input params */
+ int outputs_reg; /**< GP register pointing to output params */
+ int temps_reg; /**< GP register pointing to temporary "registers" */
+ int immed_reg; /**< GP register pointing to immediates buffer */
+ int const_reg; /**< GP register pointing to constants buffer */
+ int builtins_reg; /**< GP register pointing to built-in constants */
+
+ int offset_reg; /**< used to reduce redundant li instructions */
+ int offset_value; /**< offset last loaded into offset_reg; negative = none cached */
+
+ int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */
+ int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
+
+ /**
+ * Map TGSI temps to PPC vector temps.
+ * We have 32 PPC vector regs. Each mapped TGSI temp uses four of them
+ * (one per channel), i.e. 4 * MAX_PPC_TEMPS registers in total.
+ * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1].
+ */
+ int temps_map[MAX_PPC_TEMPS][4];
+
+ /**
+ * Cache of src registers.
+ * This is used to avoid redundant load instructions.
+ * Entries are added by get_src_vec() and dropped by release_src_vecs().
+ */
+ struct {
+ struct tgsi_full_src_register src; /* operand that was fetched */
+ uint chan; /* channel of that operand */
+ uint vec; /* PPC vector register holding the fetched value */
+ } regs[12]; /* 3 src regs, 4 channels */
+ uint num_regs; /* number of valid entries in regs[] */
+};
+
+
+/**
+ * Prepare a code generation context for emitting into 'func'.
+ *
+ * Zeroes the whole context, reserves GP registers 3..8 for the six pointer
+ * arguments of the generated function, marks the lazily-created registers
+ * as "not allocated" and allocates four vector registers (one per channel)
+ * for each of the first MAX_PPC_TEMPS TGSI temporaries.
+ *
+ * \param gen   context to initialize (any previous contents are discarded)
+ * \param func  function that subsequent instructions are emitted into
+ */
+static void
+init_gen_context(struct gen_context *gen, struct ppc_function *func)
+{
+   uint temp, chan;
+
+   memset(gen, 0, sizeof(*gen));
+   gen->f = func;
+
+   /* function parameters, in argument order */
+   gen->inputs_reg   = ppc_reserve_register(func, 3);
+   gen->outputs_reg  = ppc_reserve_register(func, 4);
+   gen->temps_reg    = ppc_reserve_register(func, 5);
+   gen->immed_reg    = ppc_reserve_register(func, 6);
+   gen->const_reg    = ppc_reserve_register(func, 7);
+   gen->builtins_reg = ppc_reserve_register(func, 8);
+
+   /* created on first use */
+   gen->one_vec = -1;
+   gen->bit31_vec = -1;
+   gen->offset_reg = -1;
+   gen->offset_value = -9999999;
+
+   /* register-backed TGSI temps: one vector register per channel */
+   for (temp = 0; temp < MAX_PPC_TEMPS; temp++) {
+      for (chan = 0; chan < 4; chan++) {
+         gen->temps_map[temp][chan] = ppc_allocate_vec_register(func);
+      }
+   }
+}
+
+
+/**
+ * Tell whether a TGSI source register lives in real PPC vector registers,
+ * i.e. whether it is a TGSI temporary whose index is below MAX_PPC_TEMPS.
+ */
+static boolean
+is_ppc_vec_temporary(const struct tgsi_full_src_register *reg)
+{
+   if (reg->SrcRegister.File != TGSI_FILE_TEMPORARY)
+      return FALSE;
+
+   return reg->SrcRegister.Index < MAX_PPC_TEMPS;
+}
+
+
+/**
+ * Destination-register counterpart of is_ppc_vec_temporary(): true when
+ * the TGSI destination is a temporary whose index is below MAX_PPC_TEMPS.
+ */
+static boolean
+is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg)
+{
+   if (reg->DstRegister.File != TGSI_FILE_TEMPORARY)
+      return FALSE;
+
+   return reg->DstRegister.Index < MAX_PPC_TEMPS;
+}
+
+
+
+/**
+ * PPC vector loads and stores compute their effective address as the sum
+ * of two registers, e.g.
+ *    lvx  v2,r8,r9      # v2 = memory[r8 + r9]
+ *    stvx v2,r8,r9      # memory[r8 + r9] = v2
+ * so each lvx/stvx normally needs an 'li' first to put the byte offset
+ * into the second register (r9 above).
+ *
+ * This helper emits that 'li' only when the requested offset differs from
+ * the one loaded by the previous call, which seems to save about 10% in
+ * the instruction count.  The cached offset is only trustworthy within
+ * straight-line code; reset_li_offset() discards it.
+ *
+ * \param offset  byte offset wanted in the offset register
+ * \return the GP register that holds 'offset'
+ */
+static int
+emit_li_offset(struct gen_context *gen, int offset)
+{
+   boolean cached;
+
+   /* the offset register is allocated on first use */
+   if (gen->offset_reg <= 0)
+      gen->offset_reg = ppc_allocate_register(gen->f);
+
+   /* a negative cached value never counts as a hit */
+   cached = (gen->offset_value >= 0 && gen->offset_value == offset);
+   if (cached)
+      return gen->offset_reg;
+
+   gen->offset_value = offset;
+   ppc_li(gen->f, gen->offset_reg, offset);
+
+   return gen->offset_reg;
+}
+
+
+/**
+ * Forget the offset cached by emit_li_offset() so that its next call
+ * emits an 'li' again.  To be called at the top of basic blocks.
+ */
+static void
+reset_li_offset(struct gen_context *gen)
+{
+   /* emit_li_offset() treats any negative cached value as "none" */
+   const int no_cached_offset = -9999999;
+
+   gen->offset_value = no_cached_offset;
+}
+
+
+
+/**
+ * Fill the given vector register with {value, value, value, value}.
+ * The value must be one of the entries of ppc_builtin_constants[];
+ * anything else trips the assertion at the end.
+ * We wouldn't need this if there was a simple way to load PPC vector
+ * registers with immediate values!
+ *
+ * \param dst_vec  vector register to fill
+ * \param value    constant to look up (compared for exact equality)
+ */
+static void
+load_constant_vec(struct gen_context *gen, int dst_vec, float value)
+{
+   const uint num_builtins = Elements(ppc_builtin_constants);
+   uint i;
+
+   for (i = 0; i < num_builtins; i++) {
+      int byte_offset, offset_reg;
+
+      if (ppc_builtin_constants[i] != value)
+         continue;
+
+      byte_offset = i * 4;
+      offset_reg = emit_li_offset(gen, byte_offset);
+
+      /* Load the 4-byte word into the vector register.  Which slot it
+       * lands in depends on the effective address; the table starts on
+       * a 16-byte boundary, so entry i ends up in slot (i % 4).  The
+       * other slots are undefined until the splat below.
+       */
+      ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg);
+      ppc_vspltw(gen->f, dst_vec, dst_vec, i % 4);
+      return;
+   }
+
+   assert(0 && "Need to add new constant to ppc_builtin_constants array");
+}
+
+
+/**
+ * Get the vector register holding {1.0, 1.0, 1.0, 1.0}.
+ * The register is allocated and loaded the first time this is called and
+ * reused on every later call.
+ */
+static int
+gen_one_vec(struct gen_context *gen)
+{
+   if (gen->one_vec >= 0)
+      return gen->one_vec;
+
+   gen->one_vec = ppc_allocate_vec_register(gen->f);
+   load_constant_vec(gen, gen->one_vec, 1.0f);
+
+   return gen->one_vec;
+}
+
+/**
+ * Get the vector register holding {1<<31, 1<<31, 1<<31, 1<<31}, i.e. the
+ * float sign bit in every word.  Created on first use, then reused.
+ */
+static int
+gen_get_bit31_vec(struct gen_context *gen)
+{
+   int vec = gen->bit31_vec;
+
+   if (vec < 0) {
+      vec = ppc_allocate_vec_register(gen->f);
+      /* splat -1 into every word ... */
+      ppc_vspltisw(gen->f, vec, -1);
+      /* ... then shift each word left, using the register itself as
+       * the shift count, to leave only the top bit set
+       */
+      ppc_vslw(gen->f, vec, vec, vec);
+      gen->bit31_vec = vec;
+   }
+
+   return vec;
+}
+
+
+/**
+ * Register fetch.  Emit code that puts one channel of a TGSI source
+ * operand into a PPC vector register, honouring the operand's extended
+ * swizzle and sign mode.
+ *
+ * \param gen         code generation context
+ * \param reg         TGSI source register to read
+ * \param chan_index  channel of the operand that is wanted
+ * \return index of the PPC vector register holding the value.  For a
+ *         register-backed TGSI temp with an X/Y/Z/W swizzle and no sign
+ *         modification this is the temp's own register; on every other
+ *         path it is a register allocated here.
+ */
+static int
+emit_fetch(struct gen_context *gen,
+           const struct tgsi_full_src_register *reg,
+           const unsigned chan_index)
+{
+   uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
+   int dst_vec = -1;
+
+   switch (swizzle) {
+   case TGSI_EXTSWIZZLE_X:
+   case TGSI_EXTSWIZZLE_Y:
+   case TGSI_EXTSWIZZLE_Z:
+   case TGSI_EXTSWIZZLE_W:
+      switch (reg->SrcRegister.File) {
+      case TGSI_FILE_INPUT:
+         {
+            /* one 16-byte vector per channel */
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
+            ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
+         }
+         break;
+      case TGSI_FILE_TEMPORARY:
+         if (is_ppc_vec_temporary(reg)) {
+            /* use PPC vec register */
+            dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle];
+         }
+         else {
+            /* use memory-based temp register "file" */
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
+            ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
+         }
+         break;
+      case TGSI_FILE_IMMEDIATE:
+         {
+            /* one 4-byte float per channel */
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
+            int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
+            /* Load 4-byte word into vector register.
+             * The vector slot depends on the effective address we load from.
+             * We know that our immediates start at a 16-byte boundary so we
+             * know that 'swizzle' tells us which vector slot will have the
+             * loaded word.  The other vector slots will be undefined.
+             */
+            ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+            /* splat word[swizzle] across the vector reg */
+            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
+         }
+         break;
+      case TGSI_FILE_CONSTANT:
+         {
+            /* one 4-byte float per channel */
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
+            int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
+            /* Load 4-byte word into vector register.
+             * The vector slot depends on the effective address we load from.
+             * We know that our constants start at a 16-byte boundary so we
+             * know that 'swizzle' tells us which vector slot will have the
+             * loaded word.  The other vector slots will be undefined.
+             */
+            ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg);
+            /* splat word[swizzle] across the vector reg */
+            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
+         }
+         break;
+      default:
+         assert( 0 );
+      }
+      break;
+   case TGSI_EXTSWIZZLE_ZERO:
+      /* dst_vec is still -1 here: a register has to be allocated before
+       * it can be cleared
+       */
+      dst_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vzero(gen->f, dst_vec);
+      break;
+   case TGSI_EXTSWIZZLE_ONE:
+      {
+         int one_vec = gen_one_vec(gen);
+         dst_vec = ppc_allocate_vec_register(gen->f);
+         ppc_vmove(gen->f, dst_vec, one_vec);
+      }
+      break;
+   default:
+      assert( 0 );
+   }
+
+   assert(dst_vec >= 0);
+
+   {
+      uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
+      if (sign_op != TGSI_UTIL_SIGN_KEEP) {
+         int bit31_vec = gen_get_bit31_vec(gen);
+         int dst_vec2;
+
+         if (is_ppc_vec_temporary(reg)) {
+            /* need to use a new temp so the TGSI temp itself is untouched */
+            dst_vec2 = ppc_allocate_vec_register(gen->f);
+         }
+         else {
+            /* dst_vec was allocated above: modify it in place */
+            dst_vec2 = dst_vec;
+         }
+
+         switch (sign_op) {
+         case TGSI_UTIL_SIGN_CLEAR:
+            /* vec = vec & ~bit31 */
+            ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec);
+            break;
+         case TGSI_UTIL_SIGN_SET:
+            /* vec = vec | bit31 */
+            ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec);
+            break;
+         case TGSI_UTIL_SIGN_TOGGLE:
+            /* vec = vec ^ bit31 */
+            ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec);
+            break;
+         default:
+            assert(0);
+         }
+         return dst_vec2;
+      }
+   }
+
+   return dst_vec;
+}
+
+
+
+/**
+ * Decide whether two (source register, channel) pairs denote the same
+ * value: same register file and index, same extended swizzle for the
+ * given channels and same sign mode.
+ * Used by the source operand cache to avoid redundant register loads.
+ */
+static boolean
+equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a,
+               const struct tgsi_full_src_register *b, uint chan_b)
+{
+   const boolean same_reg =
+      (a->SrcRegister.File == b->SrcRegister.File &&
+       a->SrcRegister.Index == b->SrcRegister.Index);
+
+   if (!same_reg)
+      return FALSE;
+
+   if (tgsi_util_get_full_src_register_extswizzle(a, chan_a) !=
+       tgsi_util_get_full_src_register_extswizzle(b, chan_b))
+      return FALSE;
+
+   if (tgsi_util_get_full_src_register_sign_mode(a, chan_a) !=
+       tgsi_util_get_full_src_register_sign_mode(b, chan_b))
+      return FALSE;
+
+   return TRUE;
+}
+
+
+/**
+ * Given a TGSI src register and channel index, return the PPC vector
+ * register containing the value.  We use a cache to prevent re-loading
+ * the same register multiple times within one instruction.
+ *
+ * \param inst     instruction whose operand is wanted
+ * \param src_reg  index of the source operand within 'inst'
+ * \param chan     channel of that operand
+ * \return index of PPC vector register with the desired src operand.
+ *         The register is owned by the cache; it is handed back by
+ *         release_src_vecs().
+ */
+static int
+get_src_vec(struct gen_context *gen,
+            struct tgsi_full_instruction *inst, int src_reg, uint chan)
+{
+   const struct tgsi_full_src_register *src =
+      &inst->FullSrcRegisters[src_reg];
+   int vec;
+   uint i;
+
+   /* check the cache */
+   for (i = 0; i < gen->num_regs; i++) {
+      if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) {
+         /* cache hit */
+         assert(gen->regs[i].vec >= 0);
+         return gen->regs[i].vec;
+      }
+   }
+
+   /* cache miss: make sure a free slot exists *before* writing to it */
+   assert(gen->num_regs < Elements(gen->regs));
+
+   /* allocate new vec reg and emit fetch/load code */
+   vec = emit_fetch(gen, src, chan);
+   assert(vec >= 0);
+
+   gen->regs[gen->num_regs].src = *src;
+   gen->regs[gen->num_regs].chan = chan;
+   gen->regs[gen->num_regs].vec = vec;
+   gen->num_regs++;
+
+   return vec;
+}
+
+
+/**
+ * Empty the src operand cache.  To be called at the end of each emit
+ * function.
+ * Registers that were allocated for a cached operand are released; the
+ * registers of register-backed TGSI temps are left alone.
+ */
+static void
+release_src_vecs(struct gen_context *gen)
+{
+   uint slot = 0;
+
+   while (slot < gen->num_regs) {
+      const struct tgsi_full_src_register *src = &gen->regs[slot].src;
+
+      if (!is_ppc_vec_temporary(src))
+         ppc_release_vec_register(gen->f, gen->regs[slot].vec);
+
+      slot++;
+   }
+
+   gen->num_regs = 0;
+}
+
+
+
+/**
+ * Pick the PPC vector register an instruction should compute one channel
+ * of its result into.
+ * For a register-backed TGSI temp destination this is the temp's own
+ * register for that channel; otherwise a new register is allocated.
+ *
+ * \param chan_index  destination channel
+ * \return index of the PPC vector register to write
+ */
+static int
+get_dst_vec(struct gen_context *gen,
+            const struct tgsi_full_instruction *inst,
+            unsigned chan_index)
+{
+   const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[0];
+
+   if (!is_ppc_vec_temporary_dst(dst))
+      return ppc_allocate_vec_register(gen->f);
+
+   return gen->temps_map[dst->DstRegister.Index][chan_index];
+}
+
+
+/**
+ * Register store. Store 'src_vec' into channel 'chan_index' of the
+ * instruction's first destination register (inst->FullDstRegisters[0]).
+ *
+ * Output registers and memory-based temps are written with a vector store;
+ * each channel occupies 16 bytes.  For a register-backed temp the value is
+ * copied into the temp's vector register only when free_vec is FALSE;
+ * with free_vec TRUE nothing is emitted (presumably because the caller
+ * computed directly into the register returned by get_dst_vec() - verify
+ * against callers) and the register is never released.
+ * The instruction's saturate mode is not applied: that code is compiled
+ * out below.
+ *
+ * \param src_vec PPC vector register holding the value to store
+ * \param inst instruction whose destination 0 is written
+ * \param chan_index destination channel
+ * \param free_vec Should the src_vec be released when done?
+ */
+static void
+emit_store(struct gen_context *gen,
+ int src_vec,
+ const struct tgsi_full_instruction *inst,
+ unsigned chan_index,
+ boolean free_vec)
+{
+ const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0];
+
+ switch (reg->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ {
+ int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; /* 16 bytes per channel */
+ int offset_reg = emit_li_offset(gen, offset);
+ ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (is_ppc_vec_temporary_dst(reg)) {
+ if (!free_vec) {
+ int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index];
+ if (dst_vec != src_vec) /* skip the copy when already in place */
+ ppc_vmove(gen->f, dst_vec, src_vec);
+ }
+ free_vec = FALSE; /* nothing is released on this path */
+ }
+ else {
+ int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; /* 16 bytes per channel */
+ int offset_reg = emit_li_offset(gen, offset);
+ ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg);
+ }
+ break;
+#if 0
+ case TGSI_FILE_ADDRESS:
+ emit_addrs(
+ func,
+ xmm,
+ reg->DstRegister.Index,
+ chan_index );
+ break;
+#endif
+ default:
+ assert( 0 ); /* other destination files are not handled */
+ }
+
+#if 0
+ switch( inst->Instruction.Saturate ) {
+ case TGSI_SAT_NONE:
+ break;
+
+ case TGSI_SAT_ZERO_ONE:
+ /* assert( 0 ); */
+ break;
+
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ assert( 0 );
+ break;
+ }
+#endif
+
+ if (free_vec)
+ ppc_release_vec_register(gen->f, src_vec);
+}
+
+
+/**
+ * Emit code for the scalar unary opcodes RSQ and RCP: the operation is
+ * applied to the X channel of source 0 and the single result is written
+ * to every enabled destination channel.
+ */
+static void
+emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   const int src = get_src_vec(gen, inst, 0, CHAN_X);
+   const int result = ppc_allocate_vec_register(gen->f);
+   const uint opcode = inst->Instruction.Opcode;
+   uint chan;
+
+   if (opcode == TGSI_OPCODE_RSQ) {
+      /* result = 1.0 / sqrt(src) */
+      ppc_vrsqrtefp(gen->f, result, src);
+   }
+   else if (opcode == TGSI_OPCODE_RCP) {
+      /* result = 1.0 / src */
+      ppc_vrefp(gen->f, result, src);
+   }
+   else {
+      assert(0);
+   }
+
+   /* replicate the result; it is released once, after the loop */
+   FOR_EACH_CHANNEL(chan) {
+      if (IS_DST0_CHANNEL_ENABLED(*inst, chan))
+         emit_store(gen, result, inst, chan, FALSE);
+   }
+
+   release_src_vecs(gen);
+   ppc_release_vec_register(gen->f, result);
+}
+
+
+/**
+ * Emit code for the per-channel unary opcodes ABS, FLOOR, FRAC, EXPBASE2,
+ * LOGBASE2, MOV and SWZ.  Each enabled destination channel is computed
+ * from the matching channel of source 0.
+ *
+ * The destination register may be the very register that holds the source
+ * (same register-backed temp), so multi-instruction sequences must not
+ * write the destination before they are done reading the source.
+ */
+static void
+emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   uint chan_index;
+
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      int v0 = get_src_vec(gen, inst, 0, chan_index);   /* v0 = srcreg[0] */
+      int v1 = get_dst_vec(gen, inst, chan_index);
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_ABS:
+         /* turn off the most significant bit of each vector float word */
+         {
+            int bit31_vec = gen_get_bit31_vec(gen);
+            ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */
+         }
+         break;
+      case TGSI_OPCODE_FLOOR:
+         ppc_vrfim(gen->f, v1, v0);         /* v1 = floor(v0) */
+         break;
+      case TGSI_OPCODE_FRAC:
+         {
+            /* floor() goes to a scratch register: writing it to v1 would
+             * destroy v0 when both name the same register
+             */
+            int floor_vec = ppc_allocate_vec_register(gen->f);
+            ppc_vrfim(gen->f, floor_vec, v0);       /* floor_vec = floor(v0) */
+            ppc_vsubfp(gen->f, v1, v0, floor_vec);  /* v1 = v0 - floor_vec */
+            ppc_release_vec_register(gen->f, floor_vec);
+         }
+         break;
+      case TGSI_OPCODE_EXPBASE2:
+         ppc_vexptefp(gen->f, v1, v0);      /* v1 = 2^v0 */
+         break;
+      case TGSI_OPCODE_LOGBASE2:
+         /* XXX this may be broken! */
+         ppc_vlogefp(gen->f, v1, v0);       /* v1 = log2(v0) */
+         break;
+      case TGSI_OPCODE_MOV:
+      case TGSI_OPCODE_SWZ:
+         if (v0 != v1)
+            ppc_vmove(gen->f, v1, v0);
+         break;
+      default:
+         assert(0);
+      }
+      emit_store(gen, v1, inst, chan_index, TRUE);  /* store v1 */
+   }
+
+   release_src_vecs(gen);
+}
+
+
+/**
+ * Emit code for the per-channel binary opcodes ADD, SUB, MUL, MIN and MAX.
+ * MUL is implemented as a multiply-add with a zero addend, so a zeroed
+ * vector register is set up for it before the channel loop.
+ */
+static void
+emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   struct ppc_function *f = gen->f;
+   const uint opcode = inst->Instruction.Opcode;
+   const boolean is_mul = (opcode == TGSI_OPCODE_MUL);
+   int zeros = -1;
+   uint ch;
+
+   if (is_mul) {
+      zeros = ppc_allocate_vec_register(f);
+      ppc_vzero(f, zeros);
+   }
+
+   FOR_EACH_CHANNEL(ch) {
+      int lhs, rhs, result;
+
+      if (!IS_DST0_CHANNEL_ENABLED(*inst, ch))
+         continue;
+
+      /* operands first, then the register to compute into */
+      lhs = get_src_vec(gen, inst, 0, ch);
+      rhs = get_src_vec(gen, inst, 1, ch);
+      result = get_dst_vec(gen, inst, ch);
+
+      switch (opcode) {
+      case TGSI_OPCODE_ADD:
+         ppc_vaddfp(f, result, lhs, rhs);
+         break;
+      case TGSI_OPCODE_SUB:
+         ppc_vsubfp(f, result, lhs, rhs);
+         break;
+      case TGSI_OPCODE_MUL:
+         ppc_vmaddfp(f, result, lhs, rhs, zeros);
+         break;
+      case TGSI_OPCODE_MIN:
+         ppc_vminfp(f, result, lhs, rhs);
+         break;
+      case TGSI_OPCODE_MAX:
+         ppc_vmaxfp(f, result, lhs, rhs);
+         break;
+      default:
+         assert(0);
+      }
+
+      emit_store(gen, result, inst, ch, TRUE);
+   }
+
+   if (is_mul)
+      ppc_release_vec_register(f, zeros);
+
+   release_src_vecs(gen);
+}
+
+
+/**
+ * Emit code for the per-channel three-operand opcodes:
+ *    MAD: dst = src0 * src1 + src2
+ *    LRP: dst = src0 * (src1 - src2) + src2
+ *
+ * The destination register may be the same register as one of the sources
+ * (same register-backed temp), so LRP keeps its intermediate difference in
+ * a scratch register instead of the destination.
+ */
+static void
+emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   const boolean is_lrp = (inst->Instruction.Opcode == TGSI_OPCODE_LRP);
+   int diff_vec = -1;
+   uint chan;
+
+   if (is_lrp) {
+      /* scratch for (src1 - src2), shared by all channels */
+      diff_vec = ppc_allocate_vec_register(gen->f);
+   }
+
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
+      /* fetch src operands */
+      int v0 = get_src_vec(gen, inst, 0, chan);
+      int v1 = get_src_vec(gen, inst, 1, chan);
+      int v2 = get_src_vec(gen, inst, 2, chan);
+      int v3 = get_dst_vec(gen, inst, chan);
+
+      /* emit ALU */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_MAD:
+         ppc_vmaddfp(gen->f, v3, v0, v1, v2);   /* v3 = v0 * v1 + v2 */
+         break;
+      case TGSI_OPCODE_LRP:
+         /* v0 and v2 are still needed after the subtraction, so the
+          * difference must not land in v3 (which may alias either)
+          */
+         ppc_vsubfp(gen->f, diff_vec, v1, v2);       /* diff = v1 - v2 */
+         ppc_vmaddfp(gen->f, v3, v0, diff_vec, v2);  /* v3 = v0 * diff + v2 */
+         break;
+      default:
+         assert(0);
+      }
+
+      /* store v3 */
+      emit_store(gen, v3, inst, chan, TRUE);
+   }
+
+   if (is_lrp)
+      ppc_release_vec_register(gen->f, diff_vec);
+
+   release_src_vecs(gen);
+}
+
+
+/**
+ * Emit code for the set-on-compare opcodes SEQ, SNE, SLT, SGE, SGT and
+ * SLE.  Each enabled channel receives 1.0 where the comparison holds and
+ * 0.0 where it does not.
+ *
+ * Only "equal" and "greater than" compares are emitted; SNE, SGE and SLE
+ * are produced by complementing the mask of SEQ, SLT and SGT respectively.
+ */
+static void
+emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   struct ppc_function *f = gen->f;
+   const int ones = gen_one_vec(gen);
+   uint ch;
+
+   FOR_EACH_CHANNEL(ch) {
+      int a, b, mask;
+      boolean invert = FALSE;
+
+      if (!IS_DST0_CHANNEL_ENABLED(*inst, ch))
+         continue;
+
+      a = get_src_vec(gen, inst, 0, ch);
+      b = get_src_vec(gen, inst, 1, ch);
+      mask = get_dst_vec(gen, inst, ch);
+
+      /* mask = all ones where the compare is true, zero elsewhere */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_SEQ:
+         ppc_vcmpeqfpx(f, mask, a, b);    /* a == b */
+         break;
+      case TGSI_OPCODE_SNE:
+         ppc_vcmpeqfpx(f, mask, a, b);    /* !(a == b) */
+         invert = TRUE;
+         break;
+      case TGSI_OPCODE_SLT:
+         ppc_vcmpgtfpx(f, mask, b, a);    /* b > a */
+         break;
+      case TGSI_OPCODE_SGE:
+         ppc_vcmpgtfpx(f, mask, b, a);    /* !(b > a) */
+         invert = TRUE;
+         break;
+      case TGSI_OPCODE_SGT:
+         ppc_vcmpgtfpx(f, mask, a, b);    /* a > b */
+         break;
+      case TGSI_OPCODE_SLE:
+         ppc_vcmpgtfpx(f, mask, a, b);    /* !(a > b) */
+         invert = TRUE;
+         break;
+      default:
+         assert(0);
+      }
+
+      /* turn the bit mask into 1.0 / 0.0 */
+      if (invert) {
+         ppc_vandc(f, mask, ones, mask);  /* mask = ones & ~mask */
+      }
+      else {
+         ppc_vand(f, mask, ones, mask);   /* mask = ones & mask */
+      }
+
+      emit_store(gen, mask, inst, ch, TRUE);
+   }
+
+   release_src_vecs(gen);
+}
+
+
+/**
+ * Emit code for the dot product opcodes:
+ *    DP3: src0.x*src1.x + src0.y*src1.y + src0.z*src1.z
+ *    DP4: DP3 + src0.w*src1.w
+ *    DPH: DP3 + src1.w
+ * The single result is written to every enabled destination channel.
+ */
+static void
+emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   struct ppc_function *f = gen->f;
+   const int sum = ppc_allocate_vec_register(f);
+   int a, b;
+   uint ch;
+
+   ppc_vzero(f, sum);                       /* sum = {0, 0, 0, 0} */
+
+   /* accumulate the X, Y and Z products */
+   for (ch = CHAN_X; ch <= CHAN_Z; ch++) {
+      a = get_src_vec(gen, inst, 0, ch);
+      b = get_src_vec(gen, inst, 1, ch);
+      ppc_vmaddfp(f, sum, a, b, sum);       /* sum = a * b + sum */
+   }
+
+   /* the W term depends on the opcode; DP3 has none */
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_DP4:
+      a = get_src_vec(gen, inst, 0, CHAN_W);
+      b = get_src_vec(gen, inst, 1, CHAN_W);
+      ppc_vmaddfp(f, sum, a, b, sum);       /* sum = a * b + sum */
+      break;
+   case TGSI_OPCODE_DPH:
+      b = get_src_vec(gen, inst, 1, CHAN_W);
+      ppc_vaddfp(f, sum, sum, b);           /* sum = sum + b */
+      break;
+   default:
+      break;
+   }
+
+   /* replicate the result; sum is released once, below */
+   FOR_EACH_CHANNEL(ch) {
+      if (IS_DST0_CHANNEL_ENABLED(*inst, ch))
+         emit_store(gen, sum, inst, ch, FALSE);
+   }
+
+   release_src_vecs(gen);
+
+   ppc_release_vec_register(f, sum);
+}
+
+
+/**
+ * Emit an approximation of vr = pow(va, vb), computed as
+ * exp2(log2(va) * vb) with the estimate instructions.
+ * Two scratch vector registers are allocated and released internally.
+ *
+ * \param vr  vector register receiving the result
+ * \param va  vector register holding the base
+ * \param vb  vector register holding the exponent
+ */
+static void
+ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb)
+{
+   const int scratch = ppc_allocate_vec_register(f);
+   const int zeros = ppc_allocate_vec_register(f);
+
+   ppc_vzero(f, zeros);
+
+   /* scratch = log2(va) */
+   ppc_vlogefp(f, scratch, va);
+   /* scratch = scratch * vb (multiply-add with a zero addend) */
+   ppc_vmaddfp(f, scratch, scratch, vb, zeros);
+   /* vr = 2 ^ scratch */
+   ppc_vexptefp(f, vr, scratch);
+
+   ppc_release_vec_register(f, scratch);
+   ppc_release_vec_register(f, zeros);
+}
+
+
+/**
+ * Emit code for the LIT (lighting coefficients) opcode:
+ *    dst.x = 1.0
+ *    dst.y = max(src.x, 0)
+ *    dst.z = (src.x > 0) ? pow(max(src.y, 0), clamp(src.w, -128, 128)) : 0
+ *    dst.w = 1.0
+ * Only the channels enabled in the destination write mask are produced.
+ *
+ * Registers returned by get_src_vec() belong to the operand cache and may
+ * be the registers of a TGSI temp, so all clamping is done in scratch
+ * registers and the source registers are only ever read.
+ */
+static void
+emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int one_vec = gen_one_vec(gen);
+
+   /* Compute X */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+      emit_store(gen, one_vec, inst, CHAN_X, FALSE);
+   }
+
+   /* Compute Y, Z */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+      int src_x;
+      int zero_vec = ppc_allocate_vec_register(gen->f);
+      int x_vec = ppc_allocate_vec_register(gen->f);   /* max(src.x, 0) */
+
+      src_x = get_src_vec(gen, inst, 0, CHAN_X);       /* src_x = src[0].x */
+
+      ppc_vzero(gen->f, zero_vec);                     /* zero = {0,0,0,0} */
+      ppc_vmaxfp(gen->f, x_vec, src_x, zero_vec);      /* x_vec = max(src_x, 0) */
+
+      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+         emit_store(gen, x_vec, inst, CHAN_Y, FALSE);
+      }
+
+      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+         int src_y, src_w;
+         int y_vec = ppc_allocate_vec_register(gen->f);     /* max(src.y, 0) */
+         int w_vec = ppc_allocate_vec_register(gen->f);     /* clamped src.w */
+         int limit_vec = ppc_allocate_vec_register(gen->f); /* -128, then 128 */
+         int z_vec = ppc_allocate_vec_register(gen->f);
+         int pow_vec = ppc_allocate_vec_register(gen->f);
+         int pos_vec = ppc_allocate_vec_register(gen->f);
+
+         src_y = get_src_vec(gen, inst, 0, CHAN_Y);      /* src_y = src[0].y */
+         ppc_vmaxfp(gen->f, y_vec, src_y, zero_vec);     /* y_vec = max(src_y, 0) */
+
+         src_w = get_src_vec(gen, inst, 0, CHAN_W);      /* src_w = src[0].w */
+
+         /* clamp W to [-128, 128] */
+         load_constant_vec(gen, limit_vec, -128.0f);
+         ppc_vmaxfp(gen->f, w_vec, src_w, limit_vec);    /* w = max(src_w, -128) */
+         load_constant_vec(gen, limit_vec, 128.0f);
+         ppc_vminfp(gen->f, w_vec, w_vec, limit_vec);    /* w = min(w, 128) */
+
+         /* if x > 0
+          *    z = pow(y, w)
+          * else
+          *    z = 0.0
+          */
+         ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec);       /* pow = pow(y, w) */
+         ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec);  /* pos = x > 0 */
+         ppc_vand(gen->f, z_vec, pow_vec, pos_vec);        /* z = pow & pos */
+
+         emit_store(gen, z_vec, inst, CHAN_Z, FALSE);
+
+         ppc_release_vec_register(gen->f, y_vec);
+         ppc_release_vec_register(gen->f, w_vec);
+         ppc_release_vec_register(gen->f, limit_vec);
+         ppc_release_vec_register(gen->f, z_vec);
+         ppc_release_vec_register(gen->f, pow_vec);
+         ppc_release_vec_register(gen->f, pos_vec);
+      }
+
+      ppc_release_vec_register(gen->f, x_vec);
+      ppc_release_vec_register(gen->f, zero_vec);
+   }
+
+   /* Compute W */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+      emit_store(gen, one_vec, inst, CHAN_W, FALSE);
+   }
+
+   release_src_vecs(gen);
+}
+
+
+/**
+ * Emit code for the EXP opcode.  With s = src0.x:
+ *    dst.x = 2 ^ floor(s)
+ *    dst.y = s - floor(s)
+ *    dst.z = rough approximation of 2 ^ s
+ *    dst.w = 1.0
+ * Only the channels enabled in the destination write mask are produced.
+ */
+static void
+emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   struct ppc_function *f = gen->f;
+   const int ones = gen_one_vec(gen);
+   const int arg = get_src_vec(gen, inst, 0, CHAN_X);
+
+   /* X = 2^floor(arg) */
+   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) {
+      const int out = get_dst_vec(gen, inst, CHAN_X);
+      const int floored = ppc_allocate_vec_register(f);
+
+      ppc_vrfim(f, floored, arg);
+      ppc_vexptefp(f, out, floored);
+      emit_store(gen, out, inst, CHAN_X, TRUE);
+      ppc_release_vec_register(f, floored);
+   }
+
+   /* Y = arg - floor(arg) */
+   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) {
+      const int out = get_dst_vec(gen, inst, CHAN_Y);
+      const int floored = ppc_allocate_vec_register(f);
+
+      ppc_vrfim(f, floored, arg);
+      ppc_vsubfp(f, out, arg, floored);
+      emit_store(gen, out, inst, CHAN_Y, TRUE);
+      ppc_release_vec_register(f, floored);
+   }
+
+   /* Z = RoughApprox2ToX(arg) */
+   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) {
+      const int out = get_dst_vec(gen, inst, CHAN_Z);
+
+      ppc_vexptefp(f, out, arg);
+      emit_store(gen, out, inst, CHAN_Z, TRUE);
+   }
+
+   /* W = 1.0 */
+   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) {
+      emit_store(gen, ones, inst, CHAN_W, FALSE);
+   }
+
+   release_src_vecs(gen);
+}
+
+
+/**
+ * Emit code for the LOG opcode.  With a = abs(src0.x):
+ *    dst.x = floor(log2(a))
+ *    dst.y = a / 2 ^ floor(log2(a))
+ *    dst.z = rough approximation of log2(a)
+ *    dst.w = 1.0
+ * Only the channels enabled in the destination write mask are produced.
+ */
+static void
+emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   const int bit31_vec = gen_get_bit31_vec(gen);
+   const int one_vec = gen_one_vec(gen);
+   int src_vec, abs_vec;
+
+   /* get src arg */
+   src_vec = get_src_vec(gen, inst, 0, CHAN_X);
+
+   /* compute abs(src) */
+   abs_vec = ppc_allocate_vec_register(gen->f);
+   ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec);     /* abs = src & ~bit31 */
+
+   /* X and Y share floor(log2(abs)); it is needed when EITHER of them is
+    * written, not only when both are
+    */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
+       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+
+      /* compute tmp = floor(log2(abs)) */
+      int tmp_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vlogefp(gen->f, tmp_vec, abs_vec);           /* tmp = log2(abs) */
+      ppc_vrfim(gen->f, tmp_vec, tmp_vec);             /* tmp = floor(tmp); */
+
+      /* Compute X = tmp */
+      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+         emit_store(gen, tmp_vec, inst, CHAN_X, FALSE);
+      }
+
+      /* Compute Y = abs / 2^tmp */
+      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+         const int zero_vec = ppc_allocate_vec_register(gen->f);
+         ppc_vzero(gen->f, zero_vec);
+         ppc_vexptefp(gen->f, tmp_vec, tmp_vec);       /* tmp = 2 ^ tmp */
+         ppc_vrefp(gen->f, tmp_vec, tmp_vec);          /* tmp = 1 / tmp */
+         /* tmp = abs * tmp + zero */
+         ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec);
+         emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE);
+         ppc_release_vec_register(gen->f, zero_vec);
+      }
+
+      ppc_release_vec_register(gen->f, tmp_vec);
+   }
+
+   /* Compute Z = RoughApproxLog2(abs) */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+      int dst_vec = get_dst_vec(gen, inst, CHAN_Z);
+      ppc_vlogefp(gen->f, dst_vec, abs_vec);           /* dst = log2(abs) */
+      emit_store(gen, dst_vec, inst, CHAN_Z, TRUE);
+   }
+
+   /* Compute W = 1.0 */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+      emit_store(gen, one_vec, inst, CHAN_W, FALSE);
+   }
+
+   ppc_release_vec_register(gen->f, abs_vec);
+   release_src_vecs(gen);
+}
+
+
+/**
+ * Emit code for the POW opcode: pow(src0.x, src1.x) is computed once and
+ * written to every enabled destination channel.
+ */
+static void
+emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   const int base = get_src_vec(gen, inst, 0, CHAN_X);
+   const int exponent = get_src_vec(gen, inst, 1, CHAN_X);
+   const int result = ppc_allocate_vec_register(gen->f);
+   int ch;
+
+   ppc_vec_pow(gen->f, result, base, exponent);
+
+   /* replicate the result; it is released once, below */
+   FOR_EACH_CHANNEL(ch) {
+      if (IS_DST0_CHANNEL_ENABLED(*inst, ch))
+         emit_store(gen, result, inst, ch, FALSE);
+   }
+
+   ppc_release_vec_register(gen->f, result);
+
+   release_src_vecs(gen);
+}
+
+
+/**
+ * Emit code for the XPD (cross product) opcode:
+ *    dst.x = src0.y * src1.z - src0.z * src1.y
+ *    dst.y = src0.z * src1.x - src0.x * src1.z
+ *    dst.z = src0.x * src1.y - src0.y * src1.x
+ * dst.w is left undefined.  Source channels are only fetched when an
+ * enabled destination channel needs them.
+ */
+static void
+emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   /* -1 = not fetched; each is only read under the same write-mask
+    * condition that fetched it
+    */
+   int x0_vec = -1, y0_vec = -1, z0_vec = -1;
+   int x1_vec = -1, y1_vec = -1, z1_vec = -1;
+   int zero_vec, tmp_vec;
+
+   zero_vec = ppc_allocate_vec_register(gen->f);
+   ppc_vzero(gen->f, zero_vec);
+
+   tmp_vec = ppc_allocate_vec_register(gen->f);
+
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+      x0_vec = get_src_vec(gen, inst, 0, CHAN_X);
+      x1_vec = get_src_vec(gen, inst, 1, CHAN_X);
+   }
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
+       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+      y0_vec = get_src_vec(gen, inst, 0, CHAN_Y);
+      y1_vec = get_src_vec(gen, inst, 1, CHAN_Y);
+   }
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
+       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+      z0_vec = get_src_vec(gen, inst, 0, CHAN_Z);
+      z1_vec = get_src_vec(gen, inst, 1, CHAN_Z);
+   }
+
+   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) {
+      /* tmp = y0 * z1 */
+      ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec);
+      /* tmp = tmp - z0 * y1*/
+      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec);
+      emit_store(gen, tmp_vec, inst, CHAN_X, FALSE);
+   }
+   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) {
+      /* tmp = z0 * x1 */
+      ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec);
+      /* tmp = tmp - x0 * z1 */
+      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec);
+      emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE);
+   }
+   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) {
+      /* tmp = x0 * y1 */
+      ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec);
+      /* tmp = tmp - y0 * x1 */
+      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec);
+      emit_store(gen, tmp_vec, inst, CHAN_Z, FALSE);
+   }
+   /* W is undefined */
+
+   ppc_release_vec_register(gen->f, tmp_vec);
+   ppc_release_vec_register(gen->f, zero_vec);
+   release_src_vecs(gen);
+}
+
+/**
+ * Emit PPC code for one TGSI instruction by dispatching on its opcode.
+ *
+ * \return 1 if the opcode was handled (END is accepted and emits
+ *         nothing), 0 if the opcode is not supported
+ */
+static int
+emit_instruction(struct gen_context *gen,
+                 struct tgsi_full_instruction *inst)
+{
+   int supported = 1;
+
+   switch (inst->Instruction.Opcode) {
+   /* per-channel, one operand */
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_SWZ:
+   case TGSI_OPCODE_ABS:
+   case TGSI_OPCODE_FLOOR:
+   case TGSI_OPCODE_FRAC:
+   case TGSI_OPCODE_EXPBASE2:
+   case TGSI_OPCODE_LOGBASE2:
+      emit_unaryop(gen, inst);
+      break;
+
+   /* scalar, one operand */
+   case TGSI_OPCODE_RSQ:
+   case TGSI_OPCODE_RCP:
+      emit_scalar_unaryop(gen, inst);
+      break;
+
+   /* per-channel, two operands */
+   case TGSI_OPCODE_ADD:
+   case TGSI_OPCODE_SUB:
+   case TGSI_OPCODE_MUL:
+   case TGSI_OPCODE_MIN:
+   case TGSI_OPCODE_MAX:
+      emit_binop(gen, inst);
+      break;
+
+   /* comparisons producing 1.0 / 0.0 */
+   case TGSI_OPCODE_SEQ:
+   case TGSI_OPCODE_SNE:
+   case TGSI_OPCODE_SLT:
+   case TGSI_OPCODE_SGT:
+   case TGSI_OPCODE_SLE:
+   case TGSI_OPCODE_SGE:
+      emit_inequality(gen, inst);
+      break;
+
+   /* per-channel, three operands */
+   case TGSI_OPCODE_MAD:
+   case TGSI_OPCODE_LRP:
+      emit_triop(gen, inst);
+      break;
+
+   /* dot products */
+   case TGSI_OPCODE_DP3:
+   case TGSI_OPCODE_DP4:
+   case TGSI_OPCODE_DPH:
+      emit_dotprod(gen, inst);
+      break;
+
+   /* opcodes with their own emitter */
+   case TGSI_OPCODE_LIT:
+      emit_lit(gen, inst);
+      break;
+   case TGSI_OPCODE_LOG:
+      emit_log(gen, inst);
+      break;
+   case TGSI_OPCODE_EXP:
+      emit_exp(gen, inst);
+      break;
+   case TGSI_OPCODE_POW:
+      emit_pow(gen, inst);
+      break;
+   case TGSI_OPCODE_XPD:
+      emit_xpd(gen, inst);
+      break;
+
+   case TGSI_OPCODE_END:
+      /* normal end: nothing to emit */
+      break;
+
+   default:
+      supported = 0;
+      break;
+   }
+
+   return supported;
+}
+
+
+/**
+ * Handle one TGSI declaration token.
+ *
+ * Currently this emits no code at all: the only branch, taken for
+ * TGSI_FILE_INPUT declarations, has its whole body compiled out with
+ * "#if 0".  The disabled code walks every register in the declared range
+ * and every channel enabled in the usage mask, and would emit per-channel
+ * input setup chosen by the declaration's interpolation mode (constant,
+ * linear or perspective).
+ *
+ * \param func  the PPC function being generated (unused while disabled)
+ * \param decl  the declaration to process
+ */
+static void
+emit_declaration(
+ struct ppc_function *func,
+ struct tgsi_full_declaration *decl )
+{
+ if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+#if 0
+ unsigned first, last, mask;
+ unsigned i, j;
+
+ first = decl->DeclarationRange.First;
+ last = decl->DeclarationRange.Last;
+ mask = decl->Declaration.UsageMask;
+
+ /* i: register index within the declared range, j: channel index */
+ for( i = first; i <= last; i++ ) {
+ for( j = 0; j < NUM_CHANNELS; j++ ) {
+ if( mask & (1 << j) ) {
+ switch( decl->Declaration.Interpolate ) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ emit_coef_a0( func, 0, i, j );
+ emit_inputs( func, 0, i, j );
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
+ emit_coef_dadx( func, 1, i, j );
+ emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
+ emit_coef_dady( func, 3, i, j );
+ emit_mul( func, 0, 1 ); /* x * dadx */
+ emit_coef_a0( func, 4, i, j );
+ emit_mul( func, 2, 3 ); /* y * dady */
+ emit_add( func, 0, 4 ); /* x * dadx + a0 */
+ emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
+ emit_inputs( func, 0, i, j );
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
+ emit_coef_dadx( func, 1, i, j );
+ emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
+ emit_coef_dady( func, 3, i, j );
+ emit_mul( func, 0, 1 ); /* x * dadx */
+ emit_tempf( func, 4, 0, TGSI_SWIZZLE_W );
+ emit_coef_a0( func, 5, i, j );
+ emit_rcp( func, 4, 4 ); /* 1.0 / w */
+ emit_mul( func, 2, 3 ); /* y * dady */
+ emit_add( func, 0, 5 ); /* x * dadx + a0 */
+ emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
+ emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */
+ emit_inputs( func, 0, i, j );
+ break;
+
+ default:
+ assert( 0 );
+ break;
+ }
+ }
+ }
+ }
+#endif
+ }
+}
+
+
+
+/**
+ * Placeholder for the generated function's prologue.
+ * Emits nothing at present; stack frame setup is still to be written.
+ */
+static void
+emit_prologue(struct ppc_function *func)
+{
+ /* XXX set up stack frame */
+}
+
+
+/**
+ * Finish the generated function: calls ppc_comment() with the text
+ * "Epilogue:" and then ppc_return().  Restoring a previous stack frame is
+ * not implemented yet (see the XXX note below).
+ */
+static void
+emit_epilogue(struct ppc_function *func)
+{
+ ppc_comment(func, -4, "Epilogue:");
+ ppc_return(func);
+ /* XXX restore prev stack frame */
+#if 0
+ /* disabled debug output: instruction count of the generated function */
+ debug_printf("PPC: Emitted %u instructions\n", func->num_inst);
+#endif
+}
+
+
+
+/**
+ * Translate a TGSI vertex/fragment shader to PPC code.
+ *
+ * Translation is refused (FALSE is returned) when the GALLIUM_NOPPC debug
+ * option is set; the option is queried once and the answer is cached.
+ *
+ * \param tokens  the TGSI input shader
+ * \param func  the output PPC code/function
+ * \param immediates  buffer to place immediates, later passed to PPC func;
+ *                    at most TGSI_EXEC_NUM_IMMEDIATES entries are written
+ * \param do_swizzles  currently not used by this function
+ * \return TRUE for success, FALSE if translation failed
+ */
+boolean
+tgsi_emit_ppc(const struct tgsi_token *tokens,
+              struct ppc_function *func,
+              float (*immediates)[4],
+              boolean do_swizzles )
+{
+   static int use_ppc_asm = -1;
+   struct tgsi_parse_context parse;
+   /*boolean instruction_phase = FALSE;*/
+   boolean ok = TRUE;
+   uint num_immediates = 0;
+   struct gen_context gen;
+   uint ic = 0;
+
+   (void) do_swizzles;
+
+   if (use_ppc_asm < 0) {
+      /* If GALLIUM_NOPPC is set, don't use PPC codegen */
+      use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE);
+   }
+   if (!use_ppc_asm)
+      return FALSE;
+
+   if (0) {
+      debug_printf("\n********* TGSI->PPC ********\n");
+      tgsi_dump(tokens, 0);
+   }
+
+   util_init_math();
+
+   init_gen_context(&gen, func);
+
+   emit_prologue(func);
+
+   tgsi_parse_init( &parse, tokens );
+
+   /* stop at the first token that fails to translate */
+   while (!tgsi_parse_end_of_tokens(&parse) && ok) {
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+            emit_declaration(func, &parse.FullToken.FullDeclaration );
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (func->print) {
+            _debug_printf("# ");
+            ic++;
+            tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
+         }
+
+         ok = emit_instruction(&gen, &parse.FullToken.FullInstruction)
+            ? TRUE : FALSE;
+
+         if (!ok) {
+            debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n",
+                         parse.FullToken.FullInstruction.Instruction.Opcode,
+                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
+                         "vertex shader" : "fragment shader");
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* copy the immediate's components into the next immediates[] slot */
+         {
+            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+            uint i;
+            assert(size <= 4);
+            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
+            if (size > 4 || num_immediates >= TGSI_EXEC_NUM_IMMEDIATES) {
+               /* The asserts vanish in release builds; fail the translation
+                * rather than write outside the immediates[] buffer.
+                */
+               ok = FALSE;
+               break;
+            }
+            for (i = 0; i < size; i++) {
+               immediates[num_immediates][i] =
+                  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+            }
+            num_immediates++;
+         }
+         break;
+
+      default:
+         /* unknown token type */
+         ok = FALSE;
+         assert( 0 );
+         break;
+      }
+   }
+
+   emit_epilogue(func);
+
+   tgsi_parse_free( &parse );
+
+   if (ppc_num_instructions(func) == 0) {
+      /* ran out of memory for instructions */
+      ok = FALSE;
+   }
+
+   if (!ok)
+      debug_printf("TGSI->PPC translation failed\n");
+
+   return ok;
+}
+
+#endif /* PIPE_ARCH_PPC */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h
new file mode 100644
index 0000000000..829ec075e7
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h
@@ -0,0 +1,51 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TGSI_PPC_H
+#define TGSI_PPC_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+struct tgsi_token;
+struct ppc_function;
+
+extern const float ppc_builtin_constants[];
+
+
+boolean
+tgsi_emit_ppc(const struct tgsi_token *tokens,
+ struct ppc_function *function,
+ float (*immediates)[4],
+ boolean do_swizzles);
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_PPC_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index bc7b941b78..76e773da91 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_sanity.h"
#include "tgsi_info.h"
#include "tgsi_iterate.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 1239f6c076..c535788819 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -43,6 +43,9 @@
/**
+ * Scan the given TGSI shader to collect information such as number of
+ * registers used, special instructions used, etc.
+ * \return info the result of the scan
*/
void
tgsi_scan_shader(const struct tgsi_token *tokens,
@@ -115,7 +118,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
{
const struct tgsi_full_declaration *fulldecl
= &parse.FullToken.FullDeclaration;
- uint file = fulldecl->Declaration.File;
+ const uint file = fulldecl->Declaration.File;
uint reg;
for (reg = fulldecl->DeclarationRange.First;
reg <= fulldecl->DeclarationRange.Last;
@@ -131,8 +134,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
info->num_inputs++;
}
-
- if (file == TGSI_FILE_OUTPUT) {
+ else if (file == TGSI_FILE_OUTPUT) {
info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
info->num_outputs++;
@@ -149,7 +151,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
- info->immediate_count++;
+ {
+ uint reg = info->immediate_count++;
+ uint file = TGSI_FILE_IMMEDIATE;
+
+ info->file_mask[file] |= (1 << reg);
+ info->file_count[file]++;
+ info->file_max[file] = MAX2(info->file_max[file], (int)reg);
+ }
break;
default:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index ff869c8312..d70bcd03c5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -25,9 +25,16 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_X86)
+
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
+#if defined(PIPE_ARCH_SSE)
+#include "util/u_sse.h"
+#endif
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
@@ -35,8 +42,6 @@
#include "rtasm/rtasm_x86sse.h"
-#ifdef PIPE_ARCH_X86
-
/* for 1/sqrt()
*
* This costs about 100fps (close to 10%) in gears:
@@ -509,10 +514,31 @@ emit_coef_dady(
* Function call helpers.
*/
+/**
+ * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be
+ * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee
+ * that the stack pointer is 16 byte aligned, as expected.
+ */
static void
-emit_push_gp(
- struct x86_function *func )
+emit_func_call_dst(
+ struct x86_function *func,
+ unsigned xmm_save,
+ unsigned xmm_dst,
+ void (PIPE_CDECL *code)() )
{
+ struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+ unsigned i, n;
+ unsigned xmm_mask;
+
+ /* Bitmask of the xmm registers to save */
+ xmm_mask = (1 << xmm_save) - 1;
+ xmm_mask &= ~(1 << xmm_dst);
+
+ sse_movaps(
+ func,
+ get_temp( TEMP_R0, 0 ),
+ make_xmm( xmm_dst ) );
+
x86_push(
func,
x86_make_reg( file_REG32, reg_AX) );
@@ -522,12 +548,49 @@ emit_push_gp(
x86_push(
func,
x86_make_reg( file_REG32, reg_DX) );
-}
+
+ for(i = 0, n = 0; i < 8; ++i)
+ if(xmm_mask & (1 << i))
+ ++n;
+
+ x86_sub_imm(
+ func,
+ x86_make_reg( file_REG32, reg_SP ),
+ n*16);
+
+ for(i = 0, n = 0; i < 8; ++i)
+ if(xmm_mask & (1 << i)) {
+ sse_movups(
+ func,
+ x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ),
+ make_xmm( i ) );
+ ++n;
+ }
+
+ x86_lea(
+ func,
+ ecx,
+ get_temp( TEMP_R0, 0 ) );
+
+ x86_push( func, ecx );
+ x86_mov_reg_imm( func, ecx, (unsigned long) code );
+ x86_call( func, ecx );
+ x86_pop(func, ecx );
+
+ for(i = 0, n = 0; i < 8; ++i)
+ if(xmm_mask & (1 << i)) {
+ sse_movups(
+ func,
+ make_xmm( i ),
+ x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) );
+ ++n;
+ }
+
+ x86_add_imm(
+ func,
+ x86_make_reg( file_REG32, reg_SP ),
+ n*16);
-static void
-x86_pop_gp(
- struct x86_function *func )
-{
/* Restore GP registers in a reverse order.
*/
x86_pop(
@@ -539,39 +602,6 @@ x86_pop_gp(
x86_pop(
func,
x86_make_reg( file_REG32, reg_AX) );
-}
-
-static void
-emit_func_call_dst(
- struct x86_function *func,
- unsigned xmm_dst,
- void (PIPE_CDECL *code)() )
-{
- sse_movaps(
- func,
- get_temp( TEMP_R0, 0 ),
- make_xmm( xmm_dst ) );
-
- emit_push_gp(
- func );
-
- {
- struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-
- x86_lea(
- func,
- ecx,
- get_temp( TEMP_R0, 0 ) );
-
- x86_push( func, ecx );
- x86_mov_reg_imm( func, ecx, (unsigned long) code );
- x86_call( func, ecx );
- x86_pop(func, ecx );
- }
-
-
- x86_pop_gp(
- func );
sse_movaps(
func,
@@ -582,6 +612,7 @@ emit_func_call_dst(
static void
emit_func_call_dst_src(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst,
unsigned xmm_src,
void (PIPE_CDECL *code)() )
@@ -593,10 +624,119 @@ emit_func_call_dst_src(
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
code );
}
+
+#if defined(PIPE_ARCH_SSE)
+
+/*
+ * Fast SSE2 implementation of special math functions.
+ */
+
+/*
+ * POLYn(x, c0, ..., cn) evaluates c0 + c1*x + ... + cn*x^n on four packed
+ * floats.  Each macro is built from the next lower one in nested
+ * (Horner) form, and every coefficient is broadcast with _mm_set1_ps().
+ * The x argument is expanded once per level, so pass a plain variable.
+ */
+#define POLY0(x, c0) _mm_set1_ps(c0)
+#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0))
+#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0))
+#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0))
+#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0))
+#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0))
+
+#define EXP_POLY_DEGREE 3
+#define LOG_POLY_DEGREE 5
+
+/**
+ * Approximate 2^x for four packed floats.
+ *
+ * The input is clamped to [-126.99999, 129] and split into an integer
+ * part and a remainder.  The integer part becomes a power of two by
+ * adding the bias 127 and shifting it into the float exponent field; the
+ * remainder is fed to the polynomial selected by EXP_POLY_DEGREE.  The
+ * result is the product of the two.
+ *
+ * See http://www.devmaster.net/forums/showthread.php?p=43580
+ */
+static INLINE __m128
+exp2f4(__m128 x)
+{
+ __m128i ipart;
+ __m128 fpart, expipart, expfpart;
+
+ /* clamp the input so the biased exponent built below stays in range */
+ x = _mm_min_ps(x, _mm_set1_ps( 129.00000f));
+ x = _mm_max_ps(x, _mm_set1_ps(-126.99999f));
+
+ /* ipart = int(x - 0.5) */
+ /* NOTE(review): _mm_cvtps_epi32 rounds per the current MXCSR rounding
+  * mode.  Under the default round-to-nearest this gives roughly floor(x),
+  * so fpart would span [0, 1] rather than the [-0.5, 0.5[ quoted below --
+  * confirm which range the coefficients were fitted for.
+  */
+ ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f)));
+
+ /* fpart = x - ipart */
+ fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart));
+
+ /* expipart = (float) (1 << ipart) */
+ expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23));
+
+ /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */
+#if EXP_POLY_DEGREE == 5
+ expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f);
+#elif EXP_POLY_DEGREE == 4
+ expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f);
+#elif EXP_POLY_DEGREE == 3
+ expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f);
+#elif EXP_POLY_DEGREE == 2
+ expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f);
+#else
+#error
+#endif
+
+ /* 2^x = 2^ipart * 2^fpart */
+ return _mm_mul_ps(expipart, expfpart);
+}
+
+
+/**
+ * Approximate log2(x) for four packed floats.
+ *
+ * Bit masks split each float into its exponent field (shifted down,
+ * unbiased by 127 and converted to float) and its mantissa field (OR'ed
+ * with the bits of 1.0f).  The polynomial selected by LOG_POLY_DEGREE is
+ * evaluated on the mantissa value, multiplied by (mant - 1), and added to
+ * the exponent.  The sign bit is dropped by the masks, and there is no
+ * special-casing of zero, denormal, infinite or NaN inputs here.
+ *
+ * See http://www.devmaster.net/forums/showthread.php?p=43580
+ */
+static INLINE __m128
+log2f4(__m128 x)
+{
+ __m128i expmask = _mm_set1_epi32(0x7f800000);
+ __m128i mantmask = _mm_set1_epi32(0x007fffff);
+ __m128 one = _mm_set1_ps(1.0f);
+
+ /* reinterpret the float bits as integers for the mask operations */
+ __m128i i = _mm_castps_si128(x);
+
+ /* exp = (float) exponent(x) */
+ __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127)));
+
+ /* mant = (float) mantissa(x) */
+ __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one);
+
+ __m128 logmant;
+
+ /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
+ * These coefficients can be generated with
+ * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
+ */
+ /* Each branch uses a polynomial one degree below LOG_POLY_DEGREE; the
+  * multiplication by (mant - 1) further down supplies the extra degree.
+  */
+#if LOG_POLY_DEGREE == 6
+ logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f);
+#elif LOG_POLY_DEGREE == 5
+ logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
+#elif LOG_POLY_DEGREE == 4
+ logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
+#elif LOG_POLY_DEGREE == 3
+ logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
+#else
+#error
+#endif
+
+ /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+ logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one));
+
+ return _mm_add_ps(logmant, exp);
+}
+
+
+/**
+ * Approximate x^y for four packed floats as exp2(y * log2(x)), built on
+ * the exp2f4() and log2f4() approximations.
+ */
+static INLINE __m128
+powf4(__m128 x, __m128 y)
+{
+   const __m128 log2_x = log2f4(x);
+   const __m128 scaled = _mm_mul_ps(log2_x, y);
+
+   return exp2f4(scaled);
+}
+
+#endif /* PIPE_ARCH_SSE */
+
+
+
/**
* Low-level instruction translators.
*/
@@ -639,38 +779,42 @@ cos4f(
static void
emit_cos(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
cos4f );
}
static void PIPE_CDECL
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
+__attribute__((force_align_arg_pointer))
+#endif
ex24f(
float *store )
{
-#if FAST_MATH
+#if defined(PIPE_ARCH_SSE)
+ _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) ));
+#else
store[0] = util_fast_exp2( store[0] );
store[1] = util_fast_exp2( store[1] );
store[2] = util_fast_exp2( store[2] );
store[3] = util_fast_exp2( store[3] );
-#else
- store[0] = powf( 2.0f, store[0] );
- store[1] = powf( 2.0f, store[1] );
- store[2] = powf( 2.0f, store[2] );
- store[3] = powf( 2.0f, store[3] );
#endif
}
static void
emit_ex2(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
ex24f );
}
@@ -710,10 +854,12 @@ flr4f(
static void
emit_flr(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
flr4f );
}
@@ -731,31 +877,42 @@ frc4f(
static void
emit_frc(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
frc4f );
}
static void PIPE_CDECL
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
+__attribute__((force_align_arg_pointer))
+#endif
lg24f(
float *store )
{
+#if defined(PIPE_ARCH_SSE)
+ _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) ));
+#else
store[0] = util_fast_log2( store[0] );
store[1] = util_fast_log2( store[1] );
store[2] = util_fast_log2( store[2] );
store[3] = util_fast_log2( store[3] );
+#endif
}
static void
emit_lg2(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
lg24f );
}
@@ -797,30 +954,32 @@ emit_neg(
}
static void PIPE_CDECL
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
+__attribute__((force_align_arg_pointer))
+#endif
pow4f(
float *store )
{
-#if FAST_MATH
+#if defined(PIPE_ARCH_SSE)
+ _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) ));
+#else
store[0] = util_fast_pow( store[0], store[4] );
store[1] = util_fast_pow( store[1], store[5] );
store[2] = util_fast_pow( store[2], store[6] );
store[3] = util_fast_pow( store[3], store[7] );
-#else
- store[0] = powf( store[0], store[4] );
- store[1] = powf( store[1], store[5] );
- store[2] = powf( store[2], store[6] );
- store[3] = powf( store[3], store[7] );
#endif
}
static void
emit_pow(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst,
unsigned xmm_src )
{
emit_func_call_dst_src(
func,
+ xmm_save,
xmm_dst,
xmm_src,
pow4f );
@@ -855,10 +1014,12 @@ rnd4f(
static void
emit_rnd(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
rnd4f );
}
@@ -935,10 +1096,12 @@ sgn4f(
static void
emit_sgn(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
sgn4f );
}
@@ -955,10 +1118,12 @@ sin4f(
static void
emit_sin (struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst)
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
sin4f );
}
@@ -1378,7 +1543,7 @@ emit_instruction(
get_temp(
TGSI_EXEC_TEMP_MINUS_128_I,
TGSI_EXEC_TEMP_MINUS_128_C ) );
- emit_pow( func, 1, 2 );
+ emit_pow( func, 3, 1, 2 );
FETCH( func, *inst, 0, 0, CHAN_X );
sse_xorps(
func,
@@ -1410,6 +1575,7 @@ emit_instruction(
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_abs( func, 0 );
emit_rsqrt( func, 1, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 1, 0, chan_index );
@@ -1424,11 +1590,11 @@ emit_instruction(
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
emit_MOV( func, 1, 0 );
- emit_flr( func, 1 );
+ emit_flr( func, 2, 1 );
/* dst.x = ex2(floor(src.x)) */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
emit_MOV( func, 2, 1 );
- emit_ex2( func, 2 );
+ emit_ex2( func, 3, 2 );
STORE( func, *inst, 2, 0, CHAN_X );
}
/* dst.y = src.x - floor(src.x) */
@@ -1440,7 +1606,7 @@ emit_instruction(
}
/* dst.z = ex2(src.x) */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- emit_ex2( func, 0 );
+ emit_ex2( func, 3, 0 );
STORE( func, *inst, 0, 0, CHAN_Z );
}
}
@@ -1458,21 +1624,21 @@ emit_instruction(
FETCH( func, *inst, 0, 0, CHAN_X );
emit_abs( func, 0 );
emit_MOV( func, 1, 0 );
- emit_lg2( func, 1 );
+ emit_lg2( func, 2, 1 );
/* dst.z = lg2(abs(src.x)) */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
STORE( func, *inst, 1, 0, CHAN_Z );
}
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- emit_flr( func, 1 );
+ emit_flr( func, 2, 1 );
/* dst.x = floor(lg2(abs(src.x))) */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
STORE( func, *inst, 1, 0, CHAN_X );
}
/* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- emit_ex2( func, 1 );
+ emit_ex2( func, 2, 1 );
emit_rcp( func, 1, 1 );
emit_mul( func, 0, 1 );
STORE( func, *inst, 0, 0, CHAN_Y );
@@ -1647,7 +1813,18 @@ emit_instruction(
case TGSI_OPCODE_DOT2ADD:
/* TGSI_OPCODE_DP2A */
- return 0;
+ FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
+ FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
+ emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */
+ FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */
+ FETCH( func, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */
+ emit_mul( func, 1, 2 ); /* xmm1 = xmm1 * xmm2 */
+ emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
+ FETCH( func, *inst, 1, 2, CHAN_X ); /* xmm1 = src[2].x */
+ emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */
+ }
break;
case TGSI_OPCODE_INDEX:
@@ -1662,7 +1839,7 @@ emit_instruction(
/* TGSI_OPCODE_FRC */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
- emit_frc( func, 0 );
+ emit_frc( func, 0, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
break;
@@ -1675,7 +1852,7 @@ emit_instruction(
/* TGSI_OPCODE_FLR */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
- emit_flr( func, 0 );
+ emit_flr( func, 0, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
break;
@@ -1683,7 +1860,7 @@ emit_instruction(
case TGSI_OPCODE_ROUND:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
- emit_rnd( func, 0 );
+ emit_rnd( func, 0, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
break;
@@ -1691,7 +1868,7 @@ emit_instruction(
case TGSI_OPCODE_EXPBASE2:
/* TGSI_OPCODE_EX2 */
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_ex2( func, 0 );
+ emit_ex2( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1700,7 +1877,7 @@ emit_instruction(
case TGSI_OPCODE_LOGBASE2:
/* TGSI_OPCODE_LG2 */
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_lg2( func, 0 );
+ emit_lg2( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1710,7 +1887,7 @@ emit_instruction(
/* TGSI_OPCODE_POW */
FETCH( func, *inst, 0, 0, CHAN_X );
FETCH( func, *inst, 1, 1, CHAN_X );
- emit_pow( func, 0, 1 );
+ emit_pow( func, 0, 0, 1 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1801,7 +1978,7 @@ emit_instruction(
case TGSI_OPCODE_COS:
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_cos( func, 0 );
+ emit_cos( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1860,7 +2037,7 @@ emit_instruction(
case TGSI_OPCODE_SIN:
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_sin( func, 0 );
+ emit_sin( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1927,7 +2104,7 @@ emit_instruction(
case TGSI_OPCODE_ARR:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
- emit_rnd( func, 0 );
+ emit_rnd( func, 0, 0 );
emit_f2it( func, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1952,7 +2129,7 @@ emit_instruction(
/* TGSI_OPCODE_SGN */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
- emit_sgn( func, 0 );
+ emit_sgn( func, 0, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
break;
@@ -1964,12 +2141,12 @@ emit_instruction(
case TGSI_OPCODE_SCS:
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_cos( func, 0 );
+ emit_cos( func, 0, 0 );
STORE( func, *inst, 0, 0, CHAN_X );
}
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_sin( func, 0 );
+ emit_sin( func, 0, 0 );
STORE( func, *inst, 0, 0, CHAN_Y );
}
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
@@ -1995,7 +2172,39 @@ emit_instruction(
break;
case TGSI_OPCODE_NRM:
- return 0;
+ /* fall-through */
+ case TGSI_OPCODE_NRM4:
+ /* 3 or 4-component normalization */
+      {
+         /* NRM uses the first three components, NRM4 all four */
+         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
+         /* note: cannot use xmm regs 2/3 here (see emit_rsqrt() above) */
+         FETCH( func, *inst, 4, 0, CHAN_X );   /* xmm4 = src[0].x */
+         FETCH( func, *inst, 5, 0, CHAN_Y );   /* xmm5 = src[0].y */
+         FETCH( func, *inst, 6, 0, CHAN_Z );   /* xmm6 = src[0].z */
+         if (dims == 4) {
+            FETCH( func, *inst, 7, 0, CHAN_W ); /* xmm7 = src[0].w */
+         }
+         emit_MOV( func, 0, 4 );               /* xmm0 = xmm4 */
+         emit_mul( func, 0, 4 );               /* xmm0 *= xmm4 */
+         emit_MOV( func, 1, 5 );               /* xmm1 = xmm5 */
+         emit_mul( func, 1, 5 );               /* xmm1 *= xmm5 */
+         emit_add( func, 0, 1 );               /* xmm0 += xmm1 */
+         emit_MOV( func, 1, 6 );               /* xmm1 = xmm6 */
+         emit_mul( func, 1, 6 );               /* xmm1 *= xmm6 */
+         emit_add( func, 0, 1 );               /* xmm0 += xmm1 */
+         if (dims == 4) {
+            emit_MOV( func, 1, 7 );            /* xmm1 = xmm7 */
+            emit_mul( func, 1, 7 );            /* xmm1 *= xmm7 */
+            emit_add( func, 0, 1 );            /* xmm0 += xmm1 (w*w term) */
+         }
+         emit_rsqrt( func, 1, 0 );             /* xmm1 = 1/sqrt(xmm0) */
+         FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+            /* channels at or beyond dims are left unwritten */
+            if (chan_index < dims) {
+               emit_mul( func, 4+chan_index, 1); /* xmm[4+ch] *= xmm1 */
+               STORE( func, *inst, 4+chan_index, 0, chan_index );
+            }
+         }
+      }
break;
case TGSI_OPCODE_DIV:
@@ -2003,7 +2212,16 @@ emit_instruction(
break;
case TGSI_OPCODE_DP2:
- return 0;
+ FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
+ FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
+ emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */
+ FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */
+ FETCH( func, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */
+ emit_mul( func, 1, 2 ); /* xmm1 = xmm1 * xmm2 */
+ emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */
+ }
break;
case TGSI_OPCODE_TXL:
@@ -2454,7 +2672,7 @@ tgsi_emit_sse2(
case TGSI_TOKEN_TYPE_IMMEDIATE:
/* simply copy the immediate values into the next immediates[] slot */
{
- const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+ const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
uint i;
assert(size <= 4);
assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 9454563361..58fe07c11d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_text.h"
#include "tgsi_build.h"
#include "tgsi_info.h"
@@ -1023,7 +1023,7 @@ static boolean parse_immediate( struct translate_ctx *ctx )
ctx->cur++;
imm = tgsi_default_full_immediate();
- imm.Immediate.Size += 4;
+ imm.Immediate.NrTokens += 4;
imm.Immediate.DataType = TGSI_IMM_FLOAT32;
imm.u.Pointer = values;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c
index ea87da31e5..062c1be938 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -31,7 +31,7 @@
* Authors: Brian Paul
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_transform.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 50101a9bb0..71f8a6ca40 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
#include "tgsi_build.h"
diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile
index ad2a5b705e..3c82f8ae03 100644
--- a/src/gallium/auxiliary/translate/Makefile
+++ b/src/gallium/auxiliary/translate/Makefile
@@ -10,6 +10,3 @@ C_SOURCES = \
translate_cache.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
index 5c227c1eb5..160df8dfa7 100644
--- a/src/gallium/auxiliary/util/Makefile
+++ b/src/gallium/auxiliary/util/Makefile
@@ -4,7 +4,7 @@ include $(TOP)/configs/current
LIBNAME = util
C_SOURCES = \
- p_debug.c \
+ u_debug.c \
u_blit.c \
u_cache.c \
u_draw_quad.c \
@@ -12,6 +12,8 @@ C_SOURCES = \
u_handle_table.c \
u_hash_table.c \
u_hash.c \
+ u_keymap.c \
+ u_linear.c \
u_math.c \
u_mm.c \
u_rect.c \
@@ -21,9 +23,7 @@ C_SOURCES = \
u_stream_wd.c \
u_tile.c \
u_time.c \
- u_timed_winsys.c
+ u_timed_winsys.c \
+ u_simple_screen.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
index 1ef06631bf..5d336ea082 100644
--- a/src/gallium/auxiliary/util/SConscript
+++ b/src/gallium/auxiliary/util/SConscript
@@ -3,25 +3,29 @@ Import('*')
util = env.ConvenienceLibrary(
target = 'util',
source = [
- 'p_debug.c',
- 'p_debug_mem.c',
- 'p_debug_prof.c',
'u_blit.c',
'u_cache.c',
+ 'u_debug.c',
+ 'u_debug_memory.c',
+ 'u_debug_profile.c',
+ 'u_debug_stack.c',
'u_draw_quad.c',
'u_gen_mipmap.c',
'u_handle_table.c',
'u_hash.c',
'u_hash_table.c',
+ 'u_keymap.c',
'u_math.c',
'u_mm.c',
'u_rect.c',
'u_simple_shaders.c',
'u_snprintf.c',
- 'u_stream_stdc.c',
- 'u_stream_wd.c',
+ 'u_stream_stdc.c',
+ 'u_stream_wd.c',
'u_tile.c',
'u_time.c',
+ 'u_timed_winsys.c',
+ 'u_simple_screen.c',
])
auxiliaries.insert(0, util)
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 2cef3338b5..efc3a874cc 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -34,10 +34,9 @@
#include "pipe/p_context.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_blit.h"
@@ -415,7 +414,7 @@ util_blit_pixels(struct blit_state *ctx,
memset(&fb, 0, sizeof(fb));
fb.width = dst->width;
fb.height = dst->height;
- fb.num_cbufs = 1;
+ fb.nr_cbufs = 1;
fb.cbufs[0] = dst;
cso_set_framebuffer(ctx->cso, &fb);
@@ -526,7 +525,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
memset(&fb, 0, sizeof(fb));
fb.width = dst->width;
fb.height = dst->height;
- fb.num_cbufs = 1;
+ fb.nr_cbufs = 1;
fb.cbufs[0] = dst;
cso_set_framebuffer(ctx->cso, &fb);
diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c
index 0a1a64259f..41cd38171f 100644
--- a/src/gallium/auxiliary/util/u_cache.c
+++ b/src/gallium/auxiliary/util/u_cache.c
@@ -36,7 +36,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 125f3daf00..e05c419b2f 100644
--- a/src/gallium/auxiliary/util/p_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -1,6 +1,7 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (c) 2008 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -36,12 +37,20 @@
#include <windows.h>
#include <winddi.h>
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <windows.h>
+#include <types.h>
+
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
#endif
#include <windows.h>
+#include <stdio.h>
#else
@@ -51,7 +60,7 @@
#endif
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_format.h"
#include "pipe/p_state.h"
#include "pipe/p_inlines.h"
@@ -98,7 +107,41 @@ void _debug_vprintf(const char *format, va_list ap)
OutputDebugStringA(buf);
buf[0] = '\0';
}
-#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
+
+ if(GetConsoleWindow() && !IsDebuggerPresent()) {
+ vfprintf(stderr, format, ap);
+ fflush(stderr);
+ }
+
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+ wchar_t *wide_format;
+ long wide_str_len;
+ char buf[512];
+ int ret;
+#if (_WIN32_WCE < 600)
+ ret = vsprintf(buf, format, ap);
+ if(ret < 0){
+ sprintf(buf, "Cant handle debug print!");
+ ret = 25;
+ }
+#else
+ ret = vsprintf_s(buf, 512, format, ap);
+ if(ret < 0){
+ sprintf_s(buf, 512, "Cant handle debug print!");
+ ret = 25;
+ }
+#endif
+ buf[ret] = '\0';
+ /* Format is ascii - needs to be converted to wchar_t for printing */
+ wide_str_len = MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1, NULL, 0);
+ wide_format = (wchar_t *) malloc((wide_str_len+1) * sizeof(wchar_t));
+ if (wide_format) {
+ MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1,
+ wide_format, wide_str_len);
+ NKDbgPrintfW(wide_format, wide_format);
+ free(wide_format);
+ }
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
/* TODO */
#else /* !PIPE_SUBSYSTEM_WINDOWS */
#ifdef DEBUG
@@ -308,6 +351,13 @@ debug_get_flags_option(const char *name,
str = _debug_get_option(name);
if(!str)
result = dfault;
+ else if (!util_strcmp(str, "help")) {
+ result = dfault;
+ while (flags->name) {
+ debug_printf("%s: help for %s: %s [0x%lx]\n", __FUNCTION__, name, flags->name, flags->value);
+ flags++;
+ }
+ }
else {
result = 0;
while( flags->name ) {
@@ -317,7 +367,12 @@ debug_get_flags_option(const char *name,
}
}
- debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+ if (str) {
+ debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
+ }
+ else {
+ debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+ }
return result;
}
@@ -358,6 +413,32 @@ debug_dump_enum(const struct debug_named_value *names,
+/**
+ * Look up \p value in the NULL-name-terminated \p names table and return
+ * the matching name with the leading characters it shares with \p prefix
+ * removed.
+ *
+ * If no entry matches, the value is formatted as hexadecimal into a static
+ * buffer and that buffer is returned; the result is then overwritten by the
+ * next unmatched lookup.
+ *
+ * \param names   table of named values, terminated by an entry whose name
+ *                is NULL
+ * \param prefix  prefix to strip from the matching name
+ * \param value   value to look up
+ * \return the stripped name, or the hexadecimal fallback string
+ */
const char *
+debug_dump_enum_noprefix(const struct debug_named_value *names,
+                         const char *prefix,
+                         unsigned long value)
+{
+   static char rest[64];
+
+   while(names->name) {
+      if(names->value == value) {
+         const char *name = names->name;
+         /* Stop at the end of the prefix: without this check a name that
+          * is identical to the prefix compares NUL == NUL and the loop
+          * walks past the end of both strings.
+          */
+         while (*prefix && *name == *prefix) {
+            name++;
+            prefix++;
+         }
+         return name;
+      }
+      ++names;
+   }
+
+   util_snprintf(rest, sizeof(rest), "0x%08lx", value);
+   return rest;
+}
+
+
+const char *
debug_dump_flags(const struct debug_named_value *names,
unsigned long value)
{
@@ -486,16 +567,24 @@ static const struct debug_named_value pipe_format_names[] = {
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SSCALED),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SSCALED),
DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_SRGB),
- DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_L8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_A8L8_SRGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SRGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SRGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_A8R8G8B8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_X8R8G8B8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8A8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8X8_SRGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_X8UB8UG8SR8S_NORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_B6UG5SR5S_NORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGBA),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_RGBA),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_RGBA),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_SRGBA),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_SRGBA),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_SRGBA),
#endif
DEBUG_NAMED_VALUE_END
};
@@ -561,34 +650,37 @@ void debug_dump_image(const char *prefix,
+/**
+ * Dump the contents of a surface through debug_dump_image().
+ *
+ * A read transfer covering the whole surface is requested from the
+ * texture's screen, mapped, dumped and released again.  Nothing is dumped
+ * when the surface is NULL, or when the transfer cannot be obtained or
+ * mapped.
+ *
+ * \param prefix   passed through to debug_dump_image()
+ * \param surface  surface to dump; may be NULL
+ */
void debug_dump_surface(const char *prefix,
struct pipe_surface *surface)
{
- unsigned surface_usage;
+ struct pipe_texture *texture;
+ struct pipe_screen *screen;
+ struct pipe_transfer *transfer;
void *data;
if (!surface)
- goto error1;
+ return;
+
+ texture = surface->texture;
+ screen = texture->screen;
- /* XXX: force mappable surface */
- surface_usage = surface->usage;
- surface->usage |= PIPE_BUFFER_USAGE_CPU_READ;
+ transfer = screen->get_tex_transfer(screen, texture, surface->face,
+ surface->level, surface->zslice,
+ PIPE_TRANSFER_READ, 0, 0, surface->width,
+ surface->height);
+ /* The transfer is dereferenced below, so bail out if none was created;
+ * there is nothing to release in that case.
+ */
+ if (!transfer)
+ return;
- data = pipe_surface_map(surface,
- PIPE_BUFFER_USAGE_CPU_READ);
+ data = screen->transfer_map(screen, transfer);
if(!data)
- goto error2;
+ goto error;
debug_dump_image(prefix,
- surface->format,
- surface->block.size,
- surface->nblocksx,
- surface->nblocksy,
- surface->stride,
+ transfer->format,
+ transfer->block.size,
+ transfer->nblocksx,
+ transfer->nblocksy,
+ transfer->stride,
data);
- pipe_surface_unmap(surface);
-error2:
- surface->usage = surface_usage;
-error1:
- ;
+ screen->transfer_unmap(screen, transfer);
+error:
+ screen->tex_transfer_release(screen, &transfer);
}
@@ -627,8 +719,11 @@ void
debug_dump_surface_bmp(const char *filename,
struct pipe_surface *surface)
{
+#ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT
+ struct pipe_texture *texture;
+ struct pipe_screen *screen;
struct util_stream *stream;
- unsigned surface_usage;
+ struct pipe_transfer *transfer;
struct bmp_file_header bmfh;
struct bmp_info_header bmih;
float *rgba;
@@ -665,14 +760,18 @@ debug_dump_surface_bmp(const char *filename,
util_stream_write(stream, &bmfh, 14);
util_stream_write(stream, &bmih, 40);
+
+ texture = surface->texture;
+ screen = texture->screen;
- /* XXX: force mappable surface */
- surface_usage = surface->usage;
- surface->usage |= PIPE_BUFFER_USAGE_CPU_READ;
+ transfer = screen->get_tex_transfer(screen, texture, surface->face,
+ surface->level, surface->zslice,
+ PIPE_TRANSFER_READ, 0, 0, surface->width,
+ surface->height);
y = surface->height;
while(y--) {
- pipe_get_tile_rgba(surface,
+ pipe_get_tile_rgba(transfer,
0, y, surface->width, 1,
rgba);
for(x = 0; x < surface->width; ++x)
@@ -685,14 +784,15 @@ debug_dump_surface_bmp(const char *filename,
util_stream_write(stream, &pixel, 4);
}
}
-
- surface->usage = surface_usage;
+ screen->tex_transfer_release(screen, &transfer);
+
util_stream_close(stream);
error2:
FREE(rgba);
error1:
;
+#endif
}
#endif
diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/auxiliary/util/u_debug.h
index 3b00fb9aa8..b298b9b66d 100644
--- a/src/gallium/include/pipe/p_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -35,13 +35,13 @@
* @author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
-#ifndef P_DEBUG_H_
-#define P_DEBUG_H_
+#ifndef U_DEBUG_H_
+#define U_DEBUG_H_
#include <stdarg.h>
-#include "p_compiler.h"
+#include "pipe/p_compiler.h"
#ifdef __cplusplus
@@ -261,6 +261,11 @@ const char *
debug_dump_enum(const struct debug_named_value *names,
unsigned long value);
+/**
+ * Convert an enum value to a string, like debug_dump_enum(), but with the
+ * leading characters that the name shares with \p prefix stripped.
+ */
+const char *
+debug_dump_enum_noprefix(const struct debug_named_value *names,
+ const char *prefix,
+ unsigned long value);
+
/**
* Convert binary flags value to a string.
@@ -353,4 +358,4 @@ void debug_dump_surface_bmp(const char *filename,
}
#endif
-#endif /* P_DEBUG_H_ */
+#endif /* U_DEBUG_H_ */
diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/u_debug_memory.c
index 250fd60f63..758541c282 100644
--- a/src/gallium/auxiliary/util/p_debug_mem.c
+++ b/src/gallium/auxiliary/util/u_debug_memory.c
@@ -44,11 +44,13 @@
#include <stdlib.h>
#endif
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
+#include "util/u_debug_stack.h"
#include "util/u_double_list.h"
#define DEBUG_MEMORY_MAGIC 0x6e34090aU
+#define DEBUG_MEMORY_STACK 0 /* XXX: disabled until we have symbol lookup */
#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && !defined(WINCE)
@@ -71,7 +73,9 @@ struct debug_memory_header
const char *file;
unsigned line;
const char *function;
+ struct debug_stack_frame backtrace[DEBUG_MEMORY_STACK];
size_t size;
+
unsigned magic;
};
@@ -136,6 +140,8 @@ debug_malloc(const char *file, unsigned line, const char *function,
hdr->size = size;
hdr->magic = DEBUG_MEMORY_MAGIC;
+ debug_backtrace_capture(hdr->backtrace, 0, DEBUG_MEMORY_STACK);
+
ftr = footer_from_header(hdr);
ftr->magic = DEBUG_MEMORY_MAGIC;
@@ -290,6 +296,7 @@ debug_memory_end(unsigned long start_no)
debug_printf("%s:%u:%s: %u bytes at %p not freed\n",
hdr->file, hdr->line, hdr->function,
hdr->size, ptr);
+ debug_backtrace_dump(hdr->backtrace, DEBUG_MEMORY_STACK);
total_size += hdr->size;
}
diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/u_debug_profile.c
index 5f9772ef91..6d8b244c3a 100644
--- a/src/gallium/auxiliary/util/p_debug_prof.c
+++ b/src/gallium/auxiliary/util/u_debug_profile.c
@@ -42,7 +42,7 @@
#include <windows.h>
#include <winddi.h>
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_string.h"
diff --git a/src/gallium/auxiliary/util/u_debug_stack.c b/src/gallium/auxiliary/util/u_debug_stack.c
new file mode 100644
index 0000000000..76068a6509
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_stack.c
@@ -0,0 +1,97 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Stack backtracing.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "u_debug.h"
+#include "u_debug_stack.h"
+
+
+/**
+ * Record entries from the call stack into \p backtrace.
+ *
+ * The first \p start_frame frames walked are skipped; the following ones
+ * are stored until \p nr_frames entries have been written or a NULL frame
+ * pointer is reached.  Entries that could not be filled are set to NULL.
+ * The frame walk is only compiled in when PIPE_ARCH_X86 is defined; in any
+ * other build every entry is set to NULL.
+ *
+ * \param backtrace    array with room for at least \p nr_frames entries
+ * \param start_frame  number of leading frames to skip
+ * \param nr_frames    number of entries to write; zero makes this a no-op
+ */
+void
+debug_backtrace_capture(struct debug_stack_frame *backtrace,
+ unsigned start_frame,
+ unsigned nr_frames)
+{
+ const void **frame_pointer = NULL;
+ unsigned i = 0;
+
+ if(!nr_frames)
+ return;
+
+ /* Find the frame to start walking from, in a compiler-specific way. */
+#if defined(PIPE_CC_GCC)
+ /* level 1 asks GCC for the frame of this function's caller */
+ frame_pointer = ((const void **)__builtin_frame_address(1));
+#elif defined(PIPE_CC_MSVC)
+ /* read ebp, then follow the word it points at -- presumably the saved
+ * frame pointer of the caller, matching the GCC branch above
+ */
+ __asm {
+ mov frame_pointer, ebp
+ }
+ frame_pointer = (const void **)frame_pointer[0];
+#else
+ frame_pointer = NULL;
+#endif
+
+
+#ifdef PIPE_ARCH_X86
+ /* Walk the chain of frame pointers: word [0] links to the next frame
+ * and word [1] is what gets recorded (the return address, assuming the
+ * usual x86 frame layout).
+ */
+ while(nr_frames) {
+ if(!frame_pointer)
+ break;
+
+ if(start_frame)
+ --start_frame;
+ else {
+ backtrace[i++].function = frame_pointer[1];
+ --nr_frames;
+ }
+
+ frame_pointer = (const void **)frame_pointer[0];
+ }
+#endif
+
+ /* NULL-fill whatever was not captured so the dump can stop there */
+ while(nr_frames) {
+ backtrace[i++].function = NULL;
+ --nr_frames;
+ }
+}
+
+
+/**
+ * Print the captured frames of \p backtrace, one address per line.
+ *
+ * Printing stops after \p nr_frames entries or at the first entry whose
+ * function pointer is NULL, whichever comes first.
+ */
+void
+debug_backtrace_dump(const struct debug_stack_frame *backtrace,
+                     unsigned nr_frames)
+{
+   const struct debug_stack_frame *frame = backtrace;
+   const struct debug_stack_frame *end = backtrace + nr_frames;
+
+   /* a NULL entry marks the end of the captured part of the array */
+   for(; frame != end && frame->function; ++frame)
+      debug_printf("\t%p\n", frame->function);
+}
+
diff --git a/src/gallium/auxiliary/util/u_debug_stack.h b/src/gallium/auxiliary/util/u_debug_stack.h
new file mode 100644
index 0000000000..f50f04e0f7
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_stack.h
@@ -0,0 +1,65 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DEBUG_STACK_H_
+#define U_DEBUG_STACK_H_
+
+
+/**
+ * @file
+ * Stack backtracing.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * One entry of a captured backtrace.
+ */
+struct debug_stack_frame
+{
+ /* address recorded for this frame; NULL marks an unused entry */
+ const void *function;
+};
+
+
+/**
+ * Fill \p backtrace with \p nr_frames entries taken from the call stack,
+ * skipping the first \p start_frame frames.  Entries that cannot be
+ * captured are set to NULL.
+ */
+void
+debug_backtrace_capture(struct debug_stack_frame *backtrace,
+ unsigned start_frame,
+ unsigned nr_frames);
+
+/**
+ * Print up to \p nr_frames entries of \p backtrace with debug_printf(),
+ * stopping at the first NULL entry.
+ */
+void
+debug_backtrace_dump(const struct debug_stack_frame *backtrace,
+ unsigned nr_frames);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_DEBUG_STACK_H_ */
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index d7bb74b87b..f282f3d289 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -29,7 +29,6 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
#include "util/u_draw_quad.h"
@@ -53,7 +52,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
/* tell pipe about the vertex buffer */
vbuffer.buffer = vbuf;
- vbuffer.pitch = num_attribs * 4 * sizeof(float); /* vertex size */
+ vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */
vbuffer.buffer_offset = offset;
pipe->set_vertex_buffers(pipe, 1, &vbuffer);
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index b0de5968e9..90483fcb21 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -2,6 +2,7 @@
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -34,10 +35,9 @@
#include "pipe/p_context.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_memory.h"
@@ -93,13 +93,82 @@ enum dtype
typedef ushort half_float;
-#if 0
-extern half_float
-float_to_half(float f);
+/**
+ * Convert a 32-bit float to a 16-bit half float (1 sign bit, 5 exponent
+ * bits with bias 15, 10 mantissa bits).
+ *
+ * The mantissa bits that do not fit are rounded to nearest.  Values too
+ * large for a half become infinity, values too small become a signed zero,
+ * and NaN stays NaN.
+ */
+static half_float
+float_to_half(float f)
+{
+   union { float f; uint32_t ui; } in;
+   uint32_t sign, mant, half;
+   int exp;
+
+   in.f = f;
+   sign = (in.ui >> 16) & 0x8000;
+   mant = in.ui & 0x007fffff;
+   exp = (int) ((in.ui >> 23) & 0xff);
+
+   if (exp == 0xff) {
+      /* Inf or NaN: force a mantissa bit so that a NaN remains a NaN */
+      return (half_float) (sign | 0x7c00 | (mant ? 0x0200 : 0));
+   }
+
+   /* re-bias the exponent from float (127) to half (15) */
+   exp += 15 - 127;
+
+   if (exp >= 31) {
+      /* too large for a half: infinity */
+      return (half_float) (sign | 0x7c00);
+   }
+
+   if (exp <= 0) {
+      uint32_t shift;
+
+      if (exp < -10) {
+         /* below half of the smallest half denormal: signed zero */
+         return (half_float) sign;
+      }
+
+      /* half denormal: make the implicit leading one explicit and shift
+       * the mantissa into place, rounding on the last bit shifted out
+       */
+      mant |= 0x00800000;
+      shift = (uint32_t) (14 - exp);
+      half = mant >> shift;
+      half += (mant >> (shift - 1)) & 1;
+      return (half_float) (sign | half);
+   }
+
+   /* normal number; a rounding carry out of the mantissa correctly bumps
+    * the exponent (up to infinity for the largest values)
+    */
+   half = ((uint32_t) exp << 10) | (mant >> 13);
+   half += (mant >> 12) & 1;
+   return (half_float) (sign | half);
+}
+
+/**
+ * Convert a 16-bit half float (1 sign bit, 5 exponent bits with bias 15,
+ * 10 mantissa bits) to a 32-bit float.
+ *
+ * Every half value is exactly representable as a float, so the conversion
+ * is lossless; half denormals become normalised floats.
+ */
+static float
+half_to_float(half_float h)
+{
+   union { float f; uint32_t ui; } out;
+   const uint32_t sign = ((uint32_t) h & 0x8000) << 16;
+   const uint32_t exp = ((uint32_t) h >> 10) & 0x1f;
+   uint32_t mant = (uint32_t) h & 0x3ff;
+
+   if (exp == 0) {
+      if (mant == 0) {
+         /* signed zero */
+         out.ui = sign;
+      }
+      else {
+         /* denormal: shift the mantissa up until its leading one reaches
+          * the implicit-bit position, lowering the exponent to match
+          */
+         int e = -1;
+         do {
+            e++;
+            mant <<= 1;
+         } while (!(mant & 0x400));
+         mant &= 0x3ff;
+         out.ui = sign | ((uint32_t) (127 - 15 - e) << 23) | (mant << 13);
+      }
+   }
+   else if (exp == 0x1f) {
+      /* Inf (mantissa zero) or NaN (mantissa non-zero) */
+      out.ui = sign | 0x7f800000 | (mant << 13);
+   }
+   else {
+      /* normal number: re-bias the exponent from half (15) to float (127) */
+      out.ui = sign | ((exp + 127 - 15) << 23) | (mant << 13);
+   }
+
+   return out.f;
+}
+
-extern float
-half_to_float(half_float h);
-#endif
+
+
+/**
+ * \name Support macros for do_row and do_row_3D
+ *
+ * The macro madness is here for two reasons.  First, it compacts the code
+ * slightly.  Second, it makes it much easier to adjust the specifics of the
+ * filter to tune the rounding characteristics.
+ *
+ * The macros are not self-contained: DECLARE_ROW_POINTERS* read the
+ * srcRowA..srcRowD and dstRow variables of the function they expand in and
+ * declare rowA..rowD and dst; the FILTER_*_3D(e) macros use those plus the
+ * loop indices i, j and k.
+ */
+/*@{*/
+
+/* Declare row pointers to pixels made of \p e elements of type \p t. */
+#define DECLARE_ROW_POINTERS(t, e) \
+   const t(*rowA)[e] = (const t(*)[e]) srcRowA; \
+   const t(*rowB)[e] = (const t(*)[e]) srcRowB; \
+   const t(*rowC)[e] = (const t(*)[e]) srcRowC; \
+   const t(*rowD)[e] = (const t(*)[e]) srcRowD; \
+   t(*dst)[e] = (t(*)[e]) dstRow
+
+/* Declare row pointers to packed pixels stored in a single \p t. */
+#define DECLARE_ROW_POINTERS0(t) \
+   const t *rowA = (const t *) srcRowA; \
+   const t *rowB = (const t *) srcRowB; \
+   const t *rowC = (const t *) srcRowC; \
+   const t *rowD = (const t *) srcRowD; \
+   t *dst = (t *) dstRow
+
+/* Rounded average of eight unsigned values: (sum + 4) / 8.  Arguments and
+ * result are fully parenthesized so the macro is safe inside any
+ * expression.
+ */
+#define FILTER_SUM_3D(Aj, Ak, Bj, Bk, Cj, Ck, Dj, Dk) \
+   (((unsigned) (Aj) + (unsigned) (Ak) \
+     + (unsigned) (Bj) + (unsigned) (Bk) \
+     + (unsigned) (Cj) + (unsigned) (Ck) \
+     + (unsigned) (Dj) + (unsigned) (Dk) \
+     + 4) >> 3)
+
+/* Integer filter of element \p e of the current pixel. */
+#define FILTER_3D(e) \
+   do { \
+      dst[i][e] = FILTER_SUM_3D(rowA[j][e], rowA[k][e], \
+                                rowB[j][e], rowB[k][e], \
+                                rowC[j][e], rowC[k][e], \
+                                rowD[j][e], rowD[k][e]); \
+   } while(0)
+
+/* Float filter of element \p e of the current pixel. */
+#define FILTER_F_3D(e) \
+   do { \
+      dst[i][e] = (rowA[j][e] + rowA[k][e] \
+                   + rowB[j][e] + rowB[k][e] \
+                   + rowC[j][e] + rowC[k][e] \
+                   + rowD[j][e] + rowD[k][e]) * 0.125F; \
+   } while(0)
+
+/* Half-float filter of element \p e: average in float, store as half. */
+#define FILTER_HF_3D(e) \
+   do { \
+      const float aj = half_to_float(rowA[j][e]); \
+      const float ak = half_to_float(rowA[k][e]); \
+      const float bj = half_to_float(rowB[j][e]); \
+      const float bk = half_to_float(rowB[k][e]); \
+      const float cj = half_to_float(rowC[j][e]); \
+      const float ck = half_to_float(rowC[k][e]); \
+      const float dj = half_to_float(rowD[j][e]); \
+      const float dk = half_to_float(rowD[k][e]); \
+      dst[i][e] = float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \
+                                * 0.125F); \
+   } while(0)
+/*@}*/
/**
@@ -471,6 +540,385 @@ do_row(enum dtype datatype, uint comps, int srcWidth,
}
+/**
+ * Average together four rows of a source image to produce a single new
+ * row in the dest image.  It's legal for the source rows to point
+ * to the same data.  The source width must be equal to either the
+ * dest width or two times the dest width.
+ *
+ * Each destination pixel is the average of eight source values: columns j
+ * and k of each of the four rows.  When the widths are equal, j and k are
+ * the same column.
+ *
+ * \param datatype  element type of the pixel data (an \c enum dtype value
+ *                  such as \c UBYTE, \c USHORT, \c FLOAT or one of the
+ *                  packed types)
+ * \param comps     number of components per pixel (1..4)
+ * \param srcWidth  Width of a row in the source data
+ * \param srcRowA   Pointer to one of the rows of source data
+ * \param srcRowB   Pointer to one of the rows of source data
+ * \param srcRowC   Pointer to one of the rows of source data
+ * \param srcRowD   Pointer to one of the rows of source data
+ * \param dstWidth  Width of a row in the destination data
+ * \param dstRow    Pointer to the row of destination data
+ */
+static void
+do_row_3D(enum dtype datatype, uint comps, int srcWidth,
+          const void *srcRowA, const void *srcRowB,
+          const void *srcRowC, const void *srcRowD,
+          int dstWidth, void *dstRow)
+{
+   /* first "k" column and per-pixel column step: 0/1 when the row is not
+    * being narrowed, 1/2 when two source columns feed each dest column
+    */
+   const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
+   const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
+   uint i, j, k;
+
+   assert(comps >= 1);
+   assert(comps <= 4);
+
+   if ((datatype == UBYTE) && (comps == 4)) {
+      DECLARE_ROW_POINTERS(ubyte, 4);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+         FILTER_3D(2);
+         FILTER_3D(3);
+      }
+   }
+   else if ((datatype == UBYTE) && (comps == 3)) {
+      DECLARE_ROW_POINTERS(ubyte, 3);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+         FILTER_3D(2);
+      }
+   }
+   else if ((datatype == UBYTE) && (comps == 2)) {
+      DECLARE_ROW_POINTERS(ubyte, 2);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+      }
+   }
+   else if ((datatype == UBYTE) && (comps == 1)) {
+      DECLARE_ROW_POINTERS(ubyte, 1);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+      }
+   }
+   else if ((datatype == USHORT) && (comps == 4)) {
+      DECLARE_ROW_POINTERS(ushort, 4);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+         FILTER_3D(2);
+         FILTER_3D(3);
+      }
+   }
+   else if ((datatype == USHORT) && (comps == 3)) {
+      DECLARE_ROW_POINTERS(ushort, 3);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+         FILTER_3D(2);
+      }
+   }
+   else if ((datatype == USHORT) && (comps == 2)) {
+      DECLARE_ROW_POINTERS(ushort, 2);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+      }
+   }
+   else if ((datatype == USHORT) && (comps == 1)) {
+      DECLARE_ROW_POINTERS(ushort, 1);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+      }
+   }
+   else if ((datatype == FLOAT) && (comps == 4)) {
+      DECLARE_ROW_POINTERS(float, 4);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_F_3D(0);
+         FILTER_F_3D(1);
+         FILTER_F_3D(2);
+         FILTER_F_3D(3);
+      }
+   }
+   else if ((datatype == FLOAT) && (comps == 3)) {
+      DECLARE_ROW_POINTERS(float, 3);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_F_3D(0);
+         FILTER_F_3D(1);
+         FILTER_F_3D(2);
+      }
+   }
+   else if ((datatype == FLOAT) && (comps == 2)) {
+      DECLARE_ROW_POINTERS(float, 2);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_F_3D(0);
+         FILTER_F_3D(1);
+      }
+   }
+   else if ((datatype == FLOAT) && (comps == 1)) {
+      DECLARE_ROW_POINTERS(float, 1);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_F_3D(0);
+      }
+   }
+   else if ((datatype == HALF_FLOAT) && (comps == 4)) {
+      DECLARE_ROW_POINTERS(half_float, 4);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_HF_3D(0);
+         FILTER_HF_3D(1);
+         FILTER_HF_3D(2);
+         FILTER_HF_3D(3);
+      }
+   }
+   else if ((datatype == HALF_FLOAT) && (comps == 3)) {
+      /* the pixel stride must match the component count, as in the
+       * UBYTE/USHORT/FLOAT branches
+       */
+      DECLARE_ROW_POINTERS(half_float, 3);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_HF_3D(0);
+         FILTER_HF_3D(1);
+         FILTER_HF_3D(2);
+      }
+   }
+   else if ((datatype == HALF_FLOAT) && (comps == 2)) {
+      DECLARE_ROW_POINTERS(half_float, 2);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_HF_3D(0);
+         FILTER_HF_3D(1);
+      }
+   }
+   else if ((datatype == HALF_FLOAT) && (comps == 1)) {
+      DECLARE_ROW_POINTERS(half_float, 1);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_HF_3D(0);
+      }
+   }
+   else if ((datatype == UINT) && (comps == 1)) {
+      const uint *rowA = (const uint *) srcRowA;
+      const uint *rowB = (const uint *) srcRowB;
+      const uint *rowC = (const uint *) srcRowC;
+      const uint *rowD = (const uint *) srcRowD;
+      /* NOTE(review): the sources are read as uint but the result is
+       * stored as float -- looks unintended for a UINT row; confirm
+       * against do_row() before changing.
+       */
+      float *dst = (float *) dstRow;
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         /* sum in 64 bits so eight 32-bit values cannot overflow */
+         const uint64_t tmp = (((uint64_t) rowA[j] + (uint64_t) rowA[k])
+                               + ((uint64_t) rowB[j] + (uint64_t) rowB[k])
+                               + ((uint64_t) rowC[j] + (uint64_t) rowC[k])
+                               + ((uint64_t) rowD[j] + (uint64_t) rowD[k]));
+         dst[i] = (float)((double) tmp * 0.125);
+      }
+   }
+   else if ((datatype == USHORT_5_6_5) && (comps == 3)) {
+      DECLARE_ROW_POINTERS0(ushort);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const int rowAr0 = rowA[j] & 0x1f;
+         const int rowAr1 = rowA[k] & 0x1f;
+         const int rowBr0 = rowB[j] & 0x1f;
+         const int rowBr1 = rowB[k] & 0x1f;
+         const int rowCr0 = rowC[j] & 0x1f;
+         const int rowCr1 = rowC[k] & 0x1f;
+         const int rowDr0 = rowD[j] & 0x1f;
+         const int rowDr1 = rowD[k] & 0x1f;
+         const int rowAg0 = (rowA[j] >> 5) & 0x3f;
+         const int rowAg1 = (rowA[k] >> 5) & 0x3f;
+         const int rowBg0 = (rowB[j] >> 5) & 0x3f;
+         const int rowBg1 = (rowB[k] >> 5) & 0x3f;
+         const int rowCg0 = (rowC[j] >> 5) & 0x3f;
+         const int rowCg1 = (rowC[k] >> 5) & 0x3f;
+         const int rowDg0 = (rowD[j] >> 5) & 0x3f;
+         const int rowDg1 = (rowD[k] >> 5) & 0x3f;
+         const int rowAb0 = (rowA[j] >> 11) & 0x1f;
+         const int rowAb1 = (rowA[k] >> 11) & 0x1f;
+         const int rowBb0 = (rowB[j] >> 11) & 0x1f;
+         const int rowBb1 = (rowB[k] >> 11) & 0x1f;
+         const int rowCb0 = (rowC[j] >> 11) & 0x1f;
+         const int rowCb1 = (rowC[k] >> 11) & 0x1f;
+         const int rowDb0 = (rowD[j] >> 11) & 0x1f;
+         const int rowDb1 = (rowD[k] >> 11) & 0x1f;
+         const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+                                     rowCr0, rowCr1, rowDr0, rowDr1);
+         const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+                                     rowCg0, rowCg1, rowDg0, rowDg1);
+         const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+                                     rowCb0, rowCb1, rowDb0, rowDb1);
+         dst[i] = (b << 11) | (g << 5) | r;
+      }
+   }
+   else if ((datatype == USHORT_4_4_4_4) && (comps == 4)) {
+      DECLARE_ROW_POINTERS0(ushort);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const int rowAr0 = rowA[j] & 0xf;
+         const int rowAr1 = rowA[k] & 0xf;
+         const int rowBr0 = rowB[j] & 0xf;
+         const int rowBr1 = rowB[k] & 0xf;
+         const int rowCr0 = rowC[j] & 0xf;
+         const int rowCr1 = rowC[k] & 0xf;
+         const int rowDr0 = rowD[j] & 0xf;
+         const int rowDr1 = rowD[k] & 0xf;
+         const int rowAg0 = (rowA[j] >> 4) & 0xf;
+         const int rowAg1 = (rowA[k] >> 4) & 0xf;
+         const int rowBg0 = (rowB[j] >> 4) & 0xf;
+         const int rowBg1 = (rowB[k] >> 4) & 0xf;
+         const int rowCg0 = (rowC[j] >> 4) & 0xf;
+         const int rowCg1 = (rowC[k] >> 4) & 0xf;
+         const int rowDg0 = (rowD[j] >> 4) & 0xf;
+         const int rowDg1 = (rowD[k] >> 4) & 0xf;
+         const int rowAb0 = (rowA[j] >> 8) & 0xf;
+         const int rowAb1 = (rowA[k] >> 8) & 0xf;
+         const int rowBb0 = (rowB[j] >> 8) & 0xf;
+         const int rowBb1 = (rowB[k] >> 8) & 0xf;
+         const int rowCb0 = (rowC[j] >> 8) & 0xf;
+         const int rowCb1 = (rowC[k] >> 8) & 0xf;
+         const int rowDb0 = (rowD[j] >> 8) & 0xf;
+         const int rowDb1 = (rowD[k] >> 8) & 0xf;
+         const int rowAa0 = (rowA[j] >> 12) & 0xf;
+         const int rowAa1 = (rowA[k] >> 12) & 0xf;
+         const int rowBa0 = (rowB[j] >> 12) & 0xf;
+         const int rowBa1 = (rowB[k] >> 12) & 0xf;
+         const int rowCa0 = (rowC[j] >> 12) & 0xf;
+         const int rowCa1 = (rowC[k] >> 12) & 0xf;
+         const int rowDa0 = (rowD[j] >> 12) & 0xf;
+         const int rowDa1 = (rowD[k] >> 12) & 0xf;
+         const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+                                     rowCr0, rowCr1, rowDr0, rowDr1);
+         const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+                                     rowCg0, rowCg1, rowDg0, rowDg1);
+         const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+                                     rowCb0, rowCb1, rowDb0, rowDb1);
+         const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
+                                     rowCa0, rowCa1, rowDa0, rowDa1);
+
+         dst[i] = (a << 12) | (b << 8) | (g << 4) | r;
+      }
+   }
+   else if ((datatype == USHORT_1_5_5_5_REV) && (comps == 4)) {
+      DECLARE_ROW_POINTERS0(ushort);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const int rowAr0 = rowA[j] & 0x1f;
+         const int rowAr1 = rowA[k] & 0x1f;
+         const int rowBr0 = rowB[j] & 0x1f;
+         const int rowBr1 = rowB[k] & 0x1f;
+         const int rowCr0 = rowC[j] & 0x1f;
+         const int rowCr1 = rowC[k] & 0x1f;
+         const int rowDr0 = rowD[j] & 0x1f;
+         const int rowDr1 = rowD[k] & 0x1f;
+         const int rowAg0 = (rowA[j] >> 5) & 0x1f;
+         const int rowAg1 = (rowA[k] >> 5) & 0x1f;
+         const int rowBg0 = (rowB[j] >> 5) & 0x1f;
+         const int rowBg1 = (rowB[k] >> 5) & 0x1f;
+         const int rowCg0 = (rowC[j] >> 5) & 0x1f;
+         const int rowCg1 = (rowC[k] >> 5) & 0x1f;
+         const int rowDg0 = (rowD[j] >> 5) & 0x1f;
+         const int rowDg1 = (rowD[k] >> 5) & 0x1f;
+         const int rowAb0 = (rowA[j] >> 10) & 0x1f;
+         const int rowAb1 = (rowA[k] >> 10) & 0x1f;
+         const int rowBb0 = (rowB[j] >> 10) & 0x1f;
+         const int rowBb1 = (rowB[k] >> 10) & 0x1f;
+         const int rowCb0 = (rowC[j] >> 10) & 0x1f;
+         const int rowCb1 = (rowC[k] >> 10) & 0x1f;
+         const int rowDb0 = (rowD[j] >> 10) & 0x1f;
+         const int rowDb1 = (rowD[k] >> 10) & 0x1f;
+         const int rowAa0 = (rowA[j] >> 15) & 0x1;
+         const int rowAa1 = (rowA[k] >> 15) & 0x1;
+         const int rowBa0 = (rowB[j] >> 15) & 0x1;
+         const int rowBa1 = (rowB[k] >> 15) & 0x1;
+         const int rowCa0 = (rowC[j] >> 15) & 0x1;
+         const int rowCa1 = (rowC[k] >> 15) & 0x1;
+         const int rowDa0 = (rowD[j] >> 15) & 0x1;
+         const int rowDa1 = (rowD[k] >> 15) & 0x1;
+         const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+                                     rowCr0, rowCr1, rowDr0, rowDr1);
+         const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+                                     rowCg0, rowCg1, rowDg0, rowDg1);
+         const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+                                     rowCb0, rowCb1, rowDb0, rowDb1);
+         const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
+                                     rowCa0, rowCa1, rowDa0, rowDa1);
+
+         dst[i] = (a << 15) | (b << 10) | (g << 5) | r;
+      }
+   }
+   else if ((datatype == UBYTE_3_3_2) && (comps == 3)) {
+      /* a 3_3_2 pixel occupies a single byte, so the rows must be walked
+       * with byte pointers
+       */
+      DECLARE_ROW_POINTERS0(ubyte);
+
+      for (i = j = 0, k = k0; i < (uint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const int rowAr0 = rowA[j] & 0x3;
+         const int rowAr1 = rowA[k] & 0x3;
+         const int rowBr0 = rowB[j] & 0x3;
+         const int rowBr1 = rowB[k] & 0x3;
+         const int rowCr0 = rowC[j] & 0x3;
+         const int rowCr1 = rowC[k] & 0x3;
+         const int rowDr0 = rowD[j] & 0x3;
+         const int rowDr1 = rowD[k] & 0x3;
+         const int rowAg0 = (rowA[j] >> 2) & 0x7;
+         const int rowAg1 = (rowA[k] >> 2) & 0x7;
+         const int rowBg0 = (rowB[j] >> 2) & 0x7;
+         const int rowBg1 = (rowB[k] >> 2) & 0x7;
+         const int rowCg0 = (rowC[j] >> 2) & 0x7;
+         const int rowCg1 = (rowC[k] >> 2) & 0x7;
+         const int rowDg0 = (rowD[j] >> 2) & 0x7;
+         const int rowDg1 = (rowD[k] >> 2) & 0x7;
+         const int rowAb0 = (rowA[j] >> 5) & 0x7;
+         const int rowAb1 = (rowA[k] >> 5) & 0x7;
+         const int rowBb0 = (rowB[j] >> 5) & 0x7;
+         const int rowBb1 = (rowB[k] >> 5) & 0x7;
+         const int rowCb0 = (rowC[j] >> 5) & 0x7;
+         const int rowCb1 = (rowC[k] >> 5) & 0x7;
+         const int rowDb0 = (rowD[j] >> 5) & 0x7;
+         const int rowDb1 = (rowD[k] >> 5) & 0x7;
+         const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+                                     rowCr0, rowCr1, rowDr0, rowDr1);
+         const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+                                     rowCg0, rowCg1, rowDg0, rowDg1);
+         const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+                                     rowCb0, rowCb1, rowDb0, rowDb1);
+         dst[i] = (b << 5) | (g << 2) | r;
+      }
+   }
+   else {
+      debug_printf("bad format in do_row_3D()");
+   }
+}
+
+
+
static void
format_to_type_comps(enum pipe_format pformat,
enum dtype *datatype, uint *comps)
@@ -576,6 +1024,87 @@ reduce_2d(enum pipe_format pformat,
+/**
+ * Reduce one 3D mipmap level to the next smaller one by averaging.
+ * For every destination image, do_row_3D() is called once per destination
+ * row with two source rows taken from each of two source images.
+ * When the source and destination depth (or height) are equal, the image
+ * (or row) offset is 0, so the same source image (or row) is passed twice.
+ * Row and image sizes are computed from width/height * pf_get_size(pformat);
+ * the srcRowStride and dstRowStride parameters are not read here.
+ * NOTE(review): border is fixed at 0, so the border terms in the pointer
+ * setup below contribute nothing.
+ */
static void
+reduce_3d(enum pipe_format pformat,
+ int srcWidth, int srcHeight, int srcDepth,
+ int srcRowStride, const ubyte *srcPtr,
+ int dstWidth, int dstHeight, int dstDepth,
+ int dstRowStride, ubyte *dstPtr)
+{
+ const int bpt = pf_get_size(pformat);
+ const int border = 0;
+ int img, row;
+ int bytesPerSrcImage, bytesPerDstImage;
+ int bytesPerSrcRow, bytesPerDstRow;
+ int srcImageOffset, srcRowOffset;
+ enum dtype datatype;
+ uint comps;
+
+ format_to_type_comps(pformat, &datatype, &comps);
+
+ bytesPerSrcImage = srcWidth * srcHeight * bpt;
+ bytesPerDstImage = dstWidth * dstHeight * bpt;
+
+ bytesPerSrcRow = srcWidth * bpt;
+ bytesPerDstRow = dstWidth * bpt;
+
+ /* Offset between adjacent src images to be averaged together */
+ srcImageOffset = (srcDepth == dstDepth) ? 0 : bytesPerSrcImage;
+
+ /* Offset between adjacent src rows to be averaged together */
+ srcRowOffset = (srcHeight == dstHeight) ? 0 : srcWidth * bpt;
+
+ /*
+ * Need to average together up to 8 src pixels for each dest pixel.
+ * Break that down into 3 operations:
+ * 1. take two rows from source image and average them together.
+ * 2. take two rows from next source image and average them together.
+ * 3. take the two averaged rows and average them for the final dst row.
+ */
+
+ /*
+ _mesa_printf("mip3d %d x %d x %d -> %d x %d x %d\n",
+ srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth);
+ */
+
+ for (img = 0; img < dstDepth; img++) {
+ /* first source image pointer, skipping border */
+ const ubyte *imgSrcA = srcPtr
+ + (bytesPerSrcImage + bytesPerSrcRow + border) * bpt * border
+ + img * (bytesPerSrcImage + srcImageOffset);
+ /* second source image pointer, skipping border */
+ const ubyte *imgSrcB = imgSrcA + srcImageOffset;
+ /* address of the dest image, skipping border */
+ ubyte *imgDst = dstPtr
+ + (bytesPerDstImage + bytesPerDstRow + border) * bpt * border
+ + img * bytesPerDstImage;
+
+ /* setup the four source row pointers and the dest row pointer */
+ const ubyte *srcImgARowA = imgSrcA;
+ const ubyte *srcImgARowB = imgSrcA + srcRowOffset;
+ const ubyte *srcImgBRowA = imgSrcB;
+ const ubyte *srcImgBRowB = imgSrcB + srcRowOffset;
+ ubyte *dstImgRow = imgDst;
+
+ for (row = 0; row < dstHeight; row++) {
+ do_row_3D(datatype, comps, srcWidth,
+ srcImgARowA, srcImgARowB,
+ srcImgBRowA, srcImgBRowB,
+ dstWidth, dstImgRow);
+
+ /* advance to next rows */
+ srcImgARowA += bytesPerSrcRow + srcRowOffset;
+ srcImgARowB += bytesPerSrcRow + srcRowOffset;
+ srcImgBRowA += bytesPerSrcRow + srcRowOffset;
+ srcImgBRowB += bytesPerSrcRow + srcRowOffset;
+ dstImgRow += bytesPerDstRow;
+ }
+ }
+}
+
+
+
+
+static void
make_1d_mipmap(struct gen_mipmap_state *ctx,
struct pipe_texture *pt,
uint face, uint baseLevel, uint lastLevel)
@@ -587,31 +1116,30 @@ make_1d_mipmap(struct gen_mipmap_state *ctx,
for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
const uint srcLevel = dstLevel - 1;
- struct pipe_surface *srcSurf, *dstSurf;
+ struct pipe_transfer *srcTrans, *dstTrans;
void *srcMap, *dstMap;
- srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice,
- PIPE_BUFFER_USAGE_CPU_READ);
-
- dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
- PIPE_BUFFER_USAGE_CPU_WRITE);
-
- srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer,
- PIPE_BUFFER_USAGE_CPU_READ)
- + srcSurf->offset);
- dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE)
- + dstSurf->offset);
+ srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
+ PIPE_TRANSFER_READ, 0, 0,
+ pt->width[srcLevel],
+ pt->height[srcLevel]);
+ dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
+ PIPE_TRANSFER_WRITE, 0, 0,
+ pt->width[dstLevel],
+ pt->height[dstLevel]);
+
+ srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
+ dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
reduce_1d(pt->format,
- srcSurf->width, srcMap,
- dstSurf->width, dstMap);
+ srcTrans->width, srcMap,
+ dstTrans->width, dstMap);
- pipe_buffer_unmap(screen, srcSurf->buffer);
- pipe_buffer_unmap(screen, dstSurf->buffer);
+ screen->transfer_unmap(screen, srcTrans);
+ screen->transfer_unmap(screen, dstTrans);
- pipe_surface_reference(&srcSurf, NULL);
- pipe_surface_reference(&dstSurf, NULL);
+ screen->tex_transfer_release(screen, &srcTrans);
+ screen->tex_transfer_release(screen, &dstTrans);
}
}
@@ -631,32 +1159,32 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
const uint srcLevel = dstLevel - 1;
- struct pipe_surface *srcSurf, *dstSurf;
+ struct pipe_transfer *srcTrans, *dstTrans;
ubyte *srcMap, *dstMap;
- srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice,
- PIPE_BUFFER_USAGE_CPU_READ);
- dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
- PIPE_BUFFER_USAGE_CPU_WRITE);
-
- srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer,
- PIPE_BUFFER_USAGE_CPU_READ)
- + srcSurf->offset);
- dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE)
- + dstSurf->offset);
+ srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
+ PIPE_TRANSFER_READ, 0, 0,
+ pt->width[srcLevel],
+ pt->height[srcLevel]);
+ dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
+ PIPE_TRANSFER_WRITE, 0, 0,
+ pt->width[dstLevel],
+ pt->height[dstLevel]);
+
+ srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
+ dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
reduce_2d(pt->format,
- srcSurf->width, srcSurf->height,
- srcSurf->stride, srcMap,
- dstSurf->width, dstSurf->height,
- dstSurf->stride, dstMap);
+ srcTrans->width, srcTrans->height,
+ srcTrans->stride, srcMap,
+ dstTrans->width, dstTrans->height,
+ dstTrans->stride, dstMap);
- pipe_buffer_unmap(screen, srcSurf->buffer);
- pipe_buffer_unmap(screen, dstSurf->buffer);
+ screen->transfer_unmap(screen, srcTrans);
+ screen->transfer_unmap(screen, dstTrans);
- pipe_surface_reference(&srcSurf, NULL);
- pipe_surface_reference(&dstSurf, NULL);
+ screen->tex_transfer_release(screen, &srcTrans);
+ screen->tex_transfer_release(screen, &dstTrans);
}
}
@@ -666,6 +1194,46 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
struct pipe_texture *pt,
uint face, uint baseLevel, uint lastLevel)
{
+#if 0
+ struct pipe_context *pipe = ctx->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ uint dstLevel, zslice = 0;
+
+ assert(pt->block.width == 1);
+ assert(pt->block.height == 1);
+
+ for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
+ const uint srcLevel = dstLevel - 1;
+ struct pipe_transfer *srcTrans, *dstTrans;
+ ubyte *srcMap, *dstMap;
+
+ srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
+ PIPE_TRANSFER_READ, 0, 0,
+ pt->width[srcLevel],
+ pt->height[srcLevel]);
+ dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
+ PIPE_TRANSFER_WRITE, 0, 0,
+ pt->width[dstLevel],
+ pt->height[dstLevel]);
+
+ srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
+ dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
+
+ reduce_3d(pt->format,
+ srcTrans->width, srcTrans->height,
+ srcTrans->stride, srcMap,
+ dstTrans->width, dstTrans->height,
+ dstTrans->stride, dstMap);
+
+ screen->transfer_unmap(screen, srcTrans);
+ screen->transfer_unmap(screen, dstTrans);
+
+ screen->tex_transfer_release(screen, &srcTrans);
+ screen->tex_transfer_release(screen, &dstTrans);
+ }
+#else
+ (void) reduce_3d;
+#endif
}
@@ -920,7 +1488,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
/* init framebuffer state */
memset(&fb, 0, sizeof(fb));
- fb.num_cbufs = 1;
+ fb.nr_cbufs = 1;
/* set min/mag to same filter for faster sw speed */
ctx->sampler.mag_img_filter = filter;
diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c
index 2d15932ce3..6da7353e25 100644
--- a/src/gallium/auxiliary/util/u_handle_table.c
+++ b/src/gallium/auxiliary/util/u_handle_table.c
@@ -34,7 +34,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_handle_table.h"
diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c
index 0bc8de9632..2f83e318e4 100644
--- a/src/gallium/auxiliary/util/u_hash_table.c
+++ b/src/gallium/auxiliary/util/u_hash_table.c
@@ -39,7 +39,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "cso_cache/cso_hash.h"
diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c
new file mode 100644
index 0000000000..3f70809efd
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_keymap.c
@@ -0,0 +1,309 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Key lookup/associative container.
+ *
+ * Like Jose's u_hash_table, based on CSO cache code for now.
+ *
+ * Author: Brian Paul
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "pipe/p_error.h"
+
+#include "cso_cache/cso_hash.h"
+
+#include "util/u_memory.h"
+#include "util/u_keymap.h"
+
+
+/**
+ * A key -> data-pointer map built on top of a cso_hash table.
+ */
+struct keymap
+{
+ struct cso_hash *cso; /* underlying hash table; stores keymap_item pointers */
+ unsigned key_size; /* size of every key, in bytes */
+ unsigned max_entries; /* XXX not obeyed yet */
+ unsigned num_entries; /* incremented on insert, decremented on remove */
+ keymap_delete_func delete_func; /* called when an entry is removed or replaced */
+};
+
+
+/**
+ * One entry stored in the hash table.
+ * 'key' is the map's own copy of the key bytes (made with mem_dup() on
+ * insert and freed on removal); 'value' is the caller's data pointer,
+ * which is stored as-is and not copied.
+ */
+struct keymap_item
+{
+ void *key, *value;
+};
+
+
+/**
+ * This is the default entry-delete function, used when the client
+ * doesn't provide one.  It frees the entry's data pointer; the map,
+ * key and user arguments are ignored.
+ */
+static void
+default_delete_func(const struct keymap *map,
+                    const void *key, void *data, void *user)
+{
+   (void) map;
+   (void) key;
+   (void) user;
+
+   FREE(data);
+}
+
+
+/** Return the keymap_item stored at the given hash iterator position. */
+static INLINE struct keymap_item *
+hash_table_item(struct cso_hash_iter iter)
+{
+   struct keymap_item *entry;
+
+   entry = (struct keymap_item *) cso_hash_iter_data(iter);
+   return entry;
+}
+
+
+/**
+ * Return 4-byte hash key for a block of bytes.
+ * The key is read as an array of 'unsigned' words; each word is multiplied
+ * by its 1-based index and XOR-ed into the result.
+ * Because keySize is divided by 4 with integer division, any trailing
+ * (keySize % 4) bytes do not contribute to the hash.
+ * NOTE(review): assumes 'key' is suitably aligned for unsigned access and
+ * that sizeof(unsigned) == 4 -- confirm against callers.
+ */
+static unsigned
+hash(const void *key, unsigned keySize)
+{
+ unsigned i, hash;
+
+ keySize /= 4; /* convert from bytes to uints */
+
+ hash = 0;
+ for (i = 0; i < keySize; i++) {
+ hash ^= (i + 1) * ((const unsigned *) key)[i];
+ }
+
+ /*hash = hash ^ (hash >> 11) ^ (hash >> 22);*/
+
+ return hash;
+}
+
+
+/**
+ * Allocate and initialize an empty keymap.
+ * \param keySize  size of the keys in bytes
+ * \param maxEntries  max number of entries to allow (~0 = infinity)
+ * \param deleteFunc  optional callback to call when entries
+ *                    are deleted/replaced; when NULL a default that
+ *                    frees the data pointer is installed
+ * \return the new map, or NULL if allocation fails
+ */
+struct keymap *
+util_new_keymap(unsigned keySize, unsigned maxEntries,
+                keymap_delete_func deleteFunc)
+{
+   struct keymap *km = MALLOC_STRUCT(keymap);
+
+   if (km == NULL)
+      return NULL;
+
+   km->cso = cso_hash_create();
+   if (km->cso == NULL) {
+      FREE(km);
+      return NULL;
+   }
+
+   km->key_size = keySize;
+   km->max_entries = maxEntries;
+   km->num_entries = 0;
+
+   if (deleteFunc)
+      km->delete_func = deleteFunc;
+   else
+      km->delete_func = default_delete_func;
+
+   return km;
+}
+
+
+/**
+ * Delete/free a keymap and all entries.  The deleteFunc that was given at
+ * create time will be called for each entry.
+ * Passing a NULL map is a no-op.
+ * \param map  the map to destroy (may be NULL)
+ * \param user  user-provided pointer passed through to the delete callback
+ */
+void
+util_delete_keymap(struct keymap *map, void *user)
+{
+   /* Tolerate NULL so cleanup paths need no guard of their own. */
+   if (!map)
+      return;
+
+   util_keymap_remove_all(map, user);
+   cso_hash_delete(map->cso);
+   FREE(map);
+}
+
+
+/**
+ * Find the hash iterator for the entry whose key bytes equal 'key'.
+ * The search starts at the first node found for key_hash and walks
+ * forward with cso_hash_iter_next() until a byte-wise key match is found.
+ * \return iterator at the matching entry, or a null iterator if none
+ */
+static INLINE struct cso_hash_iter
+hash_table_find_iter(const struct keymap *map, const void *key,
+                     unsigned key_hash)
+{
+   struct cso_hash_iter it;
+
+   for (it = cso_hash_find(map->cso, key_hash);
+        !cso_hash_iter_is_null(it);
+        it = cso_hash_iter_next(it)) {
+      const struct keymap_item *entry =
+         (const struct keymap_item *) cso_hash_iter_data(it);
+
+      if (memcmp(entry->key, key, map->key_size) == 0)
+         return it;
+   }
+
+   return it;
+}
+
+
+/**
+ * Like hash_table_find_iter() but return the entry itself,
+ * or NULL when the key is not present.
+ */
+static INLINE struct keymap_item *
+hash_table_find_item(const struct keymap *map, const void *key,
+                     unsigned key_hash)
+{
+   const struct cso_hash_iter it = hash_table_find_iter(map, key, key_hash);
+
+   return cso_hash_iter_is_null(it) ? NULL : hash_table_item(it);
+}
+
+
+/**
+ * Insert a new key + data pointer into the table.
+ * Note: we create a copy of the key, but not the data!
+ * If the key is already present in the table, replace the existing
+ * entry (calling the delete callback on the previous entry).
+ * Note: the map's max_entries limit is not enforced yet, so no old
+ * entry is evicted to make room.
+ * \param user  passed to the delete callback as the last param.
+ * \return TRUE on success, FALSE if memory allocation or the hash
+ *         insertion failed (the map is left unchanged in that case)
+ */
+boolean
+util_keymap_insert(struct keymap *map, const void *key,
+                   const void *data, void *user)
+{
+   unsigned key_hash;
+   struct keymap_item *item;
+   struct cso_hash_iter iter;
+
+   assert(map);
+
+   key_hash = hash(key, map->key_size);
+
+   item = hash_table_find_item(map, key, key_hash);
+   if (item) {
+      /* call delete callback for old entry/item */
+      map->delete_func(map, item->key, item->value, user);
+      item->value = (void *) data;
+      return TRUE;
+   }
+
+   item = MALLOC_STRUCT(keymap_item);
+   if (!item)
+      return FALSE;
+
+   /* The key copy can fail too; don't store an entry with a NULL key
+    * since lookups memcmp() against it.
+    */
+   item->key = mem_dup(key, map->key_size);
+   if (!item->key) {
+      FREE(item);
+      return FALSE;
+   }
+   item->value = (void *) data;
+
+   iter = cso_hash_insert(map->cso, key_hash, item);
+   if (cso_hash_iter_is_null(iter)) {
+      /* release the key copy as well as the item, or it leaks */
+      FREE(item->key);
+      FREE(item);
+      return FALSE;
+   }
+
+   map->num_entries++;
+
+   return TRUE;
+}
+
+
+/**
+ * Find the data pointer that was stored for the given key.
+ * \return the associated data pointer, or NULL if the key is not in the map
+ */
+const void *
+util_keymap_lookup(const struct keymap *map, const void *key)
+{
+   const struct keymap_item *entry;
+
+   assert(map);
+
+   entry = hash_table_find_item(map, key, hash(key, map->key_size));
+
+   return entry ? entry->value : NULL;
+}
+
+
+/**
+ * Remove the entry with the given key, if there is one.
+ * When the key is found the delete callback is invoked for the entry,
+ * then the map's key copy and the entry itself are freed.
+ * \param user  passed to the delete callback as the last param.
+ */
+void
+util_keymap_remove(struct keymap *map, const void *key, void *user)
+{
+   struct cso_hash_iter pos;
+
+   assert(map);
+
+   pos = hash_table_find_iter(map, key, hash(key, map->key_size));
+
+   if (!cso_hash_iter_is_null(pos)) {
+      struct keymap_item *entry = hash_table_item(pos);
+
+      assert(entry);
+      map->delete_func(map, entry->key, entry->value, user);
+      FREE(entry->key);
+      FREE(entry);
+
+      map->num_entries--;
+
+      cso_hash_erase(map->cso, pos);
+   }
+}
+
+
+/**
+ * Remove all entries from the map, calling the delete callback for each.
+ * On return the map is empty and its entry count is zero.
+ * \param user  passed to the delete callback as the last param.
+ */
+void
+util_keymap_remove_all(struct keymap *map, void *user)
+{
+   struct cso_hash_iter iter;
+   struct keymap_item *item;
+
+   assert(map);
+
+   /* Repeatedly take the first node until the table is empty. */
+   iter = cso_hash_first_node(map->cso);
+   while (!cso_hash_iter_is_null(iter)) {
+      item = (struct keymap_item *)
+         cso_hash_take(map->cso, cso_hash_iter_key(iter));
+      map->delete_func(map, item->key, item->value, user);
+      FREE(item->key);
+      FREE(item);
+      iter = cso_hash_first_node(map->cso);
+   }
+
+   /* keep the bookkeeping in sync with the now-empty table */
+   map->num_entries = 0;
+}
+
+
+/** Print the map's address, current entry count and max entry count. */
+void
+util_keymap_info(const struct keymap *map)
+{
+   const unsigned used = map->num_entries;
+   const unsigned limit = map->max_entries;
+
+   debug_printf("Keymap %p: %u of max %u entries\n",
+                (void *) map, used, limit);
+}
diff --git a/src/gallium/auxiliary/util/u_keymap.h b/src/gallium/auxiliary/util/u_keymap.h
new file mode 100644
index 0000000000..8d60a76fc3
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_keymap.h
@@ -0,0 +1,68 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_KEYMAP_H
+#define U_KEYMAP_H
+
+#include "pipe/p_compiler.h"
+
+
+/** opaque keymap type */
+struct keymap;
+
+
+/**
+ * Delete/callback function type.
+ * Invoked with the map, the entry's key and data pointers, and the
+ * 'user' pointer that the caller passed to the insert/remove/delete call.
+ */
+typedef void (*keymap_delete_func)(const struct keymap *map,
+ const void *key, void *data,
+ void *user);
+
+
+extern struct keymap *
+util_new_keymap(unsigned keySize, unsigned maxEntries,
+ keymap_delete_func deleteFunc);
+
+extern void
+util_delete_keymap(struct keymap *map, void *user);
+
+extern boolean
+util_keymap_insert(struct keymap *map, const void *key,
+ const void *data, void *user);
+
+extern const void *
+util_keymap_lookup(const struct keymap *map, const void *key);
+
+extern void
+util_keymap_remove(struct keymap *map, const void *key, void *user);
+
+extern void
+util_keymap_remove_all(struct keymap *map, void *user);
+
+extern void
+util_keymap_info(const struct keymap *map);
+
+
+#endif /* U_KEYMAP_H */
diff --git a/src/gallium/auxiliary/util/u_linear.c b/src/gallium/auxiliary/util/u_linear.c
new file mode 100644
index 0000000000..6be365e53b
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linear.c
@@ -0,0 +1,70 @@
+
+#include "util/u_debug.h"
+#include "u_linear.h"
+
+/**
+ * Copy a linear image into a tiled buffer.
+ * The destination is written sequentially, tile after tile; within each
+ * tile one row of blocks is copied at a time from the linear source.
+ * \param src_stride  byte distance between consecutive source rows
+ * \param src_ptr  start of the linear source
+ * \param t  tile layout description
+ * \param dst_ptr  start of the tiled destination
+ */
+void
+pipe_linear_to_tile(size_t src_stride, const void *src_ptr,
+                    struct pipe_tile_info *t, void *dst_ptr)
+{
+   const size_t row_bytes = t->cols * t->block.size;
+   char *out = (char *) dst_ptr;
+   int tx, ty, r;
+
+   assert(pipe_linear_check_tile(t));
+
+   /* let's write linearly to the tiled buffer */
+   for (ty = 0; ty < t->tiles_y; ty++) {
+      for (tx = 0; tx < t->tiles_x; tx++) {
+         /* this inner loop could be replaced with SSE magic */
+         char *in = (char *) src_ptr
+            + src_stride * t->rows * ty + row_bytes * tx;
+
+         for (r = 0; r < t->rows; r++, in += src_stride, out += row_bytes)
+            memcpy(out, in, row_bytes);
+      }
+   }
+}
+
+/**
+ * Copy a tiled buffer into a linear image (inverse of pipe_linear_to_tile).
+ * The source is read sequentially, tile after tile; within each tile one
+ * row of blocks is copied at a time into the linear destination.
+ * \param t  tile layout description
+ * \param src_ptr  start of the tiled source
+ * \param dst_stride  byte distance between consecutive destination rows
+ * \param dst_ptr  start of the linear destination
+ */
+void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr,
+                           size_t dst_stride, void *dst_ptr)
+{
+   int x, y, z;
+   char *ptr;
+   size_t bytes = t->cols * t->block.size;
+   const char *src_ptr2 = (const char *) src_ptr;
+
+   /* same sanity check as pipe_linear_to_tile() */
+   assert(pipe_linear_check_tile(t));
+
+   /* let's read linearly from the tiled buffer */
+   for (y = 0; y < t->tiles_y; y++) {
+      for (x = 0; x < t->tiles_x; x++) {
+         /* this inner loop could be replaced with SSE magic */
+         ptr = (char*)dst_ptr + dst_stride * t->rows * y + bytes * x;
+         for (z = 0; z < t->rows; z++) {
+            memcpy(ptr, src_ptr2, bytes);
+            src_ptr2 += bytes;
+            ptr += dst_stride;
+         }
+      }
+   }
+}
+
+/**
+ * Fill out a pipe_tile_info struct from a block description, a tile
+ * size in pixels and a tile count.  The derived fields (cols, rows,
+ * tile.size, stride, size) are computed from the arguments.
+ * NOTE(review): divides by block->width and block->height; presumably
+ * both are non-zero -- confirm against callers.
+ */
+void
+pipe_linear_fill_info(struct pipe_tile_info *t,
+ const struct pipe_format_block *block,
+ unsigned tile_width, unsigned tile_height,
+ unsigned tiles_x, unsigned tiles_y)
+{
+ t->block = *block;
+
+ t->tile.width = tile_width;
+ t->tile.height = tile_height;
+ /* tile dimensions expressed in blocks (integer division) */
+ t->cols = t->tile.width / t->block.width;
+ t->rows = t->tile.height / t->block.height;
+ t->tile.size = t->cols * t->rows * t->block.size;
+
+ t->tiles_x = tiles_x;
+ t->tiles_y = tiles_y;
+ /* one row of blocks across all tiles in x */
+ t->stride = t->cols * t->tiles_x * t->block.size;
+ t->size = t->tiles_x * t->tiles_y * t->tile.size;
+}
diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h
new file mode 100644
index 0000000000..1589f029bc
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linear.h
@@ -0,0 +1,61 @@
+
+#ifndef U_LINEAR_H
+#define U_LINEAR_H
+
+#include "pipe/p_format.h"
+
+/**
+ * Describes the layout of a tiled buffer: how many tiles there are and
+ * how each tile is made up of format blocks.
+ */
+struct pipe_tile_info
+{
+ /* tiles_x * tiles_y * tile.size, as set by pipe_linear_fill_info() */
+ unsigned size;
+ /* cols * tiles_x * block.size, as set by pipe_linear_fill_info() */
+ unsigned stride;
+
+ /* The number of tiles */
+ unsigned tiles_x;
+ unsigned tiles_y;
+
+ /* size of each tile expressed in blocks */
+ unsigned cols;
+ unsigned rows;
+
+ /* Describe the tile in pixels */
+ struct pipe_format_block tile;
+
+ /* Describe each block within the tile */
+ struct pipe_format_block block;
+};
+
+void pipe_linear_to_tile(size_t src_stride, const void *src_ptr,
+ struct pipe_tile_info *t, void *dst_ptr);
+
+void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr,
+ size_t dst_stride, void *dst_ptr);
+
+/**
+ * Convenience function to fill out a pipe_tile_info struct.
+ * @t info to fill out.
+ * @block block info about pixel layout
+ * @tile_width the width of the tile in pixels
+ * @tile_height the height of the tile in pixels
+ * @tiles_x number of tiles in x axis
+ * @tiles_y number of tiles in y axis
+ */
+void pipe_linear_fill_info(struct pipe_tile_info *t,
+ const struct pipe_format_block *block,
+ unsigned tile_width, unsigned tile_height,
+ unsigned tiles_x, unsigned tiles_y);
+
+/**
+ * Sanity-check a pipe_tile_info: tile.size and stride must equal the
+ * values derived from block.size, cols, rows and tiles_x, and size must
+ * be at least stride * rows * tiles_y.
+ * \return TRUE if the description is self-consistent, FALSE otherwise
+ */
+static INLINE boolean pipe_linear_check_tile(const struct pipe_tile_info *t)
+{
+   if (t->tile.size != t->block.size * t->cols * t->rows ||
+       t->stride != t->block.size * t->cols * t->tiles_x ||
+       t->size < t->stride * t->rows * t->tiles_y) {
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+#endif /* U_LINEAR_H */
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 1ae3234423..1ecde7a912 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -40,7 +40,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#ifdef __cplusplus
@@ -68,7 +68,7 @@ __inline double ceil(double val)
return ceil_val;
}
-#ifndef PIPE_SUBSYSTEM_WINDOWS_CE
+#ifndef PIPE_SUBSYSTEM_WINDOWS_CE_OGL
__inline double floor(double val)
{
double floor_val;
@@ -341,6 +341,10 @@ unsigned ffs( unsigned u )
}
#endif
+#ifdef __MINGW32__
+#define ffs __builtin_ffs
+#endif
+
/**
* Return float bits.
diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h
index 79e34e185f..ceb3a1cb61 100644
--- a/src/gallium/auxiliary/util/u_memory.h
+++ b/src/gallium/auxiliary/util/u_memory.h
@@ -36,7 +36,7 @@
#include "util/u_pointer.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#ifdef __cplusplus
@@ -56,7 +56,7 @@ extern "C" {
/* memory debugging */
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#define MALLOC( _size ) \
debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size )
@@ -151,6 +151,8 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T))
+/* Zero-allocate a struct T followed by more_size extra bytes.
+ * The argument is parenthesized so expressions such as (a ? b : c)
+ * or (n << 2) are added to sizeof(struct T) as a whole.
+ */
+#define CALLOC_VARIANT_LENGTH_STRUCT(T,more_size) ((struct T *) CALLOC(1, sizeof(struct T) + (more_size)))
+
/**
* Return memory on given byte alignment
diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c
index 01dd67c810..151a480d34 100644
--- a/src/gallium/auxiliary/util/u_mm.c
+++ b/src/gallium/auxiliary/util/u_mm.c
@@ -24,14 +24,14 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_mm.h"
void
-mmDumpMemInfo(const struct mem_block *heap)
+u_mmDumpMemInfo(const struct mem_block *heap)
{
debug_printf("Memory heap %p:\n", (void *)heap);
if (heap == 0) {
@@ -58,7 +58,7 @@ mmDumpMemInfo(const struct mem_block *heap)
}
struct mem_block *
-mmInit(int ofs, int size)
+u_mmInit(int ofs, int size)
{
struct mem_block *heap, *block;
@@ -165,7 +165,7 @@ SliceBlock(struct mem_block *p,
struct mem_block *
-mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch)
+u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch)
{
struct mem_block *p;
const int mask = (1 << align2)-1;
@@ -202,7 +202,7 @@ mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch)
struct mem_block *
-mmFindBlock(struct mem_block *heap, int start)
+u_mmFindBlock(struct mem_block *heap, int start)
{
struct mem_block *p;
@@ -241,7 +241,7 @@ Join2Blocks(struct mem_block *p)
}
int
-mmFreeMem(struct mem_block *b)
+u_mmFreeMem(struct mem_block *b)
{
if (!b)
return 0;
@@ -270,7 +270,7 @@ mmFreeMem(struct mem_block *b)
void
-mmDestroy(struct mem_block *heap)
+u_mmDestroy(struct mem_block *heap)
{
struct mem_block *p;
diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h
index b226b101cb..ce20e48763 100644
--- a/src/gallium/auxiliary/util/u_mm.h
+++ b/src/gallium/auxiliary/util/u_mm.h
@@ -49,7 +49,7 @@ struct mem_block {
* input: total size in bytes
* return: a heap pointer if OK, NULL if error
*/
-extern struct mem_block *mmInit(int ofs, int size);
+extern struct mem_block *u_mmInit(int ofs, int size);
/**
* Allocate 'size' bytes with 2^align2 bytes alignment,
@@ -61,7 +61,7 @@ extern struct mem_block *mmInit(int ofs, int size);
* startSearch = linear offset from start of heap to begin search
* return: pointer to the allocated block, 0 if error
*/
-extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2,
+extern struct mem_block *u_mmAllocMem(struct mem_block *heap, int size, int align2,
int startSearch);
/**
@@ -69,23 +69,23 @@ extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2
* input: pointer to a block
* return: 0 if OK, -1 if error
*/
-extern int mmFreeMem(struct mem_block *b);
+extern int u_mmFreeMem(struct mem_block *b);
/**
* Free block starts at offset
* input: pointer to a heap, start offset
* return: pointer to a block
*/
-extern struct mem_block *mmFindBlock(struct mem_block *heap, int start);
+extern struct mem_block *u_mmFindBlock(struct mem_block *heap, int start);
/**
* destroy MM
*/
-extern void mmDestroy(struct mem_block *mmInit);
+extern void u_mmDestroy(struct mem_block *mmInit);
/**
* For debuging purpose.
*/
-extern void mmDumpMemInfo(const struct mem_block *mmInit);
+extern void u_mmDumpMemInfo(const struct mem_block *mmInit);
#endif
diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h
index e45e84ded2..d7c3995dbf 100644
--- a/src/gallium/auxiliary/util/u_prim.h
+++ b/src/gallium/auxiliary/util/u_prim.h
@@ -119,4 +119,20 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
}
+/**
+ * Map a PIPE_PRIM_x primitive type to its reduced primitive type.
+ * Points map to PIPE_PRIM_POINTS; lines, line strips and line loops map
+ * to PIPE_PRIM_LINES; every other value maps to PIPE_PRIM_TRIANGLES.
+ * The result is a PIPE_PRIM_x value, not a truth value, hence the
+ * unsigned return type.
+ */
+static INLINE unsigned u_reduced_prim( unsigned pipe_prim )
+{
+   switch (pipe_prim) {
+   case PIPE_PRIM_POINTS:
+      return PIPE_PRIM_POINTS;
+
+   case PIPE_PRIM_LINES:
+   case PIPE_PRIM_LINE_STRIP:
+   case PIPE_PRIM_LINE_LOOP:
+      return PIPE_PRIM_LINES;
+
+   default:
+      return PIPE_PRIM_TRIANGLES;
+   }
+}
+
#endif
diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
index fe81a685be..6e24e594e4 100644
--- a/src/gallium/auxiliary/util/u_rect.c
+++ b/src/gallium/auxiliary/util/u_rect.c
@@ -169,46 +169,35 @@ util_surface_copy(struct pipe_context *pipe,
unsigned w, unsigned h)
{
struct pipe_screen *screen = pipe->screen;
- struct pipe_surface *new_src = NULL, *new_dst = NULL;
+ struct pipe_transfer *src_trans, *dst_trans;
void *dst_map;
const void *src_map;
- assert(dst->block.size == src->block.size);
- assert(dst->block.width == src->block.width);
- assert(dst->block.height == src->block.height);
-
- if ((src->usage & PIPE_BUFFER_USAGE_CPU_READ) == 0) {
- /* Need to create new src surface which is CPU readable */
- assert(src->texture);
- if (!src->texture)
- return;
- new_src = screen->get_tex_surface(screen,
+ assert(src->texture && dst->texture);
+ if (!src->texture || !dst->texture)
+ return;
+ src_trans = screen->get_tex_transfer(screen,
src->texture,
src->face,
src->level,
src->zslice,
- PIPE_BUFFER_USAGE_CPU_READ);
- src = new_src;
- }
+ PIPE_TRANSFER_READ,
+ src_x, src_y, w, h);
- if ((dst->usage & PIPE_BUFFER_USAGE_CPU_WRITE) == 0) {
- /* Need to create new dst surface which is CPU writable */
- assert(dst->texture);
- if (!dst->texture)
- return;
- new_dst = screen->get_tex_surface(screen,
+ dst_trans = screen->get_tex_transfer(screen,
dst->texture,
dst->face,
dst->level,
dst->zslice,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- dst = new_dst;
- }
+ PIPE_TRANSFER_WRITE,
+ dst_x, dst_y, w, h);
- src_map = pipe->screen->surface_map(screen,
- src, PIPE_BUFFER_USAGE_CPU_READ);
- dst_map = pipe->screen->surface_map(screen,
- dst, PIPE_BUFFER_USAGE_CPU_WRITE);
+ assert(dst_trans->block.size == src_trans->block.size);
+ assert(dst_trans->block.width == src_trans->block.width);
+ assert(dst_trans->block.height == src_trans->block.height);
+
+ src_map = pipe->screen->transfer_map(screen, src_trans);
+ dst_map = pipe->screen->transfer_map(screen, dst_trans);
assert(src_map);
assert(dst_map);
@@ -216,36 +205,25 @@ util_surface_copy(struct pipe_context *pipe,
if (src_map && dst_map) {
/* If do_flip, invert src_y position and pass negative src stride */
pipe_copy_rect(dst_map,
- &dst->block,
- dst->stride,
- dst_x, dst_y,
+ &dst_trans->block,
+ dst_trans->stride,
+ 0, 0,
w, h,
src_map,
- do_flip ? -(int) src->stride : src->stride,
- src_x,
- do_flip ? src_y + h - 1 : src_y);
+ do_flip ? -(int) src_trans->stride : src_trans->stride,
+ 0,
+ do_flip ? h - 1 : 0);
}
- pipe->screen->surface_unmap(pipe->screen, src);
- pipe->screen->surface_unmap(pipe->screen, dst);
+ pipe->screen->transfer_unmap(pipe->screen, src_trans);
+ pipe->screen->transfer_unmap(pipe->screen, dst_trans);
- if (new_src)
- screen->tex_surface_release(screen, &new_src);
- if (new_dst)
- screen->tex_surface_release(screen, &new_dst);
+ screen->tex_transfer_release(screen, &src_trans);
+ screen->tex_transfer_release(screen, &dst_trans);
}
-static void *
-get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y)
-{
- return (char *)dst_map
- + y / dst->block.height * dst->stride
- + x / dst->block.width * dst->block.size;
-}
-
-
#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8))
@@ -260,42 +238,38 @@ util_surface_fill(struct pipe_context *pipe,
unsigned width, unsigned height, unsigned value)
{
struct pipe_screen *screen = pipe->screen;
- struct pipe_surface *new_dst = NULL;
+ struct pipe_transfer *dst_trans;
void *dst_map;
- if ((dst->usage & PIPE_BUFFER_USAGE_CPU_WRITE) == 0) {
- /* Need to create new dst surface which is CPU writable */
- assert(dst->texture);
- if (!dst->texture)
- return;
- new_dst = screen->get_tex_surface(screen,
+ assert(dst->texture);
+ if (!dst->texture)
+ return;
+ dst_trans = screen->get_tex_transfer(screen,
dst->texture,
dst->face,
dst->level,
dst->zslice,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- dst = new_dst;
- }
+ PIPE_TRANSFER_WRITE,
+ dstx, dsty, width, height);
- dst_map = pipe->screen->surface_map(screen,
- dst, PIPE_BUFFER_USAGE_CPU_WRITE);
+ dst_map = pipe->screen->transfer_map(screen, dst_trans);
assert(dst_map);
if (dst_map) {
- assert(dst->stride > 0);
+ assert(dst_trans->stride > 0);
- switch (dst->block.size) {
+ switch (dst_trans->block.size) {
case 1:
case 2:
case 4:
- pipe_fill_rect(dst_map, &dst->block, dst->stride,
- dstx, dsty, width, height, value);
+ pipe_fill_rect(dst_map, &dst_trans->block, dst_trans->stride,
+ 0, 0, width, height, value);
break;
case 8:
{
/* expand the 4-byte clear value to an 8-byte value */
- ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty);
+ ushort *row = (ushort *) dst_map;
ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff);
ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff);
ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff);
@@ -312,7 +286,7 @@ util_surface_fill(struct pipe_context *pipe,
row[j*4+2] = val2;
row[j*4+3] = val3;
}
- row += dst->stride/2;
+ row += dst_trans->stride/2;
}
}
break;
@@ -322,8 +296,6 @@ util_surface_fill(struct pipe_context *pipe,
}
}
- pipe->screen->surface_unmap(pipe->screen, dst);
-
- if (new_dst)
- screen->tex_surface_release(screen, &new_dst);
+ pipe->screen->transfer_unmap(pipe->screen, dst_trans);
+ screen->tex_transfer_release(screen, &dst_trans);
}
diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c
new file mode 100644
index 0000000000..089bbbc48a
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_simple_screen.c
@@ -0,0 +1,143 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "u_simple_screen.h"
+
+#include "pipe/p_screen.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+
+static struct pipe_buffer *
+pass_buffer_create(struct pipe_screen *screen,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size)
+{
+ return screen->winsys->buffer_create(screen->winsys,
+ alignment, usage, size);
+}
+
+static struct pipe_buffer *
+pass_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes)
+{
+ return screen->winsys->user_buffer_create(screen->winsys,
+ ptr, bytes);
+}
+
+static struct pipe_buffer *
+pass_surface_buffer_create(struct pipe_screen *screen,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned usage,
+ unsigned *stride)
+{
+ return screen->winsys->surface_buffer_create(screen->winsys,
+ width, height,
+ format, usage, stride);
+}
+
+static void *
+pass_buffer_map(struct pipe_screen *screen,
+ struct pipe_buffer *buf,
+ unsigned usage)
+{
+ return screen->winsys->buffer_map(screen->winsys,
+ buf, usage);
+}
+
+static void
+pass_buffer_unmap(struct pipe_screen *screen,
+ struct pipe_buffer *buf)
+{
+ screen->winsys->buffer_unmap(screen->winsys, buf);
+}
+
+static void
+pass_buffer_destroy(struct pipe_screen *screen,
+ struct pipe_buffer *buf)
+{
+ screen->winsys->buffer_destroy(screen->winsys, buf);
+}
+
+
+static void
+pass_flush_frontbuffer(struct pipe_screen *screen,
+ struct pipe_surface *surf,
+ void *context_private)
+{
+ screen->winsys->flush_frontbuffer(screen->winsys,
+ surf, context_private);
+}
+
+static void
+pass_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+ screen->winsys->fence_reference(screen->winsys,
+ ptr, fence);
+}
+
+static int
+pass_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ return screen->winsys->fence_signalled(screen->winsys,
+ fence, flag);
+}
+
+static int
+pass_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ return screen->winsys->fence_finish(screen->winsys,
+ fence, flag);
+}
+
+void u_simple_screen_init(struct pipe_screen *screen)
+{
+ screen->buffer_create = pass_buffer_create;
+ screen->user_buffer_create = pass_user_buffer_create;
+ screen->surface_buffer_create = pass_surface_buffer_create;
+
+ screen->buffer_map = pass_buffer_map;
+ screen->buffer_unmap = pass_buffer_unmap;
+ screen->buffer_destroy = pass_buffer_destroy;
+ screen->flush_frontbuffer = pass_flush_frontbuffer;
+ screen->fence_reference = pass_fence_reference;
+ screen->fence_signalled = pass_fence_signalled;
+ screen->fence_finish = pass_fence_finish;
+}
+
+const char* u_simple_screen_winsys_name(struct pipe_screen *screen)
+{
+ return screen->winsys->get_name(screen->winsys);
+}
diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h
new file mode 100644
index 0000000000..6612a8a7c0
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_simple_screen.h
@@ -0,0 +1,47 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_SIMPLE_SCREEN_H
+#define U_SIMPLE_SCREEN_H
+
+struct pipe_screen;
+struct pipe_winsys;
+
+/**
+ * The following function initializes a simple passthrough screen.
+ *
+ * All the relevant screen function pointers will be forwarded to the
+ * winsys.
+ */
+void u_simple_screen_init(struct pipe_screen *screen);
+
+/**
+ * Returns the name of the winsys associated with this screen.
+ */
+const char* u_simple_screen_winsys_name(struct pipe_screen *screen);
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index f06d13c2c4..2fcad6fe3d 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -34,10 +34,10 @@
#include "pipe/p_context.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/p_screen.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_memory.h"
@@ -359,3 +359,10 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe,
return pipe->create_fs_state(pipe, shader);
}
+
+void
+util_free_shader(struct pipe_shader_state *shader)
+{
+ FREE((struct tgsi_token *)shader->tokens);
+ shader->tokens = NULL;
+}
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h
index 8ca4977d71..99b8d9067d 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.h
+++ b/src/gallium/auxiliary/util/u_simple_shaders.h
@@ -60,6 +60,10 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe,
struct pipe_shader_state *shader);
+extern void
+util_free_shader(struct pipe_shader_state *shader);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
new file mode 100644
index 0000000000..e2a8491e62
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -0,0 +1,77 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * SSE intrinsics portability header.
+ *
+ * Although the SSE intrinsics are supported by all modern x86 and x86-64
+ * compilers, there are some intrinsics missing in some implementations
+ * (especially older MSVC versions). This header abstracts that away.
+ */
+
+#ifndef U_SSE_H_
+#define U_SSE_H_
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_SSE)
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+
+/* MSVC before VC8 does not support the _mm_castxxx_yyy */
+#if defined(_MSC_VER) && _MSC_VER < 1500
+
+union __declspec(align(16)) m128_types {
+ __m128 m128;
+ __m128i m128i;
+ __m128d m128d;
+};
+
+static __inline __m128
+_mm_castsi128_ps(__m128i a)
+{
+ union m128_types u;
+ u.m128i = a;
+ return u.m128;
+}
+
+static __inline __m128i
+_mm_castps_si128(__m128 a)
+{
+ union m128_types u;
+ u.m128 = a;
+ return u.m128i;
+}
+
+#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */
+
+#endif /* PIPE_ARCH_SSE */
+
+#endif /* U_SSE_H_ */
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index 336c7714d4..d31ca9c029 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -28,7 +28,6 @@
/**
* RGBA/float tile get/put functions.
* Usable both by drivers and state trackers.
- * Surfaces should already be in a mapped state.
*/
@@ -42,58 +41,58 @@
/**
- * Move raw block of pixels from surface to user memory.
- * This should be usable by any hw driver that has mappable surfaces.
+ * Move raw block of pixels from transfer object to user memory.
*/
void
-pipe_get_tile_raw(struct pipe_surface *ps,
+pipe_get_tile_raw(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
void *dst, int dst_stride)
{
+ struct pipe_screen *screen = pt->texture->screen;
const void *src;
if (dst_stride == 0)
- dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size;
+ dst_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size;
- if (pipe_clip_tile(x, y, &w, &h, ps))
+ if (pipe_clip_tile(x, y, &w, &h, pt))
return;
- src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ);
+ src = screen->transfer_map(screen, pt);
assert(src);
if(!src)
return;
- pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y);
+ pipe_copy_rect(dst, &pt->block, dst_stride, 0, 0, w, h, src, pt->stride, x, y);
- pipe_surface_unmap(ps);
+ screen->transfer_unmap(screen, pt);
}
/**
- * Move raw block of pixels from user memory to surface.
- * This should be usable by any hw driver that has mappable surfaces.
+ * Move raw block of pixels from user memory to transfer object.
*/
void
-pipe_put_tile_raw(struct pipe_surface *ps,
+pipe_put_tile_raw(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const void *src, int src_stride)
{
+ struct pipe_screen *screen = pt->texture->screen;
void *dst;
if (src_stride == 0)
- src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size;
+ src_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size;
- if (pipe_clip_tile(x, y, &w, &h, ps))
+ if (pipe_clip_tile(x, y, &w, &h, pt))
return;
- dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE);
+ dst = screen->transfer_map(screen, pt);
assert(dst);
if(!dst)
return;
- pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0);
+ pipe_copy_rect(dst, &pt->block, pt->stride, x, y, w, h, src, src_stride, 0, 0);
- pipe_surface_unmap(ps);
+ screen->transfer_unmap(screen, pt);
}
@@ -460,7 +459,7 @@ l8_put_tile_rgba(ubyte *dst,
for (j = 0; j < w; j++, pRow += 4) {
unsigned r;
r = float_to_ubyte(pRow[0]);
- *dst++ = r;
+ *dst++ = (ubyte) r;
}
p += src_stride;
}
@@ -504,7 +503,7 @@ a8_put_tile_rgba(ubyte *dst,
for (j = 0; j < w; j++, pRow += 4) {
unsigned a;
a = float_to_ubyte(pRow[3]);
- *dst++ = a;
+ *dst++ = (ubyte) a;
}
p += src_stride;
}
@@ -634,7 +633,7 @@ i8_put_tile_rgba(ubyte *dst,
for (j = 0; j < w; j++, pRow += 4) {
unsigned r;
r = float_to_ubyte(pRow[0]);
- *dst++ = r;
+ *dst++ = (ubyte) r;
}
p += src_stride;
}
@@ -772,7 +771,7 @@ z24s8_get_tile_rgba(const unsigned *src,
/*** PIPE_FORMAT_Z32_FLOAT ***/
/**
- * Return each Z value as four floats.
+ * Return each Z value as four floats in [0,1].
*/
static void
z32f_get_tile_rgba(const float *src,
@@ -977,49 +976,49 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
void
-pipe_get_tile_rgba(struct pipe_surface *ps,
+pipe_get_tile_rgba(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
float *p)
{
unsigned dst_stride = w * 4;
void *packed;
- if (pipe_clip_tile(x, y, &w, &h, ps))
+ if (pipe_clip_tile(x, y, &w, &h, pt))
return;
- packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size);
+ packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
if (!packed)
return;
- if(ps->format == PIPE_FORMAT_YCBCR || ps->format == PIPE_FORMAT_YCBCR_REV)
+ if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV)
assert((x & 1) == 0);
- pipe_get_tile_raw(ps, x, y, w, h, packed, 0);
+ pipe_get_tile_raw(pt, x, y, w, h, packed, 0);
- pipe_tile_raw_to_rgba(ps->format, packed, w, h, p, dst_stride);
+ pipe_tile_raw_to_rgba(pt->format, packed, w, h, p, dst_stride);
FREE(packed);
}
void
-pipe_put_tile_rgba(struct pipe_surface *ps,
+pipe_put_tile_rgba(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const float *p)
{
unsigned src_stride = w * 4;
void *packed;
- if (pipe_clip_tile(x, y, &w, &h, ps))
+ if (pipe_clip_tile(x, y, &w, &h, pt))
return;
- packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size);
+ packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
if (!packed)
return;
- switch (ps->format) {
+ switch (pt->format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
break;
@@ -1073,10 +1072,10 @@ pipe_put_tile_rgba(struct pipe_surface *ps,
/*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
break;
default:
- debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(ps->format));
+ debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format));
}
- pipe_put_tile_raw(ps, x, y, w, h, packed, 0);
+ pipe_put_tile_raw(pt, x, y, w, h, packed, 0);
FREE(packed);
}
@@ -1086,62 +1085,63 @@ pipe_put_tile_rgba(struct pipe_surface *ps,
* Get a block of Z values, converted to 32-bit range.
*/
void
-pipe_get_tile_z(struct pipe_surface *ps,
+pipe_get_tile_z(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
uint *z)
{
+ struct pipe_screen *screen = pt->texture->screen;
const uint dstStride = w;
ubyte *map;
uint *pDest = z;
uint i, j;
- if (pipe_clip_tile(x, y, &w, &h, ps))
+ if (pipe_clip_tile(x, y, &w, &h, pt))
return;
- map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ);
+ map = (ubyte *)screen->transfer_map(screen, pt);
if (!map) {
assert(0);
return;
}
- switch (ps->format) {
+ switch (pt->format) {
case PIPE_FORMAT_Z32_UNORM:
{
- const uint *pSrc
- = (const uint *)(map + y * ps->stride + x*4);
+ const uint *ptrc
+ = (const uint *)(map + y * pt->stride + x*4);
for (i = 0; i < h; i++) {
- memcpy(pDest, pSrc, 4 * w);
+ memcpy(pDest, ptrc, 4 * w);
pDest += dstStride;
- pSrc += ps->stride/4;
+ ptrc += pt->stride/4;
}
}
break;
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
{
- const uint *pSrc
- = (const uint *)(map + y * ps->stride + x*4);
+ const uint *ptrc
+ = (const uint *)(map + y * pt->stride + x*4);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 24-bit Z to 32-bit Z */
- pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff);
+ pDest[j] = (ptrc[j] << 8) | (ptrc[j] & 0xff);
}
pDest += dstStride;
- pSrc += ps->stride/4;
+ ptrc += pt->stride/4;
}
}
break;
case PIPE_FORMAT_Z16_UNORM:
{
- const ushort *pSrc
- = (const ushort *)(map + y * ps->stride + x*2);
+ const ushort *ptrc
+ = (const ushort *)(map + y * pt->stride + x*2);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 16-bit Z to 32-bit Z */
- pDest[j] = (pSrc[j] << 16) | pSrc[j];
+ pDest[j] = (ptrc[j] << 16) | ptrc[j];
}
pDest += dstStride;
- pSrc += ps->stride/2;
+ ptrc += pt->stride/2;
}
}
break;
@@ -1149,64 +1149,65 @@ pipe_get_tile_z(struct pipe_surface *ps,
assert(0);
}
- pipe_surface_unmap(ps);
+ screen->transfer_unmap(screen, pt);
}
void
-pipe_put_tile_z(struct pipe_surface *ps,
+pipe_put_tile_z(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const uint *zSrc)
{
+ struct pipe_screen *screen = pt->texture->screen;
const uint srcStride = w;
- const uint *pSrc = zSrc;
+ const uint *ptrc = zSrc;
ubyte *map;
uint i, j;
- if (pipe_clip_tile(x, y, &w, &h, ps))
+ if (pipe_clip_tile(x, y, &w, &h, pt))
return;
- map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE);
+ map = (ubyte *)screen->transfer_map(screen, pt);
if (!map) {
assert(0);
return;
}
- switch (ps->format) {
+ switch (pt->format) {
case PIPE_FORMAT_Z32_UNORM:
{
- uint *pDest = (uint *) (map + y * ps->stride + x*4);
+ uint *pDest = (uint *) (map + y * pt->stride + x*4);
for (i = 0; i < h; i++) {
- memcpy(pDest, pSrc, 4 * w);
- pDest += ps->stride/4;
- pSrc += srcStride;
+ memcpy(pDest, ptrc, 4 * w);
+ pDest += pt->stride/4;
+ ptrc += srcStride;
}
}
break;
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
{
- uint *pDest = (uint *) (map + y * ps->stride + x*4);
+ uint *pDest = (uint *) (map + y * pt->stride + x*4);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 32-bit Z to 24-bit Z (0 stencil) */
- pDest[j] = pSrc[j] >> 8;
+ pDest[j] = ptrc[j] >> 8;
}
- pDest += ps->stride/4;
- pSrc += srcStride;
+ pDest += pt->stride/4;
+ ptrc += srcStride;
}
}
break;
case PIPE_FORMAT_Z16_UNORM:
{
- ushort *pDest = (ushort *) (map + y * ps->stride + x*2);
+ ushort *pDest = (ushort *) (map + y * pt->stride + x*2);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 32-bit Z to 16-bit Z */
- pDest[j] = pSrc[j] >> 16;
+ pDest[j] = ptrc[j] >> 16;
}
- pDest += ps->stride/2;
- pSrc += srcStride;
+ pDest += pt->stride/2;
+ ptrc += srcStride;
}
}
break;
@@ -1214,7 +1215,7 @@ pipe_put_tile_z(struct pipe_surface *ps,
assert(0);
}
- pipe_surface_unmap(ps);
+ screen->transfer_unmap(screen, pt);
}
diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
index a8ac805308..1453af38b8 100644
--- a/src/gallium/auxiliary/util/u_tile.h
+++ b/src/gallium/auxiliary/util/u_tile.h
@@ -30,24 +30,24 @@
#include "pipe/p_compiler.h"
-struct pipe_surface;
+struct pipe_transfer;
/**
- * Clip tile against surface dims.
+ * Clip tile against transfer dims.
* \return TRUE if tile is totally clipped, FALSE otherwise
*/
static INLINE boolean
-pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps)
+pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_transfer *pt)
{
- if (x >= ps->width)
+ if (x >= pt->width)
return TRUE;
- if (y >= ps->height)
+ if (y >= pt->height)
return TRUE;
- if (x + *w > ps->width)
- *w = ps->width - x;
- if (y + *h > ps->height)
- *h = ps->height - y;
+ if (x + *w > pt->width)
+ *w = pt->width - x;
+ if (y + *h > pt->height)
+ *h = pt->height - y;
return FALSE;
}
@@ -56,34 +56,34 @@ extern "C" {
#endif
void
-pipe_get_tile_raw(struct pipe_surface *ps,
+pipe_get_tile_raw(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
void *p, int dst_stride);
void
-pipe_put_tile_raw(struct pipe_surface *ps,
+pipe_put_tile_raw(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const void *p, int src_stride);
void
-pipe_get_tile_rgba(struct pipe_surface *ps,
+pipe_get_tile_rgba(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
float *p);
void
-pipe_put_tile_rgba(struct pipe_surface *ps,
+pipe_put_tile_rgba(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const float *p);
void
-pipe_get_tile_z(struct pipe_surface *ps,
+pipe_get_tile_z(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
uint *z);
void
-pipe_put_tile_z(struct pipe_surface *ps,
+pipe_put_tile_z(struct pipe_transfer *pt,
uint x, uint y, uint w, uint h,
const uint *z);
diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c
index f84514165a..dde2c74fa8 100644
--- a/src/gallium/auxiliary/util/u_time.c
+++ b/src/gallium/auxiliary/util/u_time.c
@@ -200,7 +200,7 @@ util_time_timeout(const struct util_time *start,
}
-#if defined(PIPE_SUBSYSYEM_WINDOWS_DISPLAY)
+#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
void util_time_sleep(unsigned usecs)
{
LONGLONG start, curr, end;
diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c
index 8beb3b4c88..f237e12d73 100644
--- a/src/gallium/auxiliary/util/u_timed_winsys.c
+++ b/src/gallium/auxiliary/util/u_timed_winsys.c
@@ -29,7 +29,7 @@
* Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com>
*/
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "u_timed_winsys.h"
#include "util/u_memory.h"
#include "util/u_time.h"
@@ -121,7 +121,8 @@ timed_buffer_create(struct pipe_winsys *winsys,
struct pipe_winsys *backend = timed_winsys(winsys)->backend;
uint64_t start = time_start();
- struct pipe_buffer *buf = backend->buffer_create( backend, alignment, usage, size );
+ struct pipe_buffer *buf =
+ backend->buffer_create( backend, alignment, usage, size );
time_finish(winsys, start, 0, __FUNCTION__);
@@ -205,34 +206,18 @@ timed_flush_frontbuffer( struct pipe_winsys *winsys,
-static struct pipe_surface *
-timed_surface_alloc(struct pipe_winsys *winsys)
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- uint64_t start = time_start();
-
- struct pipe_surface *surf = backend->surface_alloc( backend );
-
- time_finish(winsys, start, 6, __FUNCTION__);
-
- return surf;
-}
-
-
-
-static int
-timed_surface_alloc_storage(struct pipe_winsys *winsys,
- struct pipe_surface *surf,
+static struct pipe_buffer *
+timed_surface_buffer_create(struct pipe_winsys *winsys,
unsigned width, unsigned height,
enum pipe_format format,
- unsigned flags,
- unsigned tex_usage)
+ unsigned usage,
+ unsigned *stride)
{
struct pipe_winsys *backend = timed_winsys(winsys)->backend;
uint64_t start = time_start();
- int ret = backend->surface_alloc_storage( backend, surf, width, height,
- format, flags, tex_usage );
+ struct pipe_buffer *ret = backend->surface_buffer_create( backend, width, height,
+ format, usage, stride );
time_finish(winsys, start, 7, __FUNCTION__);
@@ -240,19 +225,6 @@ timed_surface_alloc_storage(struct pipe_winsys *winsys,
}
-static void
-timed_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s)
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- uint64_t start = time_start();
-
- backend->surface_release( backend, s );
-
- time_finish(winsys, start, 8, __FUNCTION__);
-}
-
-
-
static const char *
timed_get_name( struct pipe_winsys *winsys )
{
@@ -329,11 +301,9 @@ struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend )
ws->base.buffer_unmap = timed_buffer_unmap;
ws->base.buffer_destroy = timed_buffer_destroy;
ws->base.buffer_create = timed_buffer_create;
+ ws->base.surface_buffer_create = timed_surface_buffer_create;
ws->base.flush_frontbuffer = timed_flush_frontbuffer;
ws->base.get_name = timed_get_name;
- ws->base.surface_alloc = timed_surface_alloc;
- ws->base.surface_alloc_storage = timed_surface_alloc_storage;
- ws->base.surface_release = timed_surface_release;
ws->base.fence_reference = timed_fence_reference;
ws->base.fence_signalled = timed_fence_signalled;
ws->base.fence_finish = timed_fence_finish;
diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile
index 6161cb6ff8..9fe9b2c11d 100644
--- a/src/gallium/drivers/Makefile
+++ b/src/gallium/drivers/Makefile
@@ -1,20 +1,12 @@
+# src/gallium/drivers/Makefile
TOP = ../../..
include $(TOP)/configs/current
+SUBDIRS = $(GALLIUM_DRIVERS_DIRS)
-SUBDIRS = $(GALLIUM_DRIVER_DIRS)
-
-
-default: subdirs
-
-
-subdirs:
+default install clean:
@for dir in $(SUBDIRS) ; do \
if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE)) || exit 1 ; \
+ (cd $$dir && $(MAKE) $@) || exit 1; \
fi \
done
-
-
-clean:
- rm -f `find . -name \*.[oa]`
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index cb0631baf5..1f6860da11 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -49,6 +49,15 @@
}
+
+#define JOIN(x, y) JOIN_AGAIN(x, y)
+#define JOIN_AGAIN(x, y) x ## y
+
+#define STATIC_ASSERT(e) \
+{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 1 : -1];}
+
+
+
/** for sanity checking */
#define ASSERT_ALIGN16(ptr) \
ASSERT((((unsigned long) (ptr)) & 0xf) == 0);
@@ -64,9 +73,13 @@
#define ROUNDUP16(k) (((k) + 0xf) & ~0xf)
-#define CELL_MAX_SPUS 6
+#define CELL_MAX_SPUS 8
#define CELL_MAX_SAMPLERS 4
+#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */
+#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */
+#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */
+#define CELL_MAX_HEIGHT 1024 /**< max framebuffer height */
#define TILE_SIZE 32
@@ -94,48 +107,106 @@
#define CELL_CMD_STATE_BIND_VS 18
#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
#define CELL_CMD_STATE_ATTRIB_FETCH 20
-#define CELL_CMD_VS_EXECUTE 22
-#define CELL_CMD_FLUSH_BUFFER_RANGE 23
+#define CELL_CMD_STATE_FS_CONSTANTS 21
+#define CELL_CMD_STATE_RASTERIZER 22
+#define CELL_CMD_VS_EXECUTE 23
+#define CELL_CMD_FLUSH_BUFFER_RANGE 24
+#define CELL_CMD_FENCE 25
+/** Command/batch buffers */
#define CELL_NUM_BUFFERS 4
#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */
#define CELL_BUFFER_STATUS_FREE 10
#define CELL_BUFFER_STATUS_USED 20
+/** Debug flags */
+#define CELL_DEBUG_CHECKER (1 << 0)
+#define CELL_DEBUG_ASM (1 << 1)
+#define CELL_DEBUG_SYNC (1 << 2)
+#define CELL_DEBUG_FRAGMENT_OPS (1 << 3)
+#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4)
+#define CELL_DEBUG_CMD (1 << 5)
+#define CELL_DEBUG_CACHE (1 << 6)
-#define CELL_DEBUG_CHECKER (1 << 0)
-#define CELL_DEBUG_SYNC (1 << 1)
+#define CELL_FENCE_IDLE 0
+#define CELL_FENCE_EMITTED 1
+#define CELL_FENCE_SIGNALLED 2
+#define CELL_FACING_FRONT 0
+#define CELL_FACING_BACK 1
+struct cell_fence
+{
+ /** There's a 16-byte status qword per SPU */
+ volatile uint status[CELL_MAX_SPUS][4];
+};
-/** Max instructions for doing per-fragment operations */
-#define SPU_MAX_FRAGMENT_OPS_INSTS 64
+#ifdef __SPU__
+typedef vector unsigned int opcode_t;
+#else
+typedef unsigned int opcode_t[4];
+#endif
+
+/**
+ * Fence command sent to SPUs. In response, the SPUs will write
+ * CELL_FENCE_SIGNALLED back to the fence status word in main memory.
+ */
+struct cell_command_fence
+{
+ opcode_t opcode; /**< CELL_CMD_FENCE */
+ struct cell_fence *fence;
+ uint32_t pad_[3];
+};
/**
* Command to specify per-fragment operations state and generated code.
+ * Note that this is a variant-length structure, allocated with as
+ * much memory as needed to hold the generated code; the "code"
+ * field *must* be the last field in the structure. Also, the entire
+ * length of the structure (including the variant code field) must be
+ * a multiple of 8 bytes; we require that this structure itself be
+ * a multiple of 8 bytes, and that the generated code also be a multiple
+ * of 8 bytes.
+ *
+ * Also note that the dsa, blend, blend_color fields are really only needed
+ * for the fallback/C per-pixel code. They're not used when we generate
+ * dynamic SPU fragment code (which is the normal case), and will eventually
+ * be removed from this structure.
*/
struct cell_command_fragment_ops
{
- uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
+ opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
+
+ /* Fields for the fallback case */
struct pipe_depth_stencil_alpha_state dsa;
struct pipe_blend_state blend;
- unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS];
+ struct pipe_blend_color blend_color;
+
+ /* Fields for the generated SPU code */
+ unsigned total_code_size;
+ unsigned front_code_index;
+ unsigned back_code_index;
+ /* this field has variant length, and must be the last field in
+ * the structure
+ */
+ unsigned code[0];
};
/** Max instructions for fragment programs */
-#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128
+#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512
/**
- * Command to send a fragment progra to SPUs.
+ * Command to send a fragment program to SPUs.
*/
struct cell_command_fragment_program
{
- uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
+ opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
uint num_inst; /**< Number of instructions */
+ uint32_t pad[3];
unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
};
@@ -145,10 +216,21 @@ struct cell_command_fragment_program
*/
struct cell_command_framebuffer
{
- uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */
+ opcode_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */
int width, height;
void *color_start, *depth_start;
enum pipe_format color_format, depth_format;
+ uint32_t pad_[2];
+};
+
+
+/**
+ * Tell SPUs about rasterizer state.
+ */
+struct cell_command_rasterizer
+{
+ opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */
+ struct pipe_rasterizer_state rasterizer;
};
@@ -157,9 +239,10 @@ struct cell_command_framebuffer
*/
struct cell_command_clear_surface
{
- uint64_t opcode; /**< CELL_CMD_CLEAR_SURFACE */
+ opcode_t opcode; /**< CELL_CMD_CLEAR_SURFACE */
uint surface; /**< Temporary: 0=color, 1=Z */
uint value;
+ uint32_t pad[2];
};
@@ -206,7 +289,7 @@ struct cell_shader_info
#define SPU_VERTS_PER_BATCH 64
struct cell_command_vs
{
- uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */
+ opcode_t opcode; /**< CELL_CMD_VS_EXECUTE */
uint64_t vOut[SPU_VERTS_PER_BATCH];
unsigned num_elts;
unsigned elts[SPU_VERTS_PER_BATCH];
@@ -218,7 +301,7 @@ struct cell_command_vs
struct cell_command_render
{
- uint64_t opcode; /**< CELL_CMD_RENDER */
+ opcode_t opcode; /**< CELL_CMD_RENDER */
uint prim_type; /**< PIPE_PRIM_x */
uint num_verts;
uint vertex_size; /**< bytes per vertex */
@@ -227,44 +310,51 @@ struct cell_command_render
float xmin, ymin, xmax, ymax; /* XXX another dummy field */
uint min_index;
boolean inline_verts;
+ uint32_t pad_[1];
};
struct cell_command_release_verts
{
- uint64_t opcode; /**< CELL_CMD_RELEASE_VERTS */
+ opcode_t opcode; /**< CELL_CMD_RELEASE_VERTS */
uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
+ uint32_t pad_[3];
};
struct cell_command_sampler
{
- uint64_t opcode; /**< CELL_CMD_STATE_SAMPLER */
+ opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */
uint unit;
struct pipe_sampler_state state;
+ uint32_t pad_[1];
};
struct cell_command_texture
{
- uint64_t opcode; /**< CELL_CMD_STATE_TEXTURE */
+ opcode_t opcode; /**< CELL_CMD_STATE_TEXTURE */
+ uint target; /**< PIPE_TEXTURE_x */
uint unit;
- void *start; /**< Address in main memory */
- ushort width, height;
+ void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */
+ ushort width[CELL_MAX_TEXTURE_LEVELS];
+ ushort height[CELL_MAX_TEXTURE_LEVELS];
+ ushort depth[CELL_MAX_TEXTURE_LEVELS];
};
-/** XXX unions don't seem to work */
-/* XXX this should go away; all commands should be placed in batch buffers */
-struct cell_command
+#define MAX_SPU_FUNCTIONS 12
+/**
+ * Used to tell the PPU about the address of particular functions in the
+ * SPU's address space.
+ */
+struct cell_spu_function_info
{
-#if 0
- struct cell_command_framebuffer fb;
- struct cell_command_clear_surface clear;
- struct cell_command_render render;
-#endif
- struct cell_command_vs vs;
-} ALIGN16_ATTRIB;
+ uint num;
+ char names[MAX_SPU_FUNCTIONS][16];
+ uint addrs[MAX_SPU_FUNCTIONS];
+ char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */
+};
/** This is the object passed to spe_create_thread() */
@@ -273,11 +363,13 @@ struct cell_init_info
unsigned id;
unsigned num_spus;
unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */
- struct cell_command *cmd;
+ float inv_timebase; /**< 1.0/timebase, for perf measurement */
/** Buffers for command batches, vertex/index data */
ubyte *buffers[CELL_NUM_BUFFERS];
uint *buffer_status; /**< points at cell_context->buffer_status */
+
+ struct cell_spu_function_info *spu_functions;
} ALIGN16_ATTRIB;
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile
index b28f4c5c31..c92f8e5cba 100644
--- a/src/gallium/drivers/cell/ppu/Makefile
+++ b/src/gallium/drivers/cell/ppu/Makefile
@@ -24,6 +24,7 @@ SOURCES = \
cell_clear.c \
cell_context.c \
cell_draw_arrays.c \
+ cell_fence.c \
cell_flush.c \
cell_gen_fragment.c \
cell_gen_fp.c \
@@ -38,8 +39,7 @@ SOURCES = \
cell_texture.c \
cell_vbuf.c \
cell_vertex_fetch.c \
- cell_vertex_shader.c \
- cell_winsys.c
+ cell_vertex_shader.c
OBJECTS = $(SOURCES:.c=.o) \
@@ -54,6 +54,9 @@ INCLUDE_DIRS = \
$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
+.c.s:
+ $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
+
default: $(CELL_LIB)
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c
index 16882c0129..fe144f8b84 100644
--- a/src/gallium/drivers/cell/ppu/cell_batch.c
+++ b/src/gallium/drivers/cell/ppu/cell_batch.c
@@ -28,6 +28,7 @@
#include "cell_context.h"
#include "cell_batch.h"
+#include "cell_fence.h"
#include "cell_spu.h"
@@ -42,7 +43,9 @@
uint
cell_get_empty_buffer(struct cell_context *cell)
{
- uint buf = 0, tries = 0;
+ static uint prev_buffer = 0;
+ uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS;
+ uint tries = 0;
/* Find a buffer that's marked as free by all SPUs */
while (1) {
@@ -58,8 +61,13 @@ cell_get_empty_buffer(struct cell_context *cell)
cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
}
/*
- printf("PPU: ALLOC BUFFER %u\n", buf);
+ printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries);
*/
+ prev_buffer = buf;
+
+ /* release tex buffer associated w/ prev use of this batch buf */
+ cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]);
+
return buf;
}
}
@@ -82,6 +90,38 @@ cell_get_empty_buffer(struct cell_context *cell)
/**
+ * Append a fence command to the current batch buffer.
+ * Note that we're sure there's always room for this because of the
+ * adjusted size check in cell_batch_free_space().
+ */
+static void
+emit_fence(struct cell_context *cell)
+{
+ const uint batch = cell->cur_batch;
+ const uint size = cell->buffer_size[batch];
+ struct cell_command_fence *fence_cmd;
+ struct cell_fence *fence = &cell->fenced_buffers[batch].fence;
+ uint i;
+
+ /* set fence status to emitted, not yet signalled */
+ for (i = 0; i < cell->num_spus; i++) {
+ fence->status[i][0] = CELL_FENCE_EMITTED;
+ }
+
+ STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0);
+ ASSERT(size % 16 == 0);
+ ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE);
+
+ fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size);
+ fence_cmd->opcode[0] = CELL_CMD_FENCE;
+ fence_cmd->fence = fence;
+
+ /* update batch buffer size */
+ cell->buffer_size[batch] = size + sizeof(struct cell_command_fence);
+}
+
+
+/**
* Flush the current batch buffer to the SPUs.
* An empty buffer will be found and set as the new current batch buffer
* for subsequent commands/data.
@@ -91,7 +131,7 @@ cell_batch_flush(struct cell_context *cell)
{
static boolean flushing = FALSE;
uint batch = cell->cur_batch;
- const uint size = cell->buffer_size[batch];
+ uint size = cell->buffer_size[batch];
uint spu, cmd_word;
assert(!flushing);
@@ -99,6 +139,14 @@ cell_batch_flush(struct cell_context *cell)
if (size == 0)
return;
+ /* Before we use this batch buffer, make sure any fenced texture buffers
+ * are released.
+ */
+ if (cell->fenced_buffers[batch].head) {
+ emit_fence(cell);
+ size = cell->buffer_size[batch];
+ }
+
flushing = TRUE;
assert(batch < CELL_NUM_BUFFERS);
@@ -139,74 +187,24 @@ uint
cell_batch_free_space(const struct cell_context *cell)
{
uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch];
+ free -= sizeof(struct cell_command_fence);
return free;
}
/**
- * Append data to the current batch buffer.
- * \param data address of block of bytes to append
- * \param bytes size of block of bytes
- */
-void
-cell_batch_append(struct cell_context *cell, const void *data, uint bytes)
-{
- uint size;
-
- ASSERT(bytes % 8 == 0);
- ASSERT(bytes <= CELL_BUFFER_SIZE);
- ASSERT(cell->cur_batch >= 0);
-
-#ifdef ASSERT
- {
- uint spu;
- for (spu = 0; spu < cell->num_spus; spu++) {
- ASSERT(cell->buffer_status[spu][cell->cur_batch][0]
- == CELL_BUFFER_STATUS_USED);
- }
- }
-#endif
-
- size = cell->buffer_size[cell->cur_batch];
-
- if (size + bytes > CELL_BUFFER_SIZE) {
- cell_batch_flush(cell);
- size = 0;
- }
-
- ASSERT(size + bytes <= CELL_BUFFER_SIZE);
-
- memcpy(cell->buffer[cell->cur_batch] + size, data, bytes);
-
- cell->buffer_size[cell->cur_batch] = size + bytes;
-}
-
-
-/**
* Allocate space in the current batch buffer for 'bytes' space.
+ * Bytes must be a multiple of 16 bytes. Allocation will be 16 byte aligned.
* \return address in batch buffer to put data
*/
void *
-cell_batch_alloc(struct cell_context *cell, uint bytes)
-{
- return cell_batch_alloc_aligned(cell, bytes, 1);
-}
-
-
-/**
- * Same as \sa cell_batch_alloc, but return an address at a particular
- * alignment.
- */
-void *
-cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
- uint alignment)
+cell_batch_alloc16(struct cell_context *cell, uint bytes)
{
void *pos;
- uint size, padbytes;
+ uint size;
- ASSERT(bytes % 8 == 0);
+ ASSERT(bytes % 16 == 0);
ASSERT(bytes <= CELL_BUFFER_SIZE);
- ASSERT(alignment > 0);
ASSERT(cell->cur_batch >= 0);
#ifdef ASSERT
@@ -221,17 +219,12 @@ cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
size = cell->buffer_size[cell->cur_batch];
- padbytes = (alignment - (size % alignment)) % alignment;
-
- if (padbytes + size + bytes > CELL_BUFFER_SIZE) {
+ if (bytes > cell_batch_free_space(cell)) {
cell_batch_flush(cell);
size = 0;
}
- else {
- size += padbytes;
- }
- ASSERT(size % alignment == 0);
+ ASSERT(size % 16 == 0);
ASSERT(size + bytes <= CELL_BUFFER_SIZE);
pos = (void *) (cell->buffer[cell->cur_batch] + size);
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h
index f74dd60079..290136031a 100644
--- a/src/gallium/drivers/cell/ppu/cell_batch.h
+++ b/src/gallium/drivers/cell/ppu/cell_batch.h
@@ -44,15 +44,8 @@ cell_batch_flush(struct cell_context *cell);
extern uint
cell_batch_free_space(const struct cell_context *cell);
-extern void
-cell_batch_append(struct cell_context *cell, const void *data, uint bytes);
-
-extern void *
-cell_batch_alloc(struct cell_context *cell, uint bytes);
-
extern void *
-cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
- uint alignment);
+cell_batch_alloc16(struct cell_context *cell, uint bytes);
extern void
cell_init_batch_buffers(struct cell_context *cell);
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c
index c9c0c721bb..edc06747ac 100644
--- a/src/gallium/drivers/cell/ppu/cell_clear.c
+++ b/src/gallium/drivers/cell/ppu/cell_clear.c
@@ -70,18 +70,12 @@ void
cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
unsigned clearValue)
{
- struct pipe_screen *screen = pipe->screen;
struct cell_context *cell = cell_context(pipe);
uint surfIndex;
if (cell->dirty)
cell_update_derived(cell);
-
- if (!cell->cbuf_map[0])
- cell->cbuf_map[0] = screen->surface_map(screen, ps,
- PIPE_BUFFER_USAGE_GPU_WRITE);
-
if (ps == cell->framebuffer.zsbuf) {
/* clear z/stencil buffer */
surfIndex = 1;
@@ -99,11 +93,25 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
/* Build a CLEAR command and place it in the current batch buffer */
{
+ STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0);
struct cell_command_clear_surface *clr
= (struct cell_command_clear_surface *)
- cell_batch_alloc(cell, sizeof(*clr));
- clr->opcode = CELL_CMD_CLEAR_SURFACE;
+ cell_batch_alloc16(cell, sizeof(*clr));
+ clr->opcode[0] = CELL_CMD_CLEAR_SURFACE;
clr->surface = surfIndex;
clr->value = clearValue;
}
+
+ /* Technically, the surface's contents are now known and cleared,
+ * so we could set the status to PIPE_SURFACE_STATUS_CLEAR. But
+ * it turns out it's quite painful to recognize when any particular
+ * surface goes from PIPE_SURFACE_STATUS_CLEAR to
+ * PIPE_SURFACE_STATUS_DEFINED (i.e. with known contents), because
+ * the drawing commands could be operating on numerous draw buffers,
+ * which we'd have to iterate through to set all their stati...
+ * For now, we cheat a bit and set the surface's status to DEFINED
+ * right here. Later we should revisit this and set the status to
+ * CLEAR here, and find a better place to set the status to DEFINED.
+ */
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
index 71f1a3049d..ae82ded334 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -36,7 +36,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_format.h"
#include "util/u_memory.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_screen.h"
#include "draw/draw_context.h"
@@ -47,6 +47,7 @@
#include "cell_clear.h"
#include "cell_context.h"
#include "cell_draw_arrays.h"
+#include "cell_fence.h"
#include "cell_flush.h"
#include "cell_state.h"
#include "cell_surface.h"
@@ -62,6 +63,8 @@ cell_destroy_context( struct pipe_context *pipe )
{
struct cell_context *cell = cell_context(pipe);
+ util_delete_keymap(cell->fragment_ops_cache, NULL);
+
cell_spu_exit(cell);
align_free(cell);
@@ -85,13 +88,16 @@ cell_draw_create(struct cell_context *cell)
}
-#ifdef DEBUG
static const struct debug_named_value cell_debug_flags[] = {
{"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */
+ {"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */
{"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */
+ {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/
+ {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/
+ {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */
+ {"cache", CELL_DEBUG_CACHE}, /**< report texture cache stats on exit */
{NULL, 0}
};
-#endif
struct pipe_context *
@@ -99,6 +105,7 @@ cell_create_context(struct pipe_screen *screen,
struct cell_winsys *cws)
{
struct cell_context *cell;
+ uint i;
/* some fields need to be 16-byte aligned, so align the whole object */
cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
@@ -125,11 +132,14 @@ cell_create_context(struct pipe_screen *screen,
cell_init_state_functions(cell);
cell_init_shader_functions(cell);
cell_init_surface_functions(cell);
- cell_init_texture_functions(cell);
cell_init_vertex_functions(cell);
cell->draw = cell_draw_create(cell);
+ /* Create cache of fragment ops generated code */
+ cell->fragment_ops_cache =
+ util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL);
+
cell_init_vbuf(cell);
draw_set_rasterize_stage(cell->draw, cell->vbuf);
@@ -143,17 +153,31 @@ cell_create_context(struct pipe_screen *screen,
cell_debug_flags,
0 );
+ for (i = 0; i < CELL_NUM_BUFFERS; i++)
+ cell_fence_init(&cell->fenced_buffers[i].fence);
+
+
/*
* SPU stuff
*/
- cell->num_spus = 6;
- /* XXX is this in SDK 3.0 only?
- cell->num_spus = spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1);
- */
+ /* This call only works with SDK 3.0. Anyone still using 2.1??? */
+ cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1);
+ cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
+ if (cell->debug_flags) {
+ printf("Cell: found %d Cell(s) with %u SPUs\n",
+ cell->num_cells, cell->num_spus);
+ }
+ if (getenv("CELL_NUM_SPUS")) {
+ cell->num_spus = atoi(getenv("CELL_NUM_SPUS"));
+ assert(cell->num_spus > 0);
+ }
cell_start_spus(cell);
cell_init_batch_buffers(cell);
+ /* make sure SPU initializations are done before proceeding */
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
+
return &cell->pipe;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 14914b9c6f..ca03dc1511 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -38,6 +38,7 @@
#include "cell/common.h"
#include "rtasm/rtasm_ppc_spe.h"
#include "tgsi/tgsi_scan.h"
+#include "util/u_keymap.h"
struct cell_vbuf_render;
@@ -67,31 +68,29 @@ struct cell_fragment_shader_state
/**
- * Cell blend state atom, subclass of pipe_blend_state.
+ * Key for mapping per-fragment state to cached SPU machine code.
+ * keymap(cell_fragment_ops_key) => cell_command_fragment_ops
*/
-struct cell_blend_state
+struct cell_fragment_ops_key
{
- struct pipe_blend_state base;
-
- /**
- * Generated code to perform alpha blending
- */
- struct spe_function code;
+ struct pipe_blend_state blend;
+ struct pipe_blend_color blend_color;
+ struct pipe_depth_stencil_alpha_state dsa;
+ enum pipe_format color_format;
+ enum pipe_format zs_format;
};
+struct cell_buffer_node;
+
/**
- * Cell depth/stencil/alpha state atom, subclass of
- * pipe_depth_stencil_alpha_state.
+ * Fenced buffer list. List of buffers which can be unreferenced after
+ * the fence has been executed/signalled.
*/
-struct cell_depth_stencil_alpha_state
+struct cell_buffer_list
{
- struct pipe_depth_stencil_alpha_state base;
-
- /**
- * Generated code to perform alpha, stencil, and depth testing on the SPE
- */
- struct spe_function code;
+ struct cell_fence fence ALIGN16_ATTRIB;
+ struct cell_buffer_node *head;
};
@@ -104,10 +103,10 @@ struct cell_context
struct cell_winsys *winsys;
- const struct cell_blend_state *blend;
+ const struct pipe_blend_state *blend;
const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
uint num_samplers;
- const struct cell_depth_stencil_alpha_state *depth_stencil;
+ const struct pipe_depth_stencil_alpha_state *depth_stencil;
const struct pipe_rasterizer_state *rasterizer;
const struct cell_vertex_shader_state *vs;
const struct cell_fragment_shader_state *fs;
@@ -128,6 +127,9 @@ struct cell_context
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
uint num_vertex_elements;
+ struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS];
+ struct pipe_transfer *zsbuf_transfer;
+
ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
ubyte *zsbuf_map;
@@ -135,6 +137,11 @@ struct cell_context
uint *tex_map;
uint dirty;
+ uint dirty_textures; /* bitmask of texture units */
+ uint dirty_samplers; /* bitmask of sampler units */
+
+ /** Cache of code generated for per-fragment ops */
+ struct keymap *fragment_ops_cache;
/** The primitive drawing context */
struct draw_context *draw;
@@ -149,8 +156,9 @@ struct cell_context
/** Mapped constant buffers */
void *mapped_constants[PIPE_SHADER_TYPES];
+ struct cell_spu_function_info spu_functions ALIGN16_ATTRIB;
- uint num_spus;
+ uint num_cells, num_spus;
/** Buffers for command batches, vertex/index data */
uint buffer_size[CELL_NUM_BUFFERS];
@@ -162,6 +170,14 @@ struct cell_context
uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB;
+ /** Associated with each command/batch buffer is a list of pipe_buffers
+ * that are fenced. When the last command in a buffer is executed, the
+ * fence will be signalled, indicating that any pipe_buffers preceding
+ * that fence can be unreferenced (and probably freed).
+ */
+ struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS];
+
+
struct spe_function attrib_fetch;
unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS];
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index 880d535320..644496db40 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -33,7 +33,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_context.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_inlines.h"
#include "cell_context.h"
@@ -51,9 +51,9 @@ cell_map_constant_buffers(struct cell_context *sp)
struct pipe_winsys *ws = sp->pipe.winsys;
uint i;
for (i = 0; i < 2; i++) {
- if (sp->constants[i].size) {
+ if (sp->constants[i].buffer && sp->constants[i].buffer->size) {
sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer,
- PIPE_BUFFER_USAGE_CPU_READ);
+ PIPE_BUFFER_USAGE_CPU_READ);
cell_flush_buffer_range(sp, sp->mapped_constants[i],
sp->constants[i].buffer->size);
}
@@ -61,7 +61,7 @@ cell_map_constant_buffers(struct cell_context *sp)
draw_set_mapped_constant_buffer(sp->draw,
sp->mapped_constants[PIPE_SHADER_VERTEX],
- sp->constants[PIPE_SHADER_VERTEX].size);
+ sp->constants[PIPE_SHADER_VERTEX].buffer->size);
}
static void
@@ -70,7 +70,7 @@ cell_unmap_constant_buffers(struct cell_context *sp)
struct pipe_winsys *ws = sp->pipe.winsys;
uint i;
for (i = 0; i < 2; i++) {
- if (sp->constants[i].size)
+ if (sp->constants[i].buffer && sp->constants[i].buffer->size)
ws->buffer_unmap(ws, sp->constants[i].buffer);
sp->mapped_constants[i] = NULL;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c
new file mode 100644
index 0000000000..867b5dcaa0
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_fence.c
@@ -0,0 +1,168 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <unistd.h>
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "cell_context.h"
+#include "cell_batch.h"
+#include "cell_fence.h"
+#include "cell_texture.h"
+
+
+void
+cell_fence_init(struct cell_fence *fence)
+{
+ uint i;
+ ASSERT_ALIGN16(fence->status);
+ for (i = 0; i < CELL_MAX_SPUS; i++) {
+ fence->status[i][0] = CELL_FENCE_IDLE;
+ }
+}
+
+
+boolean
+cell_fence_signalled(const struct cell_context *cell,
+ const struct cell_fence *fence)
+{
+ uint i;
+ for (i = 0; i < cell->num_spus; i++) {
+ if (fence->status[i][0] != CELL_FENCE_SIGNALLED)
+ return FALSE;
+ /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/
+ }
+ return TRUE;
+}
+
+
+void
+cell_fence_finish(const struct cell_context *cell,
+ const struct cell_fence *fence)
+{
+ while (!cell_fence_signalled(cell, fence)) {
+ usleep(10);
+ }
+
+#ifdef DEBUG
+ {
+ uint i;
+ for (i = 0; i < cell->num_spus; i++) {
+ assert(fence->status[i][0] == CELL_FENCE_SIGNALLED);
+ }
+ }
+#endif
+}
+
+
+
+
+struct cell_buffer_node
+{
+ struct pipe_buffer *buffer;
+ struct cell_buffer_node *next;
+};
+
+
+static void
+cell_add_buffer_to_list(struct cell_context *cell,
+ struct cell_buffer_list *list,
+ struct pipe_buffer *buffer)
+{
+ struct pipe_screen *ps = cell->pipe.screen;
+ struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node);
+ /* create new list node which references the buffer, insert at head */
+ if (node) {
+ pipe_buffer_reference(ps, &node->buffer, buffer);
+ node->next = list->head;
+ list->head = node;
+ }
+}
+
+
+/**
+ * Wait for completion of the given fence, then unreference any buffers
+ * on the list.
+ * This typically unrefs/frees texture buffers after any rendering which uses
+ * them has completed.
+ */
+void
+cell_free_fenced_buffers(struct cell_context *cell,
+ struct cell_buffer_list *list)
+{
+ if (list->head) {
+ struct pipe_screen *ps = cell->pipe.screen;
+ struct cell_buffer_node *node;
+
+ cell_fence_finish(cell, &list->fence);
+
+ /* traverse the list, unreferencing buffers, freeing nodes */
+ node = list->head;
+ while (node) {
+ struct cell_buffer_node *next = node->next;
+ assert(node->buffer);
+ pipe_buffer_unmap(ps, node->buffer);
+#if 0
+ printf("Unref buffer %p\n", node->buffer);
+ if (node->buffer->refcount == 1)
+ printf(" Delete!\n");
+#endif
+ pipe_buffer_reference(ps, &node->buffer, NULL);
+ FREE(node);
+ node = next;
+ }
+ list->head = NULL;
+ }
+}
+
+
+/**
+ * This should be called for each render command.
+ * Any texture buffers that are currently bound will be added to a fenced
+ * list to be freed later when the fence is executed/signalled.
+ */
+void
+cell_add_fenced_textures(struct cell_context *cell)
+{
+ struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];
+ uint i;
+
+ for (i = 0; i < cell->num_textures; i++) {
+ struct cell_texture *ct = cell->texture[i];
+ if (ct) {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ if (ct->tiled_buffer[level]) {
+#if 0
+ printf("Adding texture %p buffer %p to list\n",
+ ct, ct->tiled_buffer[level]);
+#endif
+ cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]);
+ }
+ }
+ }
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h
new file mode 100644
index 0000000000..536b4ba411
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_fence.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_FENCE_H
+#define CELL_FENCE_H
+
+
+extern void
+cell_fence_init(struct cell_fence *fence);
+
+
+extern boolean
+cell_fence_signalled(const struct cell_context *cell,
+ const struct cell_fence *fence);
+
+
+extern void
+cell_fence_finish(const struct cell_context *cell,
+ const struct cell_fence *fence);
+
+
+
+extern void
+cell_free_fenced_buffers(struct cell_context *cell,
+ struct cell_buffer_list *list);
+
+
+extern void
+cell_add_fenced_textures(struct cell_context *cell);
+
+
+#endif /* CELL_FENCE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c
index 6596b72010..8275c9dc9c 100644
--- a/src/gallium/drivers/cell/ppu/cell_flush.c
+++ b/src/gallium/drivers/cell/ppu/cell_flush.c
@@ -49,7 +49,7 @@ cell_flush(struct pipe_context *pipe, unsigned flags,
flags |= CELL_FLUSH_WAIT;
}
- if (flags & PIPE_FLUSH_SWAPBUFFERS)
+ if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE))
flags |= CELL_FLUSH_WAIT;
draw_flush( cell->draw );
@@ -72,8 +72,9 @@ cell_flush_int(struct cell_context *cell, unsigned flags)
flushing = TRUE;
if (flags & CELL_FLUSH_WAIT) {
- uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t));
- *cmd = CELL_CMD_FINISH;
+ STATIC_ASSERT(sizeof(opcode_t) % 16 == 0);
+ opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t));
+ *cmd[0] = CELL_CMD_FINISH;
}
cell_batch_flush(cell);
@@ -101,11 +102,11 @@ void
cell_flush_buffer_range(struct cell_context *cell, void *ptr,
unsigned size)
{
- uint64_t batch[1 + (ROUNDUP8(sizeof(struct cell_buffer_range)) / 8)];
- struct cell_buffer_range *br = (struct cell_buffer_range *) & batch[1];
-
+ STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 16 == 0);
+ uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell,
+ sizeof(opcode_t) + sizeof(struct cell_buffer_range));
+ struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4];
batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE;
br->base = (uintptr_t) ptr;
br->size = size;
- cell_batch_append(cell, batch, sizeof(batch));
}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 6ffe94eb14..5a889a6119 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -2,6 +2,7 @@
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2009 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -37,7 +38,7 @@
* \author Brian Paul
*/
-
+#include <math.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
@@ -51,25 +52,55 @@
#include "cell_gen_fp.h"
-/** Set to 1 to enable debug/disassembly printfs */
-#define DISASSEM 01
+#define MAX_TEMPS 16
+#define MAX_IMMED 8
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
/**
* Context needed during code generation.
*/
struct codegen
{
+ struct cell_context *cell;
int inputs_reg; /**< 1st function parameter */
int outputs_reg; /**< 2nd function parameter */
int constants_reg; /**< 3rd function parameter */
- int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */
+ int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
+ int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */
+
+ int num_imm; /**< number of immediates */
int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
+ int addr_reg; /**< address register, integer values */
+
/** Per-instruction temps / intermediate temps */
int num_itemps;
- int itemps[3];
+ int itemps[12];
+
+ /** Current IF/ELSE/ENDIF nesting level */
+ int if_nesting;
+ /** Current BGNLOOP/ENDLOOP nesting level */
+ int loop_nesting;
+ /** Location of start of current loop */
+ int loop_start;
+
+ /** Index of if/conditional mask register */
+ int cond_mask_reg;
+ /** Index of loop mask register */
+ int loop_mask_reg;
+
+ /** Index of master execution mask register */
+ int exec_mask_reg;
+
+ /** KIL mask: indicates which fragments have been killed */
+ int kill_mask_reg;
+
+ int frame_size; /**< Stack frame size, in words */
struct spe_function *f;
boolean error;
@@ -112,19 +143,122 @@ get_const_one_reg(struct codegen *gen)
{
if (gen->one_reg <= 0) {
gen->one_reg = spe_allocate_available_register(gen->f);
- }
- /* one = {1.0, 1.0, 1.0, 1.0} */
- spe_load_float(gen->f, gen->one_reg, 1.0f);
-#if DISASSEM
- printf("il\tr%d, 1.0f\n", gen->one_reg);
-#endif
+ spe_indent(gen->f, 4);
+ spe_comment(gen->f, -4, "init constant reg = 1.0:");
+
+ /* one = {1.0, 1.0, 1.0, 1.0} */
+ spe_load_float(gen->f, gen->one_reg, 1.0f);
+
+ spe_indent(gen->f, -4);
+ }
return gen->one_reg;
}
/**
+ * Return index of the address register.
+ * Used for indirect register loads/stores.
+ */
+static int
+get_address_reg(struct codegen *gen)
+{
+ if (gen->addr_reg <= 0) {
+ gen->addr_reg = spe_allocate_available_register(gen->f);
+
+ spe_indent(gen->f, 4);
+ spe_comment(gen->f, -4, "init address reg = 0:");
+
+ /* init addr = {0, 0, 0, 0} */
+ spe_zero(gen->f, gen->addr_reg);
+
+ spe_indent(gen->f, -4);
+ }
+
+ return gen->addr_reg;
+}
+
+
+/**
+ * Return index of the master execution mask.
+ * The register is allocated and initialized upon the first call.
+ *
+ * The master execution mask controls which pixels in a quad are
+ * modified, according to surrounding conditionals, loops, etc.
+ */
+static int
+get_exec_mask_reg(struct codegen *gen)
+{
+ if (gen->exec_mask_reg <= 0) {
+ gen->exec_mask_reg = spe_allocate_available_register(gen->f);
+
+ /* XXX this may not be needed */
+ spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0");
+ spe_load_int(gen->f, gen->exec_mask_reg, ~0);
+ }
+
+ return gen->exec_mask_reg;
+}
+
+
+/** Return index of the conditional (if/else) execution mask register */
+static int
+get_cond_mask_reg(struct codegen *gen)
+{
+ if (gen->cond_mask_reg <= 0) {
+ gen->cond_mask_reg = spe_allocate_available_register(gen->f);
+ }
+
+ return gen->cond_mask_reg;
+}
+
+
+/** Return index of the loop execution mask register */
+static int
+get_loop_mask_reg(struct codegen *gen)
+{
+ if (gen->loop_mask_reg <= 0) {
+ gen->loop_mask_reg = spe_allocate_available_register(gen->f);
+ }
+
+ return gen->loop_mask_reg;
+}
+
+
+
+static boolean
+is_register_src(struct codegen *gen, int channel,
+ const struct tgsi_full_src_register *src)
+{
+ int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
+ int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
+
+ if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
+ return FALSE;
+ }
+ if (src->SrcRegister.File == TGSI_FILE_TEMPORARY ||
+ src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
+static boolean
+is_memory_dst(struct codegen *gen, int channel,
+ const struct tgsi_full_dst_register *dst)
+{
+ if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+ return TRUE;
+ }
+ else {
+ return FALSE;
+ }
+}
+
+
+/**
* Return the index of the SPU temporary containing the named TGSI
* source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
* just return the corresponding SPE register. If the TGSI register
@@ -136,35 +270,99 @@ get_src_reg(struct codegen *gen,
int channel,
const struct tgsi_full_src_register *src)
{
- int reg;
+ int reg = -1;
+ int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
+ boolean reg_is_itemp = FALSE;
+ uint sign_op;
+
+ assert(swizzle >= TGSI_SWIZZLE_X);
+ assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
+
+ if (swizzle == TGSI_EXTSWIZZLE_ONE) {
+ /* Load const one float and early out */
+ reg = get_const_one_reg(gen);
+ }
+ else if (swizzle == TGSI_EXTSWIZZLE_ZERO) {
+ /* Load const zero float and early out */
+ reg = get_itemp(gen);
+ spe_xor(gen->f, reg, reg, reg);
+ }
+ else {
+ int index = src->SrcRegister.Index;
+
+ assert(swizzle < 4);
+
+ if (src->SrcRegister.Indirect) {
+ /* XXX unfinished */
+ }
+
+ switch (src->SrcRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ reg = gen->temp_regs[index][swizzle];
+ break;
+ case TGSI_FILE_INPUT:
+ {
+ /* offset is measured in quadwords, not bytes */
+ int offset = index * 4 + swizzle;
+ reg = get_itemp(gen);
+ reg_is_itemp = TRUE;
+ /* Load: reg = memory[(machine_reg) + offset] */
+ spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ reg = gen->imm_regs[index][swizzle];
+ break;
+ case TGSI_FILE_CONSTANT:
+ {
+ /* offset is measured in quadwords, not bytes */
+ int offset = index * 4 + swizzle;
+ reg = get_itemp(gen);
+ reg_is_itemp = TRUE;
+ /* Load: reg = memory[(machine_reg) + offset] */
+ spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ }
- /* XXX need to examine src swizzle info here.
- * That will involve changing the channel var...
+ /*
+ * Handle absolute value, negate or set-negative of src register.
*/
+ sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
+ if (sign_op != TGSI_UTIL_SIGN_KEEP) {
+ /*
+ * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
+ */
+ const int bit31mask_reg = get_itemp(gen);
+ int result_reg;
+
+ if (reg_is_itemp) {
+ /* re-use 'reg' for the result */
+ result_reg = reg;
+ }
+ else {
+ /* alloc a new reg for the result */
+ result_reg = get_itemp(gen);
+ }
+ /* mask with bit 31 set, the rest cleared */
+ spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
- switch (src->SrcRegister.File) {
- case TGSI_FILE_TEMPORARY:
- reg = gen->temp_regs[src->SrcRegister.Index][channel];
- break;
- case TGSI_FILE_INPUT:
- {
- /* offset is measured in quadwords, not bytes */
- int offset = src->SrcRegister.Index * 4 + channel;
- reg = get_itemp(gen);
- /* Load: reg = memory[(machine_reg) + offset] */
- spe_lqd(gen->f, reg, gen->inputs_reg, offset);
-#if DISASSEM
- printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset);
-#endif
+ if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
+ spe_andc(gen->f, result_reg, reg, bit31mask_reg);
}
- break;
- case TGSI_FILE_IMMEDIATE:
- /* xxx fall-through for now / fix */
- case TGSI_FILE_CONSTANT:
- /* xxx fall-through for now / fix */
- default:
- assert(0);
+ else if (sign_op == TGSI_UTIL_SIGN_SET) {
+ spe_and(gen->f, result_reg, reg, bit31mask_reg);
+ }
+ else {
+ assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
+ spe_xor(gen->f, result_reg, reg, bit31mask_reg);
+ }
+
+ reg = result_reg;
}
return reg;
@@ -183,11 +381,14 @@ get_dst_reg(struct codegen *gen,
int channel,
const struct tgsi_full_dst_register *dest)
{
- int reg;
+ int reg = -1;
switch (dest->DstRegister.File) {
case TGSI_FILE_TEMPORARY:
- reg = gen->temp_regs[dest->DstRegister.Index][channel];
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0)
+ reg = get_itemp(gen); /* inside IF/loop: scratch reg; store_dest_reg() merges it into the temp under the exec mask */
+ else
+ reg = gen->temp_regs[dest->DstRegister.Index][channel]; /* unconditional code: write the temp's own register */
break;
case TGSI_FILE_OUTPUT:
reg = get_itemp(gen);
@@ -211,19 +412,59 @@ store_dest_reg(struct codegen *gen,
int value_reg, int channel,
const struct tgsi_full_dst_register *dest)
{
+ /*
+ * XXX need to implement dst reg clamping/saturation
+ * (the disabled switch below refers to 'inst', which is not a
+ * parameter of this function)
+ */
+#if 0
+ switch (inst->Instruction.Saturate) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ break;
+ default:
+ assert( 0 );
+ }
+#endif
+
switch (dest->DstRegister.File) {
case TGSI_FILE_TEMPORARY:
- /* no-op */
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
+ int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
+ int exec_reg = get_exec_mask_reg(gen);
+ /* Mix d with new value according to exec mask:
+ * d[i] = mask_reg[i] ? value_reg : d_reg
+ */
+ spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
+ }
+ else {
+ /* we're not inside a condition or loop: do nothing special.
+ * Nothing is emitted here, so this relies on the caller having
+ * computed the value into the register returned by get_dst_reg().
+ */
+
+ }
break;
case TGSI_FILE_OUTPUT:
{
/* offset is measured in quadwords, not bytes */
int offset = dest->DstRegister.Index * 4 + channel;
- /* Store: memory[(machine_reg) + offset] = reg */
- spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
-#if DISASSEM
- printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset);
-#endif
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
+ int exec_reg = get_exec_mask_reg(gen);
+ int curval_reg = get_itemp(gen);
+ /* First read the current value from memory:
+ * Load: curval = memory[(machine_reg) + offset]
+ * (the quadword offset is scaled by 16 for the load/store)
+ */
+ spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
+ /* Mix curval with newvalue according to exec mask:
+ * d[i] = mask_reg[i] ? value_reg : d_reg
+ */
+ spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
+ /* Store: memory[(machine_reg) + offset] = curval */
+ spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
+ }
+ else {
+ /* Store: memory[(machine_reg) + offset] = reg */
+ spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
+ }
}
break;
default:
@@ -232,125 +473,1265 @@ store_dest_reg(struct codegen *gen,
}
+
+/**
+ * Emit the function prologue: save the return address and push a stack
+ * frame of gen->frame_size bytes.
+ */
+static void
+emit_prologue(struct codegen *gen)
+{
+   gen->frame_size = 1024; /* XXX temporary, should be dynamic */
+
+   spe_comment(gen->f, 0, "Function prologue:");
+
+   /* stqd $lr,16($sp)  -- save $lr on stack */
+   spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
+
+   if (gen->frame_size < 512) {
+      /* stqd $sp,-frameSize($sp)  -- save stack pointer */
+      spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
+
+      /* ai $sp,$sp,-frameSize  -- adjust stack pointer */
+      spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
+   }
+   else {
+      /* offset is too large for ai instruction: go through registers */
+      const int delta_reg = spe_allocate_available_register(gen->f);
+      const int old_sp_reg = spe_allocate_available_register(gen->f);
+
+      /* delta = -framesize */
+      spe_load_int(gen->f, delta_reg, -gen->frame_size);
+      /* old_sp = $sp */
+      spe_move(gen->f, old_sp_reg, SPE_REG_SP);
+      /* $sp = $sp + delta */
+      spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, delta_reg);
+      /* save the previous $sp at the base of the new frame */
+      spe_stqd(gen->f, old_sp_reg, SPE_REG_SP, 0);
+
+      spe_release_register(gen->f, delta_reg);
+      spe_release_register(gen->f, old_sp_reg);
+   }
+}
+
+
+/**
+ * Emit the function epilogue: load the return value, pop the stack frame,
+ * reload the return address and branch back to the caller.
+ */
+static void
+emit_epilogue(struct codegen *gen)
+{
+   const int result_reg = 3;   /* register that carries the return value */
+
+   spe_comment(gen->f, 0, "Function epilogue:");
+
+   spe_comment(gen->f, 0, "return the killed mask");
+   if (gen->kill_mask_reg <= 0) {
+      /* no kill mask register in use: return {0,0,0,0} */
+      spe_load_uint(gen->f, result_reg, 0);
+   }
+   else {
+      /* shader called KIL, return the "alive" mask */
+      spe_move(gen->f, result_reg, gen->kill_mask_reg);
+   }
+
+   spe_comment(gen->f, 0, "restore stack and return");
+   if (gen->frame_size < 512) {
+      /* ai $sp,$sp,frameSize  -- restore stack pointer */
+      spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
+   }
+   else {
+      /* offset is too large for ai instruction: add it from a register */
+      const int delta_reg = spe_allocate_available_register(gen->f);
+
+      /* delta = framesize */
+      spe_load_int(gen->f, delta_reg, gen->frame_size);
+      /* $sp = $sp + delta */
+      spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, delta_reg);
+
+      spe_release_register(gen->f, delta_reg);
+   }
+
+   /* lqd $lr,16($sp)  -- restore $lr */
+   spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
+
+   /* return from function call */
+   spe_bi(gen->f, SPE_REG_RA, 0, 0);
+}
+
+
+/**
+ * Loop 'ch' over channels 0..3 and run the statement that follows only
+ * for channels enabled in the write mask of the instruction's first
+ * destination register.
+ * Note: this expands to "for (...) if (...)", so follow it with a braced
+ * block and never attach an "else" to it.
+ */
+#define FOR_EACH_ENABLED_CHANNEL(inst, ch)                                 \
+   for ((ch) = 0; (ch) < 4; (ch)++)                                        \
+      if ((inst)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (ch)))
+
+
+/**
+ * Emit ARL: convert channel 0 of the source operand from float to int
+ * and place the result in the address register.
+ */
+static boolean
+emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const int chan = 0;
+   const int value_reg = get_src_reg(gen, chan, &inst->FullSrcRegisters[0]);
+   const int address_reg = get_address_reg(gen);
+
+   /* float -> int conversion */
+   spe_cflts(gen->f, address_reg, value_reg, 0);
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+
+/**
+ * Emit MOV: copy each write-enabled channel of the source operand to the
+ * destination operand.
+ */
static boolean
emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
+   const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[0];
+   const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[0];
+   int chan, from_reg[4], to_reg[4];
+
+   /* first pass: look up the source and destination registers */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      from_reg[chan] = get_src_reg(gen, chan, src);
+      to_reg[chan] = get_dst_reg(gen, chan, dst);
+   }
+
+   /* second pass: copy and store */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      const boolean direct_store =
+         is_register_src(gen, chan, src) && is_memory_dst(gen, chan, dst);
+
+      if (direct_store) {
+         /* special-case: register to memory store, no move needed */
+         store_dest_reg(gen, from_reg[chan], chan, dst);
+      }
+      else {
+         spe_move(gen->f, to_reg[chan], from_reg[chan]);
+         store_dest_reg(gen, to_reg[chan], chan, dst);
+      }
+   }
+
+   free_itemps(gen);
+
+   return TRUE;
+}
+
+/**
+ * Emit a two-operand arithmetic instruction (ADD, SUB or MUL).
+ * One SPE instruction is emitted per write-enabled channel; for any other
+ * opcode no arithmetic instruction is emitted.
+ */
+static boolean
+emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[0];
+   int chan, a_reg[4], b_reg[4], out_reg[4];
+
+   /* Look up operand and result registers for every enabled channel */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      a_reg[chan] = get_src_reg(gen, chan, &inst->FullSrcRegisters[0]);
+      b_reg[chan] = get_src_reg(gen, chan, &inst->FullSrcRegisters[1]);
+      out_reg[chan] = get_dst_reg(gen, chan, dst);
+   }
+
+   /* Emit the arithmetic: out = a <op> b */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      if (inst->Instruction.Opcode == TGSI_OPCODE_ADD) {
+         spe_fa(gen->f, out_reg[chan], a_reg[chan], b_reg[chan]);
+      }
+      else if (inst->Instruction.Opcode == TGSI_OPCODE_SUB) {
+         spe_fs(gen->f, out_reg[chan], a_reg[chan], b_reg[chan]);
+      }
+      else if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
+         spe_fm(gen->f, out_reg[chan], a_reg[chan], b_reg[chan]);
+      }
+   }
+
+   /* Write the results to their destination */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      store_dest_reg(gen, out_reg[chan], chan, dst);
+   }
+
+   /* Release the scratch registers allocated above */
+   free_itemps(gen);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit multiply-add: d = s1 * s2 + s3 for each write-enabled channel.
+ */
+static boolean
+emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[0];
+   int chan, a_reg[4], b_reg[4], c_reg[4], out_reg[4];
+
+   /* look up operand and result registers */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      a_reg[chan] = get_src_reg(gen, chan, &inst->FullSrcRegisters[0]);
+      b_reg[chan] = get_src_reg(gen, chan, &inst->FullSrcRegisters[1]);
+      c_reg[chan] = get_src_reg(gen, chan, &inst->FullSrcRegisters[2]);
+      out_reg[chan] = get_dst_reg(gen, chan, dst);
+   }
+
+   /* out = a * b + c */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      spe_fma(gen->f, out_reg[chan], a_reg[chan], b_reg[chan], c_reg[chan]);
+   }
+
+   /* write results */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      store_dest_reg(gen, out_reg[chan], chan, dst);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+
+/**
+ * Emit linear interpolate: d = s3 + s1 * (s2 - s3) for each write-enabled
+ * channel.
+ */
+static boolean
+emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[0];
+   int chan, w_reg[4], a_reg[4], b_reg[4], out_reg[4], diff_reg[4];
+
+   /* look up operand/result registers and grab one scratch per channel */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      w_reg[chan] = get_src_reg(gen, chan, &inst->FullSrcRegisters[0]);
+      a_reg[chan] = get_src_reg(gen, chan, &inst->FullSrcRegisters[1]);
+      b_reg[chan] = get_src_reg(gen, chan, &inst->FullSrcRegisters[2]);
+      out_reg[chan] = get_dst_reg(gen, chan, dst);
+      diff_reg[chan] = get_itemp(gen);
+   }
+
+   /* The work is issued in three passes (subtracts, then multiply-adds,
+    * then stores) to better pipeline.
+    */
+
+   /* diff = a - b */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      spe_fs(gen->f, diff_reg[chan], a_reg[chan], b_reg[chan]);
+   }
+
+   /* out = diff * w + b */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      spe_fma(gen->f, out_reg[chan], diff_reg[chan], w_reg[chan], b_reg[chan]);
+   }
+
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      store_dest_reg(gen, out_reg[chan], chan, dst);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+
+
+/**
+ * Emit reciprocal (RCP) or reciprocal square root (any other opcode routed
+ * here): an estimate instruction followed by an interpolate step, for each
+ * write-enabled channel.
+ */
+static boolean
+emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[0];
+   int chan, in_reg[4], out_reg[4], est_reg[4];
+
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      in_reg[chan] = get_src_reg(gen, chan, &inst->FullSrcRegisters[0]);
+      out_reg[chan] = get_dst_reg(gen, chan, dst);
+      est_reg[chan] = get_itemp(gen);
+   }
+
+   /* first pass: estimates */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      if (inst->Instruction.Opcode != TGSI_OPCODE_RCP) {
+         /* est = 1/sqrt(in) */
+         spe_frsqest(gen->f, est_reg[chan], in_reg[chan]);
+      }
+      else {
+         /* est = 1/in */
+         spe_frest(gen->f, est_reg[chan], in_reg[chan]);
+      }
+   }
+
+   /* second pass: out = float_interp(in, est) */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      spe_fi(gen->f, out_reg[chan], in_reg[chan], est_reg[chan]);
+   }
+
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      store_dest_reg(gen, out_reg[chan], chan, dst);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+
+/**
+ * Emit absolute value: clear the IEEE sign bit (bit 31) of each
+ * write-enabled channel of the source.
+ */
+static boolean
+emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   int ch, s1_reg[4], d_reg[4];
+   const int bit31mask_reg = get_itemp(gen);
+
+   /* mask with bit 31 set, the rest cleared.
+    * The constant is unsigned: shifting a signed 1 into bit 31 is
+    * undefined behaviour in C.
+    */
+   spe_load_uint(gen->f, bit31mask_reg, 1u << 31);
+
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+   }
+
+   /* d = s1 & ~mask, i.e. s1 with the sign bit cleared */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg);
+   }
+
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+/**
+ * Emit 3 component dot product.
+ * The sum x0*x1 + y0*y1 + z0*z1 is computed once in a scratch register
+ * and then copied to every write-enabled destination channel.
+ */
+static boolean
+emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
int ch;
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* XXX we don't always need to actually emit a mov instruction here */
- spe_move(gen->f, dst_reg, src_reg);
-#if DISASSEM
- printf("mov\tr%d, r%d\n", dst_reg, src_reg);
-#endif
- store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
+ int s1x_reg, s1y_reg, s1z_reg;
+ int s2x_reg, s2y_reg, s2z_reg;
+ int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
+
+ s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+
+ /* t0 = x0 * x1 */
+ spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
+
+ /* t1 = y0 * y1 */
+ spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
+
+ /* t0 = z0 * z1 + t0 */
+ spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
+
+ /* t0 = t0 + t1 */
+ spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, t0_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+/**
+ * Emit 4 component dot product.
+ * The sum x0*x1 + y0*y1 + z0*z1 + w0*w1 is computed once, using two
+ * partial sums, and then copied to every write-enabled destination channel.
+ */
+static boolean
+emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *src0 = &inst->FullSrcRegisters[0];
+   const struct tgsi_full_src_register *src1 = &inst->FullSrcRegisters[1];
+   const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[0];
+   int sum_reg = get_itemp(gen), part_reg = get_itemp(gen);
+   int ax_reg, ay_reg, az_reg, aw_reg;
+   int bx_reg, by_reg, bz_reg, bw_reg;
+   int chan;
+
+   ax_reg = get_src_reg(gen, CHAN_X, src0);
+   bx_reg = get_src_reg(gen, CHAN_X, src1);
+   ay_reg = get_src_reg(gen, CHAN_Y, src0);
+   by_reg = get_src_reg(gen, CHAN_Y, src1);
+   az_reg = get_src_reg(gen, CHAN_Z, src0);
+   bz_reg = get_src_reg(gen, CHAN_Z, src1);
+   aw_reg = get_src_reg(gen, CHAN_W, src0);
+   bw_reg = get_src_reg(gen, CHAN_W, src1);
+
+   /* sum = x0 * x1 */
+   spe_fm(gen->f, sum_reg, ax_reg, bx_reg);
+
+   /* part = y0 * y1 */
+   spe_fm(gen->f, part_reg, ay_reg, by_reg);
+
+   /* sum = z0 * z1 + sum */
+   spe_fma(gen->f, sum_reg, az_reg, bz_reg, sum_reg);
+
+   /* part = w0 * w1 + part */
+   spe_fma(gen->f, part_reg, aw_reg, bw_reg, part_reg);
+
+   /* sum = sum + part */
+   spe_fa(gen->f, sum_reg, sum_reg, part_reg);
+
+   /* replicate the scalar result across the enabled channels */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      const int out_reg = get_dst_reg(gen, chan, dst);
+      spe_move(gen->f, out_reg, sum_reg);
+      store_dest_reg(gen, out_reg, chan, dst);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+/**
+ * Emit homogeneous dot product: x0*x1 + y0*y1 + z0*z1 + w1.
+ * The sum is computed once in a scratch register and then copied to every
+ * write-enabled destination channel.
+ */
+static boolean
+emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   /* XXX rewrite this function to look more like DP3/DP4 */
+   int ch;
+   int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+   int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+   int tmp_reg = get_itemp(gen);
+
+   /* t = x0 * x1 */
+   spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+   s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+   s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+   /* t = y0 * y1 + t */
+   spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+   s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+   s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+   /* t = z0 * z1 + t */
+   spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+   s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
+   /* t = w1 + t */
+   spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
+
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      spe_move(gen->f, d_reg, tmp_reg);
+      /* store the register we just wrote, as DP3/DP4 do */
+      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+/**
+ * Emit 3-component vector normalize.
+ * X, Y and Z of the source are scaled by 1/sqrt(x*x + y*y + z*z).
+ * If the W channel is write-enabled it receives 1.0; it is not part of
+ * the normalized vector and there is no fourth source register to scale.
+ */
+static boolean
+emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   int ch;
+   int src_reg[3];
+   int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
+
+   src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+   src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+   src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+
+   /* t0 = x * x */
+   spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
+
+   /* t1 = y * y */
+   spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]);
+
+   /* t0 = z * z + t0 */
+   spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg);
+
+   /* t0 = t0 + t1 */
+   spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
+
+   /* t1 = 1.0 / sqrt(t0) */
+   spe_frsqest(gen->f, t1_reg, t0_reg);
+   spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
+
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      if (ch < 3) {
+         /* dst = src[ch] * t1 */
+         spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
+      }
+      else {
+         /* W: src_reg[] only has three entries, write constant 1.0 */
+         spe_move(gen->f, d_reg, get_const_one_reg(gen));
+      }
+      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+
+/**
+ * Emit cross product:
+ *   d.x = y0 * z1 - z0 * y1
+ *   d.y = z0 * x1 - x0 * z1
+ *   d.z = x0 * y1 - y0 * x1
+ * Only write-enabled channels among X, Y, Z are computed; W is not
+ * touched.
+ *
+ * All results are computed into scratch registers first and written
+ * afterwards, so a destination that is also a source is not clobbered
+ * early.  Each result goes through get_dst_reg()/spe_move() before
+ * store_dest_reg(), because store_dest_reg() emits nothing for TEMPORARY
+ * destinations outside of IF/loop blocks.
+ */
+static boolean
+emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *src0 = &inst->FullSrcRegisters[0];
+   const struct tgsi_full_src_register *src1 = &inst->FullSrcRegisters[1];
+   const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[0];
+   const boolean write_x = (dst->DstRegister.WriteMask & (1 << CHAN_X)) != 0;
+   const boolean write_y = (dst->DstRegister.WriteMask & (1 << CHAN_Y)) != 0;
+   const boolean write_z = (dst->DstRegister.WriteMask & (1 << CHAN_Z)) != 0;
+   int x_reg = -1, y_reg = -1, z_reg = -1;
+
+   /* fetch each source channel once */
+   const int x0_reg = get_src_reg(gen, CHAN_X, src0);
+   const int y0_reg = get_src_reg(gen, CHAN_Y, src0);
+   const int z0_reg = get_src_reg(gen, CHAN_Z, src0);
+   const int x1_reg = get_src_reg(gen, CHAN_X, src1);
+   const int y1_reg = get_src_reg(gen, CHAN_Y, src1);
+   const int z1_reg = get_src_reg(gen, CHAN_Z, src1);
+
+   if (write_x) {
+      x_reg = get_itemp(gen);
+      /* x = z0 * y1 */
+      spe_fm(gen->f, x_reg, z0_reg, y1_reg);
+      /* x = y0 * z1 - x */
+      spe_fms(gen->f, x_reg, y0_reg, z1_reg, x_reg);
+   }
+
+   if (write_y) {
+      y_reg = get_itemp(gen);
+      /* y = x0 * z1 */
+      spe_fm(gen->f, y_reg, x0_reg, z1_reg);
+      /* y = z0 * x1 - y */
+      spe_fms(gen->f, y_reg, z0_reg, x1_reg, y_reg);
+   }
+
+   if (write_z) {
+      z_reg = get_itemp(gen);
+      /* z = y0 * x1 */
+      spe_fm(gen->f, z_reg, y0_reg, x1_reg);
+      /* z = x0 * y1 - z */
+      spe_fms(gen->f, z_reg, x0_reg, y1_reg, z_reg);
+   }
+
+   /* every source has been read: now write the destination channels */
+   if (write_x) {
+      const int d_reg = get_dst_reg(gen, CHAN_X, dst);
+      spe_move(gen->f, d_reg, x_reg);
+      store_dest_reg(gen, d_reg, CHAN_X, dst);
+   }
+   if (write_y) {
+      const int d_reg = get_dst_reg(gen, CHAN_Y, dst);
+      spe_move(gen->f, d_reg, y_reg);
+      store_dest_reg(gen, d_reg, CHAN_Y, dst);
+   }
+   if (write_z) {
+      const int d_reg = get_dst_reg(gen, CHAN_Z, dst);
+      spe_move(gen->f, d_reg, z_reg);
+      store_dest_reg(gen, d_reg, CHAN_Z, dst);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+
+/**
+ * Emit inequality instruction.
+ * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
+ * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
+ * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
+ *
+ * Handles SGT, SLT, SGE, SLE, SEQ and SNE.  SGE, SLE and SNE are emitted
+ * as the opposite comparison (SLT, SGT, SEQ) and the result is then
+ * complemented during the conversion to 0.0/1.0.
+ */
+static boolean
+emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
+ bool complement = FALSE; /* NOTE(review): 'bool' here, 'boolean' elsewhere in this file */
+
+ one_reg = get_const_one_reg(gen);
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_SGT:
+ spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ case TGSI_OPCODE_SLT:
+ spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
+ break;
+ case TGSI_OPCODE_SGE:
+ spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
+ complement = TRUE;
+ break;
+ case TGSI_OPCODE_SLE:
+ spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ complement = TRUE;
+ break;
+ case TGSI_OPCODE_SEQ:
+ spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ case TGSI_OPCODE_SNE:
+ spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ complement = TRUE;
+ break;
+ default:
+ assert(0);
}
}
- return true;
+
+ /* convert d from 0x0/0xffffffff to 0.0/1.0
+ * (the opcode is the same for every channel, so 'complement' set in
+ * the loop above applies to all of them)
+ */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ /* d = d & one_reg, or d = one_reg & ~d when complementing */
+ if (complement)
+ spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]);
+ else
+ spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
}
/**
- * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
- * becomes (up to) four SPU "fa" instructions because we're doing SOA
- * processing.
+ * Emit compare.
+ * For each write-enabled channel: d = (s1 < 0) ? s2 : s3.
+ * Scratch registers are released after every channel.
*/
static boolean
-emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
+emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch;
- /* Loop over Red/Green/Blue/Alpha channels */
- for (ch = 0; ch < 4; ch++) {
- /* If the dest R, G, B or A writemask is enabled... */
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- /* get indexes of the two src, one dest SPE registers */
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
- /* Emit actual SPE instruction: d = s1 + s2 */
- spe_fa(gen->f, d_reg, s1_reg, s2_reg);
-#if DISASSEM
- printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int zero_reg = get_itemp(gen);
+
+ spe_zero(gen->f, zero_reg);
+
+ /* d = (s1 < 0) ? s2 : s3
+ * The comparison mask is built in d_reg itself and then used as the
+ * select mask.
+ * NOTE(review): if d_reg is the same register as s2_reg or s3_reg
+ * (destination temp also used as a source, outside IF/loop) the fcgt
+ * overwrites that operand before selb reads it -- confirm.
+ */
+ spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
+ spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+
+ return TRUE;
+}
+
+/**
+ * Emit trunc: for each write-enabled channel, convert the float to a
+ * signed int and then convert that int back to float.
+ */
+static boolean
+emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[0];
+   int chan, in_reg[4], out_reg[4];
+
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      in_reg[chan] = get_src_reg(gen, chan, &inst->FullSrcRegisters[0]);
+      out_reg[chan] = get_dst_reg(gen, chan, dst);
+   }
+
+   /* out = (int) in */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      spe_cflts(gen->f, out_reg[chan], in_reg[chan], 0);
+   }
+
+   /* out = (float) out */
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      spe_csflt(gen->f, out_reg[chan], out_reg[chan], 0);
+   }
+
+   FOR_EACH_ENABLED_CHANNEL(inst, chan) {
+      store_dest_reg(gen, out_reg[chan], chan, dst);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+
+/**
+ * Emit floor.
+ *   trunc = (float) (int) s1          (round toward zero)
+ *   d     = trunc - ((trunc > s1) ? 1.0 : 0.0)
+ *
+ * Truncation already gives the floor for non-negative values and for
+ * negative integers; 1.0 is subtracted only when truncation rounded up,
+ * i.e. for negative non-integers.  (Subtracting 1.0 from every negative
+ * input before truncating would turn floor(-2.0) into -3.0.)
+ *
+ * The destination is written only in the last pass, after every source
+ * register has been read.
+ */
+static boolean
+emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   int ch, s1_reg[4], d_reg[4], trunc_reg[4], adj_reg[4], zero_reg, one_reg;
+
+   zero_reg = get_itemp(gen);
+   spe_zero(gen->f, zero_reg);
+   one_reg = get_const_one_reg(gen);
+
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      trunc_reg[ch] = get_itemp(gen);
+      adj_reg[ch] = get_itemp(gen);
+   }
+
+   /* Convert float to int */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_cflts(gen->f, trunc_reg[ch], s1_reg[ch], 0);
+   }
+
+   /* Convert int to float: trunc = s1 rounded toward zero */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_csflt(gen->f, trunc_reg[ch], trunc_reg[ch], 0);
+   }
+
+   /* adj = (trunc > s1) ? 1.0 : 0.0 */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_fcgt(gen->f, adj_reg[ch], trunc_reg[ch], s1_reg[ch]);
+   }
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_selb(gen->f, adj_reg[ch], zero_reg, one_reg, adj_reg[ch]);
+   }
+
+   /* d = trunc - adj */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_fs(gen->f, d_reg[ch], trunc_reg[ch], adj_reg[ch]);
+   }
+
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+
+/**
+ * Compute frac = Input - FLR(Input)
+ *
+ * The floor is computed as
+ *   trunc = (float) (int) s1          (round toward zero)
+ *   floor = trunc - ((trunc > s1) ? 1.0 : 0.0)
+ * so that negative integers keep their value (subtracting 1.0 from every
+ * negative input before truncating would make FRC(-2.0) come out as 1.0).
+ *
+ * The destination is written only in the last pass, after every source
+ * register has been read.
+ */
+static boolean
+emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   int ch, s1_reg[4], d_reg[4], floor_reg[4], adj_reg[4], zero_reg, one_reg;
+
+   zero_reg = get_itemp(gen);
+   spe_zero(gen->f, zero_reg);
+   one_reg = get_const_one_reg(gen);
+
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      floor_reg[ch] = get_itemp(gen);
+      adj_reg[ch] = get_itemp(gen);
+   }
+
+   /* Convert float to int */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_cflts(gen->f, floor_reg[ch], s1_reg[ch], 0);
+   }
+
+   /* Convert int to float: floor_reg = s1 rounded toward zero */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_csflt(gen->f, floor_reg[ch], floor_reg[ch], 0);
+   }
+
+   /* adj = (trunc > s1) ? 1.0 : 0.0 */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_fcgt(gen->f, adj_reg[ch], floor_reg[ch], s1_reg[ch]);
+   }
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_selb(gen->f, adj_reg[ch], zero_reg, one_reg, adj_reg[ch]);
+   }
+
+   /* floor = trunc - adj */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_fs(gen->f, floor_reg[ch], floor_reg[ch], adj_reg[ch]);
+   }
+
+   /* d = s1 - FLR(s1) */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_fs(gen->f, d_reg[ch], s1_reg[ch], floor_reg[ch]);
+   }
+
+   /* store result */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+   }
+
+   free_itemps(gen);
+   return TRUE;
+}
+
+
+#if 0
+/* Debug helper (compiled out): print the index, name and address of each
+ * entry in cell->spu_functions.
+ */
+static void
+print_functions(struct cell_context *cell)
+{
+ struct cell_spu_function_info *funcs = &cell->spu_functions;
+ uint i;
+ for (i = 0; i < funcs->num; i++) {
+ printf("SPU func %u: %s at %u\n",
+ i, funcs->names[i], funcs->addrs[i]);
+ }
+}
#endif
- /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- /* Free any intermediate temps we allocated */
- free_itemps(gen);
+
+/**
+ * Look up an SPU function by name in cell->spu_functions.
+ * Returns the function's address divided by 4.  Asserts if no entry
+ * matches (or if the matching address is 0).
+ */
+static uint
+lookup_function(struct cell_context *cell, const char *funcname)
+{
+ const struct cell_spu_function_info *funcs = &cell->spu_functions;
+ uint i, addr = 0;
+ for (i = 0; i < funcs->num; i++) {
+ if (strcmp(funcs->names[i], funcname) == 0) {
+ addr = funcs->addrs[i]; /* no break: if several entries match, the last one wins */
}
}
- return true;
+ assert(addr && "spu function not found");
+ return addr / 4; /* discard 2 least significant bits */
}
/**
- * Emit multiply. See emit_ADD for comments.
+ * Emit code to call a SPU function.
+ * Used to implement instructions like SIN/COS/POW/TEX/etc.
+ * If scalar, only the X components of the src regs are used, and the
+ * result is replicated across the dest register's XYZW components.
+ *
+ * \param funcname  name passed to lookup_function() to find the address
+ * \param num_args  number of source operands passed as arguments (max 3)
+ * \param scalar    if true the function is called once, otherwise once
+ *                  per write-enabled channel
*/
static boolean
-emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+emit_function_call(struct codegen *gen,
+ const struct tgsi_full_instruction *inst,
+ char *funcname, uint num_args, boolean scalar)
+{
+ const uint addr = lookup_function(gen->cell, funcname);
+ char comment[100];
+ int s_regs[3];
+ int func_called = FALSE;
+ uint a, ch;
+ int retval_reg = -1;
+
+ assert(num_args <= 3);
+
+ snprintf(comment, sizeof(comment), "CALL %s:", funcname);
+ spe_comment(gen->f, -4, comment);
+
+ if (scalar) {
+ for (a = 0; a < num_args; a++) {
+ s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]);
+ }
+ /* we'll call the function, put the return value in this register,
+ * then replicate it across all write-enabled components in d_reg.
+ */
+ retval_reg = spe_allocate_available_register(gen->f);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg;
+ ubyte usedRegs[SPE_NUM_REGS];
+ uint i, numUsed;
+
+ if (!scalar) {
+ for (a = 0; a < num_args; a++) {
+ s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
+ }
+ }
+
+ d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ if (!scalar || !func_called) {
+ /* for a scalar function, we'll really only call the function once */
+
+ numUsed = spe_get_registers_used(gen->f, usedRegs);
+ assert(numUsed < gen->frame_size / 16 - 2);
+
+ /* save registers to stack
+ * (register i of the list goes to byte offset 16 * (2 + i) from $sp)
+ */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ int offset = 2 + i;
+ spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+
+ /* setup function arguments: argument a goes in register 3 + a */
+ for (a = 0; a < num_args; a++) {
+ spe_move(gen->f, 3 + a, s_regs[a]);
+ }
+
+ /* branch to function, save return addr */
+ spe_brasl(gen->f, SPE_REG_RA, addr);
+
+ /* save function's return value (it comes back in register 3) */
+ if (scalar)
+ spe_move(gen->f, retval_reg, 3);
+ else
+ spe_move(gen->f, d_reg, 3);
+
+ /* restore registers from stack, skipping d_reg and retval_reg so
+ * the value just copied out of register 3 is not overwritten
+ */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ if (reg != d_reg && reg != retval_reg) {
+ int offset = 2 + i;
+ spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+ }
+
+ func_called = TRUE;
+ }
+
+ if (scalar) {
+ spe_move(gen->f, d_reg, retval_reg);
+ }
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+
+ if (scalar) {
+ spe_release_register(gen->f, retval_reg);
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for a texture instruction as a call to an SPU texture function.
+ * The callee is chosen from the texture target: 1D and 2D use "spu_tex_2d",
+ * 3D uses "spu_tex_3d" and CUBE uses "spu_tex_cube".
+ * The four coordinate channels of source 0 are copied into registers 3..6
+ * and the sampler unit (the index of source 1) is loaded into register 7;
+ * after the call the four results are copied out of registers 3..6.
+ * \return FALSE for an unsupported texture target, TRUE otherwise.
+ */
+static boolean
+emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
+ const uint target = inst->InstructionExtTexture.Texture;
+ const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ uint addr;
int ch;
+ int coord_regs[4], d_regs[4];
+
+ /* pick the function to call; bail out before emitting anything if the
+ * target is not handled
+ */
+ switch (target) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_2D:
+ addr = lookup_function(gen->cell, "spu_tex_2d");
+ break;
+ case TGSI_TEXTURE_3D:
+ addr = lookup_function(gen->cell, "spu_tex_3d");
+ break;
+ case TGSI_TEXTURE_CUBE:
+ addr = lookup_function(gen->cell, "spu_tex_cube");
+ break;
+ default:
+ ASSERT(0 && "unsupported texture target");
+ return FALSE;
+ }
+
+ assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER);
+
+ spe_comment(gen->f, -4, "CALL tex:");
+
+ /* get src/dst reg info */
+ /* all four channels are fetched here regardless of the write mask;
+ * only the enabled channels are stored at the end of the function
+ */
for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* d = s1 * s2 */
- spe_fm(gen->f, d_reg, s1_reg, s2_reg);
-#if DISASSEM
- printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
-#endif
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
+ coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+
+ /* call sequence: spill the registers in use, load the arguments, call,
+ * copy the results out, then reload the spilled registers
+ */
+ {
+ ubyte usedRegs[SPE_NUM_REGS];
+ uint i, numUsed;
+
+ numUsed = spe_get_registers_used(gen->f, usedRegs);
+ assert(numUsed < gen->frame_size / 16 - 2);
+
+ /* save registers to stack */
+ /* NOTE(review): slots start at index 2 (16 bytes each) - presumably the
+ * first two slots of the frame are reserved; confirm against the prologue
+ */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ int offset = 2 + i;
+ spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+
+ /* setup function arguments (XXX depends on target) */
+ for (i = 0; i < 4; i++) {
+ spe_move(gen->f, 3 + i, coord_regs[i]);
}
+ spe_load_uint(gen->f, 7, unit); /* sampler unit */
+
+ /* branch to function, save return addr */
+ spe_brasl(gen->f, SPE_REG_RA, addr);
+
+ /* save function's return values (four pixel's colors) */
+ for (i = 0; i < 4; i++) {
+ spe_move(gen->f, d_regs[i], 3 + i);
+ }
+
+ /* restore registers from stack */
+ /* the destination registers are skipped so the reload does not
+ * overwrite the results that were just copied into them
+ */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ if (reg != d_regs[0] &&
+ reg != d_regs[1] &&
+ reg != d_regs[2] &&
+ reg != d_regs[3]) {
+ int offset = 2 + i;
+ spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+ }
+ }
+
+ /* write back only the channels enabled in the destination write mask */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
}
- return true;
+
+ return TRUE;
}
/**
- * Emit set-if-greater-than.
- * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
- * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
- * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
+ * KILL if any of the src reg values are less than zero.
+ * For every enabled channel a "0 > src" comparison mask is computed and
+ * the per-channel masks are OR'ed together. Inside an IF or a loop the
+ * result is AND'ed with the execution mask. The result is then merged
+ * into gen->kill_mask_reg, which is allocated here on first use.
+ * Always returns TRUE.
*/
static boolean
-emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
+emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch;
+ int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
- /* d = (s1 > s2) */
- spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
-#if DISASSEM
- printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
-#endif
+ spe_comment(gen->f, -4, "CALL kil:");
- /* convert d from 0x0/0xffffffff to 0.0/1.0 */
- /* d = d & one_reg */
- spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
-#if DISASSEM
- printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen));
-#endif
+ /* zero = {0,0,0,0} */
+ zero_reg = get_itemp(gen);
+ spe_zero(gen->f, zero_reg);
+
+ /* scratch register for the per-channel comparison result */
+ cmp_reg = get_itemp(gen);
+
+ /* get src regs */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ }
+
+ /* test if any src regs are < 0 */
+ /* kil_reg starts at -1; the first enabled channel allocates it and
+ * writes its comparison directly, later channels OR theirs in
+ */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ if (kil_reg >= 0) {
+ /* cmp = (0 > src) ? ~0 : 0 */
+ spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
+ /* kil = kil | cmp */
+ spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
+ }
+ else {
+ kil_reg = get_itemp(gen);
+ /* kil = (0 > src) ? ~0 : 0 */
+ spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
+ }
+ }
+
+ /* NOTE(review): if no channel was enabled above, kil_reg is still -1 here
+ * and is passed to the emitters below - confirm this cannot happen
+ */
+ if (gen->if_nesting || gen->loop_nesting) {
+ /* may have been a conditional kil */
+ spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
+ }
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
+ /* allocate the kill mask reg if needed */
+ /* a value <= 0 is treated as "not allocated yet": the first KIL copies
+ * its mask, later ones OR into the existing mask
+ */
+ if (gen->kill_mask_reg <= 0) {
+ gen->kill_mask_reg = spe_allocate_available_register(gen->f);
+ spe_move(gen->f, gen->kill_mask_reg, kil_reg);
+ }
+ else {
+ spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
+ }
+
+ free_itemps(gen);
+
+ return TRUE;
+}
+
+
+
+/**
+ * Emit code for the TGSI MIN and MAX opcodes.
+ * For each enabled channel a greater-than mask is computed with fcgt and
+ * then used by selb to pick between the two sources.  The work is done in
+ * separate passes over the enabled channels (fetch, compare, select,
+ * store) and the temporaries are released once at the end.
+ * Always returns TRUE.
+ */
+static boolean
+emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   int lhs[4], rhs[4], dst[4], sel[4];
+   int ch;
+
+   /* pass 1: fetch both operands, the destination and a mask temp */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      lhs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+      rhs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+      dst[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      sel[ch] = get_itemp(gen);
+   }
+
+   /* pass 2: sel = (s0 > s1) for MAX, sel = (s1 > s0) otherwise */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      const boolean is_max = (inst->Instruction.Opcode == TGSI_OPCODE_MAX);
+      const int greater = is_max ? lhs[ch] : rhs[ch];
+      const int lesser = is_max ? rhs[ch] : lhs[ch];
+
+      spe_fcgt(gen->f, sel[ch], greater, lesser);
+   }
+
+   /* pass 3: d = sel ? s0 : s1 */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      spe_selb(gen->f, dst[ch], rhs[ch], lhs[ch], sel[ch]);
+   }
+
+   /* pass 4: write the results back */
+   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+      store_dest_reg(gen, dst[ch], ch, &inst->FullDstRegisters[0]);
+   }
+
+   free_itemps(gen);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code that recomputes the master execution mask.
+ * Call this whenever the conditional or loop execution state changes.
+ * The new mask is:
+ *   cond_mask & loop_mask   inside both an IF and a loop,
+ *   cond_mask               inside an IF only,
+ *   loop_mask               inside a loop only,
+ *   ~0                      otherwise.
+ */
+static void
+emit_update_exec_mask(struct codegen *gen)
+{
+   const int exec_reg = get_exec_mask_reg(gen);
+   const int cond_reg = gen->cond_mask_reg;
+   const int loop_reg = gen->loop_mask_reg;
+   const boolean in_if = gen->if_nesting > 0;
+   const boolean in_loop = gen->loop_nesting > 0;
+
+   spe_comment(gen->f, 0, "Update master execution mask");
+
+   if (!in_if && !in_loop) {
+      /* unconditional code: every lane executes */
+      spe_load_int(gen->f, exec_reg, ~0x0);
+      return;
+   }
+
+   /* any mask we are about to read must already have been set up */
+   if (in_if)
+      assert(cond_reg > 0);
+   if (in_loop)
+      assert(loop_reg > 0);
+
+   if (in_if && in_loop) {
+      /* exec_mask = cond_mask & loop_mask */
+      spe_and(gen->f, exec_reg, cond_reg, loop_reg);
+   }
+   else {
+      /* exactly one of the two masks is active: copy it */
+      spe_move(gen->f, exec_reg, in_if ? cond_reg : loop_reg);
+   }
+}
+
+
+/**
+ * Emit code for the TGSI IF opcode.
+ * The conditional mask is reset to ~0 and then AND'ed with
+ * "channel 0 of source 0 != 0" (integer compare against zero).  The IF
+ * nesting count is bumped and the master execution mask is recomputed.
+ * Always returns TRUE.
+ */
+static boolean
+emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const int channel = 0;
+   int mask_reg;
+   int nonzero_reg;
+   int pred_reg;
+
+   mask_reg = get_cond_mask_reg(gen);
+
+   /* XXX push cond exec mask */
+
+   spe_comment(gen->f, 0, "init conditional exec mask = ~0:");
+   spe_load_int(gen->f, mask_reg, ~0);
+
+   /* fold the predicate register into the conditional mask */
+   nonzero_reg = get_itemp(gen);
+   pred_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
+
+   /* nonzero = (pred == 0) */
+   spe_ceqi(gen->f, nonzero_reg, pred_reg, 0);
+   /* nonzero = !nonzero */
+   spe_complement(gen->f, nonzero_reg, nonzero_reg);
+   /* cond_mask &= nonzero */
+   spe_and(gen->f, mask_reg, mask_reg, nonzero_reg);
+
+   gen->if_nesting++;
+
+   /* the master execution mask depends on the conditional mask */
+   emit_update_exec_mask(gen);
+
+   free_itemps(gen);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for the TGSI ELSE opcode: invert the conditional mask and
+ * recompute the master execution mask.  'inst' is not used.
+ * Always returns TRUE.
+ */
+static boolean
+emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   int mask_reg;
+
+   (void) inst;
+
+   mask_reg = get_cond_mask_reg(gen);
+
+   spe_comment(gen->f, 0, "cond exec mask = !cond exec mask");
+   /* cond_mask = ~cond_mask */
+   spe_complement(gen->f, mask_reg, mask_reg);
+
+   emit_update_exec_mask(gen);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for the TGSI ENDIF opcode: leave one level of IF nesting and
+ * recompute the master execution mask.  'inst' is not used.
+ * Always returns TRUE.
+ */
+static boolean
+emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   (void) inst;
+
+   /* XXX todo: pop cond exec mask */
+
+   --gen->if_nesting;
+
+   emit_update_exec_mask(gen);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for the start of a loop.
+ * The loop mask is initialized to ~0, the loop nesting count is bumped
+ * and the current code size is recorded as the loop start so that the
+ * matching end-of-loop can branch back to it.  'inst' is not used.
+ * Always returns TRUE.
+ */
+static boolean
+emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   int loop_mask;
+
+   (void) inst;
+
+   /* The returned register is not needed here; the call itself is kept.
+    * NOTE(review): presumably it makes sure the exec mask register is
+    * allocated - confirm.
+    */
+   (void) get_exec_mask_reg(gen);
+   loop_mask = get_loop_mask_reg(gen);
+
+   /* XXX push loop_exec mask */
+
+   spe_comment(gen->f, 0, "initialize loop exec mask = ~0");
+   spe_load_int(gen->f, loop_mask, ~0x0);
+
+   gen->loop_nesting++;
+
+   /* remember where the loop body begins (in bytes) */
+   gen->loop_start = spe_code_size(gen->f);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for the end of a loop.
+ * The words of the loop mask are OR'ed together and, if the result is
+ * non-zero, the code branches back to the recorded loop start.  After
+ * that the loop nesting count is decremented and the master execution
+ * mask is recomputed.  'inst' is not used.
+ * Always returns TRUE.
+ */
+static boolean
+emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   const int loop_reg = get_loop_mask_reg(gen);
+   const int tmp_reg = get_itemp(gen);
+   int offset;
+
+   (void) inst;
+
+   /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */
+   spe_orx(gen->f, tmp_reg, loop_reg);
+
+   /* negative distance back to the top of the loop, in bytes */
+   offset = gen->loop_start - spe_code_size(gen->f);
+
+   /* branch back to top of loop if tmp_reg != 0 (offset is passed in
+    * units of 4 bytes)
+    */
+   spe_brnz(gen->f, tmp_reg, offset / 4);
+
+   /* XXX pop loop_exec mask */
+
+   gen->loop_nesting--;
+
+   emit_update_exec_mask(gen);
+
+   /* release tmp_reg: like the other emitters, do not leave the
+    * temporary allocated past the end of this instruction
+    */
+   free_itemps(gen);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for the TGSI BRK opcode.
+ * The currently executing lanes are cleared from the loop mask and the
+ * master execution mask is recomputed.  Must only be used inside a loop.
+ * 'inst' is not used.  Always returns TRUE.
+ */
+static boolean
+emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   int exec_reg, loop_reg;
+
+   (void) inst;
+
+   exec_reg = get_exec_mask_reg(gen);
+   loop_reg = get_loop_mask_reg(gen);
+
+   assert(gen->loop_nesting > 0);
+
+   spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask");
+   /* loop_mask = loop_mask & ~exec_mask */
+   spe_andc(gen->f, loop_reg, loop_reg, exec_reg);
+
+   emit_update_exec_mask(gen);
+
+   return TRUE;
+}
+
+
+/**
+ * Handle the TGSI CONT opcode.
+ * No SPE code is emitted: the opcode is accepted (inside a loop only) but
+ * currently has no effect on the generated program.  'inst' is not used.
+ * Always returns TRUE.
+ */
+static boolean
+emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+   (void) inst;
+
+   assert(gen->loop_nesting > 0);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for the TGSI DDX and DDY opcodes.
+ * For each enabled channel the value of the upper-left pixel (word 0) is
+ * subtracted from the value of the upper-right pixel (word 1, for DDX) or
+ * the lower-left pixel (word 2, for DDY); both operands are splatted
+ * across the register first, so every word of the result holds the same
+ * difference.
+ * \param ddx TRUE to compute DDX, FALSE to compute DDY
+ * Always returns TRUE.
+ */
+static boolean
+emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
+ boolean ddx)
+{
+ int ch;
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ int t1_reg = get_itemp(gen);
+ int t2_reg = get_itemp(gen);
+
+ spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
+ if (ddx) {
+ spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
+ }
+ else {
+ spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
}
+ /* d = t2 - t1 */
+ spe_fs(gen->f, d_reg, t2_reg, t1_reg);
+
+ /* write the result back, as every other emitter that obtains its
+ * destination through get_dst_reg() does; without this the value
+ * computed into d_reg never reaches the destination register
+ */
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+
+ free_itemps(gen);
}
- return true;
+ return TRUE;
}
+
+
/**
* Emit END instruction.
* We just return from the shader function at this point.
@@ -361,12 +1742,8 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
static boolean
emit_END(struct codegen *gen)
{
- /* return from function call */
- spe_bi(gen->f, SPE_REG_RA, 0, 0);
-#if DISASSEM
- printf("bi\trRA\n");
-#endif
- return true;
+ /* The shader's exit code is now produced by emit_epilogue().
+ * NOTE(review): presumably that includes the return branch the removed
+ * spe_bi() call used to emit here - confirm.
+ */
+ emit_epilogue(gen);
+ return TRUE;
}
@@ -378,24 +1755,153 @@ emit_instruction(struct codegen *gen,
const struct tgsi_full_instruction *inst)
{
switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL:
+ return emit_ARL(gen, inst);
case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_SWZ:
return emit_MOV(gen, inst);
- case TGSI_OPCODE_MUL:
- return emit_MUL(gen, inst);
case TGSI_OPCODE_ADD:
- return emit_ADD(gen, inst);
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_MUL:
+ return emit_binop(gen, inst);
+ case TGSI_OPCODE_MAD:
+ return emit_MAD(gen, inst);
+ case TGSI_OPCODE_LERP:
+ return emit_LERP(gen, inst);
+ case TGSI_OPCODE_DP3:
+ return emit_DP3(gen, inst);
+ case TGSI_OPCODE_DP4:
+ return emit_DP4(gen, inst);
+ case TGSI_OPCODE_DPH:
+ return emit_DPH(gen, inst);
+ case TGSI_OPCODE_NRM:
+ return emit_NRM3(gen, inst);
+ case TGSI_OPCODE_XPD:
+ return emit_XPD(gen, inst);
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ return emit_RCP_RSQ(gen, inst);
+ case TGSI_OPCODE_ABS:
+ return emit_ABS(gen, inst);
case TGSI_OPCODE_SGT:
- return emit_SGT(gen, inst);
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SNE:
+ return emit_inequality(gen, inst);
+ case TGSI_OPCODE_CMP:
+ return emit_CMP(gen, inst);
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MAX:
+ return emit_MIN_MAX(gen, inst);
+ case TGSI_OPCODE_TRUNC:
+ return emit_TRUNC(gen, inst);
+ case TGSI_OPCODE_FLR:
+ return emit_FLR(gen, inst);
+ case TGSI_OPCODE_FRC:
+ return emit_FRC(gen, inst);
case TGSI_OPCODE_END:
return emit_END(gen);
+ case TGSI_OPCODE_COS:
+ return emit_function_call(gen, inst, "spu_cos", 1, TRUE);
+ case TGSI_OPCODE_SIN:
+ return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
+ case TGSI_OPCODE_POW:
+ return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
+ case TGSI_OPCODE_EXPBASE2:
+ return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
+ case TGSI_OPCODE_LOGBASE2:
+ return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
+ case TGSI_OPCODE_TEX:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXD:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXB:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXL:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXP:
+ return emit_TEX(gen, inst);
+ case TGSI_OPCODE_KIL:
+ return emit_KIL(gen, inst);
+
+ case TGSI_OPCODE_IF:
+ return emit_IF(gen, inst);
+ case TGSI_OPCODE_ELSE:
+ return emit_ELSE(gen, inst);
+ case TGSI_OPCODE_ENDIF:
+ return emit_ENDIF(gen, inst);
+
+ case TGSI_OPCODE_BGNLOOP2:
+ return emit_BGNLOOP(gen, inst);
+ case TGSI_OPCODE_ENDLOOP2:
+ return emit_ENDLOOP(gen, inst);
+ case TGSI_OPCODE_BRK:
+ return emit_BRK(gen, inst);
+ case TGSI_OPCODE_CONT:
+ return emit_CONT(gen, inst);
+
+ case TGSI_OPCODE_DDX:
+ return emit_DDX_DDY(gen, inst, TRUE);
+ case TGSI_OPCODE_DDY:
+ return emit_DDX_DDY(gen, inst, FALSE);
+
/* XXX lots more cases to do... */
default:
- return false;
+ fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
+ inst->Instruction.Opcode);
+ return FALSE;
}
- return true;
+ return TRUE;
+}
+
+
+
+/**
+ * Emit code for a TGSI immediate value (vector of four floats).
+ * This involves register allocation and initialization.
+ * A channel whose value equals the previous channel's value shares that
+ * channel's register instead of getting a new one.
+ * XXX the initialization should be done by a "prepare" stage, not
+ * per quad execution!
+ * \return FALSE if there is no room for another immediate or no SPE
+ *         register is available, TRUE otherwise.
+ */
+static boolean
+emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
+{
+   int ch;
+
+   assert(gen->num_imm < MAX_TEMPS);
+   /* Enforce the same bound when assertions are compiled out, so that
+    * imm_regs[] is never indexed past it.
+    * NOTE(review): the bound is the one the assert uses; confirm it
+    * matches the declared size of imm_regs[].
+    */
+   if (gen->num_imm >= MAX_TEMPS)
+      return FALSE;
+
+   for (ch = 0; ch < 4; ch++) {
+      float val = immed->u.ImmediateFloat32[ch].Float;
+
+      if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) {
+         /* re-use previous register */
+         gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
+      }
+      else {
+         char str[100];
+         int reg = spe_allocate_available_register(gen->f);
+
+         if (reg < 0)
+            return FALSE;
+
+         /* bounded formatting: never write past the end of str[] */
+         snprintf(str, sizeof(str), "init $%d = %f", reg, val);
+         spe_comment(gen->f, 0, str);
+
+         /* update immediate map */
+         gen->imm_regs[gen->num_imm][ch] = reg;
+
+         /* emit initializer instruction */
+         spe_load_float(gen->f, reg, val);
+      }
+   }
+
+   gen->num_imm++;
+
+   return TRUE;
}
@@ -405,44 +1911,46 @@ emit_instruction(struct codegen *gen,
* We only care about TGSI TEMPORARY register declarations at this time.
* For each TGSI TEMPORARY we allocate four SPE registers.
*/
-static void
-emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl)
+static boolean
+emit_declaration(struct cell_context *cell,
+ struct codegen *gen, const struct tgsi_full_declaration *decl)
{
int i, ch;
+ /* 'cell' is not referenced in this function. Returns FALSE as soon as
+ * a register allocation fails and TRUE otherwise (declarations for
+ * register files other than TEMPORARY are ignored).
+ */
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
-#if DISASSEM
- printf("Declare temp reg %d .. %d\n",
- decl->DeclarationRange.First,
- decl->DeclarationRange.Last);
-#endif
for (i = decl->DeclarationRange.First;
i <= decl->DeclarationRange.Last;
i++) {
+ /* NOTE(review): the index into temp_regs[] is only checked by this
+ * assert - confirm out-of-range declarations cannot reach here
+ */
+ assert(i < MAX_TEMPS);
for (ch = 0; ch < 4; ch++) {
gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
+ if (gen->temp_regs[i][ch] < 0)
+ return FALSE; /* out of regs */
}
/* XXX if we run out of SPE registers, we need to spill
* to SPU memory. someday...
*/
-#if DISASSEM
- printf(" SPE regs: %d %d %d %d\n",
- gen->temp_regs[i][0],
- gen->temp_regs[i][1],
- gen->temp_regs[i][2],
- gen->temp_regs[i][3]);
-#endif
+ /* record the temp -> SPU register mapping as a comment in the
+ * generated code
+ */
+ {
+ char buf[100];
+ sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
+ gen->temp_regs[i][0], gen->temp_regs[i][1],
+ gen->temp_regs[i][2], gen->temp_regs[i][3]);
+ spe_comment(gen->f, 0, buf);
+ }
}
break;
default:
; /* ignore */
}
+
+ return TRUE;
}
+
/**
* Translate TGSI shader code to SPE instructions. This is done when
* the state tracker gives us a new shader (via pipe->create_fs_state()).
@@ -458,8 +1966,10 @@ cell_gen_fragment_program(struct cell_context *cell,
{
struct tgsi_parse_context parse;
struct codegen gen;
+ uint ic = 0;
memset(&gen, 0, sizeof(gen));
+ gen.cell = cell;
gen.f = f;
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
@@ -472,50 +1982,63 @@ cell_gen_fragment_program(struct cell_context *cell,
spe_allocate_register(f, gen.outputs_reg);
spe_allocate_register(f, gen.constants_reg);
-#if DISASSEM
- printf("Begin %s\n", __FUNCTION__);
- tgsi_dump(tokens, 0);
-#endif
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ spe_print_code(f, TRUE);
+ spe_indent(f, 2*8);
+ printf("Begin %s\n", __FUNCTION__);
+ tgsi_dump(tokens, 0);
+ }
tgsi_parse_init(&parse, tokens);
+ emit_prologue(&gen);
+
while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
tgsi_parse_token(&parse);
switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
-#if 0
- if (!note_immediate(&gen, &parse.FullToken.FullImmediate ))
- goto fail;
-#endif
+ if (f->print) {
+ _debug_printf(" # ");
+ tgsi_dump_immediate(&parse.FullToken.FullImmediate);
+ }
+ if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
+ gen.error = TRUE;
break;
case TGSI_TOKEN_TYPE_DECLARATION:
- emit_declaration(&gen, &parse.FullToken.FullDeclaration);
+ if (f->print) {
+ _debug_printf(" # ");
+ tgsi_dump_declaration(&parse.FullToken.FullDeclaration);
+ }
+ if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
+ gen.error = TRUE;
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) {
- gen.error = true;
+ if (f->print) {
+ _debug_printf(" # ");
+ ic++;
+ tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
}
+ if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
+ gen.error = TRUE;
break;
default:
assert(0);
-
}
}
-
if (gen.error) {
/* terminate the SPE code */
return emit_END(&gen);
}
-#if DISASSEM
- printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
- printf("End %s\n", __FUNCTION__);
-#endif
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
+ printf("End %s\n", __FUNCTION__);
+ }
tgsi_parse_free( &parse );
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
index 06219d4e98..66d4b3b6a3 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -2,6 +2,7 @@
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2009 VMware, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -25,11 +26,10 @@
*
**************************************************************************/
-
-
/**
* Generate SPU per-fragment code (actually per-quad code).
* \author Brian Paul
+ * \author Bob Ellison
*/
@@ -54,12 +54,17 @@
* \param ifragZ_reg register containing integer fragment Z values (in)
* \param ifbZ_reg register containing integer frame buffer Z values (in/out)
* \param zmask_reg register containing result of Z test/comparison (out)
+ *
+ * Returns TRUE if the Z-buffer needs to be updated.
*/
-static void
-gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
- struct spe_function *f,
+static boolean
+gen_depth_test(struct spe_function *f,
+ const struct pipe_depth_stencil_alpha_state *dsa,
int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
{
+ /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_
+ * quantities. This only makes a difference for 32-bit Z values though.
+ */
ASSERT(dsa->depth.enabled);
switch (dsa->depth.func) {
@@ -79,28 +84,28 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
case PIPE_FUNC_GREATER:
/* zmask = (ifragZ > ref) */
- spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
/* mask = (mask & zmask) */
spe_and(f, mask_reg, mask_reg, zmask_reg);
break;
case PIPE_FUNC_LESS:
/* zmask = (ref > ifragZ) */
- spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
+ spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
/* mask = (mask & zmask) */
spe_and(f, mask_reg, mask_reg, zmask_reg);
break;
case PIPE_FUNC_LEQUAL:
/* zmask = (ifragZ > ref) */
- spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
/* mask = (mask & ~zmask) */
spe_andc(f, mask_reg, mask_reg, zmask_reg);
break;
case PIPE_FUNC_GEQUAL:
/* zmask = (ref > ifragZ) */
- spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
+ spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
/* mask = (mask & ~zmask) */
spe_andc(f, mask_reg, mask_reg, zmask_reg);
break;
@@ -129,7 +134,10 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
* framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
*/
spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
+ return TRUE;
}
+
+ return FALSE;
}
@@ -153,7 +161,7 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
(dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
/* load/splat the alpha reference float value */
- spe_load_float(f, ref_reg, dsa->alpha.ref);
+ spe_load_float(f, ref_reg, dsa->alpha.ref_value);
}
/* emit code to do the alpha comparison, updating 'mask' */
@@ -230,6 +238,134 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
}
+/**
+ * Helpers for optional (lazily allocated) registers.
+ * A register variable holding a negative value means "not allocated".
+ * Once an optional register has been found to be needed it is likely to
+ * be needed again, so it is kept allocated rather than being released
+ * and re-allocated (and re-loaded) each time.
+ *
+ * Allocate a register into *r unless *r already holds one.
+ */
+static INLINE void
+setup_optional_register(struct spe_function *f,
+                        int *r)
+{
+   if (*r >= 0) {
+      /* already allocated: nothing to do */
+      return;
+   }
+
+   *r = spe_allocate_available_register(f);
+}
+
+/**
+ * Release an optional register; a negative 'r' (never allocated) is
+ * silently ignored.
+ */
+static INLINE void
+release_optional_register(struct spe_function *f,
+                          int r)
+{
+   if (r < 0) {
+      /* never allocated: nothing to release */
+      return;
+   }
+
+   spe_release_register(f, r);
+}
+
+/**
+ * Make sure *r holds a register loaded with the float constant 'value'.
+ * If *r is already non-negative nothing is emitted; otherwise a register
+ * is allocated into *r and a load of 'value' is emitted for it.
+ */
+static INLINE void
+setup_const_register(struct spe_function *f,
+                     int *r,
+                     float value)
+{
+   if (*r < 0) {
+      /* first use: allocate the register and load the constant */
+      *r = spe_allocate_available_register(f);
+      spe_load_float(f, *r, value);
+   }
+}
+
+/**
+ * Release a constant register; a negative 'r' (never allocated) is
+ * silently ignored.
+ */
+static INLINE void
+release_const_register(struct spe_function *f,
+                       int r)
+{
+   if (r >= 0) {
+      spe_release_register(f, r);
+   }
+}
+
+
+
+/**
+ * Unpack/convert framebuffer colors from four 32-bit packed colors
+ * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
+ * Each 8-bit color component is expanded into a float in [0.0, 1.0].
+ *
+ * The color format only decides which output register receives which
+ * byte of the packed word; the emitted mask/rotate sequence is otherwise
+ * the same.  For an unrecognized format no unpacking code is emitted
+ * (only the final int-to-float conversions).
+ */
+static void
+unpack_colors(struct spe_function *f,
+              enum pipe_format color_format,
+              int fbRGBA_reg,
+              int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg)
+{
+   int byte_mask_reg[4];
+   /* lane_reg[i] = output register that receives bits [8*i, 8*i+7] */
+   int lane_reg[4] = { -1, -1, -1, -1 };
+   boolean known_format = TRUE;
+   int i;
+
+   for (i = 0; i < 4; i++) {
+      byte_mask_reg[i] = spe_allocate_available_register(f);
+   }
+
+   /* one mask per byte of the packed color */
+   spe_load_int(f, byte_mask_reg[0], 0xff);
+   spe_load_int(f, byte_mask_reg[1], 0xff00);
+   spe_load_int(f, byte_mask_reg[2], 0xff0000);
+   spe_load_int(f, byte_mask_reg[3], 0xff000000);
+
+   spe_comment(f, 0, "Unpack framebuffer colors, convert to floats");
+
+   switch (color_format) {
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      /* low byte to high byte: B, G, R, A */
+      lane_reg[0] = fbB_reg;
+      lane_reg[1] = fbG_reg;
+      lane_reg[2] = fbR_reg;
+      lane_reg[3] = fbA_reg;
+      break;
+
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      /* low byte to high byte: A, R, G, B */
+      lane_reg[0] = fbA_reg;
+      lane_reg[1] = fbR_reg;
+      lane_reg[2] = fbG_reg;
+      lane_reg[3] = fbB_reg;
+      break;
+
+   default:
+      ASSERT(0);
+      known_format = FALSE;
+   }
+
+   if (known_format) {
+      /* component = fbRGBA & mask, for all four bytes */
+      for (i = 0; i < 4; i++) {
+         spe_and(f, lane_reg[i], fbRGBA_reg, byte_mask_reg[i]);
+      }
+
+      /* component = component >> (8, 16, 24); byte 0 needs no shift */
+      for (i = 1; i < 4; i++) {
+         spe_roti(f, lane_reg[i], lane_reg[i], -8 * i);
+      }
+   }
+
+   /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
+   spe_cuflt(f, fbR_reg, fbR_reg, 8);
+   spe_cuflt(f, fbG_reg, fbG_reg, 8);
+   spe_cuflt(f, fbB_reg, fbB_reg, 8);
+   spe_cuflt(f, fbA_reg, fbA_reg, 8);
+
+   for (i = 0; i < 4; i++) {
+      spe_release_register(f, byte_mask_reg[i]);
+   }
+}
+
/**
* Generate SPE code to implement the given blend mode for a quad of pixels.
@@ -242,6 +378,7 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
*/
static void
gen_blend(const struct pipe_blend_state *blend,
+ const struct pipe_blend_color *blend_color,
struct spe_function *f,
enum pipe_format color_format,
int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
@@ -262,211 +399,464 @@ gen_blend(const struct pipe_blend_state *blend,
int fbB_reg = spe_allocate_available_register(f);
int fbA_reg = spe_allocate_available_register(f);
- int one_reg = spe_allocate_available_register(f);
int tmp_reg = spe_allocate_available_register(f);
- boolean one_reg_set = false; /* avoid setting one_reg more than once */
-
- ASSERT(blend->blend_enable);
-
- /* Unpack/convert framebuffer colors from four 32-bit packed colors
- * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
- * Each 8-bit color component is expanded into a float in [0.0, 1.0].
+ /* Optional constant registers we might or might not end up using;
+ * if we do use them, make sure we only allocate them once by
+ * keeping a flag on each one.
*/
- {
- int mask_reg = spe_allocate_available_register(f);
-
- /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
- spe_load_int(f, mask_reg, 0xff);
-
- /* XXX there may be more clever ways to implement the following code */
- switch (color_format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- /* fbB = fbB & mask */
- spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
- /* mask = mask << 8 */
- spe_roti(f, mask_reg, mask_reg, 8);
-
- /* fbG = fbRGBA & mask */
- spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
- /* fbG = fbG >> 8 */
- spe_roti(f, fbG_reg, fbG_reg, -8);
- /* mask = mask << 8 */
- spe_roti(f, mask_reg, mask_reg, 8);
-
- /* fbR = fbRGBA & mask */
- spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
- /* fbR = fbR >> 16 */
- spe_roti(f, fbR_reg, fbR_reg, -16);
- /* mask = mask << 8 */
- spe_roti(f, mask_reg, mask_reg, 8);
-
- /* fbA = fbRGBA & mask */
- spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
- /* fbA = fbA >> 24 */
- spe_roti(f, fbA_reg, fbA_reg, -24);
- break;
-
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- /* fbA = fbA & mask */
- spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
- /* mask = mask << 8 */
- spe_roti(f, mask_reg, mask_reg, 8);
-
- /* fbR = fbRGBA & mask */
- spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
- /* fbR = fbR >> 8 */
- spe_roti(f, fbR_reg, fbR_reg, -8);
- /* mask = mask << 8 */
- spe_roti(f, mask_reg, mask_reg, 8);
-
- /* fbG = fbRGBA & mask */
- spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
- /* fbG = fbG >> 16 */
- spe_roti(f, fbG_reg, fbG_reg, -16);
- /* mask = mask << 8 */
- spe_roti(f, mask_reg, mask_reg, 8);
-
- /* fbB = fbRGBA & mask */
- spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
- /* fbB = fbB >> 24 */
- spe_roti(f, fbB_reg, fbB_reg, -24);
- break;
+ int one_reg = -1;
+ int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1;
- default:
- ASSERT(0);
- }
-
- /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
- spe_cuflt(f, fbR_reg, fbR_reg, 8);
- spe_cuflt(f, fbG_reg, fbG_reg, 8);
- spe_cuflt(f, fbB_reg, fbB_reg, 8);
- spe_cuflt(f, fbA_reg, fbA_reg, 8);
-
- spe_release_register(f, mask_reg);
- }
+ ASSERT(blend->blend_enable);
+ /* packed RGBA -> float colors */
+ unpack_colors(f, color_format, fbRGBA_reg,
+ fbR_reg, fbG_reg, fbB_reg, fbA_reg);
/*
- * Compute Src RGB terms
+ * Compute Src RGB terms. We're actually looking for the value
+ * of (the appropriate RGB factors) * (the incoming source RGB color),
+ * because in some cases (like PIPE_BLENDFACTOR_ONE and
+ * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math.
*/
switch (blend->rgb_src_factor) {
case PIPE_BLENDFACTOR_ONE:
+ /* factors = (1,1,1), so term = (R,G,B) */
spe_move(f, term1R_reg, fragR_reg);
spe_move(f, term1G_reg, fragG_reg);
spe_move(f, term1B_reg, fragB_reg);
break;
case PIPE_BLENDFACTOR_ZERO:
- spe_zero(f, term1R_reg);
- spe_zero(f, term1G_reg);
- spe_zero(f, term1B_reg);
+ /* factors = (0,0,0), so term = (0,0,0) */
+ spe_load_float(f, term1R_reg, 0.0f);
+ spe_load_float(f, term1G_reg, 0.0f);
+ spe_load_float(f, term1B_reg, 0.0f);
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factors = (R,G,B), so term = (R*R, G*G, B*B) */
spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
+ /* factors = (A,A,A), so term = (R*A, G*A, B*A) */
spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
break;
- /* XXX more cases */
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B))
+ * or in other words term = (R-R*R, G-G*G, B-B*B)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */
+ spe_fm(f, term1R_reg, fragR_reg, fbR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fbG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb))
+ * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A))
+ * or term = (R-R*A,G-G*A,B-B*A)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */
+ spe_fm(f, term1R_reg, fragR_reg, fbA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fbA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fbA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb))
+ * or term = (R-R*Afb,G-G*Afb,B-B*Afb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg, blend_color->color[2]);
+ /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
+ spe_fm(f, term1R_reg, fragR_reg, constR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, constG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, constB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ /* we'll need the optional constant alpha register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
+ spe_fm(f, term1R_reg, fragR_reg, constA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, constA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, constA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg, blend_color->color[2]);
+ /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc))
+ * or term = (R-R*Rc, G-G*Gc, B-B*Bc)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ /* we'll need the optional constant alpha register: the fnms calls
+ * below read constA_reg, which starts out as -1 and has not been
+ * set up by anything earlier in this function
+ */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac))
+ * or term = (R-R*Ac,G-G*Ac,B-B*Ac)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ /* We'll need the optional {1,1,1,1} register */
+ setup_const_register(f, &one_reg, 1.0f);
+ /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so
+ * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
+ * We could expand the term (as a*min(b,c) == min(a*b,a*c)
+ * as long as a is positive), but then we'd have to do three
+ * spe_float_min() functions instead of one, so this is simpler.
+ */
+ /* tmp = 1 - Afb */
+ spe_fs(f, tmp_reg, one_reg, fbA_reg);
+ /* tmp = min(A,tmp) */
+ spe_float_min(f, tmp_reg, fragA_reg, tmp_reg);
+ /* term = R*tmp */
+ spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+ spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+ spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+
default:
ASSERT(0);
}
/*
- * Compute Src Alpha term
+ * Compute Src Alpha term. Like the above, we're looking for
+ * the full term A*factor, not just the factor itself, because
+ * in many cases we can avoid doing unnecessary multiplies.
*/
switch (blend->alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ /* factor = 0, so term = 0 */
+ spe_load_float(f, term1A_reg, 0.0f);
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */
case PIPE_BLENDFACTOR_ONE:
+ /* factor = 1, so term = A */
spe_move(f, term1A_reg, fragA_reg);
break;
+
case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factor = A, so term = A*A */
spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
break;
- /* XXX more cases */
+
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factor = 1-A, so term = A*(1-A) = A-A*A */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factor = Afb, so term = A*Afb */
+ spe_fm(f, term1A_reg, fragA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = Ac, so term = A*Ac */
+ spe_fm(f, term1A_reg, fragA_reg, constA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
default:
ASSERT(0);
}
/*
- * Compute Dest RGB terms
+ * Compute Dest RGB term. Like the above, we're looking for
+ * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because
+ * in many cases we can avoid doing unnecessary multiplies.
*/
switch (blend->rgb_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
+ /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */
spe_move(f, term2R_reg, fbR_reg);
spe_move(f, term2G_reg, fbG_reg);
spe_move(f, term2B_reg, fbB_reg);
break;
case PIPE_BLENDFACTOR_ZERO:
- spe_zero(f, term2R_reg);
- spe_zero(f, term2G_reg);
- spe_zero(f, term2B_reg);
+ /* factors = (0,0,0), so term = (0,0,0) */
+ spe_load_float(f, term2R_reg, 0.0f);
+ spe_load_float(f, term2G_reg, 0.0f);
+ spe_load_float(f, term2B_reg, 0.0f);
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */
spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B))
+ * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg);
+ break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
+ /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */
spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
break;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- /* one = {1.0, 1.0, 1.0, 1.0} */
- if (!one_reg_set) {
- spe_load_float(f, one_reg, 1.0f);
- one_reg_set = true;
- }
- /* tmp = one - fragA */
- spe_fs(f, tmp_reg, one_reg, fragA_reg);
- /* term = fb * tmp */
- spe_fm(f, term2R_reg, fbR_reg, tmp_reg);
- spe_fm(f, term2G_reg, fbG_reg, tmp_reg);
- spe_fm(f, term2B_reg, fbB_reg, tmp_reg);
- break;
- /* XXX more cases */
+ /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */
+ spe_fm(f, term2R_reg, fbR_reg, fbR_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fbG_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb))
+ * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */
+ spe_fm(f, term2R_reg, fbR_reg, fbA_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fbA_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fbA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb))
+ * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg, blend_color->color[2]);
+ /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */
+ spe_fm(f, term2R_reg, fbR_reg, constR_reg);
+ spe_fm(f, term2G_reg, fbG_reg, constG_reg);
+ spe_fm(f, term2B_reg, fbB_reg, constB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ /* we'll need the optional constant alpha register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */
+ spe_fm(f, term2R_reg, fbR_reg, constA_reg);
+ spe_fm(f, term2G_reg, fbG_reg, constA_reg);
+ spe_fm(f, term2B_reg, fbB_reg, constA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg, blend_color->color[2]);
+ /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc))
+ * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ /* we'll need the optional constant alpha register: the fnms calls
+ * below read constA_reg, which is only valid here if an earlier
+ * blend factor happened to set it up, so make sure it is set up now
+ */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac))
+ * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */
+ ASSERT(0);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+
default:
ASSERT(0);
}
/*
- * Compute Dest Alpha term
+ * Compute Dest Alpha term. Like the above, we're looking for
+ * the full term Afb*factor, not just the factor itself, because
+ * in many cases we can avoid doing unnecessary multiplies.
*/
switch (blend->alpha_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
+ /* factor = 1, so term = Afb */
spe_move(f, term2A_reg, fbA_reg);
break;
case PIPE_BLENDFACTOR_ZERO:
- spe_zero(f, term2A_reg);
+ /* factor = 0, so term = 0 */
+ spe_load_float(f, term2A_reg, 0.0f);
break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factor = A, so term = Afb*A */
spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
break;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- /* one = {1.0, 1.0, 1.0, 1.0} */
- if (!one_reg_set) {
- spe_load_float(f, one_reg, 1.0f);
- one_reg_set = true;
- }
- /* tmp = one - fragA */
- spe_fs(f, tmp_reg, one_reg, fragA_reg);
- /* termA = fbA * tmp */
- spe_fm(f, term2A_reg, fbA_reg, tmp_reg);
+
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factor = Afb, so term = Afb*Afb */
+ spe_fm(f, term2A_reg, fbA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = Ac, so term = Afb*Ac */
+ spe_fm(f, term2A_reg, fbA_reg, constA_reg);
break;
- /* XXX more cases */
+
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */
+ ASSERT(0);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
default:
ASSERT(0);
}
/*
- * Combine Src/Dest RGB terms
+ * Combine Src/Dest RGB terms as per the blend equation.
*/
switch (blend->rgb_func) {
case PIPE_BLEND_ADD:
@@ -479,7 +869,21 @@ gen_blend(const struct pipe_blend_state *blend,
spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ spe_fs(f, fragR_reg, term2R_reg, term1R_reg);
+ spe_fs(f, fragG_reg, term2G_reg, term1G_reg);
+ spe_fs(f, fragB_reg, term2B_reg, term1B_reg);
+ break;
+ case PIPE_BLEND_MIN:
+ spe_float_min(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_float_min(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_float_min(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
+ case PIPE_BLEND_MAX:
+ spe_float_max(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_float_max(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_float_max(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
default:
ASSERT(0);
}
@@ -494,7 +898,15 @@ gen_blend(const struct pipe_blend_state *blend,
case PIPE_BLEND_SUBTRACT:
spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ spe_fs(f, fragA_reg, term2A_reg, term1A_reg);
+ break;
+ case PIPE_BLEND_MIN:
+ spe_float_min(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
+ case PIPE_BLEND_MAX:
+ spe_float_max(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
default:
ASSERT(0);
}
@@ -514,8 +926,14 @@ gen_blend(const struct pipe_blend_state *blend,
spe_release_register(f, fbB_reg);
spe_release_register(f, fbA_reg);
- spe_release_register(f, one_reg);
spe_release_register(f, tmp_reg);
+
+ /* Free any optional registers that actually got used */
+ release_const_register(f, one_reg);
+ release_const_register(f, constR_reg);
+ release_const_register(f, constG_reg);
+ release_const_register(f, constB_reg);
+ release_const_register(f, constA_reg);
}
@@ -524,24 +942,74 @@ gen_logicop(const struct pipe_blend_state *blend,
struct spe_function *f,
int fragRGBA_reg, int fbRGBA_reg)
{
- /* XXX to-do */
- /* operate on 32-bit packed pixels, not float colors */
-}
-
-
-static void
-gen_colormask(uint colormask,
- struct spe_function *f,
- int fragRGBA_reg, int fbRGBA_reg)
-{
- /* XXX to-do */
- /* operate on 32-bit packed pixels, not float colors */
+ /* We've got four 32-bit RGBA packed pixels in each of
+ * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
+ * reds, greens, blues, and alphas.
+ * */
+ ASSERT(blend->logicop_enable);
+
+ switch(blend->logicop_func) {
+ case PIPE_LOGICOP_CLEAR: /* 0 */
+ spe_zero(f, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_NOR: /* ~(s | d) */
+ spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */
+ /* andc R, A, B computes R = A & ~B */
+ spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED: /* ~s */
+ spe_complement(f, fragRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */
+ /* andc R, A, B computes R = A & ~B */
+ spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_INVERT: /* ~d */
+ /* Note that (A nor A) == ~(A|A) == ~A */
+ spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_XOR: /* s ^ d */
+ spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_NAND: /* ~(s & d) */
+ spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_AND: /* s & d */
+ spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */
+ spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ spe_complement(f, fragRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_NOOP: /* d */
+ spe_move(f, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */
+ /* orc R, A, B computes R = A | ~B */
+ spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_COPY: /* s */
+ break;
+ case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */
+ /* orc R, A, B computes R = A | ~B */
+ spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_OR: /* s | d */
+ spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_SET: /* 1 */
+ spe_load_int(f, fragRGBA_reg, 0xffffffff);
+ break;
+ default:
+ ASSERT(0);
+ }
}
-
/**
- * Generate code to pack a quad of float colors into a four 32-bit integers.
+ * Generate code to pack a quad of float colors into four 32-bit integers.
*
* \param f SPE function to append instruction onto.
* \param color_format the dest color packing format
@@ -557,13 +1025,16 @@ gen_pack_colors(struct spe_function *f,
int r_reg, int g_reg, int b_reg, int a_reg,
int rgba_reg)
{
+ int rg_reg = spe_allocate_available_register(f);
+ int ba_reg = spe_allocate_available_register(f);
+
/* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
spe_cfltu(f, r_reg, r_reg, 32);
spe_cfltu(f, g_reg, g_reg, 32);
spe_cfltu(f, b_reg, b_reg, 32);
spe_cfltu(f, a_reg, a_reg, 32);
- /* Shift the most significant bytes to least the significant positions.
+ /* Shift the most significant bytes to the least significant positions.
* I.e.: reg = reg >> 24
*/
spe_rotmi(f, r_reg, r_reg, -24);
@@ -595,12 +1066,936 @@ gen_pack_colors(struct spe_function *f,
* OR-ing all those together gives us four packed colors:
* RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
*/
- spe_or(f, rgba_reg, r_reg, g_reg);
- spe_or(f, rgba_reg, rgba_reg, b_reg);
- spe_or(f, rgba_reg, rgba_reg, a_reg);
+ spe_or(f, rg_reg, r_reg, g_reg);
+ spe_or(f, ba_reg, a_reg, b_reg);
+ spe_or(f, rgba_reg, rg_reg, ba_reg);
+
+ spe_release_register(f, rg_reg);
+ spe_release_register(f, ba_reg);
}
+/**
+ * Generate code to apply the color write mask to a quad of packed pixels.
+ *
+ * On exit fragRGBA_reg holds, bit for bit, the fragment color in the
+ * channels enabled by colormask and the frame buffer color in the
+ * channels that are masked out.
+ *
+ * \param f SPE function to append instructions onto
+ * \param colormask bitmask of PIPE_MASK_R/G/B/A naming the writable channels
+ * \param color_format packing format of the pixels in both registers
+ * \param fragRGBA_reg register holding the packed fragment colors (in/out)
+ * \param fbRGBA_reg register holding the packed frame buffer colors (in)
+ */
+static void
+gen_colormask(struct spe_function *f,
+ uint colormask,
+ enum pipe_format color_format,
+ int fragRGBA_reg, int fbRGBA_reg)
+{
+ /* We've got four 32-bit RGBA packed pixels in each of
+ * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
+ * reds, greens, blues, and alphas. Further, the pixels
+ * are packed according to the given color format, not
+ * necessarily RGBA...
+ *
+ * Every channel mask starts out as zero so that an unsupported
+ * color format (which only trips the ASSERT below, and only in
+ * debug builds) produces a well-defined "keep the frame buffer
+ * color" mask instead of reading uninitialized variables.
+ */
+ uint r_mask = 0;
+ uint g_mask = 0;
+ uint b_mask = 0;
+ uint a_mask = 0;
+
+ /* Calculate exactly where the bits for any particular color
+ * end up, so we can mask them correctly.
+ */
+ switch(color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ /* ARGB */
+ a_mask = 0xff000000;
+ r_mask = 0x00ff0000;
+ g_mask = 0x0000ff00;
+ b_mask = 0x000000ff;
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /* BGRA */
+ b_mask = 0xff000000;
+ g_mask = 0x00ff0000;
+ r_mask = 0x0000ff00;
+ a_mask = 0x000000ff;
+ break;
+ default:
+ /* Unsupported format: all masks stay zero (see above) */
+ ASSERT(0);
+ }
+
+ /* For each R, G, B, and A component we're supposed to mask out,
+ * clear its bits. Then our mask operation later will work
+ * as expected.
+ */
+ if (!(colormask & PIPE_MASK_R)) {
+ r_mask = 0;
+ }
+ if (!(colormask & PIPE_MASK_G)) {
+ g_mask = 0;
+ }
+ if (!(colormask & PIPE_MASK_B)) {
+ b_mask = 0;
+ }
+ if (!(colormask & PIPE_MASK_A)) {
+ a_mask = 0;
+ }
+
+ /* Get a temporary register to hold the mask that will be applied
+ * to the fragment
+ */
+ int colormask_reg = spe_allocate_available_register(f);
+
+ /* The actual mask we're going to use is an OR of the remaining R, G, B,
+ * and A masks. Load the result value into our temporary register.
+ */
+ spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask);
+
+ /* Use the mask register to select between the fragment color
+ * values and the frame buffer color values. Wherever the
+ * mask has a 0 bit, the current frame buffer color should override
+ * the fragment color. Wherever the mask has a 1 bit, the
+ * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM)
+ * instruction will select bits from its first operand rA wherever
+ * the mask bits rM are 0, and from its second operand rB wherever the
+ * mask bits rM are 1. That means that the frame buffer color is the
+ * first operand, and the fragment color the second.
+ */
+ spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg);
+
+ /* Release the temporary register and we're done */
+ spe_release_register(f, colormask_reg);
+}
+
+
+/**
+ * Generate code for the stencil test.
+ *
+ * This function is annoyingly similar to gen_depth_test(), above, except
+ * that instead of comparing two varying values (i.e. fragment and buffer),
+ * we're comparing a varying value with a static value. As such, we have
+ * access to the Compare Immediate instructions where we don't in
+ * gen_depth_test(), which is what makes us very different.
+ *
+ * There's some added complexity if there's a non-trivial state->valuemask
+ * value; then stencil and reference both must be masked
+ *
+ * The return value in the stencil_pass_reg is a bitmask of valid
+ * fragments that also passed the stencil test. The bitmask of valid
+ * fragments that failed would be found in
+ * (fragment_mask_reg & ~stencil_pass_reg).
+ *
+ * \param f SPE function to append instructions onto
+ * \param state stencil state supplying func, ref_value and valuemask
+ * \param stencil_max_value a valuemask equal to this value is treated as
+ * trivial, and no masking code is generated
+ * \param fragment_mask_reg register holding the mask of live fragments (in)
+ * \param fbS_reg register holding the frame buffer stencil values (in)
+ * \param stencil_pass_reg register receiving the pass mask (out)
+ */
+static void
+gen_stencil_test(struct spe_function *f,
+ const struct pipe_stencil_state *state,
+ uint stencil_max_value,
+ int fragment_mask_reg,
+ int fbS_reg,
+ int stencil_pass_reg)
+{
+ /* Generate code that puts the set of passing fragments into the
+ * stencil_pass_reg register, taking into account whether each fragment
+ * was active to begin with.
+ */
+ switch (state->func) {
+ case PIPE_FUNC_EQUAL:
+ if (state->valuemask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (s == reference) */
+ spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
+ spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil,
+ state->valuemask & state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_NOTEQUAL:
+ if (state->valuemask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & ~(s == reference) */
+ spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
+ spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil,
+ state->valuemask & state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_LESS:
+ if (state->valuemask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference < s) */
+ spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
+ spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil,
+ state->valuemask & state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_GREATER:
+ if (state->valuemask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference > s) */
+ /* There's no convenient Compare Less Than Immediate instruction, so
+ * we'll have to do this one the harder way, by loading a register and
+ * comparing directly. Compare Logical Greater Than Word (clgt)
+ * treats its operands as unsigned - no sign extension.
+ */
+ int tmp_reg = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->ref_value);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */
+ int tmp_reg = spe_allocate_available_register(f);
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->valuemask & state->ref_value);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_GEQUAL:
+ if (state->valuemask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference >= s)
+ * = fragment_mask & ~(s > reference) */
+ spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg,
+ state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
+ spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil,
+ state->valuemask & state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ if (state->valuemask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference <= s)
+ * = fragment_mask & ~(reference > s) */
+ /* As above, we have to do this by loading a register */
+ int tmp_reg = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->ref_value);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */
+ int tmp_reg = spe_allocate_available_register(f);
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->ref_value & state->valuemask);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_NEVER:
+ /* stencil_pass = fragment_mask & 0 = 0 */
+ spe_load_uint(f, stencil_pass_reg, 0);
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ /* stencil_pass = fragment_mask & 1 = fragment_mask */
+ spe_move(f, stencil_pass_reg, fragment_mask_reg);
+ break;
+
+ default:
+ /* Unknown comparison function: no code is generated, so flag it
+ * the same way the other switches in this file do.
+ */
+ ASSERT(0);
+ }
+
+ /* The fragments that passed the stencil test are now in stencil_pass_reg.
+ * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg).
+ */
+}
+
+
+/**
+ * This function generates code that calculates a set of new stencil values
+ * given the earlier values and the operation to apply. It does not
+ * apply any tests. It is intended to be called up to 3 times
+ * (for the stencil fail operation, for the stencil pass-z fail operation,
+ * and for the stencil pass-z pass operation) to collect up to three
+ * possible sets of values, and for the caller to combine them based
+ * on the result of the tests.
+ *
+ * stencil_max_value should be (2^n - 1) where n is the number of bits
+ * in the stencil buffer - in other words, it should be usable as a mask.
+ *
+ * \param f SPE function to append instructions onto
+ * \param stencil_op one of the PIPE_STENCIL_OP_x values handled below
+ * \param stencil_ref_value value stored by PIPE_STENCIL_OP_REPLACE
+ * \param stencil_max_value saturation limit and wrap mask (see above)
+ * \param fbS_reg register holding the current stencil values (in)
+ * \param newS_reg register receiving the new stencil values (out);
+ * must not be the same register as fbS_reg
+ */
+static void
+gen_stencil_values(struct spe_function *f,
+ uint stencil_op,
+ uint stencil_ref_value,
+ uint stencil_max_value,
+ int fbS_reg,
+ int newS_reg)
+{
+ /* The code below assumes that newS_reg and fbS_reg are not the same
+ * register; if they can be, the calculations below will have to use
+ * an additional temporary register. For now, mark the assumption
+ * with an assertion that will fail if they are the same.
+ */
+ ASSERT(fbS_reg != newS_reg);
+
+ /* The code also assumes that the stencil_max_value is of the form
+ * 2^n-1 and can therefore be used as a mask for the valid bits in
+ * addition to a maximum. Make sure this is the case as well.
+ * The clever math below exploits the fact that incrementing a
+ * binary number serves to flip all the bits of a number starting at
+ * the LSB and continuing to (and including) the first zero bit
+ * found. That means that a number and its increment will always
+ * have at least one bit in common (the high order bit, if nothing
+ * else) *unless* the number is zero, *or* the number is of a form
+ * consisting of some number of 1s in the low-order bits followed
+ * by nothing but 0s in the high-order bits. The latter case
+ * implies it's of the form 2^n-1.
+ */
+ ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0);
+
+ switch(stencil_op) {
+ case PIPE_STENCIL_OP_KEEP:
+ /* newS = S */
+ spe_move(f, newS_reg, fbS_reg);
+ break;
+
+ case PIPE_STENCIL_OP_ZERO:
+ /* newS = 0 */
+ spe_zero(f, newS_reg);
+ break;
+
+ case PIPE_STENCIL_OP_REPLACE:
+ /* newS = stencil reference value */
+ spe_load_uint(f, newS_reg, stencil_ref_value);
+ break;
+
+ case PIPE_STENCIL_OP_INCR: {
+ /* newS = (s == max ? max : s + 1) */
+ int equals_reg = spe_allocate_available_register(f);
+
+ spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value);
+ /* Add Word Immediate computes rT = rA + 10-bit signed immediate */
+ spe_ai(f, newS_reg, fbS_reg, 1);
+ /* Select from the current value or the new value based on the equality
+ * test: where s == max the unchanged value in fbS_reg is kept,
+ * elsewhere the incremented value in newS_reg is kept.
+ */
+ spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
+
+ spe_release_register(f, equals_reg);
+ break;
+ }
+ case PIPE_STENCIL_OP_DECR: {
+ /* newS = (s == 0 ? 0 : s - 1) */
+ int equals_reg = spe_allocate_available_register(f);
+
+ spe_compare_equal_uint(f, equals_reg, fbS_reg, 0);
+ /* Add Word Immediate with a (-1) value works */
+ spe_ai(f, newS_reg, fbS_reg, -1);
+ /* Select from the current value or the new value based on the equality
+ * test: where s == 0 the unchanged value in fbS_reg is kept,
+ * elsewhere the decremented value in newS_reg is kept.
+ */
+ spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
+
+ spe_release_register(f, equals_reg);
+ break;
+ }
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can
+ * do a normal add and mask off the correct bits
+ */
+ spe_ai(f, newS_reg, fbS_reg, 1);
+ spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
+ break;
+
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */
+ spe_ai(f, newS_reg, fbS_reg, -1);
+ spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
+ break;
+
+ case PIPE_STENCIL_OP_INVERT:
+ /* newS = ~s. We take advantage of the mask/max value to invert only
+ * the valid bits for the field so we don't have to do an extra "and".
+ */
+ spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value);
+ break;
+
+ default:
+ /* Unknown stencil operation: no code is generated */
+ ASSERT(0);
+ }
+}
+
+
+/**
+ * This function generates code to get all the necessary possible
+ * stencil values. For each of the output registers (fail_reg,
+ * zfail_reg, and zpass_reg), it either allocates a new register
+ * and calculates a new set of values based on the stencil operation,
+ * or it reuses a register allocation and calculation done for an
+ * earlier (matching) operation, or it reuses the fbS_reg register
+ * (if the stencil operation is KEEP, which doesn't change the
+ * stencil buffer).
+ *
+ * Since this function allocates a variable number of registers,
+ * to avoid incurring complex logic to free them, they should
+ * be allocated after a spe_allocate_register_set() call
+ * and released by the corresponding spe_release_register_set() call.
+ *
+ * \param f SPE function to append instructions onto
+ * \param stencil stencil state supplying fail_op, zfail_op, zpass_op
+ * and ref_value; must be enabled
+ * \param depth_enabled non-zero if the depth test is enabled; when zero
+ * the zfail operation is treated as KEEP
+ * \param fbS_reg register holding the current stencil values (in)
+ * \param fail_reg receives the register index holding the values for
+ * fragments that fail the stencil test (out)
+ * \param zfail_reg receives the register index holding the values for
+ * fragments that pass stencil but fail depth (out)
+ * \param zpass_reg receives the register index holding the values for
+ * fragments that pass both stencil and depth (out)
+ */
+static void
+gen_get_stencil_values(struct spe_function *f,
+ const struct pipe_stencil_state *stencil,
+ const uint depth_enabled,
+ int fbS_reg,
+ int *fail_reg,
+ int *zfail_reg,
+ int *zpass_reg)
+{
+ uint zfail_op;
+
+ /* Stenciling had better be enabled here */
+ ASSERT(stencil->enabled);
+
+ /* If the depth test is not enabled, it is treated as though it always
+ * passes, which means that the zfail_op is not considered - a
+ * failing stencil test triggers the fail_op, and a passing one
+ * triggers the zpass_op
+ *
+ * As an optimization, override calculation of the zfail_op values
+ * if they aren't going to be used. By setting the value of
+ * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed
+ * to match the incoming stencil values, and no calculation will
+ * be done.
+ */
+ if (depth_enabled) {
+ zfail_op = stencil->zfail_op;
+ }
+ else {
+ zfail_op = PIPE_STENCIL_OP_KEEP;
+ }
+
+ /* One-sided or front-facing stencil.
+ * The 0xff passed to gen_stencil_values() here and below is its
+ * stencil_max_value argument, i.e. the maximum of an 8-bit stencil.
+ */
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) {
+ *fail_reg = fbS_reg;
+ }
+ else {
+ *fail_reg = spe_allocate_available_register(f);
+ gen_stencil_values(f, stencil->fail_op, stencil->ref_value,
+ 0xff, fbS_reg, *fail_reg);
+ }
+
+ /* Check the possibly overridden value, not the structure value.
+ * In the final branch the override cannot be in effect (it would have
+ * been caught by the KEEP test), so stencil->zfail_op and zfail_op
+ * are the same value there.
+ */
+ if (zfail_op == PIPE_STENCIL_OP_KEEP) {
+ *zfail_reg = fbS_reg;
+ }
+ else if (zfail_op == stencil->fail_op) {
+ *zfail_reg = *fail_reg;
+ }
+ else {
+ *zfail_reg = spe_allocate_available_register(f);
+ gen_stencil_values(f, stencil->zfail_op, stencil->ref_value,
+ 0xff, fbS_reg, *zfail_reg);
+ }
+
+ if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
+ *zpass_reg = fbS_reg;
+ }
+ else if (stencil->zpass_op == stencil->fail_op) {
+ *zpass_reg = *fail_reg;
+ }
+ else if (stencil->zpass_op == zfail_op) {
+ *zpass_reg = *zfail_reg;
+ }
+ else {
+ *zpass_reg = spe_allocate_available_register(f);
+ gen_stencil_values(f, stencil->zpass_op, stencil->ref_value,
+ 0xff, fbS_reg, *zpass_reg);
+ }
+}
+
+/**
+ * Generate code for the stencil test, the depth test (if enabled) and
+ * the resulting stencil buffer update for one quad.
+ *
+ * Note that fbZ_reg may *not* be set on entry, if in fact
+ * the depth test is not enabled. This function must not use
+ * the register if depth is not enabled.
+ *
+ * \param f SPE function to append instructions onto
+ * \param dsa depth/stencil/alpha state
+ * \param facing CELL_FACING_FRONT or CELL_FACING_BACK; selects the
+ * back-face stencil state only if that state is enabled
+ * \param mask_reg register holding the mask of live fragments (in/out)
+ * \param fragZ_reg register holding the fragment Z values (in)
+ * \param fbZ_reg register holding the frame buffer Z values
+ * \param fbS_reg register holding the frame buffer stencil values;
+ * receives the updated stencil values (in/out)
+ * \return TRUE if code was generated that could require the depth
+ * and/or stencil values to be written back
+ */
+static boolean
+gen_stencil_depth_test(struct spe_function *f,
+ const struct pipe_depth_stencil_alpha_state *dsa,
+ const uint facing,
+ const int mask_reg, const int fragZ_reg,
+ const int fbZ_reg, const int fbS_reg)
+{
+ /* True if we've generated code that could require writeback to the
+ * depth and/or stencil buffers
+ */
+ boolean modified_buffers = FALSE;
+
+ boolean need_to_calculate_stencil_values;
+ boolean need_to_writemask_stencil_values;
+
+ /* Points into the const dsa state, so it must be const-qualified too */
+ const struct pipe_stencil_state *stencil;
+
+ /* Registers. We may or may not actually allocate these, depending
+ * on whether the state values indicate that we need them.
+ */
+ int stencil_pass_reg, stencil_fail_reg;
+ int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values;
+ int stencil_writemask_reg;
+ int zmask_reg;
+ int newS_reg;
+
+ /* Stenciling is quite complex: up to six different configurable stencil
+ * operations/calculations can be required (three each for front-facing
+ * and back-facing fragments). Many of those operations will likely
+ * be identical, so there's good reason to try to avoid calculating
+ * the same values more than once (which unfortunately makes the code less
+ * straightforward).
+ *
+ * To make register management easier, we start a new
+ * register set; we can release all the registers in the set at
+ * once, and avoid having to keep track of exactly which registers
+ * we allocate. We can still allocate and free registers as
+ * desired (if we know we no longer need a register), but we don't
+ * have to spend the complexity to track the more difficult variant
+ * register usage scenarios.
+ */
+ spe_comment(f, 0, "Allocating stencil register set");
+ spe_allocate_register_set(f);
+
+ /* The facing we're given is the fragment facing; it doesn't
+ * exactly match the stencil facing. If stencil is enabled,
+ * but two-sided stencil is *not* enabled, we use the same
+ * stencil settings for both front- and back-facing fragments.
+ * We only use the "back-facing" stencil for backfacing fragments
+ * if two-sided stenciling is enabled.
+ */
+ if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) {
+ stencil = &dsa->stencil[1];
+ }
+ else {
+ stencil = &dsa->stencil[0];
+ }
+
+ /* Calculate the writemask. If the writemask is trivial (either
+ * all 0s, meaning that we don't need to calculate any stencil values
+ * because they're not going to change the stencil anyway, or all 1s,
+ * meaning that we have to calculate the stencil values but do not
+ * need to mask them), we can avoid generating code. All of the
+ * decisions below are made on the stencil state selected above.
+ */
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zfail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
+ need_to_calculate_stencil_values = FALSE;
+ need_to_writemask_stencil_values = FALSE;
+ }
+ else if (stencil->writemask == 0x0) {
+ /* All changes are writemasked out, so no need to calculate
+ * what those changes might be, and no need to write anything back.
+ */
+ need_to_calculate_stencil_values = FALSE;
+ need_to_writemask_stencil_values = FALSE;
+ }
+ else if (stencil->writemask == 0xff) {
+ /* Still trivial, but a little less so. We need to write the stencil
+ * values, but we don't need to mask them.
+ */
+ need_to_calculate_stencil_values = TRUE;
+ need_to_writemask_stencil_values = FALSE;
+ }
+ else {
+ /* The general case: calculate, mask, and write */
+ need_to_calculate_stencil_values = TRUE;
+ need_to_writemask_stencil_values = TRUE;
+
+ /* While we're here, generate code that loads the writemask.
+ * The mask is taken from the stencil state selected above, i.e.
+ * the same state whose writemask was just tested, so the decision
+ * made here and the value loaded cannot disagree when the
+ * back-face state is disabled.
+ */
+ spe_comment(f, 0, "Computing stencil writemask");
+ stencil_writemask_reg = spe_allocate_available_register(f);
+ spe_load_uint(f, stencil_writemask_reg, stencil->writemask);
+ }
+
+ /* At least one-sided stenciling must be on. Generate code that
+ * runs the stencil test on the selected stencil state, leaving
+ * the mask of passing stencil bits in stencil_pass_reg. This mask will
+ * be used both to mask the set of active pixels, and also to
+ * determine how the stencil buffer changes.
+ *
+ * The test itself does *not* change the value in mask_reg; mask_reg
+ * is updated separately below, once the fail mask has been derived.
+ */
+ spe_comment(f, 0, "Running basic stencil test");
+ stencil_pass_reg = spe_allocate_available_register(f);
+ gen_stencil_test(f, stencil, 0xff, mask_reg, fbS_reg, stencil_pass_reg);
+
+ /* Generate code that, given the mask of valid fragments and the
+ * mask of valid fragments that passed the stencil test, computes
+ * the mask of valid fragments that failed the stencil test. We
+ * have to do this before we run a depth test (because the
+ * depth test should not be performed on fragments that failed the
+ * stencil test, and because the depth test will update the
+ * mask of valid fragments based on the results of the depth test).
+ */
+ spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask");
+ stencil_fail_reg = spe_allocate_available_register(f);
+ spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg);
+ /* Now remove the stenciled-out pixels from the valid fragment mask,
+ * so we can later use the valid fragment mask in the depth test.
+ */
+ spe_and(f, mask_reg, mask_reg, stencil_pass_reg);
+
+ /* We may not need to calculate stencil values, if the writemask is off */
+ if (need_to_calculate_stencil_values) {
+ /* Generate code that calculates exactly which stencil values we need,
+ * without calculating the same value twice (say, if two different
+ * stencil ops have the same value). This code will also take into
+ * account whether the depth test is enabled (if the depth test is off,
+ * we don't need any of the zfail results, because the depth test always
+ * is considered to pass if it is disabled). Any register value that
+ * does not need to be calculated will come back with the same value
+ * that's in fbS_reg.
+ *
+ * This function will allocate a variant number of registers that
+ * will be released as part of the register set.
+ */
+ spe_comment(f, 0, facing == CELL_FACING_FRONT
+ ? "Computing front-facing stencil values"
+ : "Computing back-facing stencil values");
+ gen_get_stencil_values(f, stencil, dsa->depth.enabled, fbS_reg,
+ &stencil_fail_values, &stencil_pass_depth_fail_values,
+ &stencil_pass_depth_pass_values);
+ }
+
+ /* We now have all the stencil values we need. We also need
+ * the results of the depth test to figure out which
+ * stencil values will become the new stencil values. (Even if
+ * we aren't actually calculating stencil values, we need to apply
+ * the depth test if it's enabled.)
+ *
+ * The code generated by gen_depth_test() returns the results of the
+ * test in the given register, but also alters the mask_reg based
+ * on the results of the test.
+ */
+ if (dsa->depth.enabled) {
+ spe_comment(f, 0, "Running stencil depth test");
+ zmask_reg = spe_allocate_available_register(f);
+ modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg,
+ fbZ_reg, zmask_reg);
+ }
+
+ if (need_to_calculate_stencil_values) {
+
+ /* If we need to writemask the stencil values before going into
+ * the stencil buffer, we'll have to use a new register to
+ * hold the new values. If not, we can just keep using the
+ * current register.
+ */
+ if (need_to_writemask_stencil_values) {
+ newS_reg = spe_allocate_available_register(f);
+ spe_comment(f, 0, "Saving current stencil values for writemasking");
+ spe_move(f, newS_reg, fbS_reg);
+ }
+ else {
+ newS_reg = fbS_reg;
+ }
+
+ /* Merge in the selected stencil fail values */
+ if (stencil_fail_values != fbS_reg) {
+ spe_comment(f, 0, "Loading stencil fail values");
+ spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg);
+ modified_buffers = TRUE;
+ }
+
+ /* Same for the stencil pass/depth fail values. If this calculation
+ * is not needed (say, if depth test is off), then the
+ * stencil_pass_depth_fail_values register will be equal to fbS_reg
+ * and we'll skip the calculation.
+ */
+ if (stencil_pass_depth_fail_values != fbS_reg) {
+ /* We don't actually have a stencil pass/depth fail mask yet.
+ * Calculate it here from the stencil passing mask and the
+ * depth passing mask. Note that zmask_reg *must* have been
+ * set above if we're here.
+ */
+ uint stencil_pass_depth_fail_mask =
+ spe_allocate_available_register(f);
+
+ spe_comment(f, 0, "Loading stencil pass/depth fail values");
+ spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg);
+
+ spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values,
+ stencil_pass_depth_fail_mask);
+
+ spe_release_register(f, stencil_pass_depth_fail_mask);
+ modified_buffers = TRUE;
+ }
+
+ /* Same for the stencil pass/depth pass mask. Note that we
+ * *can* get here with zmask_reg being unset (if the depth
+ * test is off but the stencil test is on). In this case,
+ * we assume the depth test passes, and don't need to mask
+ * the stencil pass mask with the Z mask.
+ */
+ if (stencil_pass_depth_pass_values != fbS_reg) {
+ if (dsa->depth.enabled) {
+ uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f);
+ /* We'll need a separate register */
+ spe_comment(f, 0, "Loading stencil pass/depth pass values");
+ spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg);
+ spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask);
+ spe_release_register(f, stencil_pass_depth_pass_mask);
+ }
+ else {
+ /* We can use the same stencil-pass register */
+ spe_comment(f, 0, "Loading stencil pass values");
+ spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg);
+ }
+ modified_buffers = TRUE;
+ }
+
+ /* Almost done. If we need to writemask, do it now, leaving the
+ * results in the fbS_reg register passed in. If we don't need
+ * to writemask, then the results are *already* in the fbS_reg,
+ * so there's nothing more to do.
+ */
+
+ if (need_to_writemask_stencil_values && modified_buffers) {
+ /* The Select Bits command makes a fine writemask. Where
+ * the mask is 0, the first (original) values are retained,
+ * effectively masking out changes. Where the mask is 1, the
+ * second (new) values are retained, incorporating changes.
+ */
+ spe_comment(f, 0, "Writemasking new stencil values");
+ spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg);
+ }
+
+ } /* done calculating stencil values */
+
+ /* The stencil and/or depth values have been applied, and the
+ * mask_reg, fbS_reg, and fbZ_reg values have been updated.
+ * We're all done, except that we've allocated a fair number
+ * of registers that we didn't bother tracking. Release all
+ * those registers as part of the register set, and go home.
+ */
+ spe_comment(f, 0, "Releasing stencil register set");
+ spe_release_register_set(f);
+
+ /* Return TRUE if we could have modified the stencil and/or
+ * depth buffers.
+ */
+ return modified_buffers;
+}
+
+
+/**
+ * Generate depth and/or stencil test code.
+ *
+ * The generated code fetches the quad's packed Z/stencil values from the
+ * tile, splits them into separate Z and stencil registers, runs whichever
+ * tests are enabled, and stores the merged values back to the tile if a
+ * test reports that they may have changed.
+ *
+ * \param cell context
+ * \param dsa depth/stencil/alpha state
+ * \param f spe function to emit
+ * \param facing either CELL_FACING_FRONT or CELL_FACING_BACK
+ * \param mask_reg register containing the pixel alive/dead mask
+ * \param depth_tile_reg register containing address of z/stencil tile
+ * \param quad_offset_reg offset to quad from start of tile
+ * \param fragZ_reg register containing fragment Z values; converted in
+ * place from float to the integer form of the Z format
+ */
+static void
+gen_depth_stencil(struct cell_context *cell,
+ const struct pipe_depth_stencil_alpha_state *dsa,
+ struct spe_function *f,
+ uint facing,
+ int mask_reg,
+ int depth_tile_reg,
+ int quad_offset_reg,
+ int fragZ_reg)
+
+{
+ const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
+ boolean write_depth_stencil;
+
+ /* framebuffer's combined z/stencil values register */
+ int fbZS_reg = spe_allocate_available_register(f);
+
+ /* Framebuffer Z values register */
+ int fbZ_reg = spe_allocate_available_register(f);
+
+ /* Framebuffer stencil values register (may not be used) */
+ int fbS_reg = spe_allocate_available_register(f);
+
+ /* 24-bit mask register (may not be used) */
+ int zmask_reg = spe_allocate_available_register(f);
+
+ /**
+ * The following code:
+ * 1. fetches a quad of packed Z/S values from the framebuffer tile.
+ * 2. extracts the separate Z and S values from the packed values
+ * 3. converts fragment Z values from float in [0,1] to 32/24/16-bit ints
+ *
+ * The instructions for doing this are interleaved for better performance.
+ */
+ spe_comment(f, 0, "Fetch Z/stencil quad from tile");
+
+ switch(zs_format) {
+ case PIPE_FORMAT_S8Z24_UNORM: /* fall through */
+ case PIPE_FORMAT_X8Z24_UNORM:
+ /* prepare mask to extract Z vals from ZS vals */
+ spe_load_uint(f, zmask_reg, 0x00ffffff);
+
+ /* convert fragment Z from [0,1] to 32-bit ints */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+
+ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
+ spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+
+ /* right shift 32-bit fragment Z to 24 bits */
+ spe_rotmi(f, fragZ_reg, fragZ_reg, -8);
+
+ /* extract 24-bit Z values from ZS values by masking */
+ spe_and(f, fbZ_reg, fbZS_reg, zmask_reg);
+
+ /* extract 8-bit stencil values by shifting */
+ spe_rotmi(f, fbS_reg, fbZS_reg, -24);
+ break;
+
+ case PIPE_FORMAT_Z24S8_UNORM: /* fall through */
+ case PIPE_FORMAT_Z24X8_UNORM:
+ /* convert fragment Z from [0,1] to 32-bit ints */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+
+ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
+ spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+
+ /* right shift 32-bit fragment Z to 24 bits */
+ spe_rotmi(f, fragZ_reg, fragZ_reg, -8);
+
+ /* extract 24-bit Z values from ZS values by shifting */
+ spe_rotmi(f, fbZ_reg, fbZS_reg, -8);
+
+ /* extract 8-bit stencil values by masking */
+ spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff);
+ break;
+
+ case PIPE_FORMAT_Z32_UNORM:
+ /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */
+ spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg);
+
+ /* convert fragment Z from [0,1] to 32-bit ints */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+
+ /* No stencil, so can't do anything there */
+ break;
+
+ case PIPE_FORMAT_Z16_UNORM:
+ /* XXX This code for 16bpp Z is broken! */
+
+ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
+ spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+
+ /* Copy over 4 32-bit values */
+ spe_move(f, fbZ_reg, fbZS_reg);
+
+ /* convert Z from [0,1] to 16-bit ints */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+ spe_rotmi(f, fragZ_reg, fragZ_reg, -16);
+ /* No stencil */
+ break;
+
+ default:
+ ASSERT(0); /* invalid format */
+ }
+
+ /* If stencil is enabled, use the stencil-specific code
+ * generator to generate both the stencil and depth (if needed)
+ * tests. Otherwise, if only depth is enabled, generate
+ * a quick depth test. The test generators themselves will
+ * report back whether the depth/stencil buffer has to be
+ * written back.
+ */
+ if (dsa->stencil[0].enabled) {
+ /* This will perform the stencil and depth tests, and update
+ * the mask_reg, fbZ_reg, and fbS_reg as required by the
+ * tests.
+ */
+ ASSERT(fbS_reg >= 0);
+ spe_comment(f, 0, "Perform stencil test");
+
+ /* Note that fbZ_reg may not be set on entry, if stenciling
+ * is enabled but there's no Z-buffer. The
+ * gen_stencil_depth_test() function must ignore the
+ * fbZ_reg register if depth is not enabled.
+ */
+ write_depth_stencil = gen_stencil_depth_test(f, dsa, facing,
+ mask_reg, fragZ_reg,
+ fbZ_reg, fbS_reg);
+ }
+ else if (dsa->depth.enabled) {
+ /* Scratch register for the depth test's result mask. It gets its
+ * own name so that it does not shadow the 24-bit Z extraction
+ * mask (zmask_reg) declared at the top of this function.
+ */
+ int ztest_mask_reg = spe_allocate_available_register(f);
+ ASSERT(fbZ_reg >= 0);
+ spe_comment(f, 0, "Perform depth test");
+ write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg,
+ fbZ_reg, ztest_mask_reg);
+ spe_release_register(f, ztest_mask_reg);
+ }
+ else {
+ write_depth_stencil = FALSE;
+ }
+
+ if (write_depth_stencil) {
+ /* Merge latest Z and Stencil values into fbZS_reg.
+ * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
+ * fbS_reg has four 8-bit stencil values in bits [7..0].
+ */
+ spe_comment(f, 0, "Store quad's depth/stencil values in tile");
+ if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+ zs_format == PIPE_FORMAT_X8Z24_UNORM) {
+ spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
+ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
+ }
+ else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
+ zs_format == PIPE_FORMAT_Z24X8_UNORM) {
+ spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */
+ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
+ }
+ else if (zs_format == PIPE_FORMAT_Z32_UNORM) {
+ spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
+ }
+ else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
+ spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
+ }
+ else if (zs_format == PIPE_FORMAT_S8_UNORM) {
+ ASSERT(0); /* XXX to do */
+ }
+ else {
+ ASSERT(0); /* bad zs_format */
+ }
+
+ /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
+ spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+ }
+
+ /* Don't need these any more */
+ spe_release_register(f, fbZS_reg);
+ spe_release_register(f, fbZ_reg);
+ spe_release_register(f, fbS_reg);
+ spe_release_register(f, zmask_reg);
+}
+
/**
@@ -621,14 +2016,21 @@ gen_pack_colors(struct spe_function *f,
* should be much faster.
*
* \param cell the rendering context (in)
- * \param f the generated function (out)
+ * \param facing whether the generated code is for front-facing or
+ * back-facing fragments
+ * \param f the generated function (in/out); on input, the function
+ * must already have been initialized. On exit, the fragment ops
+ * have been appended to whatever instructions the function
+ * already contained.
*/
void
-cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
+cell_gen_fragment_function(struct cell_context *cell,
+ const uint facing,
+ struct spe_function *f)
{
- const struct pipe_depth_stencil_alpha_state *dsa =
- &cell->depth_stencil->base;
- const struct pipe_blend_state *blend = &cell->blend->base;
+ const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil;
+ const struct pipe_blend_state *blend = cell->blend;
+ const struct pipe_blend_color *blend_color = &cell->blend_color;
const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
@@ -643,15 +2045,23 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
const int fragA_reg = 11; /* vector float */
const int mask_reg = 12; /* vector uint */
+ ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK);
+
/* offset of quad from start of tile
* XXX assuming 4-byte pixels for color AND Z/stencil!!!!
*/
int quad_offset_reg;
int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
- int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */
- spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ spe_print_code(f, TRUE);
+ spe_indent(f, 8);
+ spe_comment(f, -4, facing == CELL_FACING_FRONT
+ ? "Begin front-facing per-fragment ops"
+ : "Begin back-facing per-fragment ops");
+ }
+
spe_allocate_register(f, x_reg);
spe_allocate_register(f, y_reg);
spe_allocate_register(f, color_tile_reg);
@@ -665,7 +2075,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
quad_offset_reg = spe_allocate_available_register(f);
fbRGBA_reg = spe_allocate_available_register(f);
- fbZS_reg = spe_allocate_available_register(f);
/* compute offset of quad from start of tile, in bytes */
{
@@ -674,8 +2083,9 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
ASSERT(TILE_SIZE == 32);
- spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
+ spe_comment(f, 0, "Compute quad offset within tile");
spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */
+ spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */
spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */
spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */
@@ -684,139 +2094,33 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_release_register(f, y2_reg);
}
-
+ /* Generate the alpha test, if needed. */
if (dsa->alpha.enabled) {
gen_alpha_test(dsa, f, mask_reg, fragA_reg);
}
+ /* generate depth and/or stencil test code */
if (dsa->depth.enabled || dsa->stencil[0].enabled) {
- const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
- boolean write_depth_stencil;
-
- int fbZ_reg = spe_allocate_available_register(f); /* Z values */
- int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
-
- /* fetch quad of depth/stencil values from tile at (x,y) */
- /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
- spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
-
- if (dsa->depth.enabled) {
- /* Extract Z bits from fbZS_reg into fbZ_reg */
- if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
- zs_format == PIPE_FORMAT_X8Z24_UNORM) {
- int mask_reg = spe_allocate_available_register(f);
- spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */
- spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */
- spe_release_register(f, mask_reg);
- /* OK, fbZ_reg has four 24-bit Z values now */
- }
- else {
- /* XXX handle other z/stencil formats */
- ASSERT(0);
- }
-
- /* Convert fragZ values from float[4] to uint[4] */
- if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
- zs_format == PIPE_FORMAT_X8Z24_UNORM ||
- zs_format == PIPE_FORMAT_Z24S8_UNORM ||
- zs_format == PIPE_FORMAT_Z24X8_UNORM) {
- /* 24-bit Z values */
- int scale_reg = spe_allocate_available_register(f);
-
- /* scale_reg[0,1,2,3] = float(2^24-1) */
- spe_load_float(f, scale_reg, (float) 0xffffff);
-
- /* XXX these two instructions might be combined */
- spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
- spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */
-
- spe_release_register(f, scale_reg);
- }
- else {
- /* XXX handle 16-bit Z format */
- ASSERT(0);
- }
- }
-
- if (dsa->stencil[0].enabled) {
- /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */
- if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
- zs_format == PIPE_FORMAT_X8Z24_UNORM) {
- /* XXX extract with a shift */
- ASSERT(0);
- }
- else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
- zs_format == PIPE_FORMAT_Z24X8_UNORM) {
- /* XXX extract with a mask */
- ASSERT(0);
- }
- }
-
-
- if (dsa->stencil[0].enabled) {
- /* XXX this may involve depth testing too */
- // gen_stencil_test(dsa, f, ... );
- ASSERT(0);
- }
- else if (dsa->depth.enabled) {
- int zmask_reg = spe_allocate_available_register(f);
- gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
- spe_release_register(f, zmask_reg);
- }
-
- /* do we need to write Z and/or Stencil back into framebuffer? */
- write_depth_stencil = (dsa->depth.writemask |
- dsa->stencil[0].write_mask |
- dsa->stencil[1].write_mask);
-
- if (write_depth_stencil) {
- /* Merge latest Z and Stencil values into fbZS_reg.
- * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
- * fbS_reg has four 8-bit Z values in bits [7..0].
- */
- if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
- zs_format == PIPE_FORMAT_X8Z24_UNORM) {
- spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
- spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
- }
- else if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
- zs_format == PIPE_FORMAT_X8Z24_UNORM) {
- /* XXX to do */
- ASSERT(0);
- }
- else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
- /* XXX to do */
- ASSERT(0);
- }
- else if (zs_format == PIPE_FORMAT_S8_UNORM) {
- /* XXX to do */
- ASSERT(0);
- }
- else {
- /* bad zs_format */
- ASSERT(0);
- }
-
- /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
- spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
- }
-
- spe_release_register(f, fbZ_reg);
- spe_release_register(f, fbS_reg);
+ gen_depth_stencil(cell, dsa, f,
+ facing,
+ mask_reg,
+ depth_tile_reg,
+ quad_offset_reg,
+ fragZ_reg);
}
-
/* Get framebuffer quad/colors. We'll need these for blending,
* color masking, and to obey the quad/pixel mask.
* Load: fbRGBA_reg = memory[color_tile + quad_offset]
* Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
* we could skip this load.
*/
+ spe_comment(f, 0, "Fetch quad colors from tile");
spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
-
if (blend->blend_enable) {
- gen_blend(blend, f, color_format,
+ spe_comment(f, 0, "Perform blending");
+ gen_blend(blend, blend_color, f, color_format,
fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
}
@@ -829,19 +2133,21 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
int rgba_reg = spe_allocate_available_register(f);
/* Pack four float colors as four 32-bit int colors */
+ spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors");
gen_pack_colors(f, color_format,
fragR_reg, fragG_reg, fragB_reg, fragA_reg,
rgba_reg);
if (blend->logicop_enable) {
+ spe_comment(f, 0, "Compute logic op");
gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
}
- if (blend->colormask != 0xf) {
- gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
+ if (blend->colormask != PIPE_MASK_RGBA) {
+ spe_comment(f, 0, "Compute color mask");
+ gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg);
}
-
/* Mix fragment colors with framebuffer colors using the quad/pixel mask:
* if (mask[i])
* rgba[i] = rgba[i];
@@ -853,6 +2159,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
/* Store updated quad in tile:
* memory[color_tile + quad_offset] = rgba_reg;
*/
+ spe_comment(f, 0, "Store quad colors into color tile");
spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
spe_release_register(f, rgba_reg);
@@ -862,9 +2169,13 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
-
spe_release_register(f, fbRGBA_reg);
- spe_release_register(f, fbZS_reg);
spe_release_register(f, quad_offset_reg);
-}
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ char buffer[1024];
+ sprintf(buffer, "End %s-facing per-fragment ops: %d instructions",
+ facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst);
+ spe_comment(f, -4, buffer);
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
index b59de198dc..21b35d1faf 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
@@ -31,7 +31,7 @@
extern void
-cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f);
+cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f);
#endif /* CELL_GEN_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
index 475c6ef0ce..facd9551fe 100644
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -35,9 +35,9 @@
#include "draw/draw_context.h"
#include "cell_context.h"
#include "cell_flush.h"
+#include "cell_pipe_state.h"
#include "cell_state.h"
#include "cell_texture.h"
-#include "cell_state_per_fragment.h"
@@ -45,24 +45,18 @@ static void *
cell_create_blend_state(struct pipe_context *pipe,
const struct pipe_blend_state *blend)
{
- struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state));
-
- (void) memcpy(cb, blend, sizeof(*blend));
-#if 0
- cell_generate_alpha_blend(cb);
-#endif
- return cb;
+ return mem_dup(blend, sizeof(*blend));
}
static void
-cell_bind_blend_state(struct pipe_context *pipe, void *state)
+cell_bind_blend_state(struct pipe_context *pipe, void *blend)
{
struct cell_context *cell = cell_context(pipe);
draw_flush(cell->draw);
- cell->blend = (struct cell_blend_state *) state;
+ cell->blend = (struct pipe_blend_state *) blend;
cell->dirty |= CELL_NEW_BLEND;
}
@@ -70,10 +64,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *state)
static void
cell_delete_blend_state(struct pipe_context *pipe, void *blend)
{
- struct cell_blend_state *cb = (struct cell_blend_state *) blend;
-
- spe_release_func(& cb->code);
- FREE(cb);
+ FREE(blend);
}
@@ -95,41 +86,29 @@ cell_set_blend_color(struct pipe_context *pipe,
static void *
cell_create_depth_stencil_alpha_state(struct pipe_context *pipe,
- const struct pipe_depth_stencil_alpha_state *depth_stencil)
+ const struct pipe_depth_stencil_alpha_state *dsa)
{
- struct cell_depth_stencil_alpha_state *cdsa =
- MALLOC(sizeof(struct cell_depth_stencil_alpha_state));
-
- (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil));
-#if 0
- cell_generate_depth_stencil_test(cdsa);
-#endif
- return cdsa;
+ return mem_dup(dsa, sizeof(*dsa));
}
static void
cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
- void *depth_stencil)
+ void *dsa)
{
struct cell_context *cell = cell_context(pipe);
draw_flush(cell->draw);
- cell->depth_stencil =
- (struct cell_depth_stencil_alpha_state *) depth_stencil;
+ cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa;
cell->dirty |= CELL_NEW_DEPTH_STENCIL;
}
static void
-cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth)
+cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa)
{
- struct cell_depth_stencil_alpha_state *cdsa =
- (struct cell_depth_stencil_alpha_state *) depth;
-
- spe_release_func(& cdsa->code);
- FREE(cdsa);
+ FREE(dsa);
}
@@ -191,24 +170,23 @@ cell_set_polygon_stipple( struct pipe_context *pipe,
static void *
cell_create_rasterizer_state(struct pipe_context *pipe,
- const struct pipe_rasterizer_state *setup)
+ const struct pipe_rasterizer_state *rasterizer)
{
- struct pipe_rasterizer_state *state
- = MALLOC(sizeof(struct pipe_rasterizer_state));
- memcpy(state, setup, sizeof(struct pipe_rasterizer_state));
- return state;
+ return mem_dup(rasterizer, sizeof(*rasterizer));
}
static void
-cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup)
+cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast)
{
+ struct pipe_rasterizer_state *rasterizer =
+ (struct pipe_rasterizer_state *) rast;
struct cell_context *cell = cell_context(pipe);
/* pass-through to draw module */
- draw_set_rasterizer_state(cell->draw, setup);
+ draw_set_rasterizer_state(cell->draw, rasterizer);
- cell->rasterizer = (struct pipe_rasterizer_state *)setup;
+ cell->rasterizer = rasterizer;
cell->dirty |= CELL_NEW_RASTERIZER;
}
@@ -235,17 +213,24 @@ cell_bind_sampler_states(struct pipe_context *pipe,
unsigned num, void **samplers)
{
struct cell_context *cell = cell_context(pipe);
+ uint i, changed = 0x0;
assert(num <= CELL_MAX_SAMPLERS);
draw_flush(cell->draw);
- memcpy(cell->sampler, samplers, num * sizeof(void *));
- memset(&cell->sampler[num], 0, (CELL_MAX_SAMPLERS - num) *
- sizeof(void *));
- cell->num_samplers = num;
+ for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
+ struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL;
+ if (cell->sampler[i] != new_samp) {
+ cell->sampler[i] = new_samp;
+ changed |= (1 << i);
+ }
+ }
- cell->dirty |= CELL_NEW_SAMPLER;
+ if (changed) {
+ cell->dirty |= CELL_NEW_SAMPLER;
+ cell->dirty_samplers |= changed;
+ }
}
@@ -263,30 +248,101 @@ cell_set_sampler_textures(struct pipe_context *pipe,
unsigned num, struct pipe_texture **texture)
{
struct cell_context *cell = cell_context(pipe);
- uint i;
+ uint i, changed = 0x0;
assert(num <= CELL_MAX_SAMPLERS);
- /* Check for no-op */
- if (num == cell->num_textures &&
- !memcmp(cell->texture, texture, num * sizeof(struct pipe_texture *)))
- return;
-
- draw_flush(cell->draw);
-
for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- struct pipe_texture *tex = i < num ? texture[i] : NULL;
+ struct cell_texture *new_tex = cell_texture(i < num ? texture[i] : NULL);
+ struct cell_texture *old_tex = cell->texture[i];
+ if (old_tex != new_tex) {
+
+ pipe_texture_reference((struct pipe_texture **) &cell->texture[i],
+ (struct pipe_texture *) new_tex);
- pipe_texture_reference((struct pipe_texture **) &cell->texture[i], tex);
+ changed |= (1 << i);
+ }
}
+
cell->num_textures = num;
- cell_update_texture_mapping(cell);
+ if (changed) {
+ cell->dirty |= CELL_NEW_TEXTURE;
+ cell->dirty_textures |= changed;
+ }
+}
+
- cell->dirty |= CELL_NEW_TEXTURE;
+/**
+ * Map color and z/stencil framebuffer surfaces.
+ *
+ * For each bound surface a texture transfer covering the whole surface
+ * (0, 0, width, height) is requested with PIPE_TRANSFER_READ_WRITE and
+ * then mapped. The transfer object and the mapped pointer are stored in
+ * the context: cbuf_transfer[]/cbuf_map[] for color, and
+ * zsbuf_transfer/zsbuf_map for depth/stencil. Surfaces that are not
+ * bound (NULL) are skipped and their context fields are left untouched.
+ *
+ * NOTE(review): the results of get_tex_transfer() and transfer_map() are
+ * used without being checked -- confirm whether either can fail here.
+ *
+ * \param cell the rendering context whose framebuffer surfaces are mapped
+ */
+static void
+cell_map_surfaces(struct cell_context *cell)
+{
+ struct pipe_screen *screen = cell->pipe.screen;
+ uint i;
+
+ for (i = 0; i < 1; i++) { /* only color buffer 0 is mapped */
+ struct pipe_surface *ps = cell->framebuffer.cbufs[i];
+ if (ps) {
+ cell->cbuf_transfer[i] =
+ screen->get_tex_transfer(screen, ps->texture, ps->face,
+ ps->level, ps->zslice,
+ PIPE_TRANSFER_READ_WRITE,
+ 0, 0, ps->width, ps->height);
+
+ cell->cbuf_map[i] =
+ screen->transfer_map(screen, cell->cbuf_transfer[i]);
+ }
+ }
+
+ {
+ struct pipe_surface *ps = cell->framebuffer.zsbuf;
+ if (ps) {
+ cell->zsbuf_transfer =
+ screen->get_tex_transfer(screen, ps->texture, ps->face,
+ ps->level, ps->zslice,
+ PIPE_TRANSFER_READ_WRITE,
+ 0, 0, ps->width, ps->height);
+
+ cell->zsbuf_map =
+ screen->transfer_map(screen, cell->zsbuf_transfer);
+ }
+ }
}
+/**
+ * Unmap color and z/stencil framebuffer surfaces.
+ *
+ * Walks all PIPE_MAX_COLOR_BUFS color slots plus the z/stencil slot.
+ * A slot is only processed when both its transfer object and its mapped
+ * pointer are non-NULL; in that case the transfer is unmapped, the
+ * mapped pointer is reset to NULL and the transfer object is released.
+ * The asserts expect tex_transfer_release() to reset the transfer
+ * pointer it is handed to NULL.
+ *
+ * \param cell the rendering context whose framebuffer surfaces are unmapped
+ */
+static void
+cell_unmap_surfaces(struct cell_context *cell)
+{
+ struct pipe_screen *screen = cell->pipe.screen;
+ uint i;
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (cell->cbuf_transfer[i] && cell->cbuf_map[i]) {
+ /* unmap color buffer/surface [i] */
+ screen->transfer_unmap(screen, cell->cbuf_transfer[i]);
+ cell->cbuf_map[i] = NULL;
+
+ /* get rid of transfer object [i] */
+ screen->tex_transfer_release(screen, &cell->cbuf_transfer[i]);
+ assert(cell->cbuf_transfer[i] == NULL);
+ }
+ }
+
+ if (cell->zsbuf_transfer && cell->zsbuf_map) {
+ screen->transfer_unmap(screen, cell->zsbuf_transfer);
+ cell->zsbuf_map = NULL;
+
+ /* get rid of transfer object */
+ screen->tex_transfer_release(screen, &cell->zsbuf_transfer);
+ assert(cell->zsbuf_transfer == NULL);
+ }
+}
+
static void
cell_set_framebuffer_state(struct pipe_context *pipe,
@@ -295,24 +351,10 @@ cell_set_framebuffer_state(struct pipe_context *pipe,
struct cell_context *cell = cell_context(pipe);
if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) {
- struct pipe_surface *csurf = fb->cbufs[0];
- struct pipe_surface *zsurf = fb->zsbuf;
uint i;
- uint flags = (PIPE_BUFFER_USAGE_GPU_WRITE |
- PIPE_BUFFER_USAGE_GPU_READ);
/* unmap old surfaces */
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
- if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) {
- pipe_surface_unmap(cell->framebuffer.cbufs[i]);
- cell->cbuf_map[i] = NULL;
- }
- }
-
- if (cell->framebuffer.zsbuf && cell->zsbuf_map) {
- pipe_surface_unmap(cell->framebuffer.zsbuf);
- cell->zsbuf_map = NULL;
- }
+ cell_unmap_surfaces(cell);
/* Finish any pending rendering to the current surface before
* installing a new surface!
@@ -324,18 +366,14 @@ cell_set_framebuffer_state(struct pipe_context *pipe,
*/
cell->framebuffer.width = fb->width;
cell->framebuffer.height = fb->height;
- cell->framebuffer.num_cbufs = fb->num_cbufs;
+ cell->framebuffer.nr_cbufs = fb->nr_cbufs;
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]);
}
pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf);
/* map new surfaces */
- if (csurf)
- cell->cbuf_map[0] = pipe_surface_map(csurf, flags);
-
- if (zsurf)
- cell->zsbuf_map = pipe_surface_map(zsurf, flags);
+ cell_map_surfaces(cell);
cell->dirty |= CELL_NEW_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c
index dd25ae880e..79cb8df82f 100644
--- a/src/gallium/drivers/cell/ppu/cell_render.c
+++ b/src/gallium/drivers/cell/ppu/cell_render.c
@@ -152,6 +152,7 @@ cell_flush_prim_buffer(struct cell_context *cell)
struct cell_command_render *render = &cell_global.command[i].render;
render->prim_type = PIPE_PRIM_TRIANGLES;
render->num_verts = cell->prim_buffer.num_verts;
+ render->front_winding = cell->rasterizer->front_winding;
render->vertex_size = cell->vertex_info->size * 4;
render->xmin = cell->prim_buffer.xmin;
render->ymin = cell->prim_buffer.ymin;
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c
index 139b3719b6..512d85d352 100644
--- a/src/gallium/drivers/cell/ppu/cell_screen.c
+++ b/src/gallium/drivers/cell/ppu/cell_screen.c
@@ -27,7 +27,8 @@
#include "util/u_memory.h"
-#include "pipe/p_winsys.h"
+#include "util/u_simple_screen.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
@@ -58,9 +59,9 @@ cell_get_param(struct pipe_screen *screen, int param)
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return CELL_MAX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
- return 0;
+ return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
- return 0;
+ return 1;
case PIPE_CAP_GLSL:
return 1;
case PIPE_CAP_S3TC:
@@ -68,19 +69,23 @@ cell_get_param(struct pipe_screen *screen, int param)
case PIPE_CAP_ANISOTROPIC_FILTER:
return 0;
case PIPE_CAP_POINT_SPRITE:
- return 0;
+ return 1;
case PIPE_CAP_MAX_RENDER_TARGETS:
return 1;
case PIPE_CAP_OCCLUSION_QUERY:
- return 0;
+ return 1;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
- return 0;
+ return 10;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
- return 12; /* max 2Kx2K */
+ return CELL_MAX_TEXTURE_LEVELS;
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
return 8; /* max 128x128x128 */
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- return 12; /* max 2Kx2K */
+ return CELL_MAX_TEXTURE_LEVELS;
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1; /* XXX not really true */
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return 0; /* XXX to do */
default:
return 0;
}
@@ -165,6 +170,7 @@ cell_create_screen(struct pipe_winsys *winsys)
screen->is_format_supported = cell_is_format_supported;
cell_init_screen_texture_funcs(screen);
+ u_simple_screen_init(screen);
return screen;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c
index 9508227e29..28e5e6d706 100644
--- a/src/gallium/drivers/cell/ppu/cell_spu.c
+++ b/src/gallium/drivers/cell/ppu/cell_spu.c
@@ -36,6 +36,7 @@
#include "cell_spu.h"
#include "pipe/p_format.h"
#include "pipe/p_state.h"
+#include "util/u_memory.h"
#include "cell/common.h"
@@ -52,6 +53,35 @@ struct cell_global_info cell_global;
/**
+ * Scan /proc/cpuinfo to determine the timebase for the system.
+ * This is used by the SPUs to convert 'decrementer' ticks to seconds.
+ * There may be a better way to get this value...
+ *
+ * The first line that starts with "timebase" and contains a ':' is
+ * used; the number following the colon is parsed as decimal.
+ *
+ * \return the timebase value, or 0 if /proc/cpuinfo could not be opened
+ *         or contains no usable "timebase" line.
+ */
+static unsigned
+get_timebase(void)
+{
+   FILE *f = fopen("/proc/cpuinfo", "r");
+   unsigned timebase = 0;   /* 0 means "not found"; never return garbage */
+   char line[80];
+
+   assert(f);
+   if (!f) {
+      /* release builds: do not hand a NULL stream to fgets() */
+      return 0;
+   }
+
+   /* Let fgets() drive the loop.  Testing feof() before reading would
+    * process a stale (or uninitialized) buffer once more at end of file.
+    */
+   while (fgets(line, sizeof(line), f)) {
+      if (strncmp(line, "timebase", 8) == 0) {
+         const char *colon = strchr(line, ':');
+         if (colon) {
+            /* strtoul() skips the blanks after the colon, so we need not
+             * assume exactly one separator character.
+             */
+            timebase = (unsigned) strtoul(colon + 1, NULL, 10);
+            break;
+         }
+      }
+   }
+   fclose(f);
+
+   return timebase;
+}
+
+
+/**
* Write a 1-word message to the given SPE mailbox.
*/
void
@@ -114,6 +144,7 @@ cell_start_spus(struct cell_context *cell)
{
static boolean one_time_init = FALSE;
uint i, j;
+ uint timebase = get_timebase();
if (one_time_init) {
fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
@@ -123,24 +154,29 @@ cell_start_spus(struct cell_context *cell)
one_time_init = TRUE;
- assert(cell->num_spus <= MAX_SPUS);
-
- ASSERT_ALIGN16(&cell_global.command[0]);
- ASSERT_ALIGN16(&cell_global.command[1]);
+ assert(cell->num_spus <= CELL_MAX_SPUS);
ASSERT_ALIGN16(&cell_global.inits[0]);
ASSERT_ALIGN16(&cell_global.inits[1]);
+ /*
+ * Initialize the global 'inits' structure for each SPU.
+ * A pointer to the init struct will be passed to each SPU.
+ * The SPUs will then each grab their init info with mfc_get().
+ */
for (i = 0; i < cell->num_spus; i++) {
cell_global.inits[i].id = i;
cell_global.inits[i].num_spus = cell->num_spus;
cell_global.inits[i].debug_flags = cell->debug_flags;
- cell_global.inits[i].cmd = &cell_global.command[i];
+ cell_global.inits[i].inv_timebase = 1000.0f / timebase;
+
for (j = 0; j < CELL_NUM_BUFFERS; j++) {
cell_global.inits[i].buffers[j] = cell->buffer[j];
}
cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0];
+ cell_global.inits[i].spu_functions = &cell->spu_functions;
+
cell_global.spe_contexts[i] = spe_context_create(0, NULL);
if (!cell_global.spe_contexts[i]) {
fprintf(stderr, "spe_context_create() failed\n");
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h
index 137f26612e..c93958a9ed 100644
--- a/src/gallium/drivers/cell/ppu/cell_spu.h
+++ b/src/gallium/drivers/cell/ppu/cell_spu.h
@@ -30,14 +30,12 @@
#include <libspe2.h>
-#include <libmisc.h>
+#include <pthread.h>
#include "cell/common.h"
#include "cell_context.h"
-#define MAX_SPUS 8
-
/**
* Global vars, for now anyway.
*/
@@ -46,14 +44,13 @@ struct cell_global_info
/**
* SPU/SPE handles, etc
*/
- spe_context_ptr_t spe_contexts[MAX_SPUS];
- pthread_t spe_threads[MAX_SPUS];
+ spe_context_ptr_t spe_contexts[CELL_MAX_SPUS];
+ pthread_t spe_threads[CELL_MAX_SPUS];
/**
- * Data sent to SPUs
+ * Data sent to SPUs at start-up
*/
- struct cell_init_info inits[MAX_SPUS];
- struct cell_command command[MAX_SPUS];
+ struct cell_init_info inits[CELL_MAX_SPUS];
};
diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h
index a7771a55a3..b193170f9c 100644
--- a/src/gallium/drivers/cell/ppu/cell_state.h
+++ b/src/gallium/drivers/cell/ppu/cell_state.h
@@ -44,8 +44,9 @@
#define CELL_NEW_TEXTURE 0x800
#define CELL_NEW_VERTEX 0x1000
#define CELL_NEW_VS 0x2000
-#define CELL_NEW_CONSTANTS 0x4000
-#define CELL_NEW_VERTEX_INFO 0x8000
+#define CELL_NEW_VS_CONSTANTS 0x4000
+#define CELL_NEW_FS_CONSTANTS 0x8000
+#define CELL_NEW_VERTEX_INFO 0x10000
extern void
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 2da3097983..ff529fe22c 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -25,26 +25,163 @@
*
**************************************************************************/
+#include "pipe/p_inlines.h"
#include "util/u_memory.h"
#include "cell_context.h"
#include "cell_gen_fragment.h"
#include "cell_state.h"
#include "cell_state_emit.h"
-#include "cell_state_per_fragment.h"
#include "cell_batch.h"
#include "cell_texture.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
+/**
+ * Find/create a cell_command_fragment_ops object corresponding to the
+ * current blend/stencil/z/colormask/etc. state.
+ *
+ * The lookup key is built from the bound blend state, the blend color,
+ * the depth/stencil/alpha state, and the formats of color buffer 0 and
+ * the z/stencil buffer (PIPE_FORMAT_NONE when the buffer is not bound).
+ * cell->blend and cell->depth_stencil are dereferenced unconditionally,
+ * so both must be bound when this is called.
+ *
+ * On a cache miss, SPE code is generated separately for front-facing and
+ * back-facing fragments, each padded with lnop instructions to a
+ * multiple of 8 bytes. If the two code sequences are byte-identical,
+ * only one copy is stored and both code indexes refer to it; otherwise
+ * the back-facing code is appended after the front-facing code. The new
+ * object is inserted into cell->fragment_ops_cache and the temporary
+ * code buffers are released.
+ *
+ * NOTE(review): the result of CALLOC_VARIANT_LENGTH_STRUCT() is used
+ * without a NULL check -- confirm the allocation-failure policy.
+ *
+ * \param cell the rendering context supplying the state and the cache
+ * \return the cached or newly created fragment ops command
+ */
+static struct cell_command_fragment_ops *
+lookup_fragment_ops(struct cell_context *cell)
+{
+ struct cell_fragment_ops_key key;
+ struct cell_command_fragment_ops *ops;
+
+ /*
+ * Build key
+ */
+ memset(&key, 0, sizeof(key));
+ key.blend = *cell->blend;
+ key.blend_color = cell->blend_color;
+ key.dsa = *cell->depth_stencil;
+
+ if (cell->framebuffer.cbufs[0])
+ key.color_format = cell->framebuffer.cbufs[0]->format;
+ else
+ key.color_format = PIPE_FORMAT_NONE;
+
+ if (cell->framebuffer.zsbuf)
+ key.zs_format = cell->framebuffer.zsbuf->format;
+ else
+ key.zs_format = PIPE_FORMAT_NONE;
+
+ /*
+ * Look up key in cache.
+ */
+ ops = (struct cell_command_fragment_ops *)
+ util_keymap_lookup(cell->fragment_ops_cache, &key);
+
+ /*
+ * If not found, create/save new fragment ops command.
+ */
+ if (!ops) {
+ struct spe_function spe_code_front, spe_code_back;
+ unsigned int facing_dependent, total_code_size;
+
+ if (0)
+ debug_printf("**** Create New Fragment Ops\n");
+
+ /* Prepare the buffer that will hold the generated code. The
+ * "0" passed in for the size means that the SPE code will
+ * use a default size.
+ */
+ spe_init_func(&spe_code_front, 0);
+ spe_init_func(&spe_code_back, 0);
+
+ /* Generate new code. Always generate new code for both front-facing
+ * and back-facing fragments, even if it's the same code in both
+ * cases.
+ */
+ cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front);
+ cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back);
+
+ /* Make sure the code is a multiple of 8 bytes long; this is
+ * required to ensure that the dual pipe instruction alignment
+ * is correct. It's also important for the SPU unpacking,
+ * which assumes 8-byte boundaries.
+ */
+ unsigned int front_code_size = spe_code_size(&spe_code_front);
+ while (front_code_size % 8 != 0) {
+ spe_lnop(&spe_code_front);
+ front_code_size = spe_code_size(&spe_code_front);
+ }
+ unsigned int back_code_size = spe_code_size(&spe_code_back);
+ while (back_code_size % 8 != 0) {
+ spe_lnop(&spe_code_back);
+ back_code_size = spe_code_size(&spe_code_back);
+ }
+
+ /* Determine whether the code we generated is facing-dependent, by
+ * determining whether the generated code is different for the front-
+ * and back-facing fragments.
+ */
+ if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) {
+ /* Code is identical; only need one copy. */
+ facing_dependent = 0;
+ total_code_size = front_code_size;
+ }
+ else {
+ /* Code is different for front-facing and back-facing fragments.
+ * Need to send both copies.
+ */
+ facing_dependent = 1;
+ total_code_size = front_code_size + back_code_size;
+ }
+
+ /* alloc new fragment ops command. Note that this structure
+ * has variant length based on the total code size required.
+ */
+ ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size);
+ /* populate the new cell_command_fragment_ops object */
+ ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS;
+ ops->total_code_size = total_code_size;
+ ops->front_code_index = 0;
+ memcpy(ops->code, spe_code_front.store, front_code_size);
+ if (facing_dependent) {
+ /* We have separate front- and back-facing code. Append the
+ * back-facing code to the buffer. Be careful because the code
+ * size is in bytes, but the buffer is of unsigned elements.
+ */
+ ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]);
+ memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size);
+ }
+ else {
+ /* Use the same code for front- and back-facing fragments */
+ ops->back_code_index = ops->front_code_index;
+ }
+
+ /* Set the fields for the fallback case. Note that these fields
+ * (and the whole fallback case) will eventually go away.
+ */
+ ops->dsa = *cell->depth_stencil;
+ ops->blend = *cell->blend;
+ ops->blend_color = cell->blend_color;
+
+ /* insert cell_command_fragment_ops object into keymap/cache */
+ util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL);
+
+ /* release rtasm buffer */
+ spe_release_func(&spe_code_front);
+ spe_release_func(&spe_code_back);
+ }
+ else {
+ if (0)
+ debug_printf("**** Re-use Fragment Ops\n");
+ }
+
+ return ops;
+}
+
+
+
static void
emit_state_cmd(struct cell_context *cell, uint cmd,
const void *state, uint state_size)
{
- uint64_t *dst = (uint64_t *)
- cell_batch_alloc(cell, ROUNDUP8(sizeof(uint64_t) + state_size));
+ uint32_t *dst = (uint32_t *)
+ cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size));
*dst = cmd;
- memcpy(dst + 1, state, state_size);
+ memcpy(dst + 4, state, state_size);
}
@@ -58,9 +195,10 @@ cell_emit_state(struct cell_context *cell)
if (cell->dirty & CELL_NEW_FRAMEBUFFER) {
struct pipe_surface *cbuf = cell->framebuffer.cbufs[0];
struct pipe_surface *zbuf = cell->framebuffer.zsbuf;
+ STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0);
struct cell_command_framebuffer *fb
- = cell_batch_alloc(cell, sizeof(*fb));
- fb->opcode = CELL_CMD_STATE_FRAMEBUFFER;
+ = cell_batch_alloc16(cell, sizeof(*fb));
+ fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER;
fb->color_start = cell->cbuf_map[0];
fb->color_format = cbuf->format;
fb->depth_start = cell->zsbuf_map;
@@ -73,11 +211,20 @@ cell_emit_state(struct cell_context *cell)
#endif
}
+ if (cell->dirty & (CELL_NEW_RASTERIZER)) {
+ STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0);
+ struct cell_command_rasterizer *rast =
+ cell_batch_alloc16(cell, sizeof(*rast));
+ rast->opcode[0] = CELL_CMD_STATE_RASTERIZER;
+ rast->rasterizer = *cell->rasterizer;
+ }
+
if (cell->dirty & (CELL_NEW_FS)) {
/* Send new fragment program to SPUs */
+ STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0);
struct cell_command_fragment_program *fp
- = cell_batch_alloc(cell, sizeof(*fp));
- fp->opcode = CELL_CMD_STATE_FRAGMENT_PROGRAM;
+ = cell_batch_alloc16(cell, sizeof(*fp));
+ fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM;
fp->num_inst = cell->fs->code.num_inst;
memcpy(&fp->code, cell->fs->code.store,
SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
@@ -90,59 +237,83 @@ cell_emit_state(struct cell_context *cell)
}
}
+ if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) {
+ const uint shader = PIPE_SHADER_FRAGMENT;
+ const uint num_const = cell->constants[shader].buffer->size / sizeof(float);
+ uint i, j;
+ float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float)));
+ uint32_t *ibuf = (uint32_t *) buf;
+ const float *constants = pipe_buffer_map(cell->pipe.screen,
+ cell->constants[shader].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS;
+ ibuf[4] = num_const;
+ j = 8;
+ for (i = 0; i < num_const; i++) {
+ buf[j++] = constants[i];
+ }
+ pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer);
+ }
+
if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
CELL_NEW_DEPTH_STENCIL |
CELL_NEW_BLEND)) {
- /* XXX we don't want to always do codegen here. We should have
- * a hash/lookup table to cache previous results...
- */
- struct cell_command_fragment_ops *fops
- = cell_batch_alloc(cell, sizeof(*fops));
- struct spe_function spe_code;
-
- /* generate new code */
- cell_gen_fragment_function(cell, &spe_code);
- /* put the new code into the batch buffer */
- fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
- memcpy(&fops->code, spe_code.store,
- SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
- fops->dsa = cell->depth_stencil->base;
- fops->blend = cell->blend->base;
- /* free codegen buffer */
- spe_release_func(&spe_code);
+ struct cell_command_fragment_ops *fops, *fops_cmd;
+ /* Note that cell_command_fragment_ops is a variant-sized record */
+ fops = lookup_fragment_ops(cell);
+ fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + fops->total_code_size));
+ memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size);
}
if (cell->dirty & CELL_NEW_SAMPLER) {
uint i;
for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- if (cell->sampler[i]) {
- struct cell_command_sampler *sampler
- = cell_batch_alloc(cell, sizeof(*sampler));
- sampler->opcode = CELL_CMD_STATE_SAMPLER;
- sampler->unit = i;
- sampler->state = *cell->sampler[i];
+ if (cell->dirty_samplers & (1 << i)) {
+ if (cell->sampler[i]) {
+ STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0);
+ struct cell_command_sampler *sampler
+ = cell_batch_alloc16(cell, sizeof(*sampler));
+ sampler->opcode[0] = CELL_CMD_STATE_SAMPLER;
+ sampler->unit = i;
+ sampler->state = *cell->sampler[i];
+ }
}
}
+ cell->dirty_samplers = 0x0;
}
if (cell->dirty & CELL_NEW_TEXTURE) {
uint i;
for (i = 0;i < CELL_MAX_SAMPLERS; i++) {
- struct cell_command_texture *texture
- = cell_batch_alloc(cell, sizeof(*texture));
- texture->opcode = CELL_CMD_STATE_TEXTURE;
- texture->unit = i;
- if (cell->texture[i]) {
- texture->start = cell->texture[i]->tiled_data;
- texture->width = cell->texture[i]->base.width[0];
- texture->height = cell->texture[i]->base.height[0];
- }
- else {
- texture->start = NULL;
- texture->width = 1;
- texture->height = 1;
+ if (cell->dirty_textures & (1 << i)) {
+ STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0);
+ struct cell_command_texture *texture
+ = (struct cell_command_texture *)cell_batch_alloc16(cell, sizeof(*texture));
+ texture->opcode[0] = CELL_CMD_STATE_TEXTURE;
+ texture->unit = i;
+ if (cell->texture[i]) {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ texture->start[level] = cell->texture[i]->tiled_mapped[level];
+ texture->width[level] = cell->texture[i]->base.width[level];
+ texture->height[level] = cell->texture[i]->base.height[level];
+ texture->depth[level] = cell->texture[i]->base.depth[level];
+ }
+ texture->target = cell->texture[i]->base.target;
+ }
+ else {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ texture->start[level] = NULL;
+ texture->width[level] = 0;
+ texture->height[level] = 0;
+ texture->depth[level] = 0;
+ }
+ texture->target = 0;
+ }
}
}
+ cell->dirty_textures = 0x0;
}
if (cell->dirty & CELL_NEW_VERTEX_INFO) {
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
index 78cb446c14..d97c22b2ef 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
@@ -297,7 +297,7 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
int face_stencil = spe_allocate_available_register(f);
int stencil_src = stencil;
const unsigned ref = (dsa->stencil[face].ref_value
- & dsa->stencil[face].value_mask);
+ & dsa->stencil[face].valuemask);
boolean complement = FALSE;
int stored;
int tmp = spe_allocate_available_register(f);
@@ -305,9 +305,9 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
&& (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
- && (dsa->stencil[face].value_mask != 0x0ff)) {
+ && (dsa->stencil[face].valuemask != 0x0ff)) {
stored = spe_allocate_available_register(f);
- spe_andi(f, stored, stencil, dsa->stencil[face].value_mask);
+ spe_andi(f, stored, stencil, dsa->stencil[face].valuemask);
} else {
stored = stencil;
}
@@ -395,7 +395,7 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
* - For depth-pass if the stencil test is NEVER
* - Any of the 3 conditions if the operation is KEEP
*/
- if (dsa->stencil[face].write_mask != 0) {
+ if (dsa->stencil[face].writemask != 0) {
if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
&& (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
if (complement) {
@@ -449,10 +449,10 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
*/
if (stencil_src == stencil) {
spe_release_register(f, face_stencil);
- } else if (dsa->stencil[face].write_mask != 0x0ff) {
+ } else if (dsa->stencil[face].writemask != 0x0ff) {
int tmp = spe_allocate_available_register(f);
- spe_il(f, tmp, dsa->stencil[face].write_mask);
+ spe_il(f, tmp, dsa->stencil[face].writemask);
spe_selb(f, stencil_src, stencil, stencil_src, tmp);
spe_release_register(f, tmp);
@@ -580,8 +580,8 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
dsa->stencil[i].zpass_op);
printf("# ref value / value mask / write mask: %02x %02x %02x\n",
dsa->stencil[i].ref_value,
- dsa->stencil[i].value_mask,
- dsa->stencil[i].write_mask);
+ dsa->stencil[i].valuemask,
+ dsa->stencil[i].writemask);
}
printf("\t.text\n");
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
index 3a0d066da2..bf517ea563 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -28,7 +28,7 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "draw/draw_context.h"
#include "tgsi/tgsi_parse.h"
@@ -186,18 +186,21 @@ cell_set_constant_buffer(struct pipe_context *pipe,
const struct pipe_constant_buffer *buf)
{
struct cell_context *cell = cell_context(pipe);
- struct pipe_winsys *ws = pipe->winsys;
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
- /* note: reference counting */
- winsys_buffer_reference(ws,
- &cell->constants[shader].buffer,
- buf->buffer);
- cell->constants[shader].size = buf->size;
+ draw_flush(cell->draw);
- cell->dirty |= CELL_NEW_CONSTANTS;
+ /* note: reference counting */
+ pipe_buffer_reference(pipe->screen,
+ &cell->constants[shader].buffer,
+ buf->buffer);
+
+ if (shader == PIPE_SHADER_VERTEX)
+ cell->dirty |= CELL_NEW_VS_CONSTANTS;
+ else if (shader == PIPE_SHADER_FRAGMENT)
+ cell->dirty |= CELL_NEW_FS_CONSTANTS;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c
index 732c64082e..c9203fee08 100644
--- a/src/gallium/drivers/cell/ppu/cell_surface.c
+++ b/src/gallium/drivers/cell/ppu/cell_surface.c
@@ -27,6 +27,7 @@
#include "util/u_rect.h"
#include "cell_context.h"
+#include "cell_surface.h"
void
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index b6590dfb86..fa52e2cbea 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -28,12 +28,13 @@
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* Michel Dänzer <michel@tungstengraphics.com>
+ * Brian Paul
*/
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -42,30 +43,31 @@
#include "cell_texture.h"
-/* Simple, maximally packed layout.
- */
-static unsigned minify( unsigned d )
+static unsigned
+minify(unsigned d)
{
return MAX2(1, d>>1);
}
static void
-cell_texture_layout(struct cell_texture * spt)
+cell_texture_layout(struct cell_texture *ct)
{
- struct pipe_texture *pt = &spt->base;
+ struct pipe_texture *pt = &ct->base;
unsigned level;
unsigned width = pt->width[0];
unsigned height = pt->height[0];
unsigned depth = pt->depth[0];
- spt->buffer_size = 0;
+ ct->buffer_size = 0;
for ( level = 0 ; level <= pt->last_level ; level++ ) {
unsigned size;
unsigned w_tile, h_tile;
+ assert(level < CELL_MAX_TEXTURE_LEVELS);
+
/* width, height, rounded up to tile size */
w_tile = align(width, TILE_SIZE);
h_tile = align(height, TILE_SIZE);
@@ -76,9 +78,9 @@ cell_texture_layout(struct cell_texture * spt)
pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile);
pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile);
- spt->stride[level] = pt->nblocksx[level] * pt->block.size;
+ ct->stride[level] = pt->nblocksx[level] * pt->block.size;
- spt->level_offset[level] = spt->buffer_size;
+ ct->level_offset[level] = ct->buffer_size;
size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size;
if (pt->target == PIPE_TEXTURE_CUBE)
@@ -86,7 +88,7 @@ cell_texture_layout(struct cell_texture * spt)
else
size *= depth;
- spt->buffer_size += size;
+ ct->buffer_size += size;
width = minify(width);
height = minify(height);
@@ -100,26 +102,25 @@ cell_texture_create(struct pipe_screen *screen,
const struct pipe_texture *templat)
{
struct pipe_winsys *ws = screen->winsys;
- struct cell_texture *spt = CALLOC_STRUCT(cell_texture);
- if (!spt)
+ struct cell_texture *ct = CALLOC_STRUCT(cell_texture);
+ if (!ct)
return NULL;
- spt->base = *templat;
- spt->base.refcount = 1;
- spt->base.screen = screen;
+ ct->base = *templat;
+ ct->base.refcount = 1;
+ ct->base.screen = screen;
- cell_texture_layout(spt);
+ cell_texture_layout(ct);
- spt->buffer = ws->buffer_create(ws, 32,
- PIPE_BUFFER_USAGE_PIXEL,
- spt->buffer_size);
+ ct->buffer = ws->buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL,
+ ct->buffer_size);
- if (!spt->buffer) {
- FREE(spt);
+ if (!ct->buffer) {
+ FREE(ct);
return NULL;
}
- return &spt->base;
+ return &ct->base;
}
@@ -135,244 +136,514 @@ cell_texture_release(struct pipe_screen *screen,
__FUNCTION__, (void *) *pt, (*pt)->refcount - 1);
*/
if (--(*pt)->refcount <= 0) {
- struct cell_texture *spt = cell_texture(*pt);
+ /* Delete this texture now.
+ * But note that the underlying pipe_buffer may linger...
+ */
+ struct cell_texture *ct = cell_texture(*pt);
+ uint i;
/*
- DBG("%s deleting %p\n", __FUNCTION__, (void *) spt);
+ DBG("%s deleting %p\n", __FUNCTION__, (void *) ct);
*/
- pipe_buffer_reference(screen, &spt->buffer, NULL);
+ pipe_buffer_reference(screen, &ct->buffer, NULL);
+
+ for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
+ /* Unreference the tiled image buffer.
+ * It may not actually be deleted until a fence is hit.
+ */
+ if (ct->tiled_buffer[i]) {
+ ct->tiled_mapped[i] = NULL;
+ pipe_buffer_reference(screen, &ct->tiled_buffer[i], NULL);
+ }
+ }
- FREE(spt);
+ FREE(ct);
}
*pt = NULL;
}
-#if 0
+
+/**
+ * Convert image from linear layout to tiled layout. 4-byte pixels.
+ */
static void
-cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture,
- uint face, uint levelsMask)
+twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
+ uint src_stride, const uint *src)
{
- /* XXX TO DO: re-tile the texture data ... */
+ const uint tile_size2 = tile_size * tile_size;
+ const uint h_t = (h + tile_size - 1) / tile_size;
+ const uint w_t = (w + tile_size - 1) / tile_size;
-}
-#endif
+ uint it, jt; /* tile counters */
+ uint i, j; /* intra-tile counters */
+ src_stride /= 4; /* convert from bytes to pixels */
-static struct pipe_surface *
-cell_get_tex_surface(struct pipe_screen *screen,
- struct pipe_texture *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned usage)
-{
- struct pipe_winsys *ws = screen->winsys;
- struct cell_texture *spt = cell_texture(pt);
- struct pipe_surface *ps;
+ /* loop over dest tiles */
+ for (it = 0; it < h_t; it++) {
+ for (jt = 0; jt < w_t; jt++) {
+ /* start of dest tile: */
+ uint *tdst = dst + (it * w_t + jt) * tile_size2;
- ps = ws->surface_alloc(ws);
- if (ps) {
- assert(ps->refcount);
- assert(ps->winsys);
- winsys_buffer_reference(ws, &ps->buffer, spt->buffer);
- ps->format = pt->format;
- ps->block = pt->block;
- ps->width = pt->width[level];
- ps->height = pt->height[level];
- ps->nblocksx = pt->nblocksx[level];
- ps->nblocksy = pt->nblocksy[level];
- ps->stride = spt->stride[level];
- ps->offset = spt->level_offset[level];
- ps->usage = usage;
+ /* compute size of this tile (may be smaller than tile_size) */
+ /* XXX note: a compiler bug was found here. That's why the code
+ * looks as it does.
+ */
+ uint tile_width = w - jt * tile_size;
+ tile_width = MIN2(tile_width, tile_size);
+ uint tile_height = h - it * tile_size;
+ tile_height = MIN2(tile_height, tile_size);
- /* XXX may need to override usage flags (see sp_texture.c) */
+ /* loop over texels in the tile */
+ for (i = 0; i < tile_height; i++) {
+ for (j = 0; j < tile_width; j++) {
+ const uint srci = it * tile_size + i;
+ const uint srcj = jt * tile_size + j;
+ ASSERT(srci < h);
+ ASSERT(srcj < w);
+ tdst[i * tile_size + j] = src[srci * src_stride + srcj];
+ }
+ }
+ }
+ }
+}
- pipe_texture_reference(&ps->texture, pt);
- ps->face = face;
- ps->level = level;
- ps->zslice = zslice;
- if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) {
- ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) *
- ps->nblocksy *
- ps->stride;
- }
- else {
- assert(face == 0);
- assert(zslice == 0);
+/**
+ * For Cell. Basically, rearrange the pixels/quads from this layout:
+ * +--+--+--+--+
+ * |p0|p1|p2|p3|....
+ * +--+--+--+--+
+ *
+ * to this layout:
+ * +--+--+
+ * |p0|p1|....
+ * +--+--+
+ * |p2|p3|
+ * +--+--+
+ */
+static void
+twiddle_tile(const uint *tileIn, uint *tileOut)
+{
+ int y, x;
+
+ for (y = 0; y < TILE_SIZE; y+=2) {
+ for (x = 0; x < TILE_SIZE; x+=2) {
+ int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
+ tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
+ tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
+ tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
+ tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
}
}
- return ps;
}
-
/**
- * Copy tile data from linear layout to tiled layout.
- * XXX this should be rolled into the future surface-creation code.
- * XXX also need "untile" code...
+ * Convert image from tiled layout to linear layout. 4-byte pixels.
*/
static void
-tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src)
+untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
+ uint dst_stride, const uint *src)
{
const uint tile_size2 = tile_size * tile_size;
- const uint h_t = h / tile_size, w_t = w / tile_size;
-
+ const uint h_t = (h + tile_size - 1) / tile_size;
+ const uint w_t = (w + tile_size - 1) / tile_size;
+ uint *tile_buf;
uint it, jt; /* tile counters */
uint i, j; /* intra-tile counters */
- /* loop over dest tiles */
+ dst_stride /= 4; /* convert from bytes to pixels */
+
+ tile_buf = align_malloc(tile_size * tile_size * 4, 16);
+
+ /* loop over src tiles */
for (it = 0; it < h_t; it++) {
for (jt = 0; jt < w_t; jt++) {
- /* start of dest tile: */
- uint *tdst = dst + (it * w_t + jt) * tile_size2;
+ /* start of src tile: */
+ const uint *tsrc = src + (it * w_t + jt) * tile_size2;
+
+ twiddle_tile(tsrc, tile_buf);
+ tsrc = tile_buf;
+
+ /* compute size of this tile (may be smaller than tile_size) */
+ /* XXX note: a compiler bug was found here. That's why the code
+ * looks as it does.
+ */
+ uint tile_width = w - jt * tile_size;
+ tile_width = MIN2(tile_width, tile_size);
+ uint tile_height = h - it * tile_size;
+ tile_height = MIN2(tile_height, tile_size);
+
/* loop over texels in the tile */
- for (i = 0; i < tile_size; i++) {
- for (j = 0; j < tile_size; j++) {
- const uint srci = it * tile_size + i;
- const uint srcj = jt * tile_size + j;
- *tdst++ = src[srci * w + srcj];
+ for (i = 0; i < tile_height; i++) {
+ for (j = 0; j < tile_width; j++) {
+ uint dsti = it * tile_size + i;
+ uint dstj = jt * tile_size + j;
+ ASSERT(dsti < h);
+ ASSERT(dstj < w);
+ dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j];
}
}
}
}
-}
+ align_free(tile_buf);
+}
/**
* Convert linear texture image data to tiled format for SPU usage.
- * XXX recast this in terms of pipe_surfaces (aka texture views).
*/
static void
-cell_tile_texture(struct cell_context *cell,
- struct cell_texture *texture)
+cell_twiddle_texture(struct pipe_screen *screen,
+ struct pipe_surface *surface)
{
- struct pipe_screen *screen = cell->pipe.screen;
- uint face = 0, level = 0, zslice = 0;
- struct pipe_surface *surf;
- const uint w = texture->base.width[0], h = texture->base.height[0];
- const uint *src;
-
- /* temporary restrictions: */
- assert(w >= TILE_SIZE);
- assert(h >= TILE_SIZE);
- assert(w % TILE_SIZE == 0);
- assert(h % TILE_SIZE == 0);
-
- surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- ASSERT(surf);
-
- src = (const uint *) pipe_surface_map(surf, PIPE_BUFFER_USAGE_CPU_WRITE);
-
- if (texture->tiled_data) {
- align_free(texture->tiled_data);
+#if 0 // XXX fix me
+ struct cell_texture *ct = cell_texture(surface->texture);
+ const uint level = surface->level;
+ const uint texWidth = ct->base.width[level];
+ const uint texHeight = ct->base.height[level];
+ const uint bufWidth = align(texWidth, TILE_SIZE);
+ const uint bufHeight = align(texHeight, TILE_SIZE);
+ const void *map = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_READ);
+ const uint *src = (const uint *) map;
+
+ switch (ct->base.format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ {
+ int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1;
+ int offset = bufWidth * bufHeight * 4 * surface->face;
+ uint *dst;
+
+ if (!ct->tiled_buffer[level]) {
+ /* allocate buffer for tiled data now */
+ struct pipe_winsys *ws = screen->winsys;
+ uint bytes = bufWidth * bufHeight * 4 * numFaces;
+ ct->tiled_buffer[level] =
+ ws->buffer_create(ws, 16, PIPE_BUFFER_USAGE_PIXEL, bytes);
+ /* and map it */
+ ct->tiled_mapped[level] =
+ ws->buffer_map(ws, ct->tiled_buffer[level],
+ PIPE_BUFFER_USAGE_GPU_READ);
+ }
+ dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset);
+
+ twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst,
+ surface->stride, src);
+ }
+ break;
+ default:
+ printf("Cell: twiddle unsupported texture format %s\n",
+ pf_name(ct->base.format));
}
- texture->tiled_data = align_malloc(w * h * 4, 16);
- tile_copy_data(w, h, TILE_SIZE, texture->tiled_data, src);
+ screen->surface_unmap(screen, surface);
+#endif
+}
+
- pipe_surface_unmap(surf);
+/**
+ * Convert SPU tiled texture image data to linear format for app usage.
+ */
+static void
+cell_untwiddle_texture(struct pipe_screen *screen,
+ struct pipe_surface *surface)
+{
+#if 0 // XXX fix me
+ struct cell_texture *ct = cell_texture(surface->texture);
+ const uint level = surface->level;
+ const uint texWidth = ct->base.width[level];
+ const uint texHeight = ct->base.height[level];
+ const void *map = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_READ);
+ const uint *src = (const uint *) ((const ubyte *) map + surface->offset);
+
+ switch (ct->base.format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ {
+ int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1;
+ int offset = surface->stride * texHeight * 4 * surface->face;
+ uint *dst;
+
+ if (!ct->untiled_data[level]) {
+ ct->untiled_data[level] =
+ align_malloc(surface->stride * texHeight * 4 * numFaces, 16);
+ }
- pipe_surface_reference(&surf, NULL);
+ dst = (uint *) ((ubyte *) ct->untiled_data[level] + offset);
+
+ untwiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst,
+ surface->stride, src);
+ }
+ break;
+ default:
+ {
+ ct->untiled_data[level] = NULL;
+ printf("Cell: untwiddle unsupported texture format %s\n",
+ pf_name(ct->base.format));
+ }
+ }
+
+ screen->surface_unmap(screen, surface);
+#endif
}
-void
-cell_update_texture_mapping(struct cell_context *cell)
+static struct pipe_surface *
+cell_get_tex_surface(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned usage)
{
-#if 0
- uint face = 0, level = 0, zslice = 0;
+ struct cell_texture *ct = cell_texture(pt);
+ struct pipe_surface *ps;
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (ps) {
+ ps->refcount = 1;
+ pipe_texture_reference(&ps->texture, pt);
+ ps->format = pt->format;
+ //ps->block = pt->block;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ //ps->nblocksx = pt->nblocksx[level];
+ //ps->nblocksy = pt->nblocksy[level];
+ //ps->stride = ct->stride[level];
+ ps->offset = ct->level_offset[level];
+ ps->usage = usage;
+
+ /* XXX may need to override usage flags (see sp_texture.c) */
+
+ pipe_texture_reference(&ps->texture, pt);
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
+
+ if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) {
+#if 0 // XXX fix me
+ ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) *
+ ps->nblocksy *
+ ps->stride;
#endif
- uint i;
+ }
+ else {
+ assert(face == 0);
+ assert(zslice == 0);
+ }
- for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- if (cell->texture[i])
- cell_tile_texture(cell, cell->texture[i]);
+ if (ps->usage & PIPE_BUFFER_USAGE_CPU_READ) {
+ /* convert from tiled to linear layout */
+ cell_untwiddle_texture(screen, ps);
+ }
}
+ return ps;
+}
-#if 0
- if (cell->tex_surf && cell->tex_map) {
- pipe_surface_unmap(cell->tex_surf);
- cell->tex_map = NULL;
+
+static void
+cell_tex_surface_release(struct pipe_screen *screen,
+ struct pipe_surface **s)
+{
+ struct cell_texture *ct = cell_texture((*s)->texture);
+ const uint level = (*s)->level;
+ struct pipe_surface *surf = *s;
+
+ if ((surf->usage & PIPE_BUFFER_USAGE_CPU_READ) && (ct->untiled_data[level]))
+ {
+ align_free(ct->untiled_data[level]);
+ ct->untiled_data[level] = NULL;
}
- /* XXX free old surface */
+ if ((ct->base.tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) &&
+ (surf->usage & PIPE_BUFFER_USAGE_CPU_WRITE)) {
+ /* convert from linear to tiled layout */
+ cell_twiddle_texture(screen, surf);
+ }
- cell->tex_surf = cell_get_tex_surface(&cell->pipe,
- &cell->texture[0]->base,
- face, level, zslice);
+ /* XXX if done rendering to teximage, re-tile */
- cell->tex_map = pipe_surface_map(cell->tex_surf);
-#endif
+ if (--surf->refcount == 0) {
+ pipe_texture_reference(&surf->texture, NULL);
+ FREE(surf);
+ }
+ *s = NULL;
+}
+
+
+/**
+ * Create a transfer object describing the rectangle (x, y, w, h) of one
+ * image (face / mipmap level / zslice) of the given texture.
+ *
+ * The transfer stores a reference to \p texture (via
+ * pipe_texture_reference()) and records the byte offset of the selected
+ * image inside the texture's buffer: the level offset, plus
+ * face-or-zslice * nblocksy * stride for cube and 3D textures.
+ *
+ * \return pointer to the embedded pipe_transfer, or NULL if the
+ *         cell_transfer could not be allocated.
+ */
+static struct pipe_transfer *
+cell_get_tex_transfer(struct pipe_screen *screen,
+                      struct pipe_texture *texture,
+                      unsigned face, unsigned level, unsigned zslice,
+                      enum pipe_transfer_usage usage,
+                      unsigned x, unsigned y, unsigned w, unsigned h)
+{
+   struct cell_texture *ct = cell_texture(texture);
+   struct cell_transfer *ctrans;
+   struct pipe_transfer *pt;
+
+   assert(texture);
+   assert(level <= texture->last_level);
+
+   ctrans = CALLOC_STRUCT(cell_transfer);
+   if (!ctrans) {
+      /* Bail out before touching any member: forming &ctrans->base from a
+       * NULL pointer is not a valid way to produce a NULL result.
+       */
+      return NULL;
+   }
+
+   pt = &ctrans->base;
+   pt->refcount = 1;
+   pipe_texture_reference(&pt->texture, texture);
+   pt->format = texture->format;
+   pt->block = texture->block;
+   pt->x = x;
+   pt->y = y;
+   pt->width = w;
+   pt->height = h;
+   pt->nblocksx = texture->nblocksx[level];
+   pt->nblocksy = texture->nblocksy[level];
+   pt->stride = ct->stride[level];
+   ctrans->offset = ct->level_offset[level];
+   pt->usage = usage;
+   pt->face = face;
+   pt->level = level;
+   pt->zslice = zslice;
+
+   if (texture->target == PIPE_TEXTURE_CUBE ||
+       texture->target == PIPE_TEXTURE_3D) {
+      /* step over the preceding faces (cube) or slices (3D) of this level */
+      ctrans->offset += ((texture->target == PIPE_TEXTURE_CUBE) ? face :
+                         zslice) * pt->nblocksy * pt->stride;
+   }
+   else {
+      assert(face == 0);
+      assert(zslice == 0);
+   }
+
+   return pt;
}
static void
-cell_tex_surface_release(struct pipe_screen *screen,
- struct pipe_surface **s)
+cell_tex_transfer_release(struct pipe_screen *screen,
+ struct pipe_transfer **t)
{
+ struct cell_transfer *transfer = cell_transfer(*t);
/* Effectively do the texture_update work here - if texture images
* needed post-processing to put them into hardware layout, this is
- * where it would happen. For softpipe, nothing to do.
+ * where it would happen. For cell, nothing to do.
*/
- assert ((*s)->texture);
- pipe_texture_reference(&(*s)->texture, NULL);
-
- screen->winsys->surface_release(screen->winsys, s);
+ assert (transfer->base.texture);
+ if (--transfer->base.refcount == 0) {
+ pipe_texture_reference(&transfer->base.texture, NULL);
+ FREE(transfer);
+ }
+ *t = NULL;
}
static void *
-cell_surface_map( struct pipe_screen *screen,
- struct pipe_surface *surface,
- unsigned flags )
+cell_transfer_map( struct pipe_screen *screen,
+ struct pipe_transfer *transfer )
{
ubyte *map;
+ struct cell_texture *spt;
+ unsigned flags = 0;
- if (flags & ~surface->usage) {
- assert(0);
- return NULL;
+ assert(transfer->texture);
+ spt = cell_texture(transfer->texture);
+
+ if (transfer->usage != PIPE_TRANSFER_READ) {
+ flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
}
- map = pipe_buffer_map( screen, surface->buffer, flags );
+ if (transfer->usage != PIPE_TRANSFER_WRITE) {
+ flags |= PIPE_BUFFER_USAGE_CPU_READ;
+ }
+
+ map = pipe_buffer_map(screen, spt->buffer, flags);
if (map == NULL)
return NULL;
/* May want to do different things here depending on read/write nature
* of the map:
*/
- if (surface->texture &&
- (flags & PIPE_BUFFER_USAGE_CPU_WRITE))
+ if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ)
{
/* Do something to notify sharing contexts of a texture change.
- * In softpipe, that would mean flushing the texture cache.
+ * In cell, that would mean flushing the texture cache.
*/
-#if 0
+#if 00
cell_screen(screen)->timestamp++;
#endif
}
- return map + surface->offset;
+ return map + cell_transfer(transfer)->offset +
+ transfer->y / transfer->block.height * transfer->stride +
+ transfer->x / transfer->block.width * transfer->block.size;
}
static void
-cell_surface_unmap(struct pipe_screen *screen,
- struct pipe_surface *surface)
+cell_transfer_unmap(struct pipe_screen *screen,
+ struct pipe_transfer *transfer)
{
- pipe_buffer_unmap( screen, surface->buffer );
+ struct cell_texture *spt;
+
+ assert(transfer->texture);
+ spt = cell_texture(transfer->texture);
+
+ pipe_buffer_unmap( screen, spt->buffer );
}
-void
-cell_init_texture_functions(struct cell_context *cell)
+static void *
+cell_surface_map(struct pipe_screen *screen,
+ struct pipe_surface *surface,
+ unsigned flags)
{
- /*cell->pipe.texture_update = cell_texture_update;*/
+ ubyte *map;
+ struct cell_texture *ct = cell_texture(surface->texture);
+ const uint level = surface->level;
+
+ assert(ct);
+
+#if 0
+ if (flags & ~surface->usage) {
+ assert(0);
+ return NULL;
+ }
+#endif
+
+ map = pipe_buffer_map( screen, ct->buffer, flags );
+ if (map == NULL) {
+ return NULL;
+ }
+ else {
+ if ((surface->usage & PIPE_BUFFER_USAGE_CPU_READ) &&
+ (ct->untiled_data[level])) {
+ return (void *) ((ubyte *) ct->untiled_data[level] + surface->offset);
+ }
+ else {
+ return (void *) (map + surface->offset);
+ }
+ }
}
+/**
+ * Unmap a surface by unmapping the buffer of the texture it refers to.
+ */
+static void
+cell_surface_unmap(struct pipe_screen *screen,
+                   struct pipe_surface *surface)
+{
+   struct cell_texture *tex = cell_texture(surface->texture);
+
+   assert(tex);
+   pipe_buffer_unmap(screen, tex->buffer);
+}
+
+
+
void
cell_init_screen_texture_funcs(struct pipe_screen *screen)
{
@@ -382,6 +653,8 @@ cell_init_screen_texture_funcs(struct pipe_screen *screen)
screen->get_tex_surface = cell_get_tex_surface;
screen->tex_surface_release = cell_tex_surface_release;
- screen->surface_map = cell_surface_map;
- screen->surface_unmap = cell_surface_unmap;
+ screen->get_tex_transfer = cell_get_tex_transfer;
+ screen->tex_transfer_release = cell_tex_transfer_release;
+ screen->transfer_map = cell_transfer_map;
+ screen->transfer_unmap = cell_transfer_unmap;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h
index 6d37e95ebc..fc6486adbe 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.h
+++ b/src/gallium/drivers/cell/ppu/cell_texture.h
@@ -40,15 +40,31 @@ struct cell_texture
{
struct pipe_texture base;
- unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long stride[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS];
+ unsigned long stride[CELL_MAX_TEXTURE_LEVELS];
/* The data is held here:
*/
struct pipe_buffer *buffer;
unsigned long buffer_size;
- void *tiled_data; /* XXX this may be temporary */ /*ALIGN16*/
+ /** Texture data in tiled layout is held here */
+ struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS];
+ /** Mapped, tiled texture data */
+ void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS];
+
+ struct pipe_transfer *transfer;
+
+ /** The original, linear texture data */
+ void *untiled_data[CELL_MAX_TEXTURE_LEVELS];
+};
+
+
+struct cell_transfer
+{
+ struct pipe_transfer base; /**< first member; the cell_transfer() wrapper casts a pipe_transfer pointer to this struct */
+
+ unsigned long offset; /**< set by cell_get_tex_transfer() from level_offset[] plus the face/zslice term; added to the mapped pointer in cell_transfer_map() */
};
@@ -60,13 +76,12 @@ cell_texture(struct pipe_texture *pt)
}
-
-extern void
-cell_update_texture_mapping(struct cell_context *cell);
-
-
-extern void
-cell_init_texture_functions(struct cell_context *cell);
+/**
+ * Cast wrapper: reinterpret a pipe_transfer pointer as the cell_transfer
+ * that contains it. No checking is done; the cast relies on
+ * struct cell_transfer having its pipe_transfer 'base' as first member.
+ */
+static INLINE struct cell_transfer *
+cell_transfer(struct pipe_transfer *pt)
+{
+ return (struct cell_transfer *) pt;
+}
extern void
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c
index aa63435b93..cfaffb52a8 100644
--- a/src/gallium/drivers/cell/ppu/cell_vbuf.c
+++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c
@@ -38,6 +38,7 @@
#include "cell_batch.h"
#include "cell_context.h"
+#include "cell_fence.h"
#include "cell_flush.h"
#include "cell_spu.h"
#include "cell_vbuf.h"
@@ -61,6 +62,7 @@ struct cell_vbuf_render
uint vertex_size; /**< in bytes */
void *vertex_buffer; /**< just for debug, really */
uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
+ uint vertex_buffer_size; /**< size in bytes */
};
@@ -81,24 +83,26 @@ cell_vbuf_get_vertex_info(struct vbuf_render *vbr)
}
-static void *
+static boolean
cell_vbuf_allocate_vertices(struct vbuf_render *vbr,
ushort vertex_size, ushort nr_vertices)
{
struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
+ unsigned size = vertex_size * nr_vertices;
/*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/
assert(cvbr->vertex_buf == ~0);
cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell);
cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf];
+ cvbr->vertex_buffer_size = size;
cvbr->vertex_size = vertex_size;
- return cvbr->vertex_buffer;
+
+ return cvbr->vertex_buffer != NULL;
}
static void
-cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
- unsigned vertex_size, unsigned vertices_used)
+cell_vbuf_release_vertices(struct vbuf_render *vbr)
{
struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
struct cell_context *cell = cvbr->cell;
@@ -108,23 +112,47 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
__FUNCTION__, cvbr->vertex_buf, vertices_used);
*/
+ /* Make sure texture buffers aren't released until we're done rendering
+ * with them.
+ */
+ cell_add_fenced_textures(cell);
+
/* Tell SPUs they can release the vert buf */
if (cvbr->vertex_buf != ~0U) {
+ STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0);
struct cell_command_release_verts *release
= (struct cell_command_release_verts *)
- cell_batch_alloc(cell, sizeof(struct cell_command_release_verts));
- release->opcode = CELL_CMD_RELEASE_VERTS;
+ cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts));
+ release->opcode[0] = CELL_CMD_RELEASE_VERTS;
release->vertex_buf = cvbr->vertex_buf;
}
cvbr->vertex_buf = ~0;
cell_flush_int(cell, 0x0);
- assert(vertices == cvbr->vertex_buffer);
cvbr->vertex_buffer = NULL;
}
+/**
+ * vbuf_render::map_vertices hook.
+ * Hands back the vertex buffer pointer recorded for this renderer.
+ */
+static void *
+cell_vbuf_map_vertices(struct vbuf_render *vbr)
+{
+   return cell_vbuf_render(vbr)->vertex_buffer;
+}
+
+
+/**
+ * vbuf_render::unmap_vertices hook.
+ * Nothing is actually unmapped; the only work is a sanity check that the
+ * vertices up to and including max_index fit inside the allocated buffer.
+ * min_index is not used.
+ */
+static void
+cell_vbuf_unmap_vertices(struct vbuf_render *vbr,
+ ushort min_index,
+ ushort max_index )
+{
+ struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
+ /* (max_index+1) vertices of vertex_size bytes each must fit in the buffer */
+ assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
+ /* do nothing */
+}
+
+
static boolean
cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
@@ -204,15 +232,16 @@ cell_vbuf_draw(struct vbuf_render *vbr,
/* build/insert batch RENDER command */
{
- const uint index_bytes = ROUNDUP8(nr_indices * 2);
- const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size;
+ const uint index_bytes = ROUNDUP16(nr_indices * 2);
+ const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size);
+ STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0);
const uint batch_size = sizeof(struct cell_command_render) + index_bytes;
struct cell_command_render *render
= (struct cell_command_render *)
- cell_batch_alloc(cell, batch_size);
+ cell_batch_alloc16(cell, batch_size);
- render->opcode = CELL_CMD_RENDER;
+ render->opcode[0] = CELL_CMD_RENDER;
render->prim_type = cvbr->prim;
render->num_indexes = nr_indices;
@@ -230,7 +259,7 @@ cell_vbuf_draw(struct vbuf_render *vbr,
min_index == 0 &&
vertex_bytes + 16 <= cell_batch_free_space(cell)) {
/* vertex data inlined, after indices, at 16-byte boundary */
- void *dst = cell_batch_alloc_aligned(cell, vertex_bytes, 16);
+ void *dst = cell_batch_alloc16(cell, vertex_bytes);
memcpy(dst, vertices, vertex_bytes);
render->inline_verts = TRUE;
render->vertex_buf = ~0;
@@ -287,6 +316,8 @@ cell_init_vbuf(struct cell_context *cell)
cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info;
cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices;
+ cell->vbuf_render->base.map_vertices = cell_vbuf_map_vertices;
+ cell->vbuf_render->base.unmap_vertices = cell_vbuf_unmap_vertices;
cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive;
cell->vbuf_render->base.draw = cell_vbuf_draw;
cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices;
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
index 566df7f59e..9cba537d9e 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
@@ -73,8 +73,8 @@ emit_matrix_transpose(struct spe_function *p,
int col3;
- spe_lqd(p, shuf_hi, shuf_ptr, 3);
- spe_lqd(p, shuf_lo, shuf_ptr, 4);
+ spe_lqd(p, shuf_hi, shuf_ptr, 3*16);
+ spe_lqd(p, shuf_lo, shuf_ptr, 4*16);
spe_shufb(p, t1, row0, row2, shuf_hi);
spe_shufb(p, t2, row0, row2, shuf_lo);
@@ -122,13 +122,13 @@ emit_matrix_transpose(struct spe_function *p,
*/
switch (count) {
case 4:
- spe_stqd(p, col3, dest_ptr, 3);
+ spe_stqd(p, col3, dest_ptr, 3 * 16);
case 3:
- spe_stqd(p, col2, dest_ptr, 2);
+ spe_stqd(p, col2, dest_ptr, 2 * 16);
case 2:
- spe_stqd(p, col1, dest_ptr, 1);
+ spe_stqd(p, col1, dest_ptr, 1 * 16);
case 1:
- spe_stqd(p, col0, dest_ptr, 0);
+ spe_stqd(p, col0, dest_ptr, 0 * 16);
}
@@ -145,6 +145,8 @@ emit_matrix_transpose(struct spe_function *p,
}
+#if 0
+/* This appears to not be used currently */
static void
emit_fetch(struct spe_function *p,
unsigned in_ptr, unsigned *offset,
@@ -166,17 +168,17 @@ emit_fetch(struct spe_function *p,
float scale_signed = 0.0;
float scale_unsigned = 0.0;
- spe_lqd(p, v0, in_ptr, 0 + offset[0]);
- spe_lqd(p, v1, in_ptr, 1 + offset[0]);
- spe_lqd(p, v2, in_ptr, 2 + offset[0]);
- spe_lqd(p, v3, in_ptr, 3 + offset[0]);
+ spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16);
+ spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16);
+ spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16);
+ spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16);
offset[0] += 4;
switch (bytes) {
case 1:
scale_signed = 1.0f / 127.0f;
scale_unsigned = 1.0f / 255.0f;
- spe_lqd(p, tmp, shuf_ptr, 1);
+ spe_lqd(p, tmp, shuf_ptr, 1 * 16);
spe_shufb(p, v0, v0, v0, tmp);
spe_shufb(p, v1, v1, v1, tmp);
spe_shufb(p, v2, v2, v2, tmp);
@@ -185,7 +187,7 @@ emit_fetch(struct spe_function *p,
case 2:
scale_signed = 1.0f / 32767.0f;
scale_unsigned = 1.0f / 65535.0f;
- spe_lqd(p, tmp, shuf_ptr, 2);
+ spe_lqd(p, tmp, shuf_ptr, 2 * 16);
spe_shufb(p, v0, v0, v0, tmp);
spe_shufb(p, v1, v1, v1, tmp);
spe_shufb(p, v2, v2, v2, tmp);
@@ -241,11 +243,11 @@ emit_fetch(struct spe_function *p,
switch (count) {
case 1:
- spe_stqd(p, float_zero, out_ptr, 1);
+ spe_stqd(p, float_zero, out_ptr, 1 * 16);
case 2:
- spe_stqd(p, float_zero, out_ptr, 2);
+ spe_stqd(p, float_zero, out_ptr, 2 * 16);
case 3:
- spe_stqd(p, float_one, out_ptr, 3);
+ spe_stqd(p, float_one, out_ptr, 3 * 16);
}
if (float_zero != -1) {
@@ -256,6 +258,7 @@ emit_fetch(struct spe_function *p,
spe_release_register(p, float_one);
}
}
+#endif
void cell_update_vertex_fetch(struct draw_context *draw)
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
index 2b10c116fa..403cf6d50f 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
@@ -31,7 +31,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_context.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_math.h"
#include "cell_context.h"
diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore
new file mode 100644
index 0000000000..2be9a2d324
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/.gitignore
@@ -0,0 +1 @@
+g3d_spu
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
index 1ae0dfb8c1..3cc52301da 100644
--- a/src/gallium/drivers/cell/spu/Makefile
+++ b/src/gallium/drivers/cell/spu/Makefile
@@ -16,8 +16,10 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o
SOURCES = \
- spu_main.c \
+ spu_command.c \
spu_dcache.c \
+ spu_funcs.c \
+ spu_main.c \
spu_per_fragment_op.c \
spu_render.c \
spu_texture.c \
@@ -31,9 +33,10 @@ OLD_SOURCES = \
spu_vertex_shader.c
-SPU_OBJECTS = $(SOURCES:.c=.o) \
+SPU_OBJECTS = $(SOURCES:.c=.o)
+
+SPU_ASM_OUT = $(SOURCES:.c=.s)
-SPU_ASM_OUT = $(SOURCES:.c=.s) \
INCLUDE_DIRS = \
-I$(TOP)/src/mesa \
diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h
index fd8dc6ded3..d7ce005524 100644
--- a/src/gallium/drivers/cell/spu/spu_colorpack.h
+++ b/src/gallium/drivers/cell/spu/spu_colorpack.h
@@ -31,6 +31,7 @@
#define SPU_COLORPACK_H
+#include <transpose_matrix4x4.h>
#include <spu_intrinsics.h>
@@ -84,10 +85,10 @@ spu_unpack_B8G8R8A8(uint color)
vector unsigned int color_u4 = spu_splats(color);
color_u4 = spu_shuffle(color_u4, color_u4,
((vector unsigned char) {
- 10, 10, 10, 10,
- 5, 5, 5, 5,
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
0, 0, 0, 0,
- 15, 15, 15, 15}) );
+ 3, 3, 3, 3}) );
return spu_convtf(color_u4, 32);
}
@@ -98,13 +99,47 @@ spu_unpack_A8R8G8B8(uint color)
vector unsigned int color_u4 = spu_splats(color);
color_u4 = spu_shuffle(color_u4, color_u4,
((vector unsigned char) {
- 5, 5, 5, 5,
- 10, 10, 10, 10,
- 15, 15, 15, 15,
+ 1, 1, 1, 1,
+ 2, 2, 2, 2,
+ 3, 3, 3, 3,
0, 0, 0, 0}) );
-
return spu_convtf(color_u4, 32);
}
+/**
+ * Unpack four packed ARGB colors to floats and transpose the result.
+ *
+ * \param color_in - array of 32-bit packed ARGB colors
+ * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order
+ */
+static INLINE void
+spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4],
+                               vector float color_out[4])
+{
+   /* Replicate source byte 1 across word 0, byte 2 across word 1,
+    * byte 3 across word 2 and byte 0 across word 3.
+    */
+   const vector unsigned char spread_bytes = {
+      1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0 };
+   int i;
+
+   for (i = 0; i < 4; i++) {
+      const vector unsigned int c
+         = spu_shuffle(color_in[i], color_in[i], spread_bytes);
+      color_out[i] = spu_convtf(c, 32);
+   }
+
+   /* in-place transpose of the four unpacked vectors */
+   _transpose_matrix4x4(color_out, color_out);
+}
+
+
+
#endif /* SPU_COLORPACK_H */
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
new file mode 100644
index 0000000000..5c0179d954
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -0,0 +1,815 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * SPU command processing code
+ */
+
+
+#include <stdio.h>
+#include <libmisc.h>
+
+#include "pipe/p_defines.h"
+
+#include "spu_command.h"
+#include "spu_main.h"
+#include "spu_render.h"
+#include "spu_per_fragment_op.h"
+#include "spu_texture.h"
+#include "spu_tile.h"
+#include "spu_vertex_shader.h"
+#include "spu_dcache.h"
+#include "cell/common.h"
+
+
+/* Global vertex-shader context.  The state commands handled in this file
+ * fill in its vertex_fetch, viewport and constants fields.
+ */
+struct spu_vs_context draw;
+
+
+/**
+ * Buffers containing dynamically generated SPU code:
+ *
+ * attribute_fetch_code_buffer is the DMA destination used by
+ * cmd_state_attrib_fetch(); draw.vertex_fetch.code is pointed at it.
+ * NOTE(review): 136 is presumably the per-attribute code size in bytes -
+ * confirm against the PPU-side code generator.
+ */
+static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
+ ALIGN16_ATTRIB;
+
+
+
+/**
+ * Round \p value up to a multiple of \p alignment.
+ * The mask arithmetic only yields a true multiple when \p alignment is a
+ * power of two.
+ */
+static INLINE int
+align(int value, int alignment)
+{
+   const int low_bits = alignment - 1;
+
+   return (value + low_bits) & ~low_bits;
+}
+
+
+
+/**
+ * Tell the PPU that this SPU has finished copying a buffer to
+ * local store and that it may be reused by the PPU.
+ * This is done by writing a 16-byte batch-buffer-status block back into
+ * main memory (in cell_context->buffer_status[]).
+ *
+ * \param buffer  index of the buffer to release, in [0, CELL_NUM_BUFFERS-1]
+ */
+static void
+release_buffer(uint buffer)
+{
+   /* Evidently, using less than a 16-byte status doesn't work reliably */
+   static const vector unsigned int free_status = {CELL_BUFFER_STATUS_FREE,
+                                                   CELL_BUFFER_STATUS_FREE,
+                                                   CELL_BUFFER_STATUS_FREE,
+                                                   CELL_BUFFER_STATUS_FREE};
+   /* each SPU owns CELL_NUM_BUFFERS status slots of four uints apiece */
+   const uint slot = spu.init.id * CELL_NUM_BUFFERS + buffer;
+   uint *status_addr = spu.init.buffer_status + 4 * slot;
+
+   ASSERT(buffer < CELL_NUM_BUFFERS);
+
+   mfc_put((void *) &free_status,      /* src in local memory */
+           (unsigned int) status_addr, /* dst in main memory */
+           sizeof(free_status),        /* size */
+           TAG_MISC,                   /* tag is unimportant */
+           0,                          /* tid */
+           0                           /* rid */);
+}
+
+
+/**
+ * Process a CELL_CMD_FENCE command.
+ * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory.
+ * There's a qword of status per SPU, so this SPU's slot is four uints
+ * times its id past the start of the fence.
+ */
+static void
+cmd_fence(struct cell_command_fence *fence_cmd)
+{
+   static const vector unsigned int signalled = {CELL_FENCE_SIGNALLED,
+                                                 CELL_FENCE_SIGNALLED,
+                                                 CELL_FENCE_SIGNALLED,
+                                                 CELL_FENCE_SIGNALLED};
+   /* main store/memory address, not local store */
+   uint *slot = (uint *) fence_cmd->fence + 4 * spu.init.id;
+
+   ASSERT_ALIGN16(slot);
+   mfc_put((void *) &signalled,  /* src in local memory */
+           (unsigned int) slot,  /* dst in main memory */
+           sizeof(signalled),    /* size */
+           TAG_FENCE,            /* tag */
+           0,                    /* tid */
+           0                     /* rid */);
+}
+
+
+/**
+ * Process a CELL_CMD_CLEAR_SURFACE command.
+ * Surface 0 is handled as the color buffer, any other value as the depth
+ * buffer.  The clear value is always recorded in spu.fb.  With CLEAR_OPT
+ * enabled (as it is here) no tile data is written: the per-tile status
+ * arrays are only marked TILE_STATUS_CLEAR.
+ */
+static void
+cmd_clear_surface(const struct cell_command_clear_surface *clear)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
+
+ if (clear->surface == 0) {
+ spu.fb.color_clear_value = clear->value;
+ /* debug aid: XOR an SPU-id-derived bit pattern into the clear color
+  * (presumably so each SPU's tiles can be told apart on screen)
+  */
+ if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
+ uint x = (spu.init.id << 4) | (spu.init.id << 12) |
+ (spu.init.id << 20) | (spu.init.id << 28);
+ spu.fb.color_clear_value ^= x;
+ }
+ }
+ else {
+ spu.fb.depth_clear_value = clear->value;
+ }
+
+#define CLEAR_OPT 1
+#if CLEAR_OPT
+
+ /* Simply set all tiles' status to CLEAR.
+ * When we actually begin rendering into a tile, we'll initialize it to
+ * the clear value. If any tiles go untouched during the frame,
+ * really_clear_tiles() will set them to the clear value.
+ */
+ if (clear->surface == 0) {
+ memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
+ }
+ else {
+ memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
+ }
+
+#else
+
+ /*
+ * This path clears the whole framebuffer to the clear color right now.
+ */
+
+ /*
+ printf("SPU: %s num=%d w=%d h=%d\n",
+ __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
+ */
+
+ /* init a single tile to the clear value */
+ if (clear->surface == 0) {
+ clear_c_tile(&spu.ctile);
+ }
+ else {
+ clear_z_tile(&spu.ztile);
+ }
+
+ /* walk over my tiles, writing the 'clear' tile's data */
+ {
+ const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+ uint i;
+ /* tiles are interleaved across SPUs: start at our id, stride by num_spus */
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (clear->surface == 0)
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+ else
+ put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
+ }
+ }
+
+ if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
+ wait_on_mask(1 << TAG_SURFACE_CLEAR);
+ }
+
+#endif /* CLEAR_OPT */
+
+ D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n");
+}
+
+
+/**
+ * Process a CELL_CMD_RELEASE_VERTS command: hand the named vertex buffer
+ * back to the PPU via release_buffer().
+ */
+static void
+cmd_release_verts(const struct cell_command_release_verts *release)
+{
+   const uint vertex_buf = release->vertex_buf;
+
+   D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", vertex_buf);
+   ASSERT(vertex_buf != ~0U);
+   release_buffer(vertex_buf);
+}
+
+
+/** Route both front- and back-facing fragments to the C fallback code. */
+static void
+install_fallback_fragment_ops(void)
+{
+   spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+   spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+}
+
+
+/**
+ * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
+ * This involves installing new fragment ops SPU code.
+ * If this function is never called, we'll use a regular C fallback function
+ * for fragment processing.
+ *
+ * The fallback is also (re)installed when CELL_DEBUG_FRAGMENT_OP_FALLBACK
+ * is set in the debug flags, or when the code buffer cannot be allocated.
+ */
+static void
+cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
+{
+   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");
+
+   /* Copy state info (for fallback case only - this will eventually
+    * go away when the fallback case goes away)
+    */
+   memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
+   memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
+   memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));
+
+   /* Make sure the SPU knows which buffers it's expected to read when
+    * it's told to pull tiles.
+    */
+   spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled ||
+                             spu.depth_stencil_alpha.stencil[0].enabled);
+
+   /* Debug override: force the fallback code instead of the incoming
+    * SPU code.
+    */
+   if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) {
+      static unsigned int warned = 0;
+      if (!warned) {
+         fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
+         warned = 1;
+      }
+      /* Re-installing here isn't strictly needed if the debug flags
+       * never change and the function pointers were initialized
+       * correctly, but it lets a person flip the flag from inside a
+       * debugger during a run.
+       */
+      install_fallback_fragment_ops();
+      return;
+   }
+
+   /* Grow the SPU code buffer if the incoming code doesn't fit.
+    * Note that we *don't* use align_malloc() and align_free(), because
+    * those utility functions are *not* available in SPU code.
+    */
+   if (spu.fragment_ops_code_size < fops->total_code_size) {
+      free(spu.fragment_ops_code);   /* free(NULL) is a no-op */
+      spu.fragment_ops_code = malloc(fops->total_code_size);
+      if (spu.fragment_ops_code == NULL) {
+         /* Whoops. */
+         fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size);
+         spu.fragment_ops_code_size = 0;
+         install_fallback_fragment_ops();
+         return;
+      }
+      spu.fragment_ops_code_size = fops->total_code_size;
+   }
+
+   /* Copy the SPU code from the command buffer to the spu buffer */
+   memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size);
+
+   /* Point the front-facing and back-facing entry points at the given
+    * offsets within the code.  If the front and back code are the same,
+    * both offsets are the same.
+    */
+   spu.fragment_ops[CELL_FACING_FRONT] =
+      (spu_fragment_ops_func) (spu.fragment_ops_code + fops->front_code_index);
+   spu.fragment_ops[CELL_FACING_BACK] =
+      (spu_fragment_ops_func) (spu.fragment_ops_code + fops->back_code_index);
+}
+
+/**
+ * Process a CELL_CMD_STATE_FRAGMENT_PROGRAM command: copy the fragment
+ * program's SPU code out of the batch buffer into
+ * spu.fragment_program_code and point spu.fragment_program at it.
+ */
+static void
+cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n");
+ /* Copy SPU code from batch buffer to spu buffer */
+ /* Always copies the maximum program size (4 bytes per instruction),
+  * regardless of how many instructions the program really has.
+  */
+ memcpy(spu.fragment_program_code, fp->code,
+ SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
+ /* "01" is non-zero, so the block below is always compiled in */
+#if 01
+ /* Point function pointer at new code */
+ spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
+#endif
+}
+
+
+/**
+ * Process a CELL_CMD_STATE_FS_CONSTANTS command.
+ * Layout in the batch buffer: opcode qword at \p pos, constant count in
+ * word 0 of the next qword, then the tightly packed float constants.
+ *
+ * \param buffer  the batch buffer, as an array of 16-byte qwords
+ * \param pos     qword index of this command within \p buffer
+ * \return qword index of the command that follows this one
+ */
+static uint
+cmd_state_fs_constants(const qword *buffer, uint pos)
+{
+   const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0);
+   const float *src = (const float *) &buffer[pos+2];
+   uint k;
+
+   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const);
+
+   /* Expand each float to float[4] for SOA execution */
+   for (k = 0; k < num_const; k++) {
+      D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", k, src[k]);
+      spu.constants[k] = spu_splats(src[k]);
+   }
+
+   /* skip opcode qword + count qword + padded constant data;
+    * result is in 16-byte words
+    */
+   return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16);
+}
+
+
+/**
+ * Process a CELL_CMD_STATE_FRAMEBUFFER command: record the color/depth
+ * buffer addresses, formats and size in spu.fb, and derive the tile counts
+ * and the depth element size/scale from them.
+ */
+static void
+cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
+ cmd->width,
+ cmd->height,
+ cmd->color_start,
+ cmd->color_format,
+ cmd->depth_format);
+
+ ASSERT_ALIGN16(cmd->color_start);
+ ASSERT_ALIGN16(cmd->depth_start);
+
+ spu.fb.color_start = cmd->color_start;
+ spu.fb.depth_start = cmd->depth_start;
+ spu.fb.color_format = cmd->color_format;
+ spu.fb.depth_format = cmd->depth_format;
+ spu.fb.width = cmd->width;
+ spu.fb.height = cmd->height;
+ /* round up so partially covered tiles at the right/bottom edge count */
+ spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
+ spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
+
+ /* zsize: bytes per depth value; zscale: largest integer depth value */
+ switch (spu.fb.depth_format) {
+ case PIPE_FORMAT_Z32_UNORM:
+ spu.fb.zsize = 4;
+ spu.fb.zscale = (float) 0xffffffffu;
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ spu.fb.zsize = 4;
+ spu.fb.zscale = (float) 0x00ffffffu;
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ spu.fb.zsize = 2;
+ spu.fb.zscale = (float) 0xffffu;
+ break;
+ default:
+ /* unrecognized/no depth format: zscale is left at its previous value */
+ spu.fb.zsize = 0;
+ break;
+ }
+}
+
+
+/**
+ * The texture mask_s/t and scale_s/t fields depend on the texture size and
+ * sampler wrap modes; recompute them for every mipmap level.
+ *
+ * \param texture  texture whose per-level mask/scale fields are rewritten
+ * \param sampler  sampler state supplying wrap modes and normalized_coords
+ */
+static void
+update_tex_masks(struct spu_texture *texture,
+                 const struct pipe_sampler_state *sampler)
+{
+   const int repeat_s = (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT);
+   const int repeat_t = (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT);
+   uint level;
+
+   for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+      const int width = texture->level[level].width;
+      const int height = texture->level[level].height;
+
+      /* REPEAT wraps with a size-1 mask; other modes use an all-ones mask */
+      texture->level[level].mask_s = spu_splats(repeat_s ? width - 1 : ~0);
+      texture->level[level].mask_t = spu_splats(repeat_t ? height - 1 : ~0);
+
+      /* normalized coords get scaled by the level size, others by 1 */
+      texture->level[level].scale_s =
+         spu_splats(sampler->normalized_coords ? (float) width : 1.0f);
+      texture->level[level].scale_t =
+         spu_splats(sampler->normalized_coords ? (float) height : 1.0f);
+   }
+}
+
+
+/**
+ * Process a CELL_CMD_STATE_SAMPLER command.
+ * Stores the sampler state for the command's texture unit, selects the
+ * minification, magnification and top-level sampling functions from its
+ * filter settings, then refreshes the unit's texture masks/scales.
+ */
+static void
+cmd_state_sampler(const struct cell_command_sampler *sampler)
+{
+   const uint unit = sampler->unit;
+
+   D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit);
+
+   spu.sampler[unit] = sampler->state;
+
+   /* minification filter */
+   switch (spu.sampler[unit].min_img_filter) {
+   case PIPE_TEX_FILTER_NEAREST:
+   case PIPE_TEX_FILTER_ANISO:   /* handled as nearest, for now */
+      spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+      spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
+      break;
+   default:
+      ASSERT(0);
+   }
+
+   /* magnification filter */
+   switch (spu.sampler[unit].mag_img_filter) {
+   case PIPE_TEX_FILTER_NEAREST:
+   case PIPE_TEX_FILTER_ANISO:   /* handled as nearest, for now */
+      spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
+      break;
+   case PIPE_TEX_FILTER_LINEAR:
+      spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
+      break;
+   default:
+      ASSERT(0);
+   }
+
+   /* With mipmapping the LOD-selecting sampler is the entry point;
+    * without it the magnification sampler is used directly.
+    */
+   switch (spu.sampler[unit].min_mip_filter) {
+   case PIPE_TEX_MIPFILTER_NONE:
+      spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit];
+      break;
+   case PIPE_TEX_MIPFILTER_NEAREST:
+   case PIPE_TEX_MIPFILTER_LINEAR:
+      spu.sample_texture_2d[unit] = sample_texture_2d_lod;
+      break;
+   default:
+      ASSERT(0);
+   }
+
+   update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
+}
+
+
+/**
+ * Process a CELL_CMD_STATE_TEXTURE command: copy the per-level texture
+ * description for one texture unit into spu.texture[] and derive the
+ * per-level tiling, image size and coordinate-limit fields.
+ */
+static void
+cmd_state_texture(const struct cell_command_texture *texture)
+{
+ const uint unit = texture->unit;
+ uint i;
+
+ D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit);
+
+ spu.texture[unit].max_level = 0;
+ spu.texture[unit].target = texture->target;
+
+ for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
+ uint width = texture->width[i];
+ uint height = texture->height[i];
+ uint depth = texture->depth[i];
+
+ D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i,
+ texture->start[i], texture->width[i], texture->height[i]);
+
+ spu.texture[unit].level[i].start = texture->start[i];
+ spu.texture[unit].level[i].width = width;
+ spu.texture[unit].level[i].height = height;
+ spu.texture[unit].level[i].depth = depth;
+
+ /* number of tiles across, rounding a partial tile up */
+ spu.texture[unit].level[i].tiles_per_row =
+ (width + TILE_SIZE - 1) / TILE_SIZE;
+
+ /* width and height are padded to whole tiles; the factor 4 is
+  * presumably bytes per texel - confirm against the texture layout code
+  */
+ spu.texture[unit].level[i].bytes_per_image =
+ 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth;
+
+ spu.texture[unit].level[i].max_s = spu_splats((int) width - 1);
+ spu.texture[unit].level[i].max_t = spu_splats((int) height - 1);
+
+ /* max_level ends up as the highest level with a non-null start address */
+ if (texture->start[i])
+ spu.texture[unit].max_level = i;
+ }
+
+ update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
+}
+
+
+/**
+ * Process a CELL_CMD_STATE_VERTEX_INFO command: sanity-check the attribute
+ * count and copy the vertex layout into spu.vertex_info.
+ */
+static void
+cmd_state_vertex_info(const struct vertex_info *vinfo)
+{
+   D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
+
+   /* between 1 and 8 attributes are accepted */
+   ASSERT(vinfo->num_attribs >= 1);
+   ASSERT(vinfo->num_attribs <= 8);
+
+   memcpy(&spu.vertex_info, vinfo, sizeof(struct vertex_info));
+}
+
+
+/**
+ * Process a CELL_CMD_STATE_VS_ARRAY_INFO command: record the base pointer,
+ * pitch, size and fetch-code offset of one vertex attribute array and mark
+ * the vertex-fetch state dirty.
+ */
+static void
+cmd_state_vs_array_info(const struct cell_array_info *vs_info)
+{
+   const unsigned slot = vs_info->attr;
+
+   ASSERT(slot < PIPE_MAX_ATTRIBS);
+
+   draw.vertex_fetch.src_ptr[slot] = vs_info->base;
+   draw.vertex_fetch.pitch[slot] = vs_info->pitch;
+   draw.vertex_fetch.size[slot] = vs_info->size;
+   draw.vertex_fetch.code_offset[slot] = vs_info->function_offset;
+
+   draw.vertex_fetch.dirty = 1;
+}
+
+
+/**
+ * Process a CELL_CMD_STATE_ATTRIB_FETCH command: DMA the generated
+ * attribute-fetch code from main memory into attribute_fetch_code_buffer,
+ * wait for the transfer to complete, and make that buffer the current
+ * vertex-fetch code.
+ *
+ * \param code  gives the main-memory address (base) and byte size of the code
+ */
+static void
+cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
+{
+   /* The destination is a fixed-size local buffer: make sure the incoming
+    * code fits before letting the DMA write past its end.
+    */
+   ASSERT(code->size <= sizeof(attribute_fetch_code_buffer));
+
+   mfc_get(attribute_fetch_code_buffer,
+           (unsigned int) code->base, /* src */
+           code->size,
+           TAG_BATCH_BUFFER,
+           0, /* tid */
+           0 /* rid */);
+   wait_on_mask(1 << TAG_BATCH_BUFFER);
+
+   draw.vertex_fetch.code = attribute_fetch_code_buffer;
+}
+
+
+/**
+ * Process a CELL_CMD_FINISH command: flush pending clears, wait until every
+ * outstanding DMA has completed, then notify the PPU through the outbound
+ * mailbox.
+ */
+static void
+cmd_finish(void)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "FINISH\n");
+ /* presumably writes out tiles still marked as cleared-but-untouched;
+  * see really_clear_tiles() for the meaning of the argument
+  */
+ really_clear_tiles(0);
+ /* wait for all outstanding DMAs to finish */
+ mfc_write_tag_mask(~0);
+ mfc_read_tag_status_all();
+ /* send mbox message to PPU */
+ spu_write_out_mbox(CELL_CMD_FINISH);
+}
+
+
+/**
+ * Execute a batch of commands which was sent to us by the PPU.
+ * See the cell_emit_state.c code to see where the commands come from.
+ *
+ * The opcode param encodes the location of the buffer and its size:
+ * bits 8..15 hold the batch buffer index and bits 16 and up hold the
+ * batch length in bytes.
+ *
+ * The batch is DMA'd into a local copy, the main-memory buffer is released
+ * back to the PPU, and the commands are then decoded one after another.
+ * 'pos' counts 16-byte qwords; every case advances it past its own command.
+ * An unrecognized opcode abandons the rest of the batch.
+ */
+static void
+cmd_batch(uint opcode)
+{
+   const uint buf = (opcode >> 8) & 0xff;
+   uint size = (opcode >> 16);
+   qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB;
+   const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]);
+   uint pos;
+
+   D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n",
+            buf, size, spu.init.buffers[buf]);
+
+   ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
+
+   ASSERT_ALIGN16(spu.init.buffers[buf]);
+
+   size = ROUNDUP16(size);
+
+   /* the DMA below must not write past the end of the local copy */
+   ASSERT(size <= sizeof(buffer));
+
+   mfc_get(buffer,                               /* dest */
+           (unsigned int) spu.init.buffers[buf], /* src */
+           size,
+           TAG_BATCH_BUFFER,
+           0, /* tid */
+           0  /* rid */);
+   wait_on_mask(1 << TAG_BATCH_BUFFER);
+
+   /* Tell PPU we're done copying the buffer to local store */
+   D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf);
+   release_buffer(buf);
+
+   /*
+    * Loop over commands in the batch buffer
+    */
+   for (pos = 0; pos < usize; /* no incr */) {
+      switch (si_to_uint(buffer[pos])) {
+      /*
+       * rendering commands
+       */
+      case CELL_CMD_CLEAR_SURFACE:
+         {
+            struct cell_command_clear_surface *clr
+               = (struct cell_command_clear_surface *) &buffer[pos];
+            cmd_clear_surface(clr);
+            pos += sizeof(*clr) / 16;
+         }
+         break;
+      case CELL_CMD_RENDER:
+         {
+            struct cell_command_render *render
+               = (struct cell_command_render *) &buffer[pos];
+            uint pos_incr;
+            cmd_render(render, &pos_incr);
+            /* pos_incr is rounded up to even and halved to get qwords
+             * (presumably cmd_render reports 8-byte units);
+             * should 'fix' cmd_render return
+             */
+            pos += ((pos_incr+1)&~1) / 2;
+         }
+         break;
+      /*
+       * state-update commands
+       */
+      case CELL_CMD_STATE_FRAMEBUFFER:
+         {
+            struct cell_command_framebuffer *fb
+               = (struct cell_command_framebuffer *) &buffer[pos];
+            cmd_state_framebuffer(fb);
+            pos += sizeof(*fb) / 16;
+         }
+         break;
+      case CELL_CMD_STATE_FRAGMENT_OPS:
+         {
+            struct cell_command_fragment_ops *fops
+               = (struct cell_command_fragment_ops *) &buffer[pos];
+            cmd_state_fragment_ops(fops);
+            /* This is a variant-sized command */
+            pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16;
+         }
+         break;
+      case CELL_CMD_STATE_FRAGMENT_PROGRAM:
+         {
+            struct cell_command_fragment_program *fp
+               = (struct cell_command_fragment_program *) &buffer[pos];
+            cmd_state_fragment_program(fp);
+            pos += sizeof(*fp) / 16;
+         }
+         break;
+      case CELL_CMD_STATE_FS_CONSTANTS:
+         /* the handler returns the position of the next command itself */
+         pos = cmd_state_fs_constants(buffer, pos);
+         break;
+      case CELL_CMD_STATE_RASTERIZER:
+         {
+            struct cell_command_rasterizer *rast =
+               (struct cell_command_rasterizer *) &buffer[pos];
+            spu.rasterizer = rast->rasterizer;
+            pos += sizeof(*rast) / 16;
+         }
+         break;
+      case CELL_CMD_STATE_SAMPLER:
+         {
+            struct cell_command_sampler *sampler
+               = (struct cell_command_sampler *) &buffer[pos];
+            cmd_state_sampler(sampler);
+            pos += sizeof(*sampler) / 16;
+         }
+         break;
+      case CELL_CMD_STATE_TEXTURE:
+         {
+            struct cell_command_texture *texture
+               = (struct cell_command_texture *) &buffer[pos];
+            cmd_state_texture(texture);
+            pos += sizeof(*texture) / 16;
+         }
+         break;
+      /* The commands below carry their payload in the qword(s) following
+       * the opcode qword, hence the "1 +" in the position updates.
+       */
+      case CELL_CMD_STATE_VERTEX_INFO:
+         cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
+         pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16;
+         break;
+      case CELL_CMD_STATE_VIEWPORT:
+         (void) memcpy(& draw.viewport, &buffer[pos+1],
+                       sizeof(struct pipe_viewport_state));
+         pos += 1 + ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16;
+         break;
+      case CELL_CMD_STATE_UNIFORMS:
+         draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0);
+         pos += 2;
+         break;
+      case CELL_CMD_STATE_VS_ARRAY_INFO:
+         cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
+         pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16;
+         break;
+      case CELL_CMD_STATE_BIND_VS:
+         /* binding is currently disabled; the command is only skipped */
+#if 0
+         spu_bind_vertex_shader(&draw,
+                                (struct cell_shader_info *) &buffer[pos+1]);
+#endif
+         pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16;
+         break;
+      case CELL_CMD_STATE_ATTRIB_FETCH:
+         cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
+                                &buffer[pos+1]);
+         pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16;
+         break;
+      /*
+       * misc commands
+       */
+      case CELL_CMD_FINISH:
+         cmd_finish();
+         pos += 1;
+         break;
+      case CELL_CMD_FENCE:
+         {
+            struct cell_command_fence *fence_cmd =
+               (struct cell_command_fence *) &buffer[pos];
+            cmd_fence(fence_cmd);
+            pos += sizeof(*fence_cmd) / 16;
+         }
+         break;
+      case CELL_CMD_RELEASE_VERTS:
+         {
+            struct cell_command_release_verts *release
+               = (struct cell_command_release_verts *) &buffer[pos];
+            cmd_release_verts(release);
+            pos += sizeof(*release) / 16;
+         }
+         break;
+      case CELL_CMD_FLUSH_BUFFER_RANGE: {
+         struct cell_buffer_range *br = (struct cell_buffer_range *)
+            &buffer[pos+1];
+
+         spu_dcache_mark_dirty((unsigned) br->base, br->size);
+         pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16;
+         break;
+      }
+      default:
+         printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos]));
+         ASSERT(0);
+         /* The size of an unknown command is unknown, so the rest of the
+          * batch cannot be decoded.  Abandon it rather than looping on
+          * the same qword forever when ASSERT does not abort.
+          */
+         pos = usize;
+         break;
+      }
+   }
+
+   D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n");
+}
+
+
+#define PERF 0
+
+
+/**
+ * Main loop for SPEs: Get a command, execute it, repeat.
+ *
+ * Commands arrive as 32-bit words on the inbound mailbox.  The loop ends
+ * when CELL_CMD_EXIT is received.  When PERF is non-zero the decrementer
+ * is used to time the mailbox wait and the command execution.
+ */
+void
+command_loop(void)
+{
+ int exitFlag = 0;
+ /* t0/t1 are only written and read when PERF is enabled */
+ uint t0, t1;
+
+ D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n");
+
+ while (!exitFlag) {
+ unsigned opcode;
+
+ D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n");
+
+ /* restart the decrementer at its maximum so elapsed time is (~0 - reading) */
+ if (PERF)
+ spu_write_decrementer(~0);
+
+ /* read/wait from mailbox */
+ opcode = (unsigned int) spu_read_in_mbox();
+ D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode);
+
+ if (PERF)
+ t0 = spu_read_decrementer();
+
+ /* only the bits under CELL_CMD_OPCODE_MASK select the command;
+  * cmd_batch() decodes the remaining bits itself
+  */
+ switch (opcode & CELL_CMD_OPCODE_MASK) {
+ case CELL_CMD_EXIT:
+ D_PRINTF(CELL_DEBUG_CMD, "EXIT\n");
+ exitFlag = 1;
+ break;
+ case CELL_CMD_VS_EXECUTE:
+ /* vertex shader execution is currently disabled; command is ignored */
+#if 0
+ spu_execute_vertex_shader(&draw, &cmd.vs);
+#endif
+ break;
+ case CELL_CMD_BATCH:
+ cmd_batch(opcode);
+ break;
+ default:
+ printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
+ }
+
+ if (PERF) {
+ t1 = spu_read_decrementer();
+ /* the decrementer counts down, hence the reversed subtractions */
+ printf("wait mbox time: %gms batch time: %gms\n",
+ (~0u - t0) * spu.init.inv_timebase,
+ (t0 - t1) * spu.init.inv_timebase);
+ }
+ }
+
+ D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n");
+
+ if (spu.init.debug_flags & CELL_DEBUG_CACHE)
+ spu_dcache_report();
+}
+
+/**
+ * Initialize this module; we manage the fragment ops buffer here.
+ *
+ * Leaves the code-gen'ed fragment ops buffer empty and installs the C
+ * fallback for both facings.  cmd_state_fragment_ops() normally overrides
+ * the fallback with code-gen'd functions, unless the
+ * CELL_DEBUG_FRAGMENT_OP_FALLBACK debug flag is set.
+ */
+void
+spu_command_init(void)
+{
+   /* Start with no buffer for code-gen'ed fragment ops */
+   spu.fragment_ops_code_size = 0;
+   spu.fragment_ops_code = NULL;
+
+   /* Default/fallback fragment processing function for both facings */
+   spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+   spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+}
+
+/**
+ * Shut this module down: release the code-gen buffer for fragment ops and
+ * put the fragment ops functions back to their initial (fallback) setting,
+ * just to leave things in a good state.
+ */
+void
+spu_command_close(void)
+{
+   /* free(NULL) is a no-op, so no need to test the pointer first */
+   free(spu.fragment_ops_code);
+
+   /* resets the buffer pointer/size and reinstalls the fallback */
+   spu_command_init();
+}
diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h
new file mode 100644
index 0000000000..83dcdade28
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_command.h
@@ -0,0 +1,35 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+extern void
+command_loop(void);
+
+extern void
+spu_command_init(void);
+
+extern void
+spu_command_close(void);
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c
index 167404cdc5..a6d67634fd 100644
--- a/src/gallium/drivers/cell/spu/spu_dcache.c
+++ b/src/gallium/drivers/cell/spu/spu_dcache.c
@@ -36,7 +36,9 @@
#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0)
#define CACHE_LOG2NNWAY 2
#define CACHE_LOG2NSETS 6
-/*#define CACHE_STATS 1*/
+#ifdef DEBUG
+#define CACHE_STATS 1
+#endif
#include <cache-api.h>
/* Yes folks, this is ugly.
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c
new file mode 100644
index 0000000000..ff3d609d25
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_funcs.c
@@ -0,0 +1,173 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * SPU functions accessed by shaders.
+ *
+ * Authors: Brian Paul
+ */
+
+
+#include <string.h>
+#include <libmisc.h>
+#include <math.h>
+#include <cos14_v.h>
+#include <sin14_v.h>
+#include <simdmath/exp2f4.h>
+#include <simdmath/log2f4.h>
+#include <simdmath/powf4.h>
+
+#include "cell/common.h"
+#include "spu_main.h"
+#include "spu_funcs.h"
+#include "spu_texture.h"
+
+
+/** Bundle of four vectors, so a function can "return" them by value */
+struct vec_4x4
+{
+ vector float v[4]; /* passed as the colors[] output of the texture samplers */
+};
+
+/* Vector math wrappers; return_function_info() exports their addresses. */
+static vector float
+spu_cos(vector float x)
+{
+ return _cos14_v(x); /* presumably the routine from <cos14_v.h> */
+}
+
+static vector float
+spu_sin(vector float x)
+{
+ return _sin14_v(x); /* presumably the routine from <sin14_v.h> */
+}
+
+static vector float
+spu_pow(vector float x, vector float y)
+{
+ return _powf4(x, y); /* presumably from <simdmath/powf4.h> */
+}
+
+static vector float
+spu_exp2(vector float x)
+{
+ return _exp2f4(x); /* presumably from <simdmath/exp2f4.h> */
+}
+
+static vector float
+spu_log2(vector float x)
+{
+ return _log2f4(x); /* presumably from <simdmath/log2f4.h> */
+}
+
+/** 2D texture lookup callable from shader code (exported as "spu_tex_2d"). */
+static struct vec_4x4
+spu_tex_2d(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit)
+{
+ struct vec_4x4 colors;
+ (void) r; /* only s and t are passed to the sampler */
+ (void) q; /* q is ignored as well */
+ spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); /* level 0, face 0 */
+ return colors;
+}
+
+static struct vec_4x4
+spu_tex_3d(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit) /* exported to shader code as "spu_tex_3d" */
+{
+ struct vec_4x4 colors;
+ (void) r; /* NOTE(review): r is dropped, so this samples like spu_tex_2d() */
+ (void) q; /* q is ignored as well */
+ spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); /* level 0, face 0 */
+ return colors; /* TODO confirm: looks like a placeholder for a real 3D sampler */
+}
+
+static struct vec_4x4
+spu_tex_cube(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit) /* exported to shader code as "spu_tex_cube" */
+{
+ struct vec_4x4 colors;
+ (void) q; /* q is ignored; s, t and r all reach the cube sampler */
+ sample_texture_cube(s, t, r, unit, colors.v); /* presumably fills colors.v */
+ return colors;
+}
+
+
+/**
+ * Add a named function to the table of "exported" functions made available
+ * to the PPU-hosted code generator.  NOTE(review): confirm 16 is the capacity.
+ */
+static void
+export_func(struct cell_spu_function_info *spu_functions,
+ const char *name, void *addr)
+{
+ const uint n = spu_functions->num;
+ ASSERT(n < 16); /* check the slot index before writing into slot n */
+ ASSERT(strlen(name) < 16); /* name must be shorter than 16 characters */
+ strcpy(spu_functions->names[n], name);
+ spu_functions->addrs[n] = (uint) addr;
+ spu_functions->num = n + 1;
+}
+
+
+/**
+ * Return info about the SPU's functions to the PPU / main memory.
+ * The PPU needs to know the address of some SPU-side functions so
+ * that we can generate shader code with function calls.
+ */
+void
+return_function_info(void)
+{
+ struct cell_spu_function_info funcs ALIGN16_ATTRIB;
+ const int tag = TAG_MISC;
+
+ ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */
+
+ memset(&funcs, 0, sizeof(funcs)); /* num = 0; no stack garbage gets DMA'd */
+ export_func(&funcs, "spu_cos", &spu_cos);
+ export_func(&funcs, "spu_sin", &spu_sin);
+ export_func(&funcs, "spu_pow", &spu_pow);
+ export_func(&funcs, "spu_exp2", &spu_exp2);
+ export_func(&funcs, "spu_log2", &spu_log2);
+ export_func(&funcs, "spu_tex_2d", &spu_tex_2d);
+ export_func(&funcs, "spu_tex_3d", &spu_tex_3d);
+ export_func(&funcs, "spu_tex_cube", &spu_tex_cube);
+
+ /* Send the whole table back to the PPU / main memory, then wait on tag */
+ mfc_put((void *) &funcs, /* src in local store */
+ (unsigned int) spu.init.spu_functions, /* dst in main memory */
+ sizeof(funcs), /* bytes */
+ tag,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << tag);
+}
+
+
+
diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/spu/spu_funcs.h
index d570bbd2f9..3adb6ae99f 100644
--- a/src/gallium/drivers/cell/ppu/cell_winsys.c
+++ b/src/gallium/drivers/cell/spu/spu_funcs.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,16 +25,11 @@
*
**************************************************************************/
+#ifndef SPU_FUNCS_H
+#define SPU_FUNCS_H
-#include "util/u_memory.h"
-#include "cell_winsys.h"
+extern void
+return_function_info(void);
+#endif
-struct cell_winsys *
-cell_get_winsys(uint format)
-{
- struct cell_winsys *cws = CALLOC_STRUCT(cell_winsys);
- if (cws)
- cws->preferredFormat = format;
- return cws;
-}
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 78260c4259..97c86d194d 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -32,16 +32,15 @@
#include <stdio.h>
#include <libmisc.h>
+#include "pipe/p_defines.h"
+
+#include "spu_funcs.h"
+#include "spu_command.h"
#include "spu_main.h"
-#include "spu_render.h"
#include "spu_per_fragment_op.h"
#include "spu_texture.h"
-#include "spu_tile.h"
//#include "spu_test.h"
-#include "spu_vertex_shader.h"
-#include "spu_dcache.h"
#include "cell/common.h"
-#include "pipe/p_defines.h"
/*
@@ -50,600 +49,8 @@ helpful headers:
/opt/cell/sdk/usr/include/libmisc.h
*/
-boolean Debug = FALSE;
-
struct spu_global spu;
-struct spu_vs_context draw;
-
-
-/**
- * Buffers containing dynamically generated SPU code:
- */
-static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
- ALIGN16_ATTRIB;
-
-
-
-/**
- * Tell the PPU that this SPU has finished copying a buffer to
- * local store and that it may be reused by the PPU.
- * This is done by writting a 16-byte batch-buffer-status block back into
- * main memory (in cell_context->buffer_status[]).
- */
-static void
-release_buffer(uint buffer)
-{
- /* Evidently, using less than a 16-byte status doesn't work reliably */
- static const uint status[4] ALIGN16_ATTRIB
- = {CELL_BUFFER_STATUS_FREE, 0, 0, 0};
-
- const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
- uint *dst = spu.init.buffer_status + index;
-
- ASSERT(buffer < CELL_NUM_BUFFERS);
-
- mfc_put((void *) &status, /* src in local memory */
- (unsigned int) dst, /* dst in main memory */
- sizeof(status), /* size */
- TAG_MISC, /* tag is unimportant */
- 0, /* tid */
- 0 /* rid */);
-}
-
-
-/**
- * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
- * tiles back to the main framebuffer.
- */
-static void
-really_clear_tiles(uint surfaceIndex)
-{
- const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- uint i;
-
- if (surfaceIndex == 0) {
- clear_c_tile(&spu.ctile);
-
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
- put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
- }
- }
- }
- else {
- clear_z_tile(&spu.ztile);
-
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR)
- put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1);
- }
- }
-
-#if 0
- wait_on_mask(1 << TAG_SURFACE_CLEAR);
-#endif
-}
-
-
-static void
-cmd_clear_surface(const struct cell_command_clear_surface *clear)
-{
- if (Debug)
- printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id,
- clear->surface, clear->value);
-
- if (clear->surface == 0) {
- spu.fb.color_clear_value = clear->value;
- if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
- uint x = (spu.init.id << 4) | (spu.init.id << 12) |
- (spu.init.id << 20) | (spu.init.id << 28);
- spu.fb.color_clear_value ^= x;
- }
- }
- else {
- spu.fb.depth_clear_value = clear->value;
- }
-
-#define CLEAR_OPT 1
-#if CLEAR_OPT
-
- /* Simply set all tiles' status to CLEAR.
- * When we actually begin rendering into a tile, we'll initialize it to
- * the clear value. If any tiles go untouched during the frame,
- * really_clear_tiles() will set them to the clear value.
- */
- if (clear->surface == 0) {
- memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
- }
- else {
- memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
- }
-
-#else
-
- /*
- * This path clears the whole framebuffer to the clear color right now.
- */
-
- /*
- printf("SPU: %s num=%d w=%d h=%d\n",
- __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
- */
-
- /* init a single tile to the clear value */
- if (clear->surface == 0) {
- clear_c_tile(&spu.ctile);
- }
- else {
- clear_z_tile(&spu.ztile);
- }
-
- /* walk over my tiles, writing the 'clear' tile's data */
- {
- const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- uint i;
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (clear->surface == 0)
- put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
- else
- put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
- }
- }
-
- if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
- wait_on_mask(1 << TAG_SURFACE_CLEAR);
- }
-
-#endif /* CLEAR_OPT */
-
- if (Debug)
- printf("SPU %u: CLEAR SURF done\n", spu.init.id);
-}
-
-
-static void
-cmd_release_verts(const struct cell_command_release_verts *release)
-{
- if (Debug)
- printf("SPU %u: RELEASE VERTS %u\n",
- spu.init.id, release->vertex_buf);
- ASSERT(release->vertex_buf != ~0U);
- release_buffer(release->vertex_buf);
-}
-
-
-/**
- * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
- * This involves installing new fragment ops SPU code.
- * If this function is never called, we'll use a regular C fallback function
- * for fragment processing.
- */
-static void
-cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
-{
- if (Debug)
- printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
- /* Copy SPU code from batch buffer to spu buffer */
- memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
- /* Copy state info (for fallback case only) */
- memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
- memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
-
- /* Point function pointer at new code */
- spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
-
- spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
- spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
-}
-
-
-static void
-cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
-{
- if (Debug)
- printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id);
- /* Copy SPU code from batch buffer to spu buffer */
- memcpy(spu.fragment_program_code, fp->code,
- SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
-#if 01
- /* Point function pointer at new code */
- spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
-#endif
-}
-
-
-static void
-cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
-{
- if (Debug)
- printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
- spu.init.id,
- cmd->width,
- cmd->height,
- cmd->color_start,
- cmd->color_format,
- cmd->depth_format);
-
- ASSERT_ALIGN16(cmd->color_start);
- ASSERT_ALIGN16(cmd->depth_start);
-
- spu.fb.color_start = cmd->color_start;
- spu.fb.depth_start = cmd->depth_start;
- spu.fb.color_format = cmd->color_format;
- spu.fb.depth_format = cmd->depth_format;
- spu.fb.width = cmd->width;
- spu.fb.height = cmd->height;
- spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
- spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
-
- switch (spu.fb.depth_format) {
- case PIPE_FORMAT_Z32_UNORM:
- spu.fb.zsize = 4;
- spu.fb.zscale = (float) 0xffffffffu;
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_S8Z24_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- spu.fb.zsize = 4;
- spu.fb.zscale = (float) 0x00ffffffu;
- break;
- case PIPE_FORMAT_Z16_UNORM:
- spu.fb.zsize = 2;
- spu.fb.zscale = (float) 0xffffu;
- break;
- default:
- spu.fb.zsize = 0;
- break;
- }
-}
-
-
-static void
-cmd_state_sampler(const struct cell_command_sampler *sampler)
-{
- if (Debug)
- printf("SPU %u: SAMPLER [%u]\n",
- spu.init.id, sampler->unit);
-
- spu.sampler[sampler->unit] = sampler->state;
- if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR)
- spu.sample_texture[sampler->unit] = sample_texture_bilinear;
- else
- spu.sample_texture[sampler->unit] = sample_texture_nearest;
-}
-
-
-static void
-cmd_state_texture(const struct cell_command_texture *texture)
-{
- const uint unit = texture->unit;
- const uint width = texture->width;
- const uint height = texture->height;
-
- if (Debug) {
- printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu.init.id,
- texture->unit, texture->start,
- texture->width, texture->height);
- }
-
- spu.texture[unit].start = texture->start;
- spu.texture[unit].width = width;
- spu.texture[unit].height = height;
-
- spu.texture[unit].tiles_per_row = width / TILE_SIZE;
-
- spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0};
- spu.texture[unit].tex_size_mask = (vector unsigned int)
- { width - 1, height - 1, 0, 0 };
- spu.texture[unit].tex_size_x_mask = spu_splats(width - 1);
- spu.texture[unit].tex_size_y_mask = spu_splats(height - 1);
-}
-
-
-static void
-cmd_state_vertex_info(const struct vertex_info *vinfo)
-{
- if (Debug) {
- printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id,
- vinfo->num_attribs);
- }
- ASSERT(vinfo->num_attribs >= 1);
- ASSERT(vinfo->num_attribs <= 8);
- memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
-}
-
-
-static void
-cmd_state_vs_array_info(const struct cell_array_info *vs_info)
-{
- const unsigned attr = vs_info->attr;
-
- ASSERT(attr < PIPE_MAX_ATTRIBS);
- draw.vertex_fetch.src_ptr[attr] = vs_info->base;
- draw.vertex_fetch.pitch[attr] = vs_info->pitch;
- draw.vertex_fetch.size[attr] = vs_info->size;
- draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
- draw.vertex_fetch.dirty = 1;
-}
-
-
-static void
-cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
-{
- mfc_get(attribute_fetch_code_buffer,
- (unsigned int) code->base, /* src */
- code->size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- draw.vertex_fetch.code = attribute_fetch_code_buffer;
-}
-
-
-static void
-cmd_finish(void)
-{
- if (Debug)
- printf("SPU %u: FINISH\n", spu.init.id);
- really_clear_tiles(0);
- /* wait for all outstanding DMAs to finish */
- mfc_write_tag_mask(~0);
- mfc_read_tag_status_all();
- /* send mbox message to PPU */
- spu_write_out_mbox(CELL_CMD_FINISH);
-}
-
-
-/**
- * Execute a batch of commands which was sent to us by the PPU.
- * See the cell_emit_state.c code to see where the commands come from.
- *
- * The opcode param encodes the location of the buffer and its size.
- */
-static void
-cmd_batch(uint opcode)
-{
- const uint buf = (opcode >> 8) & 0xff;
- uint size = (opcode >> 16);
- uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB;
- const unsigned usize = size / sizeof(buffer[0]);
- uint pos;
-
- if (Debug)
- printf("SPU %u: BATCH buffer %u, len %u, from %p\n",
- spu.init.id, buf, size, spu.init.buffers[buf]);
-
- ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
-
- ASSERT_ALIGN16(spu.init.buffers[buf]);
-
- size = ROUNDUP16(size);
-
- ASSERT_ALIGN16(spu.init.buffers[buf]);
-
- mfc_get(buffer, /* dest */
- (unsigned int) spu.init.buffers[buf], /* src */
- size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- /* Tell PPU we're done copying the buffer to local store */
- if (Debug)
- printf("SPU %u: release batch buf %u\n", spu.init.id, buf);
- release_buffer(buf);
-
- /*
- * Loop over commands in the batch buffer
- */
- for (pos = 0; pos < usize; /* no incr */) {
- switch (buffer[pos]) {
- /*
- * rendering commands
- */
- case CELL_CMD_CLEAR_SURFACE:
- {
- struct cell_command_clear_surface *clr
- = (struct cell_command_clear_surface *) &buffer[pos];
- cmd_clear_surface(clr);
- pos += sizeof(*clr) / 8;
- }
- break;
- case CELL_CMD_RENDER:
- {
- struct cell_command_render *render
- = (struct cell_command_render *) &buffer[pos];
- uint pos_incr;
- cmd_render(render, &pos_incr);
- pos += pos_incr;
- }
- break;
- /*
- * state-update commands
- */
- case CELL_CMD_STATE_FRAMEBUFFER:
- {
- struct cell_command_framebuffer *fb
- = (struct cell_command_framebuffer *) &buffer[pos];
- cmd_state_framebuffer(fb);
- pos += sizeof(*fb) / 8;
- }
- break;
- case CELL_CMD_STATE_FRAGMENT_OPS:
- {
- struct cell_command_fragment_ops *fops
- = (struct cell_command_fragment_ops *) &buffer[pos];
- cmd_state_fragment_ops(fops);
- pos += sizeof(*fops) / 8;
- }
- break;
- case CELL_CMD_STATE_FRAGMENT_PROGRAM:
- {
- struct cell_command_fragment_program *fp
- = (struct cell_command_fragment_program *) &buffer[pos];
- cmd_state_fragment_program(fp);
- pos += sizeof(*fp) / 8;
- }
- break;
- case CELL_CMD_STATE_SAMPLER:
- {
- struct cell_command_sampler *sampler
- = (struct cell_command_sampler *) &buffer[pos];
- cmd_state_sampler(sampler);
- pos += sizeof(*sampler) / 8;
- }
- break;
- case CELL_CMD_STATE_TEXTURE:
- {
- struct cell_command_texture *texture
- = (struct cell_command_texture *) &buffer[pos];
- cmd_state_texture(texture);
- pos += sizeof(*texture) / 8;
- }
- break;
- case CELL_CMD_STATE_VERTEX_INFO:
- cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8);
- break;
- case CELL_CMD_STATE_VIEWPORT:
- (void) memcpy(& draw.viewport, &buffer[pos+1],
- sizeof(struct pipe_viewport_state));
- pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
- break;
- case CELL_CMD_STATE_UNIFORMS:
- draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1];
- pos += 2;
- break;
- case CELL_CMD_STATE_VS_ARRAY_INFO:
- cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
- break;
- case CELL_CMD_STATE_BIND_VS:
-#if 0
- spu_bind_vertex_shader(&draw,
- (struct cell_shader_info *) &buffer[pos+1]);
-#endif
- pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
- break;
- case CELL_CMD_STATE_ATTRIB_FETCH:
- cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
- &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
- break;
- /*
- * misc commands
- */
- case CELL_CMD_FINISH:
- cmd_finish();
- pos += 1;
- break;
- case CELL_CMD_RELEASE_VERTS:
- {
- struct cell_command_release_verts *release
- = (struct cell_command_release_verts *) &buffer[pos];
- cmd_release_verts(release);
- pos += sizeof(*release) / 8;
- }
- break;
- case CELL_CMD_FLUSH_BUFFER_RANGE: {
- struct cell_buffer_range *br = (struct cell_buffer_range *)
- &buffer[pos+1];
-
- spu_dcache_mark_dirty((unsigned) br->base, br->size);
- pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8);
- break;
- }
- default:
- printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
- ASSERT(0);
- break;
- }
- }
-
- if (Debug)
- printf("SPU %u: BATCH complete\n", spu.init.id);
-}
-
-
-/**
- * Temporary/simple main loop for SPEs: Get a command, execute it, repeat.
- */
-static void
-main_loop(void)
-{
- struct cell_command cmd;
- int exitFlag = 0;
-
- if (Debug)
- printf("SPU %u: Enter main loop\n", spu.init.id);
-
- ASSERT((sizeof(struct cell_command) & 0xf) == 0);
- ASSERT_ALIGN16(&cmd);
-
- while (!exitFlag) {
- unsigned opcode;
- int tag = 0;
-
- if (Debug)
- printf("SPU %u: Wait for cmd...\n", spu.init.id);
-
- /* read/wait from mailbox */
- opcode = (unsigned int) spu_read_in_mbox();
-
- if (Debug)
- printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode);
-
- /* command payload */
- mfc_get(&cmd, /* dest */
- (unsigned int) spu.init.cmd, /* src */
- sizeof(struct cell_command), /* bytes */
- tag,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask( 1 << tag );
-
- /*
- * NOTE: most commands should be contained in a batch buffer
- */
-
- switch (opcode & CELL_CMD_OPCODE_MASK) {
- case CELL_CMD_EXIT:
- if (Debug)
- printf("SPU %u: EXIT\n", spu.init.id);
- exitFlag = 1;
- break;
- case CELL_CMD_VS_EXECUTE:
-#if 0
- spu_execute_vertex_shader(&draw, &cmd.vs);
-#endif
- break;
- case CELL_CMD_BATCH:
- cmd_batch(opcode);
- break;
- default:
- printf("Bad opcode!\n");
- }
-
- }
-
- if (Debug)
- printf("SPU %u: Exit main loop\n", spu.init.id);
-
- spu_dcache_report();
-}
-
-
static void
one_time_init(void)
@@ -651,15 +58,8 @@ one_time_init(void)
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
invalidate_tex_cache();
-
- /* Install default/fallback fragment processing function.
- * This will normally be overriden by a code-gen'd function.
- */
- spu.fragment_ops = spu_fallback_fragment_ops;
}
-
-
/* In some versions of the SDK the SPE main takes 'unsigned long' as a
* parameter. In others it takes 'unsigned long long'. Use a define to
* select between the two.
@@ -682,12 +82,16 @@ main(main_param_t speid, main_param_t argp)
ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
ASSERT(sizeof(struct cell_command_render) % 8 == 0);
+ ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0);
+ ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0);
one_time_init();
+ spu_command_init();
- if (Debug)
- printf("SPU: main() speid=%lu\n", (unsigned long) speid);
+ D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid);
+ D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");
+ /* get initialization data */
mfc_get(&spu.init, /* dest */
(unsigned int) argp, /* src */
sizeof(struct cell_init_info), /* bytes */
@@ -696,12 +100,18 @@ main(main_param_t speid, main_param_t argp)
0 /* rid */);
wait_on_mask( 1 << tag );
+ if (spu.init.id == 0) {
+ return_function_info();
+ }
+
#if 0
if (spu.init.id==0)
- spu_test_misc();
+ spu_test_misc(spu.init.id);
#endif
- main_loop();
+ command_loop();
+
+ spu_command_close();
return 0;
}
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 2c7b625840..33767e7c51 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -36,9 +36,18 @@
#include "pipe/p_state.h"
-
-#define MAX_WIDTH 1024
-#define MAX_HEIGHT 1024
+#if DEBUG
+/* These debug macros use the unusual construction ", ##__VA_ARGS__"
+ * which expands to the expected comma + args if variadic arguments
+ * are supplied, but swallows the comma if there are no variadic
+ * arguments (which avoids syntax errors that would otherwise occur).
+ */
+#define D_PRINTF(flag, format,...) \
+ if (spu.init.debug_flags & (flag)) \
+ printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__)
+#else
+#define D_PRINTF(...)
+#endif
/**
@@ -61,8 +70,11 @@ typedef union {
/** Function for sampling textures */
-typedef vector float (*spu_sample_texture_func)(uint unit,
- vector float texcoord);
+typedef void (*spu_sample_texture_2d_func)(vector float s,
+ vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
/** Function for performing per-fragment ops */
typedef void (*spu_fragment_ops_func)(uint x, uint y,
@@ -76,9 +88,9 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,
vector unsigned int mask);
/** Function for running fragment program */
-typedef void (*spu_fragment_program_func)(vector float *inputs,
- vector float *outputs,
- vector float *constants);
+typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
+ vector float *outputs,
+ vector float *constants);
struct spu_framebuffer
@@ -98,15 +110,27 @@ struct spu_framebuffer
} ALIGN16_ATTRIB;
-struct spu_texture
+/** per-texture level info */
+struct spu_texture_level
{
void *start;
- ushort width, height;
+ ushort width, height, depth;
ushort tiles_per_row;
- vector float tex_size;
- vector unsigned int tex_size_mask; /**< == int(size - 1) */
- vector unsigned int tex_size_x_mask; /**< == int(size - 1) */
- vector unsigned int tex_size_y_mask; /**< == int(size - 1) */
+ uint bytes_per_image;
+ /** texcoord scale factors */
+ vector float scale_s, scale_t, scale_r;
+ /** texcoord masks (if REPEAT then size-1, else ~0) */
+ vector signed int mask_s, mask_t, mask_r;
+ /** texcoord clamp limits */
+ vector signed int max_s, max_t, max_r;
+} ALIGN16_ATTRIB;
+
+
+struct spu_texture
+{
+ struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS];
+ uint max_level;
+ uint target; /**< PIPE_TEXTURE_x */
} ALIGN16_ATTRIB;
@@ -124,7 +148,9 @@ struct spu_global
struct spu_framebuffer fb;
struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
struct pipe_blend_state blend;
+ struct pipe_blend_color blend_color;
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_rasterizer_state rasterizer;
struct spu_texture texture[PIPE_MAX_SAMPLERS];
struct vertex_info vertex_info;
@@ -133,39 +159,38 @@ struct spu_global
tile_t ztile ALIGN16_ATTRIB;
/** Read depth/stencil tiles? */
- boolean read_depth;
- boolean read_stencil;
+ boolean read_depth_stencil;
/** Current tiles' status */
ubyte cur_ctile_status, cur_ztile_status;
/** Status of all tiles in framebuffer */
- ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
- ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
- /** Current fragment ops machine code */
- uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS];
- /** Current fragment ops function */
- spu_fragment_ops_func fragment_ops;
+ /** Current fragment ops machine code, at 8-byte boundary */
+ uint *fragment_ops_code;
+ uint fragment_ops_code_size;
+ /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */
+ spu_fragment_ops_func fragment_ops[2];
- /** Current fragment program machine code */
- uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
+ /** Current fragment program machine code, at 8-byte boundary */
+ uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB;
/** Current fragment ops function */
spu_fragment_program_func fragment_program;
/** Current texture sampler function */
- spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS];
- /** Fragment program constants (XXX preliminary/used) */
-#define MAX_CONSTANTS 32
- vector float constants[MAX_CONSTANTS];
+ /** Fragment program constants */
+ vector float constants[4 * CELL_MAX_CONSTANTS];
} ALIGN16_ATTRIB;
extern struct spu_global spu;
-extern boolean Debug;
-
@@ -184,7 +209,7 @@ extern boolean Debug;
#define TAG_DCACHE1 21
#define TAG_DCACHE2 22
#define TAG_DCACHE3 23
-
+#define TAG_FENCE 24
static INLINE void
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
index 03dd547845..eba9f95cf1 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -40,6 +40,24 @@
#define LINEAR_QUAD_LAYOUT 1
+static INLINE vector float
+spu_min(vector float a, vector float b)
+{
+ /* Per-element minimum: lanes where a > b take b, the rest keep a. */
+ const vector unsigned int a_gt_b = spu_cmpgt(a, b); /* a > b ? ~0 : 0 */
+ return spu_sel(a, b, a_gt_b);
+}
+
+
+static INLINE vector float
+spu_max(vector float a, vector float b)
+{
+ /* Per-element maximum: lanes where a > b take a, the rest keep b. */
+ const vector unsigned int a_gt_b = spu_cmpgt(a, b); /* a > b ? ~0 : 0 */
+ return spu_sel(b, a, a_gt_b);
+}
+
+
/**
* Called by rasterizer for each quad after the shader has run. Do
* all the per-fragment operations including alpha test, z test,
@@ -60,11 +78,14 @@ spu_fallback_fragment_ops(uint x, uint y,
vector unsigned int mask)
{
vector float frag_aos[4];
- unsigned int c0, c1, c2, c3;
+ unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
+ unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */
- /* do alpha test */
+ /*
+ * Do alpha test
+ */
if (spu.depth_stencil_alpha.alpha.enabled) {
- vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
+ vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref_value);
vector unsigned int amask;
switch (spu.depth_stencil_alpha.alpha.func) {
@@ -102,7 +123,10 @@ spu_fallback_fragment_ops(uint x, uint y,
mask = spu_and(mask, amask);
}
- /* Z and/or stencil testing... */
+
+ /*
+ * Z and/or stencil testing...
+ */
if (spu.depth_stencil_alpha.depth.enabled ||
spu.depth_stencil_alpha.stencil[0].enabled) {
@@ -178,6 +202,32 @@ spu_fallback_fragment_ops(uint x, uint y,
}
}
+
+ /*
+ * If we'll need the current framebuffer/tile colors for blending
+ * or logicop or colormask, fetch them now.
+ */
+ if (spu.blend.blend_enable ||
+ spu.blend.logicop_enable ||
+ spu.blend.colormask != 0xf) {
+
+#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
+ fbc0 = colorTile->ui[y][x*2+0];
+ fbc1 = colorTile->ui[y][x*2+1];
+ fbc2 = colorTile->ui[y][x*2+2];
+ fbc3 = colorTile->ui[y][x*2+3];
+#else
+ fbc0 = colorTile->ui[y+0][x+0];
+ fbc1 = colorTile->ui[y+0][x+1];
+ fbc2 = colorTile->ui[y+1][x+0];
+ fbc3 = colorTile->ui[y+1][x+1];
+#endif
+ }
+
+
+ /*
+ * Do blending
+ */
if (spu.blend.blend_enable) {
/* blending terms, misc regs */
vector float term1r, term1g, term1b, term1a;
@@ -186,43 +236,30 @@ spu_fallback_fragment_ops(uint x, uint y,
vector float fbRGBA[4]; /* current framebuffer colors */
- /* get colors from framebuffer/tile */
+ /* convert framebuffer colors from packed int to vector float */
{
- vector float fc[4];
- uint c0, c1, c2, c3;
-
-#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
- c0 = colorTile->ui[y][x*2+0];
- c1 = colorTile->ui[y][x*2+1];
- c2 = colorTile->ui[y][x*2+2];
- c3 = colorTile->ui[y][x*2+3];
-#else
- c0 = colorTile->ui[y+0][x+0];
- c1 = colorTile->ui[y+0][x+1];
- c2 = colorTile->ui[y+1][x+0];
- c3 = colorTile->ui[y+1][x+1];
-#endif
+ vector float temp[4]; /* float colors in AOS form */
switch (spu.fb.color_format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
- fc[0] = spu_unpack_B8G8R8A8(c0);
- fc[1] = spu_unpack_B8G8R8A8(c1);
- fc[2] = spu_unpack_B8G8R8A8(c2);
- fc[3] = spu_unpack_B8G8R8A8(c3);
+ temp[0] = spu_unpack_B8G8R8A8(fbc0);
+ temp[1] = spu_unpack_B8G8R8A8(fbc1);
+ temp[2] = spu_unpack_B8G8R8A8(fbc2);
+ temp[3] = spu_unpack_B8G8R8A8(fbc3);
break;
case PIPE_FORMAT_A8R8G8B8_UNORM:
- fc[0] = spu_unpack_A8R8G8B8(c0);
- fc[1] = spu_unpack_A8R8G8B8(c1);
- fc[2] = spu_unpack_A8R8G8B8(c2);
- fc[3] = spu_unpack_A8R8G8B8(c3);
+ temp[0] = spu_unpack_A8R8G8B8(fbc0);
+ temp[1] = spu_unpack_A8R8G8B8(fbc1);
+ temp[2] = spu_unpack_A8R8G8B8(fbc2);
+ temp[3] = spu_unpack_A8R8G8B8(fbc3);
break;
default:
ASSERT(0);
}
- _transpose_matrix4x4(fbRGBA, fc);
+ _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */
}
/*
- * Compute Src RGB terms
+ * Compute Src RGB terms (fragment color * factor)
*/
switch (spu.blend.rgb_src_factor) {
case PIPE_BLENDFACTOR_ONE:
@@ -245,13 +282,33 @@ spu_fallback_fragment_ops(uint x, uint y,
term1g = spu_mul(fragG, fragA);
term1b = spu_mul(fragB, fragA);
break;
+ case PIPE_BLENDFACTOR_DST_COLOR: /* src term = fragment color * framebuffer color, per channel */
+ term1r = spu_mul(fragR, fbRGBA[0]);
+ term1g = spu_mul(fragG, fbRGBA[1]);
+ term1b = spu_mul(fragB, fbRGBA[2]); /* blue is fbRGBA[2]; [1] is green */
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term1r = spu_mul(fragR, fbRGBA[3]);
+ term1g = spu_mul(fragG, fbRGBA[3]);
+ term1b = spu_mul(fragB, fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0]));
+ term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1]));
+ term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2]));
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
+ term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3]));
+ term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3]));
+ break;
/* XXX more cases */
default:
ASSERT(0);
}
/*
- * Compute Src Alpha term
+ * Compute Src Alpha term (fragment alpha * factor)
*/
switch (spu.blend.alpha_src_factor) {
case PIPE_BLENDFACTOR_ONE:
@@ -263,19 +320,29 @@ spu_fallback_fragment_ops(uint x, uint y,
case PIPE_BLENDFACTOR_SRC_ALPHA:
term1a = spu_mul(fragA, fragA);
break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term1a = spu_mul(fragA, fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through: for the alpha term, CONST_COLOR uses the constant alpha too */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term1a = spu_mul(fragA, spu_splats(spu.blend_color.color[3])); /* fragment alpha, not red */
+ break;
/* XXX more cases */
default:
ASSERT(0);
}
/*
- * Compute Dest RGB terms
+ * Compute Dest RGB terms (framebuffer color * factor)
*/
switch (spu.blend.rgb_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
- term2r = fragR;
- term2g = fragG;
- term2b = fragB;
+ term2r = fbRGBA[0];
+ term2g = fbRGBA[1];
+ term2b = fbRGBA[2];
break;
case PIPE_BLENDFACTOR_ZERO:
term2r =
@@ -299,17 +366,37 @@ spu_fallback_fragment_ops(uint x, uint y,
term2g = spu_mul(fbRGBA[1], tmp);
term2b = spu_mul(fbRGBA[2], tmp);
break;
- /* XXX more cases */
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ term2r = spu_mul(fbRGBA[0], fbRGBA[0]);
+ term2g = spu_mul(fbRGBA[1], fbRGBA[1]);
+ term2b = spu_mul(fbRGBA[2], fbRGBA[2]);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term2r = spu_mul(fbRGBA[0], fbRGBA[3]);
+ term2g = spu_mul(fbRGBA[1], fbRGBA[3]);
+ term2b = spu_mul(fbRGBA[2], fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0]));
+ term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1]));
+ term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2]));
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3]));
+ term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3]));
+ term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3]));
+ break;
+ /* XXX more cases */
default:
ASSERT(0);
}
/*
- * Compute Dest Alpha term
+ * Compute Dest Alpha term (framebuffer alpha * factor)
*/
switch (spu.blend.alpha_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
- term2a = fragA;
+ term2a = fbRGBA[3];
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
term2a = spu_splats(0.0f);
@@ -322,6 +409,16 @@ spu_fallback_fragment_ops(uint x, uint y,
tmp = spu_sub(one, fragA);
term2a = spu_mul(fbRGBA[3], tmp);
break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term2a = spu_mul(fbRGBA[3], fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3]));
+ break;
/* XXX more cases */
default:
ASSERT(0);
@@ -341,7 +438,21 @@ spu_fallback_fragment_ops(uint x, uint y,
fragG = spu_sub(term1g, term2g);
fragB = spu_sub(term1b, term2b);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ fragR = spu_sub(term2r, term1r);
+ fragG = spu_sub(term2g, term1g);
+ fragB = spu_sub(term2b, term1b);
+ break;
+ case PIPE_BLEND_MIN:
+ fragR = spu_min(term1r, term2r);
+ fragG = spu_min(term1g, term2g);
+ fragB = spu_min(term1b, term2b);
+ break;
+ case PIPE_BLEND_MAX:
+ fragR = spu_max(term1r, term2r);
+ fragG = spu_max(term1g, term2g);
+ fragB = spu_max(term1b, term2b);
+ break;
default:
ASSERT(0);
}
@@ -356,7 +467,15 @@ spu_fallback_fragment_ops(uint x, uint y,
case PIPE_BLEND_SUBTRACT:
fragA = spu_sub(term1a, term2a);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ fragA = spu_sub(term2a, term1a);
+ break;
+ case PIPE_BLEND_MIN:
+ fragA = spu_min(term1a, term2a);
+ break;
+ case PIPE_BLEND_MAX:
+ fragA = spu_max(term1a, term2a);
+ break;
default:
ASSERT(0);
}
@@ -384,21 +503,20 @@ spu_fallback_fragment_ops(uint x, uint y,
#endif
/*
- * Pack float colors into 32-bit RGBA words.
+ * Pack fragment float colors into 32-bit RGBA words.
*/
switch (spu.fb.color_format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
- c0 = spu_pack_A8R8G8B8(frag_aos[0]);
- c1 = spu_pack_A8R8G8B8(frag_aos[1]);
- c2 = spu_pack_A8R8G8B8(frag_aos[2]);
- c3 = spu_pack_A8R8G8B8(frag_aos[3]);
+ fragc0 = spu_pack_A8R8G8B8(frag_aos[0]);
+ fragc1 = spu_pack_A8R8G8B8(frag_aos[1]);
+ fragc2 = spu_pack_A8R8G8B8(frag_aos[2]);
+ fragc3 = spu_pack_A8R8G8B8(frag_aos[3]);
break;
-
case PIPE_FORMAT_B8G8R8A8_UNORM:
- c0 = spu_pack_B8G8R8A8(frag_aos[0]);
- c1 = spu_pack_B8G8R8A8(frag_aos[1]);
- c2 = spu_pack_B8G8R8A8(frag_aos[2]);
- c3 = spu_pack_B8G8R8A8(frag_aos[3]);
+ fragc0 = spu_pack_B8G8R8A8(frag_aos[0]);
+ fragc1 = spu_pack_B8G8R8A8(frag_aos[1]);
+ fragc2 = spu_pack_B8G8R8A8(frag_aos[2]);
+ fragc3 = spu_pack_B8G8R8A8(frag_aos[3]);
break;
default:
fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
@@ -407,20 +525,57 @@ spu_fallback_fragment_ops(uint x, uint y,
/*
- * Color masking
+ * Do color masking
*/
if (spu.blend.colormask != 0xf) {
- /* XXX to do */
- /* apply color mask to 32-bit packed colors */
+ uint cmask = 0x0; /* each byte corresponds to a color channel */
+
+ /* Form bitmask depending on color buffer format and colormask bits */
+ switch (spu.fb.color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ if (spu.blend.colormask & PIPE_MASK_R)
+ cmask |= 0x00ff0000; /* red */
+ if (spu.blend.colormask & PIPE_MASK_G)
+ cmask |= 0x0000ff00; /* green */
+ if (spu.blend.colormask & PIPE_MASK_B)
+ cmask |= 0x000000ff; /* blue */
+ if (spu.blend.colormask & PIPE_MASK_A)
+ cmask |= 0xff000000; /* alpha */
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ if (spu.blend.colormask & PIPE_MASK_R)
+ cmask |= 0x0000ff00; /* red */
+ if (spu.blend.colormask & PIPE_MASK_G)
+ cmask |= 0x00ff0000; /* green */
+ if (spu.blend.colormask & PIPE_MASK_B)
+ cmask |= 0xff000000; /* blue */
+ if (spu.blend.colormask & PIPE_MASK_A)
+ cmask |= 0x000000ff; /* alpha */
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Apply color mask to the 32-bit packed colors.
+ * if (cmask[i])
+ * frag color[i] = frag color[i];
+ * else
+ * frag color[i] = framebuffer color[i];
+ */
+ fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask);
+ fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask);
+ fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask);
+ fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask);
}
/*
- * Logic Ops
+ * Do logic ops
*/
if (spu.blend.logicop_enable) {
/* XXX to do */
- /* apply logicop to 32-bit packed colors */
+ /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
}
@@ -431,45 +586,46 @@ spu_fallback_fragment_ops(uint x, uint y,
spu.cur_ctile_status = TILE_STATUS_DIRTY;
}
else {
+ /* write no fragments */
return;
}
/*
- * Write new quad colors to the framebuffer/tile.
+ * Write new fragment/quad colors to the framebuffer/tile.
* Only write pixels where the corresponding mask word is set.
*/
#if LINEAR_QUAD_LAYOUT
/*
* Quad layout:
* +--+--+--+--+
- * |p0|p1|p2|p3|
+ * |p0|p1|p2|p3|...
* +--+--+--+--+
*/
if (spu_extract(mask, 0))
- colorTile->ui[y][x*2] = c0;
+ colorTile->ui[y][x*2] = fragc0;
if (spu_extract(mask, 1))
- colorTile->ui[y][x*2+1] = c1;
+ colorTile->ui[y][x*2+1] = fragc1;
if (spu_extract(mask, 2))
- colorTile->ui[y][x*2+2] = c2;
+ colorTile->ui[y][x*2+2] = fragc2;
if (spu_extract(mask, 3))
- colorTile->ui[y][x*2+3] = c3;
+ colorTile->ui[y][x*2+3] = fragc3;
#else
/*
* Quad layout:
* +--+--+
- * |p0|p1|
+ * |p0|p1|...
* +--+--+
- * |p2|p3|
+ * |p2|p3|...
* +--+--+
*/
if (spu_extract(mask, 0))
- colorTile->ui[y+0][x+0] = c0;
+ colorTile->ui[y+0][x+0] = fragc0;
if (spu_extract(mask, 1))
- colorTile->ui[y+0][x+1] = c1;
+ colorTile->ui[y+0][x+1] = fragc1;
if (spu_extract(mask, 2))
- colorTile->ui[y+1][x+0] = c2;
+ colorTile->ui[y+1][x+0] = fragc2;
if (spu_extract(mask, 3))
- colorTile->ui[y+1][x+1] = c3;
+ colorTile->ui[y+1][x+1] = fragc3;
#endif
}
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 305dc98881..7c225e2f27 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -98,7 +98,7 @@ my_tile(uint tx, uint ty)
static INLINE void
get_cz_tiles(uint tx, uint ty)
{
- if (spu.read_depth) {
+ if (spu.read_depth_stencil) {
if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
//printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
@@ -153,7 +153,7 @@ static INLINE void
wait_put_cz_tiles(void)
{
wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
- if (spu.read_depth) {
+ if (spu.read_depth_stencil) {
wait_on_mask(1 << TAG_WRITE_TILE_Z);
}
}
@@ -175,22 +175,14 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
const ubyte *vertices;
const ushort *indexes;
uint i, j;
+ uint num_tiles;
-
- if (Debug) {
- printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u "
- "inline_vert=%u\n",
- spu.init.id,
- render->prim_type,
- render->num_verts,
- render->num_indexes,
- render->inline_verts);
-
- /*
- printf(" bound: %g, %g .. %g, %g\n",
- render->xmin, render->ymin, render->xmax, render->ymax);
- */
- }
+ D_PRINTF(CELL_DEBUG_CMD,
+ "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n",
+ render->prim_type,
+ render->num_verts,
+ render->num_indexes,
+ render->inline_verts);
ASSERT(sizeof(*render) % 4 == 0);
ASSERT(total_vertex_bytes % 16 == 0);
@@ -251,6 +243,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */
+ num_tiles = 0;
+
/**
** loop over tiles, rendering tris
**/
@@ -264,6 +258,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
if (!my_tile(tx, ty))
continue;
+ num_tiles++;
+
spu.cur_ctile_status = spu.ctile_status[ty][tx];
spu.cur_ztile_status = spu.ztile_status[ty][tx];
@@ -293,9 +289,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
spu.ztile_status[ty][tx] = spu.cur_ztile_status;
}
- if (Debug)
- printf("SPU %u: RENDER done\n",
- spu.init.id);
+ D_PRINTF(CELL_DEBUG_CMD,
+ "RENDER done (%u tiles hit)\n",
+ num_tiles);
}
-
-
diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h
new file mode 100644
index 0000000000..74f2a0b6d2
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_shuffle.h
@@ -0,0 +1,186 @@
+#ifndef SPU_SHUFFLE_H
+#define SPU_SHUFFLE_H
+
+/*
+ * Generate shuffle patterns with minimal fuss.
+ *
+ * Based on ideas from
+ * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf
+ *
+ * A-P indicates 0-15th position in first vector
+ * a-p indicates 0-15th position in second vector
+ *
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |          A|          B|          C|          D|
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |    A|    B|    C|    D|    E|    F|    G|    H|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ *
+ * x or X indicates 0xff
+ * 8 indicates 0x80
+ * 0 indicates 0x00
+ *
+ * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector
+ * unsigned char literal suitable for use with spu_shuffle().
+ *
+ * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector
+ * literal suitable for use with si_shufb().
+ *
+ *
+ * For example :
+ * SHUFB4(A,A,A,A)
+ * expands to :
+ * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3})
+ *
+ * SHUFFLE8(A,B,a,b,C,c,8,8)
+ * expands to :
+ * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,
+ * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0})
+ *
+ */
+
+#include <spu_intrinsics.h>
+
+#define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03
+#define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07
+#define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b
+#define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f
+#define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13
+#define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17
+#define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b
+#define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f
+#define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0 /* control byte 0xc0: result byte is 0xff */
+#define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0 /* lower-case alias of X */
+#define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80 /* control byte 0x80: result byte is 0x00 */
+#define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0 /* control byte 0xe0: result byte is 0x80 */
+
+#define SHUFFLE_VECTOR_4__(A, B, C, D) \
+ SHUFFLE_PATTERN_4_##A##__, \
+ SHUFFLE_PATTERN_4_##B##__, \
+ SHUFFLE_PATTERN_4_##C##__, \
+ SHUFFLE_PATTERN_4_##D##__
+
+#define SHUFFLE4(A, B, C, D) \
+ ((const vector unsigned char){ \
+ SHUFFLE_VECTOR_4__(A, B, C, D) \
+ })
+
+#define SHUFB4(A, B, C, D) \
+ ((const qword){ \
+ SHUFFLE_VECTOR_4__(A, B, C, D) \
+ })
+
+
+#define SHUFFLE_PATTERN_8_A__ 0x00, 0x01
+#define SHUFFLE_PATTERN_8_B__ 0x02, 0x03
+#define SHUFFLE_PATTERN_8_C__ 0x04, 0x05
+#define SHUFFLE_PATTERN_8_D__ 0x06, 0x07
+#define SHUFFLE_PATTERN_8_E__ 0x08, 0x09
+#define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b
+#define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d
+#define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f
+#define SHUFFLE_PATTERN_8_a__ 0x10, 0x11
+#define SHUFFLE_PATTERN_8_b__ 0x12, 0x13
+#define SHUFFLE_PATTERN_8_c__ 0x14, 0x15
+#define SHUFFLE_PATTERN_8_d__ 0x16, 0x17
+#define SHUFFLE_PATTERN_8_e__ 0x18, 0x19
+#define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b
+#define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d
+#define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f
+#define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0 /* control byte 0xc0: result byte is 0xff */
+#define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0 /* lower-case alias of X */
+#define SHUFFLE_PATTERN_8_0__ 0x80, 0x80 /* control byte 0x80: result byte is 0x00 */
+#define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0 /* control byte 0xe0: result byte is 0x80 */
+
+
+#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
+ SHUFFLE_PATTERN_8_##A##__, \
+ SHUFFLE_PATTERN_8_##B##__, \
+ SHUFFLE_PATTERN_8_##C##__, \
+ SHUFFLE_PATTERN_8_##D##__, \
+ SHUFFLE_PATTERN_8_##E##__, \
+ SHUFFLE_PATTERN_8_##F##__, \
+ SHUFFLE_PATTERN_8_##G##__, \
+ SHUFFLE_PATTERN_8_##H##__
+
+#define SHUFFLE8(A, B, C, D, E, F, G, H) \
+ ((const vector unsigned char){ \
+ SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
+ })
+
+#define SHUFB8(A, B, C, D, E, F, G, H) \
+ ((const qword){ \
+ SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
+ })
+
+
+#define SHUFFLE_PATTERN_16_A__ 0x00
+#define SHUFFLE_PATTERN_16_B__ 0x01
+#define SHUFFLE_PATTERN_16_C__ 0x02
+#define SHUFFLE_PATTERN_16_D__ 0x03
+#define SHUFFLE_PATTERN_16_E__ 0x04
+#define SHUFFLE_PATTERN_16_F__ 0x05
+#define SHUFFLE_PATTERN_16_G__ 0x06
+#define SHUFFLE_PATTERN_16_H__ 0x07
+#define SHUFFLE_PATTERN_16_I__ 0x08
+#define SHUFFLE_PATTERN_16_J__ 0x09
+#define SHUFFLE_PATTERN_16_K__ 0x0a
+#define SHUFFLE_PATTERN_16_L__ 0x0b
+#define SHUFFLE_PATTERN_16_M__ 0x0c
+#define SHUFFLE_PATTERN_16_N__ 0x0d
+#define SHUFFLE_PATTERN_16_O__ 0x0e
+#define SHUFFLE_PATTERN_16_P__ 0x0f
+#define SHUFFLE_PATTERN_16_a__ 0x10
+#define SHUFFLE_PATTERN_16_b__ 0x11
+#define SHUFFLE_PATTERN_16_c__ 0x12
+#define SHUFFLE_PATTERN_16_d__ 0x13
+#define SHUFFLE_PATTERN_16_e__ 0x14
+#define SHUFFLE_PATTERN_16_f__ 0x15
+#define SHUFFLE_PATTERN_16_g__ 0x16
+#define SHUFFLE_PATTERN_16_h__ 0x17
+#define SHUFFLE_PATTERN_16_i__ 0x18
+#define SHUFFLE_PATTERN_16_j__ 0x19
+#define SHUFFLE_PATTERN_16_k__ 0x1a
+#define SHUFFLE_PATTERN_16_l__ 0x1b
+#define SHUFFLE_PATTERN_16_m__ 0x1c
+#define SHUFFLE_PATTERN_16_n__ 0x1d
+#define SHUFFLE_PATTERN_16_o__ 0x1e
+#define SHUFFLE_PATTERN_16_p__ 0x1f
+#define SHUFFLE_PATTERN_16_X__ 0xc0 /* control byte 0xc0: result byte is 0xff */
+#define SHUFFLE_PATTERN_16_x__ 0xc0 /* lower-case alias of X */
+#define SHUFFLE_PATTERN_16_0__ 0x80 /* control byte 0x80: result byte is 0x00 */
+#define SHUFFLE_PATTERN_16_8__ 0xe0 /* control byte 0xe0: result byte is 0x80 */
+
+#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ SHUFFLE_PATTERN_16_##A##__, \
+ SHUFFLE_PATTERN_16_##B##__, \
+ SHUFFLE_PATTERN_16_##C##__, \
+ SHUFFLE_PATTERN_16_##D##__, \
+ SHUFFLE_PATTERN_16_##E##__, \
+ SHUFFLE_PATTERN_16_##F##__, \
+ SHUFFLE_PATTERN_16_##G##__, \
+ SHUFFLE_PATTERN_16_##H##__, \
+ SHUFFLE_PATTERN_16_##I##__, \
+ SHUFFLE_PATTERN_16_##J##__, \
+ SHUFFLE_PATTERN_16_##K##__, \
+ SHUFFLE_PATTERN_16_##L##__, \
+ SHUFFLE_PATTERN_16_##M##__, \
+ SHUFFLE_PATTERN_16_##N##__, \
+ SHUFFLE_PATTERN_16_##O##__, \
+ SHUFFLE_PATTERN_16_##P##__
+
+#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ ((const vector unsigned char){ \
+ SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ })
+
+#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ ((const qword){ \
+ SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ })
+
+#endif
diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c
index 117b8a36f8..69784c8978 100644
--- a/src/gallium/drivers/cell/spu/spu_texture.c
+++ b/src/gallium/drivers/cell/spu/spu_texture.c
@@ -26,6 +26,8 @@
**************************************************************************/
+#include <math.h>
+
#include "pipe/p_compiler.h"
#include "spu_main.h"
#include "spu_texture.h"
@@ -40,37 +42,19 @@
void
invalidate_tex_cache(void)
{
- uint unit = 0;
- uint bytes = 4 * spu.texture[unit].width
- * spu.texture[unit].height;
-
- spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes);
-}
+ uint lvl;
+ for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) {
+ uint unit = 0;
+ uint bytes = 4 * spu.texture[unit].level[lvl].width
+ * spu.texture[unit].level[lvl].height;
+ if (spu.texture[unit].target == PIPE_TEXTURE_CUBE)
+ bytes *= 6;
+ else if (spu.texture[unit].target == PIPE_TEXTURE_3D)
+ bytes *= spu.texture[unit].level[lvl].depth;
-/**
- * XXX look into getting texels for all four pixels in a quad at once.
- */
-static uint
-get_texel(uint unit, vec_uint4 coordinate)
-{
- /*
- * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as
- * SIMD since X and Y are already in a SIMD register.
- */
- const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
- ushort x = spu_extract(coordinate, 0);
- ushort y = spu_extract(coordinate, 1);
- unsigned tile_offset = sizeof(tile_t)
- * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE));
- ushort texel_offset = (ushort) 4
- * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE));
- vec_uint4 tmp;
-
- spu_dcache_fetch_unaligned((qword *) & tmp,
- texture_ea + tile_offset + texel_offset,
- 4);
- return spu_extract(tmp, 0);
+ spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes);
+ }
}
@@ -88,15 +72,17 @@ get_texel(uint unit, vec_uint4 coordinate)
* a time.
*/
static void
-get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
+get_four_texels(const struct spu_texture_level *tlevel, uint face,
+ vec_int4 x, vec_int4 y,
+ vec_uint4 *texels)
{
- const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
- vec_uint4 tile_x = spu_rlmask(x, -5);
- vec_uint4 tile_y = spu_rlmask(y, -5);
- const qword offset_x = si_andi((qword) x, 0x1f);
- const qword offset_y = si_andi((qword) y, 0x1f);
+ unsigned texture_ea = (uintptr_t) tlevel->start;
+ const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
+ const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
+ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */
+ const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */
- const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row);
+ const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row);
const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
@@ -107,6 +93,8 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
+ texture_ea = texture_ea + face * tlevel->bytes_per_image;
+
spu_dcache_fetch_unaligned((qword *) & texels[0],
texture_ea + spu_extract(offset, 0), 4);
spu_dcache_fetch_unaligned((qword *) & texels[1],
@@ -118,83 +106,536 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
}
+/** Clamp each element of vec to [0, max]; max supplies a per-element upper bound. */
+static INLINE vector signed int
+spu_clamp(vector signed int vec, vector signed int max)
+{
+ static const vector signed int zero = {0,0,0,0};
+ vector unsigned int c;
+ c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */
+ vec = spu_sel(zero, vec, c); /* keep vec where it is positive, else 0 */
+ c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */
+ vec = spu_sel(vec, max, c); /* replace with max where vec exceeds it */
+ return vec;
+}
+
+
+
/**
- * Get texture sample at texcoord.
+ * Do nearest texture sampling for four pixels.
+ * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
*/
-vector float
-sample_texture_nearest(uint unit, vector float texcoord)
+void
+sample_texture_2d_nearest(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
{
- vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
- vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */
- itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */
- uint texel = get_texel(unit, itc);
- return spu_unpack_A8R8G8B8(texel);
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ vector float ss = spu_mul(s, tlevel->scale_s);
+ vector float tt = spu_mul(t, tlevel->scale_t);
+ vector signed int is = spu_convts(ss, 0);
+ vector signed int it = spu_convts(tt, 0);
+ vec_uint4 texels[4];
+
+ /* PIPE_TEX_WRAP_REPEAT */
+ is = spu_and(is, tlevel->mask_s);
+ it = spu_and(it, tlevel->mask_t);
+
+ /* PIPE_TEX_WRAP_CLAMP */
+ is = spu_clamp(is, tlevel->max_s);
+ it = spu_clamp(it, tlevel->max_t);
+
+ get_four_texels(tlevel, face, is, it, texels);
+
+ /* convert four packed ARGB pixels to float RRRR,GGGG,BBBB,AAAA */
+ spu_unpack_A8R8G8B8_transpose4(texels, colors);
}
-vector float
-sample_texture_bilinear(uint unit, vector float texcoord)
+/**
+ * Do bilinear texture sampling for four pixels.
+ * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
+ */
+void
+sample_texture_2d_bilinear(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
{
- static const vec_uint4 offset_x = {0, 0, 1, 1};
- static const vec_uint4 offset_y = {0, 1, 0, 1};
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
- vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
- tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */
+ vector float ss = spu_madd(s, tlevel->scale_s, half);
+ vector float tt = spu_madd(t, tlevel->scale_t, half);
- /* integer texcoords S,T: */
- vec_uint4 itc = spu_convtu(tc, 0); /* convert to int */
+ vector signed int is0 = spu_convts(ss, 0);
+ vector signed int it0 = spu_convts(tt, 0);
- vec_uint4 texels[4];
-
- /* setup texcoords for quad:
- * +-----+-----+
- * |x0,y0|x1,y1|
- * +-----+-----+
- * |x2,y2|x3,y3|
- * +-----+-----+
- */
- vec_uint4 x = spu_splats(spu_extract(itc, 0));
- vec_uint4 y = spu_splats(spu_extract(itc, 1));
- x = spu_add(x, offset_x);
- y = spu_add(y, offset_y);
+ /* is + 1, it + 1 */
+ vector signed int is1 = spu_add(is0, 1);
+ vector signed int it1 = spu_add(it0, 1);
- /* GL_REPEAT wrap mode: */
- x = spu_and(x, spu.texture[unit].tex_size_x_mask);
- y = spu_and(y, spu.texture[unit].tex_size_y_mask);
+ /* PIPE_TEX_WRAP_REPEAT */
+ is0 = spu_and(is0, tlevel->mask_s);
+ it0 = spu_and(it0, tlevel->mask_t);
+ is1 = spu_and(is1, tlevel->mask_s);
+ it1 = spu_and(it1, tlevel->mask_t);
- get_four_texels(unit, x, y, texels);
+ /* PIPE_TEX_WRAP_CLAMP */
+ is0 = spu_clamp(is0, tlevel->max_s);
+ it0 = spu_clamp(it0, tlevel->max_t);
+ is1 = spu_clamp(is1, tlevel->max_s);
+ it1 = spu_clamp(it1, tlevel->max_t);
- /* integer A8R8G8B8 to float texel conversion */
- vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0));
- vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0));
- vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0));
- vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0));
+ /* get packed int texels */
+ vector unsigned int texels[16];
+ get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
+ get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
+ get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
+ get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
+ /* convert packed int texels to float colors */
+ vector float ftexels[16];
+ spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0);
+ spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4);
+ spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8);
+ spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12);
/* Compute weighting factors in [0,1]
* Multiply texcoord by 1024, AND with 1023, convert back to float.
*/
- vector float tc1024 = spu_mul(tc, spu_splats(1024.0f));
- vector signed int itc1024 = spu_convts(tc1024, 0);
- itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1));
- vector float weight = spu_convtf(itc1024, 10);
-
- /* smeared frac and 1-frac */
- vector float sfrac = spu_splats(spu_extract(weight, 0));
- vector float tfrac = spu_splats(spu_extract(weight, 1));
- vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac);
- vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac);
-
- /* multiply the samples (colors) by the S/T weights */
- texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1);
- texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1);
- texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac );
- texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac );
-
- /* compute sum of weighted samples */
- vector float texel_sum = spu_add(texel00, texel01);
- texel_sum = spu_add(texel_sum, texel10);
- texel_sum = spu_add(texel_sum, texel11);
-
- return texel_sum;
+ vector float ss1024 = spu_mul(ss, spu_splats(1024.0f));
+ vector signed int iss1024 = spu_convts(ss1024, 0);
+ iss1024 = spu_and(iss1024, 1023);
+ vector float sWeights0 = spu_convtf(iss1024, 10);
+
+ vector float tt1024 = spu_mul(tt, spu_splats(1024.0f));
+ vector signed int itt1024 = spu_convts(tt1024, 0);
+ itt1024 = spu_and(itt1024, 1023);
+ vector float tWeights0 = spu_convtf(itt1024, 10);
+
+ /* 1 - sWeight and 1 - tWeight */
+ vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0);
+ vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0);
+
+ /* reds, for four pixels */
+ ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]),
+ spu_add(ftexels[8], ftexels[12]));
+
+ /* greens, for four pixels */
+ ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]),
+ spu_add(ftexels[9], ftexels[13]));
+
+ /* blues, for four pixels */
+ ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]),
+ spu_add(ftexels[10], ftexels[14]));
+
+ /* alphas, for four pixels */
+ ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]),
+ spu_add(ftexels[11], ftexels[15]));
+}
+
+
+
+/**
+ * Transpose a 4x4 matrix of 32-bit words held in four vectors.
+ * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
+ *
+ * The four input vectors mIn[0..3] are read as rows (abcd, efgh, ijkl, mnop).
+ * On return *mOut0 = aeim, *mOut1 = bfjn, *mOut2 = cgko, *mOut3 = dhlp,
+ * i.e. *mOutN holds word N of each input vector.
+ *
+ * \param mOut0  receives word 0 of each input vector
+ * \param mOut1  receives word 1 of each input vector
+ * \param mOut2  receives word 2 of each input vector
+ * \param mOut3  receives word 3 of each input vector
+ * \param mIn  pointer to the four input vectors
+ */
+static INLINE void
+transpose(vector unsigned int *mOut0,
+ vector unsigned int *mOut1,
+ vector unsigned int *mOut2,
+ vector unsigned int *mOut3,
+ vector unsigned int *mIn)
+{
+ vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */
+ vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */
+ vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */
+ /* shufflehi interleaves words 0,1 of two vectors: (x0, y0, x1, y1) */
+ vector unsigned char shufflehi = ((vector unsigned char) {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x14, 0x15, 0x16, 0x17});
+ vector unsigned char shufflelo = ((vector unsigned char) { /* words 2,3: (x2, y2, x3, y3) */
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x1C, 0x1D, 0x1E, 0x1F});
+ abcd = *(mIn+0);
+ efgh = *(mIn+1);
+ ijkl = *(mIn+2);
+ mnop = *(mIn+3);
+ /* first pass: interleave rows 0 with 2 and rows 1 with 3 */
+ aibj = spu_shuffle(abcd, ijkl, shufflehi);
+ ckdl = spu_shuffle(abcd, ijkl, shufflelo);
+ emfn = spu_shuffle(efgh, mnop, shufflehi);
+ gohp = spu_shuffle(efgh, mnop, shufflelo);
+ /* second pass: interleave the intermediates to form the columns */
+ aeim = spu_shuffle(aibj, emfn, shufflehi);
+ bfjn = spu_shuffle(aibj, emfn, shufflelo);
+ cgko = spu_shuffle(ckdl, gohp, shufflehi);
+ dhlp = spu_shuffle(ckdl, gohp, shufflelo);
+ /* store the transposed result */
+ *mOut0 = aeim;
+ *mOut1 = bfjn;
+ *mOut2 = cgko;
+ *mOut3 = dhlp;
+}
+
+
+/**
+ * Bilinear filtering, using int instead of float arithmetic for computing
+ * sample weights.
+ * \param s, t texcoords for the four pixels being sampled
+ * \param unit index into spu.texture[]
+ * \param level mipmap level to sample
+ * \param face passed through to get_four_texels()
+ * \param colors returns the filtered result, one vector per channel:
+ * colors[0] = four reds, colors[1] = four greens,
+ * colors[2] = four blues, colors[3] = four alphas
+ */
+void
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
+{
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
+
+ /* Scale texcoords by size of texture, and add half pixel bias */
+ vector float ss = spu_madd(s, tlevel->scale_s, half);
+ vector float tt = spu_madd(t, tlevel->scale_t, half);
+
+ /* convert float coords to fixed-pt coords with 7 fraction bits */
+ vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */
+ vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */
+
+ /* compute integer texel weights in [0, 127] */
+ /* NOTE(review): each weight pair sums to 127, not 128, so the four
+ * weight products sum to 127*127 while the result below is scaled by
+ * 2^-22; a fully white texel therefore appears to come out slightly
+ * below 1.0 - confirm whether that is acceptable.
+ */
+ vector signed int sWeights0 = spu_and(is, 127);
+ vector signed int tWeights0 = spu_and(it, 127);
+ vector signed int sWeights1 = spu_sub(127, sWeights0);
+ vector signed int tWeights1 = spu_sub(127, tWeights0);
+
+ /* texel coords: is0 = is / 128, it0 = it / 128 */
+ vector signed int is0 = spu_rlmask(is, -7);
+ vector signed int it0 = spu_rlmask(it, -7);
+
+ /* texel coords: is1 = is0 + 1, it1 = it0 + 1 */
+ vector signed int is1 = spu_add(is0, 1);
+ vector signed int it1 = spu_add(it0, 1);
+
+ /* PIPE_TEX_WRAP_REPEAT */
+ is0 = spu_and(is0, tlevel->mask_s);
+ it0 = spu_and(it0, tlevel->mask_t);
+ is1 = spu_and(is1, tlevel->mask_s);
+ it1 = spu_and(it1, tlevel->mask_t);
+
+ /* PIPE_TEX_WRAP_CLAMP */
+ is0 = spu_clamp(is0, tlevel->max_s);
+ it0 = spu_clamp(it0, tlevel->max_t);
+ is1 = spu_clamp(is1, tlevel->max_s);
+ it1 = spu_clamp(it1, tlevel->max_t);
+
+ /* get packed int texels */
+ vector unsigned int texels[16];
+ get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
+ get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
+ get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
+ get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
+
+ /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
+ {
+ static const unsigned char ZERO = 0x80;
+ int i;
+ for (i = 0; i < 16; i++) {
+ texels[i] = spu_shuffle(texels[i], texels[i],
+ ((vector unsigned char) {
+ ZERO, ZERO, ZERO, 1,
+ ZERO, ZERO, ZERO, 2,
+ ZERO, ZERO, ZERO, 3,
+ ZERO, ZERO, ZERO, 0}));
+ }
+ }
+
+ /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
+ vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7,
+ texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15;
+ transpose(&texel0, &texel1, &texel2, &texel3, texels + 0);
+ transpose(&texel4, &texel5, &texel6, &texel7, texels + 4);
+ transpose(&texel8, &texel9, &texel10, &texel11, texels + 8);
+ transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);
+
+ /* compute weighted colors */
+ /* Each product is texel (8 bits) * s weight (7 bits) * t weight (7 bits),
+ * i.e. a value with 22 fraction bits, hence the scale argument of 22
+ * given to spu_convtf() below.
+ */
+ vector unsigned int c0, c1, c2, c3, cSum;
+
+ /* red */
+ c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[0] = spu_convtf(cSum, 22);
+
+ /* green */
+ c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[1] = spu_convtf(cSum, 22);
+
+ /* blue */
+ c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[2] = spu_convtf(cSum, 22);
+
+ /* alpha */
+ c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[3] = spu_convtf(cSum, 22);
+}
+
+
+
+/**
+ * Compute level of detail factor from texcoords.
+ * The screen-space derivatives are approximated by differencing the
+ * texcoords of the quad: element 1 minus element 0 for the X direction
+ * and element 2 minus element 0 for the Y direction, each scaled by the
+ * base level's size along the matching texture axis.
+ * \param unit index into spu.texture[]
+ * \param s, t texcoords of the four pixels in the quad
+ * \return log2 of the approximate texel-to-pixel ratio
+ */
+static INLINE float
+compute_lambda_2d(uint unit, vector float s, vector float t)
+{
+   uint baseLevel = 0;
+   float width = spu.texture[unit].level[baseLevel].width;
+   /* t derivatives must be scaled by the texture height, not its width */
+   float height = spu.texture[unit].level[baseLevel].height;
+   float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
+   float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
+   float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
+   float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
+#if 0
+   /* ideal value */
+   float x = dsdx * dsdx + dtdx * dtdx;
+   float y = dsdy * dsdy + dtdy * dtdy;
+   float rho = x > y ? x : y;
+   rho = sqrtf(rho);
+#else
+   /* approximation */
+   dsdx = fabsf(dsdx);
+   dsdy = fabsf(dsdy);
+   dtdx = fabsf(dtdx);
+   dtdy = fabsf(dtdy);
+   /* float literal keeps the arithmetic in single precision */
+   float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5f;
+#endif
+   float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */
+   return lambda;
+}
+
+
+/**
+ * Linearly interpolate between two sets of per-channel colors, in place:
+ * c0[i] = c0[i] + (c1[i] - c0[i]) * weight, for each of the four channels.
+ * \param c0 first color set; overwritten with the blended result
+ * \param c1 second color set
+ * \param weight scalar blend factor applied to every element
+ */
+static void
+blend_colors(vector float c0[4], const vector float c1[4], float weight)
+{
+   const vector float w = spu_splats(weight);
+   vector float delta[4];
+   uint ch;
+
+   /* compute all differences first, then update c0 */
+   for (ch = 0; ch < 4; ch++) {
+      delta[ch] = spu_sub(c1[ch], c0[ch]);
+   }
+   for (ch = 0; ch < 4; ch++) {
+      c0[ch] = spu_madd(delta[ch], w, c0[ch]);
+   }
+}
+
+
+/**
+ * Texture sampling with level of detail selection and possibly mipmap
+ * interpolation.
+ * A single lambda is computed for the whole quad, biased by the sampler's
+ * lod_bias and clamped to [min_lod, max_lod].  lambda <= 0 dispatches to
+ * the magnification sampler on level 0; otherwise the minification
+ * sampler is called on one or two mipmap levels.
+ * \param s, t texcoords of the four pixels in the quad
+ * \param unit texture/sampler unit index
+ * \param level_ignored unused; the level is derived from lambda
+ * \param face passed through to the per-level sampling function
+ * \param colors returns the sampled colors, one vector per channel
+ */
+void
+sample_texture_2d_lod(vector float s, vector float t,
+                      uint unit, uint level_ignored, uint face,
+                      vector float colors[4])
+{
+   /*
+    * Note that we're computing a lambda/lod here that's used for all
+    * four pixels in the quad.
+    */
+   float lambda = compute_lambda_2d(unit, s, t);
+
+   (void) level_ignored;
+
+   /* apply lod bias */
+   lambda += spu.sampler[unit].lod_bias;
+
+   /* clamp */
+   if (lambda < spu.sampler[unit].min_lod)
+      lambda = spu.sampler[unit].min_lod;
+   else if (lambda > spu.sampler[unit].max_lod)
+      lambda = spu.sampler[unit].max_lod;
+
+   if (lambda <= 0.0f) {
+      /* magnify */
+      spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors);
+   }
+   else if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
+      /* minify: sample two mipmap levels and interpolate.
+       * NOTE(review): this keys the mipmap interpolation decision off the
+       * image filter; it presumably should test the sampler's mipmap
+       * filter instead - confirm against the sampler state definition.
+       */
+      int level = (int) lambda;
+      float weight;
+
+      if (level > (int) spu.texture[unit].max_level)
+         level = spu.texture[unit].max_level;
+      spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
+
+      /* sample second mipmap level, if there is one, and blend */
+      weight = lambda - (float) level;
+      level++;
+      if (level <= (int) spu.texture[unit].max_level) {
+         vector float colors2[4];
+         spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2);
+         blend_colors(colors, colors2, weight);
+      }
+   }
+   else {
+      /* minify: sample the one nearest mipmap level */
+      int level = (int) (lambda + 0.5f);
+      if (level > (int) spu.texture[unit].max_level)
+         level = spu.texture[unit].max_level;
+      spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
+   }
+}
+
+
+/**
+ * Select the cube map face addressed by direction (rx, ry, rz) and compute
+ * the 2D texcoords within that face.
+ * The major axis is the component with the largest magnitude.  Ties are
+ * resolved in X, Y, Z order so the divisor 'ma' is never smaller than the
+ * other two components.
+ * \param rx, ry, rz direction vector
+ * \param newS returns the face S coordinate, (sc / ma + 1) / 2
+ * \param newT returns the face T coordinate, (tc / ma + 1) / 2
+ * \return one of the PIPE_TEX_FACE_x values
+ * XXX need a SIMD version of this
+ */
+static unsigned
+choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
+{
+   /*
+      major axis
+      direction     target                             sc     tc    ma
+      ----------    -------------------------------    ---    ---   ---
+       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
+       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
+       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
+       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
+       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
+       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
+   */
+   const float arx = fabsf(rx);
+   const float ary = fabsf(ry);
+   const float arz = fabsf(rz);
+   unsigned face;
+   float sc, tc, ma;
+
+   /* Use >= so that ties (e.g. |rx| == |ry| > |rz|) pick one of the tied
+    * axes instead of falling through to Z with a too-small or zero 'ma'.
+    */
+   if (arx >= ary && arx >= arz) {
+      ma = arx;
+      tc = -ry;
+      if (rx >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_X;
+         sc = -rz;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_X;
+         sc = rz;
+      }
+   }
+   else if (ary >= arx && ary >= arz) {
+      ma = ary;
+      sc = rx;
+      if (ry >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_Y;
+         tc = rz;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_Y;
+         tc = -rz;
+      }
+   }
+   else {
+      /* |rz| is strictly the largest here, so rz != 0 */
+      ma = arz;
+      tc = -ry;
+      if (rz >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_Z;
+         sc = rx;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_Z;
+         sc = -rx;
+      }
+   }
+
+   if (ma == 0.0F) {
+      /* degenerate (0,0,0) direction: avoid 0/0, use the face center */
+      *newS = 0.5F;
+      *newT = 0.5F;
+   }
+   else {
+      *newS = (sc / ma + 1.0F) * 0.5F;
+      *newT = (tc / ma + 1.0F) * 0.5F;
+   }
+
+   return face;
+}
+
+
+
+/**
+ * Sample a cube map for the four pixels of a quad.
+ * Each (s, t, r) direction is mapped to a cube face and 2D face coords
+ * with choose_cube_face().  When all four pixels hit the same face the
+ * quad is sampled with one call; otherwise each pixel is sampled on its
+ * own face and its result is inserted into the matching element of colors.
+ * \param s, t, r direction vector components for the four pixels
+ * \param unit texture unit index
+ * \param colors returns the sampled colors, one vector per channel
+ */
+void
+sample_texture_cube(vector float s, vector float t, vector float r,
+                    uint unit, vector float colors[4])
+{
+   const uint level = 0;
+   uint face[4];
+   float faceS[4], faceT[4];
+   uint i;
+
+   /* Compute cube faces referenced by the four sets of texcoords.
+    * XXX we should SIMD-ize this.
+    */
+   for (i = 0; i < 4; i++) {
+      const float dirX = spu_extract(s, i);
+      const float dirY = spu_extract(t, i);
+      const float dirZ = spu_extract(r, i);
+      face[i] = choose_cube_face(dirX, dirY, dirZ, &faceS[i], &faceT[i]);
+   }
+
+   if (face[0] != face[1] ||
+       face[0] != face[2] ||
+       face[0] != face[3]) {
+      /* BAD!  The four texcoords refer to different faces: sample each
+       * pixel separately and keep only that pixel's element.
+       */
+      for (i = 0; i < 4; i++) {
+         vector float texel[4];
+         uint ch;
+
+         spu.sample_texture_2d[unit](spu_splats(faceS[i]), spu_splats(faceT[i]),
+                                     unit, level, face[i], texel);
+
+         for (ch = 0; ch < 4; ch++) {
+            const float value = spu_extract(texel[ch], i);
+            colors[ch] = spu_insert(value, colors[ch], i);
+         }
+      }
+      return;
+   }
+
+   /* GOOD!  All four texcoords refer to the same cube face */
+   s = (vector float) {faceS[0], faceS[1], faceS[2], faceS[3]};
+   t = (vector float) {faceT[0], faceT[1], faceT[2], faceT[3]};
+   spu.sample_texture_2d[unit](s, t, unit, level, face[0], colors);
}
diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h
index f7c9738be8..7b75b007b5 100644
--- a/src/gallium/drivers/cell/spu/spu_texture.h
+++ b/src/gallium/drivers/cell/spu/spu_texture.h
@@ -36,12 +36,32 @@ extern void
invalidate_tex_cache(void);
-extern vector float
-sample_texture_nearest(uint unit, vector float texcoord);
+extern void
+sample_texture_2d_nearest(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+
+extern void
+sample_texture_2d_bilinear(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+extern void
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+extern void
+sample_texture_2d_lod(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
-extern vector float
-sample_texture_bilinear(uint unit, vector float texcoord);
+
+extern void
+sample_texture_cube(vector float s, vector float t, vector float r,
+ uint unit, vector float colors[4]);
#endif /* SPU_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c
index 216a33126b..6905015a48 100644
--- a/src/gallium/drivers/cell/spu/spu_tile.c
+++ b/src/gallium/drivers/cell/spu/spu_tile.c
@@ -87,3 +87,40 @@ put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
0 /* rid */);
}
+
+/**
+ * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
+ * tiles back to the main framebuffer.
+ * Tiles are visited starting at this SPU's id and stepping by the number
+ * of SPUs.
+ * \param surfaceIndex 0 selects the color tiles (spu.ctile and
+ *                     ctile_status); any other value selects the Z tiles
+ *                     (spu.ztile and ztile_status)
+ */
+void
+really_clear_tiles(uint surfaceIndex)
+{
+   const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+   uint i;
+
+   if (surfaceIndex == 0) {
+      clear_c_tile(&spu.ctile);
+
+      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+         uint tx = i % spu.fb.width_tiles;
+         uint ty = i / spu.fb.width_tiles;
+         if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
+            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+         }
+      }
+   }
+   else {
+      clear_z_tile(&spu.ztile);
+
+      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+         uint tx = i % spu.fb.width_tiles;
+         uint ty = i / spu.fb.width_tiles;
+         if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) {
+            /* write the freshly cleared Z tile, not the color tile */
+            put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
+         }
+      }
+   }
+
+#if 0
+   wait_on_mask(1 << TAG_SURFACE_CLEAR);
+#endif
+}
diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h
index 1b5491112d..7bfb52be8f 100644
--- a/src/gallium/drivers/cell/spu/spu_tile.h
+++ b/src/gallium/drivers/cell/spu/spu_tile.h
@@ -36,12 +36,14 @@
-void
+extern void
get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf);
-void
+extern void
put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf);
+extern void
+really_clear_tiles(uint surfaceIndex);
static INLINE void
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 8b93878192..d727268475 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -29,12 +29,12 @@
* Triangle rendering within a tile.
*/
-#include <transpose_matrix4x4.h>
#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
#include "util/u_math.h"
#include "spu_colorpack.h"
#include "spu_main.h"
+#include "spu_shuffle.h"
#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_tri.h"
@@ -43,11 +43,6 @@
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
typedef vector unsigned int mask_t;
-typedef union
-{
- vector float v;
- float f[4];
-} float4;
/**
@@ -61,7 +56,7 @@ struct vertex_header {
/* XXX fix this */
#undef CEILF
-#define CEILF(X) ((float) (int) ((X) + 0.99999))
+#define CEILF(X) ((float) (int) ((X) + 0.99999f))
#define QUAD_TOP_LEFT 0
@@ -75,14 +70,25 @@ struct vertex_header {
#define MASK_ALL 0xf
+#define CHAN0 0
+#define CHAN1 1
+#define CHAN2 2
+#define CHAN3 3
+
+
#define DEBUG_VERTS 0
/**
* Triangle edge info
*/
struct edge {
- float dx; /**< X(v1) - X(v0), used only during setup */
- float dy; /**< Y(v1) - Y(v0), used only during setup */
+ union {
+ struct {
+ float dx; /**< X(v1) - X(v0), used only during setup */
+ float dy; /**< Y(v1) - Y(v0), used only during setup */
+ };
+ vec_float4 ds; /**< vector accessor for dx and dy */
+ };
float dxdy; /**< dx/dy */
float sx, sy; /**< first sample point coord */
int lines; /**< number of lines on this edge */
@@ -91,9 +97,9 @@ struct edge {
struct interp_coef
{
- float4 a0;
- float4 dadx;
- float4 dady;
+ vector float a0;
+ vector float dadx;
+ vector float dady;
};
@@ -107,34 +113,32 @@ struct setup_stage {
* turn. Currently fixed at 4 floats, but should change in time.
* Codegen will help cope with this.
*/
- const struct vertex_header *vmax;
- const struct vertex_header *vmid;
- const struct vertex_header *vmin;
- const struct vertex_header *vprovoke;
+ union {
+ struct {
+ const struct vertex_header *vmin;
+ const struct vertex_header *vmid;
+ const struct vertex_header *vmax;
+ const struct vertex_header *vprovoke;
+ };
+ qword vertex_headers;
+ };
struct edge ebot;
struct edge etop;
struct edge emaj;
- float oneoverarea;
+ float oneOverArea; /* XXX maybe make into vector? */
- uint tx, ty;
+ uint facing;
+
+ uint tx, ty; /**< position of current tile (x, y) */
int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
-#if 0
- struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
-#else
struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
-#endif
-
-#if 0
- struct quad_header quad;
-#endif
struct {
- int left[2]; /**< [0] = row0, [1] = row1 */
- int right[2];
+ vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */
int y;
unsigned y_flags;
unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
@@ -142,118 +146,103 @@ struct setup_stage {
};
-
static struct setup_stage setup;
+static INLINE vector float
+splatx(vector float v)
+{
+ return spu_splats(spu_extract(v, CHAN0));
+}
-
-#if 0
-/**
- * Basically a cast wrapper.
- */
-static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
+static INLINE vector float
+splaty(vector float v)
{
- return (struct setup_stage *)stage;
+ return spu_splats(spu_extract(v, CHAN1));
}
-#endif
-#if 0
-/**
- * Clip setup.quad against the scissor/surface bounds.
- */
-static INLINE void
-quad_clip(struct setup_stage *setup)
+static INLINE vector float
+splatz(vector float v)
{
- const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect;
- const int minx = (int) cliprect->minx;
- const int maxx = (int) cliprect->maxx;
- const int miny = (int) cliprect->miny;
- const int maxy = (int) cliprect->maxy;
-
- if (setup.quad.x0 >= maxx ||
- setup.quad.y0 >= maxy ||
- setup.quad.x0 + 1 < minx ||
- setup.quad.y0 + 1 < miny) {
- /* totally clipped */
- setup.quad.mask = 0x0;
- return;
- }
- if (setup.quad.x0 < minx)
- setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
- if (setup.quad.y0 < miny)
- setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
- if (setup.quad.x0 == maxx - 1)
- setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
- if (setup.quad.y0 == maxy - 1)
- setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
+ return spu_splats(spu_extract(v, CHAN2));
}
-#endif
-#if 0
-/**
- * Emit a quad (pass to next stage) with clipping.
- */
-static INLINE void
-clip_emit_quad(struct setup_stage *setup)
+static INLINE vector float
+splatw(vector float v)
{
- quad_clip(setup);
- if (setup.quad.mask) {
- struct softpipe_context *sp = setup.softpipe;
- sp->quad.first->run(sp->quad.first, &setup.quad);
- }
+ return spu_splats(spu_extract(v, CHAN3));
}
-#endif
+
/**
- * Evaluate attribute coefficients (plane equations) to compute
- * attribute values for the four fragments in a quad.
- * Eg: four colors will be computed (in AoS format).
+ * Setup fragment shader inputs by evaluating triangle's vertex
+ * attribute coefficient info.
+ * \param x quad x pos
+ * \param y quad y pos
+ * \param fragZ returns quad Z values
+ * \param fragInputs returns fragment program inputs
+ * Note: this code could be incorporated into the fragment program
+ * itself to avoid the loop and switch.
*/
-static INLINE void
-eval_coeff(uint slot, float x, float y, vector float result[4])
+static void
+eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[])
{
- switch (spu.vertex_info.interp_mode[slot]) {
- case INTERP_CONSTANT:
- result[QUAD_TOP_LEFT] =
- result[QUAD_TOP_RIGHT] =
- result[QUAD_BOTTOM_LEFT] =
- result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v;
- break;
+ static const vector float deltaX = (const vector float) {0, 1, 0, 1};
+ static const vector float deltaY = (const vector float) {0, 0, 1, 1};
+
+ const uint posSlot = 0;
+ const vector float pos = setup.coef[posSlot].a0;
+ const vector float dposdx = setup.coef[posSlot].dadx;
+ const vector float dposdy = setup.coef[posSlot].dady;
+ const vector float fragX = spu_splats(x) + deltaX;
+ const vector float fragY = spu_splats(y) + deltaY;
+ vector float fragW, wInv;
+ uint i;
- case INTERP_LINEAR:
- /* fall-through, for now */
- default:
- {
- register vector float dadx = setup.coef[slot].dadx.v;
- register vector float dady = setup.coef[slot].dady.v;
- register vector float topLeft
- = spu_add(setup.coef[slot].a0.v,
- spu_add(spu_mul(spu_splats(x), dadx),
- spu_mul(spu_splats(y), dady)));
-
- result[QUAD_TOP_LEFT] = topLeft;
- result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx);
- result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady);
- result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady);
+ *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy);
+ fragW = splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy);
+ wInv = spu_re(fragW); /* 1 / w */
+
+ /* loop over fragment program inputs */
+ for (i = 0; i < spu.vertex_info.num_attribs; i++) {
+ uint attr = i + 1;
+ enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode;
+
+ /* constant term */
+ vector float a0 = setup.coef[attr].a0;
+ vector float r0 = splatx(a0);
+ vector float r1 = splaty(a0);
+ vector float r2 = splatz(a0);
+ vector float r3 = splatw(a0);
+
+ if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) {
+ /* linear term */
+ vector float dadx = setup.coef[attr].dadx;
+ vector float dady = setup.coef[attr].dady;
+ /* Use SPU intrinsics here to get slightly better code.
+ * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady);
+ */
+ r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0));
+ r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1));
+ r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2));
+ r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3));
+ if (interp == INTERP_PERSPECTIVE) {
+ /* perspective term */
+ r0 *= wInv;
+ r1 *= wInv;
+ r2 *= wInv;
+ r3 *= wInv;
+ }
}
+ fragInputs[CHAN0] = r0;
+ fragInputs[CHAN1] = r1;
+ fragInputs[CHAN2] = r2;
+ fragInputs[CHAN3] = r3;
+ fragInputs += 4;
}
}
-static INLINE vector float
-eval_z(float x, float y)
-{
- const uint slot = 0;
- const float dzdx = setup.coef[slot].dadx.f[2];
- const float dzdy = setup.coef[slot].dady.f[2];
- const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy;
- const vector float topLeftv = spu_splats(topLeft);
- const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
- return spu_add(topLeftv, derivs);
-}
-
-
/**
* Emit a quad (pass to next stage). No clipping is done.
* Note: about 1/5 to 1/7 of the time, mask is zero and this function
@@ -261,120 +250,51 @@ eval_z(float x, float y)
* overall.
*/
static INLINE void
-emit_quad( int x, int y, mask_t mask )
+emit_quad( int x, int y, mask_t mask)
{
/* If any bits in mask are set... */
if (spu_extract(spu_orx(mask), 0)) {
const int ix = x - setup.cliprect_minx;
const int iy = y - setup.cliprect_miny;
- vector float colors[4];
spu.cur_ctile_status = TILE_STATUS_DIRTY;
spu.cur_ztile_status = TILE_STATUS_DIRTY;
- if (spu.texture[0].start) {
- /* texture mapping */
- const uint unit = 0;
- vector float texcoords[4];
- eval_coeff(2, (float) x, (float) y, texcoords);
-
- if (spu_extract(mask, 0))
- colors[0] = spu.sample_texture[unit](unit, texcoords[0]);
- if (spu_extract(mask, 1))
- colors[1] = spu.sample_texture[unit](unit, texcoords[1]);
- if (spu_extract(mask, 2))
- colors[2] = spu.sample_texture[unit](unit, texcoords[2]);
- if (spu_extract(mask, 3))
- colors[3] = spu.sample_texture[unit](unit, texcoords[3]);
-
-
- if (spu.texture[1].start) {
- /* multi-texture mapping */
- const uint unit = 1;
- vector float colors1[4];
-
- eval_coeff(2, (float) x, (float) y, texcoords);
-
- if (spu_extract(mask, 0))
- colors1[0] = spu.sample_texture[unit](unit, texcoords[0]);
- if (spu_extract(mask, 1))
- colors1[1] = spu.sample_texture[unit](unit, texcoords[1]);
- if (spu_extract(mask, 2))
- colors1[2] = spu.sample_texture[unit](unit, texcoords[2]);
- if (spu_extract(mask, 3))
- colors1[3] = spu.sample_texture[unit](unit, texcoords[3]);
-
- /* hack: modulate first texture by second */
- colors[0] = spu_mul(colors[0], colors1[0]);
- colors[1] = spu_mul(colors[1], colors1[1]);
- colors[2] = spu_mul(colors[2], colors1[2]);
- colors[3] = spu_mul(colors[3], colors1[3]);
- }
-
- }
- else {
- /* simple shading */
-#if 0
- eval_coeff(1, (float) x, (float) y, colors);
-
-#else
- /* XXX new fragment program code */
-
- if (spu.fragment_program) {
- vector float inputs[4*4], outputs[2*4];
-
- /* setup inputs */
- eval_coeff(1, (float) x, (float) y, inputs);
-
- /* Execute the current fragment program */
- spu.fragment_program(inputs, outputs, spu.constants);
-
- /* Copy outputs */
- colors[0] = outputs[0*4+0];
- colors[1] = outputs[0*4+1];
- colors[2] = outputs[0*4+2];
- colors[3] = outputs[0*4+3];
-
- if (0 && spu.init.id==0 && y == 48) {
- printf("colors[0] = %f %f %f %f\n",
- spu_extract(colors[0], 0),
- spu_extract(colors[0], 1),
- spu_extract(colors[0], 2),
- spu_extract(colors[0], 3));
- printf("colors[1] = %f %f %f %f\n",
- spu_extract(colors[1], 0),
- spu_extract(colors[1], 1),
- spu_extract(colors[1], 2),
- spu_extract(colors[1], 3));
- }
-
- }
-#endif
- }
-
-
{
- /* Convert fragment data from AoS to SoA format.
- * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
- * This is temporary!
+ /*
+ * Run fragment shader, execute per-fragment ops, update fb/tile.
*/
- vector float soa_frag[4];
- _transpose_matrix4x4(soa_frag, colors);
+ vector float inputs[4*4], outputs[2*4];
+ vector unsigned int kill_mask;
+ vector float fragZ;
+
+ eval_inputs((float) x, (float) y, &fragZ, inputs);
- float4 fragZ;
+ ASSERT(spu.fragment_program);
+ ASSERT(spu.fragment_ops);
- fragZ.v = eval_z((float) x, (float) y);
+ /* Execute the current fragment program */
+ kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
- /* Do all per-fragment/quad operations here, including:
- * alpha test, z test, stencil test, blend and framebuffer writing.
+ mask = spu_andc(mask, kill_mask);
+
+ /* Execute per-fragment/quad operations, including:
+ * alpha test, z test, stencil test, blend and framebuffer writing.
+ * Note that there are two different fragment operations functions
+ * that can be called, one for front-facing fragments, and one
+ * for back-facing fragments. (Often the two are the same;
+ * but in some cases, like two-sided stenciling, they can be
+ * very different.) So choose the correct function depending
+ * on the calculated facing.
*/
- spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile,
- fragZ.v,
- soa_frag[0], soa_frag[1],
- soa_frag[2], soa_frag[3],
+ spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
+ fragZ,
+ outputs[0*4+0],
+ outputs[0*4+1],
+ outputs[0*4+2],
+ outputs[0*4+3],
mask);
}
-
}
}
@@ -383,64 +303,49 @@ emit_quad( int x, int y, mask_t mask )
* Given an X or Y coordinate, return the block/quad coordinate that it
* belongs to.
*/
-static INLINE int block( int x )
+static INLINE int
+block(int x)
{
return x & ~1;
}
/**
- * Compute mask which indicates which pixels in the 2x2 quad are actually inside
- * the triangle's bounds.
- * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
- */
-static INLINE mask_t calculate_mask( int x )
-{
- /* This is a little tricky.
- * Use & instead of && to avoid branches.
- * Use negation to convert true/false to ~0/0 values.
- */
- mask_t mask;
- mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0);
- mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
- mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2);
- mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
- return mask;
-}
-
-
-/**
* Render a horizontal span of quads
*/
-static void flush_spans( void )
+static void
+flush_spans(void)
{
int minleft, maxright;
- int x;
+
+ const int l0 = spu_extract(setup.span.quad, 0);
+ const int l1 = spu_extract(setup.span.quad, 1);
+ const int r0 = spu_extract(setup.span.quad, 2);
+ const int r1 = spu_extract(setup.span.quad, 3);
switch (setup.span.y_flags) {
case 0x3:
/* both odd and even lines written (both quad rows) */
- minleft = MIN2(setup.span.left[0], setup.span.left[1]);
- maxright = MAX2(setup.span.right[0], setup.span.right[1]);
+ minleft = MIN2(l0, l1);
+ maxright = MAX2(r0, r1);
break;
case 0x1:
/* only even line written (quad top row) */
- minleft = setup.span.left[0];
- maxright = setup.span.right[0];
+ minleft = l0;
+ maxright = r0;
break;
case 0x2:
/* only odd line written (quad bottom row) */
- minleft = setup.span.left[1];
- maxright = setup.span.right[1];
+ minleft = l1;
+ maxright = r1;
break;
default:
return;
}
-
/* OK, we're very likely to need the tile data now.
* clear or finish waiting if needed.
*/
@@ -457,7 +362,7 @@ static void flush_spans( void )
}
ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
- if (spu.read_depth) {
+ if (spu.read_depth_stencil) {
if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
/* wait for mfc_get() to complete */
//printf("SPU: %u: waiting for ztile\n", spu.init.id);
@@ -472,93 +377,119 @@ static void flush_spans( void )
ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
}
- /* XXX this loop could be moved into the above switch cases and
- * calculate_mask() could be simplified a bit...
- */
- for (x = block(minleft); x <= block(maxright); x += 2) {
-#if 1
- emit_quad( x, setup.span.y, calculate_mask( x ) );
-#endif
+ /* XXX this loop could be moved into the above switch cases... */
+
+ /* Setup for mask calculation */
+ const vec_int4 quad_LlRr = setup.span.quad;
+ const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8);
+ const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B));
+ const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B));
+
+ const vec_int4 twos = spu_splats(2);
+
+ const int x = block(minleft);
+ vec_int4 xs = {x, x+1, x, x+1};
+
+ for (; spu_extract(xs, 0) <= block(maxright); xs += twos) {
+ /**
+ * Computes mask to indicate which pixels in the 2x2 quad are actually
+ * inside the triangle's bounds.
+ */
+
+ /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */
+ const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs);
+ const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs);
+
+ /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */
+ const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs);
+
+ /* Combine results to create mask */
+ const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs);
+
+ emit_quad(spu_extract(xs, 0), setup.span.y, mask);
}
setup.span.y = 0;
setup.span.y_flags = 0;
- setup.span.right[0] = 0;
- setup.span.right[1] = 0;
+ /* Zero right elements */
+ setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
}
+
#if DEBUG_VERTS
-static void print_vertex(const struct vertex_header *v)
+static void
+print_vertex(const struct vertex_header *v)
{
- int i;
- fprintf(stderr, "Vertex: (%p)\n", v);
- for (i = 0; i < setup.quad.nr_attrs; i++) {
- fprintf(stderr, " %d: %f %f %f %f\n", i,
- v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
+ uint i;
+ fprintf(stderr, " Vertex: (%p)\n", v);
+ for (i = 0; i < spu.vertex_info.num_attribs; i++) {
+ fprintf(stderr, " %d: %f %f %f %f\n", i,
+ spu_extract(v->data[i], 0),
+ spu_extract(v->data[i], 1),
+ spu_extract(v->data[i], 2),
+ spu_extract(v->data[i], 3));
}
}
#endif
-static boolean setup_sort_vertices(const struct vertex_header *v0,
- const struct vertex_header *v1,
- const struct vertex_header *v2)
+/**
+ * Sort vertices from top to bottom.
+ * Compute area and determine front vs. back facing.
+ * Do coarse clip test against tile bounds
+ * \return FALSE if tri is totally outside tile, TRUE otherwise
+ */
+static boolean
+setup_sort_vertices(const struct vertex_header *v0,
+ const struct vertex_header *v1,
+ const struct vertex_header *v2)
{
+ float area, sign;
#if DEBUG_VERTS
- fprintf(stderr, "Triangle:\n");
- print_vertex(v0);
- print_vertex(v1);
- print_vertex(v2);
+ if (spu.init.id==0) {
+ fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
+ print_vertex(v0);
+ print_vertex(v1);
+ print_vertex(v2);
+ }
#endif
- setup.vprovoke = v2;
-
/* determine bottom to top order of vertices */
{
- float y0 = spu_extract(v0->data[0], 1);
- float y1 = spu_extract(v1->data[0], 1);
- float y2 = spu_extract(v2->data[0], 1);
- if (y0 <= y1) {
- if (y1 <= y2) {
- /* y0<=y1<=y2 */
- setup.vmin = v0;
- setup.vmid = v1;
- setup.vmax = v2;
- }
- else if (y2 <= y0) {
- /* y2<=y0<=y1 */
- setup.vmin = v2;
- setup.vmid = v0;
- setup.vmax = v1;
- }
- else {
- /* y0<=y2<=y1 */
- setup.vmin = v0;
- setup.vmid = v2;
- setup.vmax = v1;
- }
- }
- else {
- if (y0 <= y2) {
- /* y1<=y0<=y2 */
- setup.vmin = v1;
- setup.vmid = v0;
- setup.vmax = v2;
- }
- else if (y2 <= y1) {
- /* y2<=y1<=y0 */
- setup.vmin = v2;
- setup.vmid = v1;
- setup.vmax = v0;
- }
- else {
- /* y1<=y2<=y0 */
- setup.vmin = v1;
- setup.vmid = v2;
- setup.vmax = v0;
- }
- }
+ /* A table of shuffle patterns for putting vertex_header pointers into
+ correct order. Quite magical. */
+ const vec_uchar16 sort_order_patterns[] = {
+ SHUFFLE4(A,B,C,C),
+ SHUFFLE4(C,A,B,C),
+ SHUFFLE4(A,C,B,C),
+ SHUFFLE4(B,C,A,C),
+ SHUFFLE4(B,A,C,C),
+ SHUFFLE4(C,B,A,C) };
+
+ /* The vertex_header pointers, packed for easy shuffling later */
+ const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2};
+
+ /* Collate y values into two vectors for comparison.
+ Using only one shuffle constant! ;) */
+ const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C));
+ const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C));
+ const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C));
+ const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C));
+
+ /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
+ const vec_uint4 compare = spu_cmpgt(y_012, y_120);
+ /* Compress the result of the comparison into 4 bits */
+ const vec_uint4 gather = spu_gather(compare);
+ /* Subtract one to attain the index into the LUT. Magical. */
+ const unsigned int index = spu_extract(gather, 0) - 1;
+
+ /* Load the appropriate pattern and construct the desired vector. */
+ setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]);
+
+ /* Using the result of the comparison, set sign.
+ Very magical. */
+ sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f);
}
/* Check if triangle is completely outside the tile bounds */
@@ -575,41 +506,28 @@ static boolean setup_sort_vertices(const struct vertex_header *v0,
spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
return FALSE;
- setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
- setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
- setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
- setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
- setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0);
- setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1);
+ setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
+ setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
+ setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);
/*
* Compute triangle's area. Use 1/area to compute partial
* derivatives of attributes later.
- *
- * The area will be the same as prim->det, but the sign may be
- * different depending on how the vertices get sorted above.
- *
- * To determine whether the primitive is front or back facing we
- * use the prim->det value because its sign is correct.
*/
- {
- const float area = (setup.emaj.dx * setup.ebot.dy -
- setup.ebot.dx * setup.emaj.dy);
-
- setup.oneoverarea = 1.0f / area;
- /*
- _mesa_printf("%s one-over-area %f area %f det %f\n",
- __FUNCTION__, setup.oneoverarea, area, prim->det );
- */
- }
+ area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;
+
+ setup.oneOverArea = 1.0f / area;
-#if 0
- /* We need to know if this is a front or back-facing triangle for:
- * - the GLSL gl_FrontFacing fragment attribute (bool)
- * - two-sided stencil test
+ /* The product of area * sign indicates front/back orientation (0/1).
+ * Just in case someone gets the bright idea of switching the front
+ * and back constants without noticing that we're assuming their
+ * values in this operation, also assert that the values are
+ * what we think they are.
*/
- setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
-#endif
+ ASSERT(CELL_FACING_FRONT == 0);
+ ASSERT(CELL_FACING_BACK == 1);
+ setup.facing = (area * sign > 0.0f)
+ ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
return TRUE;
}
@@ -622,63 +540,11 @@ static boolean setup_sort_vertices(const struct vertex_header *v0,
* \param slot which attribute slot
*/
static INLINE void
-const_coeff(uint slot)
+const_coeff4(uint slot)
{
- setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0};
- setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0};
- setup.coef[slot].a0.v = setup.vprovoke->data[slot];
-}
-
-
-/**
- * Compute a0, dadx and dady for a linearly interpolated coefficient,
- * for a triangle.
- */
-static INLINE void
-tri_linear_coeff(uint slot, uint firstComp, uint lastComp)
-{
- uint i;
- const float *vmin_d = (float *) &setup.vmin->data[slot];
- const float *vmid_d = (float *) &setup.vmid->data[slot];
- const float *vmax_d = (float *) &setup.vmax->data[slot];
- const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f;
- const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
-
- for (i = firstComp; i < lastComp; i++) {
- float botda = vmid_d[i] - vmin_d[i];
- float majda = vmax_d[i] - vmin_d[i];
- float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
- float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
-
- ASSERT(slot < PIPE_MAX_SHADER_INPUTS);
-
- setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
- setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
-
- /* calculate a0 as the value which would be sampled for the
- * fragment at (0,0), taking into account that we want to sample at
- * pixel centers, in other words (0.5, 0.5).
- *
- * this is neat but unfortunately not a good way to do things for
- * triangles with very large values of dadx or dady as it will
- * result in the subtraction and re-addition from a0 of a very
- * large number, which means we'll end up loosing a lot of the
- * fractional bits and precision from a0. the way to fix this is
- * to define a0 as the sample at a pixel center somewhere near vmin
- * instead - i'll switch to this later.
- */
- setup.coef[slot].a0.f[i] = (vmin_d[i] -
- (setup.coef[slot].dadx.f[i] * x +
- setup.coef[slot].dady.f[i] * y));
- }
-
- /*
- _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
- slot, "xyzw"[i],
- setup.coef[slot].a0[i],
- setup.coef[slot].dadx.f[i],
- setup.coef[slot].dady.f[i]);
- */
+ setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
+ setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
+ setup.coef[slot].a0 = setup.vprovoke->data[slot];
}
@@ -702,18 +568,16 @@ tri_linear_coeff4(uint slot)
vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
spu_mul(majda, spu_splats(setup.ebot.dx)));
- setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea));
- setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea));
+ setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
+ setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
- vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx);
- vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy);
+ vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
+ vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
- setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy));
+ setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
}
-
-#if 0
/**
* Compute a0, dadx and dady for a perspective-corrected interpolant,
* for a triangle.
@@ -722,82 +586,76 @@ tri_linear_coeff4(uint slot)
* Later, when we compute the value at a particular fragment position we'll
* divide the interpolated value by the interpolated W at that fragment.
*/
-static void tri_persp_coeff( unsigned slot,
- unsigned i )
+static void
+tri_persp_coeff4(uint slot)
{
- /* premultiply by 1/w:
- */
- float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3];
- float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3];
- float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3];
-
- float botda = mida - mina;
- float majda = maxa - mina;
- float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
- float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
-
- /*
- printf("tri persp %d,%d: %f %f %f\n", slot, i,
- setup.vmin->data[slot][i],
- setup.vmid->data[slot][i],
- setup.vmax->data[slot][i]
- );
- */
+ const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
+ const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
+
+ const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3));
+ const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3));
+ const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3));
+
+ vector float vmin_d = setup.vmin->data[slot];
+ vector float vmid_d = setup.vmid->data[slot];
+ vector float vmax_d = setup.vmax->data[slot];
+
+ vmin_d = spu_mul(vmin_d, vmin_w);
+ vmid_d = spu_mul(vmid_d, vmid_w);
+ vmax_d = spu_mul(vmax_d, vmax_w);
+
+ vector float botda = vmid_d - vmin_d;
+ vector float majda = vmax_d - vmin_d;
- assert(slot < PIPE_MAX_SHADER_INPUTS);
- assert(i <= 3);
+ vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
+ spu_mul(botda, spu_splats(setup.emaj.dy)));
+ vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
+ spu_mul(majda, spu_splats(setup.ebot.dx)));
- setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
- setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
- setup.coef[slot].a0.f[i] = (mina -
- (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) +
- setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f)));
+ setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
+ setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
+
+ vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
+ vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
+
+ setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
}
-#endif
+
/**
* Compute the setup.coef[] array dadx, dady, a0 values.
* Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
*/
-static void setup_tri_coefficients(void)
+static void
+setup_tri_coefficients(void)
{
-#if 1
uint i;
for (i = 0; i < spu.vertex_info.num_attribs; i++) {
- switch (spu.vertex_info.interp_mode[i]) {
+ switch (spu.vertex_info.attrib[i].interp_mode) {
case INTERP_NONE:
break;
- case INTERP_POS:
- /*tri_linear_coeff(i, 2, 3);*/
- /* XXX interp W if PERSPECTIVE... */
- tri_linear_coeff4(i);
- break;
case INTERP_CONSTANT:
- const_coeff(i);
+ const_coeff4(i);
break;
+ case INTERP_POS:
+ /* fall-through */
case INTERP_LINEAR:
tri_linear_coeff4(i);
break;
case INTERP_PERSPECTIVE:
- tri_linear_coeff4(i); /* temporary */
+ tri_persp_coeff4(i);
break;
default:
ASSERT(0);
}
}
-#else
- ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS);
- ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR ||
- spu.vertex_info.interp_mode[1] == INTERP_CONSTANT);
- tri_linear_coeff(0, 2, 3); /* slot 0, z */
- tri_linear_coeff(1, 0, 4); /* slot 1, color */
-#endif
}
-static void setup_tri_edges(void)
+static void
+setup_tri_edges(void)
{
float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
@@ -827,9 +685,8 @@ static void setup_tri_edges(void)
* Render the upper or lower half of a triangle.
* Scissoring/cliprect is applied here too.
*/
-static void subtriangle( struct edge *eleft,
- struct edge *eright,
- unsigned lines )
+static void
+subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
{
const int minx = setup.cliprect_minx;
const int maxx = setup.cliprect_maxx;
@@ -881,9 +738,11 @@ static void subtriangle( struct edge *eleft,
setup.span.y = block(_y);
}
- setup.span.left[_y&1] = left;
- setup.span.right[_y&1] = right;
- setup.span.y_flags |= 1<<(_y&1);
+ int offset = _y&1;
+ vec_int4 quad_LlRr = {left, left, right, right};
+ /* Store left and right in 0 or 1 row of quad based on offset */
+ setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset));
+ setup.span.y_flags |= 1<<offset;
}
}
@@ -902,7 +761,8 @@ static void subtriangle( struct edge *eleft,
* The tile data should have already been fetched.
*/
boolean
-tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
+tri_draw(const float *v0, const float *v1, const float *v2,
+ uint tx, uint ty)
{
setup.tx = tx;
setup.ty = ty;
@@ -924,21 +784,16 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
setup.span.y = 0;
setup.span.y_flags = 0;
- setup.span.right[0] = 0;
- setup.span.right[1] = 0;
- /* setup.span.z_mode = tri_z_mode( setup.ctx ); */
+ /* Zero right elements */
+ setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
- /* init_constant_attribs( setup ); */
-
- if (setup.oneoverarea < 0.0) {
- /* emaj on left:
- */
+ if (setup.oneOverArea < 0.0) {
+ /* emaj on left */
subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
}
else {
- /* emaj on right:
- */
+ /* emaj on right */
subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
}
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
index b8a0d4a265..af25dd3718 100644
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -1,7 +1,7 @@
#include "cell/common.h"
#include "pipe/p_shader_tokens.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi/tgsi_parse.h"
//#include "tgsi_build.h"
#include "tgsi/tgsi_util.h"
diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile
index f08b8df07a..dfb7f5dcf6 100644
--- a/src/gallium/drivers/failover/Makefile
+++ b/src/gallium/drivers/failover/Makefile
@@ -9,6 +9,3 @@ C_SOURCES = \
fo_context.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c
index 10c4ffc209..0742b27b8f 100644
--- a/src/gallium/drivers/failover/fo_context.c
+++ b/src/gallium/drivers/failover/fo_context.c
@@ -27,7 +27,7 @@
#include "pipe/p_defines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_memory.h"
#include "pipe/p_context.h"
diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h
index c6409fe1e1..9ba86ba866 100644
--- a/src/gallium/drivers/failover/fo_context.h
+++ b/src/gallium/drivers/failover/fo_context.h
@@ -114,5 +114,12 @@ failover_context( struct pipe_context *pipe )
return (struct failover_context *)pipe;
}
+/* Internal functions
+ */
+void
+failover_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf);
+
#endif /* FO_CONTEXT_H */
diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile
index 41a61a0020..12821c5a76 100644
--- a/src/gallium/drivers/i915simple/Makefile
+++ b/src/gallium/drivers/i915simple/Makefile
@@ -26,6 +26,3 @@ C_SOURCES = \
i915_surface.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h
index 45bf4f4028..a433cf054d 100644
--- a/src/gallium/drivers/i915simple/i915_batch.h
+++ b/src/gallium/drivers/i915simple/i915_batch.h
@@ -108,7 +108,7 @@ i915_batchbuffer_flush( struct i915_batchbuffer *batch,
#define OUT_RELOC( buf, flags, delta ) \
i915_batchbuffer_reloc( i915->batch, buf, flags, delta )
-#define FLUSH_BATCH(fence) do { \
+#define FLUSH_BATCH(fence) do { \
i915->winsys->batch_flush( i915->winsys, fence ); \
i915->hardware_dirty = ~0; \
} while (0)
diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c
index 45fae4c999..448a4708ce 100644
--- a/src/gallium/drivers/i915simple/i915_blit.c
+++ b/src/gallium/drivers/i915simple/i915_blit.c
@@ -38,7 +38,7 @@
void
i915_fill_blit(struct i915_context *i915,
unsigned cpp,
- short dst_pitch,
+ unsigned short dst_pitch,
struct pipe_buffer *dst_buffer,
unsigned dst_offset,
short x, short y,
@@ -47,15 +47,23 @@ i915_fill_blit(struct i915_context *i915,
{
unsigned BR13, CMD;
+
+ I915_DBG(i915,
+ "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+ __FUNCTION__,
+ dst_buffer, dst_pitch, dst_offset, x, y, w, h);
+
switch (cpp) {
case 1:
case 2:
case 3:
- BR13 = dst_pitch | (0xF0 << 16) | (1 << 24);
+ BR13 = (((int) dst_pitch) & 0xffff) |
+ (0xF0 << 16) | (1 << 24);
CMD = XY_COLOR_BLT_CMD;
break;
case 4:
- BR13 = dst_pitch | (0xF0 << 16) | (1 << 24) | (1 << 25);
+ BR13 = (((int) dst_pitch) & 0xffff) |
+ (0xF0 << 16) | (1 << 24) | (1 << 25);
CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
XY_COLOR_BLT_WRITE_RGB);
break;
@@ -63,10 +71,6 @@ i915_fill_blit(struct i915_context *i915,
return;
}
-// DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
-// __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
-
-
if (!BEGIN_BATCH(6, 1)) {
FLUSH_BATCH(NULL);
assert(BEGIN_BATCH(6, 1));
@@ -77,6 +81,7 @@ i915_fill_blit(struct i915_context *i915,
OUT_BATCH(((y + h) << 16) | (x + w));
OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset);
OUT_BATCH(color);
+ FLUSH_BATCH(NULL);
}
@@ -84,10 +89,10 @@ void
i915_copy_blit( struct i915_context *i915,
unsigned do_flip,
unsigned cpp,
- short src_pitch,
+ unsigned short src_pitch,
struct pipe_buffer *src_buffer,
unsigned src_offset,
- short dst_pitch,
+ unsigned short dst_pitch,
struct pipe_buffer *dst_buffer,
unsigned dst_offset,
short src_x, short src_y,
@@ -105,20 +110,16 @@ i915_copy_blit( struct i915_context *i915,
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
- src_pitch *= (short) cpp;
- dst_pitch *= (short) cpp;
-
switch (cpp) {
case 1:
case 2:
case 3:
- BR13 = (((int) dst_pitch) & 0xffff) |
+ BR13 = (((int) dst_pitch) & 0xffff) |
(0xCC << 16) | (1 << 24);
CMD = XY_SRC_COPY_BLT_CMD;
break;
case 4:
- BR13 =
- (((int) dst_pitch) & 0xffff) |
+ BR13 = (((int) dst_pitch) & 0xffff) |
(0xCC << 16) | (1 << 24) | (1 << 25);
CMD =
(XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
@@ -152,6 +153,7 @@ i915_copy_blit( struct i915_context *i915,
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH(((int) src_pitch & 0xffff));
OUT_RELOC(src_buffer, I915_BUFFER_ACCESS_READ, src_offset);
+ FLUSH_BATCH(NULL);
}
diff --git a/src/gallium/drivers/i915simple/i915_blit.h b/src/gallium/drivers/i915simple/i915_blit.h
index 6e5b44e124..0bb3453861 100644
--- a/src/gallium/drivers/i915simple/i915_blit.h
+++ b/src/gallium/drivers/i915simple/i915_blit.h
@@ -33,10 +33,10 @@
extern void i915_copy_blit(struct i915_context *i915,
unsigned do_flip,
unsigned cpp,
- short src_pitch,
+ unsigned short src_pitch,
struct pipe_buffer *src_buffer,
unsigned src_offset,
- short dst_pitch,
+ unsigned short dst_pitch,
struct pipe_buffer *dst_buffer,
unsigned dst_offset,
short srcx, short srcy,
@@ -45,7 +45,7 @@ extern void i915_copy_blit(struct i915_context *i915,
extern void i915_fill_blit(struct i915_context *i915,
unsigned cpp,
- short dst_pitch,
+ unsigned short dst_pitch,
struct pipe_buffer *dst_buffer,
unsigned dst_offset,
short x, short y,
diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c
index 6dd3eda85d..3e3a596884 100644
--- a/src/gallium/drivers/i915simple/i915_context.c
+++ b/src/gallium/drivers/i915simple/i915_context.c
@@ -34,7 +34,7 @@
#include "draw/draw_context.h"
#include "pipe/p_defines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_inlines.h"
#include "util/u_memory.h"
#include "pipe/p_screen.h"
diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c
index 5e26d1b905..e08582efab 100644
--- a/src/gallium/drivers/i915simple/i915_debug.c
+++ b/src/gallium/drivers/i915simple/i915_debug.c
@@ -29,8 +29,9 @@
#include "i915_context.h"
#include "i915_winsys.h"
#include "i915_debug.h"
-#include "pipe/p_winsys.h"
-#include "pipe/p_debug.h"
+#include "i915_batch.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "util/u_debug.h"
static void
@@ -210,6 +211,7 @@ BITS(
PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo));
}
+#ifdef DEBUG
#define MBZ( dw, hi, lo) do { \
unsigned x = (dw) >> (lo); \
unsigned lomask = (1 << (lo)) - 1; \
@@ -217,6 +219,10 @@ BITS(
himask = (1UL << (hi)) - 1; \
assert ((x & himask & ~lomask) == 0); \
} while (0)
+#else
+#define MBZ( dw, hi, lo) do { \
+} while (0)
+#endif
static void
FLAG(
@@ -858,19 +864,17 @@ static boolean i915_debug_packet( struct debug_stream *stream )
void
-i915_dump_batchbuffer( struct i915_context *i915 )
+i915_dump_batchbuffer( struct i915_batchbuffer *batch )
{
struct debug_stream stream;
- /* TODO fix me */
- unsigned *start = 0;/*i915->batch_start;*/
- unsigned *end = 0;/*i915->winsys->batch_start( i915->winsys, 0, 0 );*/
+ unsigned *start = (unsigned*)batch->map;
+ unsigned *end = (unsigned*)batch->ptr;
unsigned long bytes = (unsigned long) (end - start) * 4;
boolean done = FALSE;
stream.offset = 0;
stream.ptr = (char *)start;
stream.print_addresses = 0;
- stream.winsys = i915->pipe.winsys;
if (!start || !end) {
debug_printf( "\n\nBATCH: ???\n");
diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h
index afb63edabf..16ca7277c7 100644
--- a/src/gallium/drivers/i915simple/i915_debug.h
+++ b/src/gallium/drivers/i915simple/i915_debug.h
@@ -41,7 +41,6 @@ struct debug_stream
char *ptr; /* pointer to gtt offset zero */
char *end; /* pointer to gtt offset zero */
unsigned print_addresses;
- struct pipe_winsys *winsys;
};
@@ -73,7 +72,7 @@ void i915_print_ureg(const char *msg, unsigned ureg);
#if defined(DEBUG) && defined(FILE_DEBUG_FLAG)
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
static INLINE void
I915_DBG(
@@ -105,9 +104,9 @@ I915_DBG(
#endif
-void i915_dump_batchbuffer( struct i915_context *i915 );
-
+struct i915_batchbuffer;
+void i915_dump_batchbuffer( struct i915_batchbuffer *i915 );
void i915_debug_init( struct i915_context *i915 );
diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c
index 48be3e1472..9c5b117b6d 100644
--- a/src/gallium/drivers/i915simple/i915_debug_fp.c
+++ b/src/gallium/drivers/i915simple/i915_debug_fp.c
@@ -28,7 +28,7 @@
#include "i915_reg.h"
#include "i915_debug.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_memory.h"
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index 34b4a846c1..961c1bf213 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -144,7 +144,7 @@ src_vector(struct i915_fp_compile *p,
const struct tgsi_full_src_register *source)
{
uint index = source->SrcRegister.Index;
- uint src, sem_name, sem_ind;
+ uint src = 0, sem_name, sem_ind;
switch (source->SrcRegister.File) {
case TGSI_FILE_TEMPORARY:
@@ -321,16 +321,27 @@ static uint
translate_tex_src_target(struct i915_fp_compile *p, uint tex)
{
switch (tex) {
+ case TGSI_TEXTURE_SHADOW1D:
+ /* fall-through */
case TGSI_TEXTURE_1D:
return D0_SAMPLE_TYPE_2D;
+
+ case TGSI_TEXTURE_SHADOW2D:
+ /* fall-through */
case TGSI_TEXTURE_2D:
return D0_SAMPLE_TYPE_2D;
+
+ case TGSI_TEXTURE_SHADOWRECT:
+ /* fall-through */
case TGSI_TEXTURE_RECT:
return D0_SAMPLE_TYPE_2D;
+
case TGSI_TEXTURE_3D:
return D0_SAMPLE_TYPE_VOLUME;
+
case TGSI_TEXTURE_CUBE:
return D0_SAMPLE_TYPE_CUBE;
+
default:
i915_program_error(p, "TexSrc type");
return 0;
@@ -964,7 +975,7 @@ i915_translate_instructions(struct i915_fp_compile *p,
= &parse.FullToken.FullImmediate;
const uint pos = p->num_immediates++;
uint j;
- for (j = 0; j < imm->Immediate.Size; j++) {
+ for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float;
}
}
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
index 4fda1ab64f..58c41840e1 100644
--- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -40,9 +40,9 @@
#include "draw/draw_context.h"
#include "draw/draw_vbuf.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -62,7 +62,7 @@ struct i915_vbuf_render {
struct i915_context *i915;
/** Vertex size in bytes */
- unsigned vertex_size;
+ size_t vertex_size;
/** Software primitive */
unsigned prim;
@@ -79,6 +79,7 @@ struct i915_vbuf_render {
size_t vbo_offset;
void *vbo_ptr;
size_t vbo_alloc_size;
+ size_t vbo_max_used;
};
@@ -108,7 +109,7 @@ i915_vbuf_render_get_vertex_info( struct vbuf_render *render )
}
-static void *
+static boolean
i915_vbuf_render_allocate_vertices( struct vbuf_render *render,
ushort vertex_size,
ushort nr_vertices )
@@ -124,7 +125,8 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render,
if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) {
} else {
i915->vbo_flushed = 0;
- pipe_buffer_reference(screen, &i915_render->vbo, NULL);
+ if (i915_render->vbo)
+ pipe_buffer_reference(screen, &i915_render->vbo, NULL);
}
if (!i915_render->vbo) {
@@ -134,19 +136,49 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render,
64,
I915_BUFFER_USAGE_LIT_VERTEX,
i915_render->vbo_size);
- i915_render->vbo_ptr = pipe_buffer_map(screen,
- i915_render->vbo,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- pipe_buffer_unmap(screen, i915_render->vbo);
+
}
+ i915_render->vertex_size = vertex_size;
i915->vbo = i915_render->vbo;
i915->vbo_offset = i915_render->vbo_offset;
i915->dirty |= I915_NEW_VBO;
+ if (!i915_render->vbo)
+ return FALSE;
+ return TRUE;
+}
+
+
+/**
+ * Map the current vertex buffer for CPU writes.
+ * \return pointer into the mapping at the context's current vbo_offset
+ */
+static void *
+i915_vbuf_render_map_vertices( struct vbuf_render *render )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+ struct pipe_screen *screen = i915->pipe.screen;
+
+ /* The format string has a %s conversion: supply the function name for it. */
+ if (i915->vbo_flushed)
+ debug_printf("%s bad vbo flush occured stalling on hw\n", __FUNCTION__);
+
+ i915_render->vbo_ptr = pipe_buffer_map(screen,
+ i915_render->vbo,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+
return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset;
}
+static void
+i915_vbuf_render_unmap_vertices( struct vbuf_render *render,
+ ushort min_index,
+ ushort max_index )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+ struct pipe_screen *screen = i915->pipe.screen;
+
+ i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1));
+ pipe_buffer_unmap(screen, i915_render->vbo);
+}
static boolean
i915_vbuf_render_set_primitive( struct vbuf_render *render,
@@ -197,9 +229,7 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render,
i915_render->fallback = 0;
return TRUE;
default:
- assert((int)"Error unkown primtive type" & 0);
- /* Actually, can handle a lot more just fine... Fixme.
- */
+ /* FIXME: Actually, can handle a lot more just fine... */
return FALSE;
}
}
@@ -456,18 +486,15 @@ out:
static void
-i915_vbuf_render_release_vertices( struct vbuf_render *render,
- void *vertices,
- unsigned vertex_size,
- unsigned vertices_used )
+i915_vbuf_render_release_vertices( struct vbuf_render *render )
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
struct i915_context *i915 = i915_render->i915;
- size_t size = (size_t)vertex_size * (size_t)vertices_used;
assert(i915->vbo);
- i915_render->vbo_offset += size;
+ i915_render->vbo_offset += i915_render->vbo_max_used;
+ i915_render->vbo_max_used = 0;
i915->vbo = NULL;
i915->dirty |= I915_NEW_VBO;
}
@@ -501,6 +528,8 @@ i915_vbuf_render_create( struct i915_context *i915 )
i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info;
i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices;
+ i915_render->base.map_vertices = i915_vbuf_render_map_vertices;
+ i915_render->base.unmap_vertices = i915_vbuf_render_unmap_vertices;
i915_render->base.set_primitive = i915_vbuf_render_set_primitive;
i915_render->base.draw = i915_vbuf_render_draw;
i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays;
diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c
index 1c976082df..b7bd3b3b74 100644
--- a/src/gallium/drivers/i915simple/i915_screen.c
+++ b/src/gallium/drivers/i915simple/i915_screen.c
@@ -27,7 +27,8 @@
#include "util/u_memory.h"
-#include "pipe/p_winsys.h"
+#include "util/u_simple_screen.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_inlines.h"
#include "util/u_string.h"
@@ -203,16 +204,79 @@ i915_destroy_screen( struct pipe_screen *screen )
}
+/**
+ * Create a transfer object describing a w x h region, starting at (x, y),
+ * of one image of a texture. The image is selected by face for cube maps,
+ * by zslice for 3D textures, and is image 0 of the level otherwise.
+ * \return the new transfer, or NULL if the allocation fails
+ */
+static struct pipe_transfer*
+i915_get_tex_transfer(struct pipe_screen *screen,
+ struct pipe_texture *texture,
+ unsigned face, unsigned level, unsigned zslice,
+ enum pipe_transfer_usage usage, unsigned x, unsigned y,
+ unsigned w, unsigned h)
+{
+ struct i915_texture *tex = (struct i915_texture *)texture;
+ struct i915_transfer *trans;
+ unsigned offset; /* in bytes */
+
+ if (texture->target == PIPE_TEXTURE_CUBE) {
+ offset = tex->image_offset[level][face];
+ }
+ else if (texture->target == PIPE_TEXTURE_3D) {
+ offset = tex->image_offset[level][zslice];
+ }
+ else {
+ offset = tex->image_offset[level][0];
+ assert(face == 0);
+ assert(zslice == 0);
+ }
+
+ trans = CALLOC_STRUCT(i915_transfer);
+ /* Do not form &trans->base from a NULL pointer on allocation failure. */
+ if (!trans)
+ return NULL;
+
+ trans->base.refcount = 1;
+ pipe_texture_reference(&trans->base.texture, texture);
+ /* Take the format from the texture (this was a no-op self-assignment). */
+ trans->base.format = texture->format;
+ /* Record the region origin: i915_transfer_map() offsets by x and y. */
+ trans->base.x = x;
+ trans->base.y = y;
+ trans->base.width = w;
+ trans->base.height = h;
+ trans->base.block = texture->block;
+ trans->base.nblocksx = texture->nblocksx[level];
+ trans->base.nblocksy = texture->nblocksy[level];
+ trans->base.stride = tex->stride;
+ trans->offset = offset;
+ trans->base.usage = usage;
+
+ return &trans->base;
+}
+
+static void
+i915_tex_transfer_release(struct pipe_screen *screen,
+ struct pipe_transfer **transfer)
+{
+ struct pipe_transfer *trans = *transfer;
+
+ if (--trans->refcount == 0) {
+ pipe_texture_reference(&trans->texture, NULL);
+ FREE(trans);
+ }
+
+ *transfer = NULL;
+}
+
static void *
-i915_surface_map( struct pipe_screen *screen,
- struct pipe_surface *surface,
- unsigned flags )
+i915_transfer_map( struct pipe_screen *screen,
+ struct pipe_transfer *transfer )
{
- char *map = pipe_buffer_map( screen, surface->buffer, flags );
+ struct i915_texture *tex = (struct i915_texture *)transfer->texture;
+ char *map;
+ unsigned flags = 0;
+
+ if (transfer->usage != PIPE_TRANSFER_WRITE)
+ flags |= PIPE_BUFFER_USAGE_CPU_READ;
+
+ if (transfer->usage != PIPE_TRANSFER_READ)
+ flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
+
+ map = pipe_buffer_map( screen, tex->buffer, flags );
if (map == NULL)
return NULL;
- if (surface->texture &&
+ if (transfer->texture &&
(flags & PIPE_BUFFER_USAGE_CPU_WRITE))
{
/* Do something to notify contexts of a texture change.
@@ -220,14 +284,17 @@ i915_surface_map( struct pipe_screen *screen,
/* i915_screen(screen)->timestamp++; */
}
- return map + surface->offset;
+ return map + i915_transfer(transfer)->offset +
+ transfer->y / transfer->block.height * transfer->stride +
+ transfer->x / transfer->block.width * transfer->block.size;
}
static void
-i915_surface_unmap(struct pipe_screen *screen,
- struct pipe_surface *surface)
+i915_transfer_unmap(struct pipe_screen *screen,
+ struct pipe_transfer *transfer)
{
- pipe_buffer_unmap( screen, surface->buffer );
+ struct i915_texture *tex = (struct i915_texture *)transfer->texture;
+ pipe_buffer_unmap( screen, tex->buffer );
}
@@ -275,10 +342,13 @@ i915_create_screen(struct pipe_winsys *winsys, uint pci_id)
i915screen->screen.get_param = i915_get_param;
i915screen->screen.get_paramf = i915_get_paramf;
i915screen->screen.is_format_supported = i915_is_format_supported;
- i915screen->screen.surface_map = i915_surface_map;
- i915screen->screen.surface_unmap = i915_surface_unmap;
+ i915screen->screen.get_tex_transfer = i915_get_tex_transfer;
+ i915screen->screen.tex_transfer_release = i915_tex_transfer_release;
+ i915screen->screen.transfer_map = i915_transfer_map;
+ i915screen->screen.transfer_unmap = i915_transfer_unmap;
i915_init_screen_texture_functions(&i915screen->screen);
+ u_simple_screen_init(&i915screen->screen);
return &i915screen->screen;
}
diff --git a/src/gallium/drivers/i915simple/i915_screen.h b/src/gallium/drivers/i915simple/i915_screen.h
index 73b0ff05ce..a371663453 100644
--- a/src/gallium/drivers/i915simple/i915_screen.h
+++ b/src/gallium/drivers/i915simple/i915_screen.h
@@ -50,13 +50,30 @@ struct i915_screen
};
-/** cast wrapper */
+/**
+ * Subclass of pipe_transfer.
+ * The base must stay the first member: i915_transfer() converts a
+ * pipe_transfer pointer to this type with a plain cast.
+ */
+struct i915_transfer
+{
+ struct pipe_transfer base;
+
+ unsigned offset; /* byte offset of the selected image; added to the mapped pointer */
+};
+
+
+/** cast wrappers */
static INLINE struct i915_screen *
i915_screen(struct pipe_screen *pscreen)
{
return (struct i915_screen *) pscreen;
}
+/** Downcast a generic pipe_transfer pointer to the i915 subclass. */
+static INLINE struct i915_transfer *
+i915_transfer( struct pipe_transfer *transfer )
+{
+   struct i915_transfer *i915_trans = (struct i915_transfer *) transfer;
+
+   return i915_trans;
+}
+
extern struct pipe_screen *
i915_create_screen(struct pipe_winsys *winsys, uint pci_id);
diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c
index d2487d8277..273e74002a 100644
--- a/src/gallium/drivers/i915simple/i915_state.c
+++ b/src/gallium/drivers/i915simple/i915_state.c
@@ -30,7 +30,7 @@
#include "draw/draw_context.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -318,8 +318,8 @@ i915_create_depth_stencil_state(struct pipe_context *pipe,
struct i915_depth_stencil_state *cso = CALLOC_STRUCT( i915_depth_stencil_state );
{
- int testmask = depth_stencil->stencil[0].value_mask & 0xff;
- int writemask = depth_stencil->stencil[0].write_mask & 0xff;
+ int testmask = depth_stencil->stencil[0].valuemask & 0xff;
+ int writemask = depth_stencil->stencil[0].writemask & 0xff;
cso->stencil_modes4 |= (_3DSTATE_MODES_4_CMD |
ENABLE_STENCIL_TEST_MASK |
@@ -350,8 +350,8 @@ i915_create_depth_stencil_state(struct pipe_context *pipe,
int dfop = i915_translate_stencil_op(depth_stencil->stencil[1].zfail_op);
int dpop = i915_translate_stencil_op(depth_stencil->stencil[1].zpass_op);
int ref = depth_stencil->stencil[1].ref_value & 0xff;
- int tmask = depth_stencil->stencil[1].value_mask & 0xff;
- int wmask = depth_stencil->stencil[1].write_mask & 0xff;
+ int tmask = depth_stencil->stencil[1].valuemask & 0xff;
+ int wmask = depth_stencil->stencil[1].writemask & 0xff;
cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS |
BFO_ENABLE_STENCIL_FUNCS |
@@ -394,7 +394,7 @@ i915_create_depth_stencil_state(struct pipe_context *pipe,
if (depth_stencil->alpha.enabled) {
int test = i915_translate_compare_func(depth_stencil->alpha.func);
- ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref);
+ ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref_value);
cso->depth_LIS6 |= (S6_ALPHA_TEST_ENABLE |
(test << S6_ALPHA_TEST_FUNC_SHIFT) |
@@ -535,13 +535,13 @@ static void i915_set_constant_buffer(struct pipe_context *pipe,
*/
if (buf) {
void *mapped;
- if (buf->size &&
+ if (buf->buffer && buf->buffer->size &&
(mapped = ws->buffer_map(ws, buf->buffer,
- PIPE_BUFFER_USAGE_CPU_READ))) {
- memcpy(i915->current.constants[shader], mapped, buf->size);
+ PIPE_BUFFER_USAGE_CPU_READ))) {
+ memcpy(i915->current.constants[shader], mapped, buf->buffer->size);
ws->buffer_unmap(ws, buf->buffer);
i915->current.num_user_constants[shader]
- = buf->size / (4 * sizeof(float));
+ = buf->buffer->size / (4 * sizeof(float));
}
else {
i915->current.num_user_constants[shader] = 0;
diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c
index 9bd6f92323..26e03f5127 100644
--- a/src/gallium/drivers/i915simple/i915_state_emit.c
+++ b/src/gallium/drivers/i915simple/i915_state_emit.c
@@ -211,20 +211,23 @@ i915_emit_hardware_state(struct i915_context *i915 )
struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
if (cbuf_surface) {
- unsigned cpitch = cbuf_surface->stride;
unsigned ctile = BUF_3D_USE_FENCE;
- if (cbuf_surface->texture &&
- ((struct i915_texture*)(cbuf_surface->texture))->tiled) {
+ struct i915_texture *tex = (struct i915_texture *)
+ cbuf_surface->texture;
+ struct pipe_buffer *buffer = tex->buffer;
+ assert(tex);
+
+ if (tex && tex->tiled) {
ctile = BUF_3D_TILED_SURFACE;
}
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
- OUT_BATCH(BUF_3D_ID_COLOR_BACK |
- BUF_3D_PITCH(cpitch) | /* pitch in bytes */
+ OUT_BATCH(BUF_3D_ID_COLOR_BACK |
+ BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
ctile);
- OUT_RELOC(cbuf_surface->buffer,
+ OUT_RELOC(tex->buffer,
I915_BUFFER_ACCESS_WRITE,
cbuf_surface->offset);
}
@@ -232,20 +235,23 @@ i915_emit_hardware_state(struct i915_context *i915 )
/* What happens if no zbuf??
*/
if (depth_surface) {
- unsigned zpitch = depth_surface->stride;
unsigned ztile = BUF_3D_USE_FENCE;
- if (depth_surface->texture &&
- ((struct i915_texture*)(depth_surface->texture))->tiled) {
+ struct i915_texture *tex = (struct i915_texture *)
+ depth_surface->texture;
+ struct pipe_buffer *buffer = tex->buffer;
+ assert(tex);
+
+ if (tex && tex->tiled) {
ztile = BUF_3D_TILED_SURFACE;
}
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
OUT_BATCH(BUF_3D_ID_DEPTH |
- BUF_3D_PITCH(zpitch) | /* pitch in bytes */
+ BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
ztile);
- OUT_RELOC(depth_surface->buffer,
+ OUT_RELOC(tex->buffer,
I915_BUFFER_ACCESS_WRITE,
depth_surface->offset);
}
diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c
index 62f1926644..7eec649906 100644
--- a/src/gallium/drivers/i915simple/i915_surface.c
+++ b/src/gallium/drivers/i915simple/i915_surface.c
@@ -31,7 +31,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_tile.h"
#include "util/u_rect.h"
@@ -47,42 +47,22 @@ i915_surface_copy(struct pipe_context *pipe,
struct pipe_surface *src,
unsigned srcx, unsigned srcy, unsigned width, unsigned height)
{
- assert( dst != src );
- assert( dst->block.size == src->block.size );
- assert( dst->block.width == src->block.height );
- assert( dst->block.height == src->block.height );
+ struct i915_texture *dst_tex = (struct i915_texture *)dst->texture;
+ struct i915_texture *src_tex = (struct i915_texture *)src->texture;
- if (0) {
- void *dst_map = pipe->screen->surface_map( pipe->screen,
- dst,
- PIPE_BUFFER_USAGE_CPU_WRITE );
-
- const void *src_map = pipe->screen->surface_map( pipe->screen,
- src,
- PIPE_BUFFER_USAGE_CPU_READ );
-
- pipe_copy_rect(dst_map,
- &dst->block,
- dst->stride,
- dstx, dsty,
- width, height,
- src_map,
- do_flip ? -(int) src->stride : src->stride,
- srcx, do_flip ? height - 1 - srcy : srcy);
+ assert( dst != src );
+ assert( dst_tex->base.block.size == src_tex->base.block.size );
+ assert( dst_tex->base.block.width == src_tex->base.block.height );
+ assert( dst_tex->base.block.height == src_tex->base.block.height );
+ assert( dst_tex->base.block.width == 1 );
+ assert( dst_tex->base.block.height == 1 );
- pipe->screen->surface_unmap(pipe->screen, src);
- pipe->screen->surface_unmap(pipe->screen, dst);
- }
- else {
- assert(dst->block.width == 1);
- assert(dst->block.height == 1);
- i915_copy_blit( i915_context(pipe),
- do_flip,
- dst->block.size,
- (short) src->stride, src->buffer, src->offset,
- (short) dst->stride, dst->buffer, dst->offset,
- (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height );
- }
+ i915_copy_blit( i915_context(pipe),
+ do_flip,
+ dst_tex->base.block.size,
+ (unsigned short) src_tex->stride, src_tex->buffer, src->offset,
+ (unsigned short) dst_tex->stride, dst_tex->buffer, dst->offset,
+ (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height );
}
@@ -92,26 +72,18 @@ i915_surface_fill(struct pipe_context *pipe,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height, unsigned value)
{
- if (0) {
- void *dst_map = pipe->screen->surface_map( pipe->screen,
- dst,
- PIPE_BUFFER_USAGE_CPU_WRITE );
+ struct i915_texture *tex = (struct i915_texture *)dst->texture;
- pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value);
+ assert(tex->base.block.width == 1);
+ assert(tex->base.block.height == 1);
- pipe->screen->surface_unmap(pipe->screen, dst);
- }
- else {
- assert(dst->block.width == 1);
- assert(dst->block.height == 1);
- i915_fill_blit( i915_context(pipe),
- dst->block.size,
- (short) dst->stride,
- dst->buffer, dst->offset,
- (short) dstx, (short) dsty,
- (short) width, (short) height,
- value );
- }
+ i915_fill_blit( i915_context(pipe),
+ tex->base.block.size,
+ (unsigned short) tex->stride,
+ tex->buffer, dst->offset,
+ (short) dstx, (short) dsty,
+ (short) width, (short) height,
+ value );
}
diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c
index bd87217063..957726523f 100644
--- a/src/gallium/drivers/i915simple/i915_texture.c
+++ b/src/gallium/drivers/i915simple/i915_texture.c
@@ -34,7 +34,7 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -178,7 +178,9 @@ i915_displaytarget_layout(struct i915_texture *tex)
if (tex->base.width[0] >= 128) {
tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
+#if 0 /* used for tiled display targets */
tex->tiled = 1;
+#endif
} else {
tex->stride = round_up(tex->base.nblocksx[0] * pt->block.size, 64);
tex->total_nblocksy = tex->base.nblocksy[0];
@@ -206,11 +208,10 @@ i945_miptree_layout_2d( struct i915_texture *tex )
unsigned nblocksx = pt->nblocksx[0];
unsigned nblocksy = pt->nblocksy[0];
-#if 0 /* used for tiled display targets */
- if (pt->last_level == 0 && pt->block.size == 4)
+ /* used for tiled display targets */
+ if (0)
if (i915_displaytarget_layout(tex))
return;
-#endif
tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
@@ -605,8 +606,8 @@ i915_texture_create(struct pipe_screen *screen,
tex_size = tex->stride * tex->total_nblocksy;
tex->buffer = ws->buffer_create(ws, 64,
- PIPE_BUFFER_USAGE_PIXEL,
- tex_size);
+ PIPE_BUFFER_USAGE_PIXEL,
+ tex_size);
if (!tex->buffer)
goto fail;
@@ -663,7 +664,6 @@ i915_get_tex_surface(struct pipe_screen *screen,
unsigned flags)
{
struct i915_texture *tex = (struct i915_texture *)pt;
- struct pipe_winsys *ws = screen->winsys;
struct pipe_surface *ps;
unsigned offset; /* in bytes */
@@ -682,16 +682,10 @@ i915_get_tex_surface(struct pipe_screen *screen,
ps = CALLOC_STRUCT(pipe_surface);
if (ps) {
ps->refcount = 1;
- ps->winsys = ws;
pipe_texture_reference(&ps->texture, pt);
- pipe_buffer_reference(screen, &ps->buffer, tex->buffer);
ps->format = pt->format;
ps->width = pt->width[level];
ps->height = pt->height[level];
- ps->block = pt->block;
- ps->nblocksx = pt->nblocksx[level];
- ps->nblocksy = pt->nblocksy[level];
- ps->stride = tex->stride;
ps->offset = offset;
ps->usage = flags;
ps->status = PIPE_SURFACE_STATUS_DEFINED;
@@ -756,7 +750,6 @@ i915_tex_surface_release(struct pipe_screen *screen,
}
pipe_texture_reference(&surf->texture, NULL);
- pipe_buffer_reference(screen, &surf->buffer, NULL);
FREE(surf);
}
diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile
index e97146e57c..19182afa75 100644
--- a/src/gallium/drivers/i965simple/Makefile
+++ b/src/gallium/drivers/i965simple/Makefile
@@ -50,5 +50,3 @@ C_SOURCES = \
brw_wm_surface_state.c
include ../../Makefile.template
-
-symlinks:
diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c
index 8494f70493..4d11f8d2ab 100644
--- a/src/gallium/drivers/i965simple/brw_blit.c
+++ b/src/gallium/drivers/i965simple/brw_blit.c
@@ -35,7 +35,7 @@
#include "brw_reg.h"
#include "pipe/p_context.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#define FILE_DEBUG_FLAG DEBUG_BLIT
diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c
index 79d4150383..3668123e2e 100644
--- a/src/gallium/drivers/i965simple/brw_cc.c
+++ b/src/gallium/drivers/i965simple/brw_cc.c
@@ -166,8 +166,8 @@ static void upload_cc_unit( struct brw_context *brw )
cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op(
brw->attribs.DepthStencil->stencil[0].zpass_op);
cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil[0].ref_value;
- cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].write_mask;
- cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].value_mask;
+ cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].writemask;
+ cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].valuemask;
if (brw->attribs.DepthStencil->stencil[1].enabled) {
cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil[1].enabled;
@@ -180,14 +180,14 @@ static void upload_cc_unit( struct brw_context *brw )
cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op(
brw->attribs.DepthStencil->stencil[1].zpass_op);
cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil[1].ref_value;
- cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].write_mask;
- cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].value_mask;
+ cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].writemask;
+ cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].valuemask;
}
/* Not really sure about this:
*/
- if (brw->attribs.DepthStencil->stencil[0].write_mask ||
- brw->attribs.DepthStencil->stencil[1].write_mask)
+ if (brw->attribs.DepthStencil->stencil[0].writemask ||
+ brw->attribs.DepthStencil->stencil[1].writemask)
cc.cc0.stencil_write_enable = 1;
}
@@ -233,7 +233,7 @@ static void upload_cc_unit( struct brw_context *brw )
cc.cc3.alpha_test_func =
brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func);
- cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref);
+ cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref_value);
cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
}
diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c
index 96920df008..c74cbf8d73 100644
--- a/src/gallium/drivers/i965simple/brw_context.c
+++ b/src/gallium/drivers/i965simple/brw_context.c
@@ -37,7 +37,7 @@
#include "brw_tex_layout.h"
#include "brw_winsys.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_context.h"
#include "util/u_memory.h"
#include "pipe/p_screen.h"
diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c
index 824ee7fd6d..904cde8e30 100644
--- a/src/gallium/drivers/i965simple/brw_curbe.c
+++ b/src/gallium/drivers/i965simple/brw_curbe.c
@@ -38,7 +38,7 @@
#include "brw_util.h"
#include "brw_wm.h"
#include "pipe/p_state.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -257,13 +257,13 @@ static void upload_constant_buffer(struct brw_context *brw)
if (brw->vs.prog_data->num_consts) {
/* map the vertex constant buffer and copy to curbe: */
void *data = ws->buffer_map(ws, cbuffer->buffer, 0);
- /* FIXME: this is wrong. the cbuffer->size currently
+ /* FIXME: this is wrong. the cbuffer->buffer->size currently
* represents size of consts + immediates. so if we'll
* have both we'll copy over the end of the buffer
* with the subsequent memcpy */
- memcpy(&buf[offset], data, cbuffer->size);
+ memcpy(&buf[offset], data, cbuffer->buffer->size);
ws->buffer_unmap(ws, cbuffer->buffer);
- offset += cbuffer->size;
+ offset += cbuffer->buffer->size;
}
/*immediates*/
if (brw->vs.prog_data->num_imm) {
diff --git a/src/gallium/drivers/i965simple/brw_draw.c b/src/gallium/drivers/i965simple/brw_draw.c
index 7598e3dc8a..648aaa0da5 100644
--- a/src/gallium/drivers/i965simple/brw_draw.c
+++ b/src/gallium/drivers/i965simple/brw_draw.c
@@ -34,7 +34,7 @@
#include "brw_state.h"
#include "pipe/p_context.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = {
_3DPRIM_POINTLIST,
diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c
index 7c20ea52af..2d9ca3f2ea 100644
--- a/src/gallium/drivers/i965simple/brw_draw_upload.c
+++ b/src/gallium/drivers/i965simple/brw_draw_upload.c
@@ -223,7 +223,7 @@ boolean brw_upload_vertex_buffers( struct brw_context *brw )
break;
}
- vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->pitch;
+ vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->stride;
vbp.vb[i].vb0.bits.pad = 0;
vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
vbp.vb[i].vb0.bits.vb_index = i;
diff --git a/src/gallium/drivers/i965simple/brw_eu_debug.c b/src/gallium/drivers/i965simple/brw_eu_debug.c
index 4a94ddefa6..4adfb0c02f 100644
--- a/src/gallium/drivers/i965simple/brw_eu_debug.c
+++ b/src/gallium/drivers/i965simple/brw_eu_debug.c
@@ -30,7 +30,7 @@
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "brw_eu.h"
diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c
index be812c5da9..99ff4403a5 100644
--- a/src/gallium/drivers/i965simple/brw_misc_state.c
+++ b/src/gallium/drivers/i965simple/brw_misc_state.c
@@ -223,7 +223,7 @@ static void upload_depthbuffer(struct brw_context *brw)
OUT_BATCH(0);
} else {
unsigned int format;
-
+ struct brw_texture *tex = (struct brw_texture *)depth_surface->texture;
assert(depth_surface->block.width == 1);
assert(depth_surface->block.height == 1);
switch (depth_surface->block.size) {
@@ -246,7 +246,7 @@ static void upload_depthbuffer(struct brw_context *brw)
(BRW_TILEWALK_YMAJOR << 26) |
// (depth_surface->region->tiled << 27) |
(BRW_SURFACE_2D << 29));
- OUT_RELOC(depth_surface->buffer,
+ OUT_RELOC(tex->buffer,
PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0);
OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
((depth_surface->stride/depth_surface->block.size - 1) << 6) |
diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c
index ab7cd624b2..b22e105f10 100644
--- a/src/gallium/drivers/i965simple/brw_screen.c
+++ b/src/gallium/drivers/i965simple/brw_screen.c
@@ -27,8 +27,9 @@
#include "util/u_memory.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_string.h"
+#include "util/u_simple_screen.h"
#include "brw_context.h"
#include "brw_screen.h"
@@ -239,6 +240,7 @@ brw_create_screen(struct pipe_winsys *winsys, uint pci_id)
brwscreen->screen.is_format_supported = brw_is_format_supported;
brw_init_screen_texture_funcs(&brwscreen->screen);
+ u_simple_screen_init(&brwscreen->screen);
return &brwscreen->screen;
}
diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c
index af46cb546f..b47f5373f3 100644
--- a/src/gallium/drivers/i965simple/brw_state.c
+++ b/src/gallium/drivers/i965simple/brw_state.c
@@ -30,7 +30,7 @@
*/
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_memory.h"
#include "pipe/p_inlines.h"
#include "pipe/p_shader_tokens.h"
diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c
index 007dc8f9de..e91263cb1f 100644
--- a/src/gallium/drivers/i965simple/brw_state_pool.c
+++ b/src/gallium/drivers/i965simple/brw_state_pool.c
@@ -42,7 +42,7 @@
* the pool.
*/
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "pipe/p_inlines.h"
diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c
index b89756c47b..0a95dce194 100644
--- a/src/gallium/drivers/i965simple/brw_surface.c
+++ b/src/gallium/drivers/i965simple/brw_surface.c
@@ -30,7 +30,7 @@
#include "brw_state.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_tile.h"
#include "util/u_rect.h"
@@ -74,13 +74,15 @@ brw_surface_copy(struct pipe_context *pipe,
pipe->screen->surface_unmap(pipe->screen, dst);
}
else {
+ struct brw_texture *dst_tex = (struct brw_texture *)dst->texture;
+ struct brw_texture *src_tex = (struct brw_texture *)src->texture;
assert(dst->block.width == 1);
assert(dst->block.height == 1);
brw_copy_blit(brw_context(pipe),
do_flip,
dst->block.size,
- (short) src->stride/src->block.size, src->buffer, src->offset, FALSE,
- (short) dst->stride/dst->block.size, dst->buffer, dst->offset, FALSE,
+ (short) src->stride/src->block.size, src_tex->buffer, src->offset, FALSE,
+ (short) dst->stride/dst->block.size, dst_tex->buffer, dst->offset, FALSE,
(short) srcx, (short) srcy, (short) dstx, (short) dsty,
(short) width, (short) height, PIPE_LOGICOP_COPY);
}
@@ -103,12 +105,13 @@ brw_surface_fill(struct pipe_context *pipe,
pipe->screen->surface_unmap(pipe->screen, dst);
}
else {
+ struct brw_texture *tex = (struct brw_texture *)dst->texture;
assert(dst->block.width == 1);
assert(dst->block.height == 1);
brw_fill_blit(brw_context(pipe),
dst->block.size,
(short) dst->stride/dst->block.size,
- dst->buffer, dst->offset, FALSE,
+ tex->buffer, dst->offset, FALSE,
(short) dstx, (short) dsty,
(short) width, (short) height,
value);
diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c
index cc0c665e02..448229ed4e 100644
--- a/src/gallium/drivers/i965simple/brw_tex_layout.c
+++ b/src/gallium/drivers/i965simple/brw_tex_layout.c
@@ -37,7 +37,7 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "brw_context.h"
@@ -296,9 +296,9 @@ brw_texture_create_screen(struct pipe_screen *screen,
if (brw_miptree_layout(tex))
tex->buffer = ws->buffer_create(ws, 64,
- PIPE_BUFFER_USAGE_PIXEL,
- tex->stride *
- tex->total_nblocksy);
+ PIPE_BUFFER_USAGE_PIXEL,
+ tex->stride *
+ tex->total_nblocksy);
if (!tex->buffer) {
FREE(tex);
@@ -322,7 +322,6 @@ brw_texture_release_screen(struct pipe_screen *screen,
__FUNCTION__, (void *) *pt, (*pt)->refcount - 1);
*/
if (--(*pt)->refcount <= 0) {
- struct pipe_winsys *ws = screen->winsys;
struct brw_texture *tex = (struct brw_texture *)*pt;
uint i;
@@ -330,7 +329,7 @@ brw_texture_release_screen(struct pipe_screen *screen,
DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
*/
- winsys_buffer_reference(ws, &tex->buffer, NULL);
+ pipe_buffer_reference(screen, &tex->buffer, NULL);
for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
if (tex->image_offset[i])
@@ -347,7 +346,6 @@ brw_get_tex_surface_screen(struct pipe_screen *screen,
struct pipe_texture *pt,
unsigned face, unsigned level, unsigned zslice)
{
- struct pipe_winsys *ws = screen->winsys;
struct brw_texture *tex = (struct brw_texture *)pt;
struct pipe_surface *ps;
unsigned offset; /* in bytes */
@@ -365,11 +363,10 @@ brw_get_tex_surface_screen(struct pipe_screen *screen,
assert(zslice == 0);
}
- ps = ws->surface_alloc(ws);
+ ps = CALLOC_STRUCT(pipe_surface);
if (ps) {
- assert(ps->format);
- assert(ps->refcount);
- winsys_buffer_reference(ws, &ps->buffer, tex->buffer);
+ ps->refcount = 1;
+ pipe_texture_reference(&ps->texture, pt);
ps->format = pt->format;
ps->width = pt->width[level];
ps->height = pt->height[level];
@@ -378,6 +375,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen,
ps->nblocksy = pt->nblocksy[level];
ps->stride = tex->stride;
ps->offset = offset;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
}
return ps;
}
diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c
index 34dbc0624d..e03d653482 100644
--- a/src/gallium/drivers/i965simple/brw_vs_emit.c
+++ b/src/gallium/drivers/i965simple/brw_vs_emit.c
@@ -1293,7 +1293,7 @@ void brw_vs_emit(struct brw_vs_compile *c)
break;
case TGSI_TOKEN_TYPE_IMMEDIATE: {
struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate;
- /*assert(imm->Immediate.Size == 4);*/
+ assert(imm->Immediate.NrTokens == 4 + 1);
c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float;
c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float;
c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float;
diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c
index 8de565b96c..10161f2d2f 100644
--- a/src/gallium/drivers/i965simple/brw_wm.c
+++ b/src/gallium/drivers/i965simple/brw_wm.c
@@ -111,8 +111,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
if (brw->attribs.DepthStencil->stencil[0].enabled) {
lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
- if (brw->attribs.DepthStencil->stencil[0].write_mask ||
- brw->attribs.DepthStencil->stencil[1].write_mask)
+ if (brw->attribs.DepthStencil->stencil[0].writemask ||
+ brw->attribs.DepthStencil->stencil[1].writemask)
lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
}
diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c
index 1a326f9918..1bab5bfdb3 100644
--- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c
@@ -193,6 +193,7 @@ static void upload_wm_surfaces(struct brw_context *brw )
/* BRW_NEW_FRAMEBUFFER
*/
struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/
+ struct brw_texture *tex = (struct brw_texture *)pipe_surface->texture;
memset(&surf, 0, sizeof(surf));
@@ -204,7 +205,7 @@ static void upload_wm_surfaces(struct brw_context *brw )
surf.ss0.surface_type = BRW_SURFACE_2D;
- surf.ss1.base_addr = brw_buffer_offset( brw, pipe_surface->buffer );
+ surf.ss1.base_addr = brw_buffer_offset( brw, tex->buffer );
surf.ss2.width = pipe_surface->width - 1;
surf.ss2.height = pipe_surface->height - 1;
diff --git a/src/gallium/drivers/nouveau/nouveau_gldefs.h b/src/gallium/drivers/nouveau/nouveau_gldefs.h
new file mode 100644
index 0000000000..ff97aaa9af
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_gldefs.h
@@ -0,0 +1,196 @@
+#ifndef __NOUVEAU_GLDEFS_H__
+#define __NOUVEAU_GLDEFS_H__
+
+/**
+ * Map a PIPE_BLENDFACTOR_* value to its nvgl code.
+ * Unrecognised factors map to 0x0000, the same code as
+ * PIPE_BLENDFACTOR_ZERO.
+ */
+static INLINE unsigned
+nvgl_blend_func(unsigned factor)
+{
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO:               return 0x0000;
+   case PIPE_BLENDFACTOR_ONE:                return 0x0001;
+   case PIPE_BLENDFACTOR_SRC_COLOR:          return 0x0300;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:      return 0x0301;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:          return 0x0302;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      return 0x0303;
+   case PIPE_BLENDFACTOR_DST_ALPHA:          return 0x0304;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:      return 0x0305;
+   case PIPE_BLENDFACTOR_DST_COLOR:          return 0x0306;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:      return 0x0307;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return 0x0308;
+   case PIPE_BLENDFACTOR_CONST_COLOR:        return 0x8001;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:    return 0x8002;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:        return 0x8003;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:    return 0x8004;
+   }
+
+   /* anything else */
+   return 0x0000;
+}
+
+/**
+ * Map a PIPE_BLEND_* equation to its nvgl code.
+ * Unrecognised equations map to 0x8006, the same code as PIPE_BLEND_ADD.
+ */
+static INLINE unsigned
+nvgl_blend_eqn(unsigned func)
+{
+   switch (func) {
+   case PIPE_BLEND_ADD:              return 0x8006;
+   case PIPE_BLEND_MIN:              return 0x8007;
+   case PIPE_BLEND_MAX:              return 0x8008;
+   case PIPE_BLEND_SUBTRACT:         return 0x800a;
+   case PIPE_BLEND_REVERSE_SUBTRACT: return 0x800b;
+   }
+
+   /* anything else */
+   return 0x8006;
+}
+
+/**
+ * Map a PIPE_LOGICOP_* value to its nvgl code.
+ * Unrecognised operations map to 0x1505, the same code as
+ * PIPE_LOGICOP_NOOP.
+ */
+static INLINE unsigned
+nvgl_logicop_func(unsigned func)
+{
+   /* cases listed in ascending order of the returned code */
+   switch (func) {
+   case PIPE_LOGICOP_CLEAR:         return 0x1500;
+   case PIPE_LOGICOP_AND:           return 0x1501;
+   case PIPE_LOGICOP_AND_REVERSE:   return 0x1502;
+   case PIPE_LOGICOP_COPY:          return 0x1503;
+   case PIPE_LOGICOP_AND_INVERTED:  return 0x1504;
+   case PIPE_LOGICOP_NOOP:          return 0x1505;
+   case PIPE_LOGICOP_XOR:           return 0x1506;
+   case PIPE_LOGICOP_OR:            return 0x1507;
+   case PIPE_LOGICOP_NOR:           return 0x1508;
+   case PIPE_LOGICOP_EQUIV:         return 0x1509;
+   case PIPE_LOGICOP_INVERT:        return 0x150a;
+   case PIPE_LOGICOP_OR_REVERSE:    return 0x150b;
+   case PIPE_LOGICOP_COPY_INVERTED: return 0x150c;
+   case PIPE_LOGICOP_OR_INVERTED:   return 0x150d;
+   case PIPE_LOGICOP_NAND:          return 0x150e;
+   case PIPE_LOGICOP_SET:           return 0x150f;
+   }
+
+   /* anything else */
+   return 0x1505;
+}
+
+/**
+ * Map a PIPE_FUNC_* comparison to its nvgl code.
+ * Unrecognised comparisons map to 0x0207, the same code as
+ * PIPE_FUNC_ALWAYS.
+ */
+static INLINE unsigned
+nvgl_comparison_op(unsigned op)
+{
+   switch (op) {
+   case PIPE_FUNC_NEVER:    return 0x0200;
+   case PIPE_FUNC_LESS:     return 0x0201;
+   case PIPE_FUNC_EQUAL:    return 0x0202;
+   case PIPE_FUNC_LEQUAL:   return 0x0203;
+   case PIPE_FUNC_GREATER:  return 0x0204;
+   case PIPE_FUNC_NOTEQUAL: return 0x0205;
+   case PIPE_FUNC_GEQUAL:   return 0x0206;
+   case PIPE_FUNC_ALWAYS:   return 0x0207;
+   }
+
+   /* anything else */
+   return 0x0207;
+}
+
+/**
+ * Map a PIPE_POLYGON_MODE_* value to its nvgl code.
+ * Unrecognised modes map to 0x1b02, the same code as
+ * PIPE_POLYGON_MODE_FILL.
+ */
+static INLINE unsigned
+nvgl_polygon_mode(unsigned mode)
+{
+   if (mode == PIPE_POLYGON_MODE_POINT)
+      return 0x1b00;
+
+   if (mode == PIPE_POLYGON_MODE_LINE)
+      return 0x1b01;
+
+   /* PIPE_POLYGON_MODE_FILL and anything else */
+   return 0x1b02;
+}
+
+/**
+ * Map a PIPE_STENCIL_OP_* value to its nvgl code.
+ * Unrecognised operations map to 0x1e00, the same code as
+ * PIPE_STENCIL_OP_KEEP.
+ */
+static INLINE unsigned
+nvgl_stencil_op(unsigned op)
+{
+   switch (op) {
+   case PIPE_STENCIL_OP_ZERO:      return 0x0000;
+   case PIPE_STENCIL_OP_INVERT:    return 0x150a;
+   case PIPE_STENCIL_OP_KEEP:      return 0x1e00;
+   case PIPE_STENCIL_OP_REPLACE:   return 0x1e01;
+   case PIPE_STENCIL_OP_INCR:      return 0x1e02;
+   case PIPE_STENCIL_OP_DECR:      return 0x1e03;
+   case PIPE_STENCIL_OP_INCR_WRAP: return 0x8507;
+   case PIPE_STENCIL_OP_DECR_WRAP: return 0x8508;
+   }
+
+   /* anything else */
+   return 0x1e00;
+}
+
+/**
+ * Map a PIPE_PRIM_* primitive type to its nvgl code (0x0001..0x000a).
+ * Unrecognised primitives map to 0.
+ */
+static INLINE unsigned
+nvgl_primitive(unsigned prim)
+{
+   switch (prim) {
+   case PIPE_PRIM_POINTS:         return 0x0001;
+   case PIPE_PRIM_LINES:          return 0x0002;
+   case PIPE_PRIM_LINE_LOOP:      return 0x0003;
+   case PIPE_PRIM_LINE_STRIP:     return 0x0004;
+   case PIPE_PRIM_TRIANGLES:      return 0x0005;
+   case PIPE_PRIM_TRIANGLE_STRIP: return 0x0006;
+   case PIPE_PRIM_TRIANGLE_FAN:   return 0x0007;
+   case PIPE_PRIM_QUADS:          return 0x0008;
+   case PIPE_PRIM_QUAD_STRIP:     return 0x0009;
+   case PIPE_PRIM_POLYGON:        return 0x000a;
+   }
+
+   /* anything else */
+   return 0;
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h
new file mode 100644
index 0000000000..54ef1c1291
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_push.h
@@ -0,0 +1,82 @@
+#ifndef __NOUVEAU_PUSH_H__
+#define __NOUVEAU_PUSH_H__
+
+#include "nouveau/nouveau_winsys.h"
+
+#ifndef NOUVEAU_PUSH_CONTEXT
+#error undefined push context
+#endif
+
+#define OUT_RING(data) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ (*pc->nvws->channel->pushbuf->cur++) = (data); \
+} while(0)
+
+#define OUT_RINGp(src,size) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ memcpy(pc->nvws->channel->pushbuf->cur, (src), (size) * 4); \
+ pc->nvws->channel->pushbuf->cur += (size); \
+} while(0)
+
+#define OUT_RINGf(data) do { \
+ union { float v; uint32_t u; } c; \
+ c.v = (data); \
+ OUT_RING(c.u); \
+} while(0)
+
+/* Open a packet of (size) data words for method (mthd) on object (obj).
+ *
+ * If fewer than (size) + 1 words remain in the push buffer, push_flush is
+ * called first asking for that much room.  The header word
+ * (subc << 13) | (size << 18) | mthd is then written and the header plus
+ * payload are deducted from the remaining count up front; the payload
+ * words themselves are expected to be written by the caller afterwards.
+ */
+#define BEGIN_RING(obj,mthd,size) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \
+ pc->nvws->push_flush(pc->nvws, ((size) + 1), NULL); \
+ OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \
+ pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \
+} while(0)
+
+#define BEGIN_RING_NI(obj,mthd,size) do { \
+ BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \
+} while(0)
+
+#define FIRE_RING(fence) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ pc->nvws->push_flush(pc->nvws, 0, fence); \
+} while(0)
+
+#define OUT_RELOC(bo,data,flags,vor,tor) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ pc->nvws->push_reloc(pc->nvws, pc->nvws->channel->pushbuf->cur++, \
+ (bo), (data), (flags), (vor), (tor)); \
+} while(0)
+
+/* Raw data + flags depending on FB/TT buffer */
+#define OUT_RELOCd(bo,data,flags,vor,tor) do { \
+ OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \
+} while(0)
+
+/* FB/TT object handle.
+ *
+ * Emits a reloc whose OR-values are the channel's vram and gart handles.
+ * Note that this macro does not declare `pc` itself: the `pc->...`
+ * arguments are only evaluated inside OUT_RELOC's expansion, after that
+ * macro's own NOUVEAU_PUSH_CONTEXT(pc) declaration.
+ */
+#define OUT_RELOCo(bo,flags) do { \
+ OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \
+ pc->nvws->channel->vram->handle, \
+ pc->nvws->channel->gart->handle); \
+} while(0)
+
+/* Low 32-bits of offset */
+#define OUT_RELOCl(bo,delta,flags) do { \
+ OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \
+} while(0)
+
+/* High 32-bits of offset */
+#define OUT_RELOCh(bo,delta,flags) do { \
+ OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \
+} while(0)
+
+/* A reloc which'll recombine into a NV_DMA_METHOD packet header.
+ *
+ * Reserves (size) + 1 words like BEGIN_RING does (flushing first when the
+ * push buffer is too full), but writes the header word through OUT_RELOCd
+ * so that it is tied to buffer object (bo).
+ */
+#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \
+	NOUVEAU_PUSH_CONTEXT(pc); \
+	if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \
+		/* push_flush takes the winsys, not the channel */ \
+		pc->nvws->push_flush(pc->nvws, ((size) + 1), NULL); \
+	OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \
+		   (flags), 0, 0); \
+	pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \
+} while(0)
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
new file mode 100644
index 0000000000..029b01e17d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -0,0 +1,159 @@
+#ifndef __NOUVEAU_STATEOBJ_H__
+#define __NOUVEAU_STATEOBJ_H__
+
+#include "util/u_debug.h"
+
+struct nouveau_stateobj_reloc {
+ struct pipe_buffer *bo;
+
+ unsigned offset;
+ unsigned packet;
+
+ unsigned data;
+ unsigned flags;
+ unsigned vor;
+ unsigned tor;
+};
+
+struct nouveau_stateobj {
+ int refcount;
+
+ unsigned *push;
+ struct nouveau_stateobj_reloc *reloc;
+
+ unsigned *cur;
+ unsigned cur_packet;
+ unsigned cur_reloc;
+};
+
+/* Allocate an empty state object.
+ *
+ * push  - capacity of the command array, in words
+ * reloc - capacity of the relocation array, in entries
+ *
+ * The object starts with a reference count of 0; take a reference with
+ * so_ref().  Returns NULL if any of the allocations fails.
+ */
+static INLINE struct nouveau_stateobj *
+so_new(unsigned push, unsigned reloc)
+{
+	struct nouveau_stateobj *so;
+
+	so = MALLOC(sizeof(struct nouveau_stateobj));
+	if (!so)
+		return NULL;
+
+	so->refcount = 0;
+	so->push = MALLOC(sizeof(unsigned) * push);
+	so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc);
+
+	/* A zero-sized request may legitimately come back as NULL, so only
+	 * treat NULL as a failure when space was actually asked for.
+	 */
+	if ((push && !so->push) || (reloc && !so->reloc)) {
+		if (so->push)
+			FREE(so->push);
+		if (so->reloc)
+			FREE(so->reloc);
+		FREE(so);
+		return NULL;
+	}
+
+	so->cur = so->push;
+	so->cur_reloc = so->cur_packet = 0;
+
+	return so;
+}
+
+/* Make *pso refer to `ref`, dropping whatever it referred to before.
+ *
+ * `ref` may be NULL to simply release *pso.  The new reference is taken
+ * before the old one is dropped, so re-assigning the same object is safe.
+ * The previous object is destroyed once its count falls to zero or below.
+ */
+static INLINE void
+so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso)
+{
+	struct nouveau_stateobj *so = *pso;
+
+	if (ref) {
+		ref->refcount++;
+	}
+
+	if (so && --so->refcount <= 0) {
+		/* Memory comes from MALLOC in so_new(), so it has to go back
+		 * through the matching FREE wrapper rather than raw free().
+		 */
+		if (so->push)
+			FREE(so->push);
+		if (so->reloc)
+			FREE(so->reloc);
+		FREE(so);
+	}
+
+	*pso = ref;
+}
+
+/* Append one word to the state object and advance the tracked packet
+ * value by 4.
+ */
+static INLINE void
+so_data(struct nouveau_stateobj *so, unsigned data)
+{
+	so->cur[0] = data;
+	so->cur++;
+	so->cur_packet += 4;
+}
+
+/* Append `size` words read from `data` to the state object, advancing the
+ * tracked packet value by 4 per word.
+ */
+static INLINE void
+so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size)
+{
+	unsigned n;
+
+	for (n = 0; n < size; n++)
+		so->cur[n] = data[n];
+
+	so->cur += size;
+	so->cur_packet += 4 * size;
+}
+
+/* Append a packet header for `size` words of method `mthd` on object `gr`.
+ *
+ * cur_packet is primed with a single-word header for (mthd - 4); since
+ * so_data() adds 4 for every word written (including the header below),
+ * cur_packet ends up as the single-word header matching each data word
+ * that follows.  so_reloc() records that value.
+ */
+static INLINE void
+so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
+	  unsigned mthd, unsigned size)
+{
+	const unsigned subc_bits = gr->subc << 13;
+
+	so->cur_packet = subc_bits | (1 << 18) | (mthd - 4);
+	so_data(so, subc_bits | (size << 18) | mthd);
+}
+
+/* Record a relocation against buffer `bo` at the current write position
+ * and append `data` there as a placeholder word.
+ *
+ * The entry remembers the word offset inside the push array and the
+ * current cur_packet value, plus the data/flags/vor/tor arguments that
+ * are later handed to push_reloc.
+ */
+static INLINE void
+so_reloc(struct nouveau_stateobj *so, struct pipe_buffer *bo,
+	 unsigned data, unsigned flags, unsigned vor, unsigned tor)
+{
+	const unsigned slot = so->cur_reloc;
+	struct nouveau_stateobj_reloc *entry = so->reloc + slot;
+
+	so->cur_reloc = slot + 1;
+
+	entry->bo = bo;
+	entry->data = data;
+	entry->flags = flags;
+	entry->vor = vor;
+	entry->tor = tor;
+	/* Both values must be sampled before so_data() moves them on */
+	entry->offset = so->cur - so->push;
+	entry->packet = so->cur_packet;
+
+	so_data(so, data);
+}
+
+/* Print every word written to the state object so far, one per line,
+ * prefixed with its word index.
+ */
+static INLINE void
+so_dump(struct nouveau_stateobj *so)
+{
+	const unsigned count = so->cur - so->push;
+	unsigned n = 0;
+
+	while (n < count) {
+		debug_printf("+0x%04x: 0x%08x\n", n, so->push[n]);
+		n++;
+	}
+}
+
+/* Copy the state object's words into the channel's push buffer and
+ * register each recorded relocation against its copied word.
+ *
+ * Flushes first when the push buffer has fewer free words than needed.
+ * `so` must not be NULL.
+ */
+static INLINE void
+so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so)
+{
+	struct nouveau_pushbuf *pb = nvws->channel->pushbuf;
+	const unsigned words = so->cur - so->push;
+	unsigned n = 0;
+
+	if (pb->remaining < words)
+		nvws->push_flush(nvws, words, NULL);
+	pb->remaining -= words;
+
+	memcpy(pb->cur, so->push, words * 4);
+
+	/* Relocs point at the copies, which start at the current pb->cur */
+	while (n < so->cur_reloc) {
+		struct nouveau_stateobj_reloc *entry = &so->reloc[n];
+
+		nvws->push_reloc(nvws, pb->cur + entry->offset, entry->bo,
+				 entry->data, entry->flags, entry->vor,
+				 entry->tor);
+		n++;
+	}
+
+	pb->cur += words;
+}
+
+/* Emit two NOUVEAU_BO_DUMMY relocations for every relocation recorded in
+ * the state object: one carrying the recorded packet value, one carrying
+ * the recorded data.  Does nothing when `so` is NULL.
+ *
+ * Two push buffer words are reserved per relocation, flushing first if
+ * there is not enough room.
+ */
+static INLINE void
+so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so)
+{
+	struct nouveau_pushbuf *pb = nvws->channel->pushbuf;
+	unsigned words, n;
+
+	if (!so)
+		return;
+
+	words = so->cur_reloc << 1;
+	if (nvws->channel->pushbuf->remaining < words)
+		nvws->push_flush(nvws, words, NULL);
+	nvws->channel->pushbuf->remaining -= words;
+
+	for (n = 0; n < so->cur_reloc; n++) {
+		struct nouveau_stateobj_reloc *entry = &so->reloc[n];
+		/* Only the placement and access bits survive on the first
+		 * of the two markers.
+		 */
+		unsigned packet_flags = entry->flags & (NOUVEAU_BO_VRAM |
+							NOUVEAU_BO_GART |
+							NOUVEAU_BO_RDWR);
+
+		nvws->push_reloc(nvws, pb->cur++, entry->bo, entry->packet,
+				 packet_flags | NOUVEAU_BO_DUMMY, 0, 0);
+		nvws->push_reloc(nvws, pb->cur++, entry->bo, entry->data,
+				 entry->flags | NOUVEAU_BO_DUMMY,
+				 entry->vor, entry->tor);
+	}
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
new file mode 100644
index 0000000000..a10114beab
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_util.h
@@ -0,0 +1,91 @@
+#ifndef __NOUVEAU_UTIL_H__
+#define __NOUVEAU_UTIL_H__
+
+/* Determine how many vertices can be pushed into the command stream.
+ * Where the remaining space isn't large enough to represent all vertices,
+ * split the buffer at primitive boundaries.
+ *
+ * remaining - free space in the command stream
+ * overhead  - amount of that space which must be left for other commands
+ * vpp       - multiplier turning the usable space into a vertex count
+ * mode      - PIPE_PRIM_* primitive type being drawn
+ * start     - index of the first vertex of this draw
+ * count     - number of vertices still to be drawn
+ * restart   - out: index to restart drawing at after a flush
+ *
+ * Returns a count of vertices that can be rendered.  *restart is only
+ * written when the draw has to be split; it is left untouched when 0 is
+ * returned for lack of space or when all `count` vertices fit.
+ */
+static INLINE unsigned
+nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp,
+		   unsigned mode, unsigned start, unsigned count,
+		   unsigned *restart)
+{
+	int max, adj = 0;
+
+	max = remaining - overhead;
+	if (max < 0)
+		return 0;
+
+	max *= vpp;
+	if (max >= count)
+		return count;
+
+	switch (mode) {
+	case PIPE_PRIM_POINTS:
+		break;
+	case PIPE_PRIM_LINES:
+		/* round down to whole lines (was "& 1", which kept only
+		 * the lowest bit) */
+		max = max & ~1;
+		break;
+	case PIPE_PRIM_TRIANGLES:
+		max = max - (max % 3);
+		break;
+	case PIPE_PRIM_QUADS:
+		/* round down to whole quads (was "& 3") */
+		max = max & ~3;
+		break;
+	case PIPE_PRIM_LINE_LOOP:
+	case PIPE_PRIM_LINE_STRIP:
+		if (max < 2)
+			max = 0;
+		adj = 1;
+		break;
+	case PIPE_PRIM_POLYGON:
+	case PIPE_PRIM_TRIANGLE_STRIP:
+	case PIPE_PRIM_TRIANGLE_FAN:
+		if (max < 3)
+			max = 0;
+		adj = 2;
+		break;
+	case PIPE_PRIM_QUAD_STRIP:
+		if (max < 4)
+			max = 0;
+		adj = 3;
+		break;
+	default:
+		assert(0);
+	}
+
+	/* Connected primitives re-send their last `adj` vertices after the
+	 * split.  When nothing could be emitted there is nothing to re-send,
+	 * and backing up would move the restart index below `start`.
+	 */
+	if (max == 0)
+		adj = 0;
+
+	*restart = start + max - adj;
+	return max;
+}
+
+/* Integer base-2 logarithm, rounded towards zero.  Returns 0 for both
+ * 0 and 1.
+ */
+static INLINE unsigned log2i(unsigned i)
+{
+	unsigned result = 0;
+	unsigned shift;
+
+	/* Binary search for the highest set bit: 16, 8, 4, 2, then 1 */
+	for (shift = 16; shift != 0; shift >>= 1) {
+		if (i >> shift) {
+			i >>= shift;
+			result += shift;
+		}
+	}
+
+	return result;
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
new file mode 100644
index 0000000000..4fcadbae3f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -0,0 +1,101 @@
+#ifndef NOUVEAU_WINSYS_H
+#define NOUVEAU_WINSYS_H
+
+#include <stdint.h>
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_defines.h"
+
+#include "nouveau/nouveau_bo.h"
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_class.h"
+#include "nouveau/nouveau_device.h"
+#include "nouveau/nouveau_grobj.h"
+#include "nouveau/nouveau_notifier.h"
+#include "nouveau/nouveau_resource.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+#define NOUVEAU_CAP_HW_VTXBUF (0xbeef0000)
+#define NOUVEAU_CAP_HW_IDXBUF (0xbeef0001)
+
+#define NOUVEAU_TEXTURE_USAGE_LINEAR (1 << 16)
+
+#define NOUVEAU_BUFFER_USAGE_TEXTURE (1 << 16)
+#define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17)
+#define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18)
+
+struct nouveau_winsys {
+ struct nouveau_context *nv;
+
+ struct nouveau_channel *channel;
+
+ int (*res_init)(struct nouveau_resource **heap, unsigned start,
+ unsigned size);
+ int (*res_alloc)(struct nouveau_resource *heap, int size, void *priv,
+ struct nouveau_resource **);
+ void (*res_free)(struct nouveau_resource **);
+
+ int (*push_reloc)(struct nouveau_winsys *, void *ptr,
+ struct pipe_buffer *, uint32_t data,
+ uint32_t flags, uint32_t vor, uint32_t tor);
+ int (*push_flush)(struct nouveau_winsys *, unsigned size,
+ struct pipe_fence_handle **fence);
+
+ int (*grobj_alloc)(struct nouveau_winsys *, int grclass,
+ struct nouveau_grobj **);
+ void (*grobj_free)(struct nouveau_grobj **);
+
+ int (*notifier_alloc)(struct nouveau_winsys *, int count,
+ struct nouveau_notifier **);
+ void (*notifier_free)(struct nouveau_notifier **);
+ void (*notifier_reset)(struct nouveau_notifier *, int id);
+ uint32_t (*notifier_status)(struct nouveau_notifier *, int id);
+ uint32_t (*notifier_retval)(struct nouveau_notifier *, int id);
+ int (*notifier_wait)(struct nouveau_notifier *, int id,
+ int status, double timeout);
+
+ int (*surface_copy)(struct nouveau_winsys *, struct pipe_surface *,
+ unsigned, unsigned, struct pipe_surface *,
+ unsigned, unsigned, unsigned, unsigned);
+ int (*surface_fill)(struct nouveau_winsys *, struct pipe_surface *,
+ unsigned, unsigned, unsigned, unsigned, unsigned);
+
+ struct nouveau_bo *(*get_bo)(struct pipe_buffer *);
+};
+
+extern struct pipe_screen *
+nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv04_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv10_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv20_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv30_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv40_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv50_create(struct pipe_screen *, unsigned pctx_id);
+
+#endif
diff --git a/src/gallium/drivers/nv04/Makefile b/src/gallium/drivers/nv04/Makefile
new file mode 100644
index 0000000000..cf9deea851
--- /dev/null
+++ b/src/gallium/drivers/nv04/Makefile
@@ -0,0 +1,20 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv04
+
+C_SOURCES = \
+ nv04_surface_2d.c \
+ nv04_clear.c \
+ nv04_context.c \
+ nv04_fragprog.c \
+ nv04_fragtex.c \
+ nv04_miptree.c \
+ nv04_prim_vbuf.c \
+ nv04_screen.c \
+ nv04_state.c \
+ nv04_state_emit.c \
+ nv04_surface.c \
+ nv04_vbo.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/nv04/nv04_clear.c b/src/gallium/drivers/nv04/nv04_clear.c
new file mode 100644
index 0000000000..01cacd36fe
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_clear.c
@@ -0,0 +1,12 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv04_context.h"
+
+/* Clear a whole surface by filling its full width x height with
+ * clearValue through the context's surface_fill hook.
+ */
+void
+nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+	   unsigned clearValue)
+{
+	const unsigned w = ps->width;
+	const unsigned h = ps->height;
+
+	pipe->surface_fill(pipe, ps, 0, 0, w, h, clearValue);
+}
diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c
new file mode 100644
index 0000000000..d6710cd892
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_context.c
@@ -0,0 +1,107 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "nv04_context.h"
+#include "nv04_screen.h"
+
+static void
+nv04_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_flush(nv04->draw);
+
+ FIRE_RING(fence);
+}
+
+static void
+nv04_destroy(struct pipe_context *pipe)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ if (nv04->draw)
+ draw_destroy(nv04->draw);
+
+ FREE(nv04);
+}
+
+static void
+nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+/* Emit the initial state for the `fahrenheit` object and fire the ring.
+ *
+ * Each table entry is sent as a one-word packet, in table order.
+ * Always returns TRUE.
+ *
+ * Left disabled, as in the original code:
+ *  - NV04_DX5_TEXTURED_TRIANGLE_NOTIFY <- 0 (requires a valid handle)
+ *  - alternative CONTROL value 1<<20 (no cull)
+ *  - alternative BLEND value 0x24|(1<<6)|(1<<8)
+ *  - NV04_DX5_TEXTURED_TRIANGLE_OFFSET <- SCREEN_OFFSET
+ */
+static boolean
+nv04_init_hwctx(struct nv04_context *nv04)
+{
+	const struct {
+		unsigned mthd;
+		unsigned data;
+	} init[] = {
+		{ NV04_DX5_TEXTURED_TRIANGLE_NOP,      0 },
+		{ NV04_DX5_TEXTURED_TRIANGLE_CONTROL,  0x40182800 },
+		{ NV04_DX5_TEXTURED_TRIANGLE_BLEND,    0x120001a4 },
+		{ NV04_DX5_TEXTURED_TRIANGLE_FORMAT,   0x332213a1 },
+		{ NV04_DX5_TEXTURED_TRIANGLE_FILTER,   0x11001010 },
+		{ NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 0x0 },
+		{ NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 0xff000000 },
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(init) / sizeof(init[0]); n++) {
+		BEGIN_RING(fahrenheit, init[n].mthd, 1);
+		OUT_RING(init[n].data);
+	}
+
+	FIRE_RING (NULL);
+	return TRUE;
+}
+
+/* Create an nv04 rendering context on the given screen.
+ *
+ * pscreen - screen the context belongs to (must be an nv04 screen)
+ * pctx_id - id stored in the new context
+ *
+ * Hooks up the pipe_context entry points, creates the draw module with
+ * wide points/lines, line stipple and point sprites all handed to it
+ * (thresholds 0.0, hardware paths disabled), installs the vbuf stage as
+ * rasterizer and emits the initial hardware state.
+ *
+ * Returns the new context, or NULL if the context or its draw module
+ * cannot be allocated.
+ */
+struct pipe_context *
+nv04_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+	struct nv04_screen *screen = nv04_screen(pscreen);
+	struct pipe_winsys *ws = pscreen->winsys;
+	struct nv04_context *nv04;
+	struct nouveau_winsys *nvws = screen->nvws;
+
+	nv04 = CALLOC(1, sizeof(struct nv04_context));
+	if (!nv04)
+		return NULL;
+	nv04->screen = screen;
+	nv04->pctx_id = pctx_id;
+
+	nv04->nvws = nvws;
+
+	nv04->pipe.winsys = ws;
+	nv04->pipe.screen = pscreen;
+	nv04->pipe.destroy = nv04_destroy;
+	nv04->pipe.set_edgeflags = nv04_set_edgeflags;
+	nv04->pipe.draw_arrays = nv04_draw_arrays;
+	nv04->pipe.draw_elements = nv04_draw_elements;
+	nv04->pipe.clear = nv04_clear;
+	nv04->pipe.flush = nv04_flush;
+
+	nv04_init_surface_functions(nv04);
+	nv04_init_state_functions(nv04);
+
+	nv04->draw = draw_create();
+	assert(nv04->draw);
+	/* The assert vanishes in release builds; fail cleanly there
+	 * instead of handing a NULL draw context to the calls below.
+	 */
+	if (!nv04->draw) {
+		FREE(nv04);
+		return NULL;
+	}
+	draw_wide_point_threshold(nv04->draw, 0.0);
+	draw_wide_line_threshold(nv04->draw, 0.0);
+	draw_enable_line_stipple(nv04->draw, FALSE);
+	draw_enable_point_sprites(nv04->draw, FALSE);
+	draw_set_rasterize_stage(nv04->draw, nv04_draw_vbuf_stage(nv04));
+
+	nv04_init_hwctx(nv04);
+
+	return &nv04->pipe;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h
new file mode 100644
index 0000000000..2842b2c90d
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_context.h
@@ -0,0 +1,151 @@
+#ifndef __NV04_CONTEXT_H__
+#define __NV04_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv04_screen *ctx = nv04->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv04_state.h"
+
+/* Diagnostics on stderr.  NOUVEAU_ERR prefixes the message with the
+ * calling function and line number, NOUVEAU_MSG with "nouveau: ".
+ *
+ * NOTE(review): both expansions already end in ';'.  Used as the sole
+ * body of an `if` that is followed by `else`, the caller's own ';'
+ * becomes an extra empty statement and the `else` no longer parses.
+ * Wrap such uses in braces.
+ */
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+#include "nv04_screen.h"
+
+#define NV04_NEW_VERTPROG (1 << 1)
+#define NV04_NEW_FRAGPROG (1 << 2)
+#define NV04_NEW_BLEND (1 << 3)
+#define NV04_NEW_RAST (1 << 4)
+#define NV04_NEW_CONTROL (1 << 5)
+#define NV04_NEW_VIEWPORT (1 << 6)
+#define NV04_NEW_SAMPLER (1 << 7)
+#define NV04_NEW_FRAMEBUFFER (1 << 8)
+#define NV04_NEW_VTXARRAYS (1 << 9)
+
+struct nv04_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv04_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ int chipset;
+ struct nouveau_notifier *sync;
+
+ uint32_t dirty;
+
+ struct nv04_blend_state *blend;
+ struct nv04_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ struct nv04_fragtex_state fragtex;
+ struct nv04_rasterizer_state *rast;
+ struct nv04_depth_stencil_alpha_state *dsa;
+
+ struct nv04_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned dirty_samplers;
+ unsigned fp_samplers;
+ unsigned vp_samplers;
+
+ uint32_t rt_enable;
+ struct pipe_framebuffer_state *framebuffer;
+ struct pipe_surface *rt;
+ struct pipe_surface *zeta;
+
+ struct {
+ struct pipe_buffer *buffer;
+ uint32_t format;
+ } tex[16];
+
+ unsigned vb_enable;
+ struct {
+ struct pipe_buffer *buffer;
+ unsigned delta;
+ } vb[16];
+
+ float *constbuf[PIPE_SHADER_TYPES][32][4];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+
+ struct vertex_info vertex_info;
+ struct {
+
+ struct nouveau_resource *exec_heap;
+ struct nouveau_resource *data_heap;
+
+ struct nv04_vertex_program *active;
+
+ struct nv04_vertex_program *current;
+ struct pipe_buffer *constant_buf;
+ } vertprog;
+
+ struct {
+ struct nv04_fragment_program *active;
+
+ struct nv04_fragment_program *current;
+ struct pipe_buffer *constant_buf;
+ } fragprog;
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+
+ struct pipe_viewport_state viewport;
+};
+
+static INLINE struct nv04_context *
+nv04_context(struct pipe_context *pipe)
+{
+ return (struct nv04_context *)pipe;
+}
+
+extern void nv04_init_state_functions(struct nv04_context *nv04);
+extern void nv04_init_surface_functions(struct nv04_context *nv04);
+extern void nv04_screen_init_miptree_functions(struct pipe_screen *screen);
+
+/* nv04_clear.c */
+extern void nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv04_draw.c */
+extern struct draw_stage *nv04_draw_render_stage(struct nv04_context *nv04);
+
+/* nv04_fragprog.c */
+extern void nv04_fragprog_bind(struct nv04_context *,
+ struct nv04_fragment_program *);
+extern void nv04_fragprog_destroy(struct nv04_context *,
+ struct nv04_fragment_program *);
+
+/* nv04_fragtex.c */
+extern void nv04_fragtex_bind(struct nv04_context *);
+
+/* nv04_prim_vbuf.c */
+struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 );
+
+/* nv04_state.c and friends */
+extern void nv04_emit_hw_state(struct nv04_context *nv04);
+extern void nv04_state_tex_update(struct nv04_context *nv04);
+
+/* nv04_vbo.c */
+extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv04_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c
new file mode 100644
index 0000000000..8a2af41fe0
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_fragprog.c
@@ -0,0 +1,21 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv04_context.h"
+
+void
+nv04_fragprog_bind(struct nv04_context *nv04, struct nv04_fragment_program *fp)
+{
+}
+
+void
+nv04_fragprog_destroy(struct nv04_context *nv04,
+ struct nv04_fragment_program *fp)
+{
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c
new file mode 100644
index 0000000000..21f990fd53
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_fragtex.c
@@ -0,0 +1,73 @@
+#include "nv04_context.h"
+#include "nouveau/nouveau_util.h"
+
+#define _(m,tf) \
+{ \
+ PIPE_FORMAT_##m, \
+ NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \
+}
+
+struct nv04_texture_format {
+ uint pipe;
+ int format;
+};
+
+static struct nv04_texture_format
+nv04_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8),
+ _(X8R8G8B8_UNORM, X8R8G8B8),
+ _(A1R5G5B5_UNORM, A1R5G5B5),
+ _(A4R4G4B4_UNORM, A4R4G4B4),
+ _(L8_UNORM, Y8 ),
+ _(A8_UNORM, Y8 ),
+};
+
+/* Look up the hardware texture colour format for a pipe format in
+ * nv04_texture_formats.  Logs an error and returns 0 when the format is
+ * not in the table.
+ */
+static uint32_t
+nv04_fragtex_format(uint pipe_format)
+{
+	const unsigned count = sizeof(nv04_texture_formats) /
+			       sizeof(nv04_texture_formats[0]);
+	struct nv04_texture_format *entry = nv04_texture_formats;
+	struct nv04_texture_format *end = entry + count;
+
+	for (; entry != end; entry++) {
+		if (entry->pipe == pipe_format)
+			return entry->format;
+	}
+
+	NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format));
+	return 0;
+}
+
+
+/* Compute the texture FORMAT word for the miptree bound to `unit` and
+ * store it in nv04->fragtex.format.
+ *
+ * Only PIPE_TEXTURE_2D is handled; any other target logs an error and
+ * leaves the stored word untouched.  The word combines the origin flags,
+ * colour format, mipmap level count, log2 of the base width and height,
+ * and clamp-to-edge addressing in both directions.
+ */
+static void
+nv04_fragtex_build(struct nv04_context *nv04, int unit)
+{
+	struct pipe_texture *pt = &nv04->tex_miptree[unit]->base;
+	uint32_t word;
+
+	if (pt->target != PIPE_TEXTURE_2D) {
+		NOUVEAU_ERR("Unknown target %d\n", pt->target);
+		return;
+	}
+
+	word  = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER;
+	word |= NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER;
+	word |= nv04_fragtex_format(pt->format);
+	word |= (pt->last_level + 1) <<
+		NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT;
+	word |= log2i(pt->width[0]) <<
+		NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT;
+	word |= log2i(pt->height[0]) <<
+		NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT;
+	word |= NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
+	word |= NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE;
+
+	nv04->fragtex.format = word;
+}
+
+
+void
+nv04_fragtex_bind(struct nv04_context *nv04)
+{
+ nv04_fragtex_build(nv04, 0);
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c
new file mode 100644
index 0000000000..993c5ef5dd
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_miptree.c
@@ -0,0 +1,177 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv04_context.h"
+#include "nv04_screen.h"
+
+/* Fill in the per-level dimensions, pitch and offset of a miptree and
+ * compute the total size of its backing storage.
+ *
+ * Each level is half the size of the previous one (never below 1).  All
+ * levels share the base level's pitch: the base width in bytes, rounded
+ * up to a multiple of 64.  Levels are laid out back to back.
+ */
+static void
+nv04_miptree_layout(struct nv04_miptree *nv04mt)
+{
+	struct pipe_texture *pt = &nv04mt->base;
+	uint width = pt->width[0], height = pt->height[0];
+	uint offset = 0;
+	int l;
+
+	for (l = 0; l <= pt->last_level; l++) {
+		pt->width[l] = width;
+		pt->height[l] = height;
+
+		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+		/* The pitch is a byte count, so the width in pixels has to
+		 * be scaled by the block size; using the bare pixel width
+		 * under-sizes every format wider than one byte per pixel.
+		 */
+		nv04mt->level[l].pitch = pt->width[0] * pt->block.size;
+		nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63;
+
+		width = MAX2(1, width >> 1);
+		height = MAX2(1, height >> 1);
+	}
+
+	for (l = 0; l <= pt->last_level; l++) {
+		/* NOTE(review): image_offset is stored as a plain offset
+		 * here, while other code in this file CALLOCs/FREEs it like
+		 * a pointer - confirm against the struct declaration.
+		 */
+		nv04mt->level[l].image_offset = offset;
+		offset += nv04mt->level[l].pitch * pt->height[l];
+	}
+
+	nv04mt->total_size = offset;
+}
+
+/* Create a texture from the template `pt`: copy the template, lay out
+ * its mip levels and allocate a 256-aligned pixel buffer of the resulting
+ * total size.  Returns NULL if either allocation fails.
+ */
+static struct pipe_texture *
+nv04_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
+{
+	struct pipe_winsys *ws = pscreen->winsys;
+	const unsigned usage = PIPE_BUFFER_USAGE_PIXEL |
+			       NOUVEAU_BUFFER_USAGE_TEXTURE;
+	struct nv04_miptree *mt = MALLOC(sizeof(struct nv04_miptree));
+
+	if (!mt)
+		return NULL;
+
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+	mt->shadow_tex = NULL;
+	mt->shadow_surface = NULL;
+
+	/* Disabled in the original:
+	 * mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	 */
+
+	nv04_miptree_layout(mt);
+
+	mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size);
+	if (mt->buffer)
+		return &mt->base;
+
+	printf("failed %d byte alloc\n",mt->total_size);
+	FREE(mt);
+	return NULL;
+}
+
+/* Wrap an existing buffer `pb` as a texture described by `pt`, using
+ * stride[0] as the pitch of the single level.
+ *
+ * Only supports 2D, non-mipmapped textures of depth 1 for the moment;
+ * anything else, or an allocation failure, returns NULL.  The texture
+ * takes its own reference on `pb`.
+ */
+static struct pipe_texture *
+nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
+		     const unsigned *stride, struct pipe_buffer *pb)
+{
+	struct nv04_miptree *mt;
+
+	if (pt->target != PIPE_TEXTURE_2D)
+		return NULL;
+	if (pt->last_level != 0)
+		return NULL;
+	if (pt->depth[0] != 1)
+		return NULL;
+
+	mt = CALLOC_STRUCT(nv04_miptree);
+	if (!mt)
+		return NULL;
+
+	mt->base = *pt;
+	mt->base.screen = pscreen;
+	mt->base.refcount = 1;
+
+	mt->level[0].pitch = stride[0];
+	/* NOTE(review): image_offset receives a heap pointer here, whereas
+	 * the layout and surface code in this file treat it as a plain
+	 * offset - confirm the declared type and make the users agree.
+	 */
+	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
+
+	pipe_buffer_reference(pscreen, &mt->buffer, pb);
+	return &mt->base;
+}
+
+/* Drop one reference to *ppt and clear the caller's pointer.  When the
+ * last reference goes away the backing buffer is unreferenced, the
+ * per-level image_offset values are freed, any shadow surface/texture is
+ * released, and the miptree itself is freed.
+ */
+static void
+nv04_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+	struct pipe_texture *pt = *ppt;
+	struct nv04_miptree *mt = (struct nv04_miptree *)pt;
+	int l = 0;
+
+	*ppt = NULL;
+
+	pt->refcount--;
+	if (pt->refcount != 0)
+		return;
+
+	pipe_buffer_reference(pscreen, &mt->buffer, NULL);
+
+	/* NOTE(review): this frees image_offset as if it were a heap
+	 * pointer, but the layout code in this file stores a running offset
+	 * in it - confirm the declared type before relying on this loop.
+	 */
+	while (l <= pt->last_level) {
+		if (mt->level[l].image_offset)
+			FREE(mt->level[l].image_offset);
+		l++;
+	}
+
+	if (mt->shadow_tex) {
+		assert(mt->shadow_surface);
+		pscreen->tex_surface_release(pscreen, &mt->shadow_surface);
+		nv04_miptree_release(pscreen, &mt->shadow_tex);
+	}
+
+	FREE(mt);
+}
+
+/* Create a surface describing mip level `level` of texture `pt`.
+ *
+ * The surface holds a reference on the texture and copies the level's
+ * size, block counts, pitch and offset; face, level, zslice and the
+ * usage flags are recorded as given.  Returns NULL on allocation failure.
+ */
+static struct pipe_surface *
+nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+			 unsigned face, unsigned level, unsigned zslice,
+			 unsigned flags)
+{
+	struct nv04_miptree *nv04mt = (struct nv04_miptree *)pt;
+	struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface);
+
+	if (!surf)
+		return NULL;
+
+	pipe_texture_reference(&surf->texture, pt);
+
+	/* bookkeeping */
+	surf->refcount = 1;
+	surf->status = PIPE_SURFACE_STATUS_DEFINED;
+	surf->usage = flags;
+
+	/* which slice of the texture this is */
+	surf->face = face;
+	surf->level = level;
+	surf->zslice = zslice;
+
+	/* geometry of that level */
+	surf->format = pt->format;
+	surf->block = pt->block;
+	surf->width = pt->width[level];
+	surf->height = pt->height[level];
+	surf->nblocksx = pt->nblocksx[level];
+	surf->nblocksy = pt->nblocksy[level];
+	surf->stride = nv04mt->level[level].pitch;
+	surf->offset = nv04mt->level[level].image_offset;
+
+	return surf;
+}
+
+/* Drop one reference to *psurface and clear the caller's pointer.  The
+ * surface releases its texture reference and is freed once the count is
+ * no longer positive.
+ */
+static void
+nv04_miptree_surface_del(struct pipe_screen *pscreen,
+			 struct pipe_surface **psurface)
+{
+	struct pipe_surface *surf = *psurface;
+
+	*psurface = NULL;
+
+	surf->refcount--;
+	if (surf->refcount <= 0) {
+		pipe_texture_reference(&surf->texture, NULL);
+		FREE(surf);
+	}
+}
+
+void
+nv04_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv04_miptree_create;
+ pscreen->texture_blanket = nv04_miptree_blanket;
+ pscreen->texture_release = nv04_miptree_release;
+ pscreen->get_tex_surface = nv04_miptree_surface_new;
+ pscreen->tex_surface_release = nv04_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
new file mode 100644
index 0000000000..f6458232ae
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
@@ -0,0 +1,321 @@
+
+#include "util/u_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_compiler.h"
+
+#include "draw/draw_vbuf.h"
+
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+#define VERTEX_SIZE 40
+#define VERTEX_BUFFER_SIZE (4096*VERTEX_SIZE) // 4096 vertices of 40 bytes each
+
+/**
+ * Primitive renderer for nv04.
+ */
+struct nv04_vbuf_render {
+ struct vbuf_render base;
+
+ struct nv04_context *nv04;
+
+ /** Vertex buffer */
+ unsigned char* buffer;
+
+ /** Vertex size in bytes */
+ unsigned vertex_size;
+
+ /** Current primitive */
+ unsigned prim;
+};
+
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct nv04_vbuf_render *
+nv04_vbuf_render( struct vbuf_render *render )
+{
+ assert(render);
+ return (struct nv04_vbuf_render *)render;
+}
+
+
+static const struct vertex_info *
+nv04_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+ struct nv04_context *nv04 = nv04_render->nv04;
+ return &nv04->vertex_info;
+}
+
+
+/* Allocate the vertex staging buffer for the vbuf renderer.
+ *
+ * A fixed VERTEX_BUFFER_SIZE bytes is allocated regardless of the
+ * requested vertex_size / nr_vertices.
+ * NOTE(review): nothing here checks that vertex_size * nr_vertices fits
+ * in VERTEX_BUFFER_SIZE - confirm the limits advertised to the draw module.
+ *
+ * Returns TRUE on success, FALSE if the allocation failed.
+ */
+static boolean
+nv04_vbuf_render_allocate_vertices( struct vbuf_render *render,
+				    ushort vertex_size,
+				    ushort nr_vertices )
+{
+	struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+
+	nv04_render->buffer = (unsigned char*) MALLOC(VERTEX_BUFFER_SIZE);
+	/* The check used to be inverted and fired on every successful
+	 * allocation in debug builds.
+	 */
+	assert(nv04_render->buffer);
+
+	return nv04_render->buffer ? TRUE : FALSE;
+}
+
+static void *
+nv04_vbuf_render_map_vertices( struct vbuf_render *render )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+ return nv04_render->buffer;
+}
+
+static void
+nv04_vbuf_render_unmap_vertices( struct vbuf_render *render,
+ ushort min_index,
+ ushort max_index )
+{
+}
+
+/* Select the primitive type for subsequent draws.
+ *
+ * Primitive types up to and including PIPE_PRIM_LINE_STRIP are refused
+ * (FALSE is returned and the current primitive is left unchanged); all
+ * others are stored and TRUE is returned.
+ */
+static boolean
+nv04_vbuf_render_set_primitive( struct vbuf_render *render,
+				unsigned prim )
+{
+	struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+
+	if (prim > PIPE_PRIM_LINE_STRIP) {
+		nv04_render->prim = prim;
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/* Emit two independent triangles in a single 49-word packet: six
+ * vertices written from slot 0xA onwards (8 words are pushed from each
+ * VERTEX_SIZE-strided vertex), then the word 0xFEDCBA naming slots
+ * A,B,C and D,E,F.
+ */
+static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5)
+{
+	const ushort vtx[6] = { v0, v1, v2, v3, v4, v5 };
+	unsigned n;
+
+	BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49);
+	for (n = 0; n < 6; n++) {
+		OUT_RINGp(buffer + VERTEX_SIZE * vtx[n],8);
+	}
+	OUT_RING(0xFEDCBA);
+}
+
+/* Emit one triangle in a single 25-word packet: three vertices written
+ * from slot 0xD onwards (8 words each), then the word 0xFED naming
+ * slots D,E,F.
+ */
+static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2)
+{
+	const ushort vtx[3] = { v0, v1, v2 };
+	unsigned n;
+
+	BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25);
+	for (n = 0; n < 3; n++) {
+		OUT_RINGp(buffer + VERTEX_SIZE * vtx[n],8);
+	}
+	OUT_RING(0xFED);
+}
+
+/* Emit one quad as two triangles in a single 33-word packet: four
+ * vertices written from slot 0xC onwards (8 words each), then the word
+ * 0xFECEDC naming slots C,D,E and C,E,F.
+ */
+static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3)
+{
+	const ushort vtx[4] = { v0, v1, v2, v3 };
+	unsigned n;
+
+	BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33);
+	for (n = 0; n < 4; n++) {
+		OUT_RINGp(buffer + VERTEX_SIZE * vtx[n],8);
+	}
+	OUT_RING(0xFECEDC);
+}
+
+/* Draw an indexed list of independent triangles.
+ *
+ * Triangles are emitted two at a time while at least six indices remain,
+ * followed by a single triangle if three more are left.  Indices that do
+ * not form a complete triangle are not read; an error is logged for them.
+ */
+static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices)
+{
+	unsigned char* buffer = render->buffer;
+	struct nv04_context* nv04 = render->nv04;
+	uint i = 0;
+
+	/* i never exceeds nr_indices, so the subtractions cannot wrap.
+	 * (The old "i < nr_indices-5" bound wrapped for short lists.)
+	 */
+	for (; nr_indices - i >= 6; i += 6)
+		nv04_2triangles(nv04,
+				buffer,
+				indices[i+0],
+				indices[i+1],
+				indices[i+2],
+				indices[i+3],
+				indices[i+4],
+				indices[i+5]
+			       );
+	if (nr_indices - i >= 3)
+	{
+		nv04_1triangle(nv04,
+			       buffer,
+			       indices[i+0],
+			       indices[i+1],
+			       indices[i+2]
+			      );
+		i+=3;
+	}
+	if (i != nr_indices)
+		NOUVEAU_ERR("Houston, we have lost some vertices\n");
+}
+
+/* Draw an indexed triangle strip.
+ *
+ * The strip is sent in batches of up to 16 vertices written from slot 0;
+ * consecutive batches overlap by two vertices (the start advances by 14).
+ * Each batch is followed by draw words from striptbl, two triangles per
+ * word, with the last word masked to one triangle when the batch holds
+ * an odd number of them.  Stops when fewer than 3 vertices are left.
+ */
+static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices)
+{
+	const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC};
+	struct nv04_context* nv04 = render->nv04;
+	unsigned char* buffer = render->buffer;
+	int first, k;
+
+	for (first = 0; first < nr_indices; first += 14) {
+		int numvert = MIN2(16, nr_indices - first);
+		int numtri = numvert - 2;
+		int pairs = numtri / 2;
+
+		if (numvert < 3)
+			break;
+
+		BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 );
+		for (k = 0; k < numvert; k++) {
+			OUT_RINGp( buffer + VERTEX_SIZE * indices [first+k], 8 );
+		}
+
+		BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 );
+		for (k = 0; k < pairs; k++) {
+			OUT_RING(striptbl[k]);
+		}
+		if (numtri % 2) {
+			/* odd count: only the low triangle of the next word */
+			OUT_RING(striptbl[pairs]&0xFFF);
+		}
+	}
+}
+
+/**
+ * Draw an indexed triangle fan.
+ *
+ * The hub vertex (indices[0]) is uploaded once to TLVERTEX slot 0.  Rim
+ * vertices are then uploaded in chunks of up to 15 into slots 1..15 and
+ * drawn with index words from fantbl; each entry carries two 12-bit
+ * triangle descriptors of the form (hub, k, k+1).
+ *
+ * A chunk of N rim vertices yields N-1 triangles.  The loop advances by 14
+ * so the last rim vertex of one chunk is the first of the next.
+ *
+ * \param render      vbuf render object providing the vertex buffer/context
+ * \param indices     vertex indices, hub first
+ * \param nr_indices  number of entries in \p indices
+ */
+static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices)
+{
+    static const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0};
+    unsigned char* buffer = render->buffer;
+    struct nv04_context* nv04 = render->nv04;
+    int i,j;
+
+    /* Nothing to draw (and indices[0] may not exist) below one triangle. */
+    if (nr_indices < 3)
+        return;
+
+    BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
+    OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8);
+
+    for(i = 1; i<nr_indices; i+=14)
+    {
+        int numvert=MIN2(15, nr_indices - i);
+        /* hub + N rim vertices form N-1 triangles */
+        int numtri=numvert-1;
+        if (numvert < 2)
+            break;
+
+        BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
+
+        for(j=0;j<numvert;j++) {
+            OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 );
+        }
+
+        /* one index word per pair of triangles, rounded up */
+        BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2);
+        for(j = 0; j<numtri/2; j++) {
+            OUT_RING(fantbl[j]);
+        }
+        if (numtri%2) {
+            OUT_RING(fantbl[numtri/2]&0xFFF);
+        }
+    }
+}
+
+/**
+ * Draw an indexed list of independent quads, one nv04_1quad() per four
+ * indices.  Trailing indices that do not form a whole quad are ignored
+ * rather than read past the end of the array.
+ *
+ * \param render      vbuf render object providing the vertex buffer/context
+ * \param indices     vertex indices, four per quad
+ * \param nr_indices  number of entries in \p indices
+ */
+static void nv04_vbuf_render_quads_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices)
+{
+    unsigned char* buffer = render->buffer;
+    struct nv04_context* nv04 = render->nv04;
+    uint i;
+
+    /* require a complete group of four before touching indices[i+3] */
+    for (i = 0; i + 3 < nr_indices; i += 4) {
+        nv04_1quad(nv04,
+                buffer,
+                indices[i+0],
+                indices[i+1],
+                indices[i+2],
+                indices[i+3]
+                );
+    }
+}
+
+
+/**
+ * vbuf_render::draw hook: dispatch the index list to the emitter that
+ * matches the primitive type previously stored in nv04_render->prim.
+ */
+static void
+nv04_vbuf_render_draw( struct vbuf_render *render,
+    const ushort *indices,
+    uint nr_indices)
+{
+    struct nv04_vbuf_render *r = nv04_vbuf_render(render);
+
+    switch (r->prim) {
+    case PIPE_PRIM_TRIANGLES:
+        nv04_vbuf_render_triangles_elts(r, indices, nr_indices);
+        return;
+
+    /* quad strips share the triangle-strip emitter */
+    case PIPE_PRIM_QUAD_STRIP:
+    case PIPE_PRIM_TRIANGLE_STRIP:
+        nv04_vbuf_render_tri_strip_elts(r, indices, nr_indices);
+        return;
+
+    /* polygons share the triangle-fan emitter */
+    case PIPE_PRIM_TRIANGLE_FAN:
+    case PIPE_PRIM_POLYGON:
+        nv04_vbuf_render_tri_fan_elts(r, indices, nr_indices);
+        return;
+
+    case PIPE_PRIM_QUADS:
+        nv04_vbuf_render_quads_elts(r, indices, nr_indices);
+        return;
+
+    default:
+        NOUVEAU_ERR("You have to implement primitive %d, young padawan\n", r->prim);
+        return;
+    }
+}
+
+
+/**
+ * vbuf_render::release_vertices hook: free the vertex buffer and clear
+ * the pointer so it cannot be reused.
+ */
+static void
+nv04_vbuf_render_release_vertices( struct vbuf_render *render )
+{
+    struct nv04_vbuf_render *r = nv04_vbuf_render(render);
+
+    free(r->buffer);
+    r->buffer = NULL;
+}
+
+
+/**
+ * vbuf_render::destroy hook: release the render object itself.
+ */
+static void
+nv04_vbuf_render_destroy( struct vbuf_render *render )
+{
+    FREE(nv04_vbuf_render(render));
+}
+
+
+/**
+ * Create a new primitive render.
+ *
+ * Allocates a zeroed nv04_vbuf_render, records the owning context and
+ * fills in the vbuf_render callback table.
+ *
+ * \return the embedded vbuf_render, or NULL if allocation failed
+ */
+static struct vbuf_render *
+nv04_vbuf_render_create( struct nv04_context *nv04 )
+{
+    struct nv04_vbuf_render *nv04_render = CALLOC_STRUCT(nv04_vbuf_render);
+
+    /* the caller already tests for NULL; do not dereference a failed
+     * allocation before it gets the chance */
+    if (!nv04_render)
+        return NULL;
+
+    nv04_render->nv04 = nv04;
+
+    nv04_render->base.max_vertex_buffer_bytes = VERTEX_BUFFER_SIZE;
+    nv04_render->base.max_indices = 65536;
+    nv04_render->base.get_vertex_info = nv04_vbuf_render_get_vertex_info;
+    nv04_render->base.allocate_vertices = nv04_vbuf_render_allocate_vertices;
+    nv04_render->base.map_vertices = nv04_vbuf_render_map_vertices;
+    nv04_render->base.unmap_vertices = nv04_vbuf_render_unmap_vertices;
+    nv04_render->base.set_primitive = nv04_vbuf_render_set_primitive;
+    nv04_render->base.draw = nv04_vbuf_render_draw;
+    nv04_render->base.release_vertices = nv04_vbuf_render_release_vertices;
+    nv04_render->base.destroy = nv04_vbuf_render_destroy;
+
+    return &nv04_render->base;
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ *
+ * Builds the nv04 render backend and wraps it in a draw-module vbuf
+ * stage.  If the stage cannot be created the backend is destroyed again.
+ *
+ * \return the new draw stage, or NULL on failure
+ */
+struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 )
+{
+    struct vbuf_render *backend = nv04_vbuf_render_create(nv04);
+    struct draw_stage *stage;
+
+    if (backend == NULL)
+        return NULL;
+
+    stage = draw_vbuf_stage( nv04->draw, backend );
+    if (stage == NULL)
+        backend->destroy(backend);
+
+    return stage;
+}
diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c
new file mode 100644
index 0000000000..9ef38bc244
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_screen.c
@@ -0,0 +1,237 @@
+#include "pipe/p_screen.h"
+#include "pipe/p_inlines.h"
+#include "util/u_simple_screen.h"
+
+#include "nv04_context.h"
+#include "nv04_screen.h"
+
+/**
+ * pipe_screen::get_name hook: format the chipset id as "NVxx".
+ * The string lives in a function-static buffer that is overwritten by
+ * every call.
+ */
+static const char *
+nv04_screen_get_name(struct pipe_screen *screen)
+{
+    static char buffer[128];
+    struct nouveau_device *dev = nv04_screen(screen)->nvws->channel->device;
+
+    snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+    return buffer;
+}
+
+/* pipe_screen::get_vendor hook: always reports "nouveau"; screen is unused. */
+static const char *
+nv04_screen_get_vendor(struct pipe_screen *screen)
+{
+ return "nouveau";
+}
+
+/**
+ * pipe_screen::get_param hook: report integer capabilities.
+ * Unknown queries are logged and answered with 0.
+ */
+static int
+nv04_screen_get_param(struct pipe_screen *screen, int param)
+{
+    int value = 0;
+
+    switch (param) {
+    /* capabilities reported as 1 */
+    case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+    case PIPE_CAP_MAX_RENDER_TARGETS:
+    case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+        value = 1;
+        break;
+
+    case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+        value = 10;
+        break;
+
+    /* capabilities reported as 0 */
+    case PIPE_CAP_NPOT_TEXTURES:
+    case PIPE_CAP_TWO_SIDED_STENCIL:
+    case PIPE_CAP_GLSL:
+    case PIPE_CAP_S3TC:
+    case PIPE_CAP_ANISOTROPIC_FILTER:
+    case PIPE_CAP_POINT_SPRITE:
+    case PIPE_CAP_OCCLUSION_QUERY:
+    case PIPE_CAP_TEXTURE_SHADOW_MAP:
+    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+    case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+    case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+    case NOUVEAU_CAP_HW_VTXBUF:
+    case NOUVEAU_CAP_HW_IDXBUF:
+        break;
+
+    default:
+        NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+        break;
+    }
+
+    return value;
+}
+
+/**
+ * pipe_screen::get_paramf hook: report floating-point capabilities.
+ * Every query, known or not, is answered with 0.0; unknown ones are
+ * additionally logged.
+ */
+static float
+nv04_screen_get_paramf(struct pipe_screen *screen, int param)
+{
+    switch (param) {
+    case PIPE_CAP_MAX_LINE_WIDTH:
+    case PIPE_CAP_MAX_LINE_WIDTH_AA:
+    case PIPE_CAP_MAX_POINT_WIDTH:
+    case PIPE_CAP_MAX_POINT_WIDTH_AA:
+    case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+    case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+        break;
+    default:
+        NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+        break;
+    }
+
+    return 0.0;
+}
+
+/**
+ * pipe_screen::is_format_supported hook.
+ *
+ * The accepted format set depends only on whether the render-target
+ * usage bit is set in tex_usage; target and geom_flags are ignored.
+ */
+static boolean
+nv04_screen_is_format_supported(struct pipe_screen *screen,
+    enum pipe_format format,
+    enum pipe_texture_target target,
+    unsigned tex_usage, unsigned geom_flags)
+{
+    const int as_target = (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) != 0;
+
+    switch (format) {
+    /* usable both as render target and as texture */
+    case PIPE_FORMAT_A8R8G8B8_UNORM:
+    case PIPE_FORMAT_R5G6B5_UNORM:
+        return TRUE;
+
+    /* render target only */
+    case PIPE_FORMAT_Z16_UNORM:
+        return as_target ? TRUE : FALSE;
+
+    /* texture only */
+    case PIPE_FORMAT_X8R8G8B8_UNORM:
+    case PIPE_FORMAT_A1R5G5B5_UNORM:
+    case PIPE_FORMAT_L8_UNORM:
+    case PIPE_FORMAT_A8_UNORM:
+        return as_target ? FALSE : TRUE;
+
+    default:
+        return FALSE;
+    }
+}
+
+/**
+ * pipe_screen::surface_map hook: map the buffer backing the surface's
+ * miptree and return a pointer to the surface's offset inside it.
+ *
+ * \return mapped address of the surface, or NULL if the map failed
+ */
+static void *
+nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+    unsigned flags )
+{
+    struct nv04_miptree *nv04mt = (struct nv04_miptree *)surface->texture;
+    /* char* so the offset is added in bytes; arithmetic on void* is a
+     * compiler extension, not standard C */
+    char *map;
+
+    map = pipe_buffer_map(screen, nv04mt->buffer, flags);
+    if (!map)
+        return NULL;
+
+    return map + surface->offset;
+}
+
+/**
+ * pipe_screen::surface_unmap hook: unmap the buffer backing the
+ * surface's miptree.
+ */
+static void
+nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+    struct nv04_miptree *mt = (struct nv04_miptree *)surface->texture;
+
+    pipe_buffer_unmap(screen, mt->buffer);
+}
+
+/**
+ * pipe_screen::destroy hook: release every object created by
+ * nv04_screen_create() (notifier, both graphics objects, 2D engine) and
+ * then the screen itself.
+ */
+static void
+nv04_screen_destroy(struct pipe_screen *pscreen)
+{
+    struct nv04_screen *screen = nv04_screen(pscreen);
+    struct nouveau_winsys *nvws = screen->nvws;
+
+    nvws->notifier_free(&screen->sync);
+    /* the 3D surface object is allocated alongside fahrenheit in
+     * nv04_screen_create() and must be released too */
+    nvws->grobj_free(&screen->context_surfaces_3d);
+    nvws->grobj_free(&screen->fahrenheit);
+    nv04_surface_2d_takedown(&screen->eng2d);
+
+    FREE(pscreen);
+}
+
+/**
+ * Return the pipe_buffer that backs a surface's miptree.  Installed as
+ * the 2D engine's buf callback by nv04_screen_create().
+ */
+static struct pipe_buffer *
+nv04_surface_buffer(struct pipe_surface *surf)
+{
+    return ((struct nv04_miptree *)surf->texture)->buffer;
+}
+
+/**
+ * Create the nv04 pipe_screen.
+ *
+ * Picks the 3D object classes from the chipset id, initialises the 2D
+ * engine, allocates the 3D object, the 3D surface object and a sync
+ * notifier, and fills in the pipe_screen callback table.
+ *
+ * On any failure everything created so far is released, including the
+ * screen structure itself.
+ *
+ * \param ws    generic pipe winsys, stored in pipe.winsys
+ * \param nvws  nouveau winsys used for object allocation
+ * \return the new screen, or NULL on failure
+ */
+struct pipe_screen *
+nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+    struct nv04_screen *screen = CALLOC_STRUCT(nv04_screen);
+    unsigned fahrenheit_class = 0, sub3d_class = 0;
+    unsigned chipset = nvws->channel->device->chipset;
+    int ret;
+
+    if (!screen)
+        return NULL;
+    screen->nvws = nvws;
+    screen->chipset = chipset;
+
+    if (chipset>=0x20) {
+        /* not handled by this driver; rejected below */
+        fahrenheit_class = 0;
+        sub3d_class = 0;
+    } else if (chipset>=0x10) {
+        fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE;
+        sub3d_class = NV10_CONTEXT_SURFACES_3D;
+    } else {
+        fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE;
+        sub3d_class = NV04_CONTEXT_SURFACES_3D;
+    }
+
+    if (!fahrenheit_class) {
+        NOUVEAU_ERR("Unknown nv04 chipset: nv%02x\n", chipset);
+        goto fail_screen;
+    }
+
+    /* 2D engine setup */
+    screen->eng2d = nv04_surface_2d_init(nvws);
+    if (!screen->eng2d) {
+        NOUVEAU_ERR("Error initialising 2D engine\n");
+        goto fail_screen;
+    }
+    screen->eng2d->buf = nv04_surface_buffer;
+
+    /* 3D object */
+    ret = nvws->grobj_alloc(nvws, fahrenheit_class, &screen->fahrenheit);
+    if (ret) {
+        NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+        goto fail_2d;
+    }
+
+    /* 3D surface object */
+    ret = nvws->grobj_alloc(nvws, sub3d_class, &screen->context_surfaces_3d);
+    if (ret) {
+        NOUVEAU_ERR("Error creating 3D surface object: %d\n", ret);
+        goto fail_3d;
+    }
+
+    /* Notifier for sync purposes */
+    ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+    if (ret) {
+        NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+        goto fail_sub3d;
+    }
+
+    screen->pipe.winsys = ws;
+    screen->pipe.destroy = nv04_screen_destroy;
+
+    screen->pipe.get_name = nv04_screen_get_name;
+    screen->pipe.get_vendor = nv04_screen_get_vendor;
+    screen->pipe.get_param = nv04_screen_get_param;
+    screen->pipe.get_paramf = nv04_screen_get_paramf;
+
+    screen->pipe.is_format_supported = nv04_screen_is_format_supported;
+
+    screen->pipe.surface_map = nv04_surface_map;
+    screen->pipe.surface_unmap = nv04_surface_unmap;
+
+    nv04_screen_init_miptree_functions(&screen->pipe);
+    u_simple_screen_init(&screen->pipe);
+
+    return &screen->pipe;
+
+    /* unwind in reverse order of creation */
+fail_sub3d:
+    nvws->grobj_free(&screen->context_surfaces_3d);
+fail_3d:
+    nvws->grobj_free(&screen->fahrenheit);
+fail_2d:
+    nv04_surface_2d_takedown(&screen->eng2d);
+fail_screen:
+    FREE(screen);
+    return NULL;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_screen.h b/src/gallium/drivers/nv04/nv04_screen.h
new file mode 100644
index 0000000000..540aec907b
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_screen.h
@@ -0,0 +1,27 @@
+#ifndef __NV04_SCREEN_H__
+#define __NV04_SCREEN_H__
+
+#include "pipe/p_screen.h"
+#include "nv04_surface_2d.h"
+
+/* Driver-private screen; nv04_screen() casts a pipe_screen* to this type. */
+struct nv04_screen {
+ /* base class; first member so the pointer cast in nv04_screen() is valid */
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+ unsigned chipset;
+
+ /* HW graphics objects */
+ struct nv04_surface_2d *eng2d;
+ struct nouveau_grobj *fahrenheit;
+ struct nouveau_grobj *context_surfaces_3d;
+ struct nouveau_notifier *sync;
+
+};
+
+/* Downcast a generic pipe_screen to the nv04 screen that embeds it. */
+static INLINE struct nv04_screen *
+nv04_screen(struct pipe_screen *screen)
+{
+ return (struct nv04_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c
new file mode 100644
index 0000000000..87c635f962
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_state.c
@@ -0,0 +1,458 @@
+#include "draw/draw_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+/**
+ * pipe_context::create_blend_state hook.
+ *
+ * Records whether blending is enabled plus the RGB source/destination
+ * factors.  The factors are kept as PIPE_BLENDFACTOR_* values because
+ * nv04_emit_blend() translates b_src/b_dst with nv04_blend_func(), which
+ * switches on PIPE_BLENDFACTOR_*; storing a pre-translated, packed
+ * rgb/alpha value would never match any of its cases.  The BLEND word
+ * built there has a single source and a single destination field, so only
+ * the RGB factors are used.
+ *
+ * \return new nv04_blend_state, or NULL if allocation failed
+ */
+static void *
+nv04_blend_state_create(struct pipe_context *pipe,
+    const struct pipe_blend_state *cso)
+{
+    struct nv04_blend_state *cb;
+
+    cb = MALLOC(sizeof(struct nv04_blend_state));
+    if (!cb)
+        return NULL;
+
+    cb->b_enable = cso->blend_enable ? 1 : 0;
+    cb->b_src = cso->rgb_src_factor;
+    cb->b_dst = cso->rgb_dst_factor;
+
+    return (void *)cb;
+}
+
+/**
+ * pipe_context::bind_blend_state hook: remember the blend CSO and flag
+ * the blend state for re-emission.
+ */
+static void
+nv04_blend_state_bind(struct pipe_context *pipe, void *blend)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+    struct nv04_blend_state *cso = blend;
+
+    ctx->blend = cso;
+    ctx->dirty |= NV04_NEW_BLEND;
+}
+
+/**
+ * pipe_context::delete_blend_state hook.
+ * The object comes from MALLOC() in nv04_blend_state_create(), so it is
+ * released with the matching FREE() wrapper rather than libc free().
+ */
+static void
+nv04_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+    FREE(hwcso);
+}
+
+
+/**
+ * Translate a PIPE_TEX_WRAP_* mode into the hardware address mode,
+ * returned unshifted (i.e. with the ADDRESSU shift removed) so the
+ * caller can place it in either the U or the V field.
+ * Modes without a hardware equivalent are logged and treated as CLAMP.
+ */
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+    unsigned addr = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+
+    switch (wrap) {
+    case PIPE_TEX_WRAP_REPEAT:
+        addr = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
+        break;
+    case PIPE_TEX_WRAP_MIRROR_REPEAT:
+        addr = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
+        break;
+    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+        addr = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
+        break;
+    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+        addr = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
+        break;
+    case PIPE_TEX_WRAP_CLAMP:
+        /* already the initial value */
+        break;
+    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+    case PIPE_TEX_WRAP_MIRROR_CLAMP:
+    default:
+        NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+        break;
+    }
+
+    return addr >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
+}
+
+/**
+ * pipe_context::create_sampler_state hook.
+ *
+ * Precomputes two hardware words from the sampler CSO:
+ *  - format: U/V address (wrap) modes
+ *  - filter: anisotropy enables plus magnify/minify filter selection
+ *
+ * \return new nv04_sampler_state, or NULL if allocation failed
+ */
+static void *
+nv04_sampler_state_create(struct pipe_context *pipe,
+    const struct pipe_sampler_state *cso)
+{
+
+    struct nv04_sampler_state *ss;
+    uint32_t filter = 0;
+
+    ss = MALLOC(sizeof(struct nv04_sampler_state));
+    if (!ss)
+        return NULL;
+
+    ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
+            (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
+
+    if (cso->max_anisotropy > 1.0) {
+        filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
+    }
+
+    /* magnification: anything other than LINEAR is treated as NEAREST */
+    switch (cso->mag_img_filter) {
+    case PIPE_TEX_FILTER_LINEAR:
+        filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
+        break;
+    case PIPE_TEX_FILTER_NEAREST:
+    default:
+        filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
+        break;
+    }
+
+    /* minification: image filter combined with the mip filter */
+    switch (cso->min_img_filter) {
+    case PIPE_TEX_FILTER_LINEAR:
+        switch (cso->min_mip_filter) {
+        case PIPE_TEX_MIPFILTER_NEAREST:
+            filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+            break;
+        case PIPE_TEX_MIPFILTER_LINEAR:
+            filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+            break;
+        case PIPE_TEX_MIPFILTER_NONE:
+        default:
+            filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
+            break;
+        }
+        break;
+    case PIPE_TEX_FILTER_NEAREST:
+    default:
+        switch (cso->min_mip_filter) {
+        case PIPE_TEX_MIPFILTER_NEAREST:
+            filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+            break;
+        case PIPE_TEX_MIPFILTER_LINEAR:
+            filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+            break;
+        case PIPE_TEX_MIPFILTER_NONE:
+        default:
+            filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
+            break;
+        }
+        break;
+    }
+
+    ss->filter = filter;
+
+    return (void *)ss;
+}
+
+/**
+ * pipe_context::bind_sampler_states hook: store the first nr sampler
+ * CSOs and set the matching bits in dirty_samplers.
+ */
+static void
+nv04_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+    unsigned slot = 0;
+
+    while (slot < nr) {
+        ctx->sampler[slot] = sampler[slot];
+        ctx->dirty_samplers |= (1 << slot);
+        slot++;
+    }
+}
+
+/**
+ * pipe_context::delete_sampler_state hook.
+ * The object comes from MALLOC() in nv04_sampler_state_create(), so it is
+ * released with the matching FREE() wrapper rather than libc free().
+ */
+static void
+nv04_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+    FREE(hwcso);
+}
+
+/**
+ * pipe_context::set_sampler_textures hook: store the first nr textures
+ * (as nv04 miptrees) and set the matching bits in dirty_samplers.
+ */
+static void
+nv04_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+    struct pipe_texture **miptree)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+    unsigned slot = 0;
+
+    while (slot < nr) {
+        ctx->tex_miptree[slot] = (struct nv04_miptree *)miptree[slot];
+        ctx->dirty_samplers |= (1 << slot);
+        slot++;
+    }
+}
+
+/**
+ * pipe_context::create_rasterizer_state hook.
+ *
+ * Stores the shade-mode bits for the BLEND word and keeps a pointer to
+ * the template CSO, which nv04_rasterizer_state_bind() hands to the draw
+ * module.
+ * NOTE(review): only the pointer is kept, so this assumes the caller
+ * keeps the template alive as long as the state object - confirm.
+ *
+ * \return new nv04_rasterizer_state, or NULL if allocation failed
+ */
+static void *
+nv04_rasterizer_state_create(struct pipe_context *pipe,
+    const struct pipe_rasterizer_state *cso)
+{
+    struct nv04_rasterizer_state *rs;
+
+    /*XXX: ignored:
+     * scissor
+     * points/lines (no hw support, emulated with tris in gallium)
+     */
+    rs = MALLOC(sizeof(struct nv04_rasterizer_state));
+    if (!rs)
+        return NULL;
+
+    /* MALLOC does not zero memory; templ is read at bind time */
+    rs->templ = cso;
+    rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
+
+    return (void *)rs;
+}
+
+/**
+ * pipe_context::bind_rasterizer_state hook: remember the CSO, forward
+ * its template (or NULL when unbinding) to the draw module and flag both
+ * rasterizer and blend state as dirty.
+ */
+static void
+nv04_rasterizer_state_bind(struct pipe_context *pipe, void *rast)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+    struct nv04_rasterizer_state *rs = rast;
+
+    ctx->rast = rs;
+
+    if (rs)
+        draw_set_rasterizer_state(ctx->draw, rs->templ);
+    else
+        draw_set_rasterizer_state(ctx->draw, NULL);
+
+    ctx->dirty |= NV04_NEW_RAST | NV04_NEW_BLEND;
+}
+
+/**
+ * pipe_context::delete_rasterizer_state hook.
+ * The object comes from MALLOC() in nv04_rasterizer_state_create(), so it
+ * is released with the matching FREE() wrapper rather than libc free().
+ */
+static void
+nv04_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+    FREE(hwcso);
+}
+
+/**
+ * Translate a PIPE_FUNC_* comparison into the code used in the CONTROL
+ * word (1 = NEVER ... 8 = ALWAYS).  Unknown values are logged and
+ * mapped to 0.
+ */
+static INLINE uint32_t nv04_compare_func(uint32_t f)
+{
+    uint32_t code = 0;
+
+    switch ( f ) {
+    case PIPE_FUNC_NEVER:    code = 1; break;
+    case PIPE_FUNC_LESS:     code = 2; break;
+    case PIPE_FUNC_EQUAL:    code = 3; break;
+    case PIPE_FUNC_LEQUAL:   code = 4; break;
+    case PIPE_FUNC_GREATER:  code = 5; break;
+    case PIPE_FUNC_NOTEQUAL: code = 6; break;
+    case PIPE_FUNC_GEQUAL:   code = 7; break;
+    case PIPE_FUNC_ALWAYS:   code = 8; break;
+    default:
+        NOUVEAU_MSG("Unable to find the function\n");
+        break;
+    }
+
+    return code;
+}
+
+/**
+ * pipe_context::create_depth_stencil_alpha_state hook.
+ *
+ * Packs alpha-test and depth-test settings into the single CONTROL word
+ * that nv04_emit_hw_state() later writes to the hardware.
+ *
+ * \return new nv04_depth_stencil_alpha_state, or NULL if allocation failed
+ */
+static void *
+nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+    const struct pipe_depth_stencil_alpha_state *cso)
+{
+    struct nv04_depth_stencil_alpha_state *hw;
+
+    hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state));
+    if (!hw)
+        return NULL;
+
+    /* alpha reference value occupies the low byte */
+    hw->control = float_to_ubyte(cso->alpha.ref_value);
+    hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
+    hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0;
+    hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
+    hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0;
+    hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
+    hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
+    hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
+    hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
+    hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0;
+    hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
+
+    return (void *)hw;
+}
+
+/**
+ * pipe_context::bind_depth_stencil_alpha_state hook: remember the CSO
+ * and flag the CONTROL word for re-emission.
+ */
+static void
+nv04_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+
+    ctx->dirty |= NV04_NEW_CONTROL;
+    ctx->dsa = hwcso;
+}
+
+/**
+ * pipe_context::delete_depth_stencil_alpha_state hook.
+ * The object comes from MALLOC() in the matching create hook, so it is
+ * released with the matching FREE() wrapper rather than libc free().
+ */
+static void
+nv04_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+    FREE(hwcso);
+}
+
+/**
+ * pipe_context::create_vs_state hook: vertex shaders are created by the
+ * draw module; its shader object is returned as the CSO.
+ */
+static void *
+nv04_vp_state_create(struct pipe_context *pipe,
+    const struct pipe_shader_state *templ)
+{
+    return draw_create_vertex_shader(nv04_context(pipe)->draw, templ);
+}
+
+/**
+ * pipe_context::bind_vs_state hook: bind the shader in the draw module
+ * and flag the vertex program as dirty.
+ */
+static void
+nv04_vp_state_bind(struct pipe_context *pipe, void *shader)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+    struct draw_vertex_shader *vs = shader;
+
+    draw_bind_vertex_shader(ctx->draw, vs);
+    ctx->dirty |= NV04_NEW_VERTPROG;
+}
+
+/**
+ * pipe_context::delete_vs_state hook: hand the shader back to the draw
+ * module for destruction.
+ */
+static void
+nv04_vp_state_delete(struct pipe_context *pipe, void *shader)
+{
+    struct draw_vertex_shader *vs = shader;
+
+    draw_delete_vertex_shader(nv04_context(pipe)->draw, vs);
+}
+
+/**
+ * pipe_context::create_fs_state hook: allocate a zeroed fragment program
+ * and keep a private copy of the TGSI tokens.
+ * NOTE(review): fp->info is not filled in here although
+ * nv04_vertex_layout() reads fp->info.num_inputs - confirm it is scanned
+ * elsewhere.
+ *
+ * \return new nv04_fragment_program, or NULL if allocation failed
+ */
+static void *
+nv04_fp_state_create(struct pipe_context *pipe,
+    const struct pipe_shader_state *cso)
+{
+    struct nv04_fragment_program *fp;
+
+    fp = CALLOC(1, sizeof(struct nv04_fragment_program));
+    if (!fp)
+        return NULL;
+
+    fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+    return (void *)fp;
+}
+
+/**
+ * pipe_context::bind_fs_state hook: make the fragment program current
+ * and flag it for (re)binding at the next state emit.
+ */
+static void
+nv04_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+
+    ctx->fragprog.current = (struct nv04_fragment_program *)hwcso;
+    ctx->dirty |= NV04_NEW_FRAGPROG;
+}
+
+/**
+ * pipe_context::delete_fs_state hook: tear down the driver-side program
+ * state, then release the token copy and the program object.
+ * The object is allocated with CALLOC() in nv04_fp_state_create(), so
+ * the FREE() wrapper is used instead of libc free().
+ * NOTE(review): assumes tgsi_dup_tokens() also allocates through the
+ * MALLOC wrapper - confirm.
+ */
+static void
+nv04_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+    struct nv04_context *nv04 = nv04_context(pipe);
+    struct nv04_fragment_program *fp = hwcso;
+
+    nv04_fragprog_destroy(nv04, fp);
+    FREE((void*)fp->pipe.tokens);
+    FREE(fp);
+}
+
+/* pipe_context::set_blend_color hook: intentionally empty, the blend
+ * colour is ignored by this driver. */
+static void
+nv04_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+}
+
+/* pipe_context::set_clip_state hook: intentionally empty, the clip
+ * state is ignored here. */
+static void
+nv04_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+}
+
+/**
+ * pipe_context::set_constant_buffer hook.
+ *
+ * Copies the contents of a non-empty constant buffer into the context's
+ * per-shader shadow copy and records how many vec4 constants it holds.
+ * A NULL, empty or unmappable buffer leaves the previous contents
+ * untouched.
+ * NOTE(review): the copy is not bounded by the size of
+ * nv04->constbuf[shader], which is declared elsewhere - confirm the
+ * limit is enforced by the caller.
+ */
+static void
+nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+    const struct pipe_constant_buffer *buf )
+{
+    struct nv04_context *nv04 = nv04_context(pipe);
+    struct pipe_winsys *ws = pipe->winsys;
+    void *mapped;
+
+    assert(shader < PIPE_SHADER_TYPES);
+    assert(index == 0);
+
+    if (!buf)
+        return;
+    if (!buf->buffer || !buf->buffer->size)
+        return;
+
+    mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+    if (!mapped)
+        return;
+
+    memcpy(nv04->constbuf[shader], mapped, buf->buffer->size);
+    /* one constant = four floats */
+    nv04->constbuf_nr[shader] =
+        buf->buffer->size / (4 * sizeof(float));
+    ws->buffer_unmap(ws, buf->buffer);
+}
+
+/**
+ * pipe_context::set_framebuffer_state hook: remember the framebuffer
+ * state and flag it for re-emission.
+ * NOTE(review): only the caller's pointer is stored (const is cast
+ * away), not a copy - confirm the caller keeps the state alive until
+ * the next emit.
+ */
+static void
+nv04_set_framebuffer_state(struct pipe_context *pipe,
+    const struct pipe_framebuffer_state *fb)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+
+    ctx->dirty |= NV04_NEW_FRAMEBUFFER;
+    ctx->framebuffer = (struct pipe_framebuffer_state*)fb;
+}
+/* pipe_context::set_polygon_stipple hook: not implemented; only logs an
+ * error and ignores the stipple pattern. */
+static void
+nv04_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ NOUVEAU_ERR("line stipple hahaha\n");
+}
+
+/* pipe_context::set_scissor_state hook: currently a no-op; the hardware
+ * programming below is commented out. */
+static void
+nv04_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+/* struct nv04_context *nv04 = nv04_context(pipe);
+
+ // XXX
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
+ OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
+}
+
+/**
+ * pipe_context::set_viewport_state hook: copy the viewport into the
+ * context and pass that copy on to the draw module.
+ */
+static void
+nv04_set_viewport_state(struct pipe_context *pipe,
+    const struct pipe_viewport_state *viewport)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+
+    ctx->viewport = *viewport;
+    draw_set_viewport_state(ctx->draw, &ctx->viewport);
+}
+
+/**
+ * pipe_context::set_vertex_buffers hook: keep a copy of the buffer
+ * descriptions in the context, flag the vertex arrays as dirty and
+ * forward the buffers to the draw module.
+ */
+static void
+nv04_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+    const struct pipe_vertex_buffer *buffers)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+    const size_t nbytes = count * sizeof(buffers[0]);
+
+    memcpy(ctx->vtxbuf, buffers, nbytes);
+    ctx->dirty |= NV04_NEW_VTXARRAYS;
+
+    draw_set_vertex_buffers(ctx->draw, count, buffers);
+}
+
+/**
+ * pipe_context::set_vertex_elements hook: keep a copy of the element
+ * descriptions in the context, flag the vertex arrays as dirty and
+ * forward the elements to the draw module.
+ */
+static void
+nv04_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+    const struct pipe_vertex_element *elements)
+{
+    struct nv04_context *ctx = nv04_context(pipe);
+    const size_t nbytes = sizeof(*elements) * count;
+
+    memcpy(ctx->vtxelt, elements, nbytes);
+    ctx->dirty |= NV04_NEW_VTXARRAYS;
+
+    draw_set_vertex_elements(ctx->draw, count, elements);
+}
+
+/**
+ * Install all state-related pipe_context callbacks implemented in this
+ * file into the context's embedded pipe_context.
+ */
+void
+nv04_init_state_functions(struct nv04_context *nv04)
+{
+    struct pipe_context *pipe = &nv04->pipe;
+
+    /* blend */
+    pipe->create_blend_state = nv04_blend_state_create;
+    pipe->bind_blend_state = nv04_blend_state_bind;
+    pipe->delete_blend_state = nv04_blend_state_delete;
+
+    /* samplers and their textures */
+    pipe->create_sampler_state = nv04_sampler_state_create;
+    pipe->bind_sampler_states = nv04_sampler_state_bind;
+    pipe->delete_sampler_state = nv04_sampler_state_delete;
+    pipe->set_sampler_textures = nv04_set_sampler_texture;
+
+    /* rasterizer */
+    pipe->create_rasterizer_state = nv04_rasterizer_state_create;
+    pipe->bind_rasterizer_state = nv04_rasterizer_state_bind;
+    pipe->delete_rasterizer_state = nv04_rasterizer_state_delete;
+
+    /* depth / stencil / alpha */
+    pipe->create_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_create;
+    pipe->bind_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_bind;
+    pipe->delete_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_delete;
+
+    /* vertex shader */
+    pipe->create_vs_state = nv04_vp_state_create;
+    pipe->bind_vs_state = nv04_vp_state_bind;
+    pipe->delete_vs_state = nv04_vp_state_delete;
+
+    /* fragment shader */
+    pipe->create_fs_state = nv04_fp_state_create;
+    pipe->bind_fs_state = nv04_fp_state_bind;
+    pipe->delete_fs_state = nv04_fp_state_delete;
+
+    /* non-CSO state */
+    pipe->set_blend_color = nv04_set_blend_color;
+    pipe->set_clip_state = nv04_set_clip_state;
+    pipe->set_constant_buffer = nv04_set_constant_buffer;
+    pipe->set_framebuffer_state = nv04_set_framebuffer_state;
+    pipe->set_polygon_stipple = nv04_set_polygon_stipple;
+    pipe->set_scissor_state = nv04_set_scissor_state;
+    pipe->set_viewport_state = nv04_set_viewport_state;
+
+    /* vertex inputs */
+    pipe->set_vertex_buffers = nv04_set_vertex_buffers;
+    pipe->set_vertex_elements = nv04_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h
new file mode 100644
index 0000000000..15d4685ec1
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_state.h
@@ -0,0 +1,74 @@
+#ifndef __NV04_STATE_H__
+#define __NV04_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+/* Blend CSO: filled by nv04_blend_state_create(), read by nv04_emit_blend(). */
+struct nv04_blend_state {
+ uint32_t b_enable; /* 1 when blending is enabled, else 0 */
+ uint32_t b_src; /* source blend factor */
+ uint32_t b_dst; /* destination blend factor */
+};
+
+/* Texture format word; presumably the type of nv04->fragtex, whose
+ * format is ORed with the sampler format when a texture is emitted -
+ * TODO confirm against nv04_context.h. */
+struct nv04_fragtex_state {
+ uint32_t format;
+};
+
+/* Sampler CSO: hardware words precomputed by nv04_sampler_state_create(). */
+struct nv04_sampler_state {
+ uint32_t filter; /* anisotropy enables + magnify/minify filter bits */
+ uint32_t format; /* U/V address (wrap) mode bits */
+};
+
+/* Depth/stencil/alpha CSO: a single precomputed CONTROL word built by
+ * nv04_depth_stencil_alpha_state_create(). */
+struct nv04_depth_stencil_alpha_state {
+ uint32_t control;
+};
+
+/* Rasterizer CSO. */
+struct nv04_rasterizer_state {
+ /* shade-mode bits (flat or gouraud), set by nv04_rasterizer_state_create() */
+ uint32_t blend;
+
+ /* template passed to draw_set_rasterizer_state() at bind time */
+ const struct pipe_rasterizer_state *templ;
+};
+
+/* Driver-private texture: pipe_texture pointers are cast to this type. */
+struct nv04_miptree {
+ /* base class; first member so pointer casts from pipe_texture work */
+ struct pipe_texture base;
+
+ /* backing storage, mapped by nv04_surface_map() */
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct pipe_texture *shadow_tex;
+ struct pipe_surface *shadow_surface;
+
+ /* per-mipmap-level layout; presumably byte pitch and offset into
+ * buffer - TODO confirm against nv04_miptree.c */
+ struct {
+ uint pitch;
+ uint image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+/* Element type of nv04_fragment_program::consts.
+ * NOTE(review): field meanings are not visible from this header - verify
+ * against the fragment program translator. */
+struct nv04_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+/* Fragment shader CSO created by nv04_fp_state_create(). */
+struct nv04_fragment_program {
+ /* pipe.tokens holds a private copy of the TGSI tokens */
+ struct pipe_shader_state pipe;
+ /* shader summary; num_inputs and input semantics are read by
+ * nv04_vertex_layout() */
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv04_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+};
+
+
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c
new file mode 100644
index 0000000000..bd8ef1adbf
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_state_emit.c
@@ -0,0 +1,223 @@
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+/**
+ * Build a draw-module vertex_info from the inputs of the current
+ * fragment program: position and colour are emitted with linear
+ * interpolation, every other semantic with perspective interpolation,
+ * all as 4-float attributes.
+ *
+ * The vertex_info is a local and is not stored anywhere, so apart from
+ * the debug printf this function has no lasting effect.
+ * NOTE(review): confirm whether the result was meant to be kept in the
+ * context.
+ */
+static void nv04_vertex_layout(struct pipe_context* pipe)
+{
+    struct nv04_context *nv04 = nv04_context(pipe);
+    struct nv04_fragment_program *fp = nv04->fragprog.current;
+    uint32_t src = 0;
+    int i;
+    struct vertex_info vinfo;
+
+    memset(&vinfo, 0, sizeof(vinfo));
+
+    for (i = 0; i < fp->info.num_inputs; i++) {
+        switch (fp->info.input_semantic_name[i]) {
+        case TGSI_SEMANTIC_POSITION:
+        case TGSI_SEMANTIC_COLOR:
+            draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++);
+            break;
+        case TGSI_SEMANTIC_GENERIC:
+        case TGSI_SEMANTIC_FOG:
+        default:
+            draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
+            break;
+        }
+    }
+
+    printf("%d vertex input\n",fp->info.num_inputs);
+    draw_compute_vertex_size(&vinfo);
+}
+
+/**
+ * Translate a PIPE_BLENDFACTOR_* value into the 4-bit code placed in
+ * the BLEND word.  Unknown factors are logged and mapped to 0.
+ */
+static uint32_t nv04_blend_func(uint32_t f)
+{
+    uint32_t code = 0;
+
+    switch ( f ) {
+    case PIPE_BLENDFACTOR_ZERO:               code = 0x1; break;
+    case PIPE_BLENDFACTOR_ONE:                code = 0x2; break;
+    case PIPE_BLENDFACTOR_SRC_COLOR:          code = 0x3; break;
+    case PIPE_BLENDFACTOR_INV_SRC_COLOR:      code = 0x4; break;
+    case PIPE_BLENDFACTOR_SRC_ALPHA:          code = 0x5; break;
+    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      code = 0x6; break;
+    case PIPE_BLENDFACTOR_DST_ALPHA:          code = 0x7; break;
+    case PIPE_BLENDFACTOR_INV_DST_ALPHA:      code = 0x8; break;
+    case PIPE_BLENDFACTOR_DST_COLOR:          code = 0x9; break;
+    case PIPE_BLENDFACTOR_INV_DST_COLOR:      code = 0xA; break;
+    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: code = 0xB; break;
+    default:
+        NOUVEAU_MSG("Unable to find the blend function 0x%x\n",f);
+        break;
+    }
+
+    return code;
+}
+
+/**
+ * Write the precomputed depth/stencil/alpha CONTROL word to the
+ * hardware.
+ */
+static void nv04_emit_control(struct nv04_context* nv04)
+{
+    BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+    OUT_RING(nv04->dsa->control);
+}
+
+/*
+ * Assemble and emit the BLEND word from fixed texture/shading settings
+ * plus the bound blend CSO (enable bit and source/destination factors).
+ *
+ * nv04_blend_func() switches on PIPE_BLENDFACTOR_* values.
+ * NOTE(review): confirm b_src/b_dst are stored in that encoding.
+ * NOTE(review): the shade mode is hard-coded here; the rasterizer CSO's
+ * blend field is not consulted - confirm this is intended.
+ */
+static void nv04_emit_blend(struct nv04_context* nv04)
+{
+ uint32_t blend;
+
+ blend=0x4; // texture MODULATE_ALPHA
+ blend|=0x20; // alpha is MSB
+ blend|=(2<<6); // flat shading
+ blend|=(1<<8); // persp correct
+ blend|=(0<<16); // no fog
+ blend|=(nv04->blend->b_enable<<20);
+ blend|=(nv04_blend_func(nv04->blend->b_src)<<24);
+ blend|=(nv04_blend_func(nv04->blend->b_dst)<<28);
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+ OUT_RING(blend);
+}
+
+/**
+ * Emit texture state for one unit: three consecutive dwords starting at
+ * the OFFSET method - the texture buffer address (relocated), the
+ * combined texture/sampler format word (relocated), and the sampler's
+ * filter word.
+ *
+ * \param nv04  context holding tex_miptree[], sampler[] and fragtex
+ * \param unit  index into tex_miptree[] / sampler[]
+ */
+static void nv04_emit_sampler(struct nv04_context *nv04, int unit)
+{
+    struct nv04_miptree *nv04mt = nv04->tex_miptree[unit];
+
+    BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3);
+    OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+    OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+    OUT_RING(nv04->sampler[unit]->filter);
+}
+
+/**
+ * Program the 3D surface object from the bound framebuffer state:
+ * colour format, colour/depth pitches, colour buffer address and, when a
+ * depth buffer is bound, its address.
+ *
+ * When no depth buffer is bound the colour pitch is written into the
+ * depth pitch field as well, instead of dereferencing a missing surface.
+ * An unsupported colour format trips the assert; in builds without
+ * asserts nothing is emitted rather than an undefined format word.
+ *
+ * NOTE(review): cbufs[0] is dereferenced unconditionally, so a colour
+ * buffer must always be bound - confirm callers guarantee this.
+ */
+static void nv04_state_emit_framebuffer(struct nv04_context* nv04)
+{
+    struct pipe_framebuffer_state* fb = nv04->framebuffer;
+    struct pipe_surface *rt, *zeta = NULL;
+    uint32_t rt_format, zeta_pitch, w, h;
+    int colour_format = 0;
+    struct nv04_miptree *nv04mt = 0;
+
+    w = fb->cbufs[0]->width;
+    h = fb->cbufs[0]->height;
+    colour_format = fb->cbufs[0]->format;
+    rt = fb->cbufs[0];
+
+    if (fb->zsbuf) {
+        if (colour_format) {
+            assert(w == fb->zsbuf->width);
+            assert(h == fb->zsbuf->height);
+        } else {
+            w = fb->zsbuf->width;
+            h = fb->zsbuf->height;
+        }
+
+        zeta = fb->zsbuf;
+    }
+    /* w/h are only consumed by the asserts above */
+    (void)w;
+    (void)h;
+
+    switch (colour_format) {
+    case PIPE_FORMAT_A8R8G8B8_UNORM:
+    case 0:
+        rt_format = 0x108;
+        break;
+    case PIPE_FORMAT_R5G6B5_UNORM:
+        rt_format = 0x103;
+        break;
+    default:
+        assert(0);
+        return;
+    }
+
+    BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1);
+    OUT_RING(rt_format);
+
+    /* without a depth buffer, reuse the colour pitch for the zeta field */
+    zeta_pitch = zeta ? zeta->stride : rt->stride;
+
+    nv04mt = (struct nv04_miptree *)rt->texture;
+    /* FIXME pitches have to be aligned ! */
+    BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+    OUT_RING(rt->stride|(zeta_pitch<<16));
+    OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+    if (zeta) {
+        nv04mt = (struct nv04_miptree *)zeta->texture;
+        BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+        OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+    }
+}
+
+/**
+ * Flush all dirty driver state to the hardware, clearing each dirty bit
+ * as it is handled, then re-emit relocations for the render target,
+ * depth buffer and bound textures.
+ *
+ * \param nv04  context whose dirty flags and bound state are consumed
+ */
+void
+nv04_emit_hw_state(struct nv04_context *nv04)
+{
+    int i;
+
+    if (nv04->dirty & NV04_NEW_VERTPROG) {
+        //nv04_vertprog_bind(nv04, nv04->vertprog.current);
+        nv04->dirty &= ~NV04_NEW_VERTPROG;
+    }
+
+    if (nv04->dirty & NV04_NEW_FRAGPROG) {
+        nv04_fragprog_bind(nv04, nv04->fragprog.current);
+        nv04->dirty &= ~NV04_NEW_FRAGPROG;
+        /* NOTE(review): the bit set here is cleared again on the next
+         * line, so dirty_samplers always ends up 0 - confirm intent */
+        nv04->dirty_samplers |= (1<<10);
+        nv04->dirty_samplers = 0;
+    }
+
+    if (nv04->dirty & NV04_NEW_CONTROL) {
+        nv04->dirty &= ~NV04_NEW_CONTROL;
+
+        BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+        OUT_RING(nv04->dsa->control);
+    }
+
+    if (nv04->dirty & NV04_NEW_BLEND) {
+        nv04->dirty &= ~NV04_NEW_BLEND;
+
+        nv04_emit_blend(nv04);
+    }
+
+    if (nv04->dirty & NV04_NEW_VTXARRAYS) {
+        nv04->dirty &= ~NV04_NEW_VTXARRAYS;
+        /* nv04_vertex_layout() takes the pipe_context, not the nv04 one */
+        nv04_vertex_layout(&nv04->pipe);
+    }
+
+    if (nv04->dirty & NV04_NEW_SAMPLER) {
+        nv04->dirty &= ~NV04_NEW_SAMPLER;
+
+        nv04_emit_sampler(nv04, 0);
+    }
+
+    if (nv04->dirty & NV04_NEW_VIEWPORT) {
+        nv04->dirty &= ~NV04_NEW_VIEWPORT;
+//      nv04_state_emit_viewport(nv04);
+    }
+
+    if (nv04->dirty & NV04_NEW_FRAMEBUFFER) {
+        nv04->dirty &= ~NV04_NEW_FRAMEBUFFER;
+        nv04_state_emit_framebuffer(nv04);
+    }
+
+    /* Emit relocs for every referenced buffer.
+     * This is to ensure the bufmgr has an accurate idea of how
+     * the buffer is used. This isn't very efficient, but we don't
+     * seem to take a significant performance hit. Will be improved
+     * at some point. Vertex arrays are emitted by nv04_vbo.c
+     */
+
+    /* Render target */
+    BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+    /* zeta may be NULL (it is tested below); fall back to the colour
+     * pitch instead of dereferencing it */
+    OUT_RING(nv04->rt->stride|((nv04->zeta ? nv04->zeta->stride : nv04->rt->stride)<<16));
+    OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+    if (nv04->zeta) {
+        BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+        OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+    }
+
+    /* Texture images */
+    for (i = 0; i < 1; i++) {
+        struct nv04_miptree *nv04mt;
+
+        if (!(nv04->fp_samplers & (1 << i)))
+            continue;
+        nv04mt = nv04->tex_miptree[i];
+        BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2);
+        OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+        OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+    }
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_surface.c b/src/gallium/drivers/nv04/nv04_surface.c
new file mode 100644
index 0000000000..14abf16679
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_surface.c
@@ -0,0 +1,72 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv04_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+/*
+ * pipe_context::surface_copy hook: copy a width x height rectangle from
+ * (srcx, srcy) in src to (destx, desty) in dest through the screen's 2D
+ * engine helper.
+ *
+ * When do_flip is set the rectangle is mirrored vertically: source row
+ * srcy + k lands on destination row desty + height - 1 - k, so the rows
+ * written are exactly desty .. desty + height - 1.
+ */
+static void
+nv04_surface_copy(struct pipe_context *pipe, boolean do_flip,
+		  struct pipe_surface *dest, unsigned destx, unsigned desty,
+		  struct pipe_surface *src, unsigned srcx, unsigned srcy,
+		  unsigned width, unsigned height)
+{
+	struct nv04_context *nv04 = nv04_context(pipe);
+	struct nv04_surface_2d *eng2d = nv04->screen->eng2d;
+	unsigned row;
+
+	if (!do_flip) {
+		eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy,
+			    width, height);
+		return;
+	}
+
+	/* One single-row copy per line, walking the source downwards and the
+	 * destination upwards.  The first destination row is the LAST row of
+	 * the rectangle (desty + height - 1); starting at desty + height
+	 * would write one row past the rectangle and never touch row desty.
+	 */
+	for (row = 0; row < height; row++) {
+		eng2d->copy(eng2d, dest, destx, desty + height - 1 - row,
+			    src, srcx, srcy + row, width, 1);
+	}
+}
+
+/*
+ * pipe_context::surface_fill hook: hand the fill request straight to the
+ * fill callback of the screen's 2D engine helper.
+ */
+static void
+nv04_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+		  unsigned destx, unsigned desty, unsigned width,
+		  unsigned height, unsigned value)
+{
+	struct nv04_surface_2d *engine = nv04_context(pipe)->screen->eng2d;
+
+	engine->fill(engine, dest, destx, desty, width, height, value);
+}
+
+/*
+ * Install this file's copy and fill implementations as the context's
+ * surface_copy and surface_fill hooks.
+ */
+void
+nv04_init_surface_functions(struct nv04_context *nv04)
+{
+ nv04->pipe.surface_copy = nv04_surface_copy;
+ nv04->pipe.surface_fill = nv04_surface_fill;
+}
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
new file mode 100644
index 0000000000..230cfd17dd
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -0,0 +1,448 @@
+#include "pipe/p_context.h"
+#include "pipe/p_format.h"
+#include "util/u_memory.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_util.h"
+#include "nv04_surface_2d.h"
+
+/*
+ * Map a pipe format to the matching NV04_CONTEXT_SURFACES_2D_FORMAT_*
+ * value.  Returns -1 for formats that have no entry here.
+ */
+static INLINE int
+nv04_surface_format(enum pipe_format format)
+{
+	int hw_format = -1;
+
+	switch (format) {
+	case PIPE_FORMAT_A8_UNORM:
+		hw_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
+		break;
+	case PIPE_FORMAT_R16_SNORM:
+	case PIPE_FORMAT_R5G6B5_UNORM:
+		hw_format = NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
+		break;
+	case PIPE_FORMAT_X8R8G8B8_UNORM:
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+		hw_format = NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8;
+		break;
+	case PIPE_FORMAT_Z24S8_UNORM:
+		hw_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
+		break;
+	default:
+		break;
+	}
+
+	return hw_format;
+}
+
+/*
+ * Map a pipe format to the NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_* value
+ * used for fills.  Returns -1 for formats that have no entry here.
+ */
+static INLINE int
+nv04_rect_format(enum pipe_format format)
+{
+	int hw_format = -1;
+
+	switch (format) {
+	case PIPE_FORMAT_R5G6B5_UNORM:
+		hw_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
+		break;
+	/* These three formats all share the A8R8G8B8 colour format. */
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case PIPE_FORMAT_Z24S8_UNORM:
+		hw_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
+		break;
+	default:
+		break;
+	}
+
+	return hw_format;
+}
+
+/*
+ * Map a pipe format to the NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_*
+ * value describing the source of a scaled-image transfer.  Returns -1 for
+ * formats that have no entry here.
+ */
+static INLINE int
+nv04_scaled_image_format(enum pipe_format format)
+{
+	int hw_format = -1;
+
+	switch (format) {
+	case PIPE_FORMAT_A1R5G5B5_UNORM:
+		hw_format = NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5;
+		break;
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+		hw_format = NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
+		break;
+	case PIPE_FORMAT_X8R8G8B8_UNORM:
+		hw_format = NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
+		break;
+	case PIPE_FORMAT_R5G6B5_UNORM:
+	case PIPE_FORMAT_R16_SNORM:
+		hw_format = NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
+		break;
+	default:
+		break;
+	}
+
+	return hw_format;
+}
+
+/*
+ * Interleave the low 12 bits of x and y: bit i of x moves to bit 2*i of
+ * the result and bit i of y moves to bit 2*i + 1.  Higher bits of both
+ * inputs are ignored.
+ */
+static INLINE unsigned
+nv04_swizzle_bits(unsigned x, unsigned y)
+{
+	unsigned interleaved = 0;
+	unsigned bit;
+
+	for (bit = 0; bit < 12; bit++) {
+		const unsigned mask = 1u << bit;
+
+		interleaved |= (x & mask) << bit;
+		interleaved |= (y & mask) << (bit + 1);
+	}
+
+	return interleaved;
+}
+
+/*
+ * Copy src into dst through the swizzled-surface object (swzsurf) and the
+ * scaled-image-from-memory object (sifm).
+ *
+ * The transfer is split into tiles of at most 1024 x 1024 pixels; each tile
+ * gets its own destination offset (computed with nv04_swizzle_bits()) and
+ * its own source offset.
+ *
+ * w and h must be powers of two (asserted).  dx, dy, sx and sy are accepted
+ * but never read in this function, so the copy always covers the region
+ * starting at the origin of both surfaces.
+ *
+ * Always returns 0.
+ *
+ * NOTE(review): the results of nv04_surface_format() and
+ * nv04_scaled_image_format() are used without checking for -1.
+ * NOTE(review): unlike the m2mf, blit and fill paths in this file, no
+ * WAIT_RING is issued here - confirm BEGIN_RING reserves space by itself.
+ */
+static int
+nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
+ struct pipe_surface *dst, int dx, int dy,
+ struct pipe_surface *src, int sx, int sy,
+ int w, int h)
+{
+ struct nouveau_channel *chan = ctx->nvws->channel;
+ struct nouveau_grobj *swzsurf = ctx->swzsurf;
+ struct nouveau_grobj *sifm = ctx->sifm;
+ struct nouveau_bo *src_bo = ctx->nvws->get_bo(ctx->buf(src));
+ struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst));
+ /* Largest tile handled by one sifm transfer in this function. */
+ const unsigned max_w = 1024;
+ const unsigned max_h = 1024;
+ const unsigned sub_w = w > max_w ? max_w : w;
+ const unsigned sub_h = h > max_h ? max_h : h;
+ unsigned cx;
+ unsigned cy;
+
+ /* POT or GTFO */
+ assert(!(w & (w - 1)) && !(h & (h - 1)));
+
+ /* Destination: bind the buffer and describe its format and log2 size. */
+ BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
+ OUT_RELOCo(chan, dst_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1);
+ OUT_RING (chan, nv04_surface_format(dst->format) |
+ log2i(w) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
+ log2i(h) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);
+
+ /* Source: bind the buffer and point sifm at the swizzled surface. */
+ BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
+ OUT_RELOCo(chan, src_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1);
+ OUT_RING (chan, swzsurf->handle);
+
+ /* One transfer per sub_w x sub_h tile. */
+ for (cy = 0; cy < h; cy += sub_h) {
+ for (cx = 0; cx < w; cx += sub_w) {
+ /* Destination offset of this tile: interleaved (cx, cy) index
+ * times the destination block size. */
+ BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
+ OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx, cy) *
+ dst->block.size, NOUVEAU_BO_GART |
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
+ OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
+ OUT_RING (chan, nv04_scaled_image_format(src->format));
+ OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sub_h << 16 | sub_w);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sub_h << 16 | sub_w);
+ OUT_RING (chan, 1 << 20);
+ OUT_RING (chan, 1 << 20);
+
+ /* Source size, pitch/flags and the linear offset of this tile. */
+ BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SIZE, 4);
+ OUT_RING (chan, sub_h << 16 | sub_w);
+ OUT_RING (chan, src->stride |
+ NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
+ NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
+ OUT_RELOCl(chan, src_bo, src->offset + cy * src->stride +
+ cx * src->block.size, NOUVEAU_BO_GART |
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, 0);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Copy a w x h rectangle from (sx, sy) in src to (dx, dy) in dst with the
+ * memory-to-memory-format object.  No pixel format is programmed: each
+ * line is transferred as w * src->block.size bytes, using the source and
+ * destination strides as line pitches.
+ *
+ * The copy is issued in chunks of at most 2047 lines.  Always returns 0.
+ */
+static int
+nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
+ struct pipe_surface *dst, int dx, int dy,
+ struct pipe_surface *src, int sx, int sy, int w, int h)
+{
+ struct nouveau_channel *chan = ctx->nvws->channel;
+ struct nouveau_grobj *m2mf = ctx->m2mf;
+ struct nouveau_bo *src_bo = ctx->nvws->get_bo(ctx->buf(src));
+ struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst));
+ unsigned dst_offset, src_offset;
+
+ /* Byte offsets of the first pixel of the rectangle in each buffer. */
+ dst_offset = dst->offset + (dy * dst->stride) + (dx * dst->block.size);
+ src_offset = src->offset + (sy * src->stride) + (sx * src->block.size);
+
+ /* 3 words for the DMA setup below plus 9 words (header + 8 values) for
+ * each chunk emitted by the loop. */
+ WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9);
+ BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
+ OUT_RELOCo(chan, src_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCo(chan, dst_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ while (h) {
+ /* Lines handled by this chunk (capped at 2047). */
+ int count = (h > 2047) ? 2047 : h;
+
+ BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ OUT_RELOCl(chan, src_bo, src_offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, dst_bo, dst_offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RING (chan, src->stride);
+ OUT_RING (chan, dst->stride);
+ OUT_RING (chan, w * src->block.size);
+ OUT_RING (chan, count);
+ /* NOTE(review): 0x0101 is presumably the in/out byte increment of 1 -
+ * confirm against the object documentation. */
+ OUT_RING (chan, 0x0101);
+ OUT_RING (chan, 0);
+
+ /* Advance both buffers past the lines just queued. */
+ h -= count;
+ src_offset += src->stride * count;
+ dst_offset += dst->stride * count;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy a w x h rectangle from (sx, sy) in src to (dx, dy) in dst with the
+ * image-blit object, using the 2D context-surfaces object to describe both
+ * buffers.
+ *
+ * Returns 1, before emitting any command, when nv04_surface_format() has no
+ * entry for dst->format; returns 0 once the blit has been queued.
+ */
+static int
+nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
+ int dx, int dy, struct pipe_surface *src, int sx, int sy,
+ int w, int h)
+{
+ struct nouveau_channel *chan = ctx->nvws->channel;
+ struct nouveau_grobj *surf2d = ctx->surf2d;
+ struct nouveau_grobj *blit = ctx->blit;
+ struct nouveau_bo *src_bo = ctx->nvws->get_bo(ctx->buf(src));
+ struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst));
+ int format;
+
+ format = nv04_surface_format(dst->format);
+ if (format < 0)
+ return 1;
+
+ /* 3 + 5 + 4 words are emitted below. */
+ WAIT_RING (chan, 12);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+ OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
+ OUT_RING (chan, format);
+ /* Destination pitch in the high half, source pitch in the low half. */
+ OUT_RING (chan, (dst->stride << 16) | src->stride);
+ OUT_RELOCl(chan, src_bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ /* NOTE(review): 0x0300 is a raw method offset; presumably the blit
+ * object's source-point method, followed by destination point and size -
+ * confirm. */
+ BEGIN_RING(chan, blit, 0x0300, 3);
+ OUT_RING (chan, (sy << 16) | sx);
+ OUT_RING (chan, (dy << 16) | dx);
+ OUT_RING (chan, ( h << 16) | w);
+
+ return 0;
+}
+
+/*
+ * 2D engine copy entry point (installed as nv04_surface_2d::copy).
+ *
+ * Chooses one of three paths:
+ *  - linear source to non-linear destination, larger than one pixel in
+ *    both directions: the swizzling path;
+ *  - any offset or stride not a multiple of 64: the memory-to-memory path;
+ *  - otherwise the image-blit path, falling back to the memory-to-memory
+ *    path when the blit reports that it cannot handle the format.
+ *
+ * src and dst must have the same format (asserted).
+ */
+static void
+nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
+		  int dx, int dy, struct pipe_surface *src, int sx, int sy,
+		  int w, int h)
+{
+	int src_linear = src->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR;
+	int dst_linear = dst->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR;
+
+	assert(src->format == dst->format);
+
+	/* Setup transfer to swizzle the texture to vram if needed */
+	if (src_linear && !dst_linear && w > 1 && h > 1) {
+		nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h);
+		return;
+	}
+
+	/* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback
+	 * to NV_MEMORY_TO_MEMORY_FORMAT in this case.
+	 */
+	if ((src->offset & 63) || (dst->offset & 63) ||
+	    (src->stride & 63) || (dst->stride & 63)) {
+		nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h);
+		return;
+	}
+
+	/* The blit path returns non-zero, before emitting any command, when
+	 * it has no surface format for dst->format.  Do not drop the copy
+	 * silently in that case: the memory-to-memory path does not program
+	 * a pixel format, so use it instead.
+	 */
+	if (nv04_surface_copy_blit(ctx, dst, dx, dy, src, sx, sy, w, h))
+		nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h);
+}
+
+/*
+ * 2D engine fill entry point (installed as nv04_surface_2d::fill): fill the
+ * w x h rectangle at (dx, dy) of dst with value, using the GDI rectangle
+ * object drawing into the 2D context-surfaces object.
+ *
+ * dst->format must be known to both nv04_surface_format() and
+ * nv04_rect_format(); this is only enforced with assert().
+ */
+static void
+nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
+ int dx, int dy, int w, int h, unsigned value)
+{
+ struct nouveau_channel *chan = ctx->nvws->channel;
+ struct nouveau_grobj *surf2d = ctx->surf2d;
+ struct nouveau_grobj *rect = ctx->rect;
+ struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst));
+ int cs2d_format, gdirect_format;
+
+ cs2d_format = nv04_surface_format(dst->format);
+ assert(cs2d_format >= 0);
+
+ gdirect_format = nv04_rect_format(dst->format);
+ assert(gdirect_format >= 0);
+
+ WAIT_RING (chan, 16);
+ /* The destination buffer is bound as both source and destination. */
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+ OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
+ OUT_RING (chan, cs2d_format);
+ OUT_RING (chan, (dst->stride << 16) | dst->stride);
+ OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1);
+ OUT_RING (chan, gdirect_format);
+ BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1);
+ OUT_RING (chan, value);
+ /* Rectangle origin and size; note x / w sit in the high 16 bits here,
+ * the opposite packing from the blit path's point words. */
+ BEGIN_RING(chan, rect,
+ NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2);
+ OUT_RING (chan, (dx << 16) | dy);
+ OUT_RING (chan, ( w << 16) | h);
+}
+
+/*
+ * Destroy a 2D engine helper: release its notifier and every graphics
+ * object it owns, free the structure and reset the caller's pointer to
+ * NULL.  Passing NULL, or a pointer to NULL, is a no-op.
+ */
+void
+nv04_surface_2d_takedown(struct nv04_surface_2d **pctx)
+{
+	struct nv04_surface_2d *eng;
+
+	if (pctx == NULL)
+		return;
+
+	eng = *pctx;
+	if (eng == NULL)
+		return;
+
+	/* Clear the caller's handle before tearing anything down. */
+	*pctx = NULL;
+
+	nouveau_notifier_free(&eng->ntfy);
+	nouveau_grobj_free(&eng->m2mf);
+	nouveau_grobj_free(&eng->surf2d);
+	nouveau_grobj_free(&eng->swzsurf);
+	nouveau_grobj_free(&eng->rect);
+	nouveau_grobj_free(&eng->blit);
+	nouveau_grobj_free(&eng->sifm);
+
+	FREE(eng);
+}
+
+/*
+ * Create the 2D engine helper for nvws->channel.
+ *
+ * Allocates a notifier and the m2mf, context-surfaces-2D, image-blit, GDI
+ * rectangle, swizzled-surface and scaled-image objects (picking the object
+ * class from the channel's chipset where it differs), emits their one-time
+ * setup and installs the copy / fill callbacks.
+ *
+ * The buf callback is NOT set here; the caller must fill it in before the
+ * copy or fill callbacks are used.
+ *
+ * Returns the new helper, or NULL on allocation failure or when the chipset
+ * family is not one this function knows a swizzled-surface class for.  On
+ * failure everything allocated so far is released.
+ */
+struct nv04_surface_2d *
+nv04_surface_2d_init(struct nouveau_winsys *nvws)
+{
+	struct nv04_surface_2d *ctx = CALLOC_STRUCT(nv04_surface_2d);
+	struct nouveau_channel *chan = nvws->channel;
+	unsigned handle = 0x88000000, class;
+	int ret;
+
+	if (!ctx)
+		return NULL;
+
+	ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy);
+	if (ret)
+		goto fail;
+
+	ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf);
+	if (ret)
+		goto fail;
+
+	BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
+	OUT_RING  (chan, ctx->ntfy->handle);
+
+	if (chan->device->chipset < 0x10)
+		class = NV04_CONTEXT_SURFACES_2D;
+	else
+		class = NV10_CONTEXT_SURFACES_2D;
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d);
+	if (ret)
+		goto fail;
+
+	BEGIN_RING(chan, ctx->surf2d,
+		   NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+	OUT_RING  (chan, chan->vram->handle);
+	OUT_RING  (chan, chan->vram->handle);
+
+	if (chan->device->chipset < 0x10)
+		class = NV04_IMAGE_BLIT;
+	else
+		class = NV12_IMAGE_BLIT;
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit);
+	if (ret)
+		goto fail;
+
+	BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1);
+	OUT_RING  (chan, ctx->ntfy->handle);
+	BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1);
+	OUT_RING  (chan, ctx->surf2d->handle);
+	BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_OPERATION, 1);
+	OUT_RING  (chan, NV04_IMAGE_BLIT_OPERATION_SRCCOPY);
+
+	ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT,
+				  &ctx->rect);
+	if (ret)
+		goto fail;
+
+	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1);
+	OUT_RING  (chan, ctx->ntfy->handle);
+	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1);
+	OUT_RING  (chan, ctx->surf2d->handle);
+	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
+	OUT_RING  (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY);
+	BEGIN_RING(chan, ctx->rect,
+		   NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1);
+	OUT_RING  (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE);
+
+	switch (chan->device->chipset & 0xf0) {
+	case 0x00:
+	case 0x10:
+		class = NV04_SWIZZLED_SURFACE;
+		break;
+	case 0x20:
+		class = NV20_SWIZZLED_SURFACE;
+		break;
+	case 0x30:
+		class = NV30_SWIZZLED_SURFACE;
+		break;
+	case 0x40:
+	case 0x60:
+		class = NV40_SWIZZLED_SURFACE;
+		break;
+	default:
+		/* Famous last words: this really can't happen.. */
+		assert(0);
+		/* With asserts compiled out, "class" would still hold the
+		 * image-blit class chosen above; bail out instead of
+		 * allocating the swizzled surface with the wrong class.
+		 */
+		goto fail;
+	}
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf);
+	if (ret)
+		goto fail;
+
+	if (chan->device->chipset < 0x10)
+		class = NV04_SCALED_IMAGE_FROM_MEMORY;
+	else if (chan->device->chipset < 0x40)
+		class = NV10_SCALED_IMAGE_FROM_MEMORY;
+	else
+		class = NV40_SCALED_IMAGE_FROM_MEMORY;
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm);
+	if (ret)
+		goto fail;
+
+	ctx->nvws = nvws;
+	ctx->copy = nv04_surface_copy;
+	ctx->fill = nv04_surface_fill;
+	return ctx;
+
+fail:
+	/* Releases whatever was allocated so far and frees ctx. */
+	nv04_surface_2d_takedown(&ctx);
+	return NULL;
+}
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h
new file mode 100644
index 0000000000..21b8f86960
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.h
@@ -0,0 +1,29 @@
+#ifndef __NV04_SURFACE_2D_H__
+#define __NV04_SURFACE_2D_H__
+
+/*
+ * State of the 2D engine helper: the winsys it was created for, one
+ * notifier, the graphics objects used by the copy / fill paths, and the
+ * callbacks through which the helper is driven.
+ */
+struct nv04_surface_2d {
+ struct nouveau_winsys *nvws;
+ struct nouveau_notifier *ntfy;
+ struct nouveau_grobj *surf2d;
+ struct nouveau_grobj *swzsurf;
+ struct nouveau_grobj *m2mf;
+ struct nouveau_grobj *rect;
+ struct nouveau_grobj *blit;
+ struct nouveau_grobj *sifm;
+
+ /* Returns the pipe_buffer backing a surface.  Not set by
+ * nv04_surface_2d_init(); the owner of the helper must provide it. */
+ struct pipe_buffer *(*buf)(struct pipe_surface *);
+
+ /* Copy the w x h rectangle at (sx, sy) in src to (dx, dy) in dst. */
+ void (*copy)(struct nv04_surface_2d *, struct pipe_surface *dst,
+ int dx, int dy, struct pipe_surface *src, int sx, int sy,
+ int w, int h);
+ /* Fill the w x h rectangle at (dx, dy) in dst with value. */
+ void (*fill)(struct nv04_surface_2d *, struct pipe_surface *dst,
+ int dx, int dy, int w, int h, unsigned value);
+};
+
+struct nv04_surface_2d *
+nv04_surface_2d_init(struct nouveau_winsys *nvws);
+
+void
+nv04_surface_2d_takedown(struct nv04_surface_2d **);
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c
new file mode 100644
index 0000000000..d21a0e34f7
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_vbo.c
@@ -0,0 +1,78 @@
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+/*
+ * pipe_context draw entry point, routed through the draw module.
+ *
+ * Emits pending hardware state, maps every bound vertex buffer and the
+ * optional index buffer for CPU reads, hands the mappings and the vertex
+ * shader constants to the draw module, runs draw_arrays(), then unmaps
+ * everything again.  indexBuffer may be NULL for non-indexed drawing.
+ *
+ * Always returns TRUE.  The pointers returned by buffer_map are passed on
+ * without being checked.
+ */
+boolean nv04_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct nv04_context *nv04 = nv04_context( pipe );
+ struct draw_context *draw = nv04->draw;
+ unsigned i;
+
+ nv04_emit_hw_state(nv04);
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv04->vtxbuf[i].buffer) {
+ void *buf
+ = pipe->winsys->buffer_map(pipe->winsys,
+ nv04->vtxbuf[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes
+ = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ draw_set_mapped_constant_buffer(draw,
+ nv04->constbuf[PIPE_SHADER_VERTEX],
+ nv04->constbuf_nr[PIPE_SHADER_VERTEX]);
+
+ /* draw! */
+ /* The same call serves indexed and non-indexed drawing; the element
+ * buffer set above decides which one happens. */
+ draw_arrays(nv04->draw, prim, start, count);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv04->vtxbuf[i].buffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, nv04->vtxbuf[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ }
+ if (indexBuffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ return TRUE;
+}
+
+/*
+ * pipe_context::draw_arrays hook: non-indexed drawing is nv04_draw_elements()
+ * without an index buffer.  Returns whatever nv04_draw_elements() returns.
+ */
+boolean nv04_draw_arrays( struct pipe_context *pipe,
+			  unsigned prim, unsigned start, unsigned count)
+{
+	/* The leftover debug printf that fired on every draw call is gone. */
+	return nv04_draw_elements(pipe, NULL, 0, prim, start, count);
+}
+
+
+
diff --git a/src/gallium/drivers/nv10/Makefile b/src/gallium/drivers/nv10/Makefile
new file mode 100644
index 0000000000..2b5fbd4f5a
--- /dev/null
+++ b/src/gallium/drivers/nv10/Makefile
@@ -0,0 +1,19 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv10
+
+C_SOURCES = \
+ nv10_clear.c \
+ nv10_context.c \
+ nv10_fragprog.c \
+ nv10_fragtex.c \
+ nv10_miptree.c \
+ nv10_prim_vbuf.c \
+ nv10_screen.c \
+ nv10_state.c \
+ nv10_state_emit.c \
+ nv10_surface.c \
+ nv10_vbo.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/nv10/nv10_clear.c b/src/gallium/drivers/nv10/nv10_clear.c
new file mode 100644
index 0000000000..be7e09cf4b
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_clear.c
@@ -0,0 +1,12 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv10_context.h"
+
+/*
+ * pipe_context::clear hook: clear a whole surface by filling the rectangle
+ * (0, 0) - (ps->width, ps->height) with clearValue through the context's
+ * surface_fill hook.
+ */
+void
+nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+}
diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c
new file mode 100644
index 0000000000..ef2c0c5d9f
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_context.c
@@ -0,0 +1,296 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "nv10_context.h"
+#include "nv10_screen.h"
+
+/*
+ * pipe_context::flush hook: flush the draw module, then fire the command
+ * ring, passing the caller's fence pointer along.  The flags argument is
+ * not looked at.
+ */
+static void
+nv10_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ /* The ring macros expect a local named nv10 (see NOUVEAU_PUSH_CONTEXT). */
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ draw_flush(nv10->draw);
+
+ FIRE_RING(fence);
+}
+
+/*
+ * pipe_context::destroy hook: destroy the draw module instance, if one was
+ * created, and free the context itself.
+ */
+static void
+nv10_destroy(struct pipe_context *pipe)
+{
+	struct nv10_context *ctx = nv10_context(pipe);
+	struct draw_context *draw = ctx->draw;
+
+	if (draw != NULL)
+		draw_destroy(draw);
+
+	FREE(ctx);
+}
+
+/*
+ * Emit the initial state for the screen's celsius 3D object: DMA objects,
+ * viewport clip rectangles, default render state, default vertex
+ * attributes, an identity projection matrix, the depth range and the
+ * viewport scale.  The ring is fired at the end.
+ *
+ * The ring macros used here expect the local named nv10.
+ */
+static void nv10_init_hwctx(struct nv10_context *nv10)
+{
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_winsys *nvws = screen->nvws;
+ int i;
+ float projectionmatrix[16];
+
+ /* Notifier and DMA objects. */
+ BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1);
+ OUT_RING (screen->sync->handle);
+ BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->gart->handle);
+ BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->vram->handle);
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+
+ /* Clip rectangle 0 gets a non-zero range; rectangles 1..7 are zeroed. */
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+ OUT_RING ((0x7ff<<16)|0x800);
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
+ OUT_RING ((0x7ff<<16)|0x800);
+
+ for (i=1;i<8;i++) {
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
+ OUT_RING (0);
+ }
+
+ /* NOTE(review): 0x290 and 0x3f4 are raw method offsets with no symbolic
+ * name here; their meaning cannot be told from this file. */
+ BEGIN_RING(celsius, 0x290, 1);
+ OUT_RING ((0x10<<16)|1);
+ BEGIN_RING(celsius, 0x3f4, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ if (nv10->screen->celsius->grclass != NV10TCL) {
+ /* For nv11, nv17 */
+ BEGIN_RING(celsius, 0x120, 3);
+ OUT_RING (0);
+ OUT_RING (1);
+ OUT_RING (2);
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+ }
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ /* Set state */
+ BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2);
+ OUT_RING (0x207);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2);
+ OUT_RING (0);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12);
+ OUT_RING (0x30141010);
+ OUT_RING (0);
+ OUT_RING (0x20040000);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0x00000c00);
+ OUT_RING (0);
+ OUT_RING (0x00000c00);
+ OUT_RING (0x18000000);
+ OUT_RING (0x300e0300);
+ OUT_RING (0x0c091c80);
+
+ BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2);
+ OUT_RING (1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4);
+ OUT_RING (1);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0x8006);
+ BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8);
+ OUT_RING (0xff);
+ OUT_RING (0x207);
+ OUT_RING (0);
+ OUT_RING (0xff);
+ OUT_RING (0x1e00);
+ OUT_RING (0x1e00);
+ OUT_RING (0x1e00);
+ OUT_RING (0x1d01);
+ BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1);
+ OUT_RING (0);
+ /* FOG_ENABLE is written a second time here, now as a 2-word burst. */
+ BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
+ OUT_RING (0x201);
+ BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
+ OUT_RING (8);
+ BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1);
+ OUT_RING (8);
+ /* LINE_SMOOTH_ENABLE was already written to 0 above. */
+ BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (0x1b02);
+ OUT_RING (0x1b02);
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
+ OUT_RING (0x405);
+ OUT_RING (0x901);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8);
+ for (i=0;i<8;i++) {
+ OUT_RING (0);
+ }
+ BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
+ OUT_RING (0x3fc00000); /* bit pattern of +1.50f; NOTE(review): this comment used to say -1.50, which would be 0xbfc00000 - confirm which is intended */
+ OUT_RING (0xbdb8aa0a); /* -0.09 */
+ OUT_RING (0); /* 0.00 */
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2);
+ OUT_RING (0x802);
+ OUT_RING (2);
+ /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
+ * using texturing, except when using the texture matrix
+ */
+ BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
+ OUT_RING (6);
+ BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
+ OUT_RING (0x01010101);
+
+ /* Set vertex component */
+ BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4);
+ OUT_RINGf (1.0);
+ OUT_RINGf (1.0);
+ OUT_RINGf (1.0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1);
+ OUT_RINGf (0.0);
+ BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (1);
+
+ /* Build a 4x4 identity matrix and upload it as the projection matrix. */
+ memset(projectionmatrix, 0, sizeof(projectionmatrix));
+ BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16);
+ projectionmatrix[0*4+0] = 1.0;
+ projectionmatrix[1*4+1] = 1.0;
+ projectionmatrix[2*4+2] = 1.0;
+ projectionmatrix[3*4+3] = 1.0;
+ for (i=0;i<16;i++) {
+ OUT_RINGf (projectionmatrix[i]);
+ }
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
+ /* NOTE(review): this float-valued word goes through OUT_RING, not
+ * OUT_RINGf like its neighbour - presumably harmless for 0.0; confirm. */
+ OUT_RING (0.0);
+ OUT_RINGf (16777216.0);
+
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (16777215.0 * 0.5);
+ OUT_RING (0);
+
+ FIRE_RING (NULL);
+}
+
+/*
+ * pipe_context::set_edgeflags hook.  Intentionally empty in this driver:
+ * the bitfield is ignored.
+ */
+static void
+nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+/*
+ * Create an nv10 pipe context on pscreen.
+ *
+ * Allocates the context, installs the pipe_context hooks, creates the draw
+ * module instance with the vbuf stage as its rasterize stage, and emits the
+ * initial hardware state.
+ *
+ * Returns the embedded pipe_context, or NULL if the context or the draw
+ * module instance cannot be allocated.
+ */
+struct pipe_context *
+nv10_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+	struct nv10_screen *screen = nv10_screen(pscreen);
+	struct pipe_winsys *ws = pscreen->winsys;
+	struct nv10_context *nv10;
+	struct nouveau_winsys *nvws = screen->nvws;
+
+	nv10 = CALLOC_STRUCT(nv10_context);
+	if (!nv10)
+		return NULL;
+	nv10->screen = screen;
+	nv10->pctx_id = pctx_id;
+
+	nv10->nvws = nvws;
+
+	nv10->pipe.winsys = ws;
+	nv10->pipe.screen = pscreen;
+	nv10->pipe.destroy = nv10_destroy;
+	nv10->pipe.set_edgeflags = nv10_set_edgeflags;
+	nv10->pipe.draw_arrays = nv10_draw_arrays;
+	nv10->pipe.draw_elements = nv10_draw_elements;
+	nv10->pipe.clear = nv10_clear;
+	nv10->pipe.flush = nv10_flush;
+
+	nv10_init_surface_functions(nv10);
+	nv10_init_state_functions(nv10);
+
+	nv10->draw = draw_create();
+	if (!nv10->draw) {
+		/* An assert alone vanishes in release builds and the next
+		 * line would hand a NULL draw context to the draw module;
+		 * fail the same way as the allocation above instead.
+		 */
+		FREE(nv10);
+		return NULL;
+	}
+	draw_set_rasterize_stage(nv10->draw, nv10_draw_vbuf_stage(nv10));
+
+	nv10_init_hwctx(nv10);
+
+	return &nv10->pipe;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h
new file mode 100644
index 0000000000..f3b56de25a
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_context.h
@@ -0,0 +1,153 @@
+#ifndef __NV10_CONTEXT_H__
+#define __NV10_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv10_screen *ctx = nv10->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv10_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+#define NV10_NEW_VERTPROG (1 << 0)
+#define NV10_NEW_FRAGPROG (1 << 1)
+#define NV10_NEW_VTXARRAYS (1 << 2)
+#define NV10_NEW_BLEND (1 << 3)
+#define NV10_NEW_BLENDCOL (1 << 4)
+#define NV10_NEW_RAST (1 << 5)
+#define NV10_NEW_DSA (1 << 6)
+#define NV10_NEW_VIEWPORT (1 << 7)
+#define NV10_NEW_SCISSOR (1 << 8)
+#define NV10_NEW_FRAMEBUFFER (1 << 9)
+
+#include "nv10_screen.h"
+
+/*
+ * Per-context driver state.  The pipe_context is the first member, so a
+ * pipe_context pointer handed out by nv10_create() can be cast back to the
+ * containing nv10_context (see nv10_context()).
+ */
+struct nv10_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv10_screen *screen;
+ unsigned pctx_id;
+
+ /* Draw module instance created in nv10_create(). */
+ struct draw_context *draw;
+
+ /* Presumably a mask of the NV10_NEW_* bits defined above - confirm. */
+ uint32_t dirty;
+
+ struct nv10_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv10_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned dirty_samplers;
+ unsigned fp_samplers;
+ unsigned vp_samplers;
+
+ uint32_t rt_enable;
+ struct pipe_buffer *rt[4];
+ struct pipe_buffer *zeta;
+ uint32_t lma_offset;
+
+ struct nv10_blend_state *blend;
+ struct pipe_blend_color *blend_color;
+ struct nv10_rasterizer_state *rast;
+ struct nv10_depth_stencil_alpha_state *dsa;
+ struct pipe_viewport_state *viewport;
+ struct pipe_scissor_state *scissor;
+ struct pipe_framebuffer_state *framebuffer;
+
+ //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ /* NOTE(review): this declares an array of float POINTERS; it looks as if
+ * plain float values (32 vec4 constants per shader type) may have been
+ * intended - confirm against the users of constbuf. */
+ float *constbuf[PIPE_SHADER_TYPES][32][4];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+
+ struct vertex_info vertex_info;
+
+ struct {
+ struct pipe_buffer *buffer;
+ uint32_t format;
+ } tex[2];
+
+ unsigned vb_enable;
+ struct {
+ struct pipe_buffer *buffer;
+ unsigned delta;
+ } vb[16];
+
+/* struct {
+
+ struct nouveau_resource *exec_heap;
+ struct nouveau_resource *data_heap;
+
+ struct nv10_vertex_program *active;
+
+ struct nv10_vertex_program *current;
+ } vertprog;
+*/
+ struct {
+ struct nv10_fragment_program *active;
+
+ struct nv10_fragment_program *current;
+ struct pipe_buffer *constant_buf;
+ } fragprog;
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+};
+
+/*
+ * Downcast a pipe_context to the nv10_context that contains it.  This is a
+ * plain pointer cast, valid because pipe is the first member of
+ * struct nv10_context.
+ */
+static INLINE struct nv10_context *
+nv10_context(struct pipe_context *pipe)
+{
+ return (struct nv10_context *)pipe;
+}
+
+extern void nv10_init_state_functions(struct nv10_context *nv10);
+extern void nv10_init_surface_functions(struct nv10_context *nv10);
+
+extern void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv10_clear.c */
+extern void nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv10_draw.c */
+extern struct draw_stage *nv10_draw_render_stage(struct nv10_context *nv10);
+
+/* nv10_fragprog.c */
+extern void nv10_fragprog_bind(struct nv10_context *,
+ struct nv10_fragment_program *);
+extern void nv10_fragprog_destroy(struct nv10_context *,
+ struct nv10_fragment_program *);
+
+/* nv10_fragtex.c */
+extern void nv10_fragtex_bind(struct nv10_context *);
+
+/* nv10_prim_vbuf.c */
+struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 );
+extern void nv10_vtxbuf_bind(struct nv10_context* nv10);
+
+/* nv10_state.c and friends */
+extern void nv10_emit_hw_state(struct nv10_context *nv10);
+extern void nv10_state_tex_update(struct nv10_context *nv10);
+
+/* nv10_vbo.c */
+extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv10_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/nv10/nv10_fragprog.c b/src/gallium/drivers/nv10/nv10_fragprog.c
new file mode 100644
index 0000000000..698db5a16a
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_fragprog.c
@@ -0,0 +1,21 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv10_context.h"
+
+/**
+ * Placeholder for binding fragment program `fp` on context `nv10`.
+ *
+ * The body is empty, so calling this currently has no effect and neither
+ * argument is read.
+ */
+void
+nv10_fragprog_bind(struct nv10_context *nv10, struct nv10_fragment_program *fp)
+{
+}
+
+/**
+ * Placeholder for releasing driver-side resources of fragment program `fp`.
+ *
+ * The body is empty, so calling this currently has no effect; it does not
+ * free `fp` itself.
+ */
+void
+nv10_fragprog_destroy(struct nv10_context *nv10,
+ struct nv10_fragment_program *fp)
+{
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c
new file mode 100644
index 0000000000..27f2f87584
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_fragtex.c
@@ -0,0 +1,124 @@
+#include "nv10_context.h"
+#include "nouveau/nouveau_util.h"
+
+/*
+ * Build one table entry: marks it as defined and pairs PIPE_FORMAT_<m>
+ * with the hardware constant NV10TCL_TX_FORMAT_FORMAT_<tf>.
+ */
+#define _(m,tf) \
+{ \
+	TRUE, \
+	PIPE_FORMAT_##m, \
+	NV10TCL_TX_FORMAT_FORMAT_##tf, \
+}
+
+/** One pipe-format to hardware-texture-format mapping. */
+struct nv10_texture_format {
+	boolean defined;	/* FALSE only in the terminating sentinel entry */
+	uint pipe;		/* PIPE_FORMAT_* value */
+	int format;		/* NV10TCL_TX_FORMAT_FORMAT_* value */
+};
+
+/**
+ * Supported texture formats, terminated by an entry whose `defined`
+ * field is FALSE.
+ */
+static struct nv10_texture_format
+nv10_texture_formats[] = {
+	_(A8R8G8B8_UNORM, A8R8G8B8),
+	_(A1R5G5B5_UNORM, A1R5G5B5),
+	_(A4R4G4B4_UNORM, A4R4G4B4),
+	_(L8_UNORM      , L8      ),
+	_(A8_UNORM      , A8      ),
+	_(A8L8_UNORM    , A8L8    ),
+//	_(RGB_DXT1      , DXT1,   ),
+//	_(RGBA_DXT1     , DXT1,   ),
+//	_(RGBA_DXT3     , DXT3,   ),
+//	_(RGBA_DXT5     , DXT5,   ),
+	/* Sentinel: spelled out because empty braces are not valid ISO C. */
+	{ FALSE, 0, 0 },
+};
+
+/**
+ * Look up the table entry for a pipe format.
+ *
+ * Walks nv10_texture_formats until the sentinel (defined == FALSE) and
+ * returns the first entry whose `pipe` field equals `pipe_format`, or
+ * NULL when no entry matches.
+ */
+static struct nv10_texture_format *
+nv10_fragtex_format(uint pipe_format)
+{
+	struct nv10_texture_format *entry;
+
+	for (entry = nv10_texture_formats; entry->defined; entry++) {
+		if (entry->pipe == pipe_format)
+			return entry;
+	}
+
+	return NULL;
+}
+
+
+/**
+ * Emit the texture state for one texture unit.
+ *
+ * The whole body is compiled out with `#if 0`, so as built this function
+ * does nothing. The disabled code looks up the hardware format for the
+ * bound miptree, packs format/levels/log2 sizes/target into `txf`, and
+ * emits 8 words starting at NV10TCL_TX_OFFSET(unit).
+ */
+static void
+nv10_fragtex_build(struct nv10_context *nv10, int unit)
+{
+#if 0
+ struct nv10_sampler_state *ps = nv10->tex_sampler[unit];
+ struct nv10_miptree *nv10mt = nv10->tex_miptree[unit];
+ struct pipe_texture *pt = &nv10mt->base;
+ struct nv10_texture_format *tf;
+ /* NOTE(review): txs and txp are never assigned below, yet txs is emitted. */
+ uint32_t txf, txs, txp;
+
+ tf = nv10_fragtex_format(pt->format);
+ if (!tf || !tf->defined) {
+ NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format);
+ return;
+ }
+
+ /* Pack format, mip level count and log2 dimensions into one word. */
+ txf = tf->format << 8;
+ txf |= (pt->last_level + 1) << 16;
+ txf |= log2i(pt->width[0]) << 20;
+ txf |= log2i(pt->height[0]) << 24;
+ txf |= log2i(pt->depth[0]) << 28;
+ txf |= 8;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV10TCL_TX_FORMAT_CUBE_MAP;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= (2<<4);
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= (1<<4);
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return;
+ }
+
+ BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8);
+ OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING (ps->wrap);
+ OUT_RING (0x40000000); /* enable */
+ OUT_RING (txs);
+ OUT_RING (ps->filt | 0x2000 /* magic */);
+ OUT_RING ((pt->width[0] << 16) | pt->height[0]);
+ OUT_RING (ps->bcol);
+#endif
+}
+
+/**
+ * Update hardware texture units to match the active fragment program.
+ *
+ * The whole body is compiled out with `#if 0`, so as built this function
+ * does nothing. The disabled code writes 0 to NV10TCL_TX_ENABLE for units
+ * that are in nv10->fp_samplers but not in fp->samplers, rebuilds units
+ * that are both dirty and used by the program, and then records
+ * fp->samplers in nv10->fp_samplers.
+ */
+void
+nv10_fragtex_bind(struct nv10_context *nv10)
+{
+#if 0
+ struct nv10_fragment_program *fp = nv10->fragprog.active;
+ unsigned samplers, unit;
+
+ /* Units previously enabled but no longer referenced by the program. */
+ samplers = nv10->fp_samplers & ~fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1);
+ OUT_RING (0);
+ }
+
+ /* Dirty units that the program does reference. */
+ samplers = nv10->dirty_samplers & fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ nv10_fragtex_build(nv10, unit);
+ }
+
+ nv10->fp_samplers = fp->samplers;
+#endif
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c
new file mode 100644
index 0000000000..9616135461
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_miptree.c
@@ -0,0 +1,174 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv10_context.h"
+#include "nv10_screen.h"
+
+/**
+ * Fill in per-level dimensions, pitches and image offsets for a miptree
+ * and store the resulting byte size in nv10mt->total_size.
+ *
+ * Levels 0..last_level are laid out; each level gets an image_offset
+ * array with one entry per face (6 for cube textures, otherwise 1).
+ * Offsets are assigned face by face, and within a face level by level.
+ */
+static void
+nv10_miptree_layout(struct nv10_miptree *nv10mt)
+{
+ struct pipe_texture *pt = &nv10mt->base;
+ /* Always FALSE here, so only the non-swizzled pitch branch below runs. */
+ boolean swizzled = FALSE;
+ uint width = pt->width[0], height = pt->height[0];
+ uint offset = 0;
+ int nr_faces, l, f;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else {
+ nr_faces = 1;
+ }
+
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ if (swizzled)
+ nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size;
+ else
+ nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size;
+ /* Round the pitch up to a multiple of 64 bytes. */
+ nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63;
+
+ /* NOTE(review): the CALLOC result is not checked before it is written below. */
+ nv10mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ /* Next level is half the size, never smaller than 1. */
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ nv10mt->level[l].image_offset[f] = offset;
+ offset += nv10mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ nv10mt->total_size = offset;
+}
+
+/**
+ * Wrap an existing buffer `pb` as a texture described by template `pt`.
+ *
+ * Only 2D, single-level textures with depth 1 are accepted; anything else
+ * returns NULL. `stride[0]` becomes the pitch of level 0. On success the
+ * new miptree holds its own reference to `pb`.
+ *
+ * \return the new texture, or NULL on unsupported input or allocation
+ *         failure.
+ */
+static struct pipe_texture *
+nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
+		     const unsigned *stride, struct pipe_buffer *pb)
+{
+	struct nv10_miptree *mt;
+
+	/* Only supports 2D, non-mipmapped textures for the moment */
+	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
+	    pt->depth[0] != 1)
+		return NULL;
+
+	mt = CALLOC_STRUCT(nv10_miptree);
+	if (!mt)
+		return NULL;
+
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+	mt->level[0].pitch = stride[0];
+	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
+	if (!mt->level[0].image_offset) {
+		/* Surface creation indexes image_offset[0]; fail cleanly instead. */
+		FREE(mt);
+		return NULL;
+	}
+
+	pipe_buffer_reference(pscreen, &mt->buffer, pb);
+	return &mt->base;
+}
+
+/**
+ * Create a texture from template `pt`, lay out its mip levels and
+ * allocate a backing buffer of the computed total size.
+ *
+ * \return the new texture, or NULL if the miptree or its buffer could
+ *         not be allocated.
+ */
+static struct pipe_texture *
+nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
+{
+	struct pipe_winsys *ws = screen->winsys;
+	struct nv10_miptree *mt;
+
+	/* Zero-initialised so unused level entries never hold stale pointers. */
+	mt = CALLOC_STRUCT(nv10_miptree);
+	if (!mt)
+		return NULL;
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = screen;
+
+	nv10_miptree_layout(mt);
+
+	mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL,
+				       mt->total_size);
+	if (!mt->buffer) {
+		int l;
+
+		/* The layout step allocated one offset array per level. */
+		for (l = 0; l <= mt->base.last_level; l++) {
+			if (mt->level[l].image_offset)
+				FREE(mt->level[l].image_offset);
+		}
+		FREE(mt);
+		return NULL;
+	}
+
+	return &mt->base;
+}
+
+/**
+ * Drop one reference to `*pt` and clear the caller's pointer.
+ *
+ * When the reference count reaches zero (or below), the backing buffer
+ * reference is released, every per-level image_offset array is freed and
+ * the miptree itself is freed.
+ */
+static void
+nv10_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt)
+{
+	struct pipe_texture *tex = *pt;
+	struct nv10_miptree *nv10mt;
+	int level;
+
+	*pt = NULL;
+	if (!(--tex->refcount <= 0))
+		return;
+
+	nv10mt = (struct nv10_miptree *)tex;
+
+	pipe_buffer_reference(screen, &nv10mt->buffer, NULL);
+	for (level = 0; level <= tex->last_level; level++) {
+		if (nv10mt->level[level].image_offset)
+			FREE(nv10mt->level[level].image_offset);
+	}
+	FREE(nv10mt);
+}
+
+/**
+ * Placeholder texture-update hook; the body is empty.
+ *
+ * It is not installed by nv10_screen_init_miptree_functions() in this file.
+ */
+static void
+nv10_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt,
+ uint face, uint levels)
+{
+}
+
+
+/**
+ * Create a surface describing one image (face/level) of a texture.
+ *
+ * The surface takes a reference on `pt` and copies format, size, block
+ * layout and pitch for `level`. The offset comes from the per-face table
+ * for cube textures and from entry 0 otherwise. `zslice` and `flags` are
+ * not used.
+ *
+ * \return the new surface with refcount 1, or NULL on allocation failure.
+ */
+static struct pipe_surface *
+nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
+			 unsigned face, unsigned level, unsigned zslice,
+			 unsigned flags)
+{
+	struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt;
+	struct pipe_surface *ps;
+
+	ps = CALLOC_STRUCT(pipe_surface);
+	if (!ps)
+		return NULL;
+	pipe_texture_reference(&ps->texture, pt);
+	ps->format = pt->format;
+	ps->width = pt->width[level];
+	ps->height = pt->height[level];
+	ps->block = pt->block;
+	ps->nblocksx = pt->nblocksx[level];
+	ps->nblocksy = pt->nblocksy[level];
+	ps->stride = nv10mt->level[level].pitch;
+	ps->refcount = 1;
+
+	if (pt->target == PIPE_TEXTURE_CUBE) {
+		ps->offset = nv10mt->level[level].image_offset[face];
+	} else {
+		ps->offset = nv10mt->level[level].image_offset[0];
+	}
+
+	return ps;
+}
+
+/**
+ * Surface release hook; the body is empty.
+ *
+ * NOTE(review): nv10_miptree_surface_get() allocates the surface and takes
+ * a texture reference, and nothing here undoes either -- confirm whether
+ * surfaces are freed elsewhere.
+ */
+static void
+nv10_miptree_surface_release(struct pipe_screen *screen,
+ struct pipe_surface **surface)
+{
+}
+
+/**
+ * Install this file's texture and texture-surface callbacks on `pscreen`.
+ */
+void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv10_miptree_create;
+ pscreen->texture_blanket = nv10_miptree_blanket;
+ pscreen->texture_release = nv10_miptree_release;
+ pscreen->get_tex_surface = nv10_miptree_surface_get;
+ pscreen->tex_surface_release = nv10_miptree_surface_release;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
new file mode 100644
index 0000000000..491a881806
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
@@ -0,0 +1,265 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Build post-transformation, post-clipping vertex buffers and element
+ * lists by hooking into the end of the primitive pipeline and
+ * manipulating the vertex_id field in the vertex headers.
+ *
+ * XXX: work in progress
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+#include "draw/draw_vbuf.h"
+
+/**
+ * Primitive renderer for nv10.
+ *
+ * Extends the generic vbuf_render (which must stay the first member so
+ * the two pointer types can be cast to each other).
+ */
+struct nv10_vbuf_render {
+ struct vbuf_render base;
+
+ /** Owning driver context */
+ struct nv10_context *nv10;
+
+ /** Vertex buffer; NULL between release_vertices and allocate_vertices */
+ struct pipe_buffer* buffer;
+
+ /** Vertex size in bytes */
+ unsigned vertex_size;
+
+ /** Hardware primitive, as stored by set_primitive and emitted by draw */
+ unsigned hwprim;
+};
+
+
+/**
+ * Emit vertex array offset and format methods for attribute slots 0..7.
+ *
+ * Both values are currently written as 0 for every slot; the inline
+ * markers show the real values are still to be filled in.
+ */
+void nv10_vtxbuf_bind( struct nv10_context* nv10 )
+{
+ int i;
+ for(i = 0; i < 8; i++) {
+ BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1);
+ OUT_RING(0/*nv10->vtxbuf*/);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1);
+ OUT_RING(0/*XXX*/);
+ }
+}
+
+/**
+ * Downcast a generic vbuf_render to the nv10 renderer.
+ * Asserts that the argument is non-NULL.
+ */
+static INLINE struct nv10_vbuf_render *
+nv10_vbuf_render( struct vbuf_render *render )
+{
+	struct nv10_vbuf_render *nv10_render;
+
+	assert(render);
+	nv10_render = (struct nv10_vbuf_render *)render;
+
+	return nv10_render;
+}
+
+
+/**
+ * vbuf_render hook: emit pending hardware state, then return the
+ * context's vertex_info.
+ */
+static const struct vertex_info *
+nv10_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+	struct nv10_context *ctx = nv10_vbuf_render(render)->nv10;
+
+	nv10_emit_hw_state(ctx);
+
+	return &ctx->vertex_info;
+}
+
+/**
+ * vbuf_render hook: allocate a vertex buffer large enough for
+ * `nr_vertices` vertices of `vertex_size` bytes each.
+ *
+ * The renderer must not already hold a buffer. The vertex-array dirty
+ * flag is set whether or not the allocation succeeds.
+ *
+ * \return TRUE if the buffer was created, FALSE on allocation failure.
+ */
+static boolean
+nv10_vbuf_render_allocate_vertices( struct vbuf_render *render,
+		ushort vertex_size,
+		ushort nr_vertices )
+{
+	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+	struct nv10_context *nv10 = nv10_render->nv10;
+	struct pipe_winsys *winsys = nv10->pipe.winsys;
+	size_t size = (size_t)vertex_size * (size_t)nr_vertices;
+
+	assert(!nv10_render->buffer);
+	nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size);
+
+	nv10->dirty |= NV10_NEW_VTXARRAYS;
+
+	/* Success means a buffer exists; the old code reported the opposite. */
+	return nv10_render->buffer ? TRUE : FALSE;
+}
+
+/**
+ * vbuf_render hook: map the current vertex buffer for CPU writes and
+ * return whatever the winsys buffer_map call returns.
+ */
+static void *
+nv10_vbuf_render_map_vertices( struct vbuf_render *render )
+{
+	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+	struct pipe_winsys *winsys = nv10_render->nv10->pipe.winsys;
+	void *mapping;
+
+	mapping = winsys->buffer_map(winsys, nv10_render->buffer,
+				     PIPE_BUFFER_USAGE_CPU_WRITE);
+
+	return mapping;
+}
+
+/**
+ * vbuf_render hook: unmap the current vertex buffer.
+ *
+ * `min_index` and `max_index` are not used. A buffer must be allocated
+ * when this is called.
+ */
+static void
+nv10_vbuf_render_unmap_vertices( struct vbuf_render *render,
+		ushort min_index,
+		ushort max_index )
+{
+	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+	struct nv10_context *nv10 = nv10_render->nv10;
+	struct pipe_winsys *winsys = nv10->pipe.winsys;
+
+	/* The buffer is about to be unmapped, so it has to exist. */
+	assert(nv10_render->buffer);
+	winsys->buffer_unmap(winsys, nv10_render->buffer);
+}
+
+/**
+ * vbuf_render hook: translate `prim` with nvgl_primitive() and remember
+ * the result for the next draw.
+ *
+ * \return FALSE (leaving the stored primitive untouched) when the
+ *         translation yields 0, TRUE otherwise.
+ */
+static boolean
+nv10_vbuf_render_set_primitive( struct vbuf_render *render,
+		unsigned prim )
+{
+	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+	unsigned translated = nvgl_primitive(prim);
+
+	if (!translated)
+		return FALSE;
+
+	nv10_render->hwprim = translated;
+
+	return TRUE;
+}
+
+
+/**
+ * vbuf_render hook: draw `nr_indices` 16-bit indices from the current
+ * vertex buffer using the primitive stored by set_primitive.
+ *
+ * Emits pending state, points the hardware at the vertex buffer, then
+ * brackets the index stream with BEGIN_END(hwprim) ... BEGIN_END(0).
+ */
+static void
+nv10_vbuf_render_draw( struct vbuf_render *render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ struct nv10_context *nv10 = nv10_render->nv10;
+ int push, i;
+
+ nv10_emit_hw_state(nv10);
+
+ BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+ OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+
+ BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING(nv10_render->hwprim);
+
+ /* An odd count: send the first index alone so the rest can be paired. */
+ if (nr_indices & 1) {
+ BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (indices[0]);
+ indices++; nr_indices--;
+ }
+
+ /* Remaining indices go out two per word, at most 2400 per batch. */
+ while (nr_indices) {
+ // XXX too big/small ? check the size
+ push = MIN2(nr_indices, 1200 * 2);
+
+ BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((indices[i+1] << 16) | indices[i]);
+
+ nr_indices -= push;
+ indices += push;
+ }
+
+ BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING (0);
+}
+
+
+/**
+ * vbuf_render hook: drop the renderer's reference to its vertex buffer.
+ *
+ * A buffer must be allocated; passing NULL as the new reference leaves
+ * nv10_render->buffer without a buffer.
+ */
+static void
+nv10_vbuf_render_release_vertices( struct vbuf_render *render )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ struct nv10_context *nv10 = nv10_render->nv10;
+ struct pipe_screen *pscreen = &nv10->screen->pipe;
+
+ assert(nv10_render->buffer);
+ pipe_buffer_reference(pscreen, &nv10_render->buffer, NULL);
+}
+
+
+/**
+ * vbuf_render hook: free the renderer object itself.
+ */
+static void
+nv10_vbuf_render_destroy( struct vbuf_render *render )
+{
+	FREE(nv10_vbuf_render(render));
+}
+
+
+/**
+ * Create a new primitive render.
+ *
+ * Allocates a zeroed nv10_vbuf_render, ties it to `nv10`, sets the buffer
+ * and index limits and installs the vbuf_render callbacks.
+ *
+ * \return the embedded vbuf_render, or NULL on allocation failure.
+ */
+static struct vbuf_render *
+nv10_vbuf_render_create( struct nv10_context *nv10 )
+{
+	struct nv10_vbuf_render *nv10_render = CALLOC_STRUCT(nv10_vbuf_render);
+
+	/* The caller already tests for NULL; make that test reachable. */
+	if (!nv10_render)
+		return NULL;
+
+	nv10_render->nv10 = nv10;
+
+	nv10_render->base.max_vertex_buffer_bytes = 16*1024;
+	nv10_render->base.max_indices = 1024;
+	nv10_render->base.get_vertex_info = nv10_vbuf_render_get_vertex_info;
+	nv10_render->base.allocate_vertices = nv10_vbuf_render_allocate_vertices;
+	nv10_render->base.map_vertices = nv10_vbuf_render_map_vertices;
+	nv10_render->base.unmap_vertices = nv10_vbuf_render_unmap_vertices;
+	nv10_render->base.set_primitive = nv10_vbuf_render_set_primitive;
+	nv10_render->base.draw = nv10_vbuf_render_draw;
+	nv10_render->base.release_vertices = nv10_vbuf_render_release_vertices;
+	nv10_render->base.destroy = nv10_vbuf_render_destroy;
+
+	return &nv10_render->base;
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ *
+ * Builds an nv10 renderer and wraps it in a draw-module vbuf stage. If
+ * the stage cannot be created the renderer is destroyed again.
+ *
+ * \return the stage, or NULL if either step fails.
+ */
+struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 )
+{
+	struct vbuf_render *render = nv10_vbuf_render_create(nv10);
+	struct draw_stage *stage;
+
+	if (render == NULL)
+		return NULL;
+
+	stage = draw_vbuf_stage( nv10->draw, render );
+	if (stage != NULL)
+		return stage;
+
+	render->destroy(render);
+	return NULL;
+}
diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c
new file mode 100644
index 0000000000..f417b06c94
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -0,0 +1,226 @@
+#include "pipe/p_screen.h"
+#include "util/u_simple_screen.h"
+
+#include "nv10_context.h"
+#include "nv10_screen.h"
+
+/**
+ * Return the device name as "NV" followed by the chipset id in
+ * upper-case hex (at least two digits).
+ *
+ * The string lives in a function-local static buffer, so it is
+ * overwritten by the next call.
+ */
+static const char *
+nv10_screen_get_name(struct pipe_screen *screen)
+{
+	static char buffer[128];
+	struct nouveau_device *dev;
+
+	dev = nv10_screen(screen)->nvws->channel->device;
+	snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+
+	return buffer;
+}
+
+/** Return the constant vendor string; `screen` is not used. */
+static const char *
+nv10_screen_get_vendor(struct pipe_screen *screen)
+{
+ return "nouveau";
+}
+
+/**
+ * Report integer capabilities of the nv10 screen.
+ *
+ * Cases are grouped by the value they return. Unknown parameters are
+ * logged and reported as 0. `screen` is not used.
+ */
+static int
+nv10_screen_get_param(struct pipe_screen *screen, int param)
+{
+	switch (param) {
+	/* Two texture image units. */
+	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+		return 2;
+
+	/* Reported as 1. */
+	case PIPE_CAP_ANISOTROPIC_FILTER:
+	case PIPE_CAP_MAX_RENDER_TARGETS:
+		return 1;
+
+	/* Mip level limits. */
+	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+		return 12;
+
+	/* Reported as 0. */
+	case PIPE_CAP_NPOT_TEXTURES:
+	case PIPE_CAP_TWO_SIDED_STENCIL:
+	case PIPE_CAP_GLSL:
+	case PIPE_CAP_S3TC:
+	case PIPE_CAP_POINT_SPRITE:
+	case PIPE_CAP_OCCLUSION_QUERY:
+	case PIPE_CAP_TEXTURE_SHADOW_MAP:
+	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+	case NOUVEAU_CAP_HW_VTXBUF:
+	case NOUVEAU_CAP_HW_IDXBUF:
+		return 0;
+
+	default:
+		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+		return 0;
+	}
+}
+
+/**
+ * Report floating-point capabilities of the nv10 screen.
+ *
+ * Unknown parameters are logged and reported as 0.0. `screen` is not used.
+ */
+static float
+nv10_screen_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ /* Same limit for plain and anti-aliased lines. */
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ /* Same limit for plain and anti-aliased points. */
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 2.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+/**
+ * Tell whether `format` is usable for the requested usage.
+ *
+ * When PIPE_TEXTURE_USAGE_RENDER_TARGET is set in `tex_usage` the format
+ * is checked against the render-target list, otherwise against the
+ * texture list. `screen`, `target` and `geom_flags` are not used.
+ */
+static boolean
+nv10_screen_is_format_supported(struct pipe_screen *screen,
+				enum pipe_format format,
+				enum pipe_texture_target target,
+				unsigned tex_usage, unsigned geom_flags)
+{
+	boolean supported = FALSE;
+
+	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+		/* Colour and depth/stencil render targets. */
+		switch (format) {
+		case PIPE_FORMAT_A8R8G8B8_UNORM:
+		case PIPE_FORMAT_R5G6B5_UNORM:
+		case PIPE_FORMAT_Z24S8_UNORM:
+		case PIPE_FORMAT_Z16_UNORM:
+			supported = TRUE;
+			break;
+		default:
+			break;
+		}
+		return supported;
+	}
+
+	/* Sampled textures. */
+	switch (format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case PIPE_FORMAT_A1R5G5B5_UNORM:
+	case PIPE_FORMAT_A4R4G4B4_UNORM:
+	case PIPE_FORMAT_R5G6B5_UNORM:
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_I8_UNORM:
+		supported = TRUE;
+		break;
+	default:
+		break;
+	}
+
+	return supported;
+}
+
+/**
+ * Map the buffer behind `surface` and return a pointer to the surface's
+ * first byte (buffer mapping plus surface->offset).
+ *
+ * \return the mapped address, or NULL if the buffer could not be mapped.
+ */
+static void *
+nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+		 unsigned flags )
+{
+	struct pipe_winsys *ws = screen->winsys;
+	void *map;
+	struct nv10_miptree *nv10mt = (struct nv10_miptree *)surface->texture;
+
+	map = ws->buffer_map(ws, nv10mt->buffer, flags);
+	if (!map)
+		return NULL;
+
+	/* Byte offset via char *: arithmetic on void * is a GNU extension. */
+	return (char *)map + surface->offset;
+}
+
+/**
+ * Unmap the buffer that backs `surface`'s texture.
+ */
+static void
+nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct nv10_miptree *nv10mt = (struct nv10_miptree *)surface->texture;
+
+ ws->buffer_unmap(ws, nv10mt->buffer);
+}
+
+/**
+ * Destroy the screen: free the sync notifier and the celsius object,
+ * then free the screen structure.
+ *
+ * NOTE(review): screen->eng2d is not released here -- confirm whether it
+ * is owned elsewhere.
+ */
+static void
+nv10_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv10_screen *screen = nv10_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->celsius);
+
+ FREE(pscreen);
+}
+
+/**
+ * Return the buffer that backs a surface's texture (installed as the 2D
+ * engine's `buf` callback by nv10_screen_create).
+ */
+static struct pipe_buffer *
+nv10_surface_buffer(struct pipe_surface *surf)
+{
+	return ((struct nv10_miptree *)surf->texture)->buffer;
+}
+
+/**
+ * Create the nv10 screen.
+ *
+ * Sets up the 2D engine, picks the celsius object class from the chipset
+ * id, allocates the 3D object and a sync notifier, and installs the
+ * screen callbacks.
+ *
+ * \return the new pipe_screen, or NULL on any failure.
+ */
+struct pipe_screen *
+nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+	struct nv10_screen *screen = CALLOC_STRUCT(nv10_screen);
+	unsigned celsius_class;
+	unsigned chipset = nvws->channel->device->chipset;
+	int ret;
+
+	if (!screen)
+		return NULL;
+	screen->nvws = nvws;
+
+	/* 2D engine setup */
+	screen->eng2d = nv04_surface_2d_init(nvws);
+	if (!screen->eng2d) {
+		/* presumably NULL signals failure; never dereference it */
+		FREE(screen);
+		return NULL;
+	}
+	screen->eng2d->buf = nv10_surface_buffer;
+
+	/* 3D object */
+	/* NOTE(review): chipsets >= 0x20 select NV11TCL, not NV17TCL -- confirm. */
+	if (chipset>=0x20)
+		celsius_class=NV11TCL;
+	else if (chipset>=0x17)
+		celsius_class=NV17TCL;
+	else if (chipset>=0x11)
+		celsius_class=NV11TCL;
+	else
+		celsius_class=NV10TCL;
+
+	/* NOTE(review): eng2d is not torn down on the error paths below. */
+	if (!celsius_class) {
+		NOUVEAU_ERR("Unknown nv1x chipset: nv%02x\n", chipset);
+		FREE(screen);
+		return NULL;
+	}
+
+	ret = nvws->grobj_alloc(nvws, celsius_class, &screen->celsius);
+	if (ret) {
+		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+		FREE(screen);
+		return NULL;
+	}
+
+	/* Notifier for sync purposes */
+	ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+	if (ret) {
+		NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+		nv10_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	screen->pipe.winsys = ws;
+	screen->pipe.destroy = nv10_screen_destroy;
+
+	screen->pipe.get_name = nv10_screen_get_name;
+	screen->pipe.get_vendor = nv10_screen_get_vendor;
+	screen->pipe.get_param = nv10_screen_get_param;
+	screen->pipe.get_paramf = nv10_screen_get_paramf;
+
+	screen->pipe.is_format_supported = nv10_screen_is_format_supported;
+
+	screen->pipe.surface_map = nv10_surface_map;
+	screen->pipe.surface_unmap = nv10_surface_unmap;
+
+	nv10_screen_init_miptree_functions(&screen->pipe);
+	u_simple_screen_init(&screen->pipe);
+
+	return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_screen.h b/src/gallium/drivers/nv10/nv10_screen.h
new file mode 100644
index 0000000000..60102a369a
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_screen.h
@@ -0,0 +1,24 @@
+#ifndef __NV10_SCREEN_H__
+#define __NV10_SCREEN_H__
+
+#include "pipe/p_screen.h"
+#include "nv04/nv04_surface_2d.h"
+
+/**
+ * nv10 screen: the generic pipe_screen (kept first so the two pointer
+ * types can be cast to each other) plus driver-private objects.
+ */
+struct nv10_screen {
+ struct pipe_screen pipe;
+
+ /* Winsys used to allocate and free the objects below */
+ struct nouveau_winsys *nvws;
+
+ /* HW graphics objects */
+ struct nv04_surface_2d *eng2d;
+ struct nouveau_grobj *celsius;
+ struct nouveau_notifier *sync;
+};
+
+/**
+ * Downcast a generic pipe_screen pointer to the nv10 screen.
+ * The conversion is a plain pointer cast of the argument.
+ */
+static INLINE struct nv10_screen *
+nv10_screen(struct pipe_screen *screen)
+{
+	struct nv10_screen *nv10screen = (struct nv10_screen *)screen;
+
+	return nv10screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c
new file mode 100644
index 0000000000..119af66dfd
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_state.c
@@ -0,0 +1,589 @@
+#include "draw/draw_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+/**
+ * Translate a pipe_blend_state into the driver's blend state object.
+ *
+ * Source/destination factors are packed as (alpha << 16) | rgb, and the
+ * colour mask as one byte per channel in A, R, G, B order from the top.
+ *
+ * \return the new state object, or NULL on allocation failure.
+ */
+static void *
+nv10_blend_state_create(struct pipe_context *pipe,
+			const struct pipe_blend_state *cso)
+{
+	struct nv10_blend_state *cb;
+
+	cb = MALLOC(sizeof(struct nv10_blend_state));
+	if (!cb)
+		return NULL;
+
+	cb->b_enable = cso->blend_enable ? 1 : 0;
+	cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) |
+			 (nvgl_blend_func(cso->rgb_src_factor)));
+	cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) |
+			 (nvgl_blend_func(cso->rgb_dst_factor)));
+
+	cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) |
+		      ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) |
+		      ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) |
+		      ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0));
+
+	cb->d_enable = cso->dither ? 1 : 0;
+
+	return (void *)cb;
+}
+
+/**
+ * Make `blend` the current blend state and flag it dirty.
+ */
+static void
+nv10_blend_state_bind(struct pipe_context *pipe, void *blend)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->blend = (struct nv10_blend_state*)blend;
+
+ nv10->dirty |= NV10_NEW_BLEND;
+}
+
+/** Free a blend state object; `pipe` is not used. */
+static void
+nv10_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+
+/**
+ * Translate a PIPE_TEX_WRAP_* mode into the hardware wrap value.
+ *
+ * The lookup uses the WRAP_S constants and shifts the result down by
+ * NV10TCL_TX_FORMAT_WRAP_S_SHIFT, so callers can shift it into either
+ * the S or the T field. The mirror-clamp modes and any unknown value are
+ * logged and fall back to REPEAT.
+ */
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP;
+ break;
+ /* No case of their own: handled like an unknown mode. */
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT;
+ break;
+ }
+
+ return ret >> NV10TCL_TX_FORMAT_WRAP_S_SHIFT;
+}
+
+/**
+ * Translate a pipe_sampler_state into the driver's sampler state object.
+ *
+ * Fills in the wrap word (S and T modes), the filter word (magnify and
+ * minify/mipmap selection) and the border colour packed as A, R, G, B
+ * bytes from the top. Anisotropy levels and shadow comparison are not
+ * programmed; the corresponding code is commented out.
+ *
+ * \return the new state object, or NULL on allocation failure.
+ */
+static void *
+nv10_sampler_state_create(struct pipe_context *pipe,
+			  const struct pipe_sampler_state *cso)
+{
+	struct nv10_sampler_state *ps;
+	uint32_t filter = 0;
+
+	ps = MALLOC(sizeof(struct nv10_sampler_state));
+	if (!ps)
+		return NULL;
+
+	ps->wrap = ((wrap_mode(cso->wrap_s) << NV10TCL_TX_FORMAT_WRAP_S_SHIFT) |
+		    (wrap_mode(cso->wrap_t) << NV10TCL_TX_FORMAT_WRAP_T_SHIFT));
+
+	ps->en = 0;
+	if (cso->max_anisotropy > 1.0) {
+		/* no idea, binary driver sets it, works without it.. meh.. */
+		ps->wrap |= (1 << 5);
+
+/*		if (cso->max_anisotropy >= 16.0) {
+			ps->en |= NV10TCL_TX_ENABLE_ANISO_16X;
+		} else
+		if (cso->max_anisotropy >= 12.0) {
+			ps->en |= NV10TCL_TX_ENABLE_ANISO_12X;
+		} else
+		if (cso->max_anisotropy >= 10.0) {
+			ps->en |= NV10TCL_TX_ENABLE_ANISO_10X;
+		} else
+		if (cso->max_anisotropy >= 8.0) {
+			ps->en |= NV10TCL_TX_ENABLE_ANISO_8X;
+		} else
+		if (cso->max_anisotropy >= 6.0) {
+			ps->en |= NV10TCL_TX_ENABLE_ANISO_6X;
+		} else
+		if (cso->max_anisotropy >= 4.0) {
+			ps->en |= NV10TCL_TX_ENABLE_ANISO_4X;
+		} else {
+			ps->en |= NV10TCL_TX_ENABLE_ANISO_2X;
+		}*/
+	}
+
+	/* Magnification: anything other than LINEAR is treated as NEAREST. */
+	switch (cso->mag_img_filter) {
+	case PIPE_TEX_FILTER_LINEAR:
+		filter |= NV10TCL_TX_FILTER_MAGNIFY_LINEAR;
+		break;
+	case PIPE_TEX_FILTER_NEAREST:
+	default:
+		filter |= NV10TCL_TX_FILTER_MAGNIFY_NEAREST;
+		break;
+	}
+
+	/* Minification: image filter selects the outer case, mip filter the inner. */
+	switch (cso->min_img_filter) {
+	case PIPE_TEX_FILTER_LINEAR:
+		switch (cso->min_mip_filter) {
+		case PIPE_TEX_MIPFILTER_NEAREST:
+			filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+			break;
+		case PIPE_TEX_MIPFILTER_LINEAR:
+			filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+			break;
+		case PIPE_TEX_MIPFILTER_NONE:
+		default:
+			filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR;
+			break;
+		}
+		break;
+	case PIPE_TEX_FILTER_NEAREST:
+	default:
+		switch (cso->min_mip_filter) {
+		case PIPE_TEX_MIPFILTER_NEAREST:
+			filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+			break;
+		case PIPE_TEX_MIPFILTER_LINEAR:
+			filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+			break;
+		case PIPE_TEX_MIPFILTER_NONE:
+		default:
+			filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST;
+			break;
+		}
+		break;
+	}
+
+	ps->filt = filter;
+
+/*	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+		switch (cso->compare_func) {
+		case PIPE_FUNC_NEVER:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER;
+			break;
+		case PIPE_FUNC_GREATER:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER;
+			break;
+		case PIPE_FUNC_EQUAL:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL;
+			break;
+		case PIPE_FUNC_GEQUAL:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL;
+			break;
+		case PIPE_FUNC_LESS:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS;
+			break;
+		case PIPE_FUNC_NOTEQUAL:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL;
+			break;
+		case PIPE_FUNC_LEQUAL:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL;
+			break;
+		case PIPE_FUNC_ALWAYS:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS;
+			break;
+		default:
+			break;
+		}
+	}*/
+
+	ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+		    (float_to_ubyte(cso->border_color[0]) << 16) |
+		    (float_to_ubyte(cso->border_color[1]) <<  8) |
+		    (float_to_ubyte(cso->border_color[2]) <<  0));
+
+	return (void *)ps;
+}
+
+/**
+ * Bind `nr` sampler state objects to texture units 0..nr-1 and mark each
+ * of those units dirty.
+ *
+ * NOTE(review): `nr` is not checked against the size of
+ * nv10->tex_sampler -- confirm callers bound it.
+ */
+static void
+nv10_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv10->tex_sampler[unit] = sampler[unit];
+ nv10->dirty_samplers |= (1 << unit);
+ }
+}
+
+/** Free a sampler state object; `pipe` is not used. */
+static void
+nv10_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/**
+ * Bind `nr` textures to texture units 0..nr-1 and mark each of those
+ * units dirty. The pointers are stored as-is; no reference is taken here.
+ *
+ * NOTE(review): `nr` is not checked against the size of
+ * nv10->tex_miptree -- confirm callers bound it.
+ */
+static void
+nv10_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv10->tex_miptree[unit] = (struct nv10_miptree *)miptree[unit];
+ nv10->dirty_samplers |= (1 << unit);
+ }
+}
+
+/**
+ * Translate a pipe_rasterizer_state into the driver's rasterizer state.
+ *
+ * The template pointer `cso` is stored in the result (not copied), so it
+ * has to outlive the returned object.
+ *
+ * \return the new state object, or NULL on allocation failure.
+ */
+static void *
+nv10_rasterizer_state_create(struct pipe_context *pipe,
+			     const struct pipe_rasterizer_state *cso)
+{
+	struct nv10_rasterizer_state *rs;
+	/* Reads the float's bit pattern without breaking strict aliasing. */
+	union {
+		float f;
+		uint32_t u;
+	} point_size_bits;
+	int i;
+
+	/*XXX: ignored:
+	 * 	light_twoside
+	 * 	offset_cw/ccw -nohw
+	 * 	scissor
+	 * 	point_smooth -nohw
+	 * 	multisample
+	 * 	offset_units / offset_scale
+	 */
+	rs = MALLOC(sizeof(struct nv10_rasterizer_state));
+	if (!rs)
+		return NULL;
+
+	rs->templ = cso;
+
+	rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01;
+
+	/* Line width is stored as the width times 8, truncated to one byte. */
+	rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff;
+	rs->line_smooth_en = cso->line_smooth ? 1 : 0;
+
+	/* Point size is stored as the raw bits of the float value. */
+	point_size_bits.f = cso->point_size;
+	rs->point_size = point_size_bits.u;
+
+	rs->poly_smooth_en = cso->poly_smooth ? 1 : 0;
+
+	/* Front/back polygon modes follow whichever winding is "front". */
+	if (cso->front_winding == PIPE_WINDING_CCW) {
+		rs->front_face = NV10TCL_FRONT_FACE_CCW;
+		rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw);
+		rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw);
+	} else {
+		rs->front_face = NV10TCL_FRONT_FACE_CW;
+		rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw);
+		rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw);
+	}
+
+	/* Culling a winding means culling FRONT if that winding is front. */
+	switch (cso->cull_mode) {
+	case PIPE_WINDING_CCW:
+		rs->cull_face_en = 1;
+		if (cso->front_winding == PIPE_WINDING_CCW)
+			rs->cull_face = NV10TCL_CULL_FACE_FRONT;
+		else
+			rs->cull_face = NV10TCL_CULL_FACE_BACK;
+		break;
+	case PIPE_WINDING_CW:
+		rs->cull_face_en = 1;
+		if (cso->front_winding == PIPE_WINDING_CW)
+			rs->cull_face = NV10TCL_CULL_FACE_FRONT;
+		else
+			rs->cull_face = NV10TCL_CULL_FACE_BACK;
+		break;
+	case PIPE_WINDING_BOTH:
+		rs->cull_face_en = 1;
+		rs->cull_face = NV10TCL_CULL_FACE_FRONT_AND_BACK;
+		break;
+	case PIPE_WINDING_NONE:
+	default:
+		rs->cull_face_en = 0;
+		rs->cull_face = 0;
+		break;
+	}
+
+	/* Bit 0 enables point sprites; bits 8..15 flag coordinate sets 0..7. */
+	if (cso->point_sprite) {
+		rs->point_sprite = (1 << 0);
+		for (i = 0; i < 8; i++) {
+			if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+				rs->point_sprite |= (1 << (8 + i));
+		}
+	} else {
+		rs->point_sprite = 0;
+	}
+
+	return (void *)rs;
+}
+
+/**
+ * Make `rast` the current rasterizer state, hand its template to the
+ * draw module and flag the state dirty.
+ *
+ * `rast` may be NULL, in which case the draw module receives NULL.
+ */
+static void
+nv10_rasterizer_state_bind(struct pipe_context *pipe, void *rast)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->rast = (struct nv10_rasterizer_state*)rast;
+
+ draw_set_rasterizer_state(nv10->draw, (nv10->rast ? nv10->rast->templ : NULL));
+
+ nv10->dirty |= NV10_NEW_RAST;
+}
+
+/** Free a rasterizer state object; `pipe` is not used. */
+static void
+nv10_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/**
+ * Translate a pipe_depth_stencil_alpha_state into the driver's object.
+ *
+ * Only stencil[0] of the template is read.
+ *
+ * \return the new state object, or NULL on allocation failure.
+ */
+static void *
+nv10_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+			const struct pipe_depth_stencil_alpha_state *cso)
+{
+	struct nv10_depth_stencil_alpha_state *hw;
+
+	hw = MALLOC(sizeof(struct nv10_depth_stencil_alpha_state));
+	if (!hw)
+		return NULL;
+
+	hw->depth.func = nvgl_comparison_op(cso->depth.func);
+	hw->depth.write_enable = cso->depth.writemask ? 1 : 0;
+	hw->depth.test_enable = cso->depth.enabled ? 1 : 0;
+
+	hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0;
+	hw->stencil.wmask = cso->stencil[0].writemask;
+	hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func);
+	hw->stencil.ref = cso->stencil[0].ref_value;
+	hw->stencil.vmask = cso->stencil[0].valuemask;
+	hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op);
+	hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op);
+	hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op);
+
+	hw->alpha.enabled = cso->alpha.enabled ? 1 : 0;
+	hw->alpha.func = nvgl_comparison_op(cso->alpha.func);
+	hw->alpha.ref = float_to_ubyte(cso->alpha.ref_value);
+
+	return (void *)hw;
+}
+
+/**
+ * Make `dsa` the current depth/stencil/alpha state and flag it dirty.
+ */
+static void
+nv10_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->dsa = (struct nv10_depth_stencil_alpha_state*)dsa;
+
+ nv10->dirty |= NV10_NEW_DSA;
+}
+
+/** Free a depth/stencil/alpha state object; `pipe` is not used. */
+static void
+nv10_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/**
+ * Create a vertex shader by delegating to the draw module; the returned
+ * handle is whatever draw_create_vertex_shader() returns.
+ */
+static void *
+nv10_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ return draw_create_vertex_shader(nv10->draw, templ);
+}
+
+/* Bind a draw-module vertex shader and mark the vertex program dirty. */
+static void
+nv10_vp_state_bind(struct pipe_context *pipe, void *shader)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct draw_vertex_shader *vs = shader;
+
+ draw_bind_vertex_shader(nv10->draw, vs);
+ nv10->dirty |= NV10_NEW_VERTPROG;
+}
+
+/* Destroy a vertex shader previously created through the draw module. */
+static void
+nv10_vp_state_delete(struct pipe_context *pipe, void *shader)
+{
+ struct draw_vertex_shader *vs = shader;
+
+ draw_delete_vertex_shader(nv10_context(pipe)->draw, vs);
+}
+
+/**
+ * Create a fragment program CSO.
+ *
+ * The object is zero-initialised, receives its own copy of the TGSI token
+ * stream and has the shader info block filled in by tgsi_scan_shader().
+ *
+ * Returns the new object, or NULL if either allocation fails.
+ */
+static void *
+nv10_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv10_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(*fp));
+ if (!fp)
+     return NULL;
+
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ if (!fp->pipe.tokens) {
+     /* don't hand out a program without a token stream */
+     FREE(fp);
+     return NULL;
+ }
+
+ tgsi_scan_shader(cso->tokens, &fp->info);
+
+ return (void *)fp;
+}
+
+/* Select `hwcso` as the current fragment program and flag it dirty. */
+static void
+nv10_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->fragprog.current = (struct nv10_fragment_program *)hwcso;
+ nv10->dirty |= NV10_NEW_FRAGPROG;
+}
+
+/*
+ * Destroy a fragment program: let nv10_fragprog_destroy() drop the
+ * driver-side resources, then free the duplicated tokens and the object.
+ */
+static void
+nv10_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv10_fragment_program *fp = hwcso;
+
+ nv10_fragprog_destroy(nv10_context(pipe), fp);
+
+ /* tokens is a const pointer in pipe_shader_state, hence the cast */
+ FREE((void*)fp->pipe.tokens);
+ FREE(fp);
+}
+
+/*
+ * Record the blend colour and flag it for emission.
+ *
+ * NOTE(review): only the caller's pointer is kept (const cast away), not a
+ * copy — confirm the state tracker keeps *bcol alive until it is emitted.
+ */
+static void
+nv10_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->dirty |= NV10_NEW_BLENDCOL;
+ nv10->blend_color = (struct pipe_blend_color*)bcol;
+}
+
+/* Clip state is handled entirely by the draw module; just forward it. */
+static void
+nv10_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct draw_context *draw = nv10_context(pipe)->draw;
+
+ draw_set_clip_state(draw, clip);
+}
+
+/**
+ * Snapshot a constant buffer into the context.
+ *
+ * The buffer is mapped for CPU reads, its whole contents are copied into
+ * nv10->constbuf[shader], and the number of 4-float constants is stored in
+ * nv10->constbuf_nr[shader]. A NULL/empty buffer or a failed map leaves
+ * the previous contents untouched.
+ *
+ * NOTE(review): the copy length is the buffer size with no clamp against
+ * the destination array — confirm the caller bounds the buffer size.
+ */
+static void
+nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ void *mapped;
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index == 0);
+
+ if (!buf || !buf->buffer || !buf->buffer->size)
+     return;
+
+ mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ if (!mapped)
+     return;
+
+ memcpy(nv10->constbuf[shader], mapped, buf->buffer->size);
+ nv10->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float));
+ ws->buffer_unmap(ws, buf->buffer);
+}
+
+/*
+ * Record the framebuffer state and flag it for emission.
+ *
+ * NOTE(review): the caller's pointer is stored, not a copy — confirm *fb
+ * outlives the next nv10_emit_hw_state() call.
+ */
+static void
+nv10_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->dirty |= NV10_NEW_FRAMEBUFFER;
+ nv10->framebuffer = (struct pipe_framebuffer_state*)fb;
+}
+
+/*
+ * Polygon stipple is not implemented: the pattern is ignored and an error
+ * message is printed instead.
+ */
+static void
+nv10_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ NOUVEAU_ERR("line stipple hahaha\n");
+}
+
+/*
+ * Record the scissor rectangle and flag it for emission.
+ *
+ * NOTE(review): the caller's pointer is stored, not a copy — confirm *s
+ * stays valid until the state is emitted.
+ */
+static void
+nv10_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->dirty |= NV10_NEW_SCISSOR;
+ nv10->scissor = (struct pipe_scissor_state*)s;
+}
+
+/*
+ * Record the viewport, forward it to the draw module and flag it dirty.
+ *
+ * NOTE(review): the caller's pointer is stored, not a copy — confirm *vpt
+ * stays valid for as long as nv10->viewport is used.
+ */
+static void
+nv10_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct pipe_viewport_state *viewport = (struct pipe_viewport_state*)vpt;
+
+ nv10->viewport = viewport;
+ draw_set_viewport_state(nv10->draw, viewport);
+ nv10->dirty |= NV10_NEW_VIEWPORT;
+}
+
+/*
+ * Copy `count` vertex buffer descriptors into the context, mark the vertex
+ * arrays dirty and pass the same descriptors on to the draw module.
+ *
+ * NOTE(review): `count` is not checked against the size of nv10->vtxbuf.
+ */
+static void
+nv10_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ const size_t nbytes = sizeof(*vb) * count;
+
+ memcpy(nv10->vtxbuf, vb, nbytes);
+ nv10->dirty |= NV10_NEW_VTXARRAYS;
+
+ draw_set_vertex_buffers(nv10->draw, count, vb);
+}
+
+/*
+ * Copy `count` vertex element descriptors into the context, mark the
+ * vertex arrays dirty and pass the same descriptors to the draw module.
+ *
+ * NOTE(review): `count` is not checked against the size of nv10->vtxelt.
+ */
+static void
+nv10_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ const size_t nbytes = sizeof(*ve) * count;
+
+ memcpy(nv10->vtxelt, ve, nbytes);
+ nv10->dirty |= NV10_NEW_VTXARRAYS;
+
+ draw_set_vertex_elements(nv10->draw, count, ve);
+}
+
+/**
+ * Install the state-object and set_* callbacks of this file into the
+ * pipe_context embedded in `nv10`.
+ */
+void
+nv10_init_state_functions(struct nv10_context *nv10)
+{
+ struct pipe_context *pipe = &nv10->pipe;
+
+ /* blend */
+ pipe->create_blend_state = nv10_blend_state_create;
+ pipe->bind_blend_state = nv10_blend_state_bind;
+ pipe->delete_blend_state = nv10_blend_state_delete;
+
+ /* samplers and their textures */
+ pipe->create_sampler_state = nv10_sampler_state_create;
+ pipe->bind_sampler_states = nv10_sampler_state_bind;
+ pipe->delete_sampler_state = nv10_sampler_state_delete;
+ pipe->set_sampler_textures = nv10_set_sampler_texture;
+
+ /* rasterizer */
+ pipe->create_rasterizer_state = nv10_rasterizer_state_create;
+ pipe->bind_rasterizer_state = nv10_rasterizer_state_bind;
+ pipe->delete_rasterizer_state = nv10_rasterizer_state_delete;
+
+ /* depth / stencil / alpha */
+ pipe->create_depth_stencil_alpha_state =
+     nv10_depth_stencil_alpha_state_create;
+ pipe->bind_depth_stencil_alpha_state =
+     nv10_depth_stencil_alpha_state_bind;
+ pipe->delete_depth_stencil_alpha_state =
+     nv10_depth_stencil_alpha_state_delete;
+
+ /* vertex shaders */
+ pipe->create_vs_state = nv10_vp_state_create;
+ pipe->bind_vs_state = nv10_vp_state_bind;
+ pipe->delete_vs_state = nv10_vp_state_delete;
+
+ /* fragment shaders */
+ pipe->create_fs_state = nv10_fp_state_create;
+ pipe->bind_fs_state = nv10_fp_state_bind;
+ pipe->delete_fs_state = nv10_fp_state_delete;
+
+ /* parameter-style state */
+ pipe->set_blend_color = nv10_set_blend_color;
+ pipe->set_clip_state = nv10_set_clip_state;
+ pipe->set_constant_buffer = nv10_set_constant_buffer;
+ pipe->set_framebuffer_state = nv10_set_framebuffer_state;
+ pipe->set_polygon_stipple = nv10_set_polygon_stipple;
+ pipe->set_scissor_state = nv10_set_scissor_state;
+ pipe->set_viewport_state = nv10_set_viewport_state;
+
+ /* vertex inputs */
+ pipe->set_vertex_buffers = nv10_set_vertex_buffers;
+ pipe->set_vertex_elements = nv10_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h
new file mode 100644
index 0000000000..3a3fd0d4f4
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_state.h
@@ -0,0 +1,139 @@
+#ifndef __NV10_STATE_H__
+#define __NV10_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+/*
+ * Blend CSO: raw values that nv10_state_emit_blend() writes to the
+ * hardware methods named below.
+ */
+struct nv10_blend_state {
+ uint32_t b_enable; /* NV10TCL_BLEND_FUNC_ENABLE */
+ uint32_t b_srcfunc; /* emitted directly after b_enable */
+ uint32_t b_dstfunc; /* emitted directly after b_srcfunc */
+
+ uint32_t c_mask; /* NV10TCL_COLOR_MASK */
+
+ uint32_t d_enable; /* NV10TCL_DITHER_ENABLE */
+};
+
+/*
+ * Sampler CSO.
+ * NOTE(review): field meanings are not visible from the state code here;
+ * presumably wrap modes, enable word, filter word and border colour —
+ * confirm against the sampler create/emit code.
+ */
+struct nv10_sampler_state {
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+/*
+ * Rasterizer CSO: raw values emitted by nv10_state_emit_rast(), plus the
+ * gallium template the object was created from.
+ */
+struct nv10_rasterizer_state {
+ uint32_t shade_model; /* NV10TCL_SHADE_MODEL */
+
+ uint32_t line_width; /* emitted directly after shade_model */
+ uint32_t line_smooth_en; /* NV10TCL_LINE_SMOOTH_ENABLE */
+
+ uint32_t point_size; /* NV10TCL_POINT_SIZE */
+
+ uint32_t poly_smooth_en; /* emitted directly after line_smooth_en */
+
+ uint32_t poly_mode_front; /* NV10TCL_POLYGON_MODE_FRONT */
+ uint32_t poly_mode_back; /* emitted directly after poly_mode_front */
+
+ uint32_t front_face; /* emitted directly after cull_face */
+ uint32_t cull_face; /* NV10TCL_CULL_FACE */
+ uint32_t cull_face_en; /* NV10TCL_CULL_FACE_ENABLE */
+
+ uint32_t point_sprite; /* not emitted by nv10_state_emit_rast() */
+
+ /* original template; passed to the draw module when the CSO is bound */
+ const struct pipe_rasterizer_state *templ;
+};
+
+/*
+ * One vertex program instruction.
+ * NOTE(review): the state code in this driver routes vertex shaders
+ * through the draw module; where this struct is consumed is not visible.
+ */
+struct nv10_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+/* One vertex program constant: a 4-float value and its index. */
+struct nv10_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+/*
+ * Vertex program object: source shader, translated instructions and
+ * constants, plus the resources they occupy.
+ * NOTE(review): nothing in the visible nv10 state code creates or binds
+ * this object (vertex shaders go through the draw module) — confirm
+ * whether it is still used.
+ */
+struct nv10_vertex_program {
+ const struct pipe_shader_state *pipe;
+
+ boolean translated;
+ struct nv10_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv10_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+};
+
+/*
+ * Fragment program constant reference.
+ * NOTE(review): presumably `offset` locates the constant inside the
+ * program and `index` selects the constant-buffer entry — confirm.
+ */
+struct nv10_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+/*
+ * Fragment program CSO.
+ * nv10_fp_state_create() zero-fills the object, stores a private copy of
+ * the TGSI tokens in `pipe.tokens` and fills `info` via tgsi_scan_shader();
+ * the remaining fields are left zero by that function.
+ */
+struct nv10_fragment_program {
+ struct pipe_shader_state pipe; /* owns the duplicated token stream */
+ struct tgsi_shader_info info; /* input semantics drive vertex layout */
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv10_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+};
+
+
+/*
+ * Depth/stencil/alpha CSO: values pre-translated at create time and
+ * written to the hardware by nv10_state_emit_dsa().
+ */
+struct nv10_depth_stencil_alpha_state {
+ struct {
+ uint32_t func; /* NV10TCL_DEPTH_FUNC */
+ uint32_t write_enable; /* NV10TCL_DEPTH_WRITE_ENABLE */
+ uint32_t test_enable; /* NV10TCL_DEPTH_TEST_ENABLE */
+ } depth;
+
+ /*
+ * The (currently disabled) stencil emission pushes the seven words
+ * starting at wmask as one block, so the field order matters.
+ */
+ struct {
+ uint32_t enable;
+ uint32_t wmask;
+ uint32_t func;
+ uint32_t ref;
+ uint32_t vmask;
+ uint32_t fail;
+ uint32_t zfail;
+ uint32_t zpass;
+ } stencil;
+
+ struct {
+ uint32_t enabled; /* NV10TCL_ALPHA_FUNC_ENABLE */
+ uint32_t func; /* NV10TCL_ALPHA_FUNC_FUNC */
+ uint32_t ref; /* NV10TCL_ALPHA_FUNC_REF, 0..255 */
+ } alpha;
+};
+
+/*
+ * Driver texture object. `base` must stay the first member: the state
+ * emission code casts pipe_texture pointers to nv10_miptree.
+ */
+struct nv10_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer; /* storage; also used as render target */
+ uint total_size;
+
+ /* per mipmap level layout */
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c
new file mode 100644
index 0000000000..5dec618b93
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_state_emit.c
@@ -0,0 +1,306 @@
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+/*
+ * Push the bound blend CSO: dither enable, then blend enable/src/dst
+ * functions as one three-word packet, then the colour mask.
+ */
+static void nv10_state_emit_blend(struct nv10_context* nv10)
+{
+ struct nv10_blend_state *b = nv10->blend;
+
+ BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1);
+ OUT_RING (b->d_enable);
+
+ BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3);
+ OUT_RING (b->b_enable);
+ OUT_RING (b->b_srcfunc);
+ OUT_RING (b->b_dstfunc);
+
+ BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
+ OUT_RING (b->c_mask);
+}
+
+/*
+ * Push the blend colour as one packed 32-bit word: color[3] in bits
+ * 31..24, color[0] in 23..16, color[1] in 15..8 and color[2] in 7..0,
+ * each converted from float with float_to_ubyte().
+ */
+static void nv10_state_emit_blend_color(struct nv10_context* nv10)
+{
+ struct pipe_blend_color *c = nv10->blend_color;
+
+ BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1);
+ OUT_RING ((float_to_ubyte(c->color[3]) << 24)|
+ (float_to_ubyte(c->color[0]) << 16)|
+ (float_to_ubyte(c->color[1]) << 8) |
+ (float_to_ubyte(c->color[2]) << 0));
+}
+
+/*
+ * Push the bound rasterizer CSO. Packets with a count of 2 carry two
+ * values, the second following the named method. The point_sprite field
+ * of the CSO is not emitted here.
+ */
+static void nv10_state_emit_rast(struct nv10_context* nv10)
+{
+ struct nv10_rasterizer_state *r = nv10->rast;
+
+ BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2);
+ OUT_RING (r->shade_model);
+ OUT_RING (r->line_width);
+
+
+ BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
+ OUT_RING (r->point_size);
+
+ BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (r->poly_mode_front);
+ OUT_RING (r->poly_mode_back);
+
+
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
+ OUT_RING (r->cull_face);
+ OUT_RING (r->front_face);
+
+ BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2);
+ OUT_RING (r->line_smooth_en);
+ OUT_RING (r->poly_smooth_en);
+
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (r->cull_face_en);
+}
+
+/*
+ * Push the bound depth/stencil/alpha CSO. Depth and alpha test state are
+ * emitted; the stencil packets are compiled out with #if 0, so stencil
+ * state from the CSO never reaches the hardware from this function.
+ */
+static void nv10_state_emit_dsa(struct nv10_context* nv10)
+{
+ struct nv10_depth_stencil_alpha_state *d = nv10->dsa;
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
+ OUT_RING (d->depth.func);
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (d->depth.write_enable);
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (d->depth.test_enable);
+
+#if 0
+ BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1);
+ OUT_RING (d->stencil.enable);
+ BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7);
+ OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+#endif
+
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (d->alpha.enabled);
+
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1);
+ OUT_RING (d->alpha.func);
+
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1);
+ OUT_RING (d->alpha.ref);
+}
+
+/* Placeholder: no viewport state is written to the hardware here. */
+static void nv10_state_emit_viewport(struct nv10_context* nv10)
+{
+}
+
+/*
+ * Scissor emission is disabled: the body below is commented out, so this
+ * function currently emits nothing.
+ */
+static void nv10_state_emit_scissor(struct nv10_context* nv10)
+{
+ // XXX this is so not working
+/* struct pipe_scissor_state *s = nv10->scissor;
+ BEGIN_RING(celsius, NV10TCL_SCISSOR_HORIZ, 2);
+ OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
+}
+
+/**
+ * Emit render-target setup for the bound framebuffer state: surface
+ * pitches, dimensions, colour format and the viewport clip rectangle.
+ * Also records the colour and depth/stencil buffers in nv10->rt[0] and
+ * nv10->zeta so that nv10_emit_hw_state() can emit their relocations.
+ *
+ * When no depth/stencil surface is bound, nv10->zeta is reset to NULL so
+ * that a buffer from an earlier framebuffer is not relocated again.
+ *
+ * NOTE(review): fb->cbufs[0] is dereferenced unconditionally, so a
+ * depth-only framebuffer is not supported even though parts of the code
+ * are written as if colour_format could be 0.
+ */
+static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
+{
+ struct pipe_framebuffer_state* fb = nv10->framebuffer;
+ struct pipe_surface *rt, *zeta = NULL;
+ uint32_t rt_format, w, h;
+ int colour_format = 0;
+ struct nv10_miptree *nv10mt = 0;
+
+ w = fb->cbufs[0]->width;
+ h = fb->cbufs[0]->height;
+ colour_format = fb->cbufs[0]->format;
+ rt = fb->cbufs[0];
+
+ if (fb->zsbuf) {
+     if (colour_format) {
+         assert(w == fb->zsbuf->width);
+         assert(h == fb->zsbuf->height);
+     } else {
+         w = fb->zsbuf->width;
+         h = fb->zsbuf->height;
+     }
+
+     zeta = fb->zsbuf;
+ }
+
+ rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR;
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+     rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8;
+     break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+     rt_format |= NV10TCL_RT_FORMAT_COLOR_R5G6B5;
+     break;
+ default:
+     assert(0);
+ }
+
+ /* colour pitch in the low half, depth pitch (or colour again) on top */
+ BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
+ if (zeta) {
+     OUT_RING (rt->stride | (zeta->stride << 16));
+ } else {
+     OUT_RING (rt->stride | (rt->stride << 16));
+ }
+
+ nv10mt = (struct nv10_miptree *)rt->texture;
+ nv10->rt[0] = nv10mt->buffer;
+
+ if (zeta) {
+     nv10mt = (struct nv10_miptree *)zeta->texture;
+     nv10->zeta = nv10mt->buffer;
+ } else {
+     /* forget the depth buffer of any previously bound framebuffer */
+     nv10->zeta = NULL;
+ }
+
+ BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3);
+ OUT_RING ((w << 16) | 0);
+ OUT_RING ((h << 16) | 0);
+ OUT_RING (rt_format);
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ OUT_RING (((w - 1) << 16) | 0 | 0x08000800);
+ OUT_RING (((h - 1) << 16) | 0 | 0x08000800);
+}
+
+/*
+ * Build a draw-module vertex layout from the current fragment program's
+ * inputs: one EMIT_4F attribute per input, with linear interpolation for
+ * position and colour and perspective interpolation for everything else.
+ *
+ * NOTE(review): the layout is built in a local vertex_info and discarded
+ * when the function returns — confirm whether it was meant to be stored.
+ */
+static void nv10_vertex_layout(struct nv10_context *nv10)
+{
+ struct nv10_fragment_program *fp = nv10->fragprog.current;
+ struct vertex_info vinfo;
+ uint32_t src = 0;
+ int i;
+
+ memset(&vinfo, 0, sizeof(vinfo));
+
+ for (i = 0; i < fp->info.num_inputs; i++) {
+     switch (fp->info.input_semantic_name[i]) {
+     case TGSI_SEMANTIC_POSITION:
+     case TGSI_SEMANTIC_COLOR:
+         draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++);
+         break;
+     default: /* generic, fog and any other semantic */
+         draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
+         break;
+     }
+ }
+
+ draw_compute_vertex_size(&vinfo);
+}
+
+/**
+ * Flush all dirty state to the hardware.
+ *
+ * Each NV10_NEW_* bit set in nv10->dirty triggers the matching emit
+ * helper and is cleared. Afterwards buffer relocations for the render
+ * target, the depth buffer (if any) and every texture unit used by the
+ * fragment program are emitted unconditionally.
+ */
+void
+nv10_emit_hw_state(struct nv10_context *nv10)
+{
+ int i;
+
+ if (nv10->dirty & NV10_NEW_VERTPROG) {
+ //nv10_vertprog_bind(nv10, nv10->vertprog.current);
+ nv10->dirty &= ~NV10_NEW_VERTPROG;
+ }
+
+ if (nv10->dirty & NV10_NEW_FRAGPROG) {
+ nv10_fragprog_bind(nv10, nv10->fragprog.current);
+ /*XXX: clear NV10_NEW_FRAGPROG if no new program uploaded */
+ /* NOTE(review): the OR below is immediately overwritten by the
+ * assignment that follows it — confirm which one was intended. */
+ nv10->dirty_samplers |= (1<<10);
+ nv10->dirty_samplers = 0;
+ }
+
+ /* textures are rebound whenever samplers or the fragment program changed */
+ if (nv10->dirty_samplers || (nv10->dirty & NV10_NEW_FRAGPROG)) {
+ nv10_fragtex_bind(nv10);
+ nv10->dirty &= ~NV10_NEW_FRAGPROG;
+ }
+
+ if (nv10->dirty & NV10_NEW_VTXARRAYS) {
+ nv10->dirty &= ~NV10_NEW_VTXARRAYS;
+ nv10_vertex_layout(nv10);
+ nv10_vtxbuf_bind(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_BLEND) {
+ nv10->dirty &= ~NV10_NEW_BLEND;
+ nv10_state_emit_blend(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_BLENDCOL) {
+ nv10->dirty &= ~NV10_NEW_BLENDCOL;
+ nv10_state_emit_blend_color(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_RAST) {
+ nv10->dirty &= ~NV10_NEW_RAST;
+ nv10_state_emit_rast(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_DSA) {
+ nv10->dirty &= ~NV10_NEW_DSA;
+ nv10_state_emit_dsa(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_VIEWPORT) {
+ nv10->dirty &= ~NV10_NEW_VIEWPORT;
+ nv10_state_emit_viewport(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_SCISSOR) {
+ nv10->dirty &= ~NV10_NEW_SCISSOR;
+ nv10_state_emit_scissor(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_FRAMEBUFFER) {
+ nv10->dirty &= ~NV10_NEW_FRAMEBUFFER;
+ nv10_state_emit_framebuffer(nv10);
+ }
+
+ /* Emit relocs for every referenced buffer.
+ * This is to ensure the bufmgr has an accurate idea of how
+ * the buffer is used. This isn't very efficient, but we don't
+ * seem to take a significant performance hit. Will be improved
+ * at some point. Vertex arrays are emitted by nv10_vbo.c
+ */
+
+ /* Render target */
+// XXX figure out who's who for NV10TCL_DMA_* and fill accordingly
+// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1);
+// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ if (nv10->zeta) {
+// XXX
+// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1);
+// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ /* XXX for when we allocate LMA on nv17 */
+/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+ OUT_RELOCl(nv10->zeta + lma_offset);*/
+ }
+
+ /* Vertex buffer */
+ /* NOTE(review): this section relocates the render target (rt[0]) with
+ * write access and re-emits COLOR_OFFSET rather than referencing a
+ * vertex buffer — looks like a copy of the block above; confirm. */
+ BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1);
+ OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ /* Texture images */
+ /* only units whose bit is set in fp_samplers are relocated */
+ for (i = 0; i < 2; i++) {
+ if (!(nv10->fp_samplers & (1 << i)))
+ continue;
+ BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1);
+ OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1);
+ OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+ NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0,
+ NV10TCL_TX_FORMAT_DMA1);
+ }
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_surface.c b/src/gallium/drivers/nv10/nv10_surface.c
new file mode 100644
index 0000000000..2538151063
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_surface.c
@@ -0,0 +1,72 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv10_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+/**
+ * Copy a width x height rectangle between surfaces with the 2D engine.
+ *
+ * With do_flip set, the rectangle is copied one scanline at a time:
+ * source rows ascend from srcy while destination rows descend.
+ *
+ * NOTE(review): the flipped path writes its first row at desty + height
+ * rather than desty + height - 1 — confirm the intended destination range.
+ */
+static void
+nv10_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct nv04_surface_2d *eng2d = nv10->screen->eng2d;
+ unsigned row;
+
+ if (!do_flip) {
+     eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height);
+     return;
+ }
+
+ for (row = 0; row < height; row++) {
+     eng2d->copy(eng2d, dest, destx, desty + height - row, src,
+                 srcx, srcy + row, width, 1);
+ }
+}
+
+/* Fill a rectangle of `dest` with `value` using the 2D engine. */
+static void
+nv10_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv04_surface_2d *eng2d = nv10_context(pipe)->screen->eng2d;
+
+ eng2d->fill(eng2d, dest, destx, desty, width, height, value);
+}
+
+/* Install the surface copy/fill callbacks into the context's pipe. */
+void
+nv10_init_surface_functions(struct nv10_context *nv10)
+{
+ struct pipe_context *pipe = &nv10->pipe;
+
+ pipe->surface_copy = nv10_surface_copy;
+ pipe->surface_fill = nv10_surface_fill;
+}
diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c
new file mode 100644
index 0000000000..d0e788ac03
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_vbo.c
@@ -0,0 +1,77 @@
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+/**
+ * Draw entry point shared by indexed and non-indexed draws.
+ *
+ * Emits pending hardware state, maps every bound vertex buffer (and the
+ * index buffer, if one is given) for CPU reads, points the draw module at
+ * the mappings and at the vertex-shader constants, runs draw_arrays(),
+ * then unmaps everything again. Always returns TRUE.
+ *
+ * NOTE(review): buffer_map() results are handed to the draw module
+ * without a NULL check — confirm a failed map cannot occur here.
+ */
+boolean nv10_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct nv10_context *nv10 = nv10_context( pipe );
+ struct draw_context *draw = nv10->draw;
+ unsigned i;
+
+ nv10_emit_hw_state(nv10);
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv10->vtxbuf[i].buffer) {
+ void *buf
+ = pipe->winsys->buffer_map(pipe->winsys,
+ nv10->vtxbuf[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes
+ = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ /* vertex shader constants come from the CPU-side copy in the context */
+ draw_set_mapped_constant_buffer(draw,
+ nv10->constbuf[PIPE_SHADER_VERTEX],
+ nv10->constbuf_nr[PIPE_SHADER_VERTEX]);
+
+ /* draw! */
+ draw_arrays(nv10->draw, prim, start, count);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv10->vtxbuf[i].buffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ }
+ if (indexBuffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ return TRUE;
+}
+
+/* Non-indexed draw: nv10_draw_elements() with no index buffer. */
+boolean nv10_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
+{
+ return nv10_draw_elements(pipe, NULL, 0, prim, start, count);
+}
+
+
+
diff --git a/src/gallium/drivers/nv20/Makefile b/src/gallium/drivers/nv20/Makefile
new file mode 100644
index 0000000000..93e34f8e92
--- /dev/null
+++ b/src/gallium/drivers/nv20/Makefile
@@ -0,0 +1,20 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv20
+
+C_SOURCES = \
+ nv20_clear.c \
+ nv20_context.c \
+ nv20_fragprog.c \
+ nv20_fragtex.c \
+ nv20_miptree.c \
+ nv20_prim_vbuf.c \
+ nv20_screen.c \
+ nv20_state.c \
+ nv20_state_emit.c \
+ nv20_surface.c \
+ nv20_vbo.c
+# nv20_vertprog.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/nv20/nv20_clear.c b/src/gallium/drivers/nv20/nv20_clear.c
new file mode 100644
index 0000000000..29f4afd87c
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_clear.c
@@ -0,0 +1,13 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv20_context.h"
+
+/*
+ * Clear a whole surface by filling it with clearValue through the
+ * context's surface_fill hook, then mark the surface as cleared.
+ */
+void
+nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+ ps->status = PIPE_SURFACE_STATUS_CLEAR;
+}
diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
new file mode 100644
index 0000000000..1659aec8fa
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -0,0 +1,419 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "nv20_context.h"
+#include "nv20_screen.h"
+
+/*
+ * Flush the draw module, then fire the command ring, passing `fence`
+ * through to FIRE_RING. The `flags` argument is not consulted.
+ */
+static void
+nv20_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ draw_flush(nv20->draw);
+
+ FIRE_RING(fence);
+}
+
+/* Destroy the context: tear down the draw module (if any), then free it. */
+static void
+nv20_destroy(struct pipe_context *pipe)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ if (nv20->draw)
+ draw_destroy(nv20->draw);
+
+ FREE(nv20);
+}
+
+/**
+ * Push the initial state of the Kelvin 3D object and fire the ring.
+ *
+ * The stream binds the DMA objects, resets render-target and viewport
+ * clip registers, programs register-combiner, blend, stencil, lighting,
+ * polygon, point/line, fog and texgen defaults, loads default vertex
+ * attributes and finally sets the projection matrix, depth range and
+ * viewport scale. A few packets differ depending on whether the object
+ * class is NV25TCL. Several methods are addressed by raw offset (0x17e0,
+ * 0x290, ...) because no symbolic name is used for them here.
+ */
+static void nv20_init_hwctx(struct nv20_context *nv20)
+{
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_winsys *nvws = screen->nvws;
+ int i;
+ float projectionmatrix[16];
+ const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL);
+
+ BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1);
+ OUT_RING (screen->sync->handle);
+ BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->gart->handle); /* TEXTURE1 */
+ BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->vram->handle); /* ZETA */
+
+ BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1);
+ OUT_RING (0); /* renouveau: beef0351, unique */
+
+ BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+ OUT_RING ((0xfff << 16) | 0x0);
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
+ OUT_RING ((0xfff << 16) | 0x0);
+
+ /* the remaining clip rectangles are zeroed */
+ for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) {
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1);
+ OUT_RING (0);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, 0x17e0, 3);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
+ OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0);
+ } else {
+ BEGIN_RING(kelvin, 0x1e68, 1);
+ OUT_RING (0x4b800000); /* 16777216.000000 */
+ BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
+ OUT_RING (NV20TCL_TX_RCOMP_LEQUAL);
+ }
+
+ BEGIN_RING(kelvin, 0x290, 1);
+ OUT_RING ((0x10 << 16) | 1);
+ BEGIN_RING(kelvin, 0x9fc, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, 0x1d80, 1);
+ OUT_RING (1);
+ BEGIN_RING(kelvin, 0x9f8, 1);
+ OUT_RING (4);
+ BEGIN_RING(kelvin, 0x17ec, 3);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+ OUT_RINGf (0.0);
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1d88, 1);
+ OUT_RING (3);
+
+ BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1);
+ OUT_RING (nvws->channel->vram->handle);
+ BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1);
+ OUT_RING (nvws->channel->vram->handle);
+ }
+ BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1);
+ OUT_RING (0); /* renouveau: beef1e10 */
+
+ BEGIN_RING(kelvin, 0x1e98, 1);
+ OUT_RING (0);
+#if 0
+ if (is_nv25tcl) {
+ BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2);
+ OUT_RING (NvDmaTT); /* renouveau: beef0202 */
+ OUT_RING (NvDmaFB); /* renouveau: beef0201 */
+
+ BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1);
+ OUT_RING (NvDmaTT); /* renouveau: beef0202 */
+ }
+#endif
+ BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, 0x120, 3);
+ OUT_RING (0);
+ OUT_RING (1);
+ OUT_RING (2);
+
+/* error: ILLEGAL_MTHD, PROTECTION_FAULT
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+ OUT_RINGf (0.0);
+ OUT_RINGf (512.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+*/
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x022c, 2);
+ OUT_RING (0x280);
+ OUT_RING (0x07d28000);
+ }
+
+/* * illegal method, protection fault
+ BEGIN_RING(NvSub3D, 0x1c2c, 1);
+ OUT_RING (0); */
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1da4, 1);
+ OUT_RING (0);
+ }
+
+/* * crashes with illegal method, protection fault
+ BEGIN_RING(NvSub3D, 0x1c18, 1);
+ OUT_RING (0x200); */
+
+ BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
+ OUT_RING ((0 << 16) | 0);
+ OUT_RING ((0 << 16) | 0);
+
+ /* *** Set state *** */
+
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2);
+ OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS);
+ OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */
+
+ for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) {
+ BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1);
+ OUT_RING (0);
+ }
+ BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4);
+ OUT_RING (0x30d410d0);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4);
+ OUT_RING (0x00000c00);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1);
+ OUT_RING (0x00011101);
+ BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2);
+ OUT_RING (0x130e0300);
+ OUT_RING (0x0c091c80);
+ BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4);
+ OUT_RING (0x00000c00);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4);
+ OUT_RING (0x20c400c0);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4);
+ OUT_RING (0x035125a0);
+ OUT_RING (0);
+ OUT_RING (0x40002000);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1);
+ OUT_RING (0xffff0000);
+
+ BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4);
+ OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE);
+ OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO);
+ OUT_RING (0); /* NV20TCL_BLEND_COLOR */
+ OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD);
+ BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
+ OUT_RING (0xff);
+ OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS);
+ OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */
+ OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */
+ OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP);
+ OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP);
+ OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP);
+
+ BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY);
+ BEGIN_RING(kelvin, 0x17cc, 1);
+ OUT_RING (0);
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1d84, 1);
+ OUT_RING (1);
+ }
+ BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1);
+ OUT_RING (0x00020000);
+ BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0),
+ NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE);
+ for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) {
+ OUT_RING(0xffffffff);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ OUT_RING (0);
+ OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */
+ OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
+ OUT_RING (NV20TCL_DEPTH_FUNC_LESS);
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+ OUT_RING (1);
+ if (!is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1d84, 1);
+ OUT_RING (3);
+ }
+ /* point size is pushed as an integer (8) unless NV25TCL, then float 1.0 */
+ BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
+ if (!is_nv25tcl) {
+ OUT_RING (8);
+ } else {
+ OUT_RINGf (1.0);
+ }
+ if (!is_nv25tcl) {
+ BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */
+ } else {
+ BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, 0x0a1c, 1);
+ OUT_RING (0x800);
+ }
+ BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1);
+ OUT_RING (8);
+ BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL);
+ OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL);
+ BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
+ OUT_RING (NV20TCL_CULL_FACE_BACK);
+ OUT_RING (NV20TCL_FRONT_FACE_CCW);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1);
+ OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE);
+ for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) {
+ OUT_RING(0);
+ }
+ BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3);
+ OUT_RINGf (1.5);
+ OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */
+ OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */
+ BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2);
+ OUT_RING (NV20TCL_FOG_MODE_EXP_2);
+ OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG);
+ BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0); /* NV20TCL.FOG_COLOR */
+ BEGIN_RING(kelvin, NV20TCL_ENGINE, 1);
+ OUT_RING (NV20TCL_ENGINE_FIXED);
+
+ for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) {
+ BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1);
+ OUT_RING (0);
+ }
+
+ /* 15 default attributes of 4 floats each: 3 written out, 12 in the loop */
+ BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15);
+ OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
+ OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
+ OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
+ for (i = 4; i < 16; ++i) {
+ OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (1);
+ BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
+ OUT_RING (0x00010101);
+ BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1);
+ OUT_RING (0);
+
+ /* diagonal projection matrix: 1, 1, 16777215, 1 */
+ memset(projectionmatrix, 0, sizeof(projectionmatrix));
+ projectionmatrix[0*4+0] = 1.0;
+ projectionmatrix[1*4+1] = 1.0;
+ projectionmatrix[2*4+2] = 16777215.0;
+ projectionmatrix[3*4+3] = 1.0;
+ BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16);
+ for (i = 0; i < 16; i++) {
+ OUT_RINGf (projectionmatrix[i]);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2);
+ OUT_RINGf (0.0);
+ OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE0_X, 4);
+ OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */
+ OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */
+ OUT_RINGf (0.0);
+ OUT_RINGf (16777215.0);
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE1_X, 4);
+ OUT_RINGf (0.0); /* no effect?, w/2 */
+ OUT_RINGf (0.0); /* no effect?, h/2 */
+ OUT_RINGf (16777215.0 * 0.5);
+ OUT_RINGf (65535.0);
+
+ FIRE_RING (NULL);
+}
+
+/* Edge flags are not supported: the callback is installed but does nothing. */
+static void
+nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+/**
+ * Create an NV20 pipe context on `pscreen`.
+ *
+ * Allocates the context, installs the pipe callbacks, creates the draw
+ * module with the driver's vbuf stage as rasterize stage, and pushes the
+ * initial hardware state.
+ *
+ * Returns the embedded pipe_context, or NULL if the context or the draw
+ * module cannot be allocated.
+ */
+struct pipe_context *
+nv20_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv20_screen *screen = nv20_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv20_context *nv20;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv20 = CALLOC(1, sizeof(struct nv20_context));
+ if (!nv20)
+     return NULL;
+ nv20->screen = screen;
+ nv20->pctx_id = pctx_id;
+
+ nv20->nvws = nvws;
+
+ nv20->pipe.winsys = ws;
+ nv20->pipe.screen = pscreen;
+ nv20->pipe.destroy = nv20_destroy;
+ nv20->pipe.set_edgeflags = nv20_set_edgeflags;
+ nv20->pipe.draw_arrays = nv20_draw_arrays;
+ nv20->pipe.draw_elements = nv20_draw_elements;
+ nv20->pipe.clear = nv20_clear;
+ nv20->pipe.flush = nv20_flush;
+
+ nv20_init_surface_functions(nv20);
+ nv20_init_state_functions(nv20);
+
+ nv20->draw = draw_create();
+ if (!nv20->draw) {
+     /* an assert alone would vanish in release builds */
+     FREE(nv20);
+     return NULL;
+ }
+ draw_set_rasterize_stage(nv20->draw, nv20_draw_vbuf_stage(nv20));
+
+ nv20_init_hwctx(nv20);
+
+ return &nv20->pipe;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h
new file mode 100644
index 0000000000..8ad926db20
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_context.h
@@ -0,0 +1,153 @@
+#ifndef __NV20_CONTEXT_H__
+#define __NV20_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+/* Expands to a declaration of a 'struct nv20_screen *' named by the macro
+ * argument, initialised from a local variable that must be called 'nv20'.
+ * Defined before nouveau_push.h is included; presumably used by the ring
+ * macros declared there -- confirm against that header. */
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv20_screen *ctx = nv20->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv20_state.h"
+
+/* Diagnostics written to stderr. NOUVEAU_ERR prefixes the message with the
+ * calling function name and line number; NOUVEAU_MSG prefixes "nouveau: ".
+ * NOTE(review): both expansions end in ';', so "NOUVEAU_ERR(...);" yields an
+ * extra empty statement and cannot be used as the body of an unbraced
+ * if/else. Removing the semicolon needs an audit of all call sites. */
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+/* Bit flags OR-ed into nv20_context::dirty to mark a state group as changed. */
+#define NV20_NEW_VERTPROG (1 << 0)
+#define NV20_NEW_FRAGPROG (1 << 1)
+#define NV20_NEW_VTXARRAYS (1 << 2)
+#define NV20_NEW_BLEND (1 << 3)
+#define NV20_NEW_BLENDCOL (1 << 4)
+#define NV20_NEW_RAST (1 << 5)
+#define NV20_NEW_DSA (1 << 6)
+#define NV20_NEW_VIEWPORT (1 << 7)
+#define NV20_NEW_SCISSOR (1 << 8)
+#define NV20_NEW_FRAMEBUFFER (1 << 9)
+
+#include "nv20_screen.h"
+
+/* Per-context driver state. 'pipe' must stay the first member: the
+ * nv20_context() helper casts a pipe_context pointer straight to this type. */
+struct nv20_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv20_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ /* bitmask of NV20_NEW_* flags */
+ uint32_t dirty;
+
+ struct nv20_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv20_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned dirty_samplers;
+ unsigned fp_samplers;
+ unsigned vp_samplers;
+
+ uint32_t rt_enable;
+ struct pipe_buffer *rt[4];
+ struct pipe_buffer *zeta;
+ uint32_t lma_offset;
+
+ /* currently bound state objects */
+ struct nv20_blend_state *blend;
+ struct pipe_blend_color *blend_color;
+ struct nv20_rasterizer_state *rast;
+ struct nv20_depth_stencil_alpha_state *dsa;
+ struct pipe_viewport_state *viewport;
+ struct pipe_scissor_state *scissor;
+ struct pipe_framebuffer_state *framebuffer;
+
+ //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ /* NOTE(review): this declares an array of float POINTERS; confirm whether
+  * plain float values (float constbuf[..][32][4]) were intended. */
+ float *constbuf[PIPE_SHADER_TYPES][32][4];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+
+ /* vertex layout handed to the draw module's vbuf stage */
+ struct vertex_info vertex_info;
+
+ struct {
+ struct pipe_buffer *buffer;
+ uint32_t format;
+ } tex[2];
+
+ unsigned vb_enable;
+ struct {
+ struct pipe_buffer *buffer;
+ unsigned delta;
+ } vb[16];
+
+/* struct {
+
+ struct nouveau_resource *exec_heap;
+ struct nouveau_resource *data_heap;
+
+ struct nv20_vertex_program *active;
+
+ struct nv20_vertex_program *current;
+ } vertprog;
+*/
+ struct {
+ struct nv20_fragment_program *active;
+
+ struct nv20_fragment_program *current;
+ struct pipe_buffer *constant_buf;
+ } fragprog;
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+};
+
+/* Downcast a pipe_context to the nv20_context that embeds it as its
+ * first member. */
+static INLINE struct nv20_context *
+nv20_context(struct pipe_context *pipe)
+{
+	struct nv20_context *nv20 = (struct nv20_context *)pipe;
+
+	return nv20;
+}
+
+extern void nv20_init_state_functions(struct nv20_context *nv20);
+extern void nv20_init_surface_functions(struct nv20_context *nv20);
+
+extern void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv20_clear.c */
+extern void nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv20_draw.c */
+extern struct draw_stage *nv20_draw_render_stage(struct nv20_context *nv20);
+
+/* nv20_fragprog.c */
+extern void nv20_fragprog_bind(struct nv20_context *,
+ struct nv20_fragment_program *);
+extern void nv20_fragprog_destroy(struct nv20_context *,
+ struct nv20_fragment_program *);
+
+/* nv20_fragtex.c */
+extern void nv20_fragtex_bind(struct nv20_context *);
+
+/* nv20_prim_vbuf.c */
+struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 );
+extern void nv20_vtxbuf_bind(struct nv20_context* nv20);
+
+/* nv20_state.c and friends */
+extern void nv20_emit_hw_state(struct nv20_context *nv20);
+extern void nv20_state_tex_update(struct nv20_context *nv20);
+
+/* nv20_vbo.c */
+extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv20_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/nv20/nv20_fragprog.c b/src/gallium/drivers/nv20/nv20_fragprog.c
new file mode 100644
index 0000000000..4f496369dd
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_fragprog.c
@@ -0,0 +1,21 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv20_context.h"
+
+/* Fragment program bind hook; currently an empty stub that ignores both
+ * arguments. */
+void
+nv20_fragprog_bind(struct nv20_context *nv20, struct nv20_fragment_program *fp)
+{
+}
+
+/* Fragment program destroy hook; currently an empty stub, nothing is freed. */
+void
+nv20_fragprog_destroy(struct nv20_context *nv20,
+ struct nv20_fragment_program *fp)
+{
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c
new file mode 100644
index 0000000000..495a7be912
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_fragtex.c
@@ -0,0 +1,124 @@
+#include "nv20_context.h"
+#include "nouveau/nouveau_util.h"
+
+/* Table-entry helper: builds one nv20_texture_format initialiser with
+ * defined = TRUE, pipe = PIPE_FORMAT_<m> and
+ * format = NV20TCL_TX_FORMAT_FORMAT_<tf>. */
+#define _(m,tf) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV20TCL_TX_FORMAT_FORMAT_##tf, \
+}
+
+/* One row of the pipe-format to hardware texture-format table. */
+struct nv20_texture_format {
+ boolean defined; /* FALSE only in the terminating entry */
+ uint pipe; /* PIPE_FORMAT_* value */
+ int format; /* NV20TCL_TX_FORMAT_FORMAT_* value */
+};
+
+/* Pipe formats that have a matching NV20TCL_TX_FORMAT_FORMAT_* value.
+ * The list ends with an entry whose 'defined' flag is FALSE, which is what
+ * nv20_fragtex_format() stops on. */
+static struct nv20_texture_format
+nv20_texture_formats[] = {
+	_(A8R8G8B8_UNORM, A8R8G8B8),
+	_(A1R5G5B5_UNORM, A1R5G5B5),
+	_(A4R4G4B4_UNORM, A4R4G4B4),
+	_(L8_UNORM      , L8      ),
+	_(A8_UNORM      , A8      ),
+	_(A8L8_UNORM    , A8L8    ),
+/*	_(RGB_DXT1      , DXT1,   ), */
+/*	_(RGBA_DXT1     , DXT1,   ), */
+/*	_(RGBA_DXT3     , DXT3,   ), */
+/*	_(RGBA_DXT5     , DXT5,   ), */
+	/* terminator, spelled out: an empty "{}" initialiser is a GNU
+	 * extension before C23 */
+	{ FALSE, 0, 0 },
+};
+
+/* Look up the table entry for a pipe format.
+ * Returns NULL when the terminating entry is reached without a match. */
+static struct nv20_texture_format *
+nv20_fragtex_format(uint pipe_format)
+{
+	struct nv20_texture_format *entry;
+
+	for (entry = nv20_texture_formats; entry->defined; entry++) {
+		if (entry->pipe == pipe_format)
+			return entry;
+	}
+
+	return NULL;
+}
+
+
+/* Emit the texture state for one sampler unit.
+ * The whole body is compiled out with "#if 0", so the function currently
+ * does nothing.
+ * NOTE(review): in the disabled code 'txs' is pushed with OUT_RING but never
+ * assigned, 'txp' is unused, and NV10TCL_* names are mixed with NV20TCL_*;
+ * these need fixing before the block is re-enabled. */
+static void
+nv20_fragtex_build(struct nv20_context *nv20, int unit)
+{
+#if 0
+ struct nv20_sampler_state *ps = nv20->tex_sampler[unit];
+ struct nv20_miptree *nv20mt = nv20->tex_miptree[unit];
+ struct pipe_texture *pt = &nv20mt->base;
+ struct nv20_texture_format *tf;
+ uint32_t txf, txs, txp;
+
+ tf = nv20_fragtex_format(pt->format);
+ if (!tf || !tf->defined) {
+ NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format);
+ return;
+ }
+
+ /* pack format, level count and log2 dimensions into one word */
+ txf = tf->format << 8;
+ txf |= (pt->last_level + 1) << 16;
+ txf |= log2i(pt->width[0]) << 20;
+ txf |= log2i(pt->height[0]) << 24;
+ txf |= log2i(pt->depth[0]) << 28;
+ txf |= 8;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV10TCL_TX_FORMAT_CUBE_MAP;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= (2<<4);
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= (1<<4);
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return;
+ }
+
+ BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8);
+ OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING (ps->wrap);
+ OUT_RING (0x40000000); /* enable */
+ OUT_RING (txs);
+ OUT_RING (ps->filt | 0x2000 /* magic */);
+ OUT_RING ((pt->width[0] << 16) | pt->height[0]);
+ OUT_RING (ps->bcol);
+#endif
+}
+
+/* Bring the hardware texture units in line with the active fragment program.
+ * The whole body is compiled out with "#if 0", so the function currently
+ * does nothing. */
+void
+nv20_fragtex_bind(struct nv20_context *nv20)
+{
+#if 0
+ struct nv20_fragment_program *fp = nv20->fragprog.active;
+ unsigned samplers, unit;
+
+ /* units enabled previously but not used by this program: disable */
+ samplers = nv20->fp_samplers & ~fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1);
+ OUT_RING (0);
+ }
+
+ /* dirty units used by this program: rebuild their state */
+ samplers = nv20->dirty_samplers & fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ nv20_fragtex_build(nv20, unit);
+ }
+
+ nv20->fp_samplers = fp->samplers;
+#endif
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
new file mode 100644
index 0000000000..ef7e9c5428
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -0,0 +1,206 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv20_context.h"
+#include "nv20_screen.h"
+
+/*
+ * Fill in the per-level dimensions and block counts of the texture, the
+ * pitch of every level (rounded up to a multiple of 64), the per-face image
+ * offsets and the total size of the miptree.
+ */
+static void
+nv20_miptree_layout(struct nv20_miptree *nv20mt)
+{
+	struct pipe_texture *pt = &nv20mt->base;
+	const boolean swizzled = FALSE;
+	const int nr_faces = (pt->target == PIPE_TEXTURE_CUBE) ? 6 : 1;
+	uint w = pt->width[0];
+	uint h = pt->height[0];
+	uint offset = 0;
+	int face, lvl;
+
+	for (lvl = 0; lvl <= pt->last_level; lvl++) {
+		pt->width[lvl] = w;
+		pt->height[lvl] = h;
+		pt->nblocksx[lvl] = pf_get_nblocksx(&pt->block, w);
+		pt->nblocksy[lvl] = pf_get_nblocksy(&pt->block, h);
+
+		/* non-swizzled levels all use the level-0 block count */
+		nv20mt->level[lvl].pitch =
+			pt->nblocksx[swizzled ? lvl : 0] * pt->block.size;
+		nv20mt->level[lvl].pitch =
+			(nv20mt->level[lvl].pitch + 63) & ~63;
+
+		nv20mt->level[lvl].image_offset =
+			CALLOC(nr_faces, sizeof(unsigned));
+
+		w = MAX2(1, w >> 1);
+		h = MAX2(1, h >> 1);
+	}
+
+	/* faces are laid out one after another, each with all of its levels */
+	for (face = 0; face < nr_faces; face++) {
+		for (lvl = 0; lvl <= pt->last_level; lvl++) {
+			nv20mt->level[lvl].image_offset[face] = offset;
+			offset += nv20mt->level[lvl].pitch * pt->height[lvl];
+		}
+	}
+
+	nv20mt->total_size = offset;
+}
+
+/**
+ * Wrap an existing buffer in a texture object.
+ *
+ * \param pscreen  screen the texture belongs to
+ * \param pt       template describing the texture
+ * \param stride   stride[0] becomes the pitch of level 0
+ * \param pb       buffer to reference as texture storage
+ * \return the new texture, or NULL for unsupported templates or when an
+ *         allocation fails
+ */
+static struct pipe_texture *
+nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
+		     const unsigned *stride, struct pipe_buffer *pb)
+{
+	struct nv20_miptree *mt;
+
+	/* Only supports 2D, non-mipmapped textures for the moment */
+	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
+	    pt->depth[0] != 1)
+		return NULL;
+
+	mt = CALLOC_STRUCT(nv20_miptree);
+	if (!mt)
+		return NULL;
+
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+	mt->level[0].pitch = stride[0];
+	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
+	if (!mt->level[0].image_offset) {
+		/* nv20_miptree_surface_get() dereferences image_offset, so a
+		 * texture without it must not be handed out */
+		FREE(mt);
+		return NULL;
+	}
+
+	pipe_buffer_reference(pscreen, &mt->buffer, pb);
+	return &mt->base;
+}
+
+/**
+ * Create a texture and allocate its backing buffer.
+ *
+ * \param screen  screen the texture belongs to
+ * \param pt      template describing the texture
+ * \return the new texture, or NULL when an allocation fails
+ */
+static struct pipe_texture *
+nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
+{
+	struct pipe_winsys *ws = screen->winsys;
+	struct nv20_miptree *mt;
+	unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL |
+			     NOUVEAU_BUFFER_USAGE_TEXTURE;
+
+	/* zero-initialised so that unused level[] entries and 'buffer' never
+	 * hold garbage (the original used a plain MALLOC) */
+	mt = CALLOC_STRUCT(nv20_miptree);
+	if (!mt)
+		return NULL;
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = screen;
+
+	/* Swizzled textures must be POT */
+	if (pt->width[0] & (pt->width[0] - 1) ||
+	    pt->height[0] & (pt->height[0] - 1))
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	else
+	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
+			     PIPE_TEXTURE_USAGE_DISPLAY_TARGET))
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	else
+	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	else {
+		switch (pt->format) {
+		/* TODO: Figure out which formats can be swizzled */
+		case PIPE_FORMAT_A8R8G8B8_UNORM:
+		case PIPE_FORMAT_X8R8G8B8_UNORM:
+		case PIPE_FORMAT_R16_SNORM:
+			break;
+		default:
+			mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+		}
+	}
+
+	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
+		buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+
+	nv20_miptree_layout(mt);
+
+	mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size);
+	if (!mt->buffer) {
+		unsigned l;
+
+		/* release the per-level offset arrays allocated by
+		 * nv20_miptree_layout(); the original leaked them here */
+		for (l = 0; l <= mt->base.last_level; l++) {
+			if (mt->level[l].image_offset)
+				FREE(mt->level[l].image_offset);
+		}
+		FREE(mt);
+		return NULL;
+	}
+
+	return &mt->base;
+}
+
+/* Drop one reference to *pt and clear the caller's pointer. When the count
+ * reaches zero, the buffer reference, the per-level offset arrays and the
+ * miptree itself are released. */
+static void
+nv20_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt)
+{
+	struct pipe_texture *tex = *pt;
+	struct nv20_miptree *nv20mt;
+	int lvl;
+
+	*pt = NULL;
+	if (--tex->refcount > 0)
+		return;
+
+	nv20mt = (struct nv20_miptree *)tex;
+
+	pipe_buffer_reference(screen, &nv20mt->buffer, NULL);
+	for (lvl = 0; lvl <= tex->last_level; lvl++) {
+		if (nv20mt->level[lvl].image_offset)
+			FREE(nv20mt->level[lvl].image_offset);
+	}
+	FREE(nv20mt);
+}
+
+/* Create a surface describing one image (face / level / zslice) of a
+ * texture. Returns NULL when the surface cannot be allocated. */
+static struct pipe_surface *
+nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
+			 unsigned face, unsigned level, unsigned zslice,
+			 unsigned flags)
+{
+	struct nv20_miptree *nv20mt = (struct nv20_miptree *)pt;
+	struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface);
+	unsigned image;
+
+	if (!surf)
+		return NULL;
+
+	/* which entry of the level's image_offset array to use */
+	switch (pt->target) {
+	case PIPE_TEXTURE_CUBE:
+		image = face;
+		break;
+	case PIPE_TEXTURE_3D:
+		image = zslice;
+		break;
+	default:
+		image = 0;
+		break;
+	}
+
+	pipe_texture_reference(&surf->texture, pt);
+	surf->refcount = 1;
+	surf->status = PIPE_SURFACE_STATUS_DEFINED;
+	surf->usage = flags;
+	surf->format = pt->format;
+	surf->block = pt->block;
+	surf->width = pt->width[level];
+	surf->height = pt->height[level];
+	surf->nblocksx = pt->nblocksx[level];
+	surf->nblocksy = pt->nblocksy[level];
+	surf->stride = nv20mt->level[level].pitch;
+	surf->offset = nv20mt->level[level].image_offset[image];
+
+	return surf;
+}
+
+/* Drop one reference to *psurface and clear the caller's pointer; the last
+ * reference also drops the texture reference and frees the surface. */
+static void
+nv20_miptree_surface_release(struct pipe_screen *pscreen,
+			     struct pipe_surface **psurface)
+{
+	struct pipe_surface *surf = *psurface;
+
+	*psurface = NULL;
+	if (--surf->refcount <= 0) {
+		pipe_texture_reference(&surf->texture, NULL);
+		FREE(surf);
+	}
+}
+
+/* Install the texture and surface entry points of this file into the screen. */
+void
+nv20_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+	/* textures */
+	pscreen->texture_create = nv20_miptree_create;
+	pscreen->texture_blanket = nv20_miptree_blanket;
+	pscreen->texture_release = nv20_miptree_release;
+
+	/* surfaces */
+	pscreen->get_tex_surface = nv20_miptree_surface_get;
+	pscreen->tex_surface_release = nv20_miptree_surface_release;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
new file mode 100644
index 0000000000..319e1f6557
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
@@ -0,0 +1,430 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Build post-transformation, post-clipping vertex buffers and element
+ * lists by hooking into the end of the primitive pipeline and
+ * manipulating the vertex_id field in the vertex headers.
+ *
+ * XXX: work in progress
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+#include "draw/draw_vbuf.h"
+
+/**
+ * Primitive renderer for nv20.
+ *
+ * 'base' must stay the first member: nv20_vbuf_render() casts a vbuf_render
+ * pointer straight to this type. At most one of pbuffer / mbuffer is set at
+ * a time (allocate_vertices asserts that both are NULL beforehand).
+ */
+struct nv20_vbuf_render {
+ struct vbuf_render base;
+
+ struct nv20_context *nv20;
+
+ /** Vertex buffer in VRAM */
+ struct pipe_buffer *pbuffer;
+
+ /** Vertex buffer in normal memory */
+ void *mbuffer;
+
+ /** Vertex size in bytes */
+ /*unsigned vertex_size;*/
+
+ /** Hardware primitive, as returned by nvgl_primitive() */
+ unsigned hwprim;
+};
+
+/**
+ * Downcast a vbuf_render to the nv20_vbuf_render that embeds it.
+ * Asserts that the pointer is non-NULL.
+ */
+static INLINE struct nv20_vbuf_render *
+nv20_vbuf_render(struct vbuf_render *render)
+{
+	struct nv20_vbuf_render *nv20_render;
+
+	assert(render);
+	nv20_render = (struct nv20_vbuf_render *)render;
+	return nv20_render;
+}
+
+/* Vertex buffer bind hook. The body is compiled out with "#if 0", so the
+ * function currently does nothing; the disabled code would write 0 to every
+ * VTXBUF_ADDRESS / VTXFMT slot. */
+void nv20_vtxbuf_bind( struct nv20_context* nv20 )
+{
+#if 0
+ int i;
+ for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) {
+ BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1);
+ OUT_RING(0/*nv20->vtxbuf*/);
+ BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1);
+ OUT_RING(0/*XXX*/);
+ }
+#endif
+}
+
+/* vbuf_render::get_vertex_info hook: emits pending hardware state, then
+ * returns the context's vertex_info. */
+static const struct vertex_info *
+nv20_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+	struct nv20_context *nv20 = nv20_vbuf_render(render)->nv20;
+
+	nv20_emit_hw_state(nv20);
+	return &nv20->vertex_info;
+}
+
+/* Allocate 'size' bytes with MALLOC, remember the result in mbuffer and
+ * return it (NULL on failure). */
+static void *
+nv20__allocate_mbuffer(struct nv20_vbuf_render *nv20_render, size_t size)
+{
+	void *mem = MALLOC(size);
+
+	nv20_render->mbuffer = mem;
+	return mem;
+}
+
+/* Create a vertex-usage winsys buffer of 'size' bytes (alignment argument
+ * 64) and store it in pbuffer. */
+static void
+nv20__allocate_pbuffer(struct nv20_vbuf_render *nv20_render, size_t size)
+{
+	struct pipe_winsys *ws = nv20_render->nv20->pipe.winsys;
+	struct pipe_buffer *vbo;
+
+	vbo = ws->buffer_create(ws, 64, PIPE_BUFFER_USAGE_VERTEX, size);
+	nv20_render->pbuffer = vbo;
+}
+
+/* vbuf_render::allocate_vertices hook: reserve storage for nr_vertices
+ * vertices of vertex_size bytes each. Returns TRUE on success and marks the
+ * vertex arrays dirty; returns FALSE when the allocation fails. */
+static boolean
+nv20_vbuf_render_allocate_vertices( struct vbuf_render *render,
+				    ushort vertex_size,
+				    ushort nr_vertices )
+{
+	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+	const size_t bytes = (size_t)vertex_size * (size_t)nr_vertices;
+	void *storage;
+
+	assert(!nv20_render->pbuffer);
+	assert(!nv20_render->mbuffer);
+
+	/*
+	 * Small vertex counts do not need a pipe vertex buffer: the data is
+	 * passed directly via the fifo.
+	 */
+	/* XXX: Pipe vertex buffers don't work, hence the "0 &&". */
+	if (0 && bytes > 16 * 1024) {
+		nv20__allocate_pbuffer(nv20_render, bytes);
+		/* umm yeah so this is ugly */
+		storage = nv20_render->pbuffer;
+	} else {
+		storage = nv20__allocate_mbuffer(nv20_render, bytes);
+	}
+
+	if (!storage)
+		return FALSE;
+
+	nv20_render->nv20->dirty |= NV20_NEW_VTXARRAYS;
+	return TRUE;
+}
+
+/* vbuf_render::map_vertices hook: return a CPU pointer to the vertex
+ * storage, mapping the winsys buffer when one is in use. */
+static void *
+nv20_vbuf_render_map_vertices( struct vbuf_render *render )
+{
+	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+	struct pipe_winsys *ws = nv20_render->nv20->pipe.winsys;
+
+	if (nv20_render->pbuffer)
+		return ws->buffer_map(ws, nv20_render->pbuffer,
+				      PIPE_BUFFER_USAGE_CPU_WRITE);
+
+	if (nv20_render->mbuffer)
+		return nv20_render->mbuffer;
+
+	/* neither buffer allocated: caller error */
+	assert(0);
+
+	/* warnings be gone */
+	return NULL;
+}
+
+/* vbuf_render::unmap_vertices hook: unmap the winsys buffer if one is in
+ * use. min_index and max_index are ignored. */
+static void
+nv20_vbuf_render_unmap_vertices( struct vbuf_render *render,
+				 ushort min_index,
+				 ushort max_index )
+{
+	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+	struct pipe_winsys *ws = nv20_render->nv20->pipe.winsys;
+
+	if (!nv20_render->pbuffer)
+		return;
+
+	ws->buffer_unmap(ws, nv20_render->pbuffer);
+}
+
+/* vbuf_render::set_primitive hook: translate 'prim' with nvgl_primitive()
+ * and remember it. Returns FALSE when the translation yields 0. */
+static boolean
+nv20_vbuf_render_set_primitive( struct vbuf_render *render,
+				unsigned prim )
+{
+	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+	const unsigned hw_prim = nvgl_primitive(prim);
+
+	if (hw_prim != 0) {
+		nv20_render->hwprim = hw_prim;
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/* Pack stride, field count and type into one NV20TCL_VTXFMT word. */
+static uint32_t
+nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type)
+{
+	uint32_t word;
+
+	word  = stride << NV20TCL_VTXFMT_STRIDE_SHIFT;
+	word |= fields << NV20TCL_VTXFMT_SIZE_SHIFT;
+	word |= type << NV20TCL_VTXFMT_TYPE_SHIFT;
+
+	return word;
+}
+
+/* Emit the VTXFMT word for hardware attribute 'hwattr'.
+ * Returns the number of fields of the attribute; for an unhandled emit type
+ * an error is logged, nothing is emitted and 0 is returned. */
+static unsigned
+nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr)
+{
+	unsigned fields;
+	uint32_t hwfmt;
+
+	switch (type) {
+	case EMIT_OMIT:
+		fields = 0;
+		break;
+	case EMIT_1F:
+		fields = 1;
+		break;
+	case EMIT_2F:
+		fields = 2;
+		break;
+	case EMIT_3F:
+		fields = 3;
+		break;
+	case EMIT_4F:
+		fields = 4;
+		break;
+	default:
+		NOUVEAU_ERR("unhandled attrib_emit %d\n", type);
+		return 0;
+	}
+
+	/* every handled case uses stride 4 * fields and type value 2 */
+	hwfmt = nv20__vtxhwformat(4 * fields, fields, 2);
+
+	BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1);
+	OUT_RING(hwfmt);
+	return fields;
+}
+
+/* Emit a VTXFMT word for every hardware attribute slot, highest slot first.
+ * Slots whose bit is set in vinfo->hwfmt[0] take the next vertex_info
+ * attribute; the rest are emitted as EMIT_OMIT.
+ * Returns the total number of fields of the enabled attributes. */
+static unsigned
+nv20__emit_vertex_array_format(struct nv20_context *nv20)
+{
+	struct vertex_info *vinfo = &nv20->vertex_info;
+	unsigned total_fields = 0;
+	int next_attr = 0;
+	int slot;
+
+	for (slot = NV20TCL_VTXFMT__SIZE - 1; slot >= 0; slot--) {
+		if (!(vinfo->hwfmt[0] & (1 << slot))) {
+			nv20__emit_format(nv20, EMIT_OMIT, slot);
+			continue;
+		}
+
+		total_fields += nv20__emit_format(nv20,
+				vinfo->attrib[next_attr].emit, slot);
+		next_attr++;
+	}
+
+	return total_fields;
+}
+
+/**
+ * Draw indexed vertices from the malloc'ed vertex buffer by pushing the
+ * vertex data inline through the fifo.
+ *
+ * \param nv20_render  renderer whose mbuffer holds the vertices
+ * \param indices      vertex indices into mbuffer
+ * \param nr_indices   number of entries in 'indices'
+ */
+static void
+nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render,
+		   const ushort *indices,
+		   uint nr_indices)
+{
+	struct nv20_context *nv20 = nv20_render->nv20;
+	struct vertex_info *vinfo = &nv20->vertex_info;
+	unsigned nr_fields;
+	int max_push;
+	ubyte *data = nv20_render->mbuffer;
+	int vsz = 4 * vinfo->size;
+
+	nr_fields = nv20__emit_vertex_array_format(nv20);
+	if (nr_fields == 0) {
+		/* no enabled attribute: nothing can be pushed, and the
+		 * division below would trap */
+		NOUVEAU_ERR("no vertex fields to emit\n");
+		return;
+	}
+
+	BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+	OUT_RING(nv20_render->hwprim);
+
+	/* vertices per packet; never 0, or the loop would not advance */
+	max_push = 1200 / (int)nr_fields;
+	if (max_push < 1)
+		max_push = 1;
+
+	while (nr_indices) {
+		int i;
+		int push = MIN2(nr_indices, max_push);
+
+		BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields);
+		for (i = 0; i < push; i++) {
+			/* XXX: fixme to handle other than floats? */
+			int f = nr_fields;
+			float *attrv = (float*)&data[indices[i] * vsz];
+			while (f-- > 0)
+				OUT_RINGf(*attrv++);
+		}
+
+		nr_indices -= push;
+		indices += push;
+	}
+
+	BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+	OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP);
+}
+
+/* Draw indexed vertices from the winsys vertex buffer. The function logs
+ * that this path is broken before emitting anything. */
+static void
+nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render,
+		   const ushort *indices,
+		   uint nr_indices)
+{
+	struct nv20_context *nv20 = nv20_render->nv20;
+
+	NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n");
+
+	BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+	OUT_RELOCl(nv20_render->pbuffer, 0,
+		   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+
+	BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+	OUT_RING(nv20_render->hwprim);
+
+	/* send an odd leading index on its own so the rest pairs up */
+	if (nr_indices & 1) {
+		BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1);
+		OUT_RING (indices[0]);
+		indices++;
+		nr_indices--;
+	}
+
+	while (nr_indices) {
+		// XXX too big/small ? check the size
+		int chunk = MIN2(nr_indices, 1200 * 2);
+		int k;
+
+		/* two 16-bit indices per ring word */
+		BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, chunk >> 1);
+		for (k = 0; k < chunk; k += 2)
+			OUT_RING((indices[k + 1] << 16) | indices[k]);
+
+		indices += chunk;
+		nr_indices -= chunk;
+	}
+
+	BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+	OUT_RING (0);
+}
+
+/* vbuf_render::draw hook: emit pending hardware state, then draw from
+ * whichever vertex buffer is currently allocated. */
+static void
+nv20_vbuf_render_draw( struct vbuf_render *render,
+		       const ushort *indices,
+		       uint nr_indices)
+{
+	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+
+	nv20_emit_hw_state(nv20_render->nv20);
+
+	if (nv20_render->pbuffer) {
+		nv20__draw_pbuffer(nv20_render, indices, nr_indices);
+		return;
+	}
+
+	if (nv20_render->mbuffer) {
+		nv20__draw_mbuffer(nv20_render, indices, nr_indices);
+		return;
+	}
+
+	/* no vertex storage allocated */
+	assert(0);
+}
+
+
+/* vbuf_render::release_vertices hook: drop whichever vertex buffer is
+ * currently allocated and reset its pointer. */
+static void
+nv20_vbuf_render_release_vertices( struct vbuf_render *render )
+{
+	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+
+	if (nv20_render->pbuffer) {
+		struct pipe_screen *pscreen =
+			&nv20_render->nv20->screen->pipe;
+
+		pipe_buffer_reference(pscreen, &nv20_render->pbuffer, NULL);
+		return;
+	}
+
+	if (nv20_render->mbuffer) {
+		FREE(nv20_render->mbuffer);
+		nv20_render->mbuffer = NULL;
+		return;
+	}
+
+	/* nothing was allocated */
+	assert(0);
+}
+
+
+/* vbuf_render::destroy hook: free the renderer. Both vertex buffers are
+ * expected to have been released already. */
+static void
+nv20_vbuf_render_destroy( struct vbuf_render *render )
+{
+	struct nv20_vbuf_render *self = nv20_vbuf_render(render);
+
+	assert(!self->pbuffer);
+	assert(!self->mbuffer);
+
+	FREE(self);
+}
+
+
+/**
+ * Create a new primitive render.
+ *
+ * \param nv20  context the renderer draws with
+ * \return the embedded vbuf_render, or NULL when the allocation fails
+ */
+static struct vbuf_render *
+nv20_vbuf_render_create( struct nv20_context *nv20 )
+{
+	struct nv20_vbuf_render *nv20_render = CALLOC_STRUCT(nv20_vbuf_render);
+
+	/* the caller already tests for NULL; do not dereference a failed
+	 * allocation here */
+	if (!nv20_render)
+		return NULL;
+
+	nv20_render->nv20 = nv20;
+
+	nv20_render->base.max_vertex_buffer_bytes = 16*1024;
+	nv20_render->base.max_indices = 1024;
+	nv20_render->base.get_vertex_info = nv20_vbuf_render_get_vertex_info;
+	nv20_render->base.allocate_vertices =
+		nv20_vbuf_render_allocate_vertices;
+	nv20_render->base.map_vertices = nv20_vbuf_render_map_vertices;
+	nv20_render->base.unmap_vertices = nv20_vbuf_render_unmap_vertices;
+	nv20_render->base.set_primitive = nv20_vbuf_render_set_primitive;
+	nv20_render->base.draw = nv20_vbuf_render_draw;
+	nv20_render->base.release_vertices = nv20_vbuf_render_release_vertices;
+	nv20_render->base.destroy = nv20_vbuf_render_destroy;
+
+	return &nv20_render->base;
+}
+
+
+/**
+ * Create a vbuf draw stage backed by a fresh nv20 primitive renderer.
+ * Returns NULL when either the renderer or the stage cannot be created;
+ * the renderer is destroyed again in the latter case.
+ */
+struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 )
+{
+	struct vbuf_render *render = nv20_vbuf_render_create(nv20);
+	struct draw_stage *stage;
+
+	if (!render)
+		return NULL;
+
+	stage = draw_vbuf_stage( nv20->draw, render );
+	if (stage)
+		return stage;
+
+	render->destroy(render);
+	return NULL;
+}
diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c
new file mode 100644
index 0000000000..5f2b7b4f71
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -0,0 +1,222 @@
+#include "pipe/p_screen.h"
+#include "util/u_simple_screen.h"
+
+#include "nv20_context.h"
+#include "nv20_screen.h"
+
+/* pipe_screen::get_name hook: format "NV" plus the chipset id in hex into a
+ * function-static buffer and return it. The buffer is overwritten by every
+ * call. */
+static const char *
+nv20_screen_get_name(struct pipe_screen *screen)
+{
+	static char buffer[128];
+	struct nouveau_device *dev =
+		nv20_screen(screen)->nvws->channel->device;
+
+	snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+	return buffer;
+}
+
+/* pipe_screen::get_vendor hook: always the constant vendor string. */
+static const char *
+nv20_screen_get_vendor(struct pipe_screen *screen)
+{
+	const char *vendor = "nouveau";
+
+	return vendor;
+}
+
+/* pipe_screen::get_param hook: integer capabilities. Unknown parameters are
+ * logged and reported as 0. */
+static int
+nv20_screen_get_param(struct pipe_screen *screen, int param)
+{
+	switch (param) {
+	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+		return 2;
+
+	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+		return 12;
+
+	case PIPE_CAP_ANISOTROPIC_FILTER:
+	case PIPE_CAP_MAX_RENDER_TARGETS:
+		return 1;
+
+	/* everything below is reported as 0 */
+	case PIPE_CAP_NPOT_TEXTURES:
+	case PIPE_CAP_TWO_SIDED_STENCIL:
+	case PIPE_CAP_GLSL:
+	case PIPE_CAP_S3TC:
+	case PIPE_CAP_POINT_SPRITE:
+	case PIPE_CAP_OCCLUSION_QUERY:
+	case PIPE_CAP_TEXTURE_SHADOW_MAP:
+	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+	case NOUVEAU_CAP_HW_VTXBUF:
+	case NOUVEAU_CAP_HW_IDXBUF:
+		return 0;
+
+	default:
+		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+		return 0;
+	}
+}
+
+/* pipe_screen::get_paramf hook: floating-point capabilities. Unknown
+ * parameters are logged and reported as 0.0. */
+static float
+nv20_screen_get_paramf(struct pipe_screen *screen, int param)
+{
+	float value;
+
+	switch (param) {
+	case PIPE_CAP_MAX_LINE_WIDTH:
+	case PIPE_CAP_MAX_LINE_WIDTH_AA:
+		value = 10.0;
+		break;
+	case PIPE_CAP_MAX_POINT_WIDTH:
+	case PIPE_CAP_MAX_POINT_WIDTH_AA:
+		value = 64.0;
+		break;
+	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+		value = 2.0;
+		break;
+	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+		value = 4.0;
+		break;
+	default:
+		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+		value = 0.0;
+		break;
+	}
+
+	return value;
+}
+
+/* pipe_screen::is_format_supported hook. Render-target usage is checked
+ * against one format list, every other usage against a second list; target
+ * and geom_flags are ignored. */
+static boolean
+nv20_screen_is_format_supported(struct pipe_screen *screen,
+				enum pipe_format format,
+				enum pipe_texture_target target,
+				unsigned tex_usage, unsigned geom_flags)
+{
+	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+		switch (format) {
+		case PIPE_FORMAT_A8R8G8B8_UNORM:
+		case PIPE_FORMAT_R5G6B5_UNORM:
+		case PIPE_FORMAT_Z24S8_UNORM:
+		case PIPE_FORMAT_Z16_UNORM:
+			return TRUE;
+		default:
+			return FALSE;
+		}
+	}
+
+	switch (format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case PIPE_FORMAT_A1R5G5B5_UNORM:
+	case PIPE_FORMAT_A4R4G4B4_UNORM:
+	case PIPE_FORMAT_R5G6B5_UNORM:
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_I8_UNORM:
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+/**
+ * pipe_screen::surface_map hook: map the buffer behind the surface's
+ * texture.
+ *
+ * \return pointer to the first byte of the surface (buffer mapping plus
+ *         surface->offset), or NULL when the buffer cannot be mapped
+ */
+static void *
+nv20_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+		 unsigned flags )
+{
+	struct pipe_winsys *ws = screen->winsys;
+	void *map;
+	struct nv20_miptree *nv20mt = (struct nv20_miptree *)surface->texture;
+
+	map = ws->buffer_map(ws, nv20mt->buffer, flags);
+	if (!map)
+		return NULL;
+
+	/* arithmetic on "void *" is a GNU extension; offset in bytes through
+	 * a char pointer instead */
+	return (char *)map + surface->offset;
+}
+
+/* pipe_screen::surface_unmap hook: unmap the buffer behind the surface's
+ * texture. */
+static void
+nv20_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+	struct nv20_miptree *mt = (struct nv20_miptree *)surface->texture;
+	struct pipe_winsys *winsys = screen->winsys;
+
+	winsys->buffer_unmap(winsys, mt->buffer);
+}
+
+/* pipe_screen::destroy hook: release the sync notifier and the 3D object,
+ * then free the screen.
+ * NOTE(review): eng2d is not released here -- confirm whether the 2D engine
+ * needs a matching teardown call. */
+static void
+nv20_screen_destroy(struct pipe_screen *pscreen)
+{
+	struct nv20_screen *nv20screen = nv20_screen(pscreen);
+	struct nouveau_winsys *winsys = nv20screen->nvws;
+
+	winsys->notifier_free(&nv20screen->sync);
+	winsys->grobj_free(&nv20screen->kelvin);
+
+	FREE(pscreen);
+}
+
+/* Return the buffer that backs the surface's texture; installed as the
+ * 2D engine's 'buf' callback. */
+static struct pipe_buffer *
+nv20_surface_buffer(struct pipe_surface *surf)
+{
+	return ((struct nv20_miptree *)surf->texture)->buffer;
+}
+
+/**
+ * Create the nv20 screen: set up the 2D engine, the 3D (kelvin) object and
+ * the sync notifier, then install the screen entry points.
+ *
+ * \param ws    generic winsys, stored in the screen
+ * \param nvws  nouveau winsys used to allocate hardware objects
+ * \return the embedded pipe_screen, or NULL for an unsupported chipset or
+ *         when any allocation fails
+ */
+struct pipe_screen *
+nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+	struct nv20_screen *screen;
+	unsigned kelvin_class = 0;
+	unsigned chipset = nvws->channel->device->chipset;
+	int ret;
+
+	/* Choose the 3D object class first, so that an unsupported chipset
+	 * is rejected before anything has been allocated (the original
+	 * leaked the screen on this path). */
+	if (chipset >= 0x25)
+		kelvin_class = NV25TCL;
+	else if (chipset >= 0x20)
+		kelvin_class = NV20TCL;
+
+	if (!kelvin_class || chipset >= 0x30) {
+		NOUVEAU_ERR("Unknown nv2x chipset: nv%02x\n", chipset);
+		return NULL;
+	}
+
+	screen = CALLOC_STRUCT(nv20_screen);
+	if (!screen)
+		return NULL;
+	screen->nvws = nvws;
+
+	/* 2D engine setup */
+	screen->eng2d = nv04_surface_2d_init(nvws);
+	if (!screen->eng2d) {
+		/* presumably NULL signals failure -- either way it must not
+		 * be dereferenced below */
+		NOUVEAU_ERR("Error creating 2D engine\n");
+		FREE(screen);
+		return NULL;
+	}
+	screen->eng2d->buf = nv20_surface_buffer;
+
+	/* 3D object */
+	ret = nvws->grobj_alloc(nvws, kelvin_class, &screen->kelvin);
+	if (ret) {
+		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+		/* NOTE(review): eng2d is not torn down here (nor in
+		 * nv20_screen_destroy) -- confirm the matching release call. */
+		FREE(screen);
+		return NULL;
+	}
+
+	/* Notifier for sync purposes */
+	ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+	if (ret) {
+		NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+		nv20_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	screen->pipe.winsys = ws;
+	screen->pipe.destroy = nv20_screen_destroy;
+
+	screen->pipe.get_name = nv20_screen_get_name;
+	screen->pipe.get_vendor = nv20_screen_get_vendor;
+	screen->pipe.get_param = nv20_screen_get_param;
+	screen->pipe.get_paramf = nv20_screen_get_paramf;
+
+	screen->pipe.is_format_supported = nv20_screen_is_format_supported;
+
+	screen->pipe.surface_map = nv20_surface_map;
+	screen->pipe.surface_unmap = nv20_surface_unmap;
+
+	nv20_screen_init_miptree_functions(&screen->pipe);
+	u_simple_screen_init(&screen->pipe);
+
+	return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_screen.h b/src/gallium/drivers/nv20/nv20_screen.h
new file mode 100644
index 0000000000..bf2f2c0d9f
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_screen.h
@@ -0,0 +1,24 @@
+#ifndef __NV20_SCREEN_H__
+#define __NV20_SCREEN_H__
+
+#include "pipe/p_screen.h"
+#include "nv04/nv04_surface_2d.h"
+
+/* Driver screen. 'pipe' must stay the first member: nv20_screen() casts a
+ * pipe_screen pointer straight to this type. */
+struct nv20_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ /* HW graphics objects */
+ struct nv04_surface_2d *eng2d; /* from nv04_surface_2d_init() */
+ struct nouveau_grobj *kelvin; /* 3D object (NV20TCL or NV25TCL class) */
+ struct nouveau_notifier *sync; /* notifier allocated for sync purposes */
+};
+
+/* Downcast a pipe_screen to the nv20_screen that embeds it as its first
+ * member. */
+static INLINE struct nv20_screen *
+nv20_screen(struct pipe_screen *screen)
+{
+	struct nv20_screen *nv20screen = (struct nv20_screen *)screen;
+
+	return nv20screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c
new file mode 100644
index 0000000000..ecec4f49a0
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_state.c
@@ -0,0 +1,582 @@
+#include "draw/draw_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+/**
+ * Translate a gallium blend template into nv20 register values
+ * (blend enable and factors, colour write mask, dither enable).
+ *
+ * Returns the new nv20_blend_state as an opaque pointer, or NULL when the
+ * allocation fails.
+ */
+static void *
+nv20_blend_state_create(struct pipe_context *pipe,
+			const struct pipe_blend_state *cso)
+{
+	struct nv20_blend_state *cb;
+
+	cb = MALLOC(sizeof(struct nv20_blend_state));
+	if (!cb)
+		return NULL;
+
+	cb->b_enable = cso->blend_enable ? 1 : 0;
+
+	/* alpha factor in the upper half-word, rgb factor in the lower */
+	cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) |
+			 (nvgl_blend_func(cso->rgb_src_factor)));
+	cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) |
+			 (nvgl_blend_func(cso->rgb_dst_factor)));
+
+	/* one enable bit per channel: A=bit 24, R=bit 16, G=bit 8, B=bit 0 */
+	cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) |
+		      ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) |
+		      ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) |
+		      ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0));
+
+	cb->d_enable = cso->dither ? 1 : 0;
+
+	return (void *)cb;
+}
+
+/* Make 'blend' the current blend state and flag it for re-emission. */
+static void
+nv20_blend_state_bind(struct pipe_context *pipe, void *blend)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+
+	ctx->blend = (struct nv20_blend_state *)blend;
+	ctx->dirty |= NV20_NEW_BLEND;
+}
+
+/* Release a blend state object; the context is not consulted. */
+static void
+nv20_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;
+	FREE(hwcso);
+}
+
+
+/*
+ * Map a PIPE_TEX_WRAP_* mode to the hardware wrap value, shifted down by
+ * NV20TCL_TX_WRAP_S_SHIFT so the caller can place it in the S or T field.
+ * Modes without a translation of their own are reported and treated as
+ * REPEAT.
+ */
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+	unsigned hw;
+
+	if (wrap == PIPE_TEX_WRAP_REPEAT) {
+		hw = NV20TCL_TX_WRAP_S_REPEAT;
+	} else if (wrap == PIPE_TEX_WRAP_MIRROR_REPEAT) {
+		hw = NV20TCL_TX_WRAP_S_MIRRORED_REPEAT;
+	} else if (wrap == PIPE_TEX_WRAP_CLAMP_TO_EDGE) {
+		hw = NV20TCL_TX_WRAP_S_CLAMP_TO_EDGE;
+	} else if (wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER) {
+		hw = NV20TCL_TX_WRAP_S_CLAMP_TO_BORDER;
+	} else if (wrap == PIPE_TEX_WRAP_CLAMP) {
+		hw = NV20TCL_TX_WRAP_S_CLAMP;
+	} else {
+		/* includes the PIPE_TEX_WRAP_MIRROR_CLAMP* variants */
+		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+		hw = NV20TCL_TX_WRAP_S_REPEAT;
+	}
+
+	return (hw >> NV20TCL_TX_WRAP_S_SHIFT);
+}
+
+/**
+ * Build the nv20 sampler state (wrap modes, min/mag filter, border colour)
+ * from a gallium sampler template.
+ *
+ * Returns the new nv20_sampler_state as an opaque pointer, or NULL when
+ * the allocation fails.  Anisotropy levels and shadow comparison are not
+ * programmed; the disabled code for them is kept below for reference.
+ */
+static void *
+nv20_sampler_state_create(struct pipe_context *pipe,
+			  const struct pipe_sampler_state *cso)
+{
+	struct nv20_sampler_state *ps;
+	uint32_t filter = 0;
+
+	ps = MALLOC(sizeof(struct nv20_sampler_state));
+	if (!ps)
+		return NULL;
+
+	ps->wrap = ((wrap_mode(cso->wrap_s) << NV20TCL_TX_WRAP_S_SHIFT) |
+		    (wrap_mode(cso->wrap_t) << NV20TCL_TX_WRAP_T_SHIFT));
+
+	ps->en = 0;
+	if (cso->max_anisotropy > 1.0) {
+		/* no idea, binary driver sets it, works without it.. meh.. */
+		ps->wrap |= (1 << 5);
+
+/*		if (cso->max_anisotropy >= 8.0) {
+			ps->en |= NV20TCL_TX_ENABLE_ANISO_8X;
+		} else
+		if (cso->max_anisotropy >= 4.0) {
+			ps->en |= NV20TCL_TX_ENABLE_ANISO_4X;
+		} else {
+			ps->en |= NV20TCL_TX_ENABLE_ANISO_2X;
+		}*/
+	}
+
+	/* magnification: anything that is not LINEAR is treated as NEAREST */
+	switch (cso->mag_img_filter) {
+	case PIPE_TEX_FILTER_LINEAR:
+		filter |= NV20TCL_TX_FILTER_MAGNIFY_LINEAR;
+		break;
+	case PIPE_TEX_FILTER_NEAREST:
+	default:
+		filter |= NV20TCL_TX_FILTER_MAGNIFY_NEAREST;
+		break;
+	}
+
+	/* minification: image filter combined with the mip filter */
+	switch (cso->min_img_filter) {
+	case PIPE_TEX_FILTER_LINEAR:
+		switch (cso->min_mip_filter) {
+		case PIPE_TEX_MIPFILTER_NEAREST:
+			filter |=
+				NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+			break;
+		case PIPE_TEX_MIPFILTER_LINEAR:
+			filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+			break;
+		case PIPE_TEX_MIPFILTER_NONE:
+		default:
+			filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR;
+			break;
+		}
+		break;
+	case PIPE_TEX_FILTER_NEAREST:
+	default:
+		switch (cso->min_mip_filter) {
+		case PIPE_TEX_MIPFILTER_NEAREST:
+			filter |=
+				NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+			break;
+		case PIPE_TEX_MIPFILTER_LINEAR:
+			filter |=
+				NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+			break;
+		case PIPE_TEX_MIPFILTER_NONE:
+		default:
+			filter |= NV20TCL_TX_FILTER_MINIFY_NEAREST;
+			break;
+		}
+		break;
+	}
+
+	ps->filt = filter;
+
+/*	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+		switch (cso->compare_func) {
+		case PIPE_FUNC_NEVER:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER;
+			break;
+		case PIPE_FUNC_GREATER:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER;
+			break;
+		case PIPE_FUNC_EQUAL:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL;
+			break;
+		case PIPE_FUNC_GEQUAL:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL;
+			break;
+		case PIPE_FUNC_LESS:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS;
+			break;
+		case PIPE_FUNC_NOTEQUAL:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL;
+			break;
+		case PIPE_FUNC_LEQUAL:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL;
+			break;
+		case PIPE_FUNC_ALWAYS:
+			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS;
+			break;
+		default:
+			break;
+		}
+	}*/
+
+	/* Border colour packed as A,R,G,B bytes.  Each byte is widened to
+	 * uint32_t before shifting so the alpha byte never lands in the
+	 * sign bit of an int. */
+	ps->bcol = (((uint32_t)float_to_ubyte(cso->border_color[3]) << 24) |
+		    ((uint32_t)float_to_ubyte(cso->border_color[0]) << 16) |
+		    ((uint32_t)float_to_ubyte(cso->border_color[1]) <<  8) |
+		    ((uint32_t)float_to_ubyte(cso->border_color[2]) <<  0));
+
+	return (void *)ps;
+}
+
+/* Bind the first 'nr' sampler states and mark each unit's sampler dirty. */
+static void
+nv20_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+	unsigned u = 0;
+
+	while (u < nr) {
+		ctx->tex_sampler[u] = sampler[u];
+		ctx->dirty_samplers |= (1 << u);
+		u++;
+	}
+}
+
+/* Release a sampler state object; the context is not consulted. */
+static void
+nv20_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;
+	FREE(hwcso);
+}
+
+/* Record the textures for the first 'nr' units and mark those units dirty. */
+static void
+nv20_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+			 struct pipe_texture **miptree)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+	unsigned u = 0;
+
+	while (u < nr) {
+		ctx->tex_miptree[u] = (struct nv20_miptree *)miptree[u];
+		ctx->dirty_samplers |= (1 << u);
+		u++;
+	}
+}
+
+/**
+ * Translate a gallium rasterizer template into nv20 register values.
+ *
+ * The template pointer itself is kept in rs->templ (it is handed to the
+ * draw module at bind time), so it must outlive the returned object.
+ *
+ * Returns the new nv20_rasterizer_state as an opaque pointer, or NULL when
+ * the allocation fails.
+ */
+static void *
+nv20_rasterizer_state_create(struct pipe_context *pipe,
+			     const struct pipe_rasterizer_state *cso)
+{
+	struct nv20_rasterizer_state *rs;
+	union { float f; uint32_t u; } point_size;
+	int i;
+
+	/*XXX: ignored:
+	 * 	light_twoside
+	 * 	offset_cw/ccw -nohw
+	 * 	scissor
+	 * 	point_smooth -nohw
+	 * 	multisample
+	 * 	offset_units / offset_scale
+	 */
+	rs = MALLOC(sizeof(struct nv20_rasterizer_state));
+	if (!rs)
+		return NULL;
+
+	rs->templ = cso;
+
+	rs->shade_model = cso->flatshade ? NV20TCL_SHADE_MODEL_FLAT :
+					   NV20TCL_SHADE_MODEL_SMOOTH;
+
+	/* Width scaled by 8, low 8 bits kept.  Converting through unsigned
+	 * instead of unsigned char keeps the conversion defined when the
+	 * scaled width exceeds 255. */
+	rs->line_width = (unsigned)(cso->line_width * 8.0) & 0xff;
+	rs->line_smooth_en = cso->line_smooth ? 1 : 0;
+
+	/* XXX: nv20 and nv25 different! */
+	/* Raw IEEE bits of the float, read through a union rather than a
+	 * pointer cast to stay within the aliasing rules. */
+	point_size.f = cso->point_size;
+	rs->point_size = point_size.u;
+
+	rs->poly_smooth_en = cso->poly_smooth ? 1 : 0;
+
+	if (cso->front_winding == PIPE_WINDING_CCW) {
+		rs->front_face = NV20TCL_FRONT_FACE_CCW;
+		rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw);
+		rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw);
+	} else {
+		rs->front_face = NV20TCL_FRONT_FACE_CW;
+		rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw);
+		rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw);
+	}
+
+	/* cull_mode names a winding; express it as front/back relative to
+	 * the front-face winding chosen above. */
+	switch (cso->cull_mode) {
+	case PIPE_WINDING_CCW:
+		rs->cull_face_en = 1;
+		if (cso->front_winding == PIPE_WINDING_CCW)
+			rs->cull_face = NV20TCL_CULL_FACE_FRONT;
+		else
+			rs->cull_face = NV20TCL_CULL_FACE_BACK;
+		break;
+	case PIPE_WINDING_CW:
+		rs->cull_face_en = 1;
+		if (cso->front_winding == PIPE_WINDING_CW)
+			rs->cull_face = NV20TCL_CULL_FACE_FRONT;
+		else
+			rs->cull_face = NV20TCL_CULL_FACE_BACK;
+		break;
+	case PIPE_WINDING_BOTH:
+		rs->cull_face_en = 1;
+		rs->cull_face = NV20TCL_CULL_FACE_FRONT_AND_BACK;
+		break;
+	case PIPE_WINDING_NONE:
+	default:
+		rs->cull_face_en = 0;
+		rs->cull_face = 0;
+		break;
+	}
+
+	/* bit 0 enables sprites, bit 8+i marks coordinate set i as replaced */
+	if (cso->point_sprite) {
+		rs->point_sprite = (1 << 0);
+		for (i = 0; i < 8; i++) {
+			if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+				rs->point_sprite |= (1 << (8 + i));
+		}
+	} else {
+		rs->point_sprite = 0;
+	}
+
+	return (void *)rs;
+}
+
+/*
+ * Make 'rast' the current rasterizer state, forward its original template
+ * (or NULL when unbinding) to the draw module, and flag it for emission.
+ */
+static void
+nv20_rasterizer_state_bind(struct pipe_context *pipe, void *rast)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+	const struct pipe_rasterizer_state *templ = NULL;
+
+	ctx->rast = (struct nv20_rasterizer_state*)rast;
+	if (ctx->rast)
+		templ = ctx->rast->templ;
+
+	draw_set_rasterizer_state(ctx->draw, templ);
+
+	ctx->dirty |= NV20_NEW_RAST;
+}
+
+/* Release a rasterizer state object; the context is not consulted. */
+static void
+nv20_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;
+	FREE(hwcso);
+}
+
+/**
+ * Translate a gallium depth/stencil/alpha template into nv20 register
+ * values.  Only stencil[0] of the template is translated.
+ *
+ * Returns the new nv20_depth_stencil_alpha_state as an opaque pointer, or
+ * NULL when the allocation fails.
+ */
+static void *
+nv20_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+			const struct pipe_depth_stencil_alpha_state *cso)
+{
+	struct nv20_depth_stencil_alpha_state *hw;
+
+	hw = MALLOC(sizeof(struct nv20_depth_stencil_alpha_state));
+	if (!hw)
+		return NULL;
+
+	hw->depth.func		= nvgl_comparison_op(cso->depth.func);
+	hw->depth.write_enable	= cso->depth.writemask ? 1 : 0;
+	hw->depth.test_enable	= cso->depth.enabled ? 1 : 0;
+
+	hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0;
+	hw->stencil.wmask = cso->stencil[0].writemask;
+	hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func);
+	hw->stencil.ref	= cso->stencil[0].ref_value;
+	hw->stencil.vmask = cso->stencil[0].valuemask;
+	hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op);
+	hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op);
+	hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op);
+
+	hw->alpha.enabled = cso->alpha.enabled ? 1 : 0;
+	hw->alpha.func = nvgl_comparison_op(cso->alpha.func);
+	hw->alpha.ref  = float_to_ubyte(cso->alpha.ref_value);
+
+	return (void *)hw;
+}
+
+/* Make 'dsa' the current depth/stencil/alpha state and flag it dirty. */
+static void
+nv20_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+
+	ctx->dsa = (struct nv20_depth_stencil_alpha_state*)dsa;
+	ctx->dirty |= NV20_NEW_DSA;
+}
+
+/* Release a depth/stencil/alpha state object; the context is not consulted. */
+static void
+nv20_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;
+	FREE(hwcso);
+}
+
+/* Vertex shaders are created by the draw module; return its shader object. */
+static void *
+nv20_vp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *templ)
+{
+	struct draw_context *draw = nv20_context(pipe)->draw;
+
+	return draw_create_vertex_shader(draw, templ);
+}
+
+/* Bind a draw-module vertex shader and flag the vertex program dirty. */
+static void
+nv20_vp_state_bind(struct pipe_context *pipe, void *shader)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+	struct draw_vertex_shader *vs = (struct draw_vertex_shader *) shader;
+
+	draw_bind_vertex_shader(ctx->draw, vs);
+
+	ctx->dirty |= NV20_NEW_VERTPROG;
+}
+
+/* Hand a vertex shader back to the draw module for destruction. */
+static void
+nv20_vp_state_delete(struct pipe_context *pipe, void *shader)
+{
+	struct draw_context *draw = nv20_context(pipe)->draw;
+	struct draw_vertex_shader *vs = (struct draw_vertex_shader *) shader;
+
+	draw_delete_vertex_shader(draw, vs);
+}
+
+/**
+ * Create a fragment program object: a zero-initialised
+ * nv20_fragment_program holding a private copy of the TGSI tokens and the
+ * scan results for them.
+ *
+ * Returns the object as an opaque pointer, or NULL when either the object
+ * or the token copy cannot be allocated.
+ */
+static void *
+nv20_fp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *cso)
+{
+	struct nv20_fragment_program *fp;
+
+	fp = CALLOC(1, sizeof(struct nv20_fragment_program));
+	if (!fp)
+		return NULL;
+
+	fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+	if (!fp->pipe.tokens) {
+		/* do not hand out a program without its token stream */
+		FREE(fp);
+		return NULL;
+	}
+
+	tgsi_scan_shader(cso->tokens, &fp->info);
+
+	return (void *)fp;
+}
+
+/* Select the current fragment program and flag it for (re)binding. */
+static void
+nv20_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+
+	ctx->fragprog.current = (struct nv20_fragment_program *)hwcso;
+	ctx->dirty |= NV20_NEW_FRAGPROG;
+}
+
+/*
+ * Destroy a fragment program: let nv20_fragprog_destroy() release what it
+ * owns, then free the duplicated tokens and the object itself.
+ */
+static void
+nv20_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv20_fragment_program *prog = hwcso;
+
+	nv20_fragprog_destroy(nv20_context(pipe), prog);
+	FREE((void*)prog->pipe.tokens);
+	FREE(prog);
+}
+
+/*
+ * Remember the blend colour and flag it for emission.  Only the caller's
+ * pointer is stored, not a copy of the colour.
+ */
+static void
+nv20_set_blend_color(struct pipe_context *pipe,
+		     const struct pipe_blend_color *bcol)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+
+	ctx->blend_color = (struct pipe_blend_color*)bcol;
+	ctx->dirty |= NV20_NEW_BLENDCOL;
+}
+
+/* Clip state is handled entirely by the draw module; just forward it. */
+static void
+nv20_set_clip_state(struct pipe_context *pipe,
+		    const struct pipe_clip_state *clip)
+{
+	struct draw_context *draw = nv20_context(pipe)->draw;
+
+	draw_set_clip_state(draw, clip);
+}
+
+/*
+ * Copy the contents of a constant buffer into the context's CPU-side
+ * constant store for 'shader' and record how many vec4 constants it holds.
+ * Does nothing when there is no buffer, the buffer is empty, or it cannot
+ * be mapped.
+ *
+ * NOTE(review): the copy length is the buffer size and is not checked
+ * against the capacity of nv20->constbuf[shader], which is declared
+ * outside this file - confirm the store is large enough.
+ */
+static void
+nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+			 const struct pipe_constant_buffer *buf )
+{
+	struct nv20_context *nv20 = nv20_context(pipe);
+	struct pipe_winsys *ws = pipe->winsys;
+	void *mapped;
+
+	assert(shader < PIPE_SHADER_TYPES);
+	assert(index == 0);
+
+	if (!buf || !buf->buffer || !buf->buffer->size)
+		return;
+
+	mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+	if (!mapped)
+		return;
+
+	memcpy(nv20->constbuf[shader], mapped, buf->buffer->size);
+	/* one constant is four floats */
+	nv20->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float));
+	ws->buffer_unmap(ws, buf->buffer);
+}
+
+/*
+ * Remember the framebuffer state and flag it for emission.  Only the
+ * caller's pointer is stored, not a copy of the state.
+ */
+static void
+nv20_set_framebuffer_state(struct pipe_context *pipe,
+			   const struct pipe_framebuffer_state *fb)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+
+	ctx->framebuffer = (struct pipe_framebuffer_state*)fb;
+	ctx->dirty |= NV20_NEW_FRAMEBUFFER;
+}
+
+/*
+ * Polygon stipple is not implemented: the state is ignored and an error
+ * is logged.  (The logged text says "line stipple" although this is the
+ * polygon stipple hook.)
+ */
+static void
+nv20_set_polygon_stipple(struct pipe_context *pipe,
+			 const struct pipe_poly_stipple *stipple)
+{
+	(void)pipe;
+	(void)stipple;
+
+	NOUVEAU_ERR("line stipple hahaha\n");
+}
+
+/*
+ * Remember the scissor rectangle and flag it for emission.  Only the
+ * caller's pointer is stored, not a copy of the state.
+ */
+static void
+nv20_set_scissor_state(struct pipe_context *pipe,
+		       const struct pipe_scissor_state *s)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+
+	ctx->scissor = (struct pipe_scissor_state*)s;
+	ctx->dirty |= NV20_NEW_SCISSOR;
+}
+
+/*
+ * Remember the viewport (by pointer), pass it on to the draw module and
+ * flag it for emission.
+ */
+static void
+nv20_set_viewport_state(struct pipe_context *pipe,
+			const struct pipe_viewport_state *vpt)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+
+	ctx->viewport = (struct pipe_viewport_state*)vpt;
+	draw_set_viewport_state(ctx->draw, ctx->viewport);
+	ctx->dirty |= NV20_NEW_VIEWPORT;
+}
+
+/*
+ * Copy 'count' vertex buffer descriptions into the context, flag the
+ * vertex arrays dirty and pass the same descriptions to the draw module.
+ */
+static void
+nv20_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+			const struct pipe_vertex_buffer *vb)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+
+	ctx->dirty |= NV20_NEW_VTXARRAYS;
+	memcpy(ctx->vtxbuf, vb, sizeof(*vb) * count);
+
+	draw_set_vertex_buffers(ctx->draw, count, vb);
+}
+
+/*
+ * Copy 'count' vertex element descriptions into the context, flag the
+ * vertex arrays dirty and pass the same descriptions to the draw module.
+ */
+static void
+nv20_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+			 const struct pipe_vertex_element *ve)
+{
+	struct nv20_context *ctx = nv20_context(pipe);
+
+	ctx->dirty |= NV20_NEW_VTXARRAYS;
+	memcpy(ctx->vtxelt, ve, sizeof(*ve) * count);
+
+	draw_set_vertex_elements(ctx->draw, count, ve);
+}
+
+/* Install every state-related pipe_context hook implemented in this file. */
+void
+nv20_init_state_functions(struct nv20_context *nv20)
+{
+	struct pipe_context *pipe = &nv20->pipe;
+
+	/* blend */
+	pipe->create_blend_state = nv20_blend_state_create;
+	pipe->bind_blend_state = nv20_blend_state_bind;
+	pipe->delete_blend_state = nv20_blend_state_delete;
+
+	/* samplers and their textures */
+	pipe->create_sampler_state = nv20_sampler_state_create;
+	pipe->bind_sampler_states = nv20_sampler_state_bind;
+	pipe->delete_sampler_state = nv20_sampler_state_delete;
+	pipe->set_sampler_textures = nv20_set_sampler_texture;
+
+	/* rasterizer */
+	pipe->create_rasterizer_state = nv20_rasterizer_state_create;
+	pipe->bind_rasterizer_state = nv20_rasterizer_state_bind;
+	pipe->delete_rasterizer_state = nv20_rasterizer_state_delete;
+
+	/* depth / stencil / alpha */
+	pipe->create_depth_stencil_alpha_state =
+		nv20_depth_stencil_alpha_state_create;
+	pipe->bind_depth_stencil_alpha_state =
+		nv20_depth_stencil_alpha_state_bind;
+	pipe->delete_depth_stencil_alpha_state =
+		nv20_depth_stencil_alpha_state_delete;
+
+	/* vertex shaders */
+	pipe->create_vs_state = nv20_vp_state_create;
+	pipe->bind_vs_state = nv20_vp_state_bind;
+	pipe->delete_vs_state = nv20_vp_state_delete;
+
+	/* fragment shaders */
+	pipe->create_fs_state = nv20_fp_state_create;
+	pipe->bind_fs_state = nv20_fp_state_bind;
+	pipe->delete_fs_state = nv20_fp_state_delete;
+
+	/* non-CSO state setters */
+	pipe->set_blend_color = nv20_set_blend_color;
+	pipe->set_clip_state = nv20_set_clip_state;
+	pipe->set_constant_buffer = nv20_set_constant_buffer;
+	pipe->set_framebuffer_state = nv20_set_framebuffer_state;
+	pipe->set_polygon_stipple = nv20_set_polygon_stipple;
+	pipe->set_scissor_state = nv20_set_scissor_state;
+	pipe->set_viewport_state = nv20_set_viewport_state;
+
+	/* vertex inputs */
+	pipe->set_vertex_buffers = nv20_set_vertex_buffers;
+	pipe->set_vertex_elements = nv20_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h
new file mode 100644
index 0000000000..34f402fdcb
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_state.h
@@ -0,0 +1,139 @@
+#ifndef __NV20_STATE_H__
+#define __NV20_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+/* Blend state translated into the values that are written to the ring. */
+struct nv20_blend_state {
+ /* 1 when blending is enabled, else 0 */
+ uint32_t b_enable;
+ /* source factors: alpha factor << 16 | rgb factor */
+ uint32_t b_srcfunc;
+ /* destination factors: alpha factor << 16 | rgb factor */
+ uint32_t b_dstfunc;
+
+ /* colour write mask: A = bit 24, R = bit 16, G = bit 8, B = bit 0 */
+ uint32_t c_mask;
+
+ /* 1 when dithering is enabled, else 0 */
+ uint32_t d_enable;
+};
+
+/* Sampler state translated by nv20_sampler_state_create(). */
+struct nv20_sampler_state {
+ /* S and T wrap fields; bit 5 is also set when max_anisotropy > 1.0 */
+ uint32_t wrap;
+ /* currently always 0: the anisotropy enable bits are commented out */
+ uint32_t en;
+ /* combined magnify/minify filter bits */
+ uint32_t filt;
+ /* border colour packed as A,R,G,B bytes (A in the top byte) */
+ uint32_t bcol;
+};
+
+/* Rasterizer state translated by nv20_rasterizer_state_create(). */
+struct nv20_rasterizer_state {
+ uint32_t shade_model;
+
+ /* line width scaled by 8, low 8 bits only */
+ uint32_t line_width;
+ uint32_t line_smooth_en;
+
+ /* raw bit pattern of the float point size */
+ uint32_t point_size;
+
+ uint32_t poly_smooth_en;
+
+ uint32_t poly_mode_front;
+ uint32_t poly_mode_back;
+
+ uint32_t front_face;
+ uint32_t cull_face;
+ uint32_t cull_face_en;
+
+ /* bit 0: sprites enabled; bit 8+i: sprite coord mode i is not NONE */
+ uint32_t point_sprite;
+
+ /* the gallium template this state was created from; not a copy, it is
+ * handed to the draw module when the state is bound */
+ const struct pipe_rasterizer_state *templ;
+};
+
+/*
+ * Vertex program bookkeeping (one instruction, one constant, and the
+ * program itself).
+ * NOTE(review): nv20_state.c creates vertex shaders through the draw module
+ * and does not touch these structs - confirm whether anything else in the
+ * driver uses them.
+ */
+struct nv20_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv20_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nv20_vertex_program {
+ const struct pipe_shader_state *pipe;
+
+ boolean translated;
+ /* array of nr_insns instructions */
+ struct nv20_vertex_program_exec *insns;
+ unsigned nr_insns;
+ /* array of nr_consts constants */
+ struct nv20_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+};
+
+/* One entry of nv20_fragment_program.consts. */
+struct nv20_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+/*
+ * Fragment program object handed out by the create_fs_state hook.  It is
+ * allocated zeroed; creation fills in only pipe.tokens and info.
+ */
+struct nv20_fragment_program {
+ /* pipe.tokens is a private copy of the TGSI tokens, freed on delete */
+ struct pipe_shader_state pipe;
+ /* result of tgsi_scan_shader() on the tokens */
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ /* array of nr_consts entries */
+ struct nv20_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+};
+
+
+/*
+ * Depth/stencil/alpha state translated by
+ * nv20_depth_stencil_alpha_state_create().
+ */
+struct nv20_depth_stencil_alpha_state {
+ struct {
+ uint32_t func;
+ uint32_t write_enable;
+ uint32_t test_enable;
+ } depth;
+
+ /* Filled from stencil[0] of the template only.  The (currently disabled)
+ * stencil emission writes the seven words from wmask to zpass as one run,
+ * so their order and type matter. */
+ struct {
+ uint32_t enable;
+ uint32_t wmask;
+ uint32_t func;
+ uint32_t ref;
+ uint32_t vmask;
+ uint32_t fail;
+ uint32_t zfail;
+ uint32_t zpass;
+ } stencil;
+
+ struct {
+ uint32_t enabled;
+ uint32_t func;
+ /* reference value converted to an unsigned byte */
+ uint32_t ref;
+ } alpha;
+};
+
+/* nv20 texture object. */
+struct nv20_miptree {
+ /* Generic texture; pipe_texture pointers are cast directly to
+ * nv20_miptree pointers, so this must stay the first member. */
+ struct pipe_texture base;
+
+ /* backing storage; used as render target / depth buffer when the
+ * texture backs a framebuffer surface */
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ /* per mipmap level layout */
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c
new file mode 100644
index 0000000000..0f4df9ca31
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_state_emit.c
@@ -0,0 +1,396 @@
+#include "nv20_context.h"
+#include "nv20_state.h"
+#include "draw/draw_context.h"
+
+/* Write the bound blend state (dither, blend enable/factors, colour mask). */
+static void nv20_state_emit_blend(struct nv20_context* nv20)
+{
+	struct nv20_blend_state *blend = nv20->blend;
+
+	BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
+	OUT_RING  (blend->d_enable);
+
+	BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+	OUT_RING  (blend->b_enable);
+
+	/* source and destination factors are consecutive methods */
+	BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2);
+	OUT_RING  (blend->b_srcfunc);
+	OUT_RING  (blend->b_dstfunc);
+
+	BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
+	OUT_RING  (blend->c_mask);
+}
+
+/* Write the blend colour, packed as A,R,G,B bytes with A in the top byte. */
+static void nv20_state_emit_blend_color(struct nv20_context* nv20)
+{
+	struct pipe_blend_color *bc = nv20->blend_color;
+
+	BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1);
+	OUT_RING  ((float_to_ubyte(bc->color[3]) << 24)|
+		   (float_to_ubyte(bc->color[0]) << 16)|
+		   (float_to_ubyte(bc->color[1]) << 8) |
+		   (float_to_ubyte(bc->color[2]) << 0));
+}
+
+/*
+ * Write the bound rasterizer state.  Values that are emitted in pairs use
+ * a single BEGIN_RING covering two consecutive methods.
+ */
+static void nv20_state_emit_rast(struct nv20_context* nv20)
+{
+	struct nv20_rasterizer_state *rast = nv20->rast;
+
+	/* shade model followed by line width */
+	BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2);
+	OUT_RING  (rast->shade_model);
+	OUT_RING  (rast->line_width);
+
+	BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
+	OUT_RING  (rast->point_size);
+
+	/* front then back polygon mode */
+	BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+	OUT_RING  (rast->poly_mode_front);
+	OUT_RING  (rast->poly_mode_back);
+
+	/* cull face followed by front face winding */
+	BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
+	OUT_RING  (rast->cull_face);
+	OUT_RING  (rast->front_face);
+
+	/* line smooth followed by polygon smooth enable */
+	BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2);
+	OUT_RING  (rast->line_smooth_en);
+	OUT_RING  (rast->poly_smooth_en);
+
+	BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+	OUT_RING  (rast->cull_face_en);
+}
+
+/*
+ * Write the bound depth and alpha-test state.  Stencil emission is
+ * compiled out.
+ */
+static void nv20_state_emit_dsa(struct nv20_context* nv20)
+{
+	struct nv20_depth_stencil_alpha_state *dsa = nv20->dsa;
+
+	BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
+	OUT_RING (dsa->depth.func);
+
+	BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+	OUT_RING (dsa->depth.write_enable);
+
+	BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+	OUT_RING (dsa->depth.test_enable);
+
+	/* method of unknown meaning, always written as 1 */
+	BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+	OUT_RING (1);
+
+#if 0
+	BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
+	OUT_RING (dsa->stencil.enable);
+	BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
+	OUT_RINGp ((uint32_t *)&(dsa->stencil.wmask), 7);
+#endif
+
+	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+	OUT_RING (dsa->alpha.enabled);
+
+	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1);
+	OUT_RING (dsa->alpha.func);
+
+	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1);
+	OUT_RING (dsa->alpha.ref);
+}
+
+/* Placeholder: nothing is written to the hardware for the viewport. */
+static void nv20_state_emit_viewport(struct nv20_context* nv20)
+{
+	(void)nv20;
+}
+
+/* Placeholder: scissor emission is disabled, see the note below. */
+static void nv20_state_emit_scissor(struct nv20_context* nv20)
+{
+	(void)nv20;
+
+	/* NV20TCL_SCISSOR_* is probably a software method */
+/*	struct pipe_scissor_state *s = nv20->scissor;
+	BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2);
+	OUT_RING  (((s->maxx - s->minx) << 16) | s->minx);
+	OUT_RING  (((s->maxy - s->miny) << 16) | s->miny);*/
+}
+
+/*
+ * Program the render target from the bound framebuffer state: pitches,
+ * dimensions, colour format and the clip rectangle, and remember the
+ * colour/depth buffers in nv20->rt[0] / nv20->zeta.
+ *
+ * nv20->zeta is reset to NULL when the framebuffer has no usable depth
+ * buffer, so a buffer from an earlier framebuffer is not kept around.
+ *
+ * NOTE(review): fb->cbufs[0] is dereferenced unconditionally, so the
+ * "no colour buffer" paths below (colour_format == 0) cannot be reached
+ * without a colour surface being present - confirm depth-only rendering
+ * is never requested.
+ */
+static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
+{
+	struct pipe_framebuffer_state* fb = nv20->framebuffer;
+	struct pipe_surface *rt, *zeta = NULL;
+	uint32_t rt_format, w, h;
+	int colour_format = 0, zeta_format = 0;
+	struct nv20_miptree *nv20mt = 0;
+
+	w = fb->cbufs[0]->width;
+	h = fb->cbufs[0]->height;
+	colour_format = fb->cbufs[0]->format;
+	rt = fb->cbufs[0];
+
+	if (fb->zsbuf) {
+		if (colour_format) {
+			/* colour and depth buffers must have the same size */
+			assert(w == fb->zsbuf->width);
+			assert(h == fb->zsbuf->height);
+		} else {
+			w = fb->zsbuf->width;
+			h = fb->zsbuf->height;
+		}
+
+		zeta_format = fb->zsbuf->format;
+		zeta = fb->zsbuf;
+	}
+
+	rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20;
+
+	switch (colour_format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case 0:
+		rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8;
+		break;
+	case PIPE_FORMAT_R5G6B5_UNORM:
+		rt_format |= NV20TCL_RT_FORMAT_COLOR_R5G6B5;
+		break;
+	default:
+		assert(0);
+	}
+
+	/* colour pitch in the low half, depth pitch in the high half; the
+	 * colour pitch is reused when there is no depth buffer */
+	BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
+	OUT_RING  (rt->stride | ((zeta ? zeta->stride : rt->stride) << 16));
+
+	nv20mt = (struct nv20_miptree *)rt->texture;
+	nv20->rt[0] = nv20mt->buffer;
+
+	if (zeta_format) {
+		nv20mt = (struct nv20_miptree *)zeta->texture;
+		nv20->zeta = nv20mt->buffer;
+	} else {
+		/* drop the depth buffer of any previous framebuffer */
+		nv20->zeta = NULL;
+	}
+
+	BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3);
+	OUT_RING  ((w << 16) | 0);
+	OUT_RING  ((h << 16) | 0); /*NV20TCL_RT_VERT */
+	OUT_RING  (rt_format); /* NV20TCL_RT_FORMAT */
+	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+	OUT_RING  (((w - 1) << 16) | 0);
+	OUT_RING  (((h - 1) << 16) | 0);
+}
+
+/*
+ * Rebuild nv20->vertex_info from the inputs of the current fragment
+ * program: for every input the program reads, ask the draw module to emit
+ * the matching vertex shader output and set the bit of the hardware
+ * attribute slot it occupies in vinfo->hwfmt[0].
+ *
+ * Attributes are emitted in ascending hardware slot order (slot 0 position,
+ * 1-2 generics 4-5, 3-4 colours, 5-8 generics 6-9, 9-12 generics 0-3,
+ * 13-14 generics 10-11, 15 fog), so the blocks below must keep their order.
+ */
+static void nv20_vertex_layout(struct nv20_context *nv20)
+{
+ struct nv20_fragment_program *fp = nv20->fragprog.current;
+ struct draw_context *dc = nv20->draw;
+ int src;
+ int i;
+ struct vertex_info *vinfo = &nv20->vertex_info;
+ const enum interp_mode colorInterp = INTERP_LINEAR;
+ /* which fragment program inputs are in use */
+ boolean colors[2] = { FALSE };
+ boolean generics[12] = { FALSE };
+ boolean fog = FALSE;
+
+ memset(vinfo, 0, sizeof(*vinfo));
+
+ /*
+ * Assumed NV20 hardware vertex attribute order:
+ * 0 position, 1 ?, 2 ?, 3 col0,
+ * 4 col1?, 5 ?, 6 ?, 7 ?,
+ * 8 ?, 9 tex0, 10 tex1, 11 tex2,
+ * 12 tex3, 13 ?, 14 ?, 15 ?
+ * unaccounted: wgh, nor, fog
+ * There are total 16 attrs.
+ * vinfo->hwfmt[0] has a used-bit corresponding to each of these.
+ * relation to TGSI_SEMANTIC_*:
+ * - POSITION: position (always used)
+ * - COLOR: col1, col0
+ * - GENERIC: tex3, tex2, tex1, tex0, normal, weight
+ * - FOG: fog
+ */
+
+ /* first pass: note which inputs the fragment program declares */
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ int isn = fp->info.input_semantic_name[i];
+ int isi = fp->info.input_semantic_index[i];
+ switch (isn) {
+ case TGSI_SEMANTIC_POSITION:
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ assert(isi < 2);
+ colors[isi] = TRUE;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ assert(isi < 12);
+ generics[isi] = TRUE;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ fog = TRUE;
+ break;
+ default:
+ assert(0 && "unknown input_semantic_name");
+ }
+ }
+
+ /* always do position */ {
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
+ vinfo->hwfmt[0] |= (1 << 0);
+ }
+
+ /* two unnamed generics */
+ /* generics 4-5 -> slots 1-2 */
+ for (i = 4; i < 6; i++) {
+ if (!generics[i])
+ continue;
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ vinfo->hwfmt[0] |= (1 << (i - 3));
+ }
+
+ if (colors[0]) {
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ vinfo->hwfmt[0] |= (1 << 3);
+ }
+
+ if (colors[1]) {
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ vinfo->hwfmt[0] |= (1 << 4);
+ }
+
+ /* four unnamed generics */
+ /* generics 6-9 -> slots 5-8 */
+ for (i = 6; i < 10; i++) {
+ if (!generics[i])
+ continue;
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ vinfo->hwfmt[0] |= (1 << (i - 1));
+ }
+
+ /* tex0, tex1, tex2, tex3 */
+ /* generics 0-3 -> slots 9-12 */
+ for (i = 0; i < 4; i++) {
+ if (!generics[i])
+ continue;
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ vinfo->hwfmt[0] |= (1 << (i + 9));
+ }
+
+ /* two unnamed generics */
+ /* generics 10-11 -> slots 13-14 */
+ for (i = 10; i < 12; i++) {
+ if (!generics[i])
+ continue;
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ vinfo->hwfmt[0] |= (1 << (i + 3));
+ }
+
+ /* fog is a single float in slot 15 */
+ if (fog) {
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ vinfo->hwfmt[0] |= (1 << 15);
+ }
+
+ draw_compute_vertex_size(vinfo);
+}
+
+/*
+ * Bring the hardware up to date with the context: for every dirty flag,
+ * clear it and emit the matching state, then unconditionally re-emit the
+ * buffer relocations for the render target, the depth buffer (if any) and
+ * the textures of units 0 and 1 that nv20->fp_samplers marks as used.
+ */
+void
+nv20_emit_hw_state(struct nv20_context *nv20)
+{
+ int i;
+
+ /* vertex programs run in the draw module; only the flag is cleared */
+ if (nv20->dirty & NV20_NEW_VERTPROG) {
+ //nv20_vertprog_bind(nv20, nv20->vertprog.current);
+ nv20->dirty &= ~NV20_NEW_VERTPROG;
+ }
+
+ if (nv20->dirty & NV20_NEW_FRAGPROG) {
+ nv20_fragprog_bind(nv20, nv20->fragprog.current);
+ /*XXX: clear NV20_NEW_FRAGPROG if no new program uploaded */
+ /* NOTE(review): the bit set on the next line is wiped by the
+ * assignment right after it - confirm which of the two is intended. */
+ nv20->dirty_samplers |= (1<<10);
+ nv20->dirty_samplers = 0;
+ }
+
+ /* NV20_NEW_FRAGPROG is still set here when a program was just bound,
+ * so textures are rebound after every fragment program change */
+ if (nv20->dirty_samplers || (nv20->dirty & NV20_NEW_FRAGPROG)) {
+ nv20_fragtex_bind(nv20);
+ nv20->dirty &= ~NV20_NEW_FRAGPROG;
+ }
+
+ if (nv20->dirty & NV20_NEW_VTXARRAYS) {
+ nv20->dirty &= ~NV20_NEW_VTXARRAYS;
+ nv20_vertex_layout(nv20);
+ nv20_vtxbuf_bind(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_BLEND) {
+ nv20->dirty &= ~NV20_NEW_BLEND;
+ nv20_state_emit_blend(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_BLENDCOL) {
+ nv20->dirty &= ~NV20_NEW_BLENDCOL;
+ nv20_state_emit_blend_color(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_RAST) {
+ nv20->dirty &= ~NV20_NEW_RAST;
+ nv20_state_emit_rast(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_DSA) {
+ nv20->dirty &= ~NV20_NEW_DSA;
+ nv20_state_emit_dsa(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_VIEWPORT) {
+ nv20->dirty &= ~NV20_NEW_VIEWPORT;
+ nv20_state_emit_viewport(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_SCISSOR) {
+ nv20->dirty &= ~NV20_NEW_SCISSOR;
+ nv20_state_emit_scissor(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_FRAMEBUFFER) {
+ nv20->dirty &= ~NV20_NEW_FRAMEBUFFER;
+ nv20_state_emit_framebuffer(nv20);
+ }
+
+ /* Emit relocs for every referenced buffer.
+ * This is to ensure the bufmgr has an accurate idea of how
+ * the buffer is used. This isn't very efficient, but we don't
+ * seem to take a significant performance hit. Will be improved
+ * at some point. Vertex arrays are emitted by nv20_vbo.c
+ */
+
+ /* Render target */
+ BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1);
+ OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ if (nv20->zeta) {
+ BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1);
+ OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ /* XXX for when we allocate LMA on nv17 */
+/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+ OUT_RELOCl(nv20->zeta + lma_offset);*/
+ }
+
+ /* Vertex buffer */
+ /* NOTE(review): this points the vertex buffer DMA object at the render
+ * target buffer and then writes COLOR_OFFSET a second time - looks like
+ * a placeholder copied from the render target block above; confirm. */
+ BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1);
+ OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ /* Texture images */
+ /* only units 0 and 1 are considered */
+ for (i = 0; i < 2; i++) {
+ if (!(nv20->fp_samplers & (1 << i)))
+ continue;
+ BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1);
+ OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1);
+ OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+ NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0,
+ NV20TCL_TX_FORMAT_DMA1);
+ }
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_surface.c b/src/gallium/drivers/nv20/nv20_surface.c
new file mode 100644
index 0000000000..6cd607583c
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_surface.c
@@ -0,0 +1,72 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv20_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+/*
+ * Copy a width x height rectangle from 'src' to 'dest' with the 2D engine.
+ *
+ * With do_flip set the rectangle is mirrored vertically: source row
+ * srcy + n is written to destination row desty + height - 1 - n, one row
+ * per engine call, so the result stays inside rows
+ * desty .. desty + height - 1 of the destination.
+ */
+static void
+nv20_surface_copy(struct pipe_context *pipe, boolean do_flip,
+		  struct pipe_surface *dest, unsigned destx, unsigned desty,
+		  struct pipe_surface *src, unsigned srcx, unsigned srcy,
+		  unsigned width, unsigned height)
+{
+	struct nv20_context *nv20 = nv20_context(pipe);
+	struct nv04_surface_2d *eng2d = nv20->screen->eng2d;
+
+	if (do_flip) {
+		unsigned row;
+
+		/* start at the last destination row (desty + height - 1),
+		 * not one past it, and walk upwards */
+		for (row = 0; row < height; row++) {
+			eng2d->copy(eng2d, dest, destx,
+				    desty + height - 1 - row, src,
+				    srcx, srcy + row, width, 1);
+		}
+		return;
+	}
+
+	eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height);
+}
+
+/* Fill a rectangle of 'dest' with 'value' using the screen's 2D engine. */
+static void
+nv20_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+		  unsigned destx, unsigned desty, unsigned width,
+		  unsigned height, unsigned value)
+{
+	struct nv04_surface_2d *eng2d = nv20_context(pipe)->screen->eng2d;
+
+	eng2d->fill(eng2d, dest, destx, desty, width, height, value);
+}
+
+/* Install the surface copy and fill hooks implemented in this file. */
+void
+nv20_init_surface_functions(struct nv20_context *nv20)
+{
+	struct pipe_context *pipe = &nv20->pipe;
+
+	pipe->surface_copy = nv20_surface_copy;
+	pipe->surface_fill = nv20_surface_fill;
+}
diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c
new file mode 100644
index 0000000000..24d8f4bef0
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_vbo.c
@@ -0,0 +1,78 @@
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+/**
+ * Draw through the draw module.
+ *
+ * Emits the hardware state, maps every bound vertex buffer (and the index
+ * buffer, when one is given) for CPU reads, hands the mappings and the
+ * vertex-shader constant buffer to the draw context, issues the draw,
+ * then unmaps everything and flushes the draw context.
+ *
+ * \param indexBuffer  index buffer, or NULL for a non-indexed draw
+ * \param indexSize    index size passed on to the draw module
+ * \return always TRUE
+ */
+boolean nv20_draw_elements( struct pipe_context *pipe,
+			    struct pipe_buffer *indexBuffer,
+			    unsigned indexSize,
+			    unsigned prim, unsigned start, unsigned count)
+{
+	struct nv20_context *nv20 = nv20_context( pipe );
+	struct draw_context *draw = nv20->draw;
+	struct pipe_winsys *ws;
+	void *indices = NULL;
+	unsigned slot;
+
+	nv20_emit_hw_state(nv20);
+	ws = pipe->winsys;
+
+	/* Expose every bound vertex buffer to the draw module. */
+	for (slot = 0; slot < PIPE_MAX_ATTRIBS; slot++) {
+		void *verts;
+
+		if (!nv20->vtxbuf[slot].buffer)
+			continue;
+
+		verts = ws->buffer_map(ws, nv20->vtxbuf[slot].buffer,
+				       PIPE_BUFFER_USAGE_CPU_READ);
+		draw_set_mapped_vertex_buffer(draw, slot, verts);
+	}
+
+	/* Index data is optional; without it the draw module is given a
+	 * NULL element buffer with an index size of zero.
+	 */
+	if (indexBuffer) {
+		indices = ws->buffer_map(ws, indexBuffer,
+					 PIPE_BUFFER_USAGE_CPU_READ);
+	}
+	draw_set_mapped_element_buffer(draw, indexBuffer ? indexSize : 0,
+				       indices);
+
+	draw_set_mapped_constant_buffer(draw,
+					nv20->constbuf[PIPE_SHADER_VERTEX],
+					nv20->constbuf_nr[PIPE_SHADER_VERTEX]);
+
+	draw_arrays(draw, prim, start, count);
+
+	/* Release the mappings taken above. */
+	for (slot = 0; slot < PIPE_MAX_ATTRIBS; slot++) {
+		if (!nv20->vtxbuf[slot].buffer)
+			continue;
+
+		ws->buffer_unmap(ws, nv20->vtxbuf[slot].buffer);
+		draw_set_mapped_vertex_buffer(draw, slot, NULL);
+	}
+	if (indexBuffer) {
+		ws->buffer_unmap(ws, indexBuffer);
+		draw_set_mapped_element_buffer(draw, 0, NULL);
+	}
+
+	draw_flush(draw);
+	return TRUE;
+}
+
+/**
+ * Non-indexed draw: forwards to nv20_draw_elements() with no index buffer
+ * and an index size of zero.
+ */
+boolean nv20_draw_arrays( struct pipe_context *pipe,
+			  unsigned prim, unsigned start, unsigned count)
+{
+	struct pipe_buffer *no_indices = NULL;
+
+	return nv20_draw_elements(pipe, no_indices, 0, prim, start, count);
+}
+
+
+
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
new file mode 100644
index 0000000000..5db0e807ff
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -0,0 +1,838 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+/* TODO (at least...):
+ * 1. Indexed consts + ARL
+ * 2. Arb. swz/negation
+ * 3. NV_vp11, NV_vp2, NV_vp3 features
+ * - extra arith opcodes
+ * - branching
+ * - texture sampling
+ * - indexed attribs
+ * - indexed results
+ * 4. bugs
+ */
+
+/* Swizzle component selectors. */
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+/* Write-mask bits as used in this file: X is the highest bit, W the lowest. */
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+/* Defined ahead of the include below; presumably consumed by
+ * nv20_shader.h as defaults -- confirm against that header.
+ */
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv20_shader.h"
+
+/* Source-register modifier shorthands.  swz() pastes its component
+ * arguments onto the SWZ_ prefix, so it is called as swz(reg, X, Y, Z, W).
+ * Note that abs() reuses the name of the C library function abs().
+ */
+#define swz(s,x,y,z,w) nv20_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv20_sr_neg((s))
+#define abs(s) nv20_sr_abs((s))
+
+/* Working state for translating one TGSI vertex shader. */
+struct nv20_vpc {
+ /* Program being built; instructions and constants are appended to it. */
+ struct nv20_vertex_program *vp;
+
+ /* Instruction most recently appended by nv20_vp_arith(). */
+ struct nv20_vertex_program_exec *vpi;
+
+ /* TGSI output register index -> hardware destination id. */
+ unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
+
+ /* Highest TGSI temporary index seen so far (-1 when none). */
+ int high_temp;
+ /* Scratch temporaries handed out by temp() for the current instruction;
+  * reset to 0 at the start of each parsed instruction.
+  */
+ int temp_temp_count;
+
+ /* Registers holding the shader's immediates, in declaration order. */
+ struct nv20_sreg *imm;
+ unsigned nr_imm;
+};
+
+/**
+ * Hand out a scratch temporary register.
+ *
+ * Scratch registers are numbered directly above the highest TGSI
+ * temporary seen so far; each call takes the next one.
+ */
+static struct nv20_sreg
+temp(struct nv20_vpc *vpc)
+{
+	const int scratch = vpc->temp_temp_count;
+
+	vpc->temp_temp_count = scratch + 1;
+	return nv20_sr(NV30SR_TEMP, scratch + vpc->high_temp + 1);
+}
+
+/**
+ * Return a constant register for the program.
+ *
+ * \param pipe  index into the pipe constant buffer, or a negative value
+ *              for a constant whose value is given by x/y/z/w
+ *
+ * A non-negative pipe index that already has an entry is reused;
+ * otherwise a new entry is appended to vp->consts and initialised from
+ * the arguments.
+ */
+static struct nv20_sreg
+constant(struct nv20_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+	struct nv20_vertex_program *vp = vpc->vp;
+	struct nv20_vertex_program_data *entry;
+	int idx = 0;
+
+	/* Look for an existing entry bound to the same buffer index. */
+	while (pipe >= 0 && idx < vp->nr_consts) {
+		if (vp->consts[idx].index == pipe)
+			return nv20_sr(NV30SR_CONST, idx);
+		idx++;
+	}
+
+	/* NOTE(review): the realloc() result is not checked and overwrites
+	 * the only pointer to the old array -- confirm the OOM policy.
+	 */
+	idx = vp->nr_consts++;
+	vp->consts = realloc(vp->consts, sizeof(*entry) * vp->nr_consts);
+
+	entry = &vp->consts[idx];
+	entry->index = pipe;
+	entry->value[0] = x;
+	entry->value[1] = y;
+	entry->value[2] = z;
+	entry->value[3] = w;
+
+	return nv20_sr(NV30SR_CONST, idx);
+}
+
+/* Shorthand for nv20_vp_arith(): cc is the compile context, s the slot,
+ * o the opcode name without its NV30_VP_INST_ prefix, d the destination,
+ * m the write mask and s0..s2 the three source operands.
+ */
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv20_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+
+/**
+ * Encode one source operand into the instruction words.
+ *
+ * \param hw   the instruction's data words
+ * \param pos  operand position (0, 1 or 2); any other value asserts
+ * \param src  operand to encode
+ *
+ * Besides filling in hw[], this records side information: an input
+ * operand sets its bit in vp->ir, and a constant operand stores its index
+ * in the current instruction's const_index.
+ */
+static void
+emit_src(struct nv20_vpc *vpc, uint32_t *hw, int pos, struct nv20_sreg src)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV30SR_TEMP:
+ sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
+ sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
+ break;
+ case NV30SR_INPUT:
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ /* The input index is stored in the instruction word itself, not
+  * in the per-operand field, and is tracked in the program's ir mask.
+  */
+ vp->ir |= (1 << src.index);
+ hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
+ break;
+ case NV30SR_CONST:
+ sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ /* Only one distinct constant may be referenced per instruction. */
+ assert(vpc->vpi->const_index == -1 ||
+ vpc->vpi->const_index == src.index);
+ vpc->vpi->const_index = src.index;
+ break;
+ case NV30SR_NONE:
+ /* An unused operand is encoded with the INPUT register type. */
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV30_VP_SRC_NEGATE;
+
+ /* The absolute-value flag is a per-position bit in hw[0], starting at
+  * bit 21.
+  */
+ if (src.abs)
+ hw[0] |= (1 << (21 + pos));
+
+ sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) |
+ (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT));
+
+/*
+ * |VVV|
+ * d°.°b
+ * \u/
+ *
+ */
+
+ /* Place the encoded operand; positions 0 and 2 are split across two
+  * instruction words.
+  */
+ switch (pos) {
+ case 0:
+ hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >>
+ NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT;
+ hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) <<
+ NV30_VP_INST_SRC0L_SHIFT;
+ break;
+ case 1:
+ hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT;
+ break;
+ case 2:
+ hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >>
+ NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT;
+ hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) <<
+ NV30_VP_INST_SRC2L_SHIFT;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+/**
+ * Encode the destination register into the instruction words.
+ *
+ * Temporaries only store their index in hw[0].  Outputs additionally
+ * record which result was written in the program's or mask.  Any other
+ * register type asserts.  The slot argument is not used here.
+ */
+static void
+emit_dst(struct nv20_vpc *vpc, uint32_t *hw, int slot, struct nv20_sreg dst)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+
+ switch (dst.type) {
+ case NV30SR_TEMP:
+ hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
+ break;
+ case NV30SR_OUTPUT:
+ /* Track written results: colours and fog/point size use bits 0-5,
+  * texture coordinates 0-7 use bits 14-21.  Other outputs set no bit.
+  */
+ switch (dst.index) {
+ case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
+ case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ default:
+ break;
+ }
+
+ hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
+ hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+
+ /*XXX: no way this is entirely correct, someone needs to
+ * figure out what exactly it is.
+ */
+ hw[3] |= 0x800;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+/**
+ * Append one instruction to the program and encode it.
+ *
+ * \param slot  selects which write-mask field receives mask: non-zero
+ *              uses the S* (SDEST/STEMP) field, zero the V* field
+ * \param op    opcode value, placed in the VEC opcode field
+ * \param dst   destination register
+ * \param mask  write mask (MASK_* bits)
+ * \param s0,s1,s2  source operands for positions 0..2
+ *
+ * Grows vp->insns by one entry, zeroes it, and leaves vpc->vpi pointing
+ * at it.
+ */
+static void
+nv20_vp_arith(struct nv20_vpc *vpc, int slot, int op,
+ struct nv20_sreg dst, int mask,
+ struct nv20_sreg s0, struct nv20_sreg s1,
+ struct nv20_sreg s2)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+ uint32_t *hw;
+
+ /* NOTE(review): the realloc() result is not checked -- confirm the
+  * intended out-of-memory behaviour.
+  */
+ vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+ vpc->vpi = &vp->insns[vp->nr_insns - 1];
+ memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+ /* -1 marks "no constant referenced yet"; emit_src() updates it. */
+ vpc->vpi->const_index = -1;
+
+ hw = vpc->vpi->data;
+
+ /* Condition field set to NV30_VP_INST_COND_TR with an x,y,z,w
+  * condition swizzle.
+  */
+ hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
+ hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
+ (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
+ (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
+ (3 << NV30_VP_INST_COND_SWZ_W_SHIFT));
+
+ hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
+// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);
+
+ /* The write mask goes into one of four fields, chosen by destination
+  * kind (output vs. temporary) and by slot.
+  */
+ if (dst.type == NV30SR_OUTPUT) {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
+ } else {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
+ }
+
+ emit_dst(vpc, hw, slot, dst);
+ emit_src(vpc, hw, 0, s0);
+ emit_src(vpc, hw, 1, s1);
+ emit_src(vpc, hw, 2, s2);
+}
+
+/**
+ * Convert a TGSI source operand into an nv20_sreg.
+ *
+ * Inputs and temporaries map by index, constants go through constant(),
+ * and immediates are looked up in vpc->imm.  Temporaries also raise
+ * vpc->high_temp.  The absolute/negate modifiers and the swizzle are
+ * copied from the TGSI operand.
+ *
+ * An unsupported register file is reported and yields an NV30SR_NONE
+ * register, so the returned value is never built on an uninitialised
+ * struct.
+ */
+static INLINE struct nv20_sreg
+tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+	struct nv20_sreg src;
+
+	switch (fsrc->SrcRegister.File) {
+	case TGSI_FILE_INPUT:
+		src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+		break;
+	case TGSI_FILE_CONSTANT:
+		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+		break;
+	case TGSI_FILE_IMMEDIATE:
+		src = vpc->imm[fsrc->SrcRegister.Index];
+		break;
+	case TGSI_FILE_TEMPORARY:
+		if (vpc->high_temp < fsrc->SrcRegister.Index)
+			vpc->high_temp = fsrc->SrcRegister.Index;
+		src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+		break;
+	default:
+		NOUVEAU_ERR("bad src file\n");
+		/* Start from a defined register so the modifier stores
+		 * below do not operate on indeterminate contents.
+		 */
+		src = nv20_sr(NV30SR_NONE, 0);
+		break;
+	}
+
+	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.negate = fsrc->SrcRegister.Negate;
+	src.swz[0] = fsrc->SrcRegister.SwizzleX;
+	src.swz[1] = fsrc->SrcRegister.SwizzleY;
+	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	return src;
+}
+
+/**
+ * Convert a TGSI destination operand into an nv20_sreg.
+ *
+ * Outputs are translated through vpc->output_map; temporaries map by
+ * index and raise vpc->high_temp.  An unsupported register file is
+ * reported and yields an NV30SR_NONE register instead of an
+ * uninitialised struct.
+ */
+static INLINE struct nv20_sreg
+tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+	struct nv20_sreg dst;
+
+	switch (fdst->DstRegister.File) {
+	case TGSI_FILE_OUTPUT:
+		dst = nv20_sr(NV30SR_OUTPUT,
+			      vpc->output_map[fdst->DstRegister.Index]);
+		break;
+	case TGSI_FILE_TEMPORARY:
+		dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+		if (vpc->high_temp < dst.index)
+			vpc->high_temp = dst.index;
+		break;
+	default:
+		NOUVEAU_ERR("bad dst file\n");
+		/* Return a defined value rather than indeterminate data. */
+		dst = nv20_sr(NV30SR_NONE, 0);
+		break;
+	}
+
+	return dst;
+}
+
+/**
+ * Translate a TGSI write mask into this file's MASK_* bits, one
+ * component at a time.
+ */
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+	return ((tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0) |
+	       ((tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0);
+}
+
+/**
+ * Translate one TGSI instruction into hardware instruction(s).
+ *
+ * Source operands are gathered first.  An instruction may reference only
+ * one distinct input index and one distinct constant/immediate index;
+ * further ones are first copied into scratch temporaries with a MOV.
+ * The opcode is then mapped onto one or more arith() emissions.
+ *
+ * \return TRUE on success (END emits nothing), FALSE on an unsupported
+ *         source register file or opcode.
+ */
+static boolean
+nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nv20_sreg src[3], dst, tmp;
+ struct nv20_sreg none = nv20_sr(NV30SR_NONE, 0);
+ int mask;
+ /* Input / constant index already used directly by this instruction
+  * (-1 while unused).
+  */
+ int ai = -1, ci = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ /* Scratch temporaries are per instruction. */
+ vpc->temp_temp_count = 0;
+ /* First pass: temporaries only, so high_temp is up to date before any
+  * scratch register is allocated in the second pass.
+  */
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ /* Second pass: inputs, constants and immediates. */
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ /* A second, different input: move it to a scratch temp. */
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ /*XXX: index comparison is broken now that consts come from
+ * two different register files.
+ */
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_IMMEDIATE:
+ if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ /* A second, different constant: move it to a scratch temp. */
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+ /* Opcode mapping.  Ops emitted with slot 1 take their operand in source
+  * position 2; ADD takes its operands in positions 0 and 2.
+  */
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ /* ABS is a MOV with the absolute-value source modifier. */
+ arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ /* POW is expanded to LG2, MUL, EX2 through a scratch temp:
+  * tmp.x = lg2(src0.x); tmp.x *= src1.x; dst = ex2(tmp.x).
+  */
+ tmp = temp(vpc);
+ arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, 1, OP_EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RET:
+ /* Nothing is emitted for RET. */
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ /* SUB is an ADD with the second operand negated. */
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ /* Cross product as MUL + MAD on swizzled operands; the MAD never
+  * writes W.
+  */
+ tmp = temp(vpc);
+ arith(vpc, 0, OP_MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/**
+ * Record the hardware destination for a declared shader output.
+ *
+ * The declaration's semantic name and index select the hardware result
+ * register, which is stored in vpc->output_map at the declaration's
+ * first register index.
+ *
+ * \return TRUE on success, FALSE for an unsupported semantic or index.
+ */
+static boolean
+nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
+				const struct tgsi_full_declaration *fdec)
+{
+	const unsigned sem_idx = fdec->Semantic.SemanticIndex;
+	int hw;
+
+	switch (fdec->Semantic.SemanticName) {
+	case TGSI_SEMANTIC_POSITION:
+		hw = NV30_VP_INST_DEST_POS;
+		break;
+	case TGSI_SEMANTIC_COLOR:
+		if (sem_idx > 1) {
+			NOUVEAU_ERR("bad colour semantic index\n");
+			return FALSE;
+		}
+		hw = (sem_idx == 0) ? NV30_VP_INST_DEST_COL0
+				    : NV30_VP_INST_DEST_COL1;
+		break;
+	case TGSI_SEMANTIC_BCOLOR:
+		if (sem_idx > 1) {
+			NOUVEAU_ERR("bad bcolour semantic index\n");
+			return FALSE;
+		}
+		hw = (sem_idx == 0) ? NV30_VP_INST_DEST_BFC0
+				    : NV30_VP_INST_DEST_BFC1;
+		break;
+	case TGSI_SEMANTIC_FOG:
+		hw = NV30_VP_INST_DEST_FOGC;
+		break;
+	case TGSI_SEMANTIC_PSIZE:
+		hw = NV30_VP_INST_DEST_PSZ;
+		break;
+	case TGSI_SEMANTIC_GENERIC:
+		/* Generic outputs map to texture coordinates 0..7. */
+		if (sem_idx > 7) {
+			NOUVEAU_ERR("bad generic semantic index\n");
+			return FALSE;
+		}
+		hw = NV30_VP_INST_DEST_TC(sem_idx);
+		break;
+	default:
+		NOUVEAU_ERR("bad output semantic\n");
+		return FALSE;
+	}
+
+	vpc->output_map[fdec->DeclarationRange.First] = hw;
+	return TRUE;
+}
+
+/**
+ * Pre-pass over the shader tokens: count the immediates and allocate
+ * vpc->imm to hold one register per immediate.
+ *
+ * \return TRUE on success; FALSE if the immediate table could not be
+ *         allocated (vpc->imm is then NULL).
+ */
+static boolean
+nv20_vertprog_prepare(struct nv20_vpc *vpc)
+{
+	struct tgsi_parse_context p;
+	int nr_imm = 0;
+
+	tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+	while (!tgsi_parse_end_of_tokens(&p)) {
+		const union tgsi_full_token *tok = &p.FullToken;
+
+		tgsi_parse_token(&p);
+		switch(tok->Token.Type) {
+		case TGSI_TOKEN_TYPE_IMMEDIATE:
+			nr_imm++;
+			break;
+		default:
+			break;
+		}
+	}
+	tgsi_parse_free(&p);
+
+	if (nr_imm) {
+		vpc->imm = CALLOC(nr_imm, sizeof(struct nv20_sreg));
+		/* Report the failure instead of only asserting: with
+		 * asserts compiled out the translator would otherwise
+		 * write through a NULL table.
+		 */
+		if (!vpc->imm)
+			return FALSE;
+	}
+
+	return TRUE;
+}
+
+/**
+ * Translate the program's TGSI tokens into hardware instructions.
+ *
+ * Output declarations fill the output map, immediates become program
+ * constants, and every instruction is passed to
+ * nv20_vertprog_parse_instruction().  On success the final instruction
+ * is flagged as last and vp->translated is set; on any failure
+ * vp->translated is left untouched.  All temporary translation state,
+ * including the immediate table, is released before returning.
+ */
+static void
+nv20_vertprog_translate(struct nv20_context *nv20,
+			struct nv20_vertex_program *vp)
+{
+	struct tgsi_parse_context parse;
+	struct nv20_vpc *vpc = NULL;
+
+	tgsi_dump(vp->pipe.tokens,0);
+
+	vpc = CALLOC(1, sizeof(struct nv20_vpc));
+	if (!vpc)
+		return;
+	vpc->vp = vp;
+	vpc->high_temp = -1;
+
+	if (!nv20_vertprog_prepare(vpc)) {
+		if (vpc->imm)
+			FREE(vpc->imm);
+		FREE(vpc);
+		return;
+	}
+
+	tgsi_parse_init(&parse, vp->pipe.tokens);
+
+	while (!tgsi_parse_end_of_tokens(&parse)) {
+		tgsi_parse_token(&parse);
+
+		switch (parse.FullToken.Token.Type) {
+		case TGSI_TOKEN_TYPE_DECLARATION:
+		{
+			const struct tgsi_full_declaration *fdec;
+			fdec = &parse.FullToken.FullDeclaration;
+			switch (fdec->Declaration.File) {
+			case TGSI_FILE_OUTPUT:
+				if (!nv20_vertprog_parse_decl_output(vpc, fdec))
+					goto out_err;
+				break;
+			default:
+				break;
+			}
+		}
+			break;
+		case TGSI_TOKEN_TYPE_IMMEDIATE:
+		{
+			const struct tgsi_full_immediate *imm;
+
+			imm = &parse.FullToken.FullImmediate;
+			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+			assert(imm->Immediate.NrTokens == 4 + 1);
+			/* Immediates are stored as constants with no
+			 * constant-buffer index (-1).
+			 */
+			vpc->imm[vpc->nr_imm++] =
+				constant(vpc, -1,
+					 imm->u.ImmediateFloat32[0].Float,
+					 imm->u.ImmediateFloat32[1].Float,
+					 imm->u.ImmediateFloat32[2].Float,
+					 imm->u.ImmediateFloat32[3].Float);
+		}
+			break;
+		case TGSI_TOKEN_TYPE_INSTRUCTION:
+		{
+			const struct tgsi_full_instruction *finst;
+			finst = &parse.FullToken.FullInstruction;
+			if (!nv20_vertprog_parse_instruction(vpc, finst))
+				goto out_err;
+		}
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* A shader that emitted no instruction has no "last" instruction to
+	 * flag; indexing insns[-1] would be out of bounds.
+	 */
+	if (vp->nr_insns)
+		vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
+	vp->translated = TRUE;
+out_err:
+	tgsi_parse_free(&parse);
+	/* The immediate table is only needed during translation. */
+	if (vpc->imm)
+		FREE(vpc->imm);
+	FREE(vpc);
+}
+
+/**
+ * State validation hook for the vertex program.
+ *
+ * Translates the bound program if needed, allocates its instruction and
+ * constant slots from the screen's heaps (evicting other programs when
+ * the first allocation fails), patches constant references when the
+ * constant slots moved, uploads changed constants and, when required,
+ * the instructions.
+ *
+ * \return TRUE when the program's state object was newly referenced into
+ *         state.hw[NV30_STATE_VERTPROG]; FALSE if it was already there or
+ *         if translation failed.
+ */
+static boolean
+nv20_vertprog_validate(struct nv20_context *nv20)
+{
+ struct nouveau_winsys *nvws = nv20->nvws;
+ struct pipe_winsys *ws = nv20->pipe.winsys;
+ struct nouveau_grobj *rankine = nv20->screen->rankine;
+ struct nv20_vertex_program *vp;
+ struct pipe_buffer *constbuf;
+ boolean upload_code = FALSE, upload_data = FALSE;
+ int i;
+
+ vp = nv20->vertprog;
+ constbuf = nv20->constbuf[PIPE_SHADER_VERTEX];
+
+ /* Translate TGSI shader into hw bytecode */
+ if (!vp->translated) {
+ nv20_vertprog_translate(nv20, vp);
+ if (!vp->translated)
+ return FALSE;
+ }
+
+ /* Allocate hw vtxprog exec slots */
+ if (!vp->exec) {
+ struct nouveau_resource *heap = nv20->screen->vp_exec_heap;
+ struct nouveau_stateobj *so;
+ uint vplen = vp->nr_insns;
+
+ /* A non-zero return from res_alloc() is treated as failure: free
+  * other programs' slots, then retry once.
+  */
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+ while (heap->next && heap->size < vplen) {
+ struct nv20_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->exec);
+ }
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+ assert(0);
+ }
+
+ /* State object that starts the program from its first slot. */
+ so = so_new(2, 0);
+ so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
+ so_data (so, vp->exec->start);
+ so_ref(so, &vp->so);
+
+ upload_code = TRUE;
+ }
+
+ /* Allocate hw vtxprog const slots */
+ if (vp->nr_consts && !vp->data) {
+ struct nouveau_resource *heap = nv20->screen->vp_data_heap;
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+ while (heap->next && heap->size < vp->nr_consts) {
+ struct nv20_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->data);
+ }
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+ assert(0);
+ }
+
+ /*XXX: handle this some day */
+ assert(vp->data->start >= vp->data_start_min);
+
+ upload_data = TRUE;
+ /* Moved constants change the encoded instructions, so the code
+  * must be uploaded again as well.
+  */
+ if (vp->data_start != vp->data->start)
+ upload_code = TRUE;
+ }
+
+ /* If exec or data segments moved we need to patch the program to
+ * fixup offsets and register IDs.
+ */
+ if (vp->exec_start != vp->exec->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv20_vertex_program_exec *vpi = &vp->insns[i];
+
+ /* Branch relocation is not implemented. */
+ if (vpi->has_branch_offset) {
+ assert(0);
+ }
+ }
+
+ vp->exec_start = vp->exec->start;
+ }
+
+ if (vp->nr_consts && vp->data_start != vp->data->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv20_vertex_program_exec *vpi = &vp->insns[i];
+
+ /* Rewrite the constant source field as program-relative index
+  * plus the start of the allocated constant slots.
+  */
+ if (vpi->const_index >= 0) {
+ vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK;
+ vpi->data[1] |=
+ (vpi->const_index + vp->data->start) <<
+ NV30_VP_INST_CONST_SRC_SHIFT;
+
+ }
+ }
+
+ vp->data_start = vp->data->start;
+ }
+
+ /* Update + Upload constant values */
+ if (vp->nr_consts) {
+ float *map = NULL;
+
+ if (constbuf) {
+ map = ws->buffer_map(ws, constbuf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ }
+
+ for (i = 0; i < vp->nr_consts; i++) {
+ struct nv20_vertex_program_data *vpd = &vp->consts[i];
+
+ /* Entries with index >= 0 mirror the constant buffer; negative
+  * entries keep the value stored at translation time.
+  * NOTE(review): map is NULL here when no constant buffer is bound
+  * (or if the mapping failed) -- confirm that cannot happen for a
+  * program that references buffer constants.
+  */
+ if (vpd->index >= 0) {
+ /* Unchanged values are skipped unless a full upload is due. */
+ if (!upload_data &&
+ !memcmp(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float)))
+ continue;
+ memcpy(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float));
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (i + vp->data->start);
+ OUT_RINGp ((uint32_t *)vpd->value, 4);
+ }
+
+ if (constbuf) {
+ ws->buffer_unmap(ws, constbuf);
+ }
+ }
+
+ /* Upload vtxprog */
+ if (upload_code) {
+#if 0
+ for (i = 0; i < vp->nr_insns; i++) {
+ NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ i, vp->insns[i].data[0], vp->insns[i].data[1],
+ vp->insns[i].data[2], vp->insns[i].data[3]);
+ }
+#endif
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (vp->exec->start);
+ for (i = 0; i < vp->nr_insns; i++) {
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (vp->insns[i].data, 4);
+ }
+ }
+
+ /* Report a change only when a different state object gets bound. */
+ if (vp->so != nv20->state.hw[NV30_STATE_VERTPROG]) {
+ so_ref(vp->so, &nv20->state.hw[NV30_STATE_VERTPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/**
+ * Release everything built for a vertex program: the translated
+ * instructions and constants, its slots in the exec and data heaps, and
+ * its state object.  The program is left marked as untranslated.
+ */
+void
+nv20_vertprog_destroy(struct nv20_context *nv20, struct nv20_vertex_program *vp)
+{
+	struct nouveau_winsys *nvws = nv20->screen->nvws;
+
+	vp->translated = FALSE;
+
+	/* Translation results. */
+	if (vp->nr_insns != 0) {
+		FREE(vp->insns);
+		vp->nr_insns = 0;
+		vp->insns = NULL;
+	}
+	if (vp->nr_consts != 0) {
+		FREE(vp->consts);
+		vp->nr_consts = 0;
+		vp->consts = NULL;
+	}
+
+	/* Hardware slots. */
+	nvws->res_free(&vp->exec);
+	vp->exec_start = 0;
+
+	nvws->res_free(&vp->data);
+	vp->data_start_min = 0;
+	vp->data_start = 0;
+
+	/* Input/output usage masks and the state object. */
+	vp->or = 0;
+	vp->ir = 0;
+	so_ref(NULL, &vp->so);
+}
+
+/* State-table entry for the vertex program: validated by
+ * nv20_vertprog_validate(), keyed on the NV30_NEW_VERTPROG pipe dirty bit
+ * and the NV30_STATE_VERTPROG hardware state slot.  The user-clip-plane
+ * bit is present but commented out.
+ */
+struct nv20_state_entry nv20_state_vertprog = {
+ .validate = nv20_vertprog_validate,
+ .dirty = {
+ .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/,
+ .hw = NV30_STATE_VERTPROG,
+ }
+};
diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile
new file mode 100644
index 0000000000..4c29e2eab3
--- /dev/null
+++ b/src/gallium/drivers/nv30/Makefile
@@ -0,0 +1,28 @@
+# Build description for the nv30 Gallium driver library.
+# TOP is the path from this directory back to the top of the source tree.
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv30
+
+# Driver sources, one file per line.
+C_SOURCES = \
+ nv30_clear.c \
+ nv30_context.c \
+ nv30_draw.c \
+ nv30_fragprog.c \
+ nv30_fragtex.c \
+ nv30_miptree.c \
+ nv30_query.c \
+ nv30_screen.c \
+ nv30_state.c \
+ nv30_state_blend.c \
+ nv30_state_emit.c \
+ nv30_state_fb.c \
+ nv30_state_rasterizer.c \
+ nv30_state_scissor.c \
+ nv30_state_stipple.c \
+ nv30_state_viewport.c \
+ nv30_state_zsa.c \
+ nv30_surface.c \
+ nv30_vbo.c \
+ nv30_vertprog.c
+
+# Shared template; presumably supplies the build rules that consume
+# LIBNAME and C_SOURCES -- confirm in Makefile.template.
+include ../../Makefile.template
diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c
new file mode 100644
index 0000000000..8c3ca204d5
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_clear.c
@@ -0,0 +1,13 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv30_context.h"
+
+/**
+ * Clear a whole surface by filling it with clearValue through the
+ * context's surface_fill hook, then mark the surface as cleared.
+ */
+void
+nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+	   unsigned clearValue)
+{
+	const unsigned full_w = ps->width;
+	const unsigned full_h = ps->height;
+
+	pipe->surface_fill(pipe, ps, 0, 0, full_w, full_h, clearValue);
+	ps->status = PIPE_SURFACE_STATUS_CLEAR;
+}
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
new file mode 100644
index 0000000000..61654f8756
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -0,0 +1,72 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "nv30_context.h"
+#include "nv30_screen.h"
+
+/**
+ * pipe_context::flush implementation.
+ *
+ * When a texture-cache flush is requested, method 0x1fd8 is written
+ * twice (values 2 then 1) before the ring is fired.  The fence pointer
+ * is passed on to FIRE_RING.
+ */
+static void
+nv30_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ /* nv30 is not referenced directly below; NOUVEAU_PUSH_CONTEXT in
+  * nv30_context.h names a local called nv30, so the ring macros
+  * presumably depend on it -- confirm in nouveau_push.h.
+  */
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ BEGIN_RING(rankine, 0x1fd8, 1);
+ OUT_RING (2);
+ BEGIN_RING(rankine, 0x1fd8, 1);
+ OUT_RING (1);
+ }
+
+ FIRE_RING(fence);
+}
+
+/**
+ * pipe_context::destroy implementation: tears down the draw module
+ * context, if one was created, and frees the nv30 context itself.
+ */
+static void
+nv30_destroy(struct pipe_context *pipe)
+{
+	struct nv30_context *nv30 = nv30_context(pipe);
+	struct draw_context *draw = nv30->draw;
+
+	if (draw != NULL)
+		draw_destroy(draw);
+
+	FREE(nv30);
+}
+
+/**
+ * Create an nv30 pipe context for the given screen.
+ *
+ * Fills in the pipe_context entry points, then creates and configures
+ * the draw module context used as the software fallback path and
+ * installs the driver's render stage on it.
+ *
+ * \param pctx_id  context id stored in the new context
+ * \return the new context's pipe_context, or NULL if the context, the
+ *         draw module context or the render stage could not be
+ *         allocated.
+ */
+struct pipe_context *
+nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+	struct nv30_screen *screen = nv30_screen(pscreen);
+	struct pipe_winsys *ws = pscreen->winsys;
+	struct nv30_context *nv30;
+	struct nouveau_winsys *nvws = screen->nvws;
+	struct draw_stage *stage;
+
+	nv30 = CALLOC(1, sizeof(struct nv30_context));
+	if (!nv30)
+		return NULL;
+	nv30->screen = screen;
+	nv30->pctx_id = pctx_id;
+
+	nv30->nvws = nvws;
+
+	nv30->pipe.winsys = ws;
+	nv30->pipe.screen = pscreen;
+	nv30->pipe.destroy = nv30_destroy;
+	nv30->pipe.draw_arrays = nv30_draw_arrays;
+	nv30->pipe.draw_elements = nv30_draw_elements;
+	nv30->pipe.clear = nv30_clear;
+	nv30->pipe.flush = nv30_flush;
+
+	nv30_init_query_functions(nv30);
+	nv30_init_surface_functions(nv30);
+	nv30_init_state_functions(nv30);
+
+	/* Create, configure, and install fallback swtnl path */
+	nv30->draw = draw_create();
+	if (!nv30->draw) {
+		/* Do not configure a draw context that does not exist. */
+		FREE(nv30);
+		return NULL;
+	}
+	draw_wide_point_threshold(nv30->draw, 9999999.0);
+	draw_wide_line_threshold(nv30->draw, 9999999.0);
+	draw_enable_line_stipple(nv30->draw, FALSE);
+	draw_enable_point_sprites(nv30->draw, FALSE);
+
+	stage = nv30_draw_render_stage(nv30);
+	if (!stage) {
+		draw_destroy(nv30->draw);
+		FREE(nv30);
+		return NULL;
+	}
+	draw_set_rasterize_stage(nv30->draw, stage);
+
+	return &nv30->pipe;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
new file mode 100644
index 0000000000..b933769700
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -0,0 +1,212 @@
+#ifndef __NV30_CONTEXT_H__
+#define __NV30_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+/* Declares a local named by the argument, pointing at nv30->screen; a
+ * variable called nv30 must therefore be in scope wherever this expands.
+ * Defined ahead of nouveau_push.h, which presumably uses it -- confirm.
+ */
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv30_screen *ctx = nv30->screen
+#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_stateobj.h"
+
+#include "nv30_state.h"
+
+/* Diagnostics on stderr.  NOUVEAU_ERR prefixes the calling function and
+ * line number, NOUVEAU_MSG prefixes "nouveau: ".
+ * NOTE(review): both expansions already end in ';', so a call site that
+ * adds its own ';' yields an extra empty statement, which breaks an
+ * unbraced if/else around the call.
+ */
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+/* Hardware state slots.  NV30_STATE_MAX is the number of slots and sizes
+ * the hw[] array in struct nv30_state.
+ */
+enum nv30_state_index {
+ NV30_STATE_FB = 0,
+ NV30_STATE_VIEWPORT = 1,
+ NV30_STATE_BLEND = 2,
+ NV30_STATE_RAST = 3,
+ NV30_STATE_ZSA = 4,
+ NV30_STATE_BCOL = 5,
+ NV30_STATE_CLIP = 6,
+ NV30_STATE_SCISSOR = 7,
+ NV30_STATE_STIPPLE = 8,
+ NV30_STATE_FRAGPROG = 9,
+ NV30_STATE_VERTPROG = 10,
+ /* Sixteen fragment texture slots, numbered consecutively. */
+ NV30_STATE_FRAGTEX0 = 11,
+ NV30_STATE_FRAGTEX1 = 12,
+ NV30_STATE_FRAGTEX2 = 13,
+ NV30_STATE_FRAGTEX3 = 14,
+ NV30_STATE_FRAGTEX4 = 15,
+ NV30_STATE_FRAGTEX5 = 16,
+ NV30_STATE_FRAGTEX6 = 17,
+ NV30_STATE_FRAGTEX7 = 18,
+ NV30_STATE_FRAGTEX8 = 19,
+ NV30_STATE_FRAGTEX9 = 20,
+ NV30_STATE_FRAGTEX10 = 21,
+ NV30_STATE_FRAGTEX11 = 22,
+ NV30_STATE_FRAGTEX12 = 23,
+ NV30_STATE_FRAGTEX13 = 24,
+ NV30_STATE_FRAGTEX14 = 25,
+ NV30_STATE_FRAGTEX15 = 26,
+ /* Four vertex texture slots. */
+ NV30_STATE_VERTTEX0 = 27,
+ NV30_STATE_VERTTEX1 = 28,
+ NV30_STATE_VERTTEX2 = 29,
+ NV30_STATE_VERTTEX3 = 30,
+ NV30_STATE_VTXBUF = 31,
+ NV30_STATE_VTXFMT = 32,
+ NV30_STATE_VTXATTR = 33,
+ NV30_STATE_MAX = 34
+};
+
+#include "nv30_screen.h"
+
+/* One bit per kind of context state.  Presumably OR-ed into the
+ * context's dirty mask when that state changes and matched against a
+ * state entry's dirty.pipe field -- confirm against the state code.
+ */
+#define NV30_NEW_BLEND (1 << 0)
+#define NV30_NEW_RAST (1 << 1)
+#define NV30_NEW_ZSA (1 << 2)
+#define NV30_NEW_SAMPLER (1 << 3)
+#define NV30_NEW_FB (1 << 4)
+#define NV30_NEW_STIPPLE (1 << 5)
+#define NV30_NEW_SCISSOR (1 << 6)
+#define NV30_NEW_VIEWPORT (1 << 7)
+#define NV30_NEW_BCOL (1 << 8)
+#define NV30_NEW_VERTPROG (1 << 9)
+#define NV30_NEW_FRAGPROG (1 << 10)
+#define NV30_NEW_ARRAYS (1 << 11)
+#define NV30_NEW_UCP (1 << 12)
+
+/* Rasterizer state: the Gallium rasterizer state together with a nouveau
+ * state object for it.
+ */
+struct nv30_rasterizer_state {
+ struct pipe_rasterizer_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+/* Depth/stencil/alpha state: the Gallium state together with a nouveau
+ * state object for it.
+ */
+struct nv30_zsa_state {
+ struct pipe_depth_stencil_alpha_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+/* Blend state: the Gallium blend state together with a nouveau state
+ * object for it.
+ */
+struct nv30_blend_state {
+ struct pipe_blend_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+
+/* Hardware-side state tracked for a context. */
+struct nv30_state {
+ unsigned scissor_enabled;
+ unsigned stipple_enabled;
+ unsigned viewport_bypass;
+ unsigned fp_samplers;
+
+ /* 64 bits wide, enough for one bit per nv30_state_index slot. */
+ uint64_t dirty;
+ /* One state object per hardware state slot. */
+ struct nouveau_stateobj *hw[NV30_STATE_MAX];
+};
+
+/* Per-context driver state.  The pipe_context is the first member, which
+ * is what allows nv30_context() to cast a pipe_context pointer back to
+ * this structure.
+ */
+struct nv30_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv30_screen *screen;
+ /* Context id handed to nv30_create(). */
+ unsigned pctx_id;
+
+ /* Draw module context for the software fallback path. */
+ struct draw_context *draw;
+
+ /* HW state derived from pipe states */
+ struct nv30_state state;
+
+ /* Context state */
+ unsigned dirty;
+ struct pipe_scissor_state scissor;
+ unsigned stipple[32];
+ struct nv30_vertex_program *vertprog;
+ struct nv30_fragment_program *fragprog;
+ /* Constant buffers and their sizes, indexed by shader type. */
+ struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+ struct nv30_rasterizer_state *rasterizer;
+ struct nv30_zsa_state *zsa;
+ struct nv30_blend_state *blend;
+ struct pipe_blend_color blend_colour;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_buffer *idxbuf;
+ unsigned idxbuf_format;
+ struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers;
+ unsigned nr_textures;
+ unsigned dirty_samplers;
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned vtxbuf_nr;
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+ unsigned vtxelt_nr;
+ const unsigned *edgeflags;
+};
+
+/**
+ * Downcast a pipe_context pointer to the nv30_context that embeds it as
+ * its first member.
+ */
+static INLINE struct nv30_context *
+nv30_context(struct pipe_context *pipe)
+{
+	struct nv30_context *nv30 = (struct nv30_context *)pipe;
+
+	return nv30;
+}
+
+/* One entry of the state validation table: a validate callback plus the
+ * dirty information associated with it.
+ */
+struct nv30_state_entry {
+ boolean (*validate)(struct nv30_context *nv30);
+ struct {
+ /* Mask of NV30_NEW_* bits (presumably the ones that trigger
+  * validate -- confirm in nv30_state_emit.c).
+  */
+ unsigned pipe;
+ /* An nv30_state_index value naming the affected hardware slot. */
+ unsigned hw;
+ } dirty;
+};
+
+extern void nv30_init_state_functions(struct nv30_context *nv30);
+extern void nv30_init_surface_functions(struct nv30_context *nv30);
+extern void nv30_init_query_functions(struct nv30_context *nv30);
+
+extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv30_draw.c */
+extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30);
+
+/* nv30_vertprog.c */
+extern void nv30_vertprog_destroy(struct nv30_context *,
+ struct nv30_vertex_program *);
+
+/* nv30_fragprog.c */
+extern void nv30_fragprog_destroy(struct nv30_context *,
+ struct nv30_fragment_program *);
+
+/* nv30_fragtex.c */
+extern void nv30_fragtex_bind(struct nv30_context *);
+
+/* nv30_state.c and friends */
+extern boolean nv30_state_validate(struct nv30_context *nv30);
+extern void nv30_state_emit(struct nv30_context *nv30);
+extern struct nv30_state_entry nv30_state_rasterizer;
+extern struct nv30_state_entry nv30_state_scissor;
+extern struct nv30_state_entry nv30_state_stipple;
+extern struct nv30_state_entry nv30_state_fragprog;
+extern struct nv30_state_entry nv30_state_vertprog;
+extern struct nv30_state_entry nv30_state_blend;
+extern struct nv30_state_entry nv30_state_blend_colour;
+extern struct nv30_state_entry nv30_state_zsa;
+extern struct nv30_state_entry nv30_state_viewport;
+extern struct nv30_state_entry nv30_state_framebuffer;
+extern struct nv30_state_entry nv30_state_fragtex;
+extern struct nv30_state_entry nv30_state_vbo;
+
+/* nv30_vbo.c */
+extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv30_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count);
+
+/* nv30_clear.c */
+extern void nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c
new file mode 100644
index 0000000000..74fc138c05
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_draw.c
@@ -0,0 +1,61 @@
+#include "draw/draw_pipe.h"
+
+#include "nv30_context.h"
+
+/* nv30 render stage: the generic draw_stage is embedded as the first member,
+ * followed by the nv30 context handed to nv30_draw_render_stage().
+ */
+struct nv30_draw_stage {
+ struct draw_stage draw;
+ struct nv30_context *nv30;
+};
+
+/* Point entry of the render stage.  Not implemented: it only calls
+ * NOUVEAU_ERR() and ignores the primitive.
+ */
+static void
+nv30_draw_point(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+/* Line entry of the render stage.  Not implemented: it only calls
+ * NOUVEAU_ERR() and ignores the primitive.
+ */
+static void
+nv30_draw_line(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+/* Triangle entry of the render stage.  Not implemented: it only calls
+ * NOUVEAU_ERR() and ignores the primitive.
+ */
+static void
+nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+/* Flush entry of the render stage.  The body is empty: nothing is done. */
+static void
+nv30_draw_flush(struct draw_stage *draw, unsigned flags)
+{
+}
+
+/* Stipple-counter reset entry of the render stage.  Not implemented: it
+ * only calls NOUVEAU_ERR().
+ */
+static void
+nv30_draw_reset_stipple_counter(struct draw_stage *draw)
+{
+ NOUVEAU_ERR("\n");
+}
+
+/* Destroy entry of the render stage: hands the stage pointer to FREE().
+ * The draw_stage is the first member of struct nv30_draw_stage, so this is
+ * the same address that nv30_draw_render_stage() allocated.
+ */
+static void
+nv30_draw_destroy(struct draw_stage *draw)
+{
+ FREE(draw);
+}
+
+/**
+ * Create the nv30 render stage for the draw module.
+ *
+ * Allocates a struct nv30_draw_stage, records the given context and the
+ * context's draw module, and installs the stage callbacks defined in this
+ * file.
+ *
+ * \param nv30  context the stage belongs to; nv30->draw is stored in the stage
+ * \return pointer to the embedded draw_stage, or NULL if allocation failed
+ */
+struct draw_stage *
+nv30_draw_render_stage(struct nv30_context *nv30)
+{
+	struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage);
+
+	/* Allocation can fail; never dereference a NULL stage. */
+	if (!nv30draw)
+		return NULL;
+
+	nv30draw->nv30 = nv30;
+	nv30draw->draw.draw = nv30->draw;
+	nv30draw->draw.point = nv30_draw_point;
+	nv30draw->draw.line = nv30_draw_line;
+	nv30draw->draw.tri = nv30_draw_tri;
+	nv30draw->draw.flush = nv30_draw_flush;
+	nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter;
+	nv30draw->draw.destroy = nv30_draw_destroy;
+
+	return &nv30draw->draw;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
new file mode 100644
index 0000000000..320ba3f4bf
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -0,0 +1,911 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv30_context.h"
+
+/* Swizzle component selectors. */
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+/* Write-mask bits, one per destination component. */
+#define MASK_X 1
+#define MASK_Y 2
+#define MASK_Z 4
+#define MASK_W 8
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+/* NOTE(review): the names above and DEF_SCALE/DEF_CTEST are defined before
+ * nv30_shader.h is included, presumably because that header uses them -
+ * confirm before reordering.
+ */
+#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NV30_FP_OP_COND_TR
+#include "nv30_shader.h"
+
+/* Source-register modifier shorthands.  Note that abs() here is a macro and
+ * therefore hides any function of the same name for the rest of the file.
+ */
+#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv30_sr_neg((s))
+#define abs(s) nv30_sr_abs((s))
+#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
+
+/* Capacities of the consts[] and imm[] arrays in struct nv30_fpc. */
+#define MAX_CONSTS 128
+#define MAX_IMM 32
+/* Per-translation state of the fragment program compiler. */
+struct nv30_fpc {
+ /* Program being filled in. */
+ struct nv30_fragment_program *fp;
+
+ /* TGSI input index -> hw input source, filled from input declarations. */
+ uint attrib_map[PIPE_MAX_SHADER_INPUTS];
+
+ /* Highest temp register index referenced so far (tgsi_src/tgsi_dst). */
+ int high_temp;
+ /* Scratch temps handed out by temp(); reset for every TGSI instruction. */
+ int temp_temp_count;
+ /* Highest temp destination index + 1, tracked by emit_dst(). */
+ int num_regs;
+
+ /* TGSI output indices of the position (depth) and colour results. */
+ uint depth_id;
+ uint colour_id;
+
+ /* Offset, in 32-bit words, of the instruction currently being emitted. */
+ unsigned inst_offset;
+
+ /* Constant table: pipe >= 0 names a constant-buffer slot, pipe == -1
+ * means the value is held inline in vals[].
+ */
+ struct {
+ int pipe;
+ float vals[4];
+ } consts[MAX_CONSTS];
+ int nr_consts;
+
+ /* Source registers created for TGSI immediates, in declaration order. */
+ struct nv30_sreg imm[MAX_IMM];
+ unsigned nr_imm;
+};
+
+/* Hand out the next scratch temporary.  Scratch registers are numbered
+ * directly above the highest temp index seen so far (high_temp); every call
+ * consumes one more slot by bumping temp_temp_count.
+ */
+static INLINE struct nv30_sreg
+temp(struct nv30_fpc *fpc)
+{
+	const int scratch = fpc->temp_temp_count++;
+
+	return nv30_sr(NV30SR_TEMP, fpc->high_temp + 1 + scratch);
+}
+
+/**
+ * Return a CONST source register backed by an entry of fpc->consts[].
+ *
+ * \param pipe  constant-buffer slot (>= 0), or -1 for an inline value
+ * \param vals  the four inline values; only read when pipe == -1
+ *
+ * A constant-buffer slot that already has an entry reuses it, so repeated
+ * references to the same constant do not consume table space.  Inline
+ * values always get a fresh entry.  When the table is full an error is
+ * logged and a NONE register is returned instead of writing past consts[].
+ */
+static INLINE struct nv30_sreg
+constant(struct nv30_fpc *fpc, int pipe, float vals[4])
+{
+	int idx;
+
+	if (pipe >= 0) {
+		for (idx = 0; idx < fpc->nr_consts; idx++) {
+			if (fpc->consts[idx].pipe == pipe)
+				return nv30_sr(NV30SR_CONST, idx);
+		}
+	}
+
+	if (fpc->nr_consts >= MAX_CONSTS) {
+		NOUVEAU_ERR("too many constants\n");
+		assert(0);
+		return nv30_sr(NV30SR_NONE, 0);
+	}
+	idx = fpc->nr_consts++;
+
+	fpc->consts[idx].pipe = pipe;
+	if (pipe == -1)
+		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
+	return nv30_sr(NV30SR_CONST, idx);
+}
+
+/* Emission shorthands: the opcode argument is pasted onto
+ * NV30_FP_OP_OPCODE_ and the call is forwarded to nv30_fp_arith() /
+ * nv30_fp_tex().
+ * tex() ignores its s1/s2 arguments and always passes `none`, so an
+ * identifier named `none` must be in scope wherever tex() is used.
+ */
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
+ (d), (m), (s0), (s1), (s2))
+#define tex(cc,s,o,u,d,m,s0,s1,s2) \
+ nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
+ (d), (m), (s0), none, none)
+
+/* Extend the program's instruction array by `size` 32-bit words.
+ * The added words are left uninitialised, and the realloc() result is
+ * stored without a NULL check.
+ */
+static void
+grow_insns(struct nv30_fpc *fpc, int size)
+{
+	struct nv30_fragment_program *prog = fpc->fp;
+
+	prog->insn_len += size;
+	prog->insn = realloc(prog->insn, prog->insn_len * sizeof(uint32_t));
+}
+
+/**
+ * Encode source operand `pos` (0..2) of the instruction at
+ * fpc->inst_offset.
+ *
+ * The register word is OR-ed into hw[pos + 1]; the input-source selector
+ * goes into hw[0] and the abs modifier into bit (29 + pos) of hw[1].
+ *
+ * A CONST operand is stored inline in the four words following the
+ * instruction.  Those words are reserved once per instruction: if an
+ * earlier operand of the same instruction already reserved them
+ * (insn_len has reached inst_offset + 8), the existing slot is shared
+ * rather than growing the program a second time, which would leave four
+ * uninitialised words between this instruction and the next one.
+ */
+static void
+emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
+{
+	struct nv30_fragment_program *fp = fpc->fp;
+	uint32_t *hw = &fp->insn[fpc->inst_offset];
+	uint32_t sr = 0;
+
+	switch (src.type) {
+	case NV30SR_INPUT:
+		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+		hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT);
+		break;
+	case NV30SR_OUTPUT:
+		sr |= NV30_FP_REG_SRC_HALF;
+		/* fall-through */
+	case NV30SR_TEMP:
+		sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
+		sr |= (src.index << NV30_FP_REG_SRC_SHIFT);
+		break;
+	case NV30SR_CONST:
+		if (fp->insn_len < fpc->inst_offset + 8) {
+			grow_insns(fpc, 4);
+			/* grow_insns() may have moved the array. */
+			hw = &fp->insn[fpc->inst_offset];
+			if (fpc->consts[src.index].pipe >= 0) {
+				struct nv30_fragment_program_data *fpd;
+
+				/* Record where the constant-buffer value has
+				 * to be patched in later on.
+				 */
+				fp->consts = realloc(fp->consts,
+						     ++fp->nr_consts *
+						     sizeof(*fpd));
+				fpd = &fp->consts[fp->nr_consts - 1];
+				fpd->offset = fpc->inst_offset + 4;
+				fpd->index = fpc->consts[src.index].pipe;
+				memset(&fp->insn[fpd->offset], 0,
+				       sizeof(uint32_t) * 4);
+			} else {
+				/* Inline value: copy it straight in. */
+				memcpy(&fp->insn[fpc->inst_offset + 4],
+				       fpc->consts[src.index].vals,
+				       sizeof(uint32_t) * 4);
+			}
+		}
+
+		sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
+		break;
+	case NV30SR_NONE:
+		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+		break;
+	default:
+		assert(0);
+	}
+
+	if (src.negate)
+		sr |= NV30_FP_REG_NEGATE;
+
+	if (src.abs)
+		hw[1] |= (1 << (29 + pos));
+
+	sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) |
+	       (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) |
+	       (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) |
+	       (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT));
+
+	hw[pos + 1] |= sr;
+}
+
+/* Encode the destination register of the instruction at fpc->inst_offset.
+ *
+ *  - TEMP:   only bookkeeping, num_regs is raised to cover the register.
+ *  - OUTPUT: index 1 ORs 0xe into fp_control, any other index marks the
+ *            destination as half precision.
+ *            NOTE(review): 0xe presumably flags a depth write - confirm.
+ *  - NONE:   sets bit 30 of the first instruction word.
+ *
+ * In every case the register index is OR-ed into the first word.
+ */
+static void
+emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst)
+{
+	struct nv30_fragment_program *fp = fpc->fp;
+	uint32_t *hw = &fp->insn[fpc->inst_offset];
+
+	if (dst.type == NV30SR_TEMP) {
+		if (fpc->num_regs < (dst.index + 1))
+			fpc->num_regs = dst.index + 1;
+	} else if (dst.type == NV30SR_OUTPUT) {
+		if (dst.index != 1)
+			hw[0] |= NV30_FP_OP_OUT_REG_HALF;
+		else
+			fp->fp_control |= 0xe;
+	} else if (dst.type == NV30SR_NONE) {
+		hw[0] |= (1 << 30);
+	} else {
+		assert(0);
+	}
+
+	hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT);
+}
+
+/* Append one four-word instruction to the program and encode it.
+ *
+ * fpc->inst_offset is set to the start of the new instruction.  The opcode,
+ * write mask, saturate flag, destination scale and the condition-code
+ * fields taken from `dst` are written first; the destination and the three
+ * source operands are encoded afterwards by emit_dst()/emit_src().
+ * A KIL opcode additionally sets the USES_KIL bit in fp_control.
+ */
+static void
+nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
+	      struct nv30_sreg dst, int mask,
+	      struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+{
+	struct nv30_fragment_program *fp = fpc->fp;
+	struct nv30_sreg srcs[3];
+	uint32_t *hw;
+	int pos;
+
+	srcs[0] = s0;
+	srcs[1] = s1;
+	srcs[2] = s2;
+
+	if (op == NV30_FP_OP_OPCODE_KIL)
+		fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
+
+	fpc->inst_offset = fp->insn_len;
+	grow_insns(fpc, 4);
+	hw = &fp->insn[fpc->inst_offset];
+	memset(hw, 0, 4 * sizeof(uint32_t));
+
+	/* Word 0: opcode, write mask and output flags. */
+	hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT);
+	hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT);
+	if (sat)
+		hw[0] |= NV30_FP_OP_OUT_SAT;
+	if (dst.cc_update)
+		hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
+
+	/* Word 1: condition test and condition swizzle. */
+	hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT);
+	hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) |
+		  (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) |
+		  (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) |
+		  (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT));
+
+	/* Word 2: destination scale. */
+	hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT);
+
+	emit_dst(fpc, dst);
+	for (pos = 0; pos < 3; pos++)
+		emit_src(fpc, pos, srcs[pos]);
+}
+
+/* Emit a texture instruction: encode it like an arithmetic instruction,
+ * then add the texture unit to its first word and mark the unit as used in
+ * fp->samplers.
+ */
+static void
+nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit,
+	    struct nv30_sreg dst, int mask,
+	    struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+{
+	struct nv30_fragment_program *prog = fpc->fp;
+
+	nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
+
+	prog->samplers |= (1 << unit);
+	prog->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
+}
+
+/**
+ * Convert a TGSI source register into an nv30 source register.
+ *
+ *  - INPUT:     mapped through fpc->attrib_map
+ *  - CONSTANT:  obtained from constant()
+ *  - IMMEDIATE: looked up in fpc->imm[]
+ *  - TEMPORARY: hw temp Index + 1; also raises fpc->high_temp
+ *  - OUTPUT:    returned as output 0 (colour) or 1 (anything else), with
+ *               none of the TGSI modifiers applied
+ *
+ * For every file except OUTPUT the TGSI abs/negate/swizzle modifiers are
+ * copied onto the result.  An unsupported file is reported and yields a
+ * NONE register, so the returned value is always initialised.
+ */
+static INLINE struct nv30_sreg
+tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
+{
+	struct nv30_sreg src;
+
+	switch (fsrc->SrcRegister.File) {
+	case TGSI_FILE_INPUT:
+		src = nv30_sr(NV30SR_INPUT,
+			      fpc->attrib_map[fsrc->SrcRegister.Index]);
+		break;
+	case TGSI_FILE_CONSTANT:
+		src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+		break;
+	case TGSI_FILE_IMMEDIATE:
+		assert(fsrc->SrcRegister.Index < fpc->nr_imm);
+		src = fpc->imm[fsrc->SrcRegister.Index];
+		break;
+	case TGSI_FILE_TEMPORARY:
+		src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1);
+		if (fpc->high_temp < src.index)
+			fpc->high_temp = src.index;
+		break;
+	/* This is clearly insane, but gallium hands us shaders like this.
+	 * Luckily fragprog results are just temp regs..
+	 */
+	case TGSI_FILE_OUTPUT:
+		if (fsrc->SrcRegister.Index == fpc->colour_id)
+			return nv30_sr(NV30SR_OUTPUT, 0);
+		else
+			return nv30_sr(NV30SR_OUTPUT, 1);
+	default:
+		NOUVEAU_ERR("bad src file\n");
+		/* Never hand back an uninitialised register. */
+		src = nv30_sr(NV30SR_NONE, 0);
+		break;
+	}
+
+	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.negate = fsrc->SrcRegister.Negate;
+	src.swz[0] = fsrc->SrcRegister.SwizzleX;
+	src.swz[1] = fsrc->SrcRegister.SwizzleY;
+	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	return src;
+}
+
+/* Convert a TGSI destination register into an nv30 register.
+ *
+ * OUTPUT maps to output 0 when the index is the colour result and to
+ * output 1 otherwise; TEMPORARY maps to hw temp Index + 1 and raises
+ * fpc->high_temp; NULL maps to a NONE register.  Any other file is
+ * reported and also yields a NONE register.
+ */
+static INLINE struct nv30_sreg
+tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst)
+{
+	if (fdst->DstRegister.File == TGSI_FILE_OUTPUT) {
+		const int is_colour =
+			(fdst->DstRegister.Index == fpc->colour_id);
+
+		return nv30_sr(NV30SR_OUTPUT, is_colour ? 0 : 1);
+	}
+
+	if (fdst->DstRegister.File == TGSI_FILE_TEMPORARY) {
+		int idx = fdst->DstRegister.Index + 1;
+
+		if (fpc->high_temp < idx)
+			fpc->high_temp = idx;
+		return nv30_sr(NV30SR_TEMP, idx);
+	}
+
+	if (fdst->DstRegister.File != TGSI_FILE_NULL)
+		NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+
+	return nv30_sr(NV30SR_NONE, 0);
+}
+
+/* Translate a TGSI write mask into the MASK_X/Y/Z/W bits used by this
+ * file, one component at a time.
+ */
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+	return ((tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0) |
+	       ((tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0);
+}
+
+/* Check whether a source's extended swizzle can be used directly.
+ *
+ * Returns TRUE when all four components are plain X/Y/Z/W selects and no
+ * per-component negate has to be applied; *src is NOT written in that case
+ * and the caller is expected to build the source itself.
+ *
+ * Otherwise the value is assembled in a scratch temp, the temp is stored in
+ * *src and FALSE is returned:
+ *   - components with a plain select are copied with MOV,
+ *   - ZERO components are written with SFL and ONE components with STR
+ *     (NOTE(review): presumably "set false"/"set true", giving 0.0/1.0 -
+ *     confirm against the hw docs),
+ *   - components needing a negate are multiplied by a negated STR result.
+ */
+static boolean
+src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
+ struct nv30_sreg *src)
+{
+ const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+ struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
+ uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+ /* `neg` followed by '[' is this array; `neg(...)` is still the macro. */
+ uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+ fsrc->SrcRegisterExtSwz.NegateY,
+ fsrc->SrcRegisterExtSwz.NegateZ,
+ fsrc->SrcRegisterExtSwz.NegateW };
+ uint c;
+
+ /* Classify each component and build the per-kind write masks. */
+ for (c = 0; c < 4; c++) {
+ switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ mask |= (1 << c);
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zero_mask |= (1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ one_mask |= (1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ default:
+ assert(0);
+ }
+
+ /* Per-component negate only matters when the register as a whole
+ * is not already negated.
+ */
+ if (!tgsi.negate && neg[c])
+ neg_mask |= (1 << c);
+ }
+
+ if (mask == MASK_ALL && !neg_mask)
+ return TRUE;
+
+ *src = temp(fpc);
+
+ if (mask)
+ arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
+
+ if (zero_mask)
+ arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
+
+ if (one_mask)
+ arith(fpc, 0, STR, *src, one_mask, *src, none, none);
+
+ if (neg_mask) {
+ struct nv30_sreg one = temp(fpc);
+ arith(fpc, 0, STR, one, neg_mask, one, none, none);
+ arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
+ }
+
+ return FALSE;
+}
+
+/**
+ * Translate one TGSI instruction into nv30 fragment program instructions.
+ *
+ * Sources are prepared first:
+ *  - TEMPORARY sources are converted up front, so fpc->high_temp is final
+ *    before any scratch temp is handed out by temp().
+ *  - INPUT/CONSTANT/TEMPORARY sources whose extended swizzle is not native
+ *    are rebuilt in a scratch temp by src_native_swz().
+ *  - Only one input attribute (tracked in `ai`) and one inline constant
+ *    (a constant-buffer slot tracked in `ci`, or an immediate tracked in
+ *    `ii`) are read directly by an instruction; further ones are first
+ *    copied to a scratch temp with MOV.  Constants and immediates are
+ *    tracked separately because equal indices in the two files name
+ *    different values.
+ *  - OUTPUT sources are converted with tgsi_src().
+ *  - SAMPLER sources only select the texture unit.
+ *
+ * \return TRUE on success (TGSI END emits nothing), FALSE for an
+ *         unsupported source file or opcode.
+ */
+static boolean
+nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
+				const struct tgsi_full_instruction *finst)
+{
+	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+	struct nv30_sreg src[3], dst, tmp;
+	int mask, sat, unit = 0;
+	int ai = -1, ci = -1, ii = -1;
+	int i;
+
+	if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+		return TRUE;
+
+	fpc->temp_temp_count = 0;
+	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+		const struct tgsi_full_src_register *fsrc;
+
+		fsrc = &finst->FullSrcRegisters[i];
+		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+			src[i] = tgsi_src(fpc, fsrc);
+		}
+	}
+
+	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+		const struct tgsi_full_src_register *fsrc;
+
+		fsrc = &finst->FullSrcRegisters[i];
+
+		switch (fsrc->SrcRegister.File) {
+		case TGSI_FILE_INPUT:
+		case TGSI_FILE_CONSTANT:
+		case TGSI_FILE_TEMPORARY:
+			/* FALSE: src[i] was already built in a scratch temp. */
+			if (!src_native_swz(fpc, fsrc, &src[i]))
+				continue;
+			break;
+		default:
+			break;
+		}
+
+		switch (fsrc->SrcRegister.File) {
+		case TGSI_FILE_INPUT:
+			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+				ai = fsrc->SrcRegister.Index;
+				src[i] = tgsi_src(fpc, fsrc);
+			} else {
+				NOUVEAU_MSG("extra src attr %d\n",
+					    fsrc->SrcRegister.Index);
+				src[i] = temp(fpc);
+				arith(fpc, 0, MOV, src[i], MASK_ALL,
+				      tgsi_src(fpc, fsrc), none, none);
+			}
+			break;
+		case TGSI_FILE_CONSTANT:
+			if ((ci == -1 && ii == -1) ||
+			    ci == fsrc->SrcRegister.Index) {
+				ci = fsrc->SrcRegister.Index;
+				src[i] = tgsi_src(fpc, fsrc);
+			} else {
+				src[i] = temp(fpc);
+				arith(fpc, 0, MOV, src[i], MASK_ALL,
+				      tgsi_src(fpc, fsrc), none, none);
+			}
+			break;
+		case TGSI_FILE_IMMEDIATE:
+			if ((ci == -1 && ii == -1) ||
+			    ii == fsrc->SrcRegister.Index) {
+				ii = fsrc->SrcRegister.Index;
+				src[i] = tgsi_src(fpc, fsrc);
+			} else {
+				src[i] = temp(fpc);
+				arith(fpc, 0, MOV, src[i], MASK_ALL,
+				      tgsi_src(fpc, fsrc), none, none);
+			}
+			break;
+		case TGSI_FILE_TEMPORARY:
+			/* handled above */
+			break;
+		case TGSI_FILE_SAMPLER:
+			unit = fsrc->SrcRegister.Index;
+			break;
+		case TGSI_FILE_OUTPUT:
+			/* Results can be read back as sources; without this
+			 * src[i] would be used uninitialised.
+			 */
+			src[i] = tgsi_src(fpc, fsrc);
+			break;
+		default:
+			NOUVEAU_ERR("bad src file\n");
+			return FALSE;
+		}
+	}
+
+	dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
+	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+	sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+
+	switch (finst->Instruction.Opcode) {
+	case TGSI_OPCODE_ABS:
+		arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
+		break;
+	case TGSI_OPCODE_ADD:
+		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_CMP:
+		/* dst = src2; set condition codes from src0; then overwrite
+		 * dst with src1 where the LT test passes.
+		 */
+		tmp = temp(fpc);
+		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+		tmp.cc_update = 1;
+		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+		dst.cc_test = NV30_FP_OP_COND_LT;
+		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
+		break;
+	case TGSI_OPCODE_COS:
+		arith(fpc, sat, COS, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_DP3:
+		arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_DP4:
+		arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_DPH:
+		tmp = temp(fpc);
+		arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
+		arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
+		      swz(src[1], W, W, W, W), none);
+		break;
+	case TGSI_OPCODE_DST:
+		arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_EX2:
+		arith(fpc, sat, EX2, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_FLR:
+		arith(fpc, sat, FLR, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_FRC:
+		arith(fpc, sat, FRC, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_KILP:
+		arith(fpc, 0, KIL, none, 0, none, none, none);
+		break;
+	case TGSI_OPCODE_KIL:
+		/* Set condition codes from src0, then kill where LT passes. */
+		dst = nv30_sr(NV30SR_NONE, 0);
+		dst.cc_update = 1;
+		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
+		dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT;
+		arith(fpc, 0, KIL, dst, 0, none, none, none);
+		break;
+	case TGSI_OPCODE_LG2:
+		arith(fpc, sat, LG2, dst, mask, src[0], none, none);
+		break;
+//	case TGSI_OPCODE_LIT:
+	case TGSI_OPCODE_LRP:
+		arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]);
+		break;
+	case TGSI_OPCODE_MAD:
+		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
+		break;
+	case TGSI_OPCODE_MAX:
+		arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_MIN:
+		arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_MOV:
+		arith(fpc, sat, MOV, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_MUL:
+		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_NOISE1:
+	case TGSI_OPCODE_NOISE2:
+	case TGSI_OPCODE_NOISE3:
+	case TGSI_OPCODE_NOISE4:
+		arith(fpc, sat, SFL, dst, mask, none, none, none);
+		break;
+	case TGSI_OPCODE_POW:
+		arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_RCP:
+		arith(fpc, sat, RCP, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_RET:
+		assert(0);
+		break;
+	case TGSI_OPCODE_RFL:
+		arith(fpc, 0, RFL, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_RSQ:
+		arith(fpc, sat, RSQ, dst, mask,
+		      abs(swz(src[0], X, X, X, X)), none, none);
+		break;
+	case TGSI_OPCODE_SCS:
+		if (mask & MASK_X) {
+			arith(fpc, sat, COS, dst, MASK_X,
+			      swz(src[0], X, X, X, X), none, none);
+		}
+		if (mask & MASK_Y) {
+			arith(fpc, sat, SIN, dst, MASK_Y,
+			      swz(src[0], X, X, X, X), none, none);
+		}
+		break;
+	case TGSI_OPCODE_SIN:
+		arith(fpc, sat, SIN, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_SGE:
+		arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SGT:
+		arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SLT:
+		arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SUB:
+		arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
+		break;
+	case TGSI_OPCODE_TEX:
+		tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_TXB:
+		tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_TXP:
+		tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_XPD:
+		tmp = temp(fpc);
+		arith(fpc, 0, MUL, tmp, mask,
+		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+		arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
+		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+		      neg(tmp));
+		break;
+	default:
+		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/* Record the hw input source for a TGSI input declaration.
+ *
+ * Supported semantics: POSITION, COLOR 0/1, FOG and GENERIC 0..7 (mapped to
+ * texture coordinates).  The result is stored in fpc->attrib_map for the
+ * first register of the declared range only.
+ *
+ * Returns FALSE, after logging, for any other semantic or index.
+ */
+static boolean
+nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
+				const struct tgsi_full_declaration *fdec)
+{
+	int hw;
+
+	switch (fdec->Semantic.SemanticName) {
+	case TGSI_SEMANTIC_POSITION:
+		hw = NV30_FP_OP_INPUT_SRC_POSITION;
+		break;
+	case TGSI_SEMANTIC_FOG:
+		hw = NV30_FP_OP_INPUT_SRC_FOGC;
+		break;
+	case TGSI_SEMANTIC_COLOR:
+		switch (fdec->Semantic.SemanticIndex) {
+		case 0:
+			hw = NV30_FP_OP_INPUT_SRC_COL0;
+			break;
+		case 1:
+			hw = NV30_FP_OP_INPUT_SRC_COL1;
+			break;
+		default:
+			NOUVEAU_ERR("bad colour semantic index\n");
+			return FALSE;
+		}
+		break;
+	case TGSI_SEMANTIC_GENERIC:
+		if (fdec->Semantic.SemanticIndex > 7) {
+			NOUVEAU_ERR("bad generic semantic index\n");
+			return FALSE;
+		}
+		hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.SemanticIndex);
+		break;
+	default:
+		NOUVEAU_ERR("bad input semantic\n");
+		return FALSE;
+	}
+
+	fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+	return TRUE;
+}
+
+/* Remember which TGSI output register carries depth (POSITION semantic)
+ * and which carries colour (COLOR semantic).  Any other output semantic is
+ * logged and rejected with FALSE.
+ */
+static boolean
+nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
+				const struct tgsi_full_declaration *fdec)
+{
+	if (fdec->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+		fpc->depth_id = fdec->DeclarationRange.First;
+		return TRUE;
+	}
+
+	if (fdec->Semantic.SemanticName == TGSI_SEMANTIC_COLOR) {
+		fpc->colour_id = fdec->DeclarationRange.First;
+		return TRUE;
+	}
+
+	NOUVEAU_ERR("bad output semantic\n");
+	return FALSE;
+}
+
+/**
+ * First pass over the TGSI tokens: process declarations and immediates.
+ *
+ * Input declarations fill fpc->attrib_map, output declarations record the
+ * depth/colour output indices, and every immediate is entered into the
+ * constant table with its source register appended to fpc->imm[].
+ *
+ * \return TRUE on success; FALSE if a declaration is rejected, an
+ *         immediate is not FLOAT32, or there are more than MAX_IMM
+ *         immediates.  These limits are checked at run time, so a release
+ *         build cannot write past fpc->imm[].
+ */
+static boolean
+nv30_fragprog_prepare(struct nv30_fpc *fpc)
+{
+	struct tgsi_parse_context p;
+
+	tgsi_parse_init(&p, fpc->fp->pipe.tokens);
+	while (!tgsi_parse_end_of_tokens(&p)) {
+		const union tgsi_full_token *tok = &p.FullToken;
+
+		tgsi_parse_token(&p);
+		switch(tok->Token.Type) {
+		case TGSI_TOKEN_TYPE_DECLARATION:
+		{
+			const struct tgsi_full_declaration *fdec;
+			fdec = &p.FullToken.FullDeclaration;
+			switch (fdec->Declaration.File) {
+			case TGSI_FILE_INPUT:
+				if (!nv30_fragprog_parse_decl_attrib(fpc, fdec))
+					goto out_err;
+				break;
+			case TGSI_FILE_OUTPUT:
+				if (!nv30_fragprog_parse_decl_output(fpc, fdec))
+					goto out_err;
+				break;
+			default:
+				break;
+			}
+		}
+			break;
+		case TGSI_TOKEN_TYPE_IMMEDIATE:
+		{
+			struct tgsi_full_immediate *imm;
+			float vals[4];
+
+			imm = &p.FullToken.FullImmediate;
+			if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) {
+				NOUVEAU_ERR("unsupported immediate type\n");
+				goto out_err;
+			}
+			if (fpc->nr_imm >= MAX_IMM) {
+				NOUVEAU_ERR("too many immediates\n");
+				goto out_err;
+			}
+
+			vals[0] = imm->u.ImmediateFloat32[0].Float;
+			vals[1] = imm->u.ImmediateFloat32[1].Float;
+			vals[2] = imm->u.ImmediateFloat32[2].Float;
+			vals[3] = imm->u.ImmediateFloat32[3].Float;
+			fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
+		}
+			break;
+		default:
+			break;
+		}
+	}
+	tgsi_parse_free(&p);
+
+	return TRUE;
+
+out_err:
+	tgsi_parse_free(&p);
+	return FALSE;
+}
+
+/**
+ * Translate the TGSI tokens of `fp` into nv30 fragment program code.
+ *
+ * On success fp->insn/insn_len hold the program (terminated, followed by a
+ * NOP + END instruction), fp->fp_control and fp->fp_reg_control are set,
+ * fp->translated is TRUE and fp->on_hw is FALSE.
+ *
+ * On failure fp->translated is left untouched and everything emitted so
+ * far (fp->insn, fp->consts) is released and reset, so a later retry
+ * starts from an empty program instead of appending to a partial one.
+ */
+static void
+nv30_fragprog_translate(struct nv30_context *nv30,
+			struct nv30_fragment_program *fp)
+{
+	struct tgsi_parse_context parse;
+	struct nv30_fpc *fpc = NULL;
+
+	tgsi_dump(fp->pipe.tokens,0);
+
+	fpc = CALLOC(1, sizeof(struct nv30_fpc));
+	if (!fpc)
+		return;
+	fpc->fp = fp;
+	fpc->high_temp = -1;
+	fpc->num_regs = 2;
+
+	if (!nv30_fragprog_prepare(fpc)) {
+		FREE(fpc);
+		return;
+	}
+
+	tgsi_parse_init(&parse, fp->pipe.tokens);
+
+	while (!tgsi_parse_end_of_tokens(&parse)) {
+		tgsi_parse_token(&parse);
+
+		switch (parse.FullToken.Token.Type) {
+		case TGSI_TOKEN_TYPE_INSTRUCTION:
+		{
+			const struct tgsi_full_instruction *finst;
+
+			finst = &parse.FullToken.FullInstruction;
+			if (!nv30_fragprog_parse_instruction(fpc, finst))
+				goto out_err;
+		}
+			break;
+		default:
+			break;
+		}
+	}
+
+	fp->fp_control |= (fpc->num_regs-1)/2;
+	fp->fp_reg_control = (1<<16)|0x4;
+
+	/* Terminate final instruction, if any instruction was emitted at
+	 * all; with an empty program there is nothing to index.
+	 */
+	if (fp->insn_len)
+		fp->insn[fpc->inst_offset] |= 0x00000001;
+
+	/* Append NOP + END instruction, may or may not be necessary. */
+	fpc->inst_offset = fp->insn_len;
+	grow_insns(fpc, 4);
+	fp->insn[fpc->inst_offset + 0] = 0x00000001;
+	fp->insn[fpc->inst_offset + 1] = 0x00000000;
+	fp->insn[fpc->inst_offset + 2] = 0x00000000;
+	fp->insn[fpc->inst_offset + 3] = 0x00000000;
+
+	fp->translated = TRUE;
+	fp->on_hw = FALSE;
+
+	tgsi_parse_free(&parse);
+	FREE(fpc);
+	return;
+
+out_err:
+	/* Drop the partially built program. */
+	if (fp->insn)
+		FREE(fp->insn);
+	fp->insn = NULL;
+	fp->insn_len = 0;
+	if (fp->consts)
+		FREE(fp->consts);
+	fp->consts = NULL;
+	fp->nr_consts = 0;
+
+	tgsi_parse_free(&parse);
+	FREE(fpc);
+}
+
+/**
+ * Copy the translated program (fp->insn, insn_len words) into fp->buffer.
+ *
+ * On a little-endian host the words are copied unchanged; otherwise the
+ * two 16-bit halves of every word are swapped.  If the buffer cannot be
+ * mapped an error is logged and nothing is written.
+ */
+static void
+nv30_fragprog_upload(struct nv30_context *nv30,
+		     struct nv30_fragment_program *fp)
+{
+	struct pipe_winsys *ws = nv30->pipe.winsys;
+	const uint32_t le = 1;
+	uint32_t *map;
+	int i;
+
+	map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+	if (!map) {
+		NOUVEAU_ERR("failed to map fragment program buffer\n");
+		return;
+	}
+
+#if 0
+	for (i = 0; i < fp->insn_len; i++) {
+		fflush(stdout); fflush(stderr);
+		NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
+		fflush(stdout); fflush(stderr);
+	}
+#endif
+
+	/* The first byte of `le` is 1 only on a little-endian host. */
+	if ((*(const uint8_t *)&le)) {
+		for (i = 0; i < fp->insn_len; i++) {
+			map[i] = fp->insn[i];
+		}
+	} else {
+		/* Weird swapping for big-endian chips */
+		for (i = 0; i < fp->insn_len; i++) {
+			map[i] = ((fp->insn[i] & 0xffff) << 16) |
+				 ((fp->insn[i] >> 16) & 0xffff);
+		}
+	}
+
+	ws->buffer_unmap(ws, fp->buffer);
+}
+
+/* State validation hook for the fragment program.
+ *
+ * On first use the bound program is translated, a buffer of insn_len * 4
+ * bytes is created and filled, and a state object programming
+ * FP_ACTIVE_PROGRAM / FP_CONTROL / FP_REG_CONTROL / TX_UNITS_ENABLE is
+ * built and stored in fp->so.
+ *
+ * On every call the constants recorded in fp->consts are compared with the
+ * fragment constant buffer; changed values are patched into fp->insn and
+ * the whole program is uploaded again.
+ *
+ * Returns TRUE when the state object was (re)bound into
+ * state.hw[NV30_STATE_FRAGPROG], FALSE otherwise, including when
+ * translation fails.
+ *
+ * NOTE(review): the results of buffer_create() and buffer_map() are used
+ * without NULL checks, and constbuf is mapped whenever fp->nr_consts != 0.
+ */
+static boolean
+nv30_fragprog_validate(struct nv30_context *nv30)
+{
+ struct nv30_fragment_program *fp = nv30->fragprog;
+ struct pipe_buffer *constbuf =
+ nv30->constbuf[PIPE_SHADER_FRAGMENT];
+ struct pipe_winsys *ws = nv30->pipe.winsys;
+ struct nouveau_stateobj *so;
+ boolean new_consts = FALSE;
+ int i;
+
+ /* Already translated: only the constants may need refreshing. */
+ if (fp->translated)
+ goto update_constants;
+
+ /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
+ nv30_fragprog_translate(nv30, fp);
+ if (!fp->translated) {
+ /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
+ return FALSE;
+ }
+
+ fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4);
+ nv30_fragprog_upload(nv30, fp);
+
+ so = so_new(8, 1);
+ so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
+ so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
+ so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1);
+ so_data (so, fp->fp_control);
+ so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1);
+ so_data (so, fp->fp_reg_control);
+ so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1);
+ so_data (so, fp->samplers);
+ so_ref(so, &fp->so);
+
+update_constants:
+ if (fp->nr_consts) {
+ float *map;
+
+ map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ);
+ for (i = 0; i < fp->nr_consts; i++) {
+ struct nv30_fragment_program_data *fpd = &fp->consts[i];
+ /* p: the inline constant slot in the program; cb: the current
+ * value (four floats per constant) in the constant buffer.
+ */
+ uint32_t *p = &fp->insn[fpd->offset];
+ uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
+
+ if (!memcmp(p, cb, 4 * sizeof(float)))
+ continue;
+ memcpy(p, cb, 4 * sizeof(float));
+ new_consts = TRUE;
+ }
+ ws->buffer_unmap(ws, constbuf);
+
+ if (new_consts)
+ nv30_fragprog_upload(nv30, fp);
+ }
+
+ if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) {
+ so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/**
+ * Release the translation results owned by a fragment program: the
+ * instruction array and the constant patch table that emit_src() grows
+ * with realloc().  Both are reset so that a later translation or a second
+ * destroy starts from an empty program.
+ *
+ * The program object itself, fp->buffer and fp->so are not touched here.
+ */
+void
+nv30_fragprog_destroy(struct nv30_context *nv30,
+		      struct nv30_fragment_program *fp)
+{
+	if (fp->insn_len)
+		FREE(fp->insn);
+	fp->insn = NULL;
+	fp->insn_len = 0;
+
+	if (fp->nr_consts)
+		FREE(fp->consts);
+	fp->consts = NULL;
+	fp->nr_consts = 0;
+}
+
+/* State-table entry for the fragment program: validated by
+ * nv30_fragprog_validate(), keyed on the NV30_NEW_FRAGPROG pipe dirty flag
+ * and tied to the NV30_STATE_FRAGPROG hw state slot.
+ */
+struct nv30_state_entry nv30_state_fragprog = {
+ .validate = nv30_fragprog_validate,
+ .dirty = {
+ .pipe = NV30_NEW_FRAGPROG,
+ .hw = NV30_STATE_FRAGPROG
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
new file mode 100644
index 0000000000..b1d2663af3
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -0,0 +1,163 @@
+#include "nv30_context.h"
+#include "nouveau/nouveau_util.h"
+
+/* Builds one struct nv30_texture_format initialiser:
+ *   m          pipe format name (PIPE_FORMAT_ is prepended)
+ *   tf         hw texture format name (NV34TCL_TX_FORMAT_FORMAT_ is prepended)
+ *   ts0x..ts0w S0 swizzle selectors for X/Y/Z/W
+ *   ts1x..ts1w S1 swizzle selectors for X/Y/Z/W
+ * The eight swizzle values are OR-ed into a single word.
+ */
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV34TCL_TX_FORMAT_FORMAT_##tf, \
+ (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
+ NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
+ NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
+ NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \
+}
+
+/* One row of the pipe-format -> hw texture format table. */
+struct nv30_texture_format {
+ /* TRUE for real entries; FALSE marks the end of the table. */
+ boolean defined;
+ /* PIPE_FORMAT_* value this row matches. */
+ uint pipe;
+ /* NV34TCL_TX_FORMAT_FORMAT_* value for the hw. */
+ int format;
+ /* Combined NV34TCL_TX_SWIZZLE_* word. */
+ int swizzle;
+};
+
+/* Supported texture formats.  The trailing empty initialiser leaves
+ * `defined` at zero, which is what stops the search loop in
+ * nv30_fragtex_format().  The Z16/Z24S8 rows are commented out.
+ */
+static struct nv30_texture_format
+nv30_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W),
+ _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W),
+ _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W),
+ _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W),
+ _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X),
+ _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X),
+ _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X),
+ _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y),
+// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X),
+// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X),
+ _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W),
+ _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W),
+ _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W),
+ _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W),
+ {},
+};
+
+/**
+ * Look up the hw texture format description for a pipe format.
+ *
+ * Walks nv30_texture_formats[] until the terminating entry (defined ==
+ * FALSE).
+ *
+ * \return the matching table row, or NULL (after logging the format name)
+ *         when the format is not in the table.
+ */
+static struct nv30_texture_format *
+nv30_fragtex_format(uint pipe_format)
+{
+	struct nv30_texture_format *tf;
+
+	for (tf = nv30_texture_formats; tf->defined; tf++) {
+		if (tf->pipe == pipe_format)
+			return tf;
+	}
+
+	NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format));
+	return NULL;
+}
+
+
+/**
+ * Build the state object that programs texture unit `unit` from the
+ * sampler state and miptree currently bound to it.
+ *
+ * The format word combines the hw format, a mipmap flag when the texture
+ * has more than one level, log2 of the level-0 width/height/depth, the
+ * dimensionality and, for linear textures, bit 13; linear textures also
+ * take their pitch from level 0.
+ *
+ * \return the new state object, or NULL when the texture format or the
+ *         texture target is not supported (both cases are logged).
+ */
+static struct nouveau_stateobj *
+nv30_fragtex_build(struct nv30_context *nv30, int unit)
+{
+	struct nv30_sampler_state *ps = nv30->tex_sampler[unit];
+	struct nv30_miptree *nv30mt = nv30->tex_miptree[unit];
+	struct pipe_texture *pt = &nv30mt->base;
+	struct nv30_texture_format *tf;
+	struct nouveau_stateobj *so;
+	uint32_t txf, txs , txp;
+	unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+	/* Fail the same way as for an unknown target instead of
+	 * dereferencing a NULL format in builds without assertions.
+	 */
+	tf = nv30_fragtex_format(pt->format);
+	if (!tf)
+		return NULL;
+
+	txf = tf->format;
+	txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
+	txf |= log2i(pt->width[0]) << 20;
+	txf |= log2i(pt->height[0]) << 24;
+	txf |= log2i(pt->depth[0]) << 28;
+	txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
+
+	switch (pt->target) {
+	case PIPE_TEXTURE_CUBE:
+		txf |= NV34TCL_TX_FORMAT_CUBIC;
+		/* fall-through */
+	case PIPE_TEXTURE_2D:
+		txf |= NV34TCL_TX_FORMAT_DIMS_2D;
+		break;
+	case PIPE_TEXTURE_3D:
+		txf |= NV34TCL_TX_FORMAT_DIMS_3D;
+		break;
+	case PIPE_TEXTURE_1D:
+		txf |= NV34TCL_TX_FORMAT_DIMS_1D;
+		break;
+	default:
+		NOUVEAU_ERR("Unknown target %d\n", pt->target);
+		return NULL;
+	}
+
+	if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+		txp = 0;
+	} else {
+		txp = nv30mt->level[0].pitch;
+		txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? */;
+	}
+
+	txs = tf->swizzle;
+
+	so = so_new(16, 2);
+	so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8);
+	so_reloc (so, nv30mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+	so_reloc (so, nv30mt->buffer, txf, tex_flags | NOUVEAU_BO_OR,
+		  NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+	so_data  (so, ps->wrap);
+	so_data  (so, NV34TCL_TX_ENABLE_ENABLE | ps->en);
+	so_data  (so, txs);
+	so_data  (so, ps->filt | 0x2000 /*voodoo*/);
+	so_data  (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+		       pt->height[0]);
+	so_data  (so, ps->bcol);
+
+	return so;
+}
+
+/* State validation hook for the fragment texture units.
+ *
+ * Units that were enabled for the previous program but are not sampled by
+ * the current one get a state object that writes 0 to their TX_ENABLE
+ * register.  Units that are both dirty and sampled by the current program
+ * get a freshly built state object.  Each touched unit is flagged in
+ * state->dirty.  Finally the set of sampled units is remembered in
+ * state->fp_samplers.
+ *
+ * Always returns FALSE.
+ */
+static boolean
+nv30_fragtex_validate(struct nv30_context *nv30)
+{
+	struct nv30_fragment_program *fp = nv30->fragprog;
+	struct nv30_state *state = &nv30->state;
+	struct nouveau_stateobj *so;
+	unsigned pending, unit;
+
+	/* Units no longer used by the program: disable them. */
+	for (pending = state->fp_samplers & ~fp->samplers; pending;
+	     pending &= ~(1 << unit)) {
+		unit = ffs(pending) - 1;
+
+		so = so_new(2, 0);
+		so_method(so, nv30->screen->rankine,
+			  NV34TCL_TX_ENABLE(unit), 1);
+		so_data  (so, 0);
+		so_ref(so, &state->hw[NV30_STATE_FRAGTEX0 + unit]);
+		state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit));
+	}
+
+	/* Dirty units the program samples from: rebuild their state. */
+	for (pending = nv30->dirty_samplers & fp->samplers; pending;
+	     pending &= ~(1 << unit)) {
+		unit = ffs(pending) - 1;
+
+		so = nv30_fragtex_build(nv30, unit);
+		so_ref(so, &state->hw[NV30_STATE_FRAGTEX0 + unit]);
+		state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit));
+	}
+
+	state->fp_samplers = fp->samplers;
+	return FALSE;
+}
+
+/* State-table entry for the fragment texture units: validated by
+ * nv30_fragtex_validate() when sampler or fragment program state is dirty.
+ * The hw field is 0 here; the validate function sets the per-unit
+ * state->dirty bits itself.
+ */
+struct nv30_state_entry nv30_state_fragtex = {
+ .validate = nv30_fragtex_validate,
+ .dirty = {
+ .pipe = NV30_NEW_SAMPLER | NV30_NEW_FRAGPROG,
+ .hw = 0
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
new file mode 100644
index 0000000000..b11ed8c24e
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -0,0 +1,235 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv30_context.h"
+
+/*
+ * Compute the memory layout of a miptree.
+ *
+ * Fills in, for every mip level up to pt->last_level, the level dimensions
+ * and block counts in the embedded pipe_texture, the level pitch, and one
+ * image offset per face (6 for cube maps, depth[0] for 3D textures, 1
+ * otherwise).  The accumulated size of all images is stored in
+ * nv30mt->total_size.
+ */
+static void
+nv30_miptree_layout(struct nv30_miptree *nv30mt)
+{
+ struct pipe_texture *pt = &nv30mt->base;
+ uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+ uint offset = 0;
+ int nr_faces, l, f;
+ /* Non-zero when any of these usages is requested. */
+ uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
+ PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
+ PIPE_TEXTURE_USAGE_RENDER_TARGET |
+ PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+ PIPE_TEXTURE_USAGE_PRIMARY);
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ nr_faces = pt->depth[0];
+ } else {
+ nr_faces = 1;
+ }
+
+ /* First pass: per-level dimensions, pitch and offset table allocation. */
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->depth[l] = depth;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ /* Linear textures with a wide-pitch usage share the level-0 pitch,
+ * rounded up to 64 bytes, across all levels; everything else uses the
+ * tightly packed pitch of the level itself. */
+ if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
+ nv30mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+ else
+ nv30mt->level[l].pitch = pt->width[l] * pt->block.size;
+
+ /* NOTE(review): the allocation result is not checked; the second pass
+ * below writes through it unconditionally. */
+ nv30mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+ depth = MAX2(1, depth >> 1);
+ }
+
+ /* Second pass: assign offsets face by face, all levels of a face being
+ * contiguous. */
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l < pt->last_level; l++) {
+ nv30mt->level[l].image_offset[f] = offset;
+
+ /* For non-linear textures the image size is rounded up to 64 bytes
+ * as long as the next level is larger than 1 in both dimensions. */
+ if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
+ pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
+ offset += align(nv30mt->level[l].pitch * pt->height[l], 64);
+ else
+ offset += nv30mt->level[l].pitch * pt->height[l];
+ }
+
+ /* l == pt->last_level here: the last level is never padded. */
+ nv30mt->level[l].image_offset[f] = offset;
+ offset += nv30mt->level[l].pitch * pt->height[l];
+ }
+
+ nv30mt->total_size = offset;
+}
+
+/*
+ * Create a miptree for the texture template `pt`.
+ *
+ * The template is copied, NOUVEAU_TEXTURE_USAGE_LINEAR is added to the copy
+ * when the texture cannot be swizzled (non-power-of-two size, primary /
+ * display-target / depth-stencil / dynamic usage, or a format not in the
+ * swizzle list), the layout is computed and a backing buffer of
+ * total_size bytes is created.
+ *
+ * Returns the embedded pipe_texture with refcount 1, or NULL when either the
+ * miptree or its buffer cannot be allocated.
+ */
+static struct pipe_texture *
+nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
+{
+	struct pipe_winsys *ws = pscreen->winsys;
+	struct nv30_miptree *mt;
+	int l;
+
+	/* Zero-initialised like nv30_miptree_blanket(), so shadow_tex,
+	 * shadow_surface and every unused level start out NULL. */
+	mt = CALLOC_STRUCT(nv30_miptree);
+	if (!mt)
+		return NULL;
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+
+	/* Swizzled textures must be POT */
+	if (pt->width[0] & (pt->width[0] - 1) ||
+	    pt->height[0] & (pt->height[0] - 1)) {
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	} else if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
+				    PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+				    PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) {
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	} else if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) {
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	} else {
+		switch (pt->format) {
+		/* TODO: Figure out which formats can be swizzled */
+		case PIPE_FORMAT_A8R8G8B8_UNORM:
+		case PIPE_FORMAT_X8R8G8B8_UNORM:
+		case PIPE_FORMAT_R16_SNORM:
+			/* Swizzling can be turned off from the environment. */
+			if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE))
+				mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+			break;
+		default:
+			mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+			break;
+		}
+	}
+
+	nv30_miptree_layout(mt);
+
+	mt->buffer = ws->buffer_create(ws, 256,
+				       PIPE_BUFFER_USAGE_PIXEL |
+				       NOUVEAU_BUFFER_USAGE_TEXTURE,
+				       mt->total_size);
+	if (!mt->buffer) {
+		/* The layout pass allocated one offset table per level; free
+		 * them so a failed buffer allocation does not leak them. */
+		for (l = 0; l <= mt->base.last_level; l++) {
+			if (mt->level[l].image_offset)
+				FREE(mt->level[l].image_offset);
+		}
+		FREE(mt);
+		return NULL;
+	}
+
+	return &mt->base;
+}
+
+/*
+ * Wrap an existing buffer `pb` in a miptree described by `pt`.
+ *
+ * Only 2D textures with a single mip level and depth 1 are accepted.  The
+ * level-0 pitch is taken from stride[0] and the single image starts at
+ * offset 0.  The miptree takes its own reference on `pb`.
+ *
+ * Returns the embedded pipe_texture with refcount 1, or NULL if the texture
+ * shape is unsupported or an allocation fails.
+ */
+static struct pipe_texture *
+nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
+		     const unsigned *stride, struct pipe_buffer *pb)
+{
+	struct nv30_miptree *mt;
+
+	/* Only supports 2D, non-mipmapped textures for the moment */
+	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
+	    pt->depth[0] != 1)
+		return NULL;
+
+	mt = CALLOC_STRUCT(nv30_miptree);
+	if (!mt)
+		return NULL;
+
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+	mt->level[0].pitch = stride[0];
+	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
+	if (!mt->level[0].image_offset) {
+		/* nv30_miptree_surface_new() indexes this table without a
+		 * check, so fail the creation rather than hand out a texture
+		 * with no offset table. */
+		FREE(mt);
+		return NULL;
+	}
+
+	pipe_buffer_reference(pscreen, &mt->buffer, pb);
+	return &mt->base;
+}
+
+/*
+ * Drop one reference to *ppt and clear the caller's pointer.
+ *
+ * When the last reference goes away the backing buffer reference, the
+ * per-level offset tables, any shadow surface/texture and the miptree itself
+ * are released.
+ */
+static void
+nv30_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+	struct pipe_texture *tex = *ppt;
+	struct nv30_miptree *nv30mt = (struct nv30_miptree *)tex;
+	int level;
+
+	*ppt = NULL;
+
+	tex->refcount--;
+	if (tex->refcount != 0)
+		return;
+
+	pipe_buffer_reference(pscreen, &nv30mt->buffer, NULL);
+
+	for (level = 0; level <= tex->last_level; level++) {
+		if (!nv30mt->level[level].image_offset)
+			continue;
+		FREE(nv30mt->level[level].image_offset);
+	}
+
+	/* The shadow surface is only looked at when a shadow texture exists. */
+	if (nv30mt->shadow_tex && nv30mt->shadow_surface)
+		pscreen->tex_surface_release(pscreen, &nv30mt->shadow_surface);
+	if (nv30mt->shadow_tex)
+		nv30_miptree_release(pscreen, &nv30mt->shadow_tex);
+
+	FREE(nv30mt);
+}
+
+/*
+ * Create a surface describing one image (face / level / zslice) of a
+ * miptree.  The surface holds a reference on the texture.  Returns NULL if
+ * the surface cannot be allocated.
+ */
+static struct pipe_surface *
+nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+			 unsigned face, unsigned level, unsigned zslice,
+			 unsigned flags)
+{
+	struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt;
+	struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface);
+	unsigned image;
+
+	if (!surf)
+		return NULL;
+
+	/* Which entry of the level's offset table addresses this image. */
+	switch (pt->target) {
+	case PIPE_TEXTURE_CUBE:
+		image = face;
+		break;
+	case PIPE_TEXTURE_3D:
+		image = zslice;
+		break;
+	default:
+		image = 0;
+		break;
+	}
+
+	pipe_texture_reference(&surf->texture, pt);
+
+	/* Bookkeeping */
+	surf->refcount = 1;
+	surf->status = PIPE_SURFACE_STATUS_DEFINED;
+	surf->usage = flags;
+	surf->face = face;
+	surf->level = level;
+	surf->zslice = zslice;
+
+	/* Geometry of the selected mip level */
+	surf->format = pt->format;
+	surf->block = pt->block;
+	surf->width = pt->width[level];
+	surf->height = pt->height[level];
+	surf->nblocksx = pt->nblocksx[level];
+	surf->nblocksy = pt->nblocksy[level];
+	surf->stride = nv30mt->level[level].pitch;
+	surf->offset = nv30mt->level[level].image_offset[image];
+
+	return surf;
+}
+
+/*
+ * Drop one reference to *psurface and clear the caller's pointer.  The
+ * surface's texture reference and the surface itself are released with the
+ * last reference.
+ */
+static void
+nv30_miptree_surface_del(struct pipe_screen *pscreen,
+			 struct pipe_surface **psurface)
+{
+	struct pipe_surface *surf = *psurface;
+
+	*psurface = NULL;
+
+	surf->refcount--;
+	if (surf->refcount > 0)
+		return;
+
+	pipe_texture_reference(&surf->texture, NULL);
+	FREE(surf);
+}
+
+/* Plug the miptree texture and surface entry points into the screen vtable. */
+void
+nv30_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+	/* Textures */
+	pscreen->texture_create = nv30_miptree_create;
+	pscreen->texture_blanket = nv30_miptree_blanket;
+	pscreen->texture_release = nv30_miptree_release;
+
+	/* Surfaces */
+	pscreen->get_tex_surface = nv30_miptree_surface_new;
+	pscreen->tex_surface_release = nv30_miptree_surface_del;
+}
diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c
new file mode 100644
index 0000000000..2f974cf5c4
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_query.c
@@ -0,0 +1,122 @@
+#include "pipe/p_context.h"
+
+#include "nv30_context.h"
+
+/* Driver-side query object handed out as an opaque struct pipe_query. */
+struct nv30_query {
+ /* Slot taken from the screen's query_heap while a query is outstanding;
+ * its ->start indexes the query notifier. */
+ struct nouveau_resource *object;
+ /* Query type passed to create_query. */
+ unsigned type;
+ /* TRUE once the value has been read back into `result`. */
+ boolean ready;
+ /* Cached value read from the notifier. */
+ uint64_t result;
+};
+
+/* Downcast an opaque pipe_query handle to the driver's query structure. */
+static INLINE struct nv30_query *
+nv30_query(struct pipe_query *pipe)
+{
+	struct nv30_query *query = (struct nv30_query *)pipe;
+
+	return query;
+}
+
+/*
+ * Allocate a zero-initialised query object of the given type.
+ * Returns NULL if the allocation fails.
+ */
+static struct pipe_query *
+nv30_query_create(struct pipe_context *pipe, unsigned query_type)
+{
+	struct nv30_query *q;
+
+	q = CALLOC(1, sizeof(struct nv30_query));
+	if (!q)
+		return NULL;
+	q->type = query_type;
+
+	return (struct pipe_query *)q;
+}
+
+/* Free a query object, giving back its query-heap slot if it still has one. */
+static void
+nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+	struct nv30_query *query = nv30_query(pq);
+
+	if (query->object) {
+		struct nv30_context *nv30 = nv30_context(pipe);
+
+		nv30->nvws->res_free(&query->object);
+	}
+
+	FREE(query);
+}
+
+/*
+ * Start an occlusion query.
+ *
+ * Takes a slot from the screen's query heap, resets the matching notifier
+ * entry and emits the query reset methods.  If no slot can be allocated the
+ * function asserts in debug builds and otherwise returns without emitting
+ * anything, instead of dereferencing the missing slot.
+ */
+static void
+nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+	struct nv30_context *nv30 = nv30_context(pipe);
+	struct nv30_query *q = nv30_query(pq);
+
+	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+	/* Happens when end_query() is called, then another begin_query()
+	 * without querying the result in-between. For now we'll wait for
+	 * the existing query to notify completion, but it could be better.
+	 */
+	if (q->object) {
+		uint64_t tmp;
+		pipe->get_query_result(pipe, pq, 1, &tmp);
+	}
+
+	if (nv30->nvws->res_alloc(nv30->screen->query_heap, 1, NULL, &q->object)) {
+		/* With asserts compiled out the original code fell through
+		 * and dereferenced q->object below. */
+		assert(0);
+		return;
+	}
+	nv30->nvws->notifier_reset(nv30->screen->query, q->object->start);
+
+	BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1);
+	OUT_RING  (1);
+	BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1);
+	OUT_RING  (1);
+
+	q->ready = FALSE;
+}
+
+/*
+ * Finish a query: emit NV34TCL_QUERY_GET addressed at the query's notifier
+ * slot (slot index times 32) and kick the ring.
+ */
+static void
+nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+	struct nv30_context *nv30 = nv30_context(pipe);
+	struct nv30_query *query = nv30_query(pq);
+
+	BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1);
+	OUT_RING  (((query->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT) |
+		   (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT));
+	FIRE_RING(NULL);
+}
+
+/*
+ * Fetch the result of an occlusion query.
+ *
+ * If the value has not been read back yet, the notifier is polled; when it
+ * has not completed the function either returns FALSE (wait == FALSE) or
+ * blocks until it does.  The value is then cached in the query, the query
+ * heap slot is freed and later calls return the cached value.
+ *
+ * Returns TRUE and stores the value in *result when it is available.
+ */
+static boolean
+nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+		  boolean wait, uint64_t *result)
+{
+	struct nv30_context *nv30 = nv30_context(pipe);
+	struct nv30_query *q = nv30_query(pq);
+	struct nouveau_winsys *nvws = nv30->nvws;
+
+	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+	if (!q->ready) {
+		unsigned status;
+
+		/* The heap slot is only required while the value is still
+		 * pending: it is freed below once the value is cached, so
+		 * requiring it unconditionally would trip on a second read
+		 * of a finished query. */
+		assert(q->object);
+
+		status = nvws->notifier_status(nv30->screen->query,
+					       q->object->start);
+		if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
+			if (wait == FALSE)
+				return FALSE;
+			nvws->notifier_wait(nv30->screen->query, q->object->start,
+					    NV_NOTIFY_STATE_STATUS_COMPLETED,
+					    0);
+		}
+
+		q->result = nvws->notifier_retval(nv30->screen->query,
+						  q->object->start);
+		q->ready = TRUE;
+		nvws->res_free(&q->object);
+	}
+
+	*result = q->result;
+	return TRUE;
+}
+
+/* Install the query entry points into the context's pipe vtable. */
+void
+nv30_init_query_functions(struct nv30_context *nv30)
+{
+	struct pipe_context *pipe = &nv30->pipe;
+
+	/* Lifetime */
+	pipe->create_query = nv30_query_create;
+	pipe->destroy_query = nv30_query_destroy;
+
+	/* Usage */
+	pipe->begin_query = nv30_query_begin;
+	pipe->end_query = nv30_query_end;
+	pipe->get_query_result = nv30_query_result;
+}
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
new file mode 100644
index 0000000000..c97a73f0b1
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -0,0 +1,401 @@
+#include "pipe/p_screen.h"
+#include "util/u_simple_screen.h"
+
+#include "nv30_context.h"
+#include "nv30_screen.h"
+
+/*
+ * Bitmasks over the low nibble of an NV3x chipset id: bit n set means
+ * chipset 0x3n belongs to that group.  nv30_screen_create() tests
+ * (1 << (chipset & 0x0f)) against each mask in turn to pick the 3D object
+ * class (0x0397, 0x0697 or 0x0497 respectively).
+ */
+#define NV30TCL_CHIPSET_3X_MASK 0x00000003
+#define NV34TCL_CHIPSET_3X_MASK 0x00000010
+#define NV35TCL_CHIPSET_3X_MASK 0x000001e0
+
+/*
+ * Return the renderer name, "NV" followed by the chipset id in hex.  The
+ * string lives in a static buffer that is overwritten on every call.
+ */
+static const char *
+nv30_screen_get_name(struct pipe_screen *pscreen)
+{
+	static char name[128];
+	struct nouveau_device *device =
+		nv30_screen(pscreen)->nvws->channel->device;
+
+	snprintf(name, sizeof(name), "NV%02X", device->chipset);
+	return name;
+}
+
+/* Return the vendor string reported for this driver. */
+static const char *
+nv30_screen_get_vendor(struct pipe_screen *pscreen)
+{
+	static const char vendor[] = "nouveau";
+
+	return vendor;
+}
+
+/*
+ * Report integer capabilities.  Unknown parameters are logged and reported
+ * as 0.
+ */
+static int
+nv30_screen_get_param(struct pipe_screen *pscreen, int param)
+{
+	switch (param) {
+	/* Features reported as present */
+	case PIPE_CAP_TWO_SIDED_STENCIL:
+	case PIPE_CAP_ANISOTROPIC_FILTER:
+	case PIPE_CAP_POINT_SPRITE:
+	case PIPE_CAP_OCCLUSION_QUERY:
+	case PIPE_CAP_TEXTURE_SHADOW_MAP:
+	case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+	case NOUVEAU_CAP_HW_VTXBUF:
+	case NOUVEAU_CAP_HW_IDXBUF:
+		return 1;
+
+	/* Features reported as absent (and a vertex texture unit count of 0) */
+	case PIPE_CAP_NPOT_TEXTURES:
+	case PIPE_CAP_GLSL:
+	case PIPE_CAP_S3TC:
+	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+		return 0;
+
+	/* Limits */
+	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+		return 16;
+	case PIPE_CAP_MAX_RENDER_TARGETS:
+		return 2;
+	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+		return 13;
+	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+		return 10;
+
+	default:
+		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+		return 0;
+	}
+}
+
+/*
+ * Report floating-point capabilities.  Unknown parameters are logged and
+ * reported as 0.0.
+ */
+static float
+nv30_screen_get_paramf(struct pipe_screen *pscreen, int param)
+{
+	float value;
+
+	switch (param) {
+	case PIPE_CAP_MAX_LINE_WIDTH:
+	case PIPE_CAP_MAX_LINE_WIDTH_AA:
+		value = 10.0;
+		break;
+	case PIPE_CAP_MAX_POINT_WIDTH:
+	case PIPE_CAP_MAX_POINT_WIDTH_AA:
+		value = 64.0;
+		break;
+	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+		value = 8.0;
+		break;
+	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+		value = 4.0;
+		break;
+	default:
+		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+		value = 0.0;
+		break;
+	}
+
+	return value;
+}
+
+/*
+ * Tell whether `format` is supported for the requested usage.  The formats
+ * accepted for render targets are a subset of those accepted for any other
+ * usage; `target` and `geom_flags` are not consulted.
+ */
+static boolean
+nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
+				     enum pipe_format format,
+				     enum pipe_texture_target target,
+				     unsigned tex_usage, unsigned geom_flags)
+{
+	const boolean render_target =
+		(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) ? TRUE : FALSE;
+
+	switch (format) {
+	/* Accepted for every usage */
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case PIPE_FORMAT_R5G6B5_UNORM:
+	case PIPE_FORMAT_Z24S8_UNORM:
+	case PIPE_FORMAT_Z16_UNORM:
+		return TRUE;
+
+	/* Accepted unless a render target is requested */
+	case PIPE_FORMAT_A1R5G5B5_UNORM:
+	case PIPE_FORMAT_A4R4G4B4_UNORM:
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_I8_UNORM:
+	case PIPE_FORMAT_A8L8_UNORM:
+		return render_target ? FALSE : TRUE;
+
+	default:
+		return FALSE;
+	}
+}
+
+/* Return the buffer backing the miptree that `surf` belongs to. */
+static struct pipe_buffer *
+nv30_surface_buffer(struct pipe_surface *surf)
+{
+	return ((struct nv30_miptree *)surf->texture)->buffer;
+}
+
+/*
+ * Map a surface for CPU access.
+ *
+ * Linear textures are mapped directly.  For any other texture a linear
+ * shadow texture is created on first use, a surface for the matching
+ * face/level/zslice of the shadow is stored in mt->shadow_surface and the
+ * shadow is mapped instead.
+ *
+ * Returns a pointer to the first byte of the surface, or NULL if the shadow
+ * texture or surface cannot be created or the buffer cannot be mapped.
+ */
+static void *
+nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+		 unsigned flags )
+{
+	struct pipe_winsys *ws = screen->winsys;
+	struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture;
+	struct pipe_surface *surface_to_map;
+	void *map;
+
+	if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+		if (!mt->shadow_tex) {
+			/* Reuse the texture as creation template, with its
+			 * usage temporarily replaced so the copy comes out
+			 * linear. */
+			unsigned old_tex_usage = surface->texture->tex_usage;
+			surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR |
+						      PIPE_TEXTURE_USAGE_DYNAMIC;
+			mt->shadow_tex = screen->texture_create(screen, surface->texture);
+			surface->texture->tex_usage = old_tex_usage;
+
+			if (!mt->shadow_tex)
+				return NULL;
+
+			assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR);
+		}
+
+		mt->shadow_surface = screen->get_tex_surface
+		(
+			screen, mt->shadow_tex,
+			surface->face, surface->level, surface->zslice,
+			surface->usage
+		);
+		if (!mt->shadow_surface)
+			return NULL;
+
+		surface_to_map = mt->shadow_surface;
+	} else {
+		surface_to_map = surface;
+	}
+
+	map = ws->buffer_map(ws, nv30_surface_buffer(surface_to_map), flags);
+	if (!map) {
+		/* No unmap will follow a failed map, so drop the shadow
+		 * surface here instead of leaking it. */
+		if (surface_to_map != surface)
+			screen->tex_surface_release(screen, &mt->shadow_surface);
+		return NULL;
+	}
+
+	/* Byte offset; arithmetic on void * is a compiler extension. */
+	return (char *)map + surface_to_map->offset;
+}
+
+/*
+ * Undo nv30_surface_map().
+ *
+ * Linear textures are simply unmapped.  For textures mapped through their
+ * shadow, the shadow buffer is unmapped, its contents are copied into the
+ * real surface with the 2D engine and the shadow surface is released.
+ */
+static void
+nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+	struct pipe_winsys *ws = screen->winsys;
+	struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture;
+	struct pipe_surface *surface_to_unmap;
+
+	/* TODO: Copy from shadow just before push buffer is flushed instead.
+	   There are probably some programs that map/unmap excessively
+	   before rendering. */
+	if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+		assert(mt->shadow_tex);
+
+		surface_to_unmap = mt->shadow_surface;
+	} else {
+		surface_to_unmap = surface;
+	}
+
+	assert(surface_to_unmap);
+
+	ws->buffer_unmap(ws, nv30_surface_buffer(surface_to_unmap));
+
+	if (surface_to_unmap != surface) {
+		struct nv30_screen *nvscreen = nv30_screen(screen);
+
+		nvscreen->eng2d->copy(nvscreen->eng2d, surface, 0, 0,
+				      surface_to_unmap, 0, 0,
+				      surface->width, surface->height);
+
+		/* Release through the miptree's own pointer so it is reset
+		 * to NULL.  Releasing a local copy would leave
+		 * mt->shadow_surface dangling, and nv30_miptree_release()
+		 * would then release the freed surface a second time. */
+		screen->tex_surface_release(screen, &mt->shadow_surface);
+	}
+}
+
+/*
+ * Tear down a screen: free the vertex program and query heaps, the query
+ * and sync notifiers and the 3D object, then the screen structure itself.
+ * nv30_screen_create() also calls this on partially initialised screens.
+ * NOTE(review): screen->eng2d is not released here -- confirm whether the 2D
+ * engine helper needs an explicit teardown.
+ */
+static void
+nv30_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv30_screen *screen = nv30_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->res_free(&screen->vp_exec_heap);
+ nvws->res_free(&screen->vp_data_heap);
+ nvws->res_free(&screen->query_heap);
+ nvws->notifier_free(&screen->query);
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->rankine);
+
+ /* pscreen is the first member of the nv30_screen allocation. */
+ FREE(pscreen);
+}
+
+/*
+ * Create and initialise an nv30 screen.
+ *
+ * Sets up the 2D engine helper, picks the 3D object class from the chipset
+ * id, allocates the sync/query notifiers and the query and vertex program
+ * heaps, emits the static 3D object initialisation and fills in the
+ * pipe_screen vtable.
+ *
+ * Returns the embedded pipe_screen, or NULL on any failure.
+ */
+struct pipe_screen *
+nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+	struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen);
+	struct nouveau_stateobj *so;
+	unsigned rankine_class = 0;
+	unsigned chipset = nvws->channel->device->chipset;
+	int ret, i;
+
+	if (!screen)
+		return NULL;
+	screen->nvws = nvws;
+
+	/* 2D engine setup */
+	screen->eng2d = nv04_surface_2d_init(nvws);
+	if (!screen->eng2d) {
+		FREE(screen);
+		return NULL;
+	}
+	screen->eng2d->buf = nv30_surface_buffer;
+
+	/* 3D object */
+	switch (chipset & 0xf0) {
+	case 0x30:
+		if (NV30TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f)))
+			rankine_class = 0x0397;
+		else
+		if (NV34TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f)))
+			rankine_class = 0x0697;
+		else
+		if (NV35TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f)))
+			rankine_class = 0x0497;
+		break;
+	default:
+		break;
+	}
+
+	/* Nothing besides the 2D helper has been allocated on the next two
+	 * error paths, so the screen is freed directly rather than through
+	 * nv30_screen_destroy().
+	 * NOTE(review): screen->eng2d is not torn down on any error path;
+	 * no teardown routine for it is visible in this file. */
+	if (!rankine_class) {
+		NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", chipset);
+		FREE(screen);
+		return NULL;
+	}
+
+	ret = nvws->grobj_alloc(nvws, rankine_class, &screen->rankine);
+	if (ret) {
+		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+		FREE(screen);
+		return NULL;
+	}
+
+	/* Notifier for sync purposes */
+	ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+	if (ret) {
+		NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+		nv30_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Query objects */
+	ret = nvws->notifier_alloc(nvws, 32, &screen->query);
+	if (ret) {
+		NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
+		nv30_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	ret = nvws->res_init(&screen->query_heap, 0, 32);
+	if (ret) {
+		NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
+		nv30_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Vtxprog resources */
+	if (nvws->res_init(&screen->vp_exec_heap, 0, 256) ||
+	    nvws->res_init(&screen->vp_data_heap, 0, 256)) {
+		nv30_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Static rankine initialisation */
+	so = so_new(128, 0);
+	so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1);
+	so_data  (so, screen->sync->handle);
+	so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->gart->handle);
+	so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1);
+	so_data  (so, nvws->channel->vram->handle);
+	so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->vram->handle);
+	so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->gart->handle);
+/*	so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2);
+	so_data  (so, 0);
+	so_data  (so, screen->query->handle);*/
+	so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1);
+	so_data  (so, nvws->channel->vram->handle);
+	so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1);
+	so_data  (so, nvws->channel->vram->handle);
+
+	for (i=1; i<8; i++) {
+		so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+		so_data  (so, 0);
+		so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1);
+		so_data  (so, 0);
+	}
+
+	so_method(so, screen->rankine, 0x220, 1);
+	so_data  (so, 1);
+
+	so_method(so, screen->rankine, 0x03b0, 1);
+	so_data  (so, 0x00100000);
+	so_method(so, screen->rankine, 0x1454, 1);
+	so_data  (so, 0);
+	so_method(so, screen->rankine, 0x1d80, 1);
+	so_data  (so, 3);
+	so_method(so, screen->rankine, 0x1450, 1);
+	so_data  (so, 0x00030004);
+
+	/* NEW */
+	so_method(so, screen->rankine, 0x1e98, 1);
+	so_data  (so, 0);
+	so_method(so, screen->rankine, 0x17e0, 3);
+	so_data  (so, fui(0.0));
+	so_data  (so, fui(0.0));
+	so_data  (so, fui(1.0));
+	so_method(so, screen->rankine, 0x1f80, 16);
+	for (i=0; i<16; i++) {
+		so_data  (so, (i==8) ? 0x0000ffff : 0);
+	}
+
+	so_method(so, screen->rankine, 0x120, 3);
+	so_data  (so, 0);
+	so_data  (so, 1);
+	so_data  (so, 2);
+
+	so_method(so, screen->rankine, 0x1d88, 1);
+	so_data  (so, 0x00001200);
+
+	so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1);
+	so_data  (so, 0);
+
+	so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2);
+	so_data  (so, fui(0.0));
+	so_data  (so, fui(1.0));
+
+	so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1);
+	so_data  (so, 0xffff0000);
+
+	/* enables use of vp rather than fixed-function somehow */
+	so_method(so, screen->rankine, 0x1e94, 1);
+	so_data  (so, 0x13);
+
+	so_emit(nvws, so);
+	so_ref(NULL, &so);
+	nvws->push_flush(nvws, 0, NULL);
+
+	screen->pipe.winsys = ws;
+	screen->pipe.destroy = nv30_screen_destroy;
+
+	screen->pipe.get_name = nv30_screen_get_name;
+	screen->pipe.get_vendor = nv30_screen_get_vendor;
+	screen->pipe.get_param = nv30_screen_get_param;
+	screen->pipe.get_paramf = nv30_screen_get_paramf;
+
+	screen->pipe.is_format_supported = nv30_screen_surface_format_supported;
+
+	screen->pipe.surface_map = nv30_surface_map;
+	screen->pipe.surface_unmap = nv30_surface_unmap;
+
+	nv30_screen_init_miptree_functions(&screen->pipe);
+	u_simple_screen_init(&screen->pipe);
+
+	return &screen->pipe;
+}
diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h
new file mode 100644
index 0000000000..b11e470f94
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_screen.h
@@ -0,0 +1,37 @@
+#ifndef __NV30_SCREEN_H__
+#define __NV30_SCREEN_H__
+
+#include "pipe/p_screen.h"
+#include "nv04/nv04_surface_2d.h"
+
+/* Per-screen driver state for nv30. */
+struct nv30_screen {
+ /* Base class; must stay the first member because nv30_screen() casts a
+ * struct pipe_screen pointer straight to struct nv30_screen. */
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ /* NOTE(review): presumably identifies the context that last emitted
+ * state on the channel -- confirm against the context code. */
+ unsigned cur_pctx;
+
+ /* HW graphics objects */
+ struct nv04_surface_2d *eng2d;
+ struct nouveau_grobj *rankine;
+ struct nouveau_notifier *sync;
+
+ /* Query object resources */
+ struct nouveau_notifier *query;
+ struct nouveau_resource *query_heap;
+
+ /* Vtxprog resources */
+ struct nouveau_resource *vp_exec_heap;
+ struct nouveau_resource *vp_data_heap;
+
+ /* Current 3D state of channel */
+ struct nouveau_stateobj *state[NV30_STATE_MAX];
+};
+
+/* Downcast a generic pipe_screen to the nv30 screen that embeds it. */
+static INLINE struct nv30_screen *
+nv30_screen(struct pipe_screen *screen)
+{
+	struct nv30_screen *nv30scr = (struct nv30_screen *)screen;
+
+	return nv30scr;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h
new file mode 100644
index 0000000000..dd3a36f78f
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_shader.h
@@ -0,0 +1,490 @@
+#ifndef __NV30_SHADER_H__
+#define __NV30_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * 128bit opcodes, split into 4 32-bit ones for ease of use.
+ *
+ * Non-native instructions
+ * ABS - MOV + NV40_VP_INST0_DEST_ABS
+ * POW - EX2 + MUL + LG2
+ * SUB - ADD, second source negated
+ * SWZ - MOV
+ * XPD -
+ *
+ * Register access
+ * - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
+ * - Only one CONST can be accessed per-instruction (move extras into TEMPs)
+ *
+ * Relative Addressing
+ * According to the value returned for
+ * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
+ *
+ * there are only two address registers available. The destination in the
+ * ARL instruction is set to TEMP <n> (The temp isn't actually written).
+ *
+ * When using vanilla ARB_v_p, the proprietary driver will squish both the
+ * available ADDRESS regs into the first hardware reg in the X and Y
+ * components.
+ *
+ * To use an address reg as an index into consts, the CONST_SRC is set to
+ * (const_base + offset) and INDEX_CONST is set.
+ *
+ * To access the second address reg use ADDR_REG_SELECT_1. A particular
+ * component of the address regs is selected with ADDR_SWZ.
+ *
+ * Only one address register can be accessed per instruction.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details) Conditional
+ * execution of an instruction is enabled by setting COND_TEST_ENABLE, and
+ * selecting the condition which will allow the test to pass with
+ * COND_{FL,LT,...}. It is possible to swizzle the values in the condition
+ * register, which allows for testing against an individual component.
+ *
+ * Branching:
+ *
+ * The BRA/CAL instructions seem to follow a slightly different opcode
+ * layout. The destination instruction ID (IADDR) overlaps a source field.
+ * Instruction IDs seem to be numbered based on the UPLOAD_FROM_ID FIFO
+ * command, and are incremented automatically on each UPLOAD_INST FIFO
+ * command.
+ *
+ * Conditional branching is achieved by using the condition tests described
+ * above. There don't appear to be any dedicated looping instructions, but
+ * looping can be done using a temp reg + conditional branching.
+ *
+ * Subroutines may be uploaded before the main program itself, but the first
+ * executed instruction is determined by the PROGRAM_START_ID FIFO command.
+ *
+ */
+
+/* DWORD 0 */
+
+#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */
+#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */
+#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */
+#define NV30_VP_INST_VEC_RESULT (1 << 20)
+#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16
+#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16)
+#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15)
+#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16)
+#define NV30_VP_INST_COND_TEST_ENABLE (1<<14)
+#define NV30_VP_INST_COND_SHIFT 11
+#define NV30_VP_INST_COND_MASK (0x07 << 11)
+# define NV30_VP_INST_COND_FL 0 /* guess */
+# define NV30_VP_INST_COND_LT 1
+# define NV30_VP_INST_COND_EQ 2
+# define NV30_VP_INST_COND_LE 3
+# define NV30_VP_INST_COND_GT 4
+# define NV30_VP_INST_COND_NE 5
+# define NV30_VP_INST_COND_GE 6
+# define NV30_VP_INST_COND_TR 7 /* guess */
+#define NV30_VP_INST_COND_SWZ_X_SHIFT 9
+#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9)
+#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7
+#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7)
+#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5
+#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5)
+#define NV30_VP_INST_COND_SWZ_W_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3)
+#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3)
+#define NV30_VP_INST_ADDR_SWZ_SHIFT 1
+#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1)
+#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0
+#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0)
+
+/* DWORD 1 */
+#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28
+#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28)
+# define NV30_VP_INST_OP_NOP 0x00
+# define NV30_VP_INST_OP_RCP 0x02
+# define NV30_VP_INST_OP_RCC 0x03
+# define NV30_VP_INST_OP_RSQ 0x04
+# define NV30_VP_INST_OP_EXP 0x05
+# define NV30_VP_INST_OP_LOG 0x06
+# define NV30_VP_INST_OP_LIT 0x07
+# define NV30_VP_INST_OP_BRA 0x09
+# define NV30_VP_INST_OP_CAL 0x0B
+# define NV30_VP_INST_OP_RET 0x0C
+# define NV30_VP_INST_OP_LG2 0x0D
+# define NV30_VP_INST_OP_EX2 0x0E
+# define NV30_VP_INST_OP_SIN 0x0F
+# define NV30_VP_INST_OP_COS 0x10
+#define NV30_VP_INST_VEC_OPCODE_SHIFT 23
+#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23)
+# define NV30_VP_INST_OP_NOPV 0x00
+# define NV30_VP_INST_OP_MOV 0x01
+# define NV30_VP_INST_OP_MUL 0x02
+# define NV30_VP_INST_OP_ADD 0x03
+# define NV30_VP_INST_OP_MAD 0x04
+# define NV30_VP_INST_OP_DP3 0x05
+# define NV30_VP_INST_OP_DP4 0x07
+# define NV30_VP_INST_OP_DPH 0x06
+# define NV30_VP_INST_OP_DST 0x08
+# define NV30_VP_INST_OP_MIN 0x09
+# define NV30_VP_INST_OP_MAX 0x0A
+# define NV30_VP_INST_OP_SLT 0x0B
+# define NV30_VP_INST_OP_SGE 0x0C
+# define NV30_VP_INST_OP_ARL 0x0D
+# define NV30_VP_INST_OP_FRC 0x0E
+# define NV30_VP_INST_OP_FLR 0x0F
+# define NV30_VP_INST_OP_SEQ 0x10
+# define NV30_VP_INST_OP_SFL 0x11
+# define NV30_VP_INST_OP_SGT 0x12
+# define NV30_VP_INST_OP_SLE 0x13
+# define NV30_VP_INST_OP_SNE 0x14
+# define NV30_VP_INST_OP_STR 0x15
+# define NV30_VP_INST_OP_SSG 0x16
+# define NV30_VP_INST_OP_ARR 0x17
+# define NV30_VP_INST_OP_ARA 0x18
+#define NV30_VP_INST_CONST_SRC_SHIFT 14
+#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14)
+#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/
+#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/
+# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */
+# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */
+# define NV30_VP_INST_IN_NORMAL 2
+# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */
+# define NV30_VP_INST_IN_COL1 4
+# define NV30_VP_INST_IN_FOGC 5
+# define NV30_VP_INST_IN_TC0 8
+# define NV30_VP_INST_IN_TC(n) (8+(n)) /* argument parenthesised so expression arguments keep their value */
+#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/
+
+/* Please note: the IADDR fields overlap other fields because they are used
+ * only for branch instructions. See Branching: label above
+ *
+ * DWORD 2
+ */
+#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/
+#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */
+#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/
+#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/
+#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/
+#define NV30_VP_INST_IADDR_SHIFT 2
+#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */
+
+/* DWORD 3 */
+#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/
+#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/
+#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24
+#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24)
+#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20
+#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20)
+#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16
+#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16)
+#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/
+#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/
+#define NV30_VP_INST_DEST_SHIFT 2
+#define NV30_VP_INST_DEST_MASK (0x0F << 2)
+# define NV30_VP_INST_DEST_POS 0
+# define NV30_VP_INST_DEST_BFC0 1
+# define NV30_VP_INST_DEST_BFC1 2
+# define NV30_VP_INST_DEST_COL0 3
+# define NV30_VP_INST_DEST_COL1 4
+# define NV30_VP_INST_DEST_FOGC 5
+# define NV30_VP_INST_DEST_PSZ 6
+# define NV30_VP_INST_DEST_TC(n) (8+(n)) /* argument parenthesised so expression arguments keep their value */
+
+#define NV30_VP_INST_LAST (1 << 0)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV30_VP_SRC0_HIGH_SHIFT 6
+#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0
+#define NV30_VP_SRC0_LOW_MASK 0x0000003F
+#define NV30_VP_SRC2_HIGH_SHIFT 4
+#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0
+#define NV30_VP_SRC2_LOW_MASK 0x0000000F
+
+
+/* Source-register definition - matches NV20 exactly */
+#define NV30_VP_SRC_NEGATE (1<<14)
+#define NV30_VP_SRC_SWZ_X_SHIFT 12
+#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12)
+#define NV30_VP_SRC_SWZ_Y_SHIFT 10
+#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10)
+#define NV30_VP_SRC_SWZ_Z_SHIFT 8
+#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8)
+#define NV30_VP_SRC_SWZ_W_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6)
+#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6)
+#define NV30_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0)
+#define NV30_VP_SRC_REG_TYPE_SHIFT 0
+#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0)
+#define NV30_VP_SRC_REG_TYPE_TEMP 1
+#define NV30_VP_SRC_REG_TYPE_INPUT 2
+#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ * 0 - Opcode, output reg/mask, ATTRIB source
+ * 1 - Source 0
+ * 2 - Source 1
+ * 3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ * result.color == R0.xyzw
+ * result.depth == R1.z
+ * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0
+ * otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords. As such instructions such as:
+ *
+ * ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO
+ * is implemented simply by not writing to the relevant components of the destination.
+ *
+ * Conditional execution
+ * TODO
+ *
+ * Non-native instructions:
+ * LIT
+ * LRP - MAD+MAD
+ * SUB - ADD, negate second source
+ * RSQ - LG2 + EX2
+ * POW - LG2 + MUL + EX2
+ * SCS - COS + SIN
+ * XPD
+ */
+
+//== Opcode / Destination selection ==
+#define NV30_FP_OP_PROGRAM_END (1 << 0)
+#define NV30_FP_OP_OUT_REG_SHIFT 1
+#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */
+/* Needs to be set when writing outputs to get expected result.. */
+#define NV30_FP_OP_OUT_REG_HALF (1 << 7)
+#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8)
+#define NV30_FP_OP_OUTMASK_SHIFT 9
+#define NV30_FP_OP_OUTMASK_MASK (0xF << 9)
+# define NV30_FP_OP_OUT_X (1<<9)
+# define NV30_FP_OP_OUT_Y (1<<10)
+# define NV30_FP_OP_OUT_Z (1<<11)
+# define NV30_FP_OP_OUT_W (1<<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NV30_FP_OP_INPUT_SRC_SHIFT 13
+#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13)
+# define NV30_FP_OP_INPUT_SRC_POSITION 0x0
+# define NV30_FP_OP_INPUT_SRC_COL0 0x1
+# define NV30_FP_OP_INPUT_SRC_COL1 0x2
+# define NV30_FP_OP_INPUT_SRC_FOGC 0x3
+# define NV30_FP_OP_INPUT_SRC_TC0 0x4
+/* Input source selector for texture coordinate set n.  The argument is
+ * parenthesized so that compound expressions (conditionals, shifts, ...)
+ * expand as a single operand of the addition. */
+# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + (n))
+#define NV30_FP_OP_TEX_UNIT_SHIFT 17
+#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */
+#define NV30_FP_OP_PRECISION_SHIFT 22
+#define NV30_FP_OP_PRECISION_MASK (3 << 22)
+# define NV30_FP_PRECISION_FP32 0
+# define NV30_FP_PRECISION_FP16 1
+# define NV30_FP_PRECISION_FX12 2
+#define NV30_FP_OP_OPCODE_SHIFT 24
+#define NV30_FP_OP_OPCODE_MASK (0x3F << 24)
+# define NV30_FP_OP_OPCODE_NOP 0x00
+# define NV30_FP_OP_OPCODE_MOV 0x01
+# define NV30_FP_OP_OPCODE_MUL 0x02
+# define NV30_FP_OP_OPCODE_ADD 0x03
+# define NV30_FP_OP_OPCODE_MAD 0x04
+# define NV30_FP_OP_OPCODE_DP3 0x05
+# define NV30_FP_OP_OPCODE_DP4 0x06
+# define NV30_FP_OP_OPCODE_DST 0x07
+# define NV30_FP_OP_OPCODE_MIN 0x08
+# define NV30_FP_OP_OPCODE_MAX 0x09
+# define NV30_FP_OP_OPCODE_SLT 0x0A
+# define NV30_FP_OP_OPCODE_SGE 0x0B
+# define NV30_FP_OP_OPCODE_SLE 0x0C
+# define NV30_FP_OP_OPCODE_SGT 0x0D
+# define NV30_FP_OP_OPCODE_SNE 0x0E
+# define NV30_FP_OP_OPCODE_SEQ 0x0F
+# define NV30_FP_OP_OPCODE_FRC 0x10
+# define NV30_FP_OP_OPCODE_FLR 0x11
+# define NV30_FP_OP_OPCODE_KIL 0x12
+# define NV30_FP_OP_OPCODE_PK4B 0x13
+# define NV30_FP_OP_OPCODE_UP4B 0x14
+# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */
+# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */
+# define NV30_FP_OP_OPCODE_TEX 0x17
+# define NV30_FP_OP_OPCODE_TXP 0x18
+# define NV30_FP_OP_OPCODE_TXD 0x19
+# define NV30_FP_OP_OPCODE_RCP 0x1A
+# define NV30_FP_OP_OPCODE_RSQ 0x1B
+# define NV30_FP_OP_OPCODE_EX2 0x1C
+# define NV30_FP_OP_OPCODE_LG2 0x1D
+# define NV30_FP_OP_OPCODE_LIT 0x1E
+# define NV30_FP_OP_OPCODE_LRP 0x1F
+# define NV30_FP_OP_OPCODE_STR 0x20
+# define NV30_FP_OP_OPCODE_SFL 0x21
+# define NV30_FP_OP_OPCODE_COS 0x22
+# define NV30_FP_OP_OPCODE_SIN 0x23
+# define NV30_FP_OP_OPCODE_PK2H 0x24
+# define NV30_FP_OP_OPCODE_UP2H 0x25
+# define NV30_FP_OP_OPCODE_POW 0x26
+# define NV30_FP_OP_OPCODE_PK4UB 0x27
+# define NV30_FP_OP_OPCODE_UP4UB 0x28
+# define NV30_FP_OP_OPCODE_PK2US 0x29
+# define NV30_FP_OP_OPCODE_UP2US 0x2A
+# define NV30_FP_OP_OPCODE_DP2A 0x2E
+# define NV30_FP_OP_OPCODE_TXB 0x31
+# define NV30_FP_OP_OPCODE_RFL 0x36
+# define NV30_FP_OP_OPCODE_DIV 0x3A
+#define NV30_FP_OP_OUT_SAT (1 << 31)
+
+/* high order bits of SRC0 */
+#define NV30_FP_OP_OUT_ABS (1 << 29)
+#define NV30_FP_OP_COND_SWZ_W_SHIFT 27
+#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27)
+#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25
+#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25)
+#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23
+#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23)
+#define NV30_FP_OP_COND_SWZ_X_SHIFT 21
+#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21)
+#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21
+#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
+#define NV30_FP_OP_COND_SHIFT 18
+#define NV30_FP_OP_COND_MASK (0x07 << 18)
+# define NV30_FP_OP_COND_FL 0
+# define NV30_FP_OP_COND_LT 1
+# define NV30_FP_OP_COND_EQ 2
+# define NV30_FP_OP_COND_LE 3
+# define NV30_FP_OP_COND_GT 4
+# define NV30_FP_OP_COND_NE 5
+# define NV30_FP_OP_COND_GE 6
+# define NV30_FP_OP_COND_TR 7
+
+/* high order bits of SRC1 */
+#define NV30_FP_OP_DST_SCALE_SHIFT 28
+#define NV30_FP_OP_DST_SCALE_MASK (3 << 28)
+#define NV30_FP_OP_DST_SCALE_1X 0
+#define NV30_FP_OP_DST_SCALE_2X 1
+#define NV30_FP_OP_DST_SCALE_4X 2
+#define NV30_FP_OP_DST_SCALE_8X 3
+#define NV30_FP_OP_DST_SCALE_INV_2X 5
+#define NV30_FP_OP_DST_SCALE_INV_4X 6
+#define NV30_FP_OP_DST_SCALE_INV_8X 7
+
+
+/* high order bits of SRC2 */
+#define NV30_FP_OP_INDEX_INPUT (1 << 30)
+
+//== Register selection ==
+#define NV30_FP_REG_TYPE_SHIFT 0
+#define NV30_FP_REG_TYPE_MASK (3 << 0)
+# define NV30_FP_REG_TYPE_TEMP 0
+# define NV30_FP_REG_TYPE_INPUT 1
+# define NV30_FP_REG_TYPE_CONST 2
+#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */
+#define NV30_FP_REG_SRC_MASK (31 << 2)
+#define NV30_FP_REG_SRC_HALF (1 << 8)
+#define NV30_FP_REG_SWZ_ALL_SHIFT 9
+#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9)
+#define NV30_FP_REG_SWZ_X_SHIFT 9
+#define NV30_FP_REG_SWZ_X_MASK (3 << 9)
+#define NV30_FP_REG_SWZ_Y_SHIFT 11
+#define NV30_FP_REG_SWZ_Y_MASK (3 << 11)
+#define NV30_FP_REG_SWZ_Z_SHIFT 13
+#define NV30_FP_REG_SWZ_Z_MASK (3 << 13)
+#define NV30_FP_REG_SWZ_W_SHIFT 15
+#define NV30_FP_REG_SWZ_W_MASK (3 << 15)
+# define NV30_FP_SWIZZLE_X 0
+# define NV30_FP_SWIZZLE_Y 1
+# define NV30_FP_SWIZZLE_Z 2
+# define NV30_FP_SWIZZLE_W 3
+#define NV30_FP_REG_NEGATE (1 << 17)
+
+#define NV30SR_NONE 0
+#define NV30SR_OUTPUT 1
+#define NV30SR_INPUT 2
+#define NV30SR_TEMP 3
+#define NV30SR_CONST 4
+
+/*
+ * Driver-side description of a shader register operand.
+ *
+ * Values are created with nv30_sr() and then adjusted by the
+ * nv30_sr_swz/neg/abs/scale helpers, all of which take and return the
+ * struct by value.
+ */
+struct nv30_sreg {
+ int type; /* presumably one of the NV30SR_* values - TODO confirm */
+ int index;
+
+ int dst_scale; /* nv30_sr() default: DEF_SCALE; set by nv30_sr_scale() */
+
+ int negate; /* toggled by nv30_sr_neg() */
+ int abs; /* set to 1 by nv30_sr_abs() */
+ int swz[4]; /* nv30_sr() default: identity { 0, 1, 2, 3 } */
+
+ int cc_update;
+ int cc_update_reg;
+ int cc_test; /* nv30_sr() default: DEF_CTEST */
+ int cc_test_reg;
+ int cc_swz[4]; /* nv30_sr() default: identity { 0, 1, 2, 3 } */
+};
+
+/*
+ * Build a register operand of the given type and index with default
+ * modifiers: identity swizzles, no negate/abs, DEF_SCALE destination scale
+ * and DEF_CTEST condition test.
+ */
+static INLINE struct nv30_sreg
+nv30_sr(int type, int index)
+{
+	struct nv30_sreg reg;
+	int comp;
+
+	reg.type = type;
+	reg.index = index;
+	reg.dst_scale = DEF_SCALE;
+	reg.negate = 0;
+	reg.abs = 0;
+	reg.cc_update = 0;
+	reg.cc_update_reg = 0;
+	reg.cc_test = DEF_CTEST;
+	reg.cc_test_reg = 0;
+
+	/* Both swizzles start out as the identity mapping */
+	for (comp = 0; comp < 4; comp++) {
+		reg.swz[comp] = comp;
+		reg.cc_swz[comp] = comp;
+	}
+
+	return reg;
+}
+
+/*
+ * Return a copy of src with an additional swizzle applied on top of the
+ * swizzle it already carries: each output component selects one of src's
+ * current swizzle entries.
+ */
+static INLINE struct nv30_sreg
+nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w)
+{
+	struct nv30_sreg swizzled = src;
+
+	/* Always read from the untouched 'src' so earlier writes below
+	 * cannot influence later lookups. */
+	swizzled.swz[SWZ_X] = src.swz[x];
+	swizzled.swz[SWZ_Y] = src.swz[y];
+	swizzled.swz[SWZ_Z] = src.swz[z];
+	swizzled.swz[SWZ_W] = src.swz[w];
+
+	return swizzled;
+}
+
+/* Return a copy of src with its negate flag flipped. */
+static INLINE struct nv30_sreg
+nv30_sr_neg(struct nv30_sreg src)
+{
+	struct nv30_sreg flipped = src;
+
+	flipped.negate = src.negate ? 0 : 1;
+	return flipped;
+}
+
+/* Return a copy of src with the absolute-value flag set. */
+static INLINE struct nv30_sreg
+nv30_sr_abs(struct nv30_sreg src)
+{
+	struct nv30_sreg result = src;
+
+	result.abs = 1;
+	return result;
+}
+
+/* Return a copy of src with its destination scale replaced by 'scale'. */
+static INLINE struct nv30_sreg
+nv30_sr_scale(struct nv30_sreg src, int scale)
+{
+	struct nv30_sreg result = src;
+
+	result.dst_scale = scale;
+	return result;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
new file mode 100644
index 0000000000..26147565a5
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -0,0 +1,725 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+/*
+ * Create a blend CSO.
+ *
+ * Translates the Gallium blend state into a nouveau state object holding
+ * the blend function/equation, colour mask, logic op and dither methods,
+ * and stores a copy of the pipe state next to it.
+ *
+ * Returns the new nv30_blend_state, or NULL if it could not be allocated.
+ */
+static void *
+nv30_blend_state_create(struct pipe_context *pipe,
+			const struct pipe_blend_state *cso)
+{
+	struct nv30_context *nv30 = nv30_context(pipe);
+	struct nouveau_grobj *rankine = nv30->screen->rankine;
+	struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso));
+	struct nouveau_stateobj *so;
+
+	/* Fail before building anything if the CSO could not be allocated;
+	 * the code below would otherwise write through a NULL pointer. */
+	if (!bso)
+		return NULL;
+
+	so = so_new(16, 0);
+
+	if (cso->blend_enable) {
+		so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3);
+		so_data  (so, 1);
+		so_data  (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) |
+			       nvgl_blend_func(cso->rgb_src_factor));
+		so_data  (so, (nvgl_blend_func(cso->alpha_dst_factor) << 16) |
+			       nvgl_blend_func(cso->rgb_dst_factor));
+		/* FIXME: Gallium assumes GL_EXT_blend_func_separate.
+		   It is not the case for NV30 */
+		so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1);
+		so_data  (so, nvgl_blend_eqn(cso->rgb_func));
+	} else {
+		so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	/* Colour write mask: A, R, G, B enables in bits 24, 16, 8 and 0 */
+	so_method(so, rankine, NV34TCL_COLOR_MASK, 1);
+	so_data  (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
+		       ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
+		       ((cso->colormask & PIPE_MASK_G) ? (0x01 <<  8) : 0) |
+		       ((cso->colormask & PIPE_MASK_B) ? (0x01 <<  0) : 0)));
+
+	if (cso->logicop_enable) {
+		so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
+		so_data  (so, 1);
+		so_data  (so, nvgl_logicop_func(cso->logicop_func));
+	} else {
+		so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	so_method(so, rankine, NV34TCL_DITHER_ENABLE, 1);
+	so_data  (so, cso->dither ? 1 : 0);
+
+	so_ref(so, &bso->so);
+	bso->pipe = *cso;
+	return (void *)bso;
+}
+
+/* Make hwcso the current blend state and flag blend state as dirty. */
+static void
+nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_BLEND;
+	ctx->blend = hwcso;
+}
+
+/* Drop the CSO's reference on its state object, then free the CSO. */
+static void
+nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_blend_state *blend = (struct nv30_blend_state *)hwcso;
+
+	so_ref(NULL, &blend->so);
+	FREE(blend);
+}
+
+
+/*
+ * Translate a PIPE_TEX_WRAP_* mode into the hardware wrap value, expressed
+ * relative to bit 0 (the S-coordinate encoding shifted down by
+ * NV34TCL_TX_WRAP_S_SHIFT) so the caller can shift it into the S, T or R
+ * field.
+ *
+ * Modes without a case below - including the MIRROR_CLAMP* variants, which
+ * are not handled - log an error and fall back to REPEAT.
+ */
+static INLINE unsigned
+wrap_mode(unsigned wrap)
+{
+	unsigned hw_wrap = NV34TCL_TX_WRAP_S_REPEAT;
+
+	switch (wrap) {
+	case PIPE_TEX_WRAP_REPEAT:
+		/* already the default */
+		break;
+	case PIPE_TEX_WRAP_MIRROR_REPEAT:
+		hw_wrap = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT;
+		break;
+	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+		hw_wrap = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE;
+		break;
+	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+		hw_wrap = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER;
+		break;
+	case PIPE_TEX_WRAP_CLAMP:
+		hw_wrap = NV34TCL_TX_WRAP_S_CLAMP;
+		break;
+	default:
+		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+		break;
+	}
+
+	return hw_wrap >> NV34TCL_TX_WRAP_S_SHIFT;
+}
+
+/*
+ * Create a sampler CSO.
+ *
+ * Precomputes the hardware words for a sampler from the Gallium sampler
+ * state: wrap modes and depth-compare function (wrap), anisotropy and
+ * min/max LOD (en), filters and LOD bias (filt) and border colour (bcol).
+ *
+ * Returns the new nv30_sampler_state, or NULL if it could not be allocated.
+ */
+static void *
+nv30_sampler_state_create(struct pipe_context *pipe,
+			  const struct pipe_sampler_state *cso)
+{
+	struct nv30_sampler_state *ps;
+	uint32_t filter = 0;
+	float limit;
+
+	ps = MALLOC(sizeof(struct nv30_sampler_state));
+	if (!ps)
+		return NULL;
+
+	ps->fmt = 0;
+	/* TODO: Not all RECTs formats have this bit set, bits 15-8 of format
+	   are the tx format to use. We should store normalized coord flag
+	   in sampler state structure, and set appropriate format in
+	   nvxx_fragtex_build()
+	   (NV34TCL_TX_FORMAT_RECT: a disabled prototype set bit 14 of fmt
+	   when !cso->normalized_coords.)
+	 */
+
+	ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) |
+		    (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) |
+		    (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT));
+
+	ps->en = 0;
+
+	/* Pick the highest anisotropy level not exceeding the request */
+	if (cso->max_anisotropy >= 8.0) {
+		ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
+	} else
+	if (cso->max_anisotropy >= 4.0) {
+		ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
+	} else
+	if (cso->max_anisotropy >= 2.0) {
+		ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
+	}
+
+	switch (cso->mag_img_filter) {
+	case PIPE_TEX_FILTER_LINEAR:
+		filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR;
+		break;
+	case PIPE_TEX_FILTER_NEAREST:
+	default:
+		filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST;
+		break;
+	}
+
+	switch (cso->min_img_filter) {
+	case PIPE_TEX_FILTER_LINEAR:
+		switch (cso->min_mip_filter) {
+		case PIPE_TEX_MIPFILTER_NEAREST:
+			filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+			break;
+		case PIPE_TEX_MIPFILTER_LINEAR:
+			filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+			break;
+		case PIPE_TEX_MIPFILTER_NONE:
+		default:
+			filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR;
+			break;
+		}
+		break;
+	case PIPE_TEX_FILTER_NEAREST:
+	default:
+		switch (cso->min_mip_filter) {
+		case PIPE_TEX_MIPFILTER_NEAREST:
+			filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+			break;
+		case PIPE_TEX_MIPFILTER_LINEAR:
+			filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+			break;
+		case PIPE_TEX_MIPFILTER_NONE:
+		default:
+			filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST;
+			break;
+		}
+		break;
+	}
+
+	ps->filt = filter;
+
+	/* The bias is packed into a 13-bit field in 1/256 steps, so it must be
+	 * clamped to [-16, 15] first; an unclamped value would wrap around
+	 * when masked with 0x1fff. */
+	limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+	ps->filt |= (int)(limit * 256.0) & 0x1fff;
+
+	limit = CLAMP(cso->max_lod, 0.0, 15.0);
+	ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/;
+
+	limit = CLAMP(cso->min_lod, 0.0, 15.0);
+	ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/;
+
+	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+		switch (cso->compare_func) {
+		case PIPE_FUNC_NEVER:
+			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER;
+			break;
+		case PIPE_FUNC_GREATER:
+			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER;
+			break;
+		case PIPE_FUNC_EQUAL:
+			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL;
+			break;
+		case PIPE_FUNC_GEQUAL:
+			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL;
+			break;
+		case PIPE_FUNC_LESS:
+			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS;
+			break;
+		case PIPE_FUNC_NOTEQUAL:
+			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL;
+			break;
+		case PIPE_FUNC_LEQUAL:
+			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL;
+			break;
+		case PIPE_FUNC_ALWAYS:
+			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Border colour packed as A, R, G, B from the most significant byte */
+	ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+		    (float_to_ubyte(cso->border_color[0]) << 16) |
+		    (float_to_ubyte(cso->border_color[1]) <<  8) |
+		    (float_to_ubyte(cso->border_color[2]) <<  0));
+
+	return (void *)ps;
+}
+
+/*
+ * Bind the first 'nr' sampler CSOs and clear any slots beyond 'nr' that
+ * were bound previously.  Every touched unit is flagged in dirty_samplers.
+ */
+static void
+nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+	unsigned unit = 0;
+
+	/* Newly bound units */
+	while (unit < nr) {
+		ctx->tex_sampler[unit] = sampler[unit];
+		ctx->dirty_samplers |= (1 << unit);
+		unit++;
+	}
+
+	/* Units that were bound before but are no longer in use */
+	while (unit < ctx->nr_samplers) {
+		ctx->tex_sampler[unit] = NULL;
+		ctx->dirty_samplers |= (1 << unit);
+		unit++;
+	}
+
+	ctx->nr_samplers = nr;
+	ctx->dirty |= NV30_NEW_SAMPLER;
+}
+
+/* Free a sampler CSO; nv30_sampler_state_create() makes a single allocation
+ * for it, so nothing else needs releasing. */
+static void
+nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/*
+ * Bind the first 'nr' textures, taking a reference on each, and release
+ * the references held by any previously bound units beyond 'nr'.  Every
+ * touched unit is flagged in dirty_samplers.
+ */
+static void
+nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+			 struct pipe_texture **miptree)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+	unsigned unit = 0;
+
+	/* Newly bound units */
+	while (unit < nr) {
+		pipe_texture_reference((struct pipe_texture **)
+				       &ctx->tex_miptree[unit], miptree[unit]);
+		ctx->dirty_samplers |= (1 << unit);
+		unit++;
+	}
+
+	/* Units that were bound before but are no longer in use */
+	while (unit < ctx->nr_textures) {
+		pipe_texture_reference((struct pipe_texture **)
+				       &ctx->tex_miptree[unit], NULL);
+		ctx->dirty_samplers |= (1 << unit);
+		unit++;
+	}
+
+	ctx->nr_textures = nr;
+	ctx->dirty |= NV30_NEW_SAMPLER;
+}
+
+/*
+ * Create a rasterizer CSO.
+ *
+ * Builds a nouveau state object with the shade model, line width/smooth/
+ * stipple, point size, polygon mode/cull/front-face, polygon stipple,
+ * polygon offset and point sprite methods, and stores a copy of the pipe
+ * state next to it.
+ *
+ * Returns the new nv30_rasterizer_state, or NULL if it could not be
+ * allocated.
+ */
+static void *
+nv30_rasterizer_state_create(struct pipe_context *pipe,
+			     const struct pipe_rasterizer_state *cso)
+{
+	struct nv30_context *nv30 = nv30_context(pipe);
+	struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
+	struct nouveau_stateobj *so;
+	struct nouveau_grobj *rankine = nv30->screen->rankine;
+
+	/* Fail before building anything if the CSO could not be allocated;
+	 * the code below would otherwise write through a NULL pointer. */
+	if (!rsso)
+		return NULL;
+
+	so = so_new(32, 0);
+
+	/*XXX: ignored:
+	 * 	light_twoside
+	 * 	point_smooth -nohw
+	 * 	multisample
+	 */
+
+	so_method(so, rankine, NV34TCL_SHADE_MODEL, 1);
+	so_data  (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT :
+				       NV34TCL_SHADE_MODEL_SMOOTH);
+
+	so_method(so, rankine, NV34TCL_LINE_WIDTH, 2);
+	so_data  (so, (unsigned char)(cso->line_width * 8.0) & 0xff);
+	so_data  (so, cso->line_smooth ? 1 : 0);
+	so_method(so, rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2);
+	so_data  (so, cso->line_stipple_enable ? 1 : 0);
+	so_data  (so, (cso->line_stipple_pattern << 16) |
+		       cso->line_stipple_factor);
+
+	so_method(so, rankine, NV34TCL_POINT_SIZE, 1);
+	so_data  (so, fui(cso->point_size));
+
+	/* Six consecutive words: front mode, back mode, cull face, front
+	 * face, polygon smooth enable, cull enable.  The cw/ccw fill modes
+	 * and the cull selection are mapped to front/back according to the
+	 * front winding. */
+	so_method(so, rankine, NV34TCL_POLYGON_MODE_FRONT, 6);
+	if (cso->front_winding == PIPE_WINDING_CCW) {
+		so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+		so_data(so, nvgl_polygon_mode(cso->fill_cw));
+		switch (cso->cull_mode) {
+		case PIPE_WINDING_CCW:
+			so_data(so, NV34TCL_CULL_FACE_FRONT);
+			break;
+		case PIPE_WINDING_CW:
+			so_data(so, NV34TCL_CULL_FACE_BACK);
+			break;
+		case PIPE_WINDING_BOTH:
+			so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK);
+			break;
+		default:
+			so_data(so, NV34TCL_CULL_FACE_BACK);
+			break;
+		}
+		so_data(so, NV34TCL_FRONT_FACE_CCW);
+	} else {
+		so_data(so, nvgl_polygon_mode(cso->fill_cw));
+		so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+		switch (cso->cull_mode) {
+		case PIPE_WINDING_CCW:
+			so_data(so, NV34TCL_CULL_FACE_BACK);
+			break;
+		case PIPE_WINDING_CW:
+			so_data(so, NV34TCL_CULL_FACE_FRONT);
+			break;
+		case PIPE_WINDING_BOTH:
+			so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK);
+			break;
+		default:
+			so_data(so, NV34TCL_CULL_FACE_BACK);
+			break;
+		}
+		so_data(so, NV34TCL_FRONT_FACE_CW);
+	}
+	so_data(so, cso->poly_smooth ? 1 : 0);
+	so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);
+
+	so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+	so_data  (so, cso->poly_stipple_enable ? 1 : 0);
+
+	/* Offset enables for point, line and fill mode, in that order: each
+	 * is on when either winding has offset enabled and uses that mode. */
+	so_method(so, rankine, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
+	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
+		so_data(so, 1);
+	else
+		so_data(so, 0);
+	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
+	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
+		so_data(so, 1);
+	else
+		so_data(so, 0);
+	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
+	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
+		so_data(so, 1);
+	else
+		so_data(so, 0);
+	if (cso->offset_cw || cso->offset_ccw) {
+		so_method(so, rankine, NV34TCL_POLYGON_OFFSET_FACTOR, 2);
+		so_data  (so, fui(cso->offset_scale));
+		so_data  (so, fui(cso->offset_units * 2));
+	}
+
+	so_method(so, rankine, NV34TCL_POINT_SPRITE, 1);
+	if (cso->point_sprite) {
+		unsigned psctl = (1 << 0), i;
+
+		/* Bit 0 enables sprites; bits 8..15 flag the coordinate
+		 * sets whose sprite coord mode is not NONE. */
+		for (i = 0; i < 8; i++) {
+			if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+				psctl |= (1 << (8 + i));
+		}
+
+		so_data(so, psctl);
+	} else {
+		so_data(so, 0);
+	}
+
+	so_ref(so, &rsso->so);
+	rsso->pipe = *cso;
+	return (void *)rsso;
+}
+
+/* Make hwcso the current rasterizer state and flag it as dirty. */
+static void
+nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_RAST;
+	ctx->rasterizer = hwcso;
+	/* draw_dirty is not updated here (swtnl path is disabled) */
+}
+
+/* Drop the CSO's reference on its state object, then free the CSO. */
+static void
+nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_rasterizer_state *rast =
+		(struct nv30_rasterizer_state *)hwcso;
+
+	so_ref(NULL, &rast->so);
+	FREE(rast);
+}
+
+/*
+ * Create a depth/stencil/alpha CSO.
+ *
+ * Builds a nouveau state object with the depth test, alpha test and
+ * front/back stencil methods, and stores a copy of the pipe state next
+ * to it.
+ *
+ * Returns the new nv30_zsa_state, or NULL if it could not be allocated.
+ */
+static void *
+nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+			const struct pipe_depth_stencil_alpha_state *cso)
+{
+	struct nv30_context *nv30 = nv30_context(pipe);
+	struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
+	struct nouveau_stateobj *so;
+	struct nouveau_grobj *rankine = nv30->screen->rankine;
+
+	/* Fail before building anything if the CSO could not be allocated;
+	 * the code below would otherwise write through a NULL pointer. */
+	if (!zsaso)
+		return NULL;
+
+	so = so_new(32, 0);
+
+	so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3);
+	so_data  (so, nvgl_comparison_op(cso->depth.func));
+	so_data  (so, cso->depth.writemask ? 1 : 0);
+	so_data  (so, cso->depth.enabled ? 1 : 0);
+
+	so_method(so, rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3);
+	so_data  (so, cso->alpha.enabled ? 1 : 0);
+	so_data  (so, nvgl_comparison_op(cso->alpha.func));
+	so_data  (so, float_to_ubyte(cso->alpha.ref_value));
+
+	/* Front-face stencil: full 8-word setup when enabled, otherwise
+	 * only the enable word is written (as 0). */
+	if (cso->stencil[0].enabled) {
+		so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 8);
+		so_data  (so, cso->stencil[0].enabled ? 1 : 0);
+		so_data  (so, cso->stencil[0].writemask);
+		so_data  (so, nvgl_comparison_op(cso->stencil[0].func));
+		so_data  (so, cso->stencil[0].ref_value);
+		so_data  (so, cso->stencil[0].valuemask);
+		so_data  (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+		so_data  (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+		so_data  (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+	} else {
+		so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	/* Back-face stencil, same layout as the front face */
+	if (cso->stencil[1].enabled) {
+		so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 8);
+		so_data  (so, cso->stencil[1].enabled ? 1 : 0);
+		so_data  (so, cso->stencil[1].writemask);
+		so_data  (so, nvgl_comparison_op(cso->stencil[1].func));
+		so_data  (so, cso->stencil[1].ref_value);
+		so_data  (so, cso->stencil[1].valuemask);
+		so_data  (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+		so_data  (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+		so_data  (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+	} else {
+		so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	so_ref(so, &zsaso->so);
+	zsaso->pipe = *cso;
+	return (void *)zsaso;
+}
+
+/* Make hwcso the current depth/stencil/alpha state and flag it as dirty. */
+static void
+nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_ZSA;
+	ctx->zsa = hwcso;
+}
+
+/* Drop the CSO's reference on its state object, then free the CSO. */
+static void
+nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_zsa_state *zsa = (struct nv30_zsa_state *)hwcso;
+
+	so_ref(NULL, &zsa->so);
+	FREE(zsa);
+}
+
+/*
+ * Create a vertex program CSO holding a private copy of the TGSI tokens.
+ * No translation is done here.
+ *
+ * Returns the new nv30_vertex_program, or NULL if it could not be
+ * allocated.
+ */
+static void *
+nv30_vp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *cso)
+{
+	/*struct nv30_context *nv30 = nv30_context(pipe);*/
+	struct nv30_vertex_program *vp;
+
+	vp = CALLOC(1, sizeof(struct nv30_vertex_program));
+	/* Storing the token copy below would dereference NULL otherwise */
+	if (!vp)
+		return NULL;
+
+	vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+	/*vp->draw = draw_create_vertex_shader(nv30->draw, &vp->pipe);*/
+
+	return (void *)vp;
+}
+
+/* Make hwcso the current vertex program and flag it as dirty. */
+static void
+nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_VERTPROG;
+	ctx->vertprog = hwcso;
+	/* draw_dirty is not updated here (swtnl path is disabled) */
+}
+
+/*
+ * Destroy a vertex program CSO: release what nv30_vertprog_destroy() owns,
+ * then the duplicated TGSI tokens, then the CSO itself.
+ */
+static void
+nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_vertex_program *prog = (struct nv30_vertex_program *)hwcso;
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	/* No draw-module shader exists to delete (swtnl path is disabled) */
+	nv30_vertprog_destroy(ctx, prog);
+
+	/* tokens is const-qualified in pipe_shader_state; cast to free it */
+	FREE((void*)prog->pipe.tokens);
+	FREE(prog);
+}
+
+/*
+ * Create a fragment program CSO holding a private copy of the TGSI tokens
+ * and the shader info gathered by tgsi_scan_shader().
+ *
+ * Returns the new nv30_fragment_program, or NULL if it could not be
+ * allocated.
+ */
+static void *
+nv30_fp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *cso)
+{
+	struct nv30_fragment_program *fp;
+
+	fp = CALLOC(1, sizeof(struct nv30_fragment_program));
+	/* Storing the token copy below would dereference NULL otherwise */
+	if (!fp)
+		return NULL;
+
+	fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+	tgsi_scan_shader(fp->pipe.tokens, &fp->info);
+
+	return (void *)fp;
+}
+
+/* Make hwcso the current fragment program and flag it as dirty. */
+static void
+nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_FRAGPROG;
+	ctx->fragprog = hwcso;
+}
+
+/*
+ * Destroy a fragment program CSO: release what nv30_fragprog_destroy()
+ * owns, then the duplicated TGSI tokens, then the CSO itself.
+ */
+static void
+nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_fragment_program *prog =
+		(struct nv30_fragment_program *)hwcso;
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	nv30_fragprog_destroy(ctx, prog);
+
+	/* tokens is const-qualified in pipe_shader_state; cast to free it */
+	FREE((void*)prog->pipe.tokens);
+	FREE(prog);
+}
+
+/* Store a copy of the blend colour and flag it as dirty. */
+static void
+nv30_set_blend_color(struct pipe_context *pipe,
+		     const struct pipe_blend_color *bcol)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_BCOL;
+	ctx->blend_colour = *bcol;
+}
+
+/* Clip state hook: the body is empty, so the supplied clip state is
+ * neither stored nor flagged dirty. */
+static void
+nv30_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+}
+
+/*
+ * Record the constant buffer for the given shader stage along with its
+ * size in vec4 units (buffer size / 16 bytes), and flag the matching
+ * program as dirty.  The 'index' argument is not used.
+ */
+static void
+nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+			 const struct pipe_constant_buffer *buf )
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->constbuf[shader] = buf->buffer;
+	ctx->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float));
+
+	/* Only vertex and fragment stages have a program to revalidate */
+	if (shader == PIPE_SHADER_VERTEX)
+		ctx->dirty |= NV30_NEW_VERTPROG;
+	else if (shader == PIPE_SHADER_FRAGMENT)
+		ctx->dirty |= NV30_NEW_FRAGPROG;
+}
+
+/* Store a copy of the framebuffer state and flag it as dirty. */
+static void
+nv30_set_framebuffer_state(struct pipe_context *pipe,
+			   const struct pipe_framebuffer_state *fb)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_FB;
+	ctx->framebuffer = *fb;
+}
+
+/* Copy the 128-byte polygon stipple pattern and flag it as dirty. */
+static void
+nv30_set_polygon_stipple(struct pipe_context *pipe,
+			 const struct pipe_poly_stipple *stipple)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_STIPPLE;
+	memcpy(ctx->stipple, stipple->stipple, 4 * 32);
+}
+
+/* Store a copy of the scissor rectangle and flag it as dirty. */
+static void
+nv30_set_scissor_state(struct pipe_context *pipe,
+		       const struct pipe_scissor_state *s)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_SCISSOR;
+	ctx->scissor = *s;
+}
+
+/* Store a copy of the viewport transform and flag it as dirty. */
+static void
+nv30_set_viewport_state(struct pipe_context *pipe,
+			const struct pipe_viewport_state *vpt)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_VIEWPORT;
+	ctx->viewport = *vpt;
+	/* draw_dirty is not updated here (swtnl path is disabled) */
+}
+
+/* Copy 'count' vertex buffer descriptors into the context and flag the
+ * vertex arrays as dirty. */
+static void
+nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+			const struct pipe_vertex_buffer *vb)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->vtxbuf_nr = count;
+	memcpy(ctx->vtxbuf, vb, sizeof(*vb) * count);
+
+	/* draw_dirty is not updated here (swtnl path is disabled) */
+	ctx->dirty |= NV30_NEW_ARRAYS;
+}
+
+/* Copy 'count' vertex element descriptors into the context and flag the
+ * vertex arrays as dirty. */
+static void
+nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+			 const struct pipe_vertex_element *ve)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->vtxelt_nr = count;
+	memcpy(ctx->vtxelt, ve, sizeof(*ve) * count);
+
+	/* draw_dirty is not updated here (swtnl path is disabled) */
+	ctx->dirty |= NV30_NEW_ARRAYS;
+}
+
+/* Remember the caller's edge flag bitfield pointer (not copied) and flag
+ * the vertex arrays as dirty. */
+static void
+nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+	struct nv30_context *ctx = nv30_context(pipe);
+
+	ctx->dirty |= NV30_NEW_ARRAYS;
+	ctx->edgeflags = bitfield;
+	/* draw_dirty is not updated here (swtnl path is disabled) */
+}
+
+/*
+ * Install this file's state create/bind/delete and set_* callbacks into
+ * the context's pipe_context function table.
+ */
+void
+nv30_init_state_functions(struct nv30_context *nv30)
+{
+	struct pipe_context *pctx = &nv30->pipe;
+
+	/* Blend */
+	pctx->create_blend_state = nv30_blend_state_create;
+	pctx->bind_blend_state = nv30_blend_state_bind;
+	pctx->delete_blend_state = nv30_blend_state_delete;
+
+	/* Samplers and sampler textures */
+	pctx->create_sampler_state = nv30_sampler_state_create;
+	pctx->bind_sampler_states = nv30_sampler_state_bind;
+	pctx->delete_sampler_state = nv30_sampler_state_delete;
+	pctx->set_sampler_textures = nv30_set_sampler_texture;
+
+	/* Rasterizer */
+	pctx->create_rasterizer_state = nv30_rasterizer_state_create;
+	pctx->bind_rasterizer_state = nv30_rasterizer_state_bind;
+	pctx->delete_rasterizer_state = nv30_rasterizer_state_delete;
+
+	/* Depth / stencil / alpha */
+	pctx->create_depth_stencil_alpha_state =
+		nv30_depth_stencil_alpha_state_create;
+	pctx->bind_depth_stencil_alpha_state =
+		nv30_depth_stencil_alpha_state_bind;
+	pctx->delete_depth_stencil_alpha_state =
+		nv30_depth_stencil_alpha_state_delete;
+
+	/* Vertex shader */
+	pctx->create_vs_state = nv30_vp_state_create;
+	pctx->bind_vs_state = nv30_vp_state_bind;
+	pctx->delete_vs_state = nv30_vp_state_delete;
+
+	/* Fragment shader */
+	pctx->create_fs_state = nv30_fp_state_create;
+	pctx->bind_fs_state = nv30_fp_state_bind;
+	pctx->delete_fs_state = nv30_fp_state_delete;
+
+	/* Non-CSO state */
+	pctx->set_blend_color = nv30_set_blend_color;
+	pctx->set_clip_state = nv30_set_clip_state;
+	pctx->set_constant_buffer = nv30_set_constant_buffer;
+	pctx->set_framebuffer_state = nv30_set_framebuffer_state;
+	pctx->set_polygon_stipple = nv30_set_polygon_stipple;
+	pctx->set_scissor_state = nv30_set_scissor_state;
+	pctx->set_viewport_state = nv30_set_viewport_state;
+
+	/* Vertex inputs */
+	pctx->set_edgeflags = nv30_set_edgeflags;
+	pctx->set_vertex_buffers = nv30_set_vertex_buffers;
+	pctx->set_vertex_elements = nv30_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h
new file mode 100644
index 0000000000..2023278e37
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state.h
@@ -0,0 +1,88 @@
+#ifndef __NV30_STATE_H__
+#define __NV30_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+/*
+ * Precomputed hardware words for one sampler, filled in by
+ * nv30_sampler_state_create().
+ */
+struct nv30_sampler_state {
+ uint32_t fmt; /* set to 0 at creation */
+ uint32_t wrap; /* S/T/R wrap modes, plus depth-compare function */
+ uint32_t en; /* anisotropy level and min/max LOD */
+ uint32_t filt; /* mag/min filters and LOD bias */
+ uint32_t bcol; /* border colour, packed A,R,G,B from the top byte */
+};
+
+struct nv30_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv30_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+/*
+ * Vertex program CSO.  Allocated zeroed by nv30_vp_state_create(), which
+ * only fills in pipe.tokens (a private copy of the TGSI tokens); it is
+ * released through nv30_vertprog_destroy() in nv30_vp_state_delete().
+ */
+struct nv30_vertex_program {
+ struct pipe_shader_state pipe;
+
+ boolean translated;
+
+ struct nv30_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv30_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+ struct nouveau_stateobj *so;
+};
+
+struct nv30_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+/*
+ * Fragment program CSO.  Allocated zeroed by nv30_fp_state_create(), which
+ * fills in pipe.tokens (a private copy of the TGSI tokens) and info (via
+ * tgsi_scan_shader); it is released through nv30_fragprog_destroy() in
+ * nv30_fp_state_delete().
+ */
+struct nv30_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv30_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+ struct nouveau_stateobj *so;
+};
+
+struct nv30_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct pipe_texture *shadow_tex;
+ struct pipe_surface *shadow_surface;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c
new file mode 100644
index 0000000000..44d43e132a
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_blend.c
@@ -0,0 +1,40 @@
+#include "nv30_context.h"
+
+/* Point the NV30_STATE_BLEND hardware slot at the state object prebuilt in
+ * the bound blend CSO.  Always returns TRUE. */
+static boolean
+nv30_state_blend_validate(struct nv30_context *nv30)
+{
+ so_ref(nv30->blend->so, &nv30->state.hw[NV30_STATE_BLEND]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_blend = {
+ .validate = nv30_state_blend_validate,
+ .dirty = {
+ .pipe = NV30_NEW_BLEND,
+ .hw = NV30_STATE_BLEND
+ }
+};
+
+/*
+ * Build a one-method state object carrying the current blend colour,
+ * packed as A,R,G,B bytes from the most significant byte, and install it
+ * in the NV30_STATE_BCOL hardware slot.  Always returns TRUE.
+ */
+static boolean
+nv30_state_blend_colour_validate(struct nv30_context *nv30)
+{
+	struct pipe_blend_color *colour = &nv30->blend_colour;
+	struct nouveau_stateobj *bcol_so = so_new(2, 0);
+
+	so_method(bcol_so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1);
+	so_data  (bcol_so, ((float_to_ubyte(colour->color[3]) << 24) |
+			    (float_to_ubyte(colour->color[0]) << 16) |
+			    (float_to_ubyte(colour->color[1]) <<  8) |
+			    (float_to_ubyte(colour->color[2]) <<  0)));
+
+	so_ref(bcol_so, &nv30->state.hw[NV30_STATE_BCOL]);
+	return TRUE;
+}
+
+struct nv30_state_entry nv30_state_blend_colour = {
+ .validate = nv30_state_blend_colour_validate,
+ .dirty = {
+ .pipe = NV30_NEW_BCOL,
+ .hw = NV30_STATE_BCOL
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c
new file mode 100644
index 0000000000..f77b08ff69
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_emit.c
@@ -0,0 +1,118 @@
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+/* NULL-terminated list of state entries; nv30_state_validate() hands it to
+ * nv30_state_do_validate(), which walks it in this order. */
+static struct nv30_state_entry *render_states[] = {
+ &nv30_state_framebuffer,
+ &nv30_state_rasterizer,
+ &nv30_state_scissor,
+ &nv30_state_stipple,
+ &nv30_state_fragprog,
+ &nv30_state_fragtex,
+ &nv30_state_vertprog,
+ &nv30_state_blend,
+ &nv30_state_blend_colour,
+ &nv30_state_zsa,
+ &nv30_state_viewport,
+ &nv30_state_vbo,
+ NULL
+};
+
+/*
+ * Mark the bound colour and depth/stencil surfaces as defined, then run
+ * the validate hook of every entry in the NULL-terminated 'states' list
+ * whose pipe dirty mask intersects nv30->dirty.  A hook returning true
+ * flags its hardware state slot in nv30->state.dirty.  All pipe dirty
+ * bits are cleared afterwards.
+ */
+static void
+nv30_state_do_validate(struct nv30_context *nv30,
+		       struct nv30_state_entry **states)
+{
+	const struct pipe_framebuffer_state *fb = &nv30->framebuffer;
+	struct nv30_state_entry **cur;
+	unsigned i;
+
+	for (i = 0; i < fb->nr_cbufs; i++)
+		fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
+	if (fb->zsbuf)
+		fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
+
+	for (cur = states; *cur; cur++) {
+		struct nv30_state_entry *entry = *cur;
+
+		/* Skip entries none of whose inputs changed */
+		if (!(nv30->dirty & entry->dirty.pipe))
+			continue;
+
+		if (entry->validate(nv30))
+			nv30->state.dirty |= (1ULL << entry->dirty.hw);
+	}
+
+	nv30->dirty = 0;
+}
+
+/*
+ * Emit this context's dirty hardware state objects and then the
+ * relocation markers for the state objects that carry buffer references.
+ */
+void
+nv30_state_emit(struct nv30_context *nv30)
+{
+ struct nv30_state *state = &nv30->state;
+ struct nv30_screen *screen = nv30->screen;
+ unsigned i, samplers;
+ uint64_t states;
+
+ /* A different context was current on the screen: mark every state
+ * object of ours that differs from what the screen holds as dirty. */
+ if (nv30->pctx_id != screen->cur_pctx) {
+ for (i = 0; i < NV30_STATE_MAX; i++) {
+ if (state->hw[i] && screen->state[i] != state->hw[i])
+ state->dirty |= (1ULL << i);
+ }
+
+ screen->cur_pctx = nv30->pctx_id;
+ }
+
+ /* Walk the dirty bits from bit 0 upward until none remain; each dirty
+ * slot is referenced into the screen and emitted if non-NULL. */
+ for (i = 0, states = state->dirty; states; i++) {
+ if (!(states & (1ULL << i)))
+ continue;
+ so_ref (state->hw[i], &nv30->screen->state[i]);
+ if (state->hw[i])
+ so_emit(nv30->nvws, nv30->screen->state[i]);
+ states &= ~(1ULL << i);
+ }
+
+ state->dirty = 0;
+
+ /* Relocation markers: framebuffer, each fragment texture unit flagged
+ * in fp_samplers, the fragment program, and the vertex buffers if set. */
+ so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]);
+ for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
+ if (!(samplers & (1 << i)))
+ continue;
+ so_emit_reloc_markers(nv30->nvws,
+ state->hw[NV30_STATE_FRAGTEX0+i]);
+ samplers &= ~(1ULL << i);
+ }
+ so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FRAGPROG]);
+ if (state->hw[NV30_STATE_VTXBUF] /*&& nv30->render_mode == HW*/)
+ so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_VTXBUF]);
+}
+
+/*
+ * Validate all render states for the context.
+ *
+ * The software-TNL fallback handling is compiled out (#if 0), so as built
+ * this only runs nv30_state_do_validate() over render_states and always
+ * returns TRUE.
+ */
+boolean
+nv30_state_validate(struct nv30_context *nv30)
+{
+#if 0
+ boolean was_sw = nv30->fallback_swtnl ? TRUE : FALSE;
+
+ if (nv30->render_mode != HW) {
+ /* Don't even bother trying to go back to hw if none
+ * of the states that caused swtnl previously have changed.
+ */
+ if ((nv30->fallback_swtnl & nv30->dirty)
+ != nv30->fallback_swtnl)
+ return FALSE;
+
+ /* Attempt to go to hwtnl again */
+ nv30->pipe.flush(&nv30->pipe, 0, NULL);
+ nv30->dirty |= (NV30_NEW_VIEWPORT |
+ NV30_NEW_VERTPROG |
+ NV30_NEW_ARRAYS);
+ nv30->render_mode = HW;
+ }
+#endif
+ nv30_state_do_validate(nv30, render_states);
+#if 0
+ if (nv30->fallback_swtnl || nv30->fallback_swrast)
+ return FALSE;
+
+ if (was_sw)
+ NOUVEAU_ERR("swtnl->hw\n");
+#endif
+ return TRUE;
+}
diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
new file mode 100644
index 0000000000..77368cb205
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -0,0 +1,144 @@
+#include "nv30_context.h"
+#include "nouveau/nouveau_util.h"
+
+/*
+ * Build the state object describing the bound framebuffer (colour targets,
+ * depth/stencil target, render-target format and the window/clip rectangles)
+ * and store it in the NV30_STATE_FB hardware slot.
+ *
+ * Fixes over the previous revision:
+ *  - rt[] is initialised, and the colour-buffer loop records and enables
+ *    every bound colour buffer instead of only the first one (previously
+ *    COLOR1/MRT could never be enabled and rt[1] was read uninitialised);
+ *  - rt[0] is no longer dereferenced when no colour buffer is bound; the
+ *    surface layout is then taken from the depth buffer, if there is one;
+ *  - the number of colour buffers is clamped to the size of rt[].
+ *
+ * Always returns TRUE.
+ */
+static boolean
+nv30_state_framebuffer_validate(struct nv30_context *nv30)
+{
+	struct pipe_framebuffer_state *fb = &nv30->framebuffer;
+	struct pipe_surface *rt[2] = { NULL, NULL }, *zeta = NULL;
+	struct pipe_surface *layout;
+	uint32_t rt_enable, rt_format;
+	int i, nr_cbufs, colour_format = 0, zeta_format = 0;
+	struct nouveau_stateobj *so = so_new(64, 10);
+	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+	unsigned w = fb->width;
+	unsigned h = fb->height;
+	struct nv30_miptree *nv30mt;
+
+	/* Only two colour targets are programmed below; never index past
+	 * the end of rt[], even in builds where assert() is compiled out.
+	 */
+	assert(fb->nr_cbufs <= 2);
+	nr_cbufs = fb->nr_cbufs > 2 ? 2 : fb->nr_cbufs;
+
+	rt_enable = 0;
+	for (i = 0; i < nr_cbufs; i++) {
+		/* All colour buffers must share one format. */
+		if (colour_format)
+			assert(colour_format == fb->cbufs[i]->format);
+		else
+			colour_format = fb->cbufs[i]->format;
+
+		rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
+		rt[i] = fb->cbufs[i];
+	}
+
+	if (rt_enable & NV34TCL_RT_ENABLE_COLOR1)
+		rt_enable |= NV34TCL_RT_ENABLE_MRT;
+
+	if (fb->zsbuf) {
+		zeta_format = fb->zsbuf->format;
+		zeta = fb->zsbuf;
+	}
+
+	/* Pick the surface that decides between the swizzled and the linear
+	 * layout: the first colour buffer, or the depth buffer when rendering
+	 * to depth only.
+	 */
+	layout = rt[0] ? rt[0] : zeta;
+
+	if (layout &&
+	    !(layout->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+		/* Swizzled targets need power-of-two dimensions. */
+		assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+		for (i = 1; i < nr_cbufs; i++)
+			assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
+
+		/* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */
+		rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+			    (log2i(fb->width) << 16) /*NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT*/ |
+			    (log2i(fb->height) << 24) /*NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT*/;
+	} else {
+		rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+	}
+
+	/* A format of 0 means "no colour buffer"; A8R8G8B8 is used then. */
+	switch (colour_format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case 0:
+		rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
+		break;
+	case PIPE_FORMAT_R5G6B5_UNORM:
+		rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
+		break;
+	default:
+		assert(0);
+	}
+
+	/* A format of 0 means "no depth buffer"; Z24S8 is used then. */
+	switch (zeta_format) {
+	case PIPE_FORMAT_Z16_UNORM:
+		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
+		break;
+	case PIPE_FORMAT_Z24S8_UNORM:
+	case 0:
+		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
+		break;
+	default:
+		assert(0);
+	}
+
+	if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) {
+		/* Low half: colour pitch.  High half: depth pitch, or the
+		 * colour pitch again when no depth buffer is bound.
+		 */
+		uint32_t pitch = rt[0]->stride;
+		if (zeta) {
+			pitch |= (zeta->stride << 16);
+		} else {
+			pitch |= (pitch << 16);
+		}
+
+		nv30mt = (struct nv30_miptree *)rt[0]->texture;
+		so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR0, 1);
+		so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+			  nv30->nvws->channel->vram->handle,
+			  nv30->nvws->channel->gart->handle);
+		so_method(so, nv30->screen->rankine, NV34TCL_COLOR0_PITCH, 2);
+		so_data  (so, pitch);
+		so_reloc (so, nv30mt->buffer, rt[0]->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+	}
+
+	if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) {
+		nv30mt = (struct nv30_miptree *)rt[1]->texture;
+		so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR1, 1);
+		so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+			  nv30->nvws->channel->vram->handle,
+			  nv30->nvws->channel->gart->handle);
+		so_method(so, nv30->screen->rankine, NV34TCL_COLOR1_OFFSET, 2);
+		so_reloc (so, nv30mt->buffer, rt[1]->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+		so_data  (so, rt[1]->stride);
+	}
+
+	if (zeta_format) {
+		nv30mt = (struct nv30_miptree *)zeta->texture;
+		so_method(so, nv30->screen->rankine, NV34TCL_DMA_ZETA, 1);
+		so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+			  nv30->nvws->channel->vram->handle,
+			  nv30->nvws->channel->gart->handle);
+		so_method(so, nv30->screen->rankine, NV34TCL_ZETA_OFFSET, 1);
+		so_reloc (so, nv30mt->buffer, zeta->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+		/* TODO: allocate LMA depth buffer */
+	}
+
+	so_method(so, nv30->screen->rankine, NV34TCL_RT_ENABLE, 1);
+	so_data  (so, rt_enable);
+	so_method(so, nv30->screen->rankine, NV34TCL_RT_HORIZ, 3);
+	so_data  (so, (w << 16) | 0);
+	so_data  (so, (h << 16) | 0);
+	so_data  (so, rt_format);
+	so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_HORIZ, 2);
+	so_data  (so, (w << 16) | 0);
+	so_data  (so, (h << 16) | 0);
+	so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+	so_data  (so, ((w - 1) << 16) | 0);
+	so_data  (so, ((h - 1) << 16) | 0);
+	so_method(so, nv30->screen->rankine, 0x1d88, 1);
+	so_data  (so, (1 << 12) | h);
+	/* Wonder why this is needed, context should all be set to zero on init */
+	so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1);
+	so_data  (so, 0);
+
+	so_ref(so, &nv30->state.hw[NV30_STATE_FB]);
+	return TRUE;
+}
+
+/*
+ * State entry for the framebuffer: pairs the validate callback with the
+ * NV30_NEW_FB pipe dirty flag and the NV30_STATE_FB hardware state slot.
+ * NOTE(review): how .dirty.pipe / .dirty.hw are consumed is decided by
+ * nv30_state_do_validate(), which is outside this view.
+ */
+struct nv30_state_entry nv30_state_framebuffer = {
+ .validate = nv30_state_framebuffer_validate,
+ .dirty = {
+ .pipe = NV30_NEW_FB,
+ .hw = NV30_STATE_FB
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_rasterizer.c b/src/gallium/drivers/nv30/nv30_state_rasterizer.c
new file mode 100644
index 0000000000..6d1b60e043
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_rasterizer.c
@@ -0,0 +1,17 @@
+#include "nv30_context.h"
+
+/*
+ * Point the NV30_STATE_RAST hardware slot at the state object that belongs
+ * to the currently bound rasterizer CSO.  Always returns TRUE.
+ */
+static boolean
+nv30_state_rasterizer_validate(struct nv30_context *nv30)
+{
+	struct nouveau_stateobj **slot = &nv30->state.hw[NV30_STATE_RAST];
+
+	so_ref(nv30->rasterizer->so, slot);
+	return TRUE;
+}
+
+/*
+ * State entry for the rasterizer: pairs the validate callback with the
+ * NV30_NEW_RAST pipe dirty flag and the NV30_STATE_RAST hardware state slot.
+ */
+struct nv30_state_entry nv30_state_rasterizer = {
+ .validate = nv30_state_rasterizer_validate,
+ .dirty = {
+ .pipe = NV30_NEW_RAST,
+ .hw = NV30_STATE_RAST
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c
new file mode 100644
index 0000000000..1db9bc1795
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_scissor.c
@@ -0,0 +1,35 @@
+#include "nv30_context.h"
+
+/*
+ * Build the scissor state object and store it in the NV30_STATE_SCISSOR
+ * hardware slot.
+ *
+ * Returns FALSE without touching anything when a scissor object already
+ * exists and scissoring was disabled before and is still disabled; returns
+ * TRUE after installing a new state object otherwise.
+ */
+static boolean
+nv30_state_scissor_validate(struct nv30_context *nv30)
+{
+	struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe;
+	struct pipe_scissor_state *s = &nv30->scissor;
+	struct nouveau_stateobj *so;
+	unsigned horiz, vert;
+
+	/* Disabled -> disabled: the rectangle in hardware is still right. */
+	if (nv30->state.hw[NV30_STATE_SCISSOR] &&
+	    !rast->scissor && !nv30->state.scissor_enabled)
+		return FALSE;
+	nv30->state.scissor_enabled = rast->scissor;
+
+	if (nv30->state.scissor_enabled) {
+		/* size in the upper 16 bits, origin in the lower 16 bits */
+		horiz = ((s->maxx - s->minx) << 16) | s->minx;
+		vert  = ((s->maxy - s->miny) << 16) | s->miny;
+	} else {
+		/* scissor off: a 4096 x 4096 rectangle anchored at 0,0 */
+		horiz = 4096 << 16;
+		vert  = 4096 << 16;
+	}
+
+	so = so_new(3, 0);
+	so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2);
+	so_data  (so, horiz);
+	so_data  (so, vert);
+
+	so_ref(so, &nv30->state.hw[NV30_STATE_SCISSOR]);
+	return TRUE;
+}
+
+/*
+ * State entry for the scissor rectangle.  It lists both NV30_NEW_SCISSOR and
+ * NV30_NEW_RAST because the validate callback reads the scissor rectangle as
+ * well as the rasterizer's scissor enable.
+ */
+struct nv30_state_entry nv30_state_scissor = {
+ .validate = nv30_state_scissor_validate,
+ .dirty = {
+ .pipe = NV30_NEW_SCISSOR | NV30_NEW_RAST,
+ .hw = NV30_STATE_SCISSOR
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c
new file mode 100644
index 0000000000..41b42813b4
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_stipple.c
@@ -0,0 +1,39 @@
+#include "nv30_context.h"
+
+/*
+ * Build the polygon-stipple state object and store it in the
+ * NV30_STATE_STIPPLE hardware slot.
+ *
+ * Returns FALSE without touching anything when a stipple object already
+ * exists and stippling was disabled before and is still disabled; returns
+ * TRUE after installing a new state object otherwise.
+ *
+ * Fix: state.stipple_enabled is now updated here.  It used to be read by the
+ * early-out but never written, so once a stipple object existed a switch from
+ * "enabled" to "disabled" was skipped and stippling stayed on in hardware.
+ * This mirrors what the scissor validate function does with scissor_enabled.
+ */
+static boolean
+nv30_state_stipple_validate(struct nv30_context *nv30)
+{
+	struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe;
+	struct nouveau_grobj *rankine = nv30->screen->rankine;
+	struct nouveau_stateobj *so;
+
+	/* Disabled -> disabled: nothing to re-emit. */
+	if (nv30->state.hw[NV30_STATE_STIPPLE] &&
+	    (rast->poly_stipple_enable == 0 && nv30->state.stipple_enabled == 0))
+		return FALSE;
+	nv30->state.stipple_enabled = rast->poly_stipple_enable;
+
+	if (rast->poly_stipple_enable) {
+		unsigned i;
+
+		/* 2 dwords for the enable + 1 method header + 32 pattern rows */
+		so = so_new(35, 0);
+		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+		so_data  (so, 1);
+		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+		for (i = 0; i < 32; i++)
+			so_data(so, nv30->stipple[i]);
+	} else {
+		so = so_new(2, 0);
+		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	so_ref(so, &nv30->state.hw[NV30_STATE_STIPPLE]);
+	return TRUE;
+}
+
+/*
+ * State entry for polygon stipple.  It lists both NV30_NEW_STIPPLE and
+ * NV30_NEW_RAST because the validate callback reads the stipple pattern as
+ * well as the rasterizer's poly_stipple_enable.
+ */
+struct nv30_state_entry nv30_state_stipple = {
+ .validate = nv30_state_stipple_validate,
+ .dirty = {
+ .pipe = NV30_NEW_STIPPLE | NV30_NEW_RAST,
+ .hw = NV30_STATE_STIPPLE,
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c
new file mode 100644
index 0000000000..951d40ebfd
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_viewport.c
@@ -0,0 +1,70 @@
+#include "nv30_context.h"
+
+/*
+ * Build the viewport transform state object and store it in the
+ * NV30_STATE_VIEWPORT hardware slot.
+ *
+ * When the rasterizer asks for clipping to be bypassed an identity-like
+ * transform (translate 0,0,0,0 / scale 1,1,1,0) is programmed instead of the
+ * pipe viewport.
+ *
+ * Returns FALSE when a viewport object already exists, the bypass mode is
+ * unchanged and either bypass is active or the viewport itself is not dirty;
+ * returns TRUE after installing a new state object otherwise.
+ */
+static boolean
+nv30_state_viewport_validate(struct nv30_context *nv30)
+{
+	static const float identity[8] = {
+		0.0, 0.0, 0.0, 0.0,	/* translate */
+		1.0, 1.0, 1.0, 0.0	/* scale */
+	};
+	struct pipe_viewport_state *vpt = &nv30->viewport;
+	struct nouveau_stateobj *so;
+	/* A check on nv30->render_mode == HW is disabled for now. */
+	unsigned bypass = nv30->rasterizer->pipe.bypass_clipping ? 1 : 0;
+	unsigned c;
+
+	if (nv30->state.hw[NV30_STATE_VIEWPORT] &&
+	    nv30->state.viewport_bypass == bypass &&
+	    (bypass || !(nv30->dirty & NV30_NEW_VIEWPORT)))
+		return FALSE;
+	nv30->state.viewport_bypass = bypass;
+
+	so = so_new(11, 0);
+	so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8);
+	if (bypass) {
+		for (c = 0; c < 8; c++)
+			so_data(so, fui(identity[c]));
+	} else {
+		for (c = 0; c < 4; c++)
+			so_data(so, fui(vpt->translate[c]));
+		for (c = 0; c < 4; c++)
+			so_data(so, fui(vpt->scale[c]));
+	}
+
+	/* Method 0x1d78 is not fully understood.  The DDX writes 0x110 here
+	 * when it passes pre-transformed vertices to the GPU, which fixes
+	 * rendering of filled polygons (points and lines are unaffected); the
+	 * best guess is that it bypasses some culling/clipping stage.
+	 * Per-mode values (1 without bypass, 0x110 with bypass) used to be
+	 * emitted here but are disabled.
+	 * TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001
+	 */
+	so_method(so, nv30->screen->rankine, 0x1d78, 1);
+	so_data  (so, 1);
+
+	so_ref(so, &nv30->state.hw[NV30_STATE_VIEWPORT]);
+	return TRUE;
+}
+
+/*
+ * State entry for the viewport transform.  It lists both NV30_NEW_VIEWPORT
+ * and NV30_NEW_RAST because the validate callback reads the viewport as well
+ * as the rasterizer's bypass_clipping flag.
+ */
+struct nv30_state_entry nv30_state_viewport = {
+ .validate = nv30_state_viewport_validate,
+ .dirty = {
+ .pipe = NV30_NEW_VIEWPORT | NV30_NEW_RAST,
+ .hw = NV30_STATE_VIEWPORT
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_zsa.c b/src/gallium/drivers/nv30/nv30_state_zsa.c
new file mode 100644
index 0000000000..0940b7269b
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_zsa.c
@@ -0,0 +1,17 @@
+#include "nv30_context.h"
+
+/*
+ * Point the NV30_STATE_ZSA hardware slot at the state object that belongs to
+ * the currently bound depth/stencil/alpha CSO.  Always returns TRUE.
+ */
+static boolean
+nv30_state_zsa_validate(struct nv30_context *nv30)
+{
+	struct nouveau_stateobj **slot = &nv30->state.hw[NV30_STATE_ZSA];
+
+	so_ref(nv30->zsa->so, slot);
+	return TRUE;
+}
+
+/*
+ * State entry for depth/stencil/alpha: pairs the validate callback with the
+ * NV30_NEW_ZSA pipe dirty flag and the NV30_STATE_ZSA hardware state slot.
+ */
+struct nv30_state_entry nv30_state_zsa = {
+ .validate = nv30_state_zsa_validate,
+ .dirty = {
+ .pipe = NV30_NEW_ZSA,
+ .hw = NV30_STATE_ZSA
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c
new file mode 100644
index 0000000000..0f8dc12045
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_surface.c
@@ -0,0 +1,72 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv30_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+/*
+ * pipe_context::surface_copy hook: copy a width x height rectangle from src
+ * to dest with the screen's 2D engine.
+ *
+ * With do_flip set the rectangle is copied one row at a time, source rows
+ * ascending from srcy while destination rows descend from desty + height.
+ *
+ * NOTE(review): the flipped path writes destination rows desty + height down
+ * to desty + 1, not desty + height - 1 down to desty -- confirm that this
+ * one-row offset is intended.
+ */
+static void
+nv30_surface_copy(struct pipe_context *pipe, boolean do_flip,
+		  struct pipe_surface *dest, unsigned destx, unsigned desty,
+		  struct pipe_surface *src, unsigned srcx, unsigned srcy,
+		  unsigned width, unsigned height)
+{
+	struct nv04_surface_2d *eng2d = nv30_context(pipe)->screen->eng2d;
+	unsigned row;
+
+	if (!do_flip) {
+		eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy,
+			    width, height);
+		return;
+	}
+
+	for (row = 0; row < height; row++) {
+		eng2d->copy(eng2d, dest, destx, desty + height - row, src,
+			    srcx, srcy + row, width, 1);
+	}
+}
+
+/*
+ * pipe_context::surface_fill hook: fill a width x height rectangle of dest
+ * with value, using the screen's 2D engine.
+ */
+static void
+nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+		  unsigned destx, unsigned desty, unsigned width,
+		  unsigned height, unsigned value)
+{
+	struct nv04_surface_2d *eng2d = nv30_context(pipe)->screen->eng2d;
+
+	eng2d->fill(eng2d, dest, destx, desty, width, height, value);
+}
+
+/*
+ * Install the surface fill/copy hooks of this file into the context's
+ * pipe_context function table.
+ */
+void
+nv30_init_surface_functions(struct nv30_context *nv30)
+{
+	struct pipe_context *pipe = &nv30->pipe;
+
+	pipe->surface_fill = nv30_surface_fill;
+	pipe->surface_copy = nv30_surface_copy;
+}
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
new file mode 100644
index 0000000000..2d6d48ac16
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -0,0 +1,556 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nouveau_util.h"
+
+#define FORCE_SWTNL 0
+
+/*
+ * Translate a pipe vertex format into the hardware vertex-format type and
+ * the number of components.
+ *
+ * Returns 0 and fills *fmt and *ncomp for a supported format.  Returns 1,
+ * after logging the format name, for anything else; *fmt and *ncomp are left
+ * untouched in that case.
+ */
+static INLINE int
+nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+{
+	unsigned hw_type, comps;
+
+	switch (pipe) {
+	case PIPE_FORMAT_R32_FLOAT:
+		hw_type = NV34TCL_VTXFMT_TYPE_FLOAT;  comps = 1; break;
+	case PIPE_FORMAT_R32G32_FLOAT:
+		hw_type = NV34TCL_VTXFMT_TYPE_FLOAT;  comps = 2; break;
+	case PIPE_FORMAT_R32G32B32_FLOAT:
+		hw_type = NV34TCL_VTXFMT_TYPE_FLOAT;  comps = 3; break;
+	case PIPE_FORMAT_R32G32B32A32_FLOAT:
+		hw_type = NV34TCL_VTXFMT_TYPE_FLOAT;  comps = 4; break;
+
+	case PIPE_FORMAT_R8_UNORM:
+		hw_type = NV34TCL_VTXFMT_TYPE_UBYTE;  comps = 1; break;
+	case PIPE_FORMAT_R8G8_UNORM:
+		hw_type = NV34TCL_VTXFMT_TYPE_UBYTE;  comps = 2; break;
+	case PIPE_FORMAT_R8G8B8_UNORM:
+		hw_type = NV34TCL_VTXFMT_TYPE_UBYTE;  comps = 3; break;
+	case PIPE_FORMAT_R8G8B8A8_UNORM:
+		hw_type = NV34TCL_VTXFMT_TYPE_UBYTE;  comps = 4; break;
+
+	case PIPE_FORMAT_R16_SSCALED:
+		hw_type = NV34TCL_VTXFMT_TYPE_USHORT; comps = 1; break;
+	case PIPE_FORMAT_R16G16_SSCALED:
+		hw_type = NV34TCL_VTXFMT_TYPE_USHORT; comps = 2; break;
+	case PIPE_FORMAT_R16G16B16_SSCALED:
+		hw_type = NV34TCL_VTXFMT_TYPE_USHORT; comps = 3; break;
+	case PIPE_FORMAT_R16G16B16A16_SSCALED:
+		hw_type = NV34TCL_VTXFMT_TYPE_USHORT; comps = 4; break;
+
+	default:
+		NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe));
+		return 1;
+	}
+
+	*fmt = hw_type;
+	*ncomp = comps;
+	return 0;
+}
+
+/*
+ * Record which index buffer (if any) the hardware should fetch indices from.
+ *
+ * Returns TRUE when the hardware index-buffer path can be used: ib is set,
+ * the screen reports NOUVEAU_CAP_HW_IDXBUF and the index size is 2 or 4
+ * bytes.  NV30_NEW_ARRAYS is flagged when the buffer or its format differs
+ * from what was recorded before.  Returns FALSE otherwise; a NULL ib also
+ * resets the recorded buffer and format.
+ */
+static boolean
+nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib,
+		    unsigned ib_size)
+{
+	struct pipe_screen *pscreen = &nv30->screen->pipe;
+	unsigned type;
+
+	if (!ib) {
+		nv30->idxbuf = NULL;
+		nv30->idxbuf_format = 0xdeadbeef;
+		return FALSE;
+	}
+
+	if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1)
+		return FALSE;
+
+	if (ib_size == 2)
+		type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
+	else if (ib_size == 4)
+		type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
+	else
+		return FALSE;
+
+	/* Nothing changed: no need to re-validate the vertex arrays. */
+	if (ib == nv30->idxbuf && type == nv30->idxbuf_format)
+		return TRUE;
+
+	nv30->dirty |= NV30_NEW_ARRAYS;
+	nv30->idxbuf = ib;
+	nv30->idxbuf_format = type;
+	return TRUE;
+}
+
+/*
+ * Emit a constant vertex attribute as immediate data: the attribute value is
+ * read from the vertex buffer and appended to the state object `so` with the
+ * VTX_ATTR_{1,2,3,4}F method matching its component count.
+ *
+ * Only float formats are handled.  Returns TRUE when the attribute was added
+ * to `so`, FALSE when the format is unsupported or the buffer cannot be
+ * mapped (nothing is added to `so` then).
+ *
+ * Changes over the previous revision:
+ *  - a failed buffer_map() is detected instead of dereferencing the result;
+ *  - the byte offset is applied through a uint8_t pointer instead of relying
+ *    on arithmetic on void *, which is a compiler extension;
+ *  - non-float formats are rejected before the buffer is mapped.
+ */
+static boolean
+nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so,
+		       int attrib, struct pipe_vertex_element *ve,
+		       struct pipe_vertex_buffer *vb)
+{
+	struct pipe_winsys *ws = nv30->pipe.winsys;
+	struct nouveau_grobj *rankine = nv30->screen->rankine;
+	boolean emitted = TRUE;
+	unsigned type, ncomp;
+	const float *v;
+	void *map;
+
+	if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp))
+		return FALSE;
+
+	if (type != NV34TCL_VTXFMT_TYPE_FLOAT)
+		return FALSE;
+
+	map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+	if (!map)
+		return FALSE;
+
+	v = (const float *)((const uint8_t *)map +
+			    vb->buffer_offset + ve->src_offset);
+
+	switch (ncomp) {
+	case 4:
+		so_method(so, rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4);
+		so_data  (so, fui(v[0]));
+		so_data  (so, fui(v[1]));
+		so_data  (so, fui(v[2]));
+		so_data  (so, fui(v[3]));
+		break;
+	case 3:
+		so_method(so, rankine, NV34TCL_VTX_ATTR_3F_X(attrib), 3);
+		so_data  (so, fui(v[0]));
+		so_data  (so, fui(v[1]));
+		so_data  (so, fui(v[2]));
+		break;
+	case 2:
+		so_method(so, rankine, NV34TCL_VTX_ATTR_2F_X(attrib), 2);
+		so_data  (so, fui(v[0]));
+		so_data  (so, fui(v[1]));
+		break;
+	case 1:
+		so_method(so, rankine, NV34TCL_VTX_ATTR_1F(attrib), 1);
+		so_data  (so, fui(v[0]));
+		break;
+	default:
+		emitted = FALSE;
+		break;
+	}
+
+	/* Single unmap for every path that mapped the buffer. */
+	ws->buffer_unmap(ws, vb->buffer);
+
+	return emitted;
+}
+
+/*
+ * pipe_context::draw_arrays implementation: draw `count` sequential vertices
+ * starting at `start` as primitives of type `mode`.
+ *
+ * Returns FALSE when state validation fails (the software fallback call is
+ * commented out), TRUE after the draw has been emitted and flushed.
+ */
+boolean
+nv30_draw_arrays(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_channel *chan = nv30->nvws->channel;
+ unsigned restart = 0;
+
+ /* Non-indexed draw: drop any recorded hardware index buffer. */
+ nv30_vbo_set_idxbuf(nv30, NULL, 0);
+ if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
+ /*return nv30_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);*/
+ return FALSE;
+ }
+
+ while (count) {
+ unsigned vc, nr;
+
+ nv30_state_emit(nv30);
+
+ /* Ask how many vertices can go out in this pass; a result of
+ * zero fires the ring and retries.
+ */
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ /* First the remainder (vc modulo 256) as one batch word:
+ * vertex count - 1 in bits 24 and up, first vertex below.
+ */
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ /* Then full batches of 0x100 vertices, at most 2047 batch
+ * words per method packet.
+ */
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ /* Continue from the restart position reported by the split. */
+ start = restart;
+ }
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+/*
+ * Draw `count` indexed vertices by pushing 8-bit indices inline through the
+ * command stream.  `ib` is the CPU mapping of the index buffer and `start`
+ * the first index to use.
+ */
+static INLINE void
+nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv30->nvws->channel;
+
+ while (count) {
+ uint8_t *elts = (uint8_t *)ib + start;
+ unsigned vc, push, restart = 0;
+
+ nv30_state_emit(nv30);
+
+ /* A split result of zero fires the ring and retries. */
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ /* An odd count sends its first index on its own so that the
+ * rest can be packed in pairs.
+ */
+ if (vc & 1) {
+ BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ /* Two indices per dword: the earlier one in the low half. */
+ BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+/*
+ * Draw `count` indexed vertices by pushing 16-bit indices inline through the
+ * command stream.  `ib` is the CPU mapping of the index buffer and `start`
+ * the first index to use.
+ */
+static INLINE void
+nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv30->nvws->channel;
+
+ while (count) {
+ uint16_t *elts = (uint16_t *)ib + start;
+ unsigned vc, push, restart = 0;
+
+ nv30_state_emit(nv30);
+
+ /* A split result of zero fires the ring and retries. */
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ /* An odd count sends its first index on its own so that the
+ * rest can be packed in pairs.
+ */
+ if (vc & 1) {
+ BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ /* Two indices per dword: the earlier one in the low half. */
+ BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+/*
+ * Draw `count` indexed vertices by pushing 32-bit indices inline through the
+ * command stream.  `ib` is the CPU mapping of the index buffer and `start`
+ * the first index to use.
+ */
+static INLINE void
+nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv30->nvws->channel;
+
+ while (count) {
+ uint32_t *elts = (uint32_t *)ib + start;
+ unsigned vc, push, restart = 0;
+
+ nv30_state_emit(nv30);
+
+ /* A split result of zero fires the ring and retries. */
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ /* One index per dword, at most 2047 dwords per method packet. */
+ while (vc) {
+ push = MIN2(vc, 2047);
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push);
+ OUT_RINGp (elts, push);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+/*
+ * Draw indexed primitives by mapping the index buffer on the CPU and pushing
+ * the indices inline through the command stream.
+ *
+ * ib_size is the size of one index in bytes (1, 2 or 4); any other value is
+ * logged and nothing is drawn.  Returns FALSE when the index buffer cannot be
+ * mapped, TRUE otherwise.
+ *
+ * Fix: the mapping failure check used to test `ib` (the buffer handle that
+ * had already been passed to buffer_map) instead of the returned mapping, so
+ * a failed map went on to dereference a NULL pointer.
+ */
+static boolean
+nv30_draw_elements_inline(struct pipe_context *pipe,
+			  struct pipe_buffer *ib, unsigned ib_size,
+			  unsigned mode, unsigned start, unsigned count)
+{
+	struct nv30_context *nv30 = nv30_context(pipe);
+	struct pipe_winsys *ws = pipe->winsys;
+	void *map;
+
+	map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
+	if (!map) {
+		NOUVEAU_ERR("failed mapping ib\n");
+		return FALSE;
+	}
+
+	switch (ib_size) {
+	case 1:
+		nv30_draw_elements_u08(nv30, map, mode, start, count);
+		break;
+	case 2:
+		nv30_draw_elements_u16(nv30, map, mode, start, count);
+		break;
+	case 4:
+		nv30_draw_elements_u32(nv30, map, mode, start, count);
+		break;
+	default:
+		NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
+		break;
+	}
+
+	ws->buffer_unmap(ws, ib);
+	return TRUE;
+}
+
+/*
+ * Draw `count` indexed vertices with the indices fetched by the hardware;
+ * only index ranges are pushed through the command stream.  The index buffer
+ * itself is not referenced here (nv30_vbo_validate() adds it to the vertex
+ * buffer state object).  Always returns TRUE.
+ */
+static boolean
+nv30_draw_elements_vbo(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_channel *chan = nv30->nvws->channel;
+ unsigned restart = 0;
+
+ while (count) {
+ unsigned nr, vc;
+
+ nv30_state_emit(nv30);
+
+ /* A split result of zero fires the ring and retries. */
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ /* First the remainder (vc modulo 256) as one batch word:
+ * index count - 1 in bits 24 and up, first index below.
+ */
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ /* Then full batches of 0x100 indices, at most 2047 batch
+ * words per method packet.
+ */
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ return TRUE;
+}
+
+/*
+ * pipe_context::draw_elements implementation: draw `count` indexed vertices
+ * of primitive type `mode`, taking indices of indexSize bytes from
+ * indexBuffer.
+ *
+ * The hardware index-buffer path is used when nv30_vbo_set_idxbuf() accepts
+ * the buffer; otherwise the indices are pushed inline.  Returns FALSE when
+ * state validation fails (a software TnL fallback is not wired up yet), TRUE
+ * after the draw has been emitted and flushed.
+ */
+boolean
+nv30_draw_elements(struct pipe_context *pipe,
+		   struct pipe_buffer *indexBuffer, unsigned indexSize,
+		   unsigned mode, unsigned start, unsigned count)
+{
+	struct nv30_context *nv30 = nv30_context(pipe);
+	boolean hw_idxbuf = nv30_vbo_set_idxbuf(nv30, indexBuffer, indexSize);
+
+	/* This is where nv30_draw_elements_swtnl() would take over. */
+	if (FORCE_SWTNL || !nv30_state_validate(nv30))
+		return FALSE;
+
+	if (!hw_idxbuf) {
+		nv30_draw_elements_inline(pipe, indexBuffer, indexSize,
+					  mode, start, count);
+	} else {
+		nv30_draw_elements_vbo(pipe, mode, start, count);
+	}
+
+	pipe->flush(pipe, 0, NULL);
+	return TRUE;
+}
+
+/*
+ * Build the vertex array state: buffer addresses (plus the index buffer, if
+ * one is recorded), per-attribute formats and immediate values for constant
+ * (stride 0) attributes.  The three state objects are stored in the
+ * NV30_STATE_VTXBUF, NV30_STATE_VTXFMT and NV30_STATE_VTXATTR slots, which
+ * are flagged dirty here directly.
+ *
+ * Always returns FALSE: on the edge-flag and unsupported-format paths nothing
+ * is installed, and on success the dirty bits have already been set by hand
+ * (the state entry lists no hardware slot of its own).
+ *
+ * Fixes over the previous revision:
+ *  - the constant-attribute object (sattr) is released on the
+ *    unsupported-format path instead of being leaked;
+ *  - vtxbuf is sized for 22 dwords instead of 20: 1 method header + 16
+ *    addresses, 1 + 2 for the index buffer and 1 + 1 for method 0x1710.
+ */
+static boolean
+nv30_vbo_validate(struct nv30_context *nv30)
+{
+	struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL;
+	struct nouveau_grobj *rankine = nv30->screen->rankine;
+	struct pipe_buffer *ib = nv30->idxbuf;
+	unsigned ib_format = nv30->idxbuf_format;
+	unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+	int hw;
+
+	if (nv30->edgeflags) {
+		/*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/
+		return FALSE;
+	}
+
+	vtxbuf = so_new(22, 18);
+	so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
+	vtxfmt = so_new(17, 0);
+	so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr);
+
+	for (hw = 0; hw < nv30->vtxelt_nr; hw++) {
+		struct pipe_vertex_element *ve;
+		struct pipe_vertex_buffer *vb;
+		unsigned type, ncomp;
+
+		ve = &nv30->vtxelt[hw];
+		vb = &nv30->vtxbuf[ve->vertex_buffer_index];
+
+		/* Stride 0: try to emit the value as an immediate and leave
+		 * the array slot empty; fall through to the regular path when
+		 * that is not possible.
+		 */
+		if (!vb->stride) {
+			if (!sattr)
+				sattr = so_new(16 * 5, 0);
+
+			if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) {
+				so_data(vtxbuf, 0);
+				so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT);
+				continue;
+			}
+		}
+
+		if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
+			/*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/
+			so_ref(NULL, &vtxbuf);
+			so_ref(NULL, &vtxfmt);
+			if (sattr)
+				so_ref(NULL, &sattr);
+			return FALSE;
+		}
+
+		so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset,
+			 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+			 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+		so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
+				  (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type));
+	}
+
+	if (ib) {
+		so_method(vtxbuf, rankine, NV34TCL_IDXBUF_ADDRESS, 2);
+		so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
+		so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR,
+			  0, NV34TCL_IDXBUF_FORMAT_DMA1);
+	}
+
+	so_method(vtxbuf, rankine, 0x1710, 1);
+	so_data  (vtxbuf, 0);
+
+	so_ref(vtxbuf, &nv30->state.hw[NV30_STATE_VTXBUF]);
+	nv30->state.dirty |= (1ULL << NV30_STATE_VTXBUF);
+	so_ref(vtxfmt, &nv30->state.hw[NV30_STATE_VTXFMT]);
+	nv30->state.dirty |= (1ULL << NV30_STATE_VTXFMT);
+	so_ref(sattr, &nv30->state.hw[NV30_STATE_VTXATTR]);
+	nv30->state.dirty |= (1ULL << NV30_STATE_VTXATTR);
+	return FALSE;
+}
+
+/*
+ * State entry for the vertex arrays, tied to the NV30_NEW_ARRAYS pipe dirty
+ * flag.  .dirty.hw is 0 here; nv30_vbo_validate() sets the dirty bits of the
+ * three hardware slots it fills by itself.
+ */
+struct nv30_state_entry nv30_state_vbo = {
+ .validate = nv30_vbo_validate,
+ .dirty = {
+ .pipe = NV30_NEW_ARRAYS,
+ .hw = 0,
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
new file mode 100644
index 0000000000..d262725057
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -0,0 +1,838 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+/* TODO (at least...):
+ * 1. Indexed consts + ARL
+ * 2. Arb. swz/negation
+ * 3. NV_vp11, NV_vp2, NV_vp3 features
+ * - extra arith opcodes
+ * - branching
+ * - texture sampling
+ * - indexed attribs
+ * - indexed results
+ * 4. bugs
+ */
+
+/* Source swizzle component selectors. */
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+/* Destination writemask bits; X is the most significant of the four. */
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+/* NOTE(review): these are defined ahead of the include below, presumably
+ * because nv30_shader.h consumes them - confirm against that header.
+ */
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv30_shader.h"
+
+/* Shorthands for the nv30_sr_* source-register modifiers. swz() takes bare
+ * component letters (X, Y, Z, W) and pastes them onto SWZ_.
+ * Note that abs() defined here replaces any later use of the C library
+ * abs() in this file.
+ */
+#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv30_sr_neg((s))
+#define abs(s) nv30_sr_abs((s))
+
+/* Transient compile state used while translating one TGSI vertex shader. */
+struct nv30_vpc {
+ /* Program being filled in (instructions, constants, ir/or masks). */
+ struct nv30_vertex_program *vp;
+
+ /* Instruction most recently appended by nv30_vp_arith(). */
+ struct nv30_vertex_program_exec *vpi;
+
+ /* TGSI output index -> NV30_VP_INST_DEST_* value, filled from output
+ * declarations.
+ */
+ unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
+
+ /* Highest TGSI temporary index seen so far (-1 before any is seen). */
+ int high_temp;
+ /* Scratch temporaries handed out by temp() for the current instruction;
+ * reset to 0 at the start of each instruction.
+ */
+ int temp_temp_count;
+
+ /* One CONST register per TGSI immediate, in declaration order. */
+ struct nv30_sreg *imm;
+ unsigned nr_imm;
+};
+
+/* Hand out the next scratch temporary for the instruction being translated.
+ * Scratch registers are numbered upwards from the slot directly above the
+ * highest TGSI temporary seen so far.
+ */
+static struct nv30_sreg
+temp(struct nv30_vpc *vpc)
+{
+	const int scratch = vpc->temp_temp_count++;
+
+	return nv30_sr(NV30SR_TEMP, scratch + vpc->high_temp + 1);
+}
+
+/* Return a CONST source register for a pipe constant or an immediate.
+ *
+ * pipe >= 0: index into the bound constant buffer. An existing table entry
+ *            with the same index is reused; otherwise a new one is appended.
+ * pipe <  0: immediate; a new entry holding x/y/z/w is always appended.
+ *
+ * The returned register index is the position in vp->consts, not a hardware
+ * constant slot.
+ *
+ * On allocation failure the existing table and nr_consts are left untouched,
+ * an error is logged and a NV30SR_NONE register is returned.
+ */
+static struct nv30_sreg
+constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+	struct nv30_vertex_program *vp = vpc->vp;
+	struct nv30_vertex_program_data *vpd;
+	int idx;
+
+	if (pipe >= 0) {
+		for (idx = 0; idx < vp->nr_consts; idx++) {
+			if (vp->consts[idx].index == pipe)
+				return nv30_sr(NV30SR_CONST, idx);
+		}
+	}
+
+	/* Grow through a temporary: assigning realloc()'s result straight to
+	 * vp->consts would leak the old table and leave nr_consts one too
+	 * large if the allocation fails.
+	 */
+	vpd = realloc(vp->consts, sizeof(*vpd) * (vp->nr_consts + 1));
+	if (!vpd) {
+		NOUVEAU_ERR("out of memory\n");
+		assert(0);
+		return nv30_sr(NV30SR_NONE, 0);
+	}
+	vp->consts = vpd;
+
+	idx = vp->nr_consts++;
+	vpd = &vp->consts[idx];
+
+	vpd->index = pipe;
+	vpd->value[0] = x;
+	vpd->value[1] = y;
+	vpd->value[2] = z;
+	vpd->value[3] = w;
+	return nv30_sr(NV30SR_CONST, idx);
+}
+
+/* Shorthand for nv30_vp_arith(): `o` is the opcode name without its
+ * NV30_VP_INST_ prefix (e.g. OP_MOV).
+ */
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+
+/* Encode source operand `src` into operand slot `pos` (0, 1 or 2) of the
+ * four-dword instruction `hw`.
+ */
+static void
+emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV30SR_TEMP:
+ sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
+ sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
+ break;
+ case NV30SR_INPUT:
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ /* Record the attribute as read; the input index is stored once per
+ * instruction in hw[1] rather than in the operand itself.
+ */
+ vp->ir |= (1 << src.index);
+ hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
+ break;
+ case NV30SR_CONST:
+ sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ /* Only one constant index may be referenced per instruction. It is
+ * only remembered here; nv30_vertprog_validate() writes it into
+ * hw[1] once the constant's hardware slot is known.
+ */
+ assert(vpc->vpi->const_index == -1 ||
+ vpc->vpi->const_index == src.index);
+ vpc->vpi->const_index = src.index;
+ break;
+ case NV30SR_NONE:
+ /* Unused operand: encoded with the INPUT register type. */
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV30_VP_SRC_NEGATE;
+
+ /* The abs modifier lives in hw[0], one bit per operand slot. */
+ if (src.abs)
+ hw[0] |= (1 << (21 + pos));
+
+ sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) |
+ (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT));
+
+/*
+ * |VVV|
+ * d°.°b
+ * \u/
+ *
+ */
+
+ /* Scatter the packed operand into the instruction: src0 straddles
+ * hw[1]/hw[2], src1 fits in hw[2], src2 straddles hw[2]/hw[3].
+ */
+ switch (pos) {
+ case 0:
+ hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >>
+ NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT;
+ hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) <<
+ NV30_VP_INST_SRC0L_SHIFT;
+ break;
+ case 1:
+ hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT;
+ break;
+ case 2:
+ hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >>
+ NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT;
+ hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) <<
+ NV30_VP_INST_SRC2L_SHIFT;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+/* Encode destination register `dst` into the four-dword instruction `hw`.
+ * Only TEMP and OUTPUT destinations are accepted; anything else asserts.
+ * `slot` is not used by this function.
+ */
+static void
+emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+
+ switch (dst.type) {
+ case NV30SR_TEMP:
+ hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
+ break;
+ case NV30SR_OUTPUT:
+ /* Track which results the program writes in the vp->or bitmask.
+ * Destinations not listed here (e.g. position) set no bit.
+ */
+ switch (dst.index) {
+ case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
+ case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ default:
+ break;
+ }
+
+ hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
+ /* NOTE(review): setting the whole temp-destination mask presumably
+ * means "no temp write"; bit 20 is undocumented here - confirm.
+ */
+ hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+
+ /*XXX: no way this is entirely correct, someone needs to
+ * figure out what exactly it is.
+ */
+ hw[3] |= 0x800;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+/* Append one instruction to the program and encode it.
+ *
+ * slot: 0 selects the vector writemask fields, non-zero the scalar ones.
+ * op:   NV30_VP_INST_OP_* opcode value.
+ * dst/mask: destination register and MASK_* writemask.
+ * s0..s2: source operands for slots 0..2 (NV30SR_NONE when unused).
+ *
+ * On return vpc->vpi points at the new instruction.
+ */
+static void
+nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
+ struct nv30_sreg dst, int mask,
+ struct nv30_sreg s0, struct nv30_sreg s1,
+ struct nv30_sreg s2)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+ uint32_t *hw;
+
+ /* NOTE(review): the realloc result is not checked; on failure the old
+ * array is leaked and the memset below dereferences NULL.
+ */
+ vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+ vpc->vpi = &vp->insns[vp->nr_insns - 1];
+ memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+ /* -1 = no constant referenced yet; emit_src() fills it in. */
+ vpc->vpi->const_index = -1;
+
+ hw = vpc->vpi->data;
+
+ /* Condition code NV30_VP_INST_COND_TR with an identity (xyzw)
+ * condition swizzle.
+ */
+ hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
+ hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
+ (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
+ (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
+ (3 << NV30_VP_INST_COND_SWZ_W_SHIFT));
+
+ /* NOTE(review): the opcode is always placed in the VEC opcode field,
+ * even when slot selects the scalar unit - confirm this is intended.
+ */
+ hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
+// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);
+
+ /* The writemask field depends on both the destination kind
+ * (output vs temp) and the slot (scalar vs vector).
+ */
+ if (dst.type == NV30SR_OUTPUT) {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
+ } else {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
+ }
+
+ emit_dst(vpc, hw, slot, dst);
+ emit_src(vpc, hw, 0, s0);
+ emit_src(vpc, hw, 1, s1);
+ emit_src(vpc, hw, 2, s2);
+}
+
+/* Translate a TGSI source operand into an nv30_sreg, carrying over its
+ * absolute-value, negate and swizzle modifiers.
+ *
+ * INPUT and TEMPORARY map directly by index (TEMPORARY also raises
+ * vpc->high_temp), CONSTANT goes through constant(), and IMMEDIATE is
+ * looked up in vpc->imm.
+ *
+ * An unsupported register file is logged and yields a NV30SR_NONE register,
+ * so the function never returns an uninitialised struct.
+ */
+static INLINE struct nv30_sreg
+tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+	struct nv30_sreg src;
+
+	switch (fsrc->SrcRegister.File) {
+	case TGSI_FILE_INPUT:
+		src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+		break;
+	case TGSI_FILE_CONSTANT:
+		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+		break;
+	case TGSI_FILE_IMMEDIATE:
+		src = vpc->imm[fsrc->SrcRegister.Index];
+		break;
+	case TGSI_FILE_TEMPORARY:
+		if (vpc->high_temp < fsrc->SrcRegister.Index)
+			vpc->high_temp = fsrc->SrcRegister.Index;
+		src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+		break;
+	default:
+		NOUVEAU_ERR("bad src file\n");
+		/* Give the result a defined value; the modifier stores below
+		 * would otherwise write into an indeterminate struct.
+		 */
+		src = nv30_sr(NV30SR_NONE, 0);
+		break;
+	}
+
+	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.negate = fsrc->SrcRegister.Negate;
+	src.swz[0] = fsrc->SrcRegister.SwizzleX;
+	src.swz[1] = fsrc->SrcRegister.SwizzleY;
+	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	return src;
+}
+
+/* Translate a TGSI destination operand into an nv30_sreg.
+ *
+ * OUTPUT registers are mapped through vpc->output_map; TEMPORARY registers
+ * map by index and raise vpc->high_temp.
+ *
+ * An unsupported register file is logged and yields a NV30SR_NONE register
+ * (which emit_dst() rejects with an assert) rather than an uninitialised
+ * struct.
+ */
+static INLINE struct nv30_sreg
+tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+	struct nv30_sreg dst;
+
+	switch (fdst->DstRegister.File) {
+	case TGSI_FILE_OUTPUT:
+		dst = nv30_sr(NV30SR_OUTPUT,
+			      vpc->output_map[fdst->DstRegister.Index]);
+
+		break;
+	case TGSI_FILE_TEMPORARY:
+		dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+		if (vpc->high_temp < dst.index)
+			vpc->high_temp = dst.index;
+		break;
+	default:
+		NOUVEAU_ERR("bad dst file\n");
+		/* Return a defined value instead of stack garbage. */
+		dst = nv30_sr(NV30SR_NONE, 0);
+		break;
+	}
+
+	return dst;
+}
+
+/* Convert a TGSI_WRITEMASK_* bitfield into the MASK_* encoding used by
+ * this file.
+ */
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+	return ((tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0) |
+	       ((tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0);
+}
+
+/* Translate one TGSI instruction into one or more hardware instructions.
+ *
+ * Returns TRUE on success (including for TGSI_OPCODE_END, which emits
+ * nothing) and FALSE for an unsupported source register file or opcode.
+ */
+static boolean
+nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nv30_sreg src[3], dst, tmp;
+ struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+ int mask;
+ int ai = -1, ci = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ /* Pass 1: translate TEMPORARY sources first so vpc->high_temp is up to
+ * date before temp() hands out scratch registers in pass 2.
+ */
+ vpc->temp_temp_count = 0;
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ /* Pass 2: an instruction can reference only one input index (ai) and
+ * one constant index (ci). Any further, different input/constant is
+ * first copied into a scratch temp with an extra MOV.
+ */
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ /*XXX: index comparison is broken now that consts come from
+ * two different register files.
+ */
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_IMMEDIATE:
+ if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+ /* Opcode dispatch. Operations emitted with slot 1 take their single
+ * operand in source slot 2; ADD/SUB take their second operand in
+ * source slot 2 as well.
+ */
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ /* pow(a, b) expanded as EX2(b * LG2(a)) through a scratch temp. */
+ tmp = temp(vpc);
+ arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, 1, OP_EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RET:
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ /* Cross product in two steps:
+ * tmp = src0.zxyy * src1.yzxx
+ * dst = src0.yzxx * src1.zxyy - tmp (W is never written)
+ */
+ tmp = temp(vpc);
+ arith(vpc, 0, OP_MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/* Record the hardware destination for a TGSI output declaration.
+ *
+ * The declaration's semantic name/index is mapped to a
+ * NV30_VP_INST_DEST_* value and stored in vpc->output_map at the first
+ * register of the declared range. Returns FALSE (after logging) for a
+ * semantic or semantic index that is not supported.
+ */
+static boolean
+nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
+				const struct tgsi_full_declaration *fdec)
+{
+	const unsigned sem_index = fdec->Semantic.SemanticIndex;
+	int dest;
+
+	switch (fdec->Semantic.SemanticName) {
+	case TGSI_SEMANTIC_POSITION:
+		dest = NV30_VP_INST_DEST_POS;
+		break;
+	case TGSI_SEMANTIC_COLOR:
+		switch (sem_index) {
+		case 0:
+			dest = NV30_VP_INST_DEST_COL0;
+			break;
+		case 1:
+			dest = NV30_VP_INST_DEST_COL1;
+			break;
+		default:
+			NOUVEAU_ERR("bad colour semantic index\n");
+			return FALSE;
+		}
+		break;
+	case TGSI_SEMANTIC_BCOLOR:
+		switch (sem_index) {
+		case 0:
+			dest = NV30_VP_INST_DEST_BFC0;
+			break;
+		case 1:
+			dest = NV30_VP_INST_DEST_BFC1;
+			break;
+		default:
+			NOUVEAU_ERR("bad bcolour semantic index\n");
+			return FALSE;
+		}
+		break;
+	case TGSI_SEMANTIC_FOG:
+		dest = NV30_VP_INST_DEST_FOGC;
+		break;
+	case TGSI_SEMANTIC_PSIZE:
+		dest = NV30_VP_INST_DEST_PSZ;
+		break;
+	case TGSI_SEMANTIC_GENERIC:
+		if (sem_index > 7) {
+			NOUVEAU_ERR("bad generic semantic index\n");
+			return FALSE;
+		}
+		dest = NV30_VP_INST_DEST_TC(sem_index);
+		break;
+	default:
+		NOUVEAU_ERR("bad output semantic\n");
+		return FALSE;
+	}
+
+	vpc->output_map[fdec->DeclarationRange.First] = dest;
+	return TRUE;
+}
+
+/* Pre-pass over the shader tokens: count the immediates and allocate
+ * vpc->imm to hold one register per immediate.
+ *
+ * Returns TRUE on success. Returns FALSE if the immediate table cannot be
+ * allocated; vpc->imm is then left NULL and the caller must abandon the
+ * translation.
+ */
+static boolean
+nv30_vertprog_prepare(struct nv30_vpc *vpc)
+{
+	struct tgsi_parse_context p;
+	int nr_imm = 0;
+
+	tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+	while (!tgsi_parse_end_of_tokens(&p)) {
+		const union tgsi_full_token *tok = &p.FullToken;
+
+		tgsi_parse_token(&p);
+		switch(tok->Token.Type) {
+		case TGSI_TOKEN_TYPE_IMMEDIATE:
+			nr_imm++;
+			break;
+		default:
+			break;
+		}
+	}
+	tgsi_parse_free(&p);
+
+	if (nr_imm) {
+		vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg));
+		/* Report the failure instead of only asserting: with asserts
+		 * compiled out the translator would later write through a
+		 * NULL vpc->imm.
+		 */
+		if (!vpc->imm) {
+			NOUVEAU_ERR("out of memory\n");
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+/* Translate vp's TGSI tokens into hardware instructions and constants.
+ *
+ * On success the last instruction is tagged NV30_VP_INST_LAST and
+ * vp->translated is set to TRUE. On any failure vp->translated is left
+ * unchanged, which is how the caller detects the error. `nv30` is not
+ * used by this function.
+ */
+static void
+nv30_vertprog_translate(struct nv30_context *nv30,
+			struct nv30_vertex_program *vp)
+{
+	struct tgsi_parse_context parse;
+	struct nv30_vpc *vpc = NULL;
+
+	tgsi_dump(vp->pipe.tokens,0);
+
+	vpc = CALLOC(1, sizeof(struct nv30_vpc));
+	if (!vpc)
+		return;
+	vpc->vp = vp;
+	vpc->high_temp = -1;
+
+	if (!nv30_vertprog_prepare(vpc)) {
+		FREE(vpc);
+		return;
+	}
+
+	tgsi_parse_init(&parse, vp->pipe.tokens);
+
+	while (!tgsi_parse_end_of_tokens(&parse)) {
+		tgsi_parse_token(&parse);
+
+		switch (parse.FullToken.Token.Type) {
+		case TGSI_TOKEN_TYPE_DECLARATION:
+		{
+			const struct tgsi_full_declaration *fdec;
+			fdec = &parse.FullToken.FullDeclaration;
+			switch (fdec->Declaration.File) {
+			case TGSI_FILE_OUTPUT:
+				if (!nv30_vertprog_parse_decl_output(vpc, fdec))
+					goto out_err;
+				break;
+			default:
+				break;
+			}
+		}
+			break;
+		case TGSI_TOKEN_TYPE_IMMEDIATE:
+		{
+			const struct tgsi_full_immediate *imm;
+
+			imm = &parse.FullToken.FullImmediate;
+			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+			assert(imm->Immediate.NrTokens == 4 + 1);
+			/* Immediates become anonymous (index -1) constants. */
+			vpc->imm[vpc->nr_imm++] =
+				constant(vpc, -1,
+					 imm->u.ImmediateFloat32[0].Float,
+					 imm->u.ImmediateFloat32[1].Float,
+					 imm->u.ImmediateFloat32[2].Float,
+					 imm->u.ImmediateFloat32[3].Float);
+		}
+			break;
+		case TGSI_TOKEN_TYPE_INSTRUCTION:
+		{
+			const struct tgsi_full_instruction *finst;
+			finst = &parse.FullToken.FullInstruction;
+			if (!nv30_vertprog_parse_instruction(vpc, finst))
+				goto out_err;
+		}
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* A shader that produced no hardware instructions has no "last"
+	 * instruction to tag; indexing insns[nr_insns - 1] would be out of
+	 * bounds, so treat it as a translation failure.
+	 */
+	if (vp->nr_insns == 0) {
+		NOUVEAU_ERR("no instructions emitted\n");
+		goto out_err;
+	}
+
+	vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
+	vp->translated = TRUE;
+out_err:
+	tgsi_parse_free(&parse);
+	/* The immediate table is compile-time scratch owned by vpc and must
+	 * be released with it.
+	 */
+	if (vpc->imm)
+		FREE(vpc->imm);
+	FREE(vpc);
+}
+
+/* Validate hook for the bound vertex program: translate it if needed,
+ * make sure it has hardware exec and constant slots, patch constant
+ * references, and upload constants and code as required.
+ *
+ * Returns TRUE only when the program's state object replaced the one
+ * recorded in nv30->state.hw[NV30_STATE_VERTPROG]; FALSE otherwise,
+ * including when translation fails.
+ */
+static boolean
+nv30_vertprog_validate(struct nv30_context *nv30)
+{
+ struct nouveau_winsys *nvws = nv30->nvws;
+ struct pipe_winsys *ws = nv30->pipe.winsys;
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct nv30_vertex_program *vp;
+ struct pipe_buffer *constbuf;
+ boolean upload_code = FALSE, upload_data = FALSE;
+ int i;
+
+ vp = nv30->vertprog;
+ constbuf = nv30->constbuf[PIPE_SHADER_VERTEX];
+
+ /* Translate TGSI shader into hw bytecode */
+ if (!vp->translated) {
+ nv30_vertprog_translate(nv30, vp);
+ if (!vp->translated)
+ return FALSE;
+ }
+
+ /* Allocate hw vtxprog exec slots */
+ if (!vp->exec) {
+ struct nouveau_resource *heap = nv30->screen->vp_exec_heap;
+ struct nouveau_stateobj *so;
+ uint vplen = vp->nr_insns;
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+ /* Allocation failed: free other programs' exec slots (found
+ * through the heap entries' priv pointers) and retry once.
+ */
+ while (heap->next && heap->size < vplen) {
+ struct nv30_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->exec);
+ }
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+ assert(0);
+ }
+
+ /* State object that points the hardware at this program's start. */
+ so = so_new(2, 0);
+ so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
+ so_data (so, vp->exec->start);
+ so_ref(so, &vp->so);
+
+ upload_code = TRUE;
+ }
+
+ /* Allocate hw vtxprog const slots */
+ if (vp->nr_consts && !vp->data) {
+ struct nouveau_resource *heap = nv30->screen->vp_data_heap;
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+ /* Same evict-and-retry scheme as for the exec heap above. */
+ while (heap->next && heap->size < vp->nr_consts) {
+ struct nv30_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->data);
+ }
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+ assert(0);
+ }
+
+ /*XXX: handle this some day */
+ assert(vp->data->start >= vp->data_start_min);
+
+ /* Fresh slots: every constant must be re-uploaded, and the code too
+ * if the base slot moved (it embeds absolute constant indices).
+ */
+ upload_data = TRUE;
+ if (vp->data_start != vp->data->start)
+ upload_code = TRUE;
+ }
+
+ /* If exec or data segments moved we need to patch the program to
+ * fixup offsets and register IDs.
+ */
+ if (vp->exec_start != vp->exec->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv30_vertex_program_exec *vpi = &vp->insns[i];
+
+ /* Branch relocation is not implemented. */
+ if (vpi->has_branch_offset) {
+ assert(0);
+ }
+ }
+
+ vp->exec_start = vp->exec->start;
+ }
+
+ if (vp->nr_consts && vp->data_start != vp->data->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv30_vertex_program_exec *vpi = &vp->insns[i];
+
+ /* Rewrite the constant-source field with the table index recorded
+ * by emit_src() plus the base of the allocated slots.
+ */
+ if (vpi->const_index >= 0) {
+ vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK;
+ vpi->data[1] |=
+ (vpi->const_index + vp->data->start) <<
+ NV30_VP_INST_CONST_SRC_SHIFT;
+
+ }
+ }
+
+ vp->data_start = vp->data->start;
+ }
+
+ /* Update + Upload constant values */
+ if (vp->nr_consts) {
+ float *map = NULL;
+
+ if (constbuf) {
+ map = ws->buffer_map(ws, constbuf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ }
+
+ for (i = 0; i < vp->nr_consts; i++) {
+ struct nv30_vertex_program_data *vpd = &vp->consts[i];
+
+ /* index >= 0: value comes from the constant buffer; skip the
+ * upload when it is unchanged and no full upload is pending.
+ * index < 0: immediate, always uploaded with its stored value.
+ * NOTE(review): map is NULL when no constant buffer is bound (and
+ * the buffer_map result is not checked), so a program that reads
+ * pipe constants would dereference NULL here - confirm callers
+ * always bind a buffer.
+ */
+ if (vpd->index >= 0) {
+ if (!upload_data &&
+ !memcmp(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float)))
+ continue;
+ memcpy(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float));
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (i + vp->data->start);
+ OUT_RINGp ((uint32_t *)vpd->value, 4);
+ }
+
+ if (constbuf) {
+ ws->buffer_unmap(ws, constbuf);
+ }
+ }
+
+ /* Upload vtxprog */
+ if (upload_code) {
+#if 0
+ for (i = 0; i < vp->nr_insns; i++) {
+ NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ i, vp->insns[i].data[0], vp->insns[i].data[1],
+ vp->insns[i].data[2], vp->insns[i].data[3]);
+ }
+#endif
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (vp->exec->start);
+ for (i = 0; i < vp->nr_insns; i++) {
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (vp->insns[i].data, 4);
+ }
+ }
+
+ /* Report a change only when a different state object gets bound. */
+ if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) {
+ so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/* Release everything a vertex program holds: its translated instructions
+ * and constant table, its hardware exec/data allocations and its state
+ * object. The program is left marked as untranslated with all bookkeeping
+ * fields cleared.
+ */
+void
+nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp)
+{
+	struct nouveau_winsys *winsys = nv30->screen->nvws;
+
+	vp->translated = FALSE;
+
+	/* Host-side copies of the program. */
+	if (vp->nr_insns != 0) {
+		FREE(vp->insns);
+		vp->insns = NULL;
+		vp->nr_insns = 0;
+	}
+	if (vp->nr_consts != 0) {
+		FREE(vp->consts);
+		vp->consts = NULL;
+		vp->nr_consts = 0;
+	}
+
+	/* Hardware slot allocations. */
+	winsys->res_free(&vp->exec);
+	vp->exec_start = 0;
+
+	winsys->res_free(&vp->data);
+	vp->data_start = 0;
+	vp->data_start_min = 0;
+
+	/* Input/output register usage masks and the bound state object. */
+	vp->ir = 0;
+	vp->or = 0;
+	so_ref(NULL, &vp->so);
+}
+
+/* State entry for the vertex program: nv30_vertprog_validate is the
+ * validate hook, NV30_NEW_VERTPROG its pipe dirty mask and
+ * NV30_STATE_VERTPROG its hw state slot (user clip planes are disabled).
+ */
+struct nv30_state_entry nv30_state_vertprog = {
+ .validate = nv30_vertprog_validate,
+ .dirty = {
+ .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/,
+ .hw = NV30_STATE_VERTPROG,
+ }
+};
diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile
new file mode 100644
index 0000000000..8c738aefa6
--- /dev/null
+++ b/src/gallium/drivers/nv40/Makefile
@@ -0,0 +1,28 @@
+# Builds the nv40 driver library (LIBNAME) from the C sources listed below;
+# the build rules themselves come from ../../Makefile.template.
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv40
+
+C_SOURCES = \
+ nv40_clear.c \
+ nv40_context.c \
+ nv40_draw.c \
+ nv40_fragprog.c \
+ nv40_fragtex.c \
+ nv40_miptree.c \
+ nv40_query.c \
+ nv40_screen.c \
+ nv40_state.c \
+ nv40_state_blend.c \
+ nv40_state_emit.c \
+ nv40_state_fb.c \
+ nv40_state_rasterizer.c \
+ nv40_state_scissor.c \
+ nv40_state_stipple.c \
+ nv40_state_viewport.c \
+ nv40_state_zsa.c \
+ nv40_surface.c \
+ nv40_vbo.c \
+ nv40_vertprog.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c
new file mode 100644
index 0000000000..59efd620e3
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_clear.c
@@ -0,0 +1,13 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv40_context.h"
+
+/* Clear a whole surface by filling it with clearValue through the
+ * context's surface_fill hook, then mark the surface as cleared.
+ */
+void
+nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+	   unsigned clearValue)
+{
+	const unsigned fill_w = ps->width;
+	const unsigned fill_h = ps->height;
+
+	pipe->surface_fill(pipe, ps, 0, 0, fill_w, fill_h, clearValue);
+	ps->status = PIPE_SURFACE_STATUS_CLEAR;
+}
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
new file mode 100644
index 0000000000..5d325f5067
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -0,0 +1,72 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "nv40_context.h"
+#include "nv40_screen.h"
+
+/* pipe_context::flush hook: optionally emit the texture-cache flush
+ * sequence, then fire the command ring, passing `fence` to FIRE_RING.
+ */
+static void
+nv40_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ /* The ring macros below refer to this local by name. */
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ /* NOTE(review): method 0x1fd8 has no symbolic name here; writing 2
+ * then 1 is presumably the texture cache flush sequence - confirm.
+ */
+ BEGIN_RING(curie, 0x1fd8, 1);
+ OUT_RING (2);
+ BEGIN_RING(curie, 0x1fd8, 1);
+ OUT_RING (1);
+ }
+
+ FIRE_RING(fence);
+}
+
+/* pipe_context::destroy hook: tear down the draw module context (if one
+ * was created) and free the nv40 context itself.
+ */
+static void
+nv40_destroy(struct pipe_context *pipe)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+	struct draw_context *draw = ctx->draw;
+
+	if (draw != NULL) {
+		draw_destroy(draw);
+	}
+
+	FREE(ctx);
+}
+
+/* Create an nv40 pipe context on `pscreen`.
+ *
+ * pctx_id is stored in the context as given. Returns the embedded
+ * pipe_context, or NULL if the context or its draw module context cannot
+ * be allocated.
+ */
+struct pipe_context *
+nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+	struct nv40_screen *screen = nv40_screen(pscreen);
+	struct pipe_winsys *ws = pscreen->winsys;
+	struct nv40_context *nv40;
+	struct nouveau_winsys *nvws = screen->nvws;
+
+	nv40 = CALLOC(1, sizeof(struct nv40_context));
+	if (!nv40)
+		return NULL;
+	nv40->screen = screen;
+	nv40->pctx_id = pctx_id;
+
+	nv40->nvws = nvws;
+
+	nv40->pipe.winsys = ws;
+	nv40->pipe.screen = pscreen;
+	nv40->pipe.destroy = nv40_destroy;
+	nv40->pipe.draw_arrays = nv40_draw_arrays;
+	nv40->pipe.draw_elements = nv40_draw_elements;
+	nv40->pipe.clear = nv40_clear;
+	nv40->pipe.flush = nv40_flush;
+
+	nv40_init_query_functions(nv40);
+	nv40_init_surface_functions(nv40);
+	nv40_init_state_functions(nv40);
+
+	/* Create, configure, and install fallback swtnl path */
+	nv40->draw = draw_create();
+	/* draw_create() can fail; the configuration calls below would
+	 * dereference a NULL draw context, so bail out cleanly instead.
+	 */
+	if (!nv40->draw) {
+		FREE(nv40);
+		return NULL;
+	}
+	draw_wide_point_threshold(nv40->draw, 9999999.0);
+	draw_wide_line_threshold(nv40->draw, 9999999.0);
+	draw_enable_line_stipple(nv40->draw, FALSE);
+	draw_enable_point_sprites(nv40->draw, FALSE);
+	draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40));
+
+	return &nv40->pipe;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
new file mode 100644
index 0000000000..adcfbdd85a
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -0,0 +1,233 @@
+#ifndef __NV40_CONTEXT_H__
+#define __NV40_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+/* Expands to a local `ctx` pointing at the screen of a variable named
+ * `nv40`, so code that uses it must have a `struct nv40_context *nv40`
+ * in scope. NOTE(review): presumably consumed by the ring macros in
+ * nouveau_push.h, included right after - confirm.
+ */
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv40_screen *ctx = nv40->screen
+#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_stateobj.h"
+
+#include "nv40_state.h"
+
+/* printf-style diagnostics on stderr. NOUVEAU_ERR prefixes the calling
+ * function and line; NOUVEAU_MSG prefixes "nouveau: ". `fmt` must be a
+ * string literal (it is concatenated with the prefix).
+ * Note: both expansions already end in ';', so an unbraced
+ * `if (x) NOUVEAU_ERR(...); else ...` will not compile.
+ */
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+/* Slots of the nv40_state.hw[] state-object array. There are more than 32
+ * of them, which is why nv40_state.dirty is a 64-bit field.
+ */
+enum nv40_state_index {
+ NV40_STATE_FB = 0,
+ NV40_STATE_VIEWPORT = 1,
+ NV40_STATE_BLEND = 2,
+ NV40_STATE_RAST = 3,
+ NV40_STATE_ZSA = 4,
+ NV40_STATE_BCOL = 5,
+ NV40_STATE_CLIP = 6,
+ NV40_STATE_SCISSOR = 7,
+ NV40_STATE_STIPPLE = 8,
+ NV40_STATE_FRAGPROG = 9,
+ NV40_STATE_VERTPROG = 10,
+ NV40_STATE_FRAGTEX0 = 11,
+ NV40_STATE_FRAGTEX1 = 12,
+ NV40_STATE_FRAGTEX2 = 13,
+ NV40_STATE_FRAGTEX3 = 14,
+ NV40_STATE_FRAGTEX4 = 15,
+ NV40_STATE_FRAGTEX5 = 16,
+ NV40_STATE_FRAGTEX6 = 17,
+ NV40_STATE_FRAGTEX7 = 18,
+ NV40_STATE_FRAGTEX8 = 19,
+ NV40_STATE_FRAGTEX9 = 20,
+ NV40_STATE_FRAGTEX10 = 21,
+ NV40_STATE_FRAGTEX11 = 22,
+ NV40_STATE_FRAGTEX12 = 23,
+ NV40_STATE_FRAGTEX13 = 24,
+ NV40_STATE_FRAGTEX14 = 25,
+ NV40_STATE_FRAGTEX15 = 26,
+ NV40_STATE_VERTTEX0 = 27,
+ NV40_STATE_VERTTEX1 = 28,
+ NV40_STATE_VERTTEX2 = 29,
+ NV40_STATE_VERTTEX3 = 30,
+ NV40_STATE_VTXBUF = 31,
+ NV40_STATE_VTXFMT = 32,
+ NV40_STATE_VTXATTR = 33,
+ /* Number of slots; not a slot itself. */
+ NV40_STATE_MAX = 34
+};
+
+#include "nv40_screen.h"
+
+/* Pipe-level dirty flags, one bit per kind of context state.
+ * NOTE(review): presumably accumulated in nv40_context.dirty and matched
+ * against nv40_state_entry.dirty.pipe - confirm in the state code.
+ */
+#define NV40_NEW_BLEND (1 << 0)
+#define NV40_NEW_RAST (1 << 1)
+#define NV40_NEW_ZSA (1 << 2)
+#define NV40_NEW_SAMPLER (1 << 3)
+#define NV40_NEW_FB (1 << 4)
+#define NV40_NEW_STIPPLE (1 << 5)
+#define NV40_NEW_SCISSOR (1 << 6)
+#define NV40_NEW_VIEWPORT (1 << 7)
+#define NV40_NEW_BCOL (1 << 8)
+#define NV40_NEW_VERTPROG (1 << 9)
+#define NV40_NEW_FRAGPROG (1 << 10)
+#define NV40_NEW_ARRAYS (1 << 11)
+#define NV40_NEW_UCP (1 << 12)
+
+/* The three wrappers below each pair a Gallium state description (`pipe`)
+ * with a nouveau state object (`so`).
+ * NOTE(review): `so` presumably holds the pre-built hardware commands for
+ * that state - confirm in nv40_state.c.
+ */
+struct nv40_rasterizer_state {
+ struct pipe_rasterizer_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_zsa_state {
+ struct pipe_depth_stencil_alpha_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_blend_state {
+ struct pipe_blend_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+
+/* Hardware state derived from the pipe state. */
+struct nv40_state {
+ unsigned scissor_enabled;
+ unsigned stipple_enabled;
+ unsigned viewport_bypass;
+ unsigned fp_samplers;
+
+ /* NOTE(review): presumably one bit per nv40_state_index slot whose
+ * state object needs re-emitting - confirm in nv40_state_emit.c.
+ */
+ uint64_t dirty;
+ /* One state object per nv40_state_index slot. */
+ struct nouveau_stateobj *hw[NV40_STATE_MAX];
+};
+
+/* Per-context driver state. The pipe_context must stay the first member:
+ * nv40_context() casts a pipe_context pointer straight to this type.
+ */
+struct nv40_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv40_screen *screen;
+ unsigned pctx_id;
+
+ /* Draw module context backing the software TNL fallback. */
+ struct draw_context *draw;
+
+ /* HW state derived from pipe states */
+ struct nv40_state state;
+ /* Vertex layout for the swtnl path. For attribute i, draw[i] is the
+ * index into the draw module's vertex data, hw[i] the hardware
+ * attribute slot and emit[i] the EMIT_* format to submit it in.
+ */
+ struct {
+ struct nv40_vertex_program *vertprog;
+
+ unsigned nr_attribs;
+ unsigned hw[PIPE_MAX_SHADER_INPUTS];
+ unsigned draw[PIPE_MAX_SHADER_INPUTS];
+ unsigned emit[PIPE_MAX_SHADER_INPUTS];
+ } swtnl;
+
+ enum {
+ HW, SWTNL, SWRAST
+ } render_mode;
+ /* NOTE(review): presumably NV40_NEW_* masks naming the state that
+ * forces each fallback path - confirm in the validate code.
+ */
+ unsigned fallback_swtnl;
+ unsigned fallback_swrast;
+
+ /* Context state */
+ unsigned dirty, draw_dirty;
+ struct pipe_scissor_state scissor;
+ unsigned stipple[32];
+ struct pipe_clip_state clip;
+ struct nv40_vertex_program *vertprog;
+ struct nv40_fragment_program *fragprog;
+ struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+ struct nv40_rasterizer_state *rasterizer;
+ struct nv40_zsa_state *zsa;
+ struct nv40_blend_state *blend;
+ struct pipe_blend_color blend_colour;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_buffer *idxbuf;
+ unsigned idxbuf_format;
+ struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers;
+ unsigned nr_textures;
+ unsigned dirty_samplers;
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned vtxbuf_nr;
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+ unsigned vtxelt_nr;
+ const unsigned *edgeflags;
+};
+
+/* Downcast a pipe_context to the nv40_context that embeds it as its first
+ * member. Only valid for contexts created by this driver.
+ */
+static INLINE struct nv40_context *
+nv40_context(struct pipe_context *pipe)
+{
+ return (struct nv40_context *)pipe;
+}
+
+/* One entry of the state validation table: a validate callback plus the
+ * dirty information associated with it.
+ */
+struct nv40_state_entry {
+ boolean (*validate)(struct nv40_context *nv40);
+ struct {
+ /* NV40_NEW_* flags associated with this entry. */
+ unsigned pipe;
+ /* NOTE(review): presumably the nv40_state_index slot to mark dirty
+ * when validate returns TRUE - confirm in nv40_state_emit.c.
+ */
+ unsigned hw;
+ } dirty;
+};
+
+extern void nv40_init_state_functions(struct nv40_context *nv40);
+extern void nv40_init_surface_functions(struct nv40_context *nv40);
+extern void nv40_init_query_functions(struct nv40_context *nv40);
+
+extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv40_draw.c */
+extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
+extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe,
+ struct pipe_buffer *idxbuf,
+ unsigned ib_size, unsigned mode,
+ unsigned start, unsigned count);
+
+/* nv40_vertprog.c */
+extern void nv40_vertprog_destroy(struct nv40_context *,
+ struct nv40_vertex_program *);
+
+/* nv40_fragprog.c */
+extern void nv40_fragprog_destroy(struct nv40_context *,
+ struct nv40_fragment_program *);
+
+/* nv40_fragtex.c */
+extern void nv40_fragtex_bind(struct nv40_context *);
+
+/* nv40_state.c and friends */
+extern boolean nv40_state_validate(struct nv40_context *nv40);
+extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
+extern void nv40_state_emit(struct nv40_context *nv40);
+extern struct nv40_state_entry nv40_state_rasterizer;
+extern struct nv40_state_entry nv40_state_scissor;
+extern struct nv40_state_entry nv40_state_stipple;
+extern struct nv40_state_entry nv40_state_fragprog;
+extern struct nv40_state_entry nv40_state_vertprog;
+extern struct nv40_state_entry nv40_state_blend;
+extern struct nv40_state_entry nv40_state_blend_colour;
+extern struct nv40_state_entry nv40_state_zsa;
+extern struct nv40_state_entry nv40_state_viewport;
+extern struct nv40_state_entry nv40_state_framebuffer;
+extern struct nv40_state_entry nv40_state_fragtex;
+extern struct nv40_state_entry nv40_state_vbo;
+extern struct nv40_state_entry nv40_state_vtxfmt;
+
+/* nv40_vbo.c */
+extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv40_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count);
+
+/* nv40_clear.c */
+extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
new file mode 100644
index 0000000000..c83ff91d7e
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -0,0 +1,349 @@
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_pack_color.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pipe.h"
+
+#include "nv40_context.h"
+#define NV40_SHADER_NO_FUCKEDNESS
+#include "nv40_shader.h"
+
+/* Simple, but crappy, swtnl path; hopefully we won't need to hit this very
+ * often at all. Uses "quadro style" vertex submission + a fixed vertex
+ * layout to avoid the need to generate a vertex program or vtxfmt.
+ */
+
+struct nv40_render_stage { /* draw-module stage that submits primitives to the nv40 command stream */
+ struct draw_stage stage; /* kept as first member: nv40_render_stage() casts a draw_stage pointer to this type */
+ struct nv40_context *nv40; /* context whose push buffer the stage writes to */
+ unsigned prim; /* BEGIN_END mode last opened; reset to NV40TCL_BEGIN_END_STOP whenever the primitive is closed */
+};
+
+/* Recover the nv40 render stage from the generic draw_stage pointer the
+ * draw module passes to the stage callbacks.
+ */
+static INLINE struct nv40_render_stage *
+nv40_render_stage(struct draw_stage *stage)
+{
+	struct nv40_render_stage *rs = (struct nv40_render_stage *)stage;
+
+	return rs;
+}
+
+/* Write one post-transform vertex into the push buffer.
+ *
+ * For every entry of the swtnl attribute table the matching draw-module
+ * output slot is read from v->data[] and sent to the hardware attribute
+ * given by swtnl.hw[], using the format selected by swtnl.emit[].
+ */
+static INLINE void
+nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
+{
+	unsigned a;
+
+	for (a = 0; a < nv40->swtnl.nr_attribs; a++) {
+		const unsigned slot = nv40->swtnl.draw[a];
+		const unsigned attr = nv40->swtnl.hw[a];
+		unsigned nr_floats = 0, c;
+
+		/* Open the method; float payloads are written after the switch */
+		switch (nv40->swtnl.emit[a]) {
+		case EMIT_OMIT:
+			break;
+		case EMIT_1F:
+			BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(attr), 1);
+			nr_floats = 1;
+			break;
+		case EMIT_2F:
+			BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(attr), 2);
+			nr_floats = 2;
+			break;
+		case EMIT_3F:
+			BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(attr), 3);
+			nr_floats = 3;
+			break;
+		case EMIT_4F:
+			BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(attr), 4);
+			nr_floats = 4;
+			break;
+		case EMIT_4UB:
+			/* Four floats packed into a single dword of bytes */
+			BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(attr), 1);
+			OUT_RING  (pack_ub4(float_to_ubyte(v->data[slot][0]),
+					    float_to_ubyte(v->data[slot][1]),
+					    float_to_ubyte(v->data[slot][2]),
+					    float_to_ubyte(v->data[slot][3])));
+			break;
+		default:
+			assert(0);
+			break;
+		}
+
+		for (c = 0; c < nr_floats; c++)
+			OUT_RING  (fui(v->data[slot][c]));
+	}
+}
+
+/* Emit one point/line/triangle (count vertices of prim) in the given
+ * BEGIN_END mode, opening/closing the hardware primitive as needed.
+ *
+ * The same space estimate (20 dwords per vertex plus 6 for begin/end
+ * packets) is checked before and after the vertices are written.
+ */
+static INLINE void
+nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
+	       unsigned mode, unsigned count)
+{
+	struct nv40_render_stage *rs = nv40_render_stage(stage);
+	struct nv40_context *nv40 = rs->nv40;
+	struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf;
+	const unsigned reserve = (count * 20) + 6;
+	unsigned v;
+
+	/* Not enough room: flush and re-emit state.  A flush in the middle
+	 * of an open primitive is unexpected and reported.
+	 */
+	if (pb->remaining < reserve) {
+		if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+			NOUVEAU_ERR("AIII, missed flush\n");
+			assert(0);
+		}
+		FIRE_RING(NULL);
+		nv40_state_emit(nv40);
+	}
+
+	/* Close the currently open primitive (if any) and open the new one */
+	if (rs->prim != mode) {
+		if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+			BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+			OUT_RING  (NV40TCL_BEGIN_END_STOP);
+		}
+
+		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (mode);
+		rs->prim = mode;
+	}
+
+	for (v = 0; v < count; v++)
+		nv40_render_vertex(nv40, prim->v[v]);
+
+	/* Running low on space: finish the primitive now so that the next
+	 * call is free to flush.
+	 */
+	if (pb->remaining < reserve) {
+		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (NV40TCL_BEGIN_END_STOP);
+		rs->prim = NV40TCL_BEGIN_END_STOP;
+	}
+}
+
+/* draw_stage::point callback: submit a single-vertex primitive. */
+static void
+nv40_render_point(struct draw_stage *draw, struct prim_header *prim)
+{
+	const unsigned nr_verts = 1;
+
+	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, nr_verts);
+}
+
+/* draw_stage::line callback: submit a two-vertex primitive. */
+static void
+nv40_render_line(struct draw_stage *draw, struct prim_header *prim)
+{
+	const unsigned nr_verts = 2;
+
+	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, nr_verts);
+}
+
+/* draw_stage::tri callback: submit a three-vertex primitive. */
+static void
+nv40_render_tri(struct draw_stage *draw, struct prim_header *prim)
+{
+	const unsigned nr_verts = 3;
+
+	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, nr_verts);
+}
+
+/* draw_stage::flush callback: close the hardware primitive if one is
+ * still open.  The flags argument is not used.
+ */
+static void
+nv40_render_flush(struct draw_stage *draw, unsigned flags)
+{
+	struct nv40_render_stage *rs = nv40_render_stage(draw);
+	struct nv40_context *nv40 = rs->nv40;
+
+	if (rs->prim == NV40TCL_BEGIN_END_STOP)
+		return;
+
+	BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+	OUT_RING  (NV40TCL_BEGIN_END_STOP);
+	rs->prim = NV40TCL_BEGIN_END_STOP;
+}
+
+/* draw_stage::reset_stipple_counter callback: intentionally does nothing. */
+static void
+nv40_render_reset_stipple_counter(struct draw_stage *draw)
+{
+	(void)draw;
+}
+
+/* draw_stage::destroy callback: release the stage allocated by
+ * nv40_draw_render_stage().
+ */
+static void
+nv40_render_destroy(struct draw_stage *draw)
+{
+	struct draw_stage *stage = draw;
+
+	FREE(stage);
+}
+
+/* Append one pre-encoded MOV instruction to vp.
+ *
+ *   dst  - hardware destination register (NV40_VP_INST_DEST_*)
+ *   src  - input attribute index, also recorded in vp->ir
+ *   vor  - bit to set in vp->or, or ~0 to leave vp->or untouched
+ *   mask - destination write mask
+ *
+ * Returns FALSE, leaving vp unchanged, if the instruction array cannot
+ * be grown.
+ */
+static INLINE boolean
+emit_mov(struct nv40_vertex_program *vp,
+	 unsigned dst, unsigned src, unsigned vor, unsigned mask)
+{
+	struct nv40_vertex_program_exec *insns, *inst;
+
+	/* Grow through a temporary so the old array survives a failure */
+	insns = realloc(vp->insns, sizeof(*insns) * (vp->nr_insns + 1));
+	if (!insns)
+		return FALSE;
+	vp->insns = insns;
+	inst = &vp->insns[vp->nr_insns++];
+
+	inst->data[0] = 0x401f9c6c;
+	inst->data[1] = 0x0040000d | (src << 8);
+	inst->data[2] = 0x8106c083;
+	inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13);
+	inst->const_index = -1;
+	inst->has_branch_offset = FALSE;
+
+	vp->ir |= (1 << src);
+	if (vor != ~0u)
+		vp->or |= (1 << vor);
+
+	return TRUE;
+}
+
+/* Build the fixed pass-through vertex program used by the swtnl path:
+ * position, both colours (front and back), fog and eight texcoords are
+ * copied from the input attributes to the corresponding results.
+ *
+ * Returns NULL if memory could not be allocated.
+ */
+static struct nv40_vertex_program *
+create_drawvp(struct nv40_context *nv40)
+{
+	struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program);
+	unsigned i;
+
+	if (!vp)
+		return NULL;
+
+	if (!emit_mov(vp, NV40_VP_INST_DEST_POS,  0, ~0, 0xf) ||
+	    !emit_mov(vp, NV40_VP_INST_DEST_COL0, 3,  0, 0xf) ||
+	    !emit_mov(vp, NV40_VP_INST_DEST_COL1, 4,  1, 0xf) ||
+	    !emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3,  2, 0xf) ||
+	    !emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4,  3, 0xf) ||
+	    !emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5,  4, 0x8))
+		goto fail;
+
+	for (i = 0; i < 8; i++) {
+		if (!emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf))
+			goto fail;
+	}
+
+	/* Flag the last instruction as the end of the program */
+	vp->insns[vp->nr_insns - 1].data[3] |= 1;
+	vp->translated = TRUE;
+	return vp;
+
+fail:
+	/* insns came from realloc(), vp from CALLOC_STRUCT() */
+	free(vp->insns);
+	FREE(vp);
+	return NULL;
+}
+
+/* Create the draw-module stage that renders through the swtnl path.
+ *
+ * The pass-through vertex program is created on first use and cached in
+ * nv40->swtnl.vertprog.  Returns NULL if the stage cannot be allocated.
+ */
+struct draw_stage *
+nv40_draw_render_stage(struct nv40_context *nv40)
+{
+	struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage);
+
+	/* Previously a failed allocation was dereferenced below */
+	if (!render)
+		return NULL;
+
+	if (!nv40->swtnl.vertprog)
+		nv40->swtnl.vertprog = create_drawvp(nv40);
+
+	render->nv40 = nv40;
+	render->stage.draw = nv40->draw;
+	render->stage.point = nv40_render_point;
+	render->stage.line = nv40_render_line;
+	render->stage.tri = nv40_render_tri;
+	render->stage.flush = nv40_render_flush;
+	render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter;
+	render->stage.destroy = nv40_render_destroy;
+
+	return &render->stage;
+}
+
+/* Draw through the software TNL path.
+ *
+ * Validates and emits the swtnl state, maps the vertex buffers, the
+ * optional index buffer and the vertex constant buffer for the draw
+ * module, runs draw_arrays(), then unmaps everything and flushes.
+ *
+ * Returns FALSE if swtnl state validation fails, TRUE otherwise.
+ */
+boolean
+nv40_draw_elements_swtnl(struct pipe_context *pipe,
+			 struct pipe_buffer *idxbuf, unsigned idxbuf_size,
+			 unsigned mode, unsigned start, unsigned count)
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
+	struct pipe_winsys *ws = pipe->winsys;
+	void *ptr;
+	unsigned vb;
+
+	if (!nv40_state_validate_swtnl(nv40))
+		return FALSE;
+	nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
+	nv40_state_emit(nv40);
+
+	/* Vertex buffers */
+	for (vb = 0; vb < nv40->vtxbuf_nr; vb++) {
+		ptr = ws->buffer_map(ws, nv40->vtxbuf[vb].buffer,
+				     PIPE_BUFFER_USAGE_CPU_READ);
+		draw_set_mapped_vertex_buffer(nv40->draw, vb, ptr);
+	}
+
+	/* Index buffer, or none for non-indexed draws */
+	if (!idxbuf) {
+		draw_set_mapped_element_buffer(nv40->draw, 0, NULL);
+	} else {
+		ptr = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ);
+		draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, ptr);
+	}
+
+	/* Vertex shader constants */
+	if (nv40->constbuf[PIPE_SHADER_VERTEX]) {
+		const unsigned nr_consts = nv40->constbuf_nr[PIPE_SHADER_VERTEX];
+
+		ptr = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX],
+				     PIPE_BUFFER_USAGE_CPU_READ);
+		draw_set_mapped_constant_buffer(nv40->draw, ptr, nr_consts);
+	}
+
+	draw_arrays(nv40->draw, mode, start, count);
+
+	/* Release the mappings in the order they were taken */
+	for (vb = 0; vb < nv40->vtxbuf_nr; vb++)
+		ws->buffer_unmap(ws, nv40->vtxbuf[vb].buffer);
+
+	if (idxbuf)
+		ws->buffer_unmap(ws, idxbuf);
+
+	if (nv40->constbuf[PIPE_SHADER_VERTEX])
+		ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]);
+
+	draw_flush(nv40->draw);
+	pipe->flush(pipe, 0, NULL);
+
+	return TRUE;
+}
+
+/* Append an entry to the swtnl attribute table: hardware attribute hw is
+ * fed, in format emit, from the draw-module vertex shader output that
+ * carries the given semantic name/index.
+ */
+static INLINE void
+emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
+	    unsigned semantic, unsigned index)
+{
+	const unsigned vs_out = draw_find_vs_output(nv40->draw, semantic, index);
+	const unsigned slot = nv40->swtnl.nr_attribs;
+
+	nv40->swtnl.nr_attribs = slot + 1;
+	nv40->swtnl.draw[slot] = vs_out;
+	nv40->swtnl.emit[slot] = emit;
+	nv40->swtnl.hw[slot] = hw;
+}
+
+/* Rebuild the swtnl attribute table from the inputs read by the bound
+ * fragment program.
+ *
+ * Colours are sent as packed bytes to attributes 3/4, generics as four
+ * floats to attributes 8..15, fog as one float to attribute 5, and the
+ * position is always appended last as three floats to attribute 0.
+ *
+ * Always returns FALSE.
+ */
+static boolean
+nv40_state_vtxfmt_validate(struct nv40_context *nv40)
+{
+	struct nv40_fragment_program *fp = nv40->fragprog;
+	unsigned colour_mask = 0, generic_mask = 0, want_fog = 0;
+	unsigned n;
+
+	/* Collect the inputs the fragment program consumes */
+	for (n = 0; n < fp->info.num_inputs; n++) {
+		switch (fp->info.input_semantic_name[n]) {
+		case TGSI_SEMANTIC_POSITION:
+			break;
+		case TGSI_SEMANTIC_COLOR:
+			colour_mask |= (1 << fp->info.input_semantic_index[n]);
+			break;
+		case TGSI_SEMANTIC_GENERIC:
+			generic_mask |= (1 << fp->info.input_semantic_index[n]);
+			break;
+		case TGSI_SEMANTIC_FOG:
+			want_fog = 1;
+			break;
+		default:
+			assert(0);
+		}
+	}
+
+	nv40->swtnl.nr_attribs = 0;
+
+	/* Pair draw-module outputs with hardware attribute slots */
+	for (n = 0; n < 2; n++) {
+		if (colour_mask & (1 << n))
+			emit_attrib(nv40, 3 + n, EMIT_4UB, TGSI_SEMANTIC_COLOR, n);
+	}
+
+	for (n = 0; n < 8; n++) {
+		if (generic_mask & (1 << n))
+			emit_attrib(nv40, 8 + n, EMIT_4F, TGSI_SEMANTIC_GENERIC, n);
+	}
+
+	if (want_fog)
+		emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);
+
+	emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0);
+
+	return FALSE;
+}
+
+struct nv40_state_entry nv40_state_vtxfmt = { /* state entry for the swtnl vertex layout */
+ .validate = nv40_state_vtxfmt_validate,
+ .dirty = {
+ .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG, /* presumably the pipe-state changes that trigger validate - confirm against the state validation loop */
+ .hw = 0 /* NOTE(review): validate fills nv40->swtnl only and always returns FALSE, so no hw state object seems to be tied to this entry */
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
new file mode 100644
index 0000000000..91dcbebda0
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -0,0 +1,991 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv40_context.h"
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 1
+#define MASK_Y 2
+#define MASK_Z 4
+#define MASK_W 8
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NV40_FP_OP_COND_TR
+#include "nv40_shader.h"
+
+#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv40_sr_neg((s))
+#define abs(s) nv40_sr_abs((s))
+#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v)
+
+#define MAX_CONSTS 128 /* capacity of nv40_fpc.consts[] */
+#define MAX_IMM 32 /* capacity of nv40_fpc.imm[] */
+struct nv40_fpc { /* scratch state used while translating one TGSI fragment program */
+ struct nv40_fragment_program *fp; /* program receiving the generated instructions */
+
+ uint attrib_map[PIPE_MAX_SHADER_INPUTS]; /* TGSI input index -> hw input source, filled from input declarations */
+
+ unsigned r_temps; /* bitmask of hw temp registers currently allocated */
+ unsigned r_temps_discard; /* temps that release_temps() hands back */
+ struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; /* TGSI output index -> hw result register */
+ struct nv40_sreg *r_temp; /* TGSI temporary index -> hw temp, allocated in nv40_fragprog_prepare() */
+
+ int num_regs; /* highest temp index written + 1 (starts at 2); stored into fp_control */
+
+ unsigned inst_offset; /* offset into fp->insn of the instruction being assembled */
+ unsigned have_const; /* non-zero once the current instruction has its 4 inline constant words */
+
+ struct {
+ int pipe; /* constant buffer index, or -1 when vals[] holds an immediate */
+ float vals[4]; /* immediate value, only filled when pipe == -1 */
+ } consts[MAX_CONSTS];
+ int nr_consts; /* used entries of consts[] */
+
+ struct nv40_sreg imm[MAX_IMM]; /* TGSI immediate index -> constant register */
+ unsigned nr_imm; /* used entries of imm[] */
+};
+
+/* Allocate the lowest free hardware temp register.
+ *
+ * The register is also recorded in r_temps_discard so that the next
+ * release_temps() returns it.  When no register is free an error is
+ * logged and temp 0 is returned.
+ */
+static INLINE struct nv40_sreg
+temp(struct nv40_fpc *fpc)
+{
+	const int idx = ffs(~fpc->r_temps) - 1;
+
+	if (idx >= 0) {
+		fpc->r_temps |= (1 << idx);
+		fpc->r_temps_discard |= (1 << idx);
+		return nv40_sr(NV40SR_TEMP, idx);
+	}
+
+	NOUVEAU_ERR("out of temps!!\n");
+	assert(0);
+	return nv40_sr(NV40SR_TEMP, 0);
+}
+
+/* Return every temp recorded in r_temps_discard to the free pool. */
+static INLINE void
+release_temps(struct nv40_fpc *fpc)
+{
+	const unsigned scratch = fpc->r_temps_discard;
+
+	fpc->r_temps_discard = 0;
+	fpc->r_temps &= ~scratch;
+}
+
+/* Reserve a slot in fpc->consts[] and return a constant register that
+ * refers to it.
+ *
+ *   pipe - index into the constant buffer, or -1 for an immediate
+ *   vals - immediate value, only read when pipe == -1
+ *
+ * When the table is full an error is logged and slot 0 is returned;
+ * previously only an assert guarded the write, so builds without
+ * asserts wrote past the end of consts[].
+ */
+static INLINE struct nv40_sreg
+constant(struct nv40_fpc *fpc, int pipe, float vals[4])
+{
+	int idx;
+
+	if (fpc->nr_consts >= MAX_CONSTS) {
+		NOUVEAU_ERR("out of constant slots\n");
+		assert(0);
+		return nv40_sr(NV40SR_CONST, 0);
+	}
+	idx = fpc->nr_consts++;
+
+	fpc->consts[idx].pipe = pipe;
+	if (pipe == -1)
+		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
+	return nv40_sr(NV40SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) /* nv40_fp_arith() shorthand: cc = compile context, s = saturate, o = opcode suffix, d/m = dest and write mask */ \
+ nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \
+ (d), (m), (s0), (s1), (s2))
+#define tex(cc,s,o,u,d,m,s0,s1,s2) /* nv40_fp_tex() shorthand: u = texture unit; s1/s2 are ignored and a variable named none must be in scope */ \
+ nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \
+ (d), (m), (s0), none, none)
+
+/* Extend the program's instruction buffer by size dwords.
+ *
+ * The new dwords are not initialised here, and the realloc() result is
+ * stored without being checked.
+ */
+static void
+grow_insns(struct nv40_fpc *fpc, int size)
+{
+	struct nv40_fragment_program *prog = fpc->fp;
+
+	prog->insn_len = prog->insn_len + size;
+	prog->insn = realloc(prog->insn, prog->insn_len * sizeof(uint32_t));
+}
+
+/* Encode source operand number pos (0..2) of the instruction currently
+ * being assembled at fpc->inst_offset.
+ *
+ * Constants live in four extra dwords directly after the instruction;
+ * they are added on first use (have_const).  Values taken from the
+ * constant buffer are zeroed here and their location is recorded in
+ * fp->consts, immediates are copied in place.
+ */
+static void
+emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
+{
+	struct nv40_fragment_program *fp = fpc->fp;
+	uint32_t *insn = &fp->insn[fpc->inst_offset];
+	uint32_t reg = 0;
+
+	switch (src.type) {
+	case NV40SR_INPUT:
+		reg |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
+		insn[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT);
+		break;
+	case NV40SR_OUTPUT:
+		reg |= NV40_FP_REG_SRC_HALF;
+		/* fall-through */
+	case NV40SR_TEMP:
+		reg |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT);
+		reg |= (src.index << NV40_FP_REG_SRC_SHIFT);
+		break;
+	case NV40SR_CONST:
+		if (!fpc->have_const) {
+			grow_insns(fpc, 4);
+			fpc->have_const = 1;
+		}
+
+		/* grow_insns() may have moved the buffer */
+		insn = &fp->insn[fpc->inst_offset];
+
+		if (fpc->consts[src.index].pipe < 0) {
+			/* Immediate: value is known now */
+			memcpy(&fp->insn[fpc->inst_offset + 4],
+			       fpc->consts[src.index].vals,
+			       sizeof(uint32_t) * 4);
+		} else {
+			/* Constant buffer entry: note where to patch it in */
+			struct nv40_fragment_program_data *fpd;
+
+			fp->consts = realloc(fp->consts, ++fp->nr_consts *
+					     sizeof(*fpd));
+			fpd = &fp->consts[fp->nr_consts - 1];
+			fpd->offset = fpc->inst_offset + 4;
+			fpd->index = fpc->consts[src.index].pipe;
+			memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
+		}
+
+		reg |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
+		break;
+	case NV40SR_NONE:
+		reg |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
+		break;
+	default:
+		assert(0);
+	}
+
+	/* The abs flags of all three sources share dword 1 */
+	if (src.abs)
+		insn[1] |= (1 << (29 + pos));
+
+	if (src.negate)
+		reg |= NV40_FP_REG_NEGATE;
+
+	reg |= (src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT);
+	reg |= (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT);
+	reg |= (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT);
+	reg |= (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT);
+
+	insn[pos + 1] |= reg;
+}
+
+/* Encode the destination register of the instruction currently being
+ * assembled, and track the temp count / control bits it implies.
+ */
+static void
+emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst)
+{
+	struct nv40_fragment_program *fp = fpc->fp;
+	uint32_t *insn = &fp->insn[fpc->inst_offset];
+
+	switch (dst.type) {
+	case NV40SR_TEMP:
+		/* Keep num_regs at highest written temp + 1 */
+		if (dst.index + 1 > fpc->num_regs)
+			fpc->num_regs = dst.index + 1;
+		break;
+	case NV40SR_OUTPUT:
+		if (dst.index != 1) {
+			insn[0] |= NV40_FP_OP_OUT_REG_HALF;
+		} else {
+			fp->fp_control |= 0xe;
+		}
+		break;
+	case NV40SR_NONE:
+		insn[0] |= (1 << 30);
+		break;
+	default:
+		assert(0);
+	}
+
+	insn[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT);
+}
+
+/* Append one four-dword instruction to the program.
+ *
+ *   sat  - saturate the result when non-zero
+ *   op   - NV40_FP_OP_OPCODE_* value
+ *   dst  - destination register, also carrying scale and condition setup
+ *   mask - destination write mask
+ *   s0-2 - source operands (use a NONE register for unused ones)
+ */
+static void
+nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
+	      struct nv40_sreg dst, int mask,
+	      struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+{
+	struct nv40_fragment_program *fp = fpc->fp;
+	uint32_t op_word, cond_word, scale_word;
+	uint32_t *insn;
+
+	/* Start a new instruction at the current end of the program */
+	fpc->inst_offset = fp->insn_len;
+	fpc->have_const = 0;
+	grow_insns(fpc, 4);
+	insn = &fp->insn[fpc->inst_offset];
+
+	if (op == NV40_FP_OP_OPCODE_KIL)
+		fp->fp_control |= NV40TCL_FP_CONTROL_KIL;
+
+	op_word = (op << NV40_FP_OP_OPCODE_SHIFT) |
+		  (mask << NV40_FP_OP_OUTMASK_SHIFT);
+	if (sat)
+		op_word |= NV40_FP_OP_OUT_SAT;
+	if (dst.cc_update)
+		op_word |= NV40_FP_OP_COND_WRITE_ENABLE;
+
+	cond_word = (dst.cc_test << NV40_FP_OP_COND_SHIFT) |
+		    (dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) |
+		    (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) |
+		    (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) |
+		    (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT);
+
+	scale_word = (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT);
+
+	insn[0] = op_word;
+	insn[1] = cond_word;
+	insn[2] = scale_word;
+	insn[3] = 0;
+
+	/* Operands are OR-ed into the dwords written above */
+	emit_dst(fpc, dst);
+	emit_src(fpc, 0, s0);
+	emit_src(fpc, 1, s1);
+	emit_src(fpc, 2, s2);
+}
+
+/* Append a texture instruction: same encoding as nv40_fp_arith() plus the
+ * texture unit, which is also recorded in fp->samplers.
+ */
+static void
+nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit,
+	    struct nv40_sreg dst, int mask,
+	    struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+{
+	struct nv40_fragment_program *prog = fpc->fp;
+
+	nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
+
+	prog->samplers |= (1 << unit);
+	prog->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT);
+}
+
+/* Translate a TGSI source operand into a hardware source register,
+ * carrying over the abs/negate modifiers and the plain swizzle.
+ *
+ * A constant operand allocates a new slot through constant() on every
+ * call.  An unsupported register file is reported and yields a NONE
+ * register; previously the result was left uninitialised in that case.
+ */
+static INLINE struct nv40_sreg
+tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
+{
+	struct nv40_sreg src;
+
+	switch (fsrc->SrcRegister.File) {
+	case TGSI_FILE_INPUT:
+		src = nv40_sr(NV40SR_INPUT,
+			      fpc->attrib_map[fsrc->SrcRegister.Index]);
+		break;
+	case TGSI_FILE_CONSTANT:
+		src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+		break;
+	case TGSI_FILE_IMMEDIATE:
+		assert(fsrc->SrcRegister.Index < fpc->nr_imm);
+		src = fpc->imm[fsrc->SrcRegister.Index];
+		break;
+	case TGSI_FILE_TEMPORARY:
+		src = fpc->r_temp[fsrc->SrcRegister.Index];
+		break;
+	/* NV40 fragprog result regs are just temps, so this is simple */
+	case TGSI_FILE_OUTPUT:
+		src = fpc->r_result[fsrc->SrcRegister.Index];
+		break;
+	default:
+		NOUVEAU_ERR("bad src file\n");
+		/* Same fallback as tgsi_dst() uses for a bad file */
+		src = nv40_sr(NV40SR_NONE, 0);
+		break;
+	}
+
+	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.negate = fsrc->SrcRegister.Negate;
+	src.swz[0] = fsrc->SrcRegister.SwizzleX;
+	src.swz[1] = fsrc->SrcRegister.SwizzleY;
+	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	return src;
+}
+
+/* Translate a TGSI destination operand into a hardware register.
+ * Outputs and temporaries map to their pre-assigned registers; the NULL
+ * file, and any unsupported file (which is reported), yield NONE.
+ */
+static INLINE struct nv40_sreg
+tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
+	const unsigned file = fdst->DstRegister.File;
+
+	if (file == TGSI_FILE_OUTPUT)
+		return fpc->r_result[fdst->DstRegister.Index];
+
+	if (file == TGSI_FILE_TEMPORARY)
+		return fpc->r_temp[fdst->DstRegister.Index];
+
+	if (file != TGSI_FILE_NULL)
+		NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+
+	return nv40_sr(NV40SR_NONE, 0);
+}
+
+/* Convert a TGSI write mask into the MASK_* bits used by this file. */
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+	return ((tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0) |
+	       ((tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0);
+}
+
+/* Check whether the extended swizzle of fsrc can be used directly.
+ *
+ * Returns TRUE, leaving *src untouched, when every component is a plain
+ * X/Y/Z/W selection and no per-component negate is needed.  Otherwise
+ * instructions are emitted that build the swizzled value (including the
+ * ZERO/ONE selections and per-component negation) in a fresh temp, that
+ * temp is stored in *src and FALSE is returned.
+ */
+static boolean
+src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
+	       struct nv40_sreg *src)
+{
+	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+	struct nv40_sreg reg = tgsi_src(fpc, fsrc);
+	const uint comp_neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+				   fsrc->SrcRegisterExtSwz.NegateY,
+				   fsrc->SrcRegisterExtSwz.NegateZ,
+				   fsrc->SrcRegisterExtSwz.NegateW };
+	uint pass_mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+	uint c;
+
+	/* Sort the four components by the kind of selection they use */
+	for (c = 0; c < 4; c++) {
+		const uint bit = (1 << c);
+
+		switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+		case TGSI_EXTSWIZZLE_X:
+		case TGSI_EXTSWIZZLE_Y:
+		case TGSI_EXTSWIZZLE_Z:
+		case TGSI_EXTSWIZZLE_W:
+			pass_mask |= bit;
+			break;
+		case TGSI_EXTSWIZZLE_ZERO:
+			zero_mask |= bit;
+			reg.swz[c] = SWZ_X;
+			break;
+		case TGSI_EXTSWIZZLE_ONE:
+			one_mask |= bit;
+			reg.swz[c] = SWZ_X;
+			break;
+		default:
+			assert(0);
+		}
+
+		if (neg[c] && !reg.negate)
+			neg_mask |= bit;
+	}
+
+	if (!neg_mask && pass_mask == MASK_ALL)
+		return TRUE;
+
+	*src = temp(fpc);
+
+	if (pass_mask)
+		arith(fpc, 0, MOV, *src, pass_mask, reg, none, none);
+
+	if (zero_mask)
+		arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
+
+	if (one_mask)
+		arith(fpc, 0, STR, *src, one_mask, *src, none, none);
+
+	if (neg_mask) {
+		/* Multiply the affected components by -1 */
+		struct nv40_sreg one = temp(fpc);
+
+		arith(fpc, 0, STR, one, neg_mask, one, none, none);
+		arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
+	}
+
+	return FALSE;
+}
+
+/* Translate one TGSI instruction into hardware instructions.
+ *
+ * Source operands are resolved first.  An instruction can reference only
+ * one input attribute and one constant/immediate, so any further ones
+ * are first copied into scratch temps.  Sources with a non-native
+ * extended swizzle are also materialised in temps by src_native_swz().
+ * All scratch temps are released before returning.
+ *
+ * Changes from the previous version:
+ *  - sources read from the OUTPUT file are now resolved through
+ *    tgsi_src() instead of leaving src[i] uninitialised;
+ *  - unit starts at 0 so a texture opcode without a sampler operand
+ *    does not use an indeterminate value;
+ *  - CMP uses the fragment-program condition code NV40_FP_OP_COND_LT,
+ *    as KIL already does, instead of the vertex-program constant.
+ *
+ * Returns FALSE for an unsupported opcode or source register file.
+ */
+static boolean
+nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
+				const struct tgsi_full_instruction *finst)
+{
+	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+	struct nv40_sreg src[3], dst, tmp;
+	int mask, sat, unit = 0;
+	int ai = -1, ci = -1, ii = -1;
+	int i;
+
+	if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+		return TRUE;
+
+	/* Temporaries never need a copy, resolve them up front */
+	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+		const struct tgsi_full_src_register *fsrc;
+
+		fsrc = &finst->FullSrcRegisters[i];
+		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+			src[i] = tgsi_src(fpc, fsrc);
+		}
+	}
+
+	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+		const struct tgsi_full_src_register *fsrc;
+
+		fsrc = &finst->FullSrcRegisters[i];
+
+		/* A FALSE return means src[i] now holds a temp with the
+		 * swizzled value, nothing more to do for this operand.
+		 */
+		switch (fsrc->SrcRegister.File) {
+		case TGSI_FILE_INPUT:
+		case TGSI_FILE_CONSTANT:
+		case TGSI_FILE_TEMPORARY:
+			if (!src_native_swz(fpc, fsrc, &src[i]))
+				continue;
+			break;
+		default:
+			break;
+		}
+
+		switch (fsrc->SrcRegister.File) {
+		case TGSI_FILE_INPUT:
+			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+				ai = fsrc->SrcRegister.Index;
+				src[i] = tgsi_src(fpc, fsrc);
+			} else {
+				src[i] = temp(fpc);
+				arith(fpc, 0, MOV, src[i], MASK_ALL,
+				      tgsi_src(fpc, fsrc), none, none);
+			}
+			break;
+		case TGSI_FILE_CONSTANT:
+			if ((ci == -1 && ii == -1) ||
+			    ci == fsrc->SrcRegister.Index) {
+				ci = fsrc->SrcRegister.Index;
+				src[i] = tgsi_src(fpc, fsrc);
+			} else {
+				src[i] = temp(fpc);
+				arith(fpc, 0, MOV, src[i], MASK_ALL,
+				      tgsi_src(fpc, fsrc), none, none);
+			}
+			break;
+		case TGSI_FILE_IMMEDIATE:
+			if ((ci == -1 && ii == -1) ||
+			    ii == fsrc->SrcRegister.Index) {
+				ii = fsrc->SrcRegister.Index;
+				src[i] = tgsi_src(fpc, fsrc);
+			} else {
+				src[i] = temp(fpc);
+				arith(fpc, 0, MOV, src[i], MASK_ALL,
+				      tgsi_src(fpc, fsrc), none, none);
+			}
+			break;
+		case TGSI_FILE_TEMPORARY:
+			/* handled above */
+			break;
+		case TGSI_FILE_SAMPLER:
+			unit = fsrc->SrcRegister.Index;
+			break;
+		case TGSI_FILE_OUTPUT:
+			/* Result registers are readable like temps */
+			src[i] = tgsi_src(fpc, fsrc);
+			break;
+		default:
+			NOUVEAU_ERR("bad src file\n");
+			return FALSE;
+		}
+	}
+
+	dst  = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
+	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+	sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+
+	switch (finst->Instruction.Opcode) {
+	case TGSI_OPCODE_ABS:
+		arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
+		break;
+	case TGSI_OPCODE_ADD:
+		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_CMP:
+		/* dst = src2, then overwrite with src1 where src0 < 0 */
+		tmp = temp(fpc);
+		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+		tmp.cc_update = 1;
+		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+		dst.cc_test = NV40_FP_OP_COND_LT;
+		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
+		break;
+	case TGSI_OPCODE_COS:
+		arith(fpc, sat, COS, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_DDX:
+		/* Only X/Y are computed per instruction; Z/W take a detour */
+		if (mask & (MASK_Z | MASK_W)) {
+			tmp = temp(fpc);
+			arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y,
+			      swz(src[0], Z, W, Z, W), none, none);
+			arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W,
+			      swz(tmp, X, Y, X, Y), none, none);
+			arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0],
+			      none, none);
+			arith(fpc, 0, MOV, dst, mask, tmp, none, none);
+		} else {
+			arith(fpc, sat, DDX, dst, mask, src[0], none, none);
+		}
+		break;
+	case TGSI_OPCODE_DDY:
+		if (mask & (MASK_Z | MASK_W)) {
+			tmp = temp(fpc);
+			arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y,
+			      swz(src[0], Z, W, Z, W), none, none);
+			arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W,
+			      swz(tmp, X, Y, X, Y), none, none);
+			arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0],
+			      none, none);
+			arith(fpc, 0, MOV, dst, mask, tmp, none, none);
+		} else {
+			arith(fpc, sat, DDY, dst, mask, src[0], none, none);
+		}
+		break;
+	case TGSI_OPCODE_DP3:
+		arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_DP4:
+		arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_DPH:
+		tmp = temp(fpc);
+		arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
+		arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
+		      swz(src[1], W, W, W, W), none);
+		break;
+	case TGSI_OPCODE_DST:
+		arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_EX2:
+		arith(fpc, sat, EX2, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_FLR:
+		arith(fpc, sat, FLR, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_FRC:
+		arith(fpc, sat, FRC, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_KILP:
+		arith(fpc, 0, KIL, none, 0, none, none, none);
+		break;
+	case TGSI_OPCODE_KIL:
+		/* Load the condition codes from src0, kill where < 0 */
+		dst = nv40_sr(NV40SR_NONE, 0);
+		dst.cc_update = 1;
+		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
+		dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT;
+		arith(fpc, 0, KIL, dst, 0, none, none, none);
+		break;
+	case TGSI_OPCODE_LG2:
+		arith(fpc, sat, LG2, dst, mask, src[0], none, none);
+		break;
+//	case TGSI_OPCODE_LIT:
+	case TGSI_OPCODE_LRP:
+		tmp = temp(fpc);
+		arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
+		arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
+		break;
+	case TGSI_OPCODE_MAD:
+		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
+		break;
+	case TGSI_OPCODE_MAX:
+		arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_MIN:
+		arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_MOV:
+		arith(fpc, sat, MOV, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_MUL:
+		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_NOISE1:
+	case TGSI_OPCODE_NOISE2:
+	case TGSI_OPCODE_NOISE3:
+	case TGSI_OPCODE_NOISE4:
+		/* No noise support: emitted as SFL with no sources */
+		arith(fpc, sat, SFL, dst, mask, none, none, none);
+		break;
+	case TGSI_OPCODE_POW:
+		/* pow(a, b) = ex2(b * lg2(a)) */
+		tmp = temp(fpc);
+		arith(fpc, 0, LG2, tmp, MASK_X,
+		      swz(src[0], X, X, X, X), none, none);
+		arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+		      swz(src[1], X, X, X, X), none);
+		arith(fpc, sat, EX2, dst, mask,
+		      swz(tmp, X, X, X, X), none, none);
+		break;
+	case TGSI_OPCODE_RCP:
+		arith(fpc, sat, RCP, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_RET:
+		assert(0);
+		break;
+	case TGSI_OPCODE_RFL:
+		tmp = temp(fpc);
+		arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none);
+		arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none);
+		arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z,
+		      swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
+		arith(fpc, sat, MAD, dst, mask,
+		      swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
+		break;
+	case TGSI_OPCODE_RSQ:
+		/* rsq(a) = ex2(-0.5 * lg2(|a|)) */
+		tmp = temp(fpc);
+		arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X,
+		      abs(swz(src[0], X, X, X, X)), none, none);
+		arith(fpc, sat, EX2, dst, mask,
+		      neg(swz(tmp, X, X, X, X)), none, none);
+		break;
+	case TGSI_OPCODE_SCS:
+		if (mask & MASK_X) {
+			arith(fpc, sat, COS, dst, MASK_X,
+			      swz(src[0], X, X, X, X), none, none);
+		}
+		if (mask & MASK_Y) {
+			arith(fpc, sat, SIN, dst, MASK_Y,
+			      swz(src[0], X, X, X, X), none, none);
+		}
+		break;
+	case TGSI_OPCODE_SEQ:
+		arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SFL:
+		arith(fpc, sat, SFL, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SGE:
+		arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SGT:
+		arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SIN:
+		arith(fpc, sat, SIN, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_SLE:
+		arith(fpc, sat, SLE, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SLT:
+		arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SNE:
+		arith(fpc, sat, SNE, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_STR:
+		arith(fpc, sat, STR, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SUB:
+		arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
+		break;
+	case TGSI_OPCODE_TEX:
+		tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_TXB:
+		tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_TXP:
+		tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+		break;
+	case TGSI_OPCODE_XPD:
+		tmp = temp(fpc);
+		arith(fpc, 0, MUL, tmp, mask,
+		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+		arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
+		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+		      neg(tmp));
+		break;
+	default:
+		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+		return FALSE;
+	}
+
+	release_temps(fpc);
+	return TRUE;
+}
+
+/* Record which hardware input source a declared TGSI input maps to.
+ *
+ * Supported semantics are position, colour 0/1, fog and generic 0..7.
+ * Returns FALSE after logging an error for anything else.
+ */
+static boolean
+nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
+				const struct tgsi_full_declaration *fdec)
+{
+	const unsigned sem_index = fdec->Semantic.SemanticIndex;
+	int hw;
+
+	switch (fdec->Semantic.SemanticName) {
+	case TGSI_SEMANTIC_POSITION:
+		hw = NV40_FP_OP_INPUT_SRC_POSITION;
+		break;
+	case TGSI_SEMANTIC_COLOR:
+		switch (sem_index) {
+		case 0:
+			hw = NV40_FP_OP_INPUT_SRC_COL0;
+			break;
+		case 1:
+			hw = NV40_FP_OP_INPUT_SRC_COL1;
+			break;
+		default:
+			NOUVEAU_ERR("bad colour semantic index\n");
+			return FALSE;
+		}
+		break;
+	case TGSI_SEMANTIC_FOG:
+		hw = NV40_FP_OP_INPUT_SRC_FOGC;
+		break;
+	case TGSI_SEMANTIC_GENERIC:
+		if (sem_index > 7) {
+			NOUVEAU_ERR("bad generic semantic index\n");
+			return FALSE;
+		}
+		hw = NV40_FP_OP_INPUT_SRC_TC(sem_index);
+		break;
+	default:
+		NOUVEAU_ERR("bad input semantic\n");
+		return FALSE;
+	}
+
+	fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+	return TRUE;
+}
+
+/* Assign a hardware result register to a declared TGSI output.
+ *
+ * Position uses register 1, colours 0..3 use registers 0, 2, 3 and 4.
+ * The chosen register is marked as used in r_temps so that temp() never
+ * hands it out.  Returns FALSE after logging an error for unsupported
+ * semantics.
+ */
+static boolean
+nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
+				const struct tgsi_full_declaration *fdec)
+{
+	const unsigned out = fdec->DeclarationRange.First;
+	unsigned hw;
+
+	if (fdec->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+		hw = 1;
+	} else if (fdec->Semantic.SemanticName == TGSI_SEMANTIC_COLOR) {
+		switch (fdec->Semantic.SemanticIndex) {
+		case 0: hw = 0; break;
+		case 1: hw = 2; break;
+		case 2: hw = 3; break;
+		case 3: hw = 4; break;
+		default:
+			NOUVEAU_ERR("bad rcol index\n");
+			return FALSE;
+		}
+	} else {
+		NOUVEAU_ERR("bad output semantic\n");
+		return FALSE;
+	}
+
+	fpc->r_temps |= (1 << hw);
+	fpc->r_result[out] = nv40_sr(NV40SR_OUTPUT, hw);
+	return TRUE;
+}
+
+/* First pass over the TGSI tokens: process declarations and immediates.
+ *
+ * Inputs and outputs are mapped to hardware registers, every immediate
+ * gets a constant slot, and one hardware temp is reserved for each TGSI
+ * temporary up to the highest declared index.
+ *
+ * Returns FALSE if a declaration is unsupported, if there are more than
+ * MAX_IMM immediates, or if the temp table cannot be allocated.  The
+ * last two were previously an assert-only check and an unchecked
+ * allocation.
+ */
+static boolean
+nv40_fragprog_prepare(struct nv40_fpc *fpc)
+{
+	struct tgsi_parse_context p;
+	int high_temp = -1, i;
+
+	tgsi_parse_init(&p, fpc->fp->pipe.tokens);
+	while (!tgsi_parse_end_of_tokens(&p)) {
+		const union tgsi_full_token *tok = &p.FullToken;
+
+		tgsi_parse_token(&p);
+		switch(tok->Token.Type) {
+		case TGSI_TOKEN_TYPE_DECLARATION:
+		{
+			const struct tgsi_full_declaration *fdec;
+			fdec = &p.FullToken.FullDeclaration;
+			switch (fdec->Declaration.File) {
+			case TGSI_FILE_INPUT:
+				if (!nv40_fragprog_parse_decl_attrib(fpc, fdec))
+					goto out_err;
+				break;
+			case TGSI_FILE_OUTPUT:
+				if (!nv40_fragprog_parse_decl_output(fpc, fdec))
+					goto out_err;
+				break;
+			case TGSI_FILE_TEMPORARY:
+				/* Only the highest index matters here */
+				if (fdec->DeclarationRange.Last > high_temp) {
+					high_temp =
+						fdec->DeclarationRange.Last;
+				}
+				break;
+			default:
+				break;
+			}
+		}
+			break;
+		case TGSI_TOKEN_TYPE_IMMEDIATE:
+		{
+			struct tgsi_full_immediate *imm;
+			float vals[4];
+
+			imm = &p.FullToken.FullImmediate;
+			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+			if (fpc->nr_imm >= MAX_IMM) {
+				NOUVEAU_ERR("too many immediates\n");
+				goto out_err;
+			}
+
+			vals[0] = imm->u.ImmediateFloat32[0].Float;
+			vals[1] = imm->u.ImmediateFloat32[1].Float;
+			vals[2] = imm->u.ImmediateFloat32[2].Float;
+			vals[3] = imm->u.ImmediateFloat32[3].Float;
+			fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
+		}
+			break;
+		default:
+			break;
+		}
+	}
+	tgsi_parse_free(&p);
+
+	/* high_temp becomes the number of TGSI temporaries */
+	if (++high_temp) {
+		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+		if (!fpc->r_temp)
+			return FALSE;
+		for (i = 0; i < high_temp; i++)
+			fpc->r_temp[i] = temp(fpc);
+		/* These temps live for the whole program */
+		fpc->r_temps_discard = 0;
+	}
+
+	return TRUE;
+
+out_err:
+	if (fpc->r_temp) {
+		FREE(fpc->r_temp);
+		fpc->r_temp = NULL;
+	}
+	tgsi_parse_free(&p);
+	return FALSE;
+}
+
+/* Translate the TGSI tokens of fp into hardware instructions.
+ *
+ * On success fp->insn holds the program followed by a NOP+END
+ * instruction, fp->fp_control carries the temp count, and
+ * fp->translated is set.
+ *
+ * On failure fp->translated stays FALSE and the partially built
+ * instruction and constant arrays are discarded, so that a later retry
+ * starts from an empty program instead of appending to the remains of
+ * this attempt.  Bits already OR-ed into fp_control and samplers are
+ * left as they are.
+ */
+static void
+nv40_fragprog_translate(struct nv40_context *nv40,
+			struct nv40_fragment_program *fp)
+{
+	struct tgsi_parse_context parse;
+	struct nv40_fpc *fpc = NULL;
+
+	fpc = CALLOC(1, sizeof(struct nv40_fpc));
+	if (!fpc)
+		return;
+	fpc->fp = fp;
+	fpc->num_regs = 2;
+
+	if (!nv40_fragprog_prepare(fpc)) {
+		FREE(fpc);
+		return;
+	}
+
+	tgsi_parse_init(&parse, fp->pipe.tokens);
+
+	while (!tgsi_parse_end_of_tokens(&parse)) {
+		tgsi_parse_token(&parse);
+
+		switch (parse.FullToken.Token.Type) {
+		case TGSI_TOKEN_TYPE_INSTRUCTION:
+		{
+			const struct tgsi_full_instruction *finst;
+
+			finst = &parse.FullToken.FullInstruction;
+			if (!nv40_fragprog_parse_instruction(fpc, finst))
+				goto out_err;
+		}
+			break;
+		default:
+			break;
+		}
+	}
+
+	fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;
+
+	/* Terminate final instruction, if any was emitted at all */
+	if (fp->insn_len)
+		fp->insn[fpc->inst_offset] |= 0x00000001;
+
+	/* Append NOP + END instruction, may or may not be necessary. */
+	fpc->inst_offset = fp->insn_len;
+	grow_insns(fpc, 4);
+	fp->insn[fpc->inst_offset + 0] = 0x00000001;
+	fp->insn[fpc->inst_offset + 1] = 0x00000000;
+	fp->insn[fpc->inst_offset + 2] = 0x00000000;
+	fp->insn[fpc->inst_offset + 3] = 0x00000000;
+
+	fp->translated = TRUE;
+out_err:
+	tgsi_parse_free(&parse);
+
+	if (!fp->translated) {
+		/* Both arrays are grown with realloc() */
+		free(fp->insn);
+		fp->insn = NULL;
+		fp->insn_len = 0;
+		free(fp->consts);
+		fp->consts = NULL;
+		fp->nr_consts = 0;
+	}
+
+	if (fpc->r_temp)
+		FREE(fpc->r_temp);
+	FREE(fpc);
+}
+
+/* Copy the translated instructions into the program's buffer object.
+ *
+ * On little-endian hosts the dwords are copied as they are; otherwise
+ * the two 16-bit halves of every dword are exchanged.  If the buffer
+ * cannot be mapped an error is logged and nothing is written;
+ * previously the NULL mapping was written through.
+ */
+static void
+nv40_fragprog_upload(struct nv40_context *nv40,
+		     struct nv40_fragment_program *fp)
+{
+	struct pipe_winsys *ws = nv40->pipe.winsys;
+	const uint32_t le = 1;
+	uint32_t *map;
+	int i;
+
+	map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+	if (!map) {
+		NOUVEAU_ERR("failed to map fragment program buffer\n");
+		return;
+	}
+
+#if 0
+	for (i = 0; i < fp->insn_len; i++) {
+		fflush(stdout); fflush(stderr);
+		NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
+		fflush(stdout); fflush(stderr);
+	}
+#endif
+
+	/* First byte of the value 1 is non-zero only on little-endian */
+	if ((*(const uint8_t *)&le)) {
+		for (i = 0; i < fp->insn_len; i++) {
+			map[i] = fp->insn[i];
+		}
+	} else {
+		/* Weird swapping for big-endian chips */
+		for (i = 0; i < fp->insn_len; i++) {
+			map[i] = ((fp->insn[i] & 0xffff) << 16) |
+				  ((fp->insn[i] >> 16) & 0xffff);
+		}
+	}
+
+	ws->buffer_unmap(ws, fp->buffer);
+}
+
+/* Validate the bound fragment program.
+ *
+ * On first use the program is translated, uploaded into a newly created
+ * buffer, and a state object pointing the hardware at it is built.  If
+ * translation fails the swrast fallback bit is set and FALSE is
+ * returned.
+ *
+ * On every call the constants referenced by the program are compared
+ * with the constant buffer contents; changed values are patched into
+ * the instruction stream and the program is uploaded again.
+ *
+ * Returns TRUE when the hardware state object was (re)bound.
+ */
+static boolean
+nv40_fragprog_validate(struct nv40_context *nv40)
+{
+	struct nv40_fragment_program *fp = nv40->fragprog;
+	struct pipe_buffer *constbuf =
+		nv40->constbuf[PIPE_SHADER_FRAGMENT];
+	struct pipe_winsys *ws = nv40->pipe.winsys;
+	boolean consts_changed = FALSE;
+	int c;
+
+	if (!fp->translated) {
+		struct nouveau_stateobj *so;
+
+		nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG;
+		nv40_fragprog_translate(nv40, fp);
+		if (!fp->translated) {
+			nv40->fallback_swrast |= NV40_NEW_FRAGPROG;
+			return FALSE;
+		}
+
+		fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4);
+		nv40_fragprog_upload(nv40, fp);
+
+		so = so_new(4, 1);
+		so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1);
+		so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+			  NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+			  NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
+		so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1);
+		so_data  (so, fp->fp_control);
+		so_ref(so, &fp->so);
+	}
+
+	/* Refresh the constants embedded in the instruction stream */
+	if (fp->nr_consts) {
+		float *map;
+
+		map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ);
+		for (c = 0; c < fp->nr_consts; c++) {
+			struct nv40_fragment_program_data *fpd = &fp->consts[c];
+			uint32_t *inl = &fp->insn[fpd->offset];
+			uint32_t *cur = (uint32_t *)&map[fpd->index * 4];
+
+			if (memcmp(inl, cur, 4 * sizeof(float))) {
+				memcpy(inl, cur, 4 * sizeof(float));
+				consts_changed = TRUE;
+			}
+		}
+		ws->buffer_unmap(ws, constbuf);
+
+		if (consts_changed)
+			nv40_fragprog_upload(nv40, fp);
+	}
+
+	if (!consts_changed && fp->so == nv40->state.hw[NV40_STATE_FRAGPROG])
+		return FALSE;
+
+	so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]);
+	return TRUE;
+}
+
+/* Free the translated instruction array of fp, if one exists.  Nothing
+ * else owned by the program is released here.
+ */
+void
+nv40_fragprog_destroy(struct nv40_context *nv40,
+		      struct nv40_fragment_program *fp)
+{
+	if (!fp->insn_len)
+		return;
+
+	FREE(fp->insn);
+}
+
+struct nv40_state_entry nv40_state_fragprog = { /* state entry for the fragment program */
+ .validate = nv40_fragprog_validate,
+ .dirty = {
+ .pipe = NV40_NEW_FRAGPROG, /* presumably the pipe-state change that triggers validate - confirm against the state validation loop */
+ .hw = NV40_STATE_FRAGPROG /* index into nv40->state.hw[] that validate rebinds */
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c
new file mode 100644
index 0000000000..0227d22620
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_fragtex.c
@@ -0,0 +1,168 @@
+#include "nv40_context.h"
+
+/* Expands to one struct nv40_texture_format initializer.
+ *
+ * m            - PIPE_FORMAT_* suffix
+ * tf           - NV40TCL_TEX_FORMAT_FORMAT_* suffix
+ * ts0x..ts0w   - NV40TCL_TEX_SWIZZLE_S0_{X,Y,Z,W}_* suffixes
+ * ts1x..ts1w   - NV40TCL_TEX_SWIZZLE_S1_{X,Y,Z,W}_* suffixes
+ * sx..sw       - multipliers for the NV40TCL_TEX_FILTER_SIGNED_* bits; the
+ *                arguments are not parenthesized, so pass plain 0 or 1
+ */
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV40TCL_TEX_FORMAT_FORMAT_##tf, \
+ (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \
+ NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \
+ NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \
+ NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \
+ ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) | \
+ (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw)) \
+}
+
+/* One row of the pipe-format to hardware-texture-format table. */
+struct nv40_texture_format {
+ /* TRUE for real rows; nv40_fragtex_format() stops at the first FALSE */
+ boolean defined;
+ /* PIPE_FORMAT_* value this row is matched against */
+ uint pipe;
+ /* NV40TCL_TEX_FORMAT_FORMAT_* bits, OR'd into the texture format word */
+ int format;
+ /* NV40TCL_TEX_SWIZZLE_* bits, emitted as the swizzle word */
+ int swizzle;
+ /* NV40TCL_TEX_FILTER_SIGNED_* bits, OR'd into the filter word */
+ int sign;
+};
+
+/* Texture formats known to the fragment texture code.
+ *
+ * Columns: pipe format, hw format, S0 swizzle x/y/z/w, S1 swizzle x/y/z/w,
+ * signed flags x/y/z/w.  The trailing empty initializer leaves `defined`
+ * zero and terminates the search in nv40_fragtex_format().
+ */
+static struct nv40_texture_format
+nv40_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0),
+ _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1),
+ _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0),
+ _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0),
+ _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ {},
+};
+
+/* Looks up the table row describing a pipe format.
+ *
+ * Returns the matching row of nv40_texture_formats, or NULL (after logging
+ * an error) when the format is not listed.
+ */
+static struct nv40_texture_format *
+nv40_fragtex_format(uint pipe_format)
+{
+	struct nv40_texture_format *entry;
+
+	/* The table ends at the first row whose `defined` flag is clear */
+	for (entry = nv40_texture_formats; entry->defined; entry++) {
+		if (entry->pipe == pipe_format)
+			return entry;
+	}
+
+	NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format));
+	return NULL;
+}
+
+
+/* Builds the state object programming texture unit `unit`.
+ *
+ * Combines the sampler state in nv40->tex_sampler[unit] with the miptree in
+ * nv40->tex_miptree[unit] into the NV40TCL_TEX_* methods for that unit.
+ *
+ * Returns the new state object, or NULL when the texture's format has no
+ * entry in the format table or its target is not handled.
+ */
+static struct nouveau_stateobj *
+nv40_fragtex_build(struct nv40_context *nv40, int unit)
+{
+	struct nv40_sampler_state *ps = nv40->tex_sampler[unit];
+	struct nv40_miptree *nv40mt = nv40->tex_miptree[unit];
+	struct pipe_texture *pt = &nv40mt->base;
+	struct nv40_texture_format *tf;
+	struct nouveau_stateobj *so;
+	uint32_t txf, txs, txp;
+	unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+	tf = nv40_fragtex_format(pt->format);
+	if (!tf) {
+		/* Trap in debug builds, but never fall through to tf->format
+		 * when asserts are compiled out.
+		 */
+		assert(0);
+		return NULL;
+	}
+
+	txf  = ps->fmt;
+	txf |= tf->format | 0x8000;
+	txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
+
+	if (1) /* XXX */
+		txf |= NV40TCL_TEX_FORMAT_NO_BORDER;
+
+	switch (pt->target) {
+	case PIPE_TEXTURE_CUBE:
+		txf |= NV40TCL_TEX_FORMAT_CUBIC;
+		/* fall-through */
+	case PIPE_TEXTURE_2D:
+		txf |= NV40TCL_TEX_FORMAT_DIMS_2D;
+		break;
+	case PIPE_TEXTURE_3D:
+		txf |= NV40TCL_TEX_FORMAT_DIMS_3D;
+		break;
+	case PIPE_TEXTURE_1D:
+		txf |= NV40TCL_TEX_FORMAT_DIMS_1D;
+		break;
+	default:
+		NOUVEAU_ERR("Unknown target %d\n", pt->target);
+		return NULL;
+	}
+
+	/* Only linear textures carry a pitch; otherwise it is left at 0 */
+	if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+		txp = 0;
+	} else {
+		txp  = nv40mt->level[0].pitch;
+		txf |= NV40TCL_TEX_FORMAT_LINEAR;
+	}
+
+	txs = tf->swizzle;
+
+	so = so_new(16, 2);
+	so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8);
+	so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+	so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR,
+		  NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1);
+	so_data  (so, ps->wrap);
+	so_data  (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
+	so_data  (so, txs);
+	so_data  (so, ps->filt | tf->sign | 0x2000 /*voodoo*/);
+	so_data  (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) |
+		       pt->height[0]);
+	so_data  (so, ps->bcol);
+	so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1);
+	so_data  (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+
+	return so;
+}
+
+/* Refreshes the per-unit texture state objects.
+ *
+ * Units recorded in state->fp_samplers that the current fragment program no
+ * longer samples get a state object writing 0 to TEX_ENABLE; units that are
+ * both dirty and sampled get a freshly built one.  Each touched unit is
+ * flagged in state->dirty.  Always returns FALSE.
+ */
+static boolean
+nv40_fragtex_validate(struct nv40_context *nv40)
+{
+	struct nv40_fragment_program *fp = nv40->fragprog;
+	struct nv40_state *state = &nv40->state;
+	struct nouveau_stateobj *so;
+	unsigned pending, unit;
+
+	/* Previously used units the program dropped */
+	for (pending = state->fp_samplers & ~fp->samplers; pending;
+	     pending &= ~(1 << unit)) {
+		unit = ffs(pending) - 1;
+
+		so = so_new(2, 0);
+		so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1);
+		so_data  (so, 0);
+		so_ref(so, &state->hw[NV40_STATE_FRAGTEX0 + unit]);
+		state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit));
+	}
+
+	/* Dirty units the program samples */
+	for (pending = nv40->dirty_samplers & fp->samplers; pending;
+	     pending &= ~(1 << unit)) {
+		unit = ffs(pending) - 1;
+
+		so = nv40_fragtex_build(nv40, unit);
+		so_ref(so, &state->hw[NV40_STATE_FRAGTEX0 + unit]);
+		state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit));
+	}
+
+	state->fp_samplers = fp->samplers;
+	return FALSE;
+}
+
+/* State entry for fragment textures.  .hw is 0 because
+ * nv40_fragtex_validate() sets the per-unit bits in state->dirty itself.
+ */
+struct nv40_state_entry nv40_state_fragtex = {
+ .validate = nv40_fragtex_validate,
+ .dirty = {
+ .pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG,
+ .hw = 0
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
new file mode 100644
index 0000000000..e38b1e7f5c
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -0,0 +1,238 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv40_context.h"
+
+/* Fills in the per-level dimensions and pitches of a miptree, allocates and
+ * fills the per-level image offset arrays (one entry per face / depth
+ * slice), and stores the accumulated size in mt->total_size.
+ */
+static void
+nv40_miptree_layout(struct nv40_miptree *mt)
+{
+ struct pipe_texture *pt = &mt->base;
+ uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+ uint offset = 0;
+ int nr_faces, l, f;
+ /* Non-zero when any of the listed usage flags is set */
+ uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
+ PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
+ PIPE_TEXTURE_USAGE_RENDER_TARGET |
+ PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+ PIPE_TEXTURE_USAGE_PRIMARY);
+
+ /* Cube maps have 6 images per level, 3D textures one per depth slice
+ * of level 0, everything else a single image.
+ */
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ nr_faces = pt->depth[0];
+ } else {
+ nr_faces = 1;
+ }
+
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->depth[l] = depth;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ /* Linear wide-pitch textures use the 64-aligned level 0 pitch for
+ * every level; all others use the level's own width.
+ */
+ if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
+ mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+ else
+ mt->level[l].pitch = pt->width[l] * pt->block.size;
+
+ /* NOTE(review): the allocation result is not checked before the
+ * offsets are written below.
+ */
+ mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ /* Next level: halve each dimension, never going below 1 */
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+ depth = MAX2(1, depth >> 1);
+ }
+
+ /* Offsets are assigned face by face, with all levels of a face stored
+ * one after another.
+ */
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l < pt->last_level; l++) {
+ mt->level[l].image_offset[f] = offset;
+
+ /* For non-linear textures the level size is rounded up to 64
+ * as long as the next level is larger than 1 in both width
+ * and height.
+ */
+ if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
+ pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
+ offset += align(mt->level[l].pitch * pt->height[l], 64);
+ else
+ offset += mt->level[l].pitch * pt->height[l];
+ }
+
+ /* l == pt->last_level here: the last level is never padded */
+ mt->level[l].image_offset[f] = offset;
+ offset += mt->level[l].pitch * pt->height[l];
+ }
+
+ mt->total_size = offset;
+}
+
+/* Creates a miptree for the texture template `pt` and allocates its buffer.
+ *
+ * The texture is marked NOUVEAU_TEXTURE_USAGE_LINEAR when it is not
+ * power-of-two in width or height, when it is a primary / display target /
+ * depth-stencil or dynamic texture, when its format is not one of the
+ * formats listed in the switch below, or when NOUVEAU_NO_SWIZZLE is set.
+ *
+ * Returns the new texture with a refcount of 1, or NULL on allocation
+ * failure.
+ */
+static struct pipe_texture *
+nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
+{
+	struct pipe_winsys *ws = pscreen->winsys;
+	struct nv40_miptree *mt;
+	unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL |
+			     NOUVEAU_BUFFER_USAGE_TEXTURE;
+	int l;
+
+	/* Zeroed allocation: shadow_tex, shadow_surface and the unused level
+	 * slots start out NULL.
+	 */
+	mt = CALLOC_STRUCT(nv40_miptree);
+	if (!mt)
+		return NULL;
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+
+	/* Swizzled textures must be POT */
+	if (pt->width[0] & (pt->width[0] - 1) ||
+	    pt->height[0] & (pt->height[0] - 1))
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	else
+	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
+			     PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+			     PIPE_TEXTURE_USAGE_DEPTH_STENCIL))
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	else
+	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	else {
+		switch (pt->format) {
+		/* TODO: Figure out which formats can be swizzled */
+		case PIPE_FORMAT_A8R8G8B8_UNORM:
+		case PIPE_FORMAT_X8R8G8B8_UNORM:
+		case PIPE_FORMAT_R16_SNORM:
+		{
+			if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE))
+				mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+			break;
+		}
+		default:
+			mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+		}
+	}
+
+	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
+		buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+
+	nv40_miptree_layout(mt);
+
+	mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size);
+	if (!mt->buffer) {
+		/* The layout pass allocated one offset array per level;
+		 * release them so a failed create does not leak.
+		 */
+		for (l = 0; l <= mt->base.last_level; l++) {
+			if (mt->level[l].image_offset)
+				FREE(mt->level[l].image_offset);
+		}
+		FREE(mt);
+		return NULL;
+	}
+
+	return &mt->base;
+}
+
+/* Wraps an existing buffer `pb` in a miptree described by `pt`.
+ *
+ * Only 2D textures with a single mip level and a depth of 1 are accepted.
+ * stride[0] becomes the pitch of level 0, and a reference is taken on `pb`.
+ *
+ * Returns the new texture with a refcount of 1, or NULL when the texture
+ * kind is unsupported or an allocation fails.
+ */
+static struct pipe_texture *
+nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
+		     const unsigned *stride, struct pipe_buffer *pb)
+{
+	struct nv40_miptree *mt;
+
+	/* Only supports 2D, non-mipmapped textures for the moment */
+	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
+	    pt->depth[0] != 1)
+		return NULL;
+
+	mt = CALLOC_STRUCT(nv40_miptree);
+	if (!mt)
+		return NULL;
+
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+	mt->level[0].pitch = stride[0];
+	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
+	if (!mt->level[0].image_offset) {
+		/* Surfaces read image_offset[0]; fail here rather than leave
+		 * a NULL array behind.
+		 */
+		FREE(mt);
+		return NULL;
+	}
+
+	pipe_buffer_reference(pscreen, &mt->buffer, pb);
+	return &mt->base;
+}
+
+/* Drops one reference on *ppt and clears the caller's pointer.
+ *
+ * When the last reference goes away, the buffer reference, the per-level
+ * offset arrays, any shadow surface / shadow texture and the miptree itself
+ * are released.
+ */
+static void
+nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+	struct pipe_texture *tex = *ppt;
+	struct nv40_miptree *mt = (struct nv40_miptree *)tex;
+	int level;
+
+	*ppt = NULL;
+	if (--tex->refcount != 0)
+		return;
+
+	pipe_buffer_reference(pscreen, &mt->buffer, NULL);
+
+	for (level = 0; level <= tex->last_level; level++) {
+		if (!mt->level[level].image_offset)
+			continue;
+		FREE(mt->level[level].image_offset);
+	}
+
+	if (mt->shadow_tex) {
+		if (mt->shadow_surface)
+			pscreen->tex_surface_release(pscreen, &mt->shadow_surface);
+		nv40_miptree_release(pscreen, &mt->shadow_tex);
+	}
+
+	FREE(mt);
+}
+
+/* Creates a surface for one image of a texture.
+ *
+ * The image is selected by `face` for cube maps, by `zslice` for 3D
+ * textures and is image 0 otherwise.  The surface takes a reference on the
+ * texture and starts with a refcount of 1.
+ *
+ * Returns NULL when the surface cannot be allocated.
+ */
+static struct pipe_surface *
+nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+			 unsigned face, unsigned level, unsigned zslice,
+			 unsigned flags)
+{
+	struct nv40_miptree *mt = (struct nv40_miptree *)pt;
+	struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface);
+	unsigned image;
+
+	if (!surf)
+		return NULL;
+
+	/* Which entry of the level's offset array this surface refers to */
+	switch (pt->target) {
+	case PIPE_TEXTURE_CUBE:
+		image = face;
+		break;
+	case PIPE_TEXTURE_3D:
+		image = zslice;
+		break;
+	default:
+		image = 0;
+		break;
+	}
+
+	pipe_texture_reference(&surf->texture, pt);
+	surf->format   = pt->format;
+	surf->width    = pt->width[level];
+	surf->height   = pt->height[level];
+	surf->block    = pt->block;
+	surf->nblocksx = pt->nblocksx[level];
+	surf->nblocksy = pt->nblocksy[level];
+	surf->stride   = mt->level[level].pitch;
+	surf->usage    = flags;
+	surf->status   = PIPE_SURFACE_STATUS_DEFINED;
+	surf->refcount = 1;
+	surf->face     = face;
+	surf->level    = level;
+	surf->zslice   = zslice;
+	surf->offset   = mt->level[level].image_offset[image];
+
+	return surf;
+}
+
+/* Drops one reference on *psurface and clears the caller's pointer; the
+ * surface and its texture reference are released once the count hits zero.
+ */
+static void
+nv40_miptree_surface_del(struct pipe_screen *pscreen,
+			 struct pipe_surface **psurface)
+{
+	struct pipe_surface *surf = *psurface;
+
+	*psurface = NULL;
+
+	if (--surf->refcount <= 0) {
+		pipe_texture_reference(&surf->texture, NULL);
+		FREE(surf);
+	}
+}
+
+/* Installs the miptree-based texture and surface hooks on a screen. */
+void
+nv40_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv40_miptree_create;
+ pscreen->texture_blanket = nv40_miptree_blanket;
+ pscreen->texture_release = nv40_miptree_release;
+ pscreen->get_tex_surface = nv40_miptree_surface_new;
+ pscreen->tex_surface_release = nv40_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c
new file mode 100644
index 0000000000..9b9a43f49d
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_query.c
@@ -0,0 +1,122 @@
+#include "pipe/p_context.h"
+
+#include "nv40_context.h"
+
+/* Driver-side query object, handed to callers as a struct pipe_query *. */
+struct nv40_query {
+ /* Slot taken from the screen's query heap while a query is pending */
+ struct nouveau_resource *object;
+ /* Query type given at creation time */
+ unsigned type;
+ /* TRUE once the result has been read back and cached in `result` */
+ boolean ready;
+ /* Cached notifier return value */
+ uint64_t result;
+};
+
+/* Casts an opaque pipe_query handle back to the driver's query struct. */
+static INLINE struct nv40_query *
+nv40_query(struct pipe_query *pipe)
+{
+ return (struct nv40_query *)pipe;
+}
+
+/* Allocates a zeroed query object of the given type.
+ *
+ * Returns the query as an opaque pipe_query handle, or NULL when the
+ * allocation fails.
+ */
+static struct pipe_query *
+nv40_query_create(struct pipe_context *pipe, unsigned query_type)
+{
+	struct nv40_query *q;
+
+	q = CALLOC(1, sizeof(struct nv40_query));
+	if (!q)
+		return NULL;
+	q->type = query_type;
+
+	return (struct pipe_query *)q;
+}
+
+/* Frees a query, first handing back its heap slot if it still holds one. */
+static void
+nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+	struct nv40_query *query = nv40_query(pq);
+
+	if (query->object) {
+		struct nv40_context *nv40 = nv40_context(pipe);
+
+		nv40->nvws->res_free(&query->object);
+	}
+
+	FREE(query);
+}
+
+/* Starts an occlusion query: takes a slot from the screen's query heap,
+ * resets the notifier at that slot, emits the QUERY_RESET and
+ * QUERY_UNK17CC methods and marks the query as not ready.
+ */
+static void
+nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_query *q = nv40_query(pq);
+
+ assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ /* Happens when end_query() is called, then another begin_query()
+ * without querying the result in-between. For now we'll wait for
+ * the existing query to notify completion, but it could be better.
+ */
+ if (q->object) {
+ uint64_t tmp;
+ pipe->get_query_result(pipe, pq, 1, &tmp);
+ }
+
+ /* NOTE(review): allocation failure is only caught by the assert;
+ * q->object is dereferenced right after.
+ */
+ if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object))
+ assert(0);
+ nv40->nvws->notifier_reset(nv40->screen->query, q->object->start);
+
+ BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1);
+ OUT_RING (1);
+ BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1);
+ OUT_RING (1);
+
+ q->ready = FALSE;
+}
+
+/* Ends a query: emits QUERY_GET with an offset derived from the query's
+ * heap slot (start * 32) and fires the ring.
+ *
+ * NOTE(review): q->object is dereferenced unchecked, so this presumably
+ * must follow a successful nv40_query_begin() - confirm against callers.
+ */
+static void
+nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ /* NOTE(review): nv40 is not referenced by name below; presumably the
+ * ring macros pick it up - confirm before renaming or removing it.
+ */
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_query *q = nv40_query(pq);
+
+ BEGIN_RING(curie, NV40TCL_QUERY_GET, 1);
+ OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
+ ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT));
+ FIRE_RING(NULL);
+}
+
+/* Fetches the result of an occlusion query.
+ *
+ * While the result is not cached yet, the notifier at the query's heap slot
+ * is checked; if it has not completed, the call returns FALSE when `wait`
+ * is FALSE and otherwise waits for completion.  The value is then cached,
+ * the query is marked ready and the heap slot is handed back, so later
+ * calls return the cached value.
+ *
+ * Returns TRUE with *result filled in, or FALSE when the result is not
+ * available yet and `wait` is FALSE.
+ */
+static boolean
+nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+		  boolean wait, uint64_t *result)
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
+	struct nv40_query *q = nv40_query(pq);
+	struct nouveau_winsys *nvws = nv40->nvws;
+
+	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+	if (!q->ready) {
+		unsigned status;
+
+		/* The slot is only needed while the result is still pending.
+		 * Once it is cached the slot has been passed to res_free(),
+		 * and requiring it here would make a repeated call assert.
+		 */
+		assert(q->object);
+
+		status = nvws->notifier_status(nv40->screen->query,
+					       q->object->start);
+		if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
+			if (wait == FALSE)
+				return FALSE;
+			nvws->notifier_wait(nv40->screen->query, q->object->start,
+					    NV_NOTIFY_STATE_STATUS_COMPLETED,
+					    0);
+		}
+
+		q->result = nvws->notifier_retval(nv40->screen->query,
+						  q->object->start);
+		q->ready = TRUE;
+		nvws->res_free(&q->object);
+	}
+
+	*result = q->result;
+	return TRUE;
+}
+
+/* Installs the query hooks on the context's pipe interface. */
+void
+nv40_init_query_functions(struct nv40_context *nv40)
+{
+ nv40->pipe.create_query = nv40_query_create;
+ nv40->pipe.destroy_query = nv40_query_destroy;
+ nv40->pipe.begin_query = nv40_query_begin;
+ nv40->pipe.end_query = nv40_query_end;
+ nv40->pipe.get_query_result = nv40_query_result;
+}
diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
new file mode 100644
index 0000000000..2372bc8441
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -0,0 +1,383 @@
+#include "pipe/p_screen.h"
+#include "util/u_simple_screen.h"
+
+#include "nv40_context.h"
+#include "nv40_screen.h"
+
+/* Chipset bitmasks: bit N is tested with the low nibble of the chipset id
+ * (1 << (chipset & 0x0f)) to pick the 3D object class in
+ * nv40_screen_create().
+ */
+#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf
+#define NV4X_GRCLASS4497_CHIPSETS 0x00005450
+#define NV6X_GRCLASS4497_CHIPSETS 0x00000088
+
+/* Returns the device name as "NV" followed by the chipset id in hex.
+ *
+ * The string lives in a function-local static buffer that is overwritten
+ * by every call.
+ */
+static const char *
+nv40_screen_get_name(struct pipe_screen *pscreen)
+{
+	static char name[128];
+	struct nv40_screen *screen = nv40_screen(pscreen);
+
+	snprintf(name, sizeof(name), "NV%02X",
+		 screen->nvws->channel->device->chipset);
+	return name;
+}
+
+/* Returns the constant vendor string. */
+static const char *
+nv40_screen_get_vendor(struct pipe_screen *pscreen)
+{
+ return "nouveau";
+}
+
+/* Reports integer capabilities of the screen.
+ *
+ * Unknown parameters are logged and reported as 0.
+ */
+static int
+nv40_screen_get_param(struct pipe_screen *pscreen, int param)
+{
+	struct nv40_screen *screen = nv40_screen(pscreen);
+
+	switch (param) {
+	/* Features reported as supported */
+	case PIPE_CAP_NPOT_TEXTURES:
+	case PIPE_CAP_TWO_SIDED_STENCIL:
+	case PIPE_CAP_S3TC:
+	case PIPE_CAP_ANISOTROPIC_FILTER:
+	case PIPE_CAP_POINT_SPRITE:
+	case PIPE_CAP_OCCLUSION_QUERY:
+	case PIPE_CAP_TEXTURE_SHADOW_MAP:
+	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+	case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+	case NOUVEAU_CAP_HW_VTXBUF:
+		return 1;
+
+	/* Features reported as unsupported */
+	case PIPE_CAP_GLSL:
+		return 0;
+	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+		return 0; /* We have 4 - but unsupported currently */
+
+	/* Limits */
+	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+		return 16;
+	case PIPE_CAP_MAX_RENDER_TARGETS:
+		return 4;
+	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+		return 13;
+	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+		return 10;
+
+	/* Reported only for the NV40TCL object class */
+	case NOUVEAU_CAP_HW_IDXBUF:
+		return (screen->curie->grclass == NV40TCL) ? 1 : 0;
+
+	default:
+		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+		return 0;
+	}
+}
+
+/* Reports floating-point capabilities of the screen.
+ *
+ * Unknown parameters are logged and reported as 0.0.
+ */
+static float
+nv40_screen_get_paramf(struct pipe_screen *pscreen, int param)
+{
+	switch (param) {
+	case PIPE_CAP_MAX_LINE_WIDTH:
+	case PIPE_CAP_MAX_LINE_WIDTH_AA:
+		return 10.0;
+	case PIPE_CAP_MAX_POINT_WIDTH:
+	case PIPE_CAP_MAX_POINT_WIDTH_AA:
+		return 64.0;
+	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+		return 16.0;
+	default:
+		break;
+	}
+
+	NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+	return 0.0;
+}
+
+/* Tells whether `format` is accepted for the requested usage.
+ *
+ * Render-target usage is checked against the render-target list, every
+ * other usage against the texture list; `target` and `geom_flags` are not
+ * consulted.
+ */
+static boolean
+nv40_screen_surface_format_supported(struct pipe_screen *pscreen,
+				     enum pipe_format format,
+				     enum pipe_texture_target target,
+				     unsigned tex_usage, unsigned geom_flags)
+{
+	if (!(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET)) {
+		switch (format) {
+		case PIPE_FORMAT_A8R8G8B8_UNORM:
+		case PIPE_FORMAT_A1R5G5B5_UNORM:
+		case PIPE_FORMAT_A4R4G4B4_UNORM:
+		case PIPE_FORMAT_R5G6B5_UNORM:
+		case PIPE_FORMAT_R16_SNORM:
+		case PIPE_FORMAT_L8_UNORM:
+		case PIPE_FORMAT_A8_UNORM:
+		case PIPE_FORMAT_I8_UNORM:
+		case PIPE_FORMAT_A8L8_UNORM:
+		case PIPE_FORMAT_Z16_UNORM:
+		case PIPE_FORMAT_Z24S8_UNORM:
+		case PIPE_FORMAT_DXT1_RGB:
+		case PIPE_FORMAT_DXT1_RGBA:
+		case PIPE_FORMAT_DXT3_RGBA:
+		case PIPE_FORMAT_DXT5_RGBA:
+			return TRUE;
+		default:
+			return FALSE;
+		}
+	}
+
+	switch (format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case PIPE_FORMAT_R5G6B5_UNORM:
+	case PIPE_FORMAT_Z24S8_UNORM:
+	case PIPE_FORMAT_Z16_UNORM:
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+/* Returns the buffer of the miptree a surface belongs to. */
+static struct pipe_buffer *
+nv40_surface_buffer(struct pipe_surface *surf)
+{
+	return ((struct nv40_miptree *)surf->texture)->buffer;
+}
+
+/* Maps a surface for CPU access.
+ *
+ * Linear textures are mapped directly.  For all other textures a linear,
+ * dynamic shadow texture is created on first use, a surface matching
+ * face/level/zslice is taken from it and that shadow surface is mapped
+ * instead.
+ *
+ * Returns a pointer to the start of the surface data, or NULL when the
+ * shadow texture, the shadow surface or the buffer mapping cannot be
+ * obtained.
+ */
+static void *
+nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+		 unsigned flags )
+{
+	struct pipe_winsys *ws = screen->winsys;
+	struct pipe_surface *surface_to_map;
+	void *map;
+
+	if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+		struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture;
+
+		if (!mt->shadow_tex) {
+			/* Temporarily rewrite the usage so the texture can
+			 * act as the template for its own shadow copy.
+			 */
+			unsigned old_tex_usage = surface->texture->tex_usage;
+			surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR |
+						      PIPE_TEXTURE_USAGE_DYNAMIC;
+			mt->shadow_tex = screen->texture_create(screen, surface->texture);
+			surface->texture->tex_usage = old_tex_usage;
+
+			/* texture_create can fail; do not dereference NULL */
+			if (!mt->shadow_tex)
+				return NULL;
+
+			assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR);
+		}
+
+		mt->shadow_surface = screen->get_tex_surface
+		(
+			screen, mt->shadow_tex,
+			surface->face, surface->level, surface->zslice,
+			surface->usage
+		);
+		if (!mt->shadow_surface)
+			return NULL;
+
+		surface_to_map = mt->shadow_surface;
+	}
+	else
+		surface_to_map = surface;
+
+	assert(surface_to_map);
+	map = ws->buffer_map(ws, nv40_surface_buffer(surface_to_map), flags);
+	if (!map)
+		return NULL;
+
+	/* Arithmetic on void * is a compiler extension; go through char * */
+	return (char *)map + surface_to_map->offset;
+}
+
+/* Unmaps a surface previously mapped with nv40_surface_map().
+ *
+ * Linear textures are simply unmapped.  For all other textures the shadow
+ * surface is unmapped, copied into the real surface with the 2D engine and
+ * then released.
+ */
+static void
+nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+	struct pipe_winsys *ws = screen->winsys;
+	struct nv40_screen *nvscreen;
+	struct nv40_miptree *mt;
+
+	if (surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) {
+		ws->buffer_unmap(ws, nv40_surface_buffer(surface));
+		return;
+	}
+
+	/* TODO: Copy from shadow just before push buffer is flushed instead.
+	         There are probably some programs that map/unmap excessively
+	         before rendering. */
+	mt = (struct nv40_miptree *)surface->texture;
+
+	assert(mt->shadow_tex);
+	assert(mt->shadow_surface);
+
+	ws->buffer_unmap(ws, nv40_surface_buffer(mt->shadow_surface));
+
+	nvscreen = nv40_screen(screen);
+	nvscreen->eng2d->copy(nvscreen->eng2d, surface, 0, 0,
+			      mt->shadow_surface, 0, 0,
+			      surface->width, surface->height);
+
+	/* Release through the miptree's own pointer so it is cleared too.
+	 * Releasing a local copy left mt->shadow_surface dangling, and the
+	 * miptree release path would then release it a second time.
+	 */
+	screen->tex_surface_release(screen, &mt->shadow_surface);
+}
+
+/* Tears down a screen: hands back the vertex program and query heaps, the
+ * query and sync notifiers and the 3D object, then frees the screen.
+ *
+ * NOTE(review): screen->eng2d is not released here - confirm who owns it.
+ */
+static void
+nv40_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv40_screen *screen = nv40_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->res_free(&screen->vp_exec_heap);
+ nvws->res_free(&screen->vp_data_heap);
+ nvws->res_free(&screen->query_heap);
+ nvws->notifier_free(&screen->query);
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->curie);
+
+ FREE(pscreen);
+}
+
+/* Creates and initialises an nv40 screen.
+ *
+ * Sets up the 2D engine, picks the 3D object class from the chipset id,
+ * allocates the sync/query notifiers and the query and vertex program
+ * heaps, emits the static 3D object setup and installs the screen hooks.
+ *
+ * Returns the new screen, or NULL when the chipset is not recognised or
+ * any allocation fails.
+ */
+struct pipe_screen *
+nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+	struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen);
+	struct nouveau_stateobj *so;
+	/* Must start at 0: it is only assigned for known chipsets and is
+	 * tested right after the switch.
+	 */
+	unsigned curie_class = 0;
+	unsigned chipset = nvws->channel->device->chipset;
+	int ret;
+
+	if (!screen)
+		return NULL;
+	screen->nvws = nvws;
+
+	/* 2D engine setup */
+	screen->eng2d = nv04_surface_2d_init(nvws);
+	if (!screen->eng2d) {
+		NOUVEAU_ERR("Error initialising 2D engine\n");
+		FREE(screen);
+		return NULL;
+	}
+	screen->eng2d->buf = nv40_surface_buffer;
+
+	/* 3D object */
+	switch (chipset & 0xf0) {
+	case 0x40:
+		if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f)))
+			curie_class = NV40TCL;
+		else
+		if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f)))
+			curie_class = NV44TCL;
+		break;
+	case 0x60:
+		if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f)))
+			curie_class = NV44TCL;
+		break;
+	default:
+		break;
+	}
+
+	if (!curie_class) {
+		NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset);
+		FREE(screen);
+		return NULL;
+	}
+
+	ret = nvws->grobj_alloc(nvws, curie_class, &screen->curie);
+	if (ret) {
+		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+		FREE(screen);
+		return NULL;
+	}
+
+	/* Notifier for sync purposes */
+	ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+	if (ret) {
+		NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+		nv40_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Query objects */
+	ret = nvws->notifier_alloc(nvws, 32, &screen->query);
+	if (ret) {
+		NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
+		nv40_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	ret = nvws->res_init(&screen->query_heap, 0, 32);
+	if (ret) {
+		NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
+		nv40_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Vtxprog resources */
+	if (nvws->res_init(&screen->vp_exec_heap, 0, 512) ||
+	    nvws->res_init(&screen->vp_data_heap, 0, 256)) {
+		nv40_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Static curie initialisation */
+	so = so_new(128, 0);
+	so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1);
+	so_data  (so, screen->sync->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->gart->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1);
+	so_data  (so, nvws->channel->vram->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->vram->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->gart->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2);
+	so_data  (so, 0);
+	so_data  (so, screen->query->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->vram->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->vram->handle);
+
+	so_method(so, screen->curie, 0x1ea4, 3);
+	so_data  (so, 0x00000010);
+	so_data  (so, 0x01000100);
+	so_data  (so, 0xff800006);
+
+	/* vtxprog output routing */
+	so_method(so, screen->curie, 0x1fc4, 1);
+	so_data  (so, 0x06144321);
+	so_method(so, screen->curie, 0x1fc8, 2);
+	so_data  (so, 0xedcba987);
+	so_data  (so, 0x00000021);
+	so_method(so, screen->curie, 0x1fd0, 1);
+	so_data  (so, 0x00171615);
+	so_method(so, screen->curie, 0x1fd4, 1);
+	so_data  (so, 0x001b1a19);
+
+	so_method(so, screen->curie, 0x1ef8, 1);
+	so_data  (so, 0x0020ffff);
+	so_method(so, screen->curie, 0x1d64, 1);
+	so_data  (so, 0x00d30000);
+	so_method(so, screen->curie, 0x1e94, 1);
+	so_data  (so, 0x00000001);
+
+	so_emit(nvws, so);
+	so_ref(NULL, &so);
+	nvws->push_flush(nvws, 0, NULL);
+
+	screen->pipe.winsys = ws;
+	screen->pipe.destroy = nv40_screen_destroy;
+
+	screen->pipe.get_name = nv40_screen_get_name;
+	screen->pipe.get_vendor = nv40_screen_get_vendor;
+	screen->pipe.get_param = nv40_screen_get_param;
+	screen->pipe.get_paramf = nv40_screen_get_paramf;
+
+	screen->pipe.is_format_supported = nv40_screen_surface_format_supported;
+
+	screen->pipe.surface_map = nv40_surface_map;
+	screen->pipe.surface_unmap = nv40_surface_unmap;
+
+	nv40_screen_init_miptree_functions(&screen->pipe);
+	u_simple_screen_init(&screen->pipe);
+
+	return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h
new file mode 100644
index 0000000000..4500aa0e5c
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_screen.h
@@ -0,0 +1,37 @@
+#ifndef __NV40_SCREEN_H__
+#define __NV40_SCREEN_H__
+
+#include "pipe/p_screen.h"
+#include "nv04/nv04_surface_2d.h"
+
+/* Per-screen driver state.  `pipe` comes first so that a
+ * struct pipe_screen * can be cast back to a struct nv40_screen *.
+ */
+struct nv40_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ unsigned cur_pctx;
+
+ /* HW graphics objects */
+ struct nv04_surface_2d *eng2d;
+ struct nouveau_grobj *curie;
+ struct nouveau_notifier *sync;
+
+ /* Query object resources */
+ struct nouveau_notifier *query;
+ struct nouveau_resource *query_heap;
+
+ /* Vtxprog resources */
+ struct nouveau_resource *vp_exec_heap;
+ struct nouveau_resource *vp_data_heap;
+
+ /* Current 3D state of channel */
+ struct nouveau_stateobj *state[NV40_STATE_MAX];
+};
+
+/* Casts a generic pipe_screen pointer to the driver's screen struct. */
+static INLINE struct nv40_screen *
+nv40_screen(struct pipe_screen *screen)
+{
+ return (struct nv40_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h
new file mode 100644
index 0000000000..854dccf548
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_shader.h
@@ -0,0 +1,556 @@
+#ifndef __NV40_SHADER_H__
+#define __NV40_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * The NV40 instruction set is very similar to NV30. Most fields are in
+ * a slightly different position in the instruction however.
+ *
+ * Merged instructions
+ * In some cases it is possible to put two instructions into one opcode
+ * slot. The rules for when this is OK are not entirely clear to me yet.
+ *
+ * There are separate writemasks and dest temp register fields for each
+ * grouping of instructions. There is however only one field with the
+ * ID of a result register. Writing to temp/result regs is selected by
+ * setting VEC_RESULT/SCA_RESULT.
+ *
+ * Temporary registers
+ * The source/dest temp register fields have been extended by 1 bit, to
+ * give a total of 32 temporary registers.
+ *
+ * Relative Addressing
+ * NV40 can use an address register to index into vertex attribute regs.
+ * This is done by putting the offset value into INPUT_SRC and setting
+ * the INDEX_INPUT flag.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details)
+ * There is a second condition code register on NV40; its use is enabled
+ * by setting the COND_REG_SELECT_1 flag.
+ *
+ * Texture lookup
+ * TODO
+ */
+
+/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
+#define NV40_VP_INST_VEC_RESULT (1 << 30)
+/* uncertain.. */
+#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29)
+/* use address reg as index into attribs */
+#define NV40_VP_INST_INDEX_INPUT (1 << 27)
+#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25)
+#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV40_VP_INST_SRC2_ABS (1 << 23)
+#define NV40_VP_INST_SRC1_ABS (1 << 22)
+#define NV40_VP_INST_SRC0_ABS (1 << 21)
+#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15
+#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15)
+#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13)
+#define NV40_VP_INST_COND_SHIFT 10
+#define NV40_VP_INST_COND_MASK (0x7 << 10)
+# define NV40_VP_INST_COND_FL 0
+# define NV40_VP_INST_COND_LT 1
+# define NV40_VP_INST_COND_EQ 2
+# define NV40_VP_INST_COND_LE 3
+# define NV40_VP_INST_COND_GT 4
+# define NV40_VP_INST_COND_NE 5
+# define NV40_VP_INST_COND_GE 6
+# define NV40_VP_INST_COND_TR 7
+#define NV40_VP_INST_COND_SWZ_X_SHIFT 8
+#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8)
+#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6
+#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6)
+#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4
+#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4)
+#define NV40_VP_INST_COND_SWZ_W_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2)
+#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2)
+#define NV40_VP_INST_ADDR_SWZ_SHIFT 0
+#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0)
+#define NV40_VP_INST0_KNOWN ( \
+ NV40_VP_INST_INDEX_INPUT | \
+ NV40_VP_INST_COND_REG_SELECT_1 | \
+ NV40_VP_INST_ADDR_REG_SELECT_1 | \
+ NV40_VP_INST_SRC2_ABS | \
+ NV40_VP_INST_SRC1_ABS | \
+ NV40_VP_INST_SRC0_ABS | \
+ NV40_VP_INST_VEC_DEST_TEMP_MASK | \
+ NV40_VP_INST_COND_TEST_ENABLE | \
+ NV40_VP_INST_COND_MASK | \
+ NV40_VP_INST_COND_SWZ_ALL_MASK | \
+ NV40_VP_INST_ADDR_SWZ_MASK)
+
+/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
+#define NV40_VP_INST_VEC_OPCODE_SHIFT 22
+#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22)
+# define NV40_VP_INST_OP_NOP 0x00
+# define NV40_VP_INST_OP_MOV 0x01
+# define NV40_VP_INST_OP_MUL 0x02
+# define NV40_VP_INST_OP_ADD 0x03
+# define NV40_VP_INST_OP_MAD 0x04
+# define NV40_VP_INST_OP_DP3 0x05
+# define NV40_VP_INST_OP_DPH 0x06
+# define NV40_VP_INST_OP_DP4 0x07
+# define NV40_VP_INST_OP_DST 0x08
+# define NV40_VP_INST_OP_MIN 0x09
+# define NV40_VP_INST_OP_MAX 0x0A
+# define NV40_VP_INST_OP_SLT 0x0B
+# define NV40_VP_INST_OP_SGE 0x0C
+# define NV40_VP_INST_OP_ARL 0x0D
+# define NV40_VP_INST_OP_FRC 0x0E
+# define NV40_VP_INST_OP_FLR 0x0F
+# define NV40_VP_INST_OP_SEQ 0x10
+# define NV40_VP_INST_OP_SFL 0x11
+# define NV40_VP_INST_OP_SGT 0x12
+# define NV40_VP_INST_OP_SLE 0x13
+# define NV40_VP_INST_OP_SNE 0x14
+# define NV40_VP_INST_OP_STR 0x15
+# define NV40_VP_INST_OP_SSG 0x16
+# define NV40_VP_INST_OP_ARR 0x17
+# define NV40_VP_INST_OP_ARA 0x18
+# define NV40_VP_INST_OP_TXL 0x19
+#define NV40_VP_INST_SCA_OPCODE_SHIFT 27
+#define NV40_VP_INST_SCA_OPCODE_MASK (0x1FU << 27) /* unsigned: value exceeds INT_MAX */
+# define NV40_VP_INST_OP_NOP 0x00
+# define NV40_VP_INST_OP_MOV 0x01
+# define NV40_VP_INST_OP_RCP 0x02
+# define NV40_VP_INST_OP_RCC 0x03
+# define NV40_VP_INST_OP_RSQ 0x04
+# define NV40_VP_INST_OP_EXP 0x05
+# define NV40_VP_INST_OP_LOG 0x06
+# define NV40_VP_INST_OP_LIT 0x07
+# define NV40_VP_INST_OP_BRA 0x09
+# define NV40_VP_INST_OP_CAL 0x0B
+# define NV40_VP_INST_OP_RET 0x0C
+# define NV40_VP_INST_OP_LG2 0x0D
+# define NV40_VP_INST_OP_EX2 0x0E
+# define NV40_VP_INST_OP_SIN 0x0F
+# define NV40_VP_INST_OP_COS 0x10
+# define NV40_VP_INST_OP_PUSHA 0x13
+# define NV40_VP_INST_OP_POPA 0x14
+#define NV40_VP_INST_CONST_SRC_SHIFT 12
+#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12)
+#define NV40_VP_INST_INPUT_SRC_SHIFT 8
+#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8)
+# define NV40_VP_INST_IN_POS 0
+# define NV40_VP_INST_IN_WEIGHT 1
+# define NV40_VP_INST_IN_NORMAL 2
+# define NV40_VP_INST_IN_COL0 3
+# define NV40_VP_INST_IN_COL1 4
+# define NV40_VP_INST_IN_FOGC 5
+# define NV40_VP_INST_IN_TC0 8
+# define NV40_VP_INST_IN_TC(n) (8+(n))
+#define NV40_VP_INST_SRC0H_SHIFT 0
+#define NV40_VP_INST_SRC0H_MASK (0xFF << 0)
+#define NV40_VP_INST1_KNOWN ( \
+ NV40_VP_INST_VEC_OPCODE_MASK | \
+ NV40_VP_INST_SCA_OPCODE_MASK | \
+ NV40_VP_INST_CONST_SRC_MASK | \
+ NV40_VP_INST_INPUT_SRC_MASK | \
+ NV40_VP_INST_SRC0H_MASK \
+ )
+
+/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
+#define NV40_VP_INST_SRC0L_SHIFT 23
+#define NV40_VP_INST_SRC0L_MASK (0x1FFU << 23) /* unsigned: value exceeds INT_MAX */
+#define NV40_VP_INST_SRC1_SHIFT 6
+#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6)
+#define NV40_VP_INST_SRC2H_SHIFT 0
+#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
+#define NV40_VP_INST_IADDRH_SHIFT 0
+#define NV40_VP_INST_IADDRH_MASK (0x1F << 0)
+
+/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
+#define NV40_VP_INST_IADDRL_SHIFT 29
+#define NV40_VP_INST_IADDRL_MASK (7U << 29) /* unsigned: value exceeds INT_MAX */
+#define NV40_VP_INST_SRC2L_SHIFT 21
+#define NV40_VP_INST_SRC2L_MASK (0x7FFU << 21) /* unsigned: value exceeds INT_MAX */
+#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17
+#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17)
+# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20)
+# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19)
+# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18)
+# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17)
+#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13
+#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13)
+# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16)
+# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15)
+# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14)
+# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13)
+#define NV40_VP_INST_SCA_RESULT (1 << 12)
+#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7
+#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7)
+#define NV40_VP_INST_DEST_SHIFT 2
+#define NV40_VP_INST_DEST_MASK (31 << 2)
+# define NV40_VP_INST_DEST_POS 0
+# define NV40_VP_INST_DEST_COL0 1
+# define NV40_VP_INST_DEST_COL1 2
+# define NV40_VP_INST_DEST_BFC0 3
+# define NV40_VP_INST_DEST_BFC1 4
+# define NV40_VP_INST_DEST_FOGC 5
+# define NV40_VP_INST_DEST_PSZ 6
+# define NV40_VP_INST_DEST_TC0 7
+# define NV40_VP_INST_DEST_TC(n) (7+(n))
+# define NV40_VP_INST_DEST_TEMP 0x1F
+#define NV40_VP_INST_INDEX_CONST (1 << 1)
+#define NV40_VP_INST_LAST (1 << 0)
+#define NV40_VP_INST3_KNOWN ( \
+ NV40_VP_INST_SRC2L_MASK |\
+ NV40_VP_INST_SCA_WRITEMASK_MASK |\
+ NV40_VP_INST_VEC_WRITEMASK_MASK |\
+ NV40_VP_INST_SCA_DEST_TEMP_MASK |\
+ NV40_VP_INST_DEST_MASK |\
+ NV40_VP_INST_INDEX_CONST)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV40_VP_SRC0_HIGH_SHIFT 9
+#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00
+#define NV40_VP_SRC0_LOW_MASK 0x000001FF
+#define NV40_VP_SRC2_HIGH_SHIFT 11
+#define NV40_VP_SRC2_HIGH_MASK 0x0001F800
+#define NV40_VP_SRC2_LOW_MASK 0x000007FF
+
+/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
+#define NV40_VP_SRC_NEGATE (1 << 16)
+#define NV40_VP_SRC_SWZ_X_SHIFT 14
+#define NV40_VP_SRC_SWZ_X_MASK (3 << 14)
+#define NV40_VP_SRC_SWZ_Y_SHIFT 12
+#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12)
+#define NV40_VP_SRC_SWZ_Z_SHIFT 10
+#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10)
+#define NV40_VP_SRC_SWZ_W_SHIFT 8
+#define NV40_VP_SRC_SWZ_W_MASK (3 << 8)
+#define NV40_VP_SRC_SWZ_ALL_SHIFT 8
+#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8)
+#define NV40_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2)
+#define NV40_VP_SRC_REG_TYPE_SHIFT 0
+#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0)
+# define NV40_VP_SRC_REG_TYPE_UNK0 0
+# define NV40_VP_SRC_REG_TYPE_TEMP 1
+# define NV40_VP_SRC_REG_TYPE_INPUT 2
+# define NV40_VP_SRC_REG_TYPE_CONST 3
+
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ * 0 - Opcode, output reg/mask, ATTRIB source
+ * 1 - Source 0
+ * 2 - Source 1
+ * 3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ * result.color == R0.xyzw
+ * result.depth == R1.z
+ * When the fragprog contains instructions to write depth,
+ * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords. As such instructions such as:
+ *
+ * ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV40_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and
+ * SWIZZLE_ONE.
+ *
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as
+ * SWIZZLE_ZERO is implemented simply by not writing to the relevant components
+ * of the destination.
+ *
+ * Looping
+ * Loops appear to be fairly expensive on NV40 at least, the proprietary
+ * driver goes to a lot of effort to avoid using the native looping
+ * instructions. If the total number of *executed* instructions between
+ * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
+ * The maximum loop count is 255.
+ *
+ * Conditional execution
+ * TODO
+ *
+ * Non-native instructions:
+ * LIT
+ * LRP - MAD+MAD
+ * SUB - ADD, negate second source
+ * RSQ - LG2 + EX2
+ * POW - LG2 + MUL + EX2
+ * SCS - COS + SIN
+ * XPD
+ * DP2 - MUL + ADD
+ * NRM
+ */
+
+//== Opcode / Destination selection ==
+#define NV40_FP_OP_PROGRAM_END (1 << 0)
+#define NV40_FP_OP_OUT_REG_SHIFT 1
+#define NV40_FP_OP_OUT_REG_MASK (63 << 1)
+/* Needs to be set when writing outputs to get expected result.. */
+#define NV40_FP_OP_OUT_REG_HALF (1 << 7)
+#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8)
+#define NV40_FP_OP_OUTMASK_SHIFT 9
+#define NV40_FP_OP_OUTMASK_MASK (0xF << 9)
+# define NV40_FP_OP_OUT_X (1 << 9)
+# define NV40_FP_OP_OUT_Y (1 <<10)
+# define NV40_FP_OP_OUT_Z (1 <<11)
+# define NV40_FP_OP_OUT_W (1 <<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NV40_FP_OP_INPUT_SRC_SHIFT 13
+#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13)
+# define NV40_FP_OP_INPUT_SRC_POSITION 0x0
+# define NV40_FP_OP_INPUT_SRC_COL0 0x1
+# define NV40_FP_OP_INPUT_SRC_COL1 0x2
+# define NV40_FP_OP_INPUT_SRC_FOGC 0x3
+# define NV40_FP_OP_INPUT_SRC_TC0 0x4
+# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + (n))
+# define NV40_FP_OP_INPUT_SRC_FACING 0xE
+#define NV40_FP_OP_TEX_UNIT_SHIFT 17
+#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17)
+#define NV40_FP_OP_PRECISION_SHIFT 22
+#define NV40_FP_OP_PRECISION_MASK (3 << 22)
+# define NV40_FP_PRECISION_FP32 0
+# define NV40_FP_PRECISION_FP16 1
+# define NV40_FP_PRECISION_FX12 2
+#define NV40_FP_OP_OPCODE_SHIFT 24
+#define NV40_FP_OP_OPCODE_MASK (0x3F << 24)
+# define NV40_FP_OP_OPCODE_NOP 0x00
+# define NV40_FP_OP_OPCODE_MOV 0x01
+# define NV40_FP_OP_OPCODE_MUL 0x02
+# define NV40_FP_OP_OPCODE_ADD 0x03
+# define NV40_FP_OP_OPCODE_MAD 0x04
+# define NV40_FP_OP_OPCODE_DP3 0x05
+# define NV40_FP_OP_OPCODE_DP4 0x06
+# define NV40_FP_OP_OPCODE_DST 0x07
+# define NV40_FP_OP_OPCODE_MIN 0x08
+# define NV40_FP_OP_OPCODE_MAX 0x09
+# define NV40_FP_OP_OPCODE_SLT 0x0A
+# define NV40_FP_OP_OPCODE_SGE 0x0B
+# define NV40_FP_OP_OPCODE_SLE 0x0C
+# define NV40_FP_OP_OPCODE_SGT 0x0D
+# define NV40_FP_OP_OPCODE_SNE 0x0E
+# define NV40_FP_OP_OPCODE_SEQ 0x0F
+# define NV40_FP_OP_OPCODE_FRC 0x10
+# define NV40_FP_OP_OPCODE_FLR 0x11
+# define NV40_FP_OP_OPCODE_KIL 0x12
+# define NV40_FP_OP_OPCODE_PK4B 0x13
+# define NV40_FP_OP_OPCODE_UP4B 0x14
+/* DDX/DDY can only write to XY */
+# define NV40_FP_OP_OPCODE_DDX 0x15
+# define NV40_FP_OP_OPCODE_DDY 0x16
+# define NV40_FP_OP_OPCODE_TEX 0x17
+# define NV40_FP_OP_OPCODE_TXP 0x18
+# define NV40_FP_OP_OPCODE_TXD 0x19
+# define NV40_FP_OP_OPCODE_RCP 0x1A
+# define NV40_FP_OP_OPCODE_EX2 0x1C
+# define NV40_FP_OP_OPCODE_LG2 0x1D
+# define NV40_FP_OP_OPCODE_STR 0x20
+# define NV40_FP_OP_OPCODE_SFL 0x21
+# define NV40_FP_OP_OPCODE_COS 0x22
+# define NV40_FP_OP_OPCODE_SIN 0x23
+# define NV40_FP_OP_OPCODE_PK2H 0x24
+# define NV40_FP_OP_OPCODE_UP2H 0x25
+# define NV40_FP_OP_OPCODE_PK4UB 0x27
+# define NV40_FP_OP_OPCODE_UP4UB 0x28
+# define NV40_FP_OP_OPCODE_PK2US 0x29
+# define NV40_FP_OP_OPCODE_UP2US 0x2A
+# define NV40_FP_OP_OPCODE_DP2A 0x2E
+# define NV40_FP_OP_OPCODE_TXL 0x2F
+# define NV40_FP_OP_OPCODE_TXB 0x31
+# define NV40_FP_OP_OPCODE_DIV 0x3A
+# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C
+/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
+# define NV40_FP_OP_BRA_OPCODE_BRK 0x0
+# define NV40_FP_OP_BRA_OPCODE_CAL 0x1
+# define NV40_FP_OP_BRA_OPCODE_IF 0x2
+# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3
+# define NV40_FP_OP_BRA_OPCODE_REP 0x4
+# define NV40_FP_OP_BRA_OPCODE_RET 0x5
+#define NV40_FP_OP_OUT_SAT (1U << 31) /* unsigned: 1 << 31 overflows a signed int */
+
+/* high order bits of SRC0 */
+#define NV40_FP_OP_OUT_ABS (1 << 29)
+#define NV40_FP_OP_COND_SWZ_W_SHIFT 27
+#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27)
+#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25
+#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25)
+#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23
+#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23)
+#define NV40_FP_OP_COND_SWZ_X_SHIFT 21
+#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21)
+#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21
+#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
+#define NV40_FP_OP_COND_SHIFT 18
+#define NV40_FP_OP_COND_MASK (0x07 << 18)
+# define NV40_FP_OP_COND_FL 0
+# define NV40_FP_OP_COND_LT 1
+# define NV40_FP_OP_COND_EQ 2
+# define NV40_FP_OP_COND_LE 3
+# define NV40_FP_OP_COND_GT 4
+# define NV40_FP_OP_COND_NE 5
+# define NV40_FP_OP_COND_GE 6
+# define NV40_FP_OP_COND_TR 7
+
+/* high order bits of SRC1 */
+#define NV40_FP_OP_OPCODE_IS_BRANCH (1U<<31) /* unsigned: 1 << 31 overflows a signed int */
+#define NV40_FP_OP_DST_SCALE_SHIFT 28
+#define NV40_FP_OP_DST_SCALE_MASK (3 << 28)
+#define NV40_FP_OP_DST_SCALE_1X 0
+#define NV40_FP_OP_DST_SCALE_2X 1
+#define NV40_FP_OP_DST_SCALE_4X 2
+#define NV40_FP_OP_DST_SCALE_8X 3
+#define NV40_FP_OP_DST_SCALE_INV_2X 5
+#define NV40_FP_OP_DST_SCALE_INV_4X 6
+#define NV40_FP_OP_DST_SCALE_INV_8X 7
+
+/* SRC1 LOOP */
+#define NV40_FP_OP_LOOP_INCR_SHIFT 19
+#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19)
+#define NV40_FP_OP_LOOP_INDEX_SHIFT 10
+#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10)
+#define NV40_FP_OP_LOOP_COUNT_SHIFT 2
+#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2)
+
+/* SRC1 IF */
+#define NV40_FP_OP_ELSE_ID_SHIFT 2
+#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2)
+
+/* SRC1 CAL */
+#define NV40_FP_OP_IADDR_SHIFT 2
+#define NV40_FP_OP_IADDR_MASK (0xFF << 2)
+
+/* SRC1 REP
+ * I have no idea why there are 3 count values here.. but they
+ * have always been filled with the same value in my tests so
+ * far..
+ */
+#define NV40_FP_OP_REP_COUNT1_SHIFT 2
+#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2)
+#define NV40_FP_OP_REP_COUNT2_SHIFT 10
+#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10)
+#define NV40_FP_OP_REP_COUNT3_SHIFT 19
+#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19)
+
+/* SRC2 REP/IF */
+#define NV40_FP_OP_END_ID_SHIFT 2
+#define NV40_FP_OP_END_ID_MASK (0xFF << 2)
+
+// SRC2 high-order
+#define NV40_FP_OP_INDEX_INPUT (1 << 30)
+#define NV40_FP_OP_ADDR_INDEX_SHIFT 19
+#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19)
+
+//== Register selection ==
+#define NV40_FP_REG_TYPE_SHIFT 0
+#define NV40_FP_REG_TYPE_MASK (3 << 0)
+# define NV40_FP_REG_TYPE_TEMP 0
+# define NV40_FP_REG_TYPE_INPUT 1
+# define NV40_FP_REG_TYPE_CONST 2
+#define NV40_FP_REG_SRC_SHIFT 2
+#define NV40_FP_REG_SRC_MASK (63 << 2)
+#define NV40_FP_REG_SRC_HALF (1 << 8)
+#define NV40_FP_REG_SWZ_ALL_SHIFT 9
+#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9)
+#define NV40_FP_REG_SWZ_X_SHIFT 9
+#define NV40_FP_REG_SWZ_X_MASK (3 << 9)
+#define NV40_FP_REG_SWZ_Y_SHIFT 11
+#define NV40_FP_REG_SWZ_Y_MASK (3 << 11)
+#define NV40_FP_REG_SWZ_Z_SHIFT 13
+#define NV40_FP_REG_SWZ_Z_MASK (3 << 13)
+#define NV40_FP_REG_SWZ_W_SHIFT 15
+#define NV40_FP_REG_SWZ_W_MASK (3 << 15)
+# define NV40_FP_SWIZZLE_X 0
+# define NV40_FP_SWIZZLE_Y 1
+# define NV40_FP_SWIZZLE_Z 2
+# define NV40_FP_SWIZZLE_W 3
+#define NV40_FP_REG_NEGATE (1 << 17)
+
+#ifndef NV40_SHADER_NO_FUCKEDNESS
+#define NV40SR_NONE 0
+#define NV40SR_OUTPUT 1
+#define NV40SR_INPUT 2
+#define NV40SR_TEMP 3
+#define NV40SR_CONST 4
+
+struct nv40_sreg {
+ int type; /* presumably one of the NV40SR_* values defined above -- confirm */
+ int index;
+
+ int dst_scale; /* nv40_sr() defaults this to DEF_SCALE; overridden by nv40_sr_scale() */
+
+ int negate; /* toggled by nv40_sr_neg() */
+ int abs; /* set to 1 by nv40_sr_abs() */
+ int swz[4]; /* per-component selectors; nv40_sr() sets the identity {0,1,2,3} */
+
+ int cc_update;
+ int cc_update_reg;
+ int cc_test; /* nv40_sr() defaults this to DEF_CTEST */
+ int cc_test_reg;
+ int cc_swz[4]; /* nv40_sr() sets the identity {0,1,2,3} */
+};
+
+static INLINE struct nv40_sreg
+nv40_sr(int type, int index)
+{
+ /* Start from all-zero, then fill in the fields with non-zero defaults. */
+ struct nv40_sreg reg = { 0 };
+ int c;
+
+ reg.type = type;
+ reg.index = index;
+ reg.dst_scale = DEF_SCALE;
+ reg.cc_test = DEF_CTEST;
+ /* Identity swizzle for both the register and the condition code. */
+ for (c = 0; c < 4; c++) {
+ reg.swz[c] = c;
+ reg.cc_swz[c] = c;
+ }
+ return reg;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w)
+{
+ /* Compose swizzles: each output lane picks from src's existing swizzle. */
+ struct nv40_sreg out = src;
+ out.swz[SWZ_W] = src.swz[w];
+ out.swz[SWZ_Z] = src.swz[z];
+ out.swz[SWZ_Y] = src.swz[y];
+ out.swz[SWZ_X] = src.swz[x];
+ return out;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_neg(struct nv40_sreg src)
+{
+ src.negate = src.negate ? 0 : 1; /* toggle, so negating twice cancels out */
+ return src;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_abs(struct nv40_sreg src)
+{
+ src.abs = 1; /* src is a by-value copy, so the caller's register is left untouched */
+ return src;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_scale(struct nv40_sreg src, int scale)
+{
+ src.dst_scale = scale; /* replaces the DEF_SCALE default that nv40_sr() installs */
+ return src;
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
new file mode 100644
index 0000000000..2eff25aa83
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -0,0 +1,740 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "draw/draw_context.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv40_context.h"
+#include "nv40_state.h"
+
+/* Build the blend CSO; returns NULL if the CSO cannot be allocated. */
+static void *
+nv40_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso));
+ struct nouveau_stateobj *so;
+
+ if (!bso) /* check before so_new() so there is nothing to undo */
+ return NULL;
+ so = so_new(16, 0);
+
+ if (cso->blend_enable) {
+ so_method(so, curie, NV40TCL_BLEND_ENABLE, 3);
+ so_data (so, 1);
+ so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) |
+ nvgl_blend_func(cso->rgb_src_factor));
+ so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 |
+ nvgl_blend_func(cso->rgb_dst_factor));
+ so_method(so, curie, NV40TCL_BLEND_EQUATION, 1);
+ so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 |
+ nvgl_blend_eqn(cso->rgb_func));
+ } else {
+ so_method(so, curie, NV40TCL_BLEND_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, curie, NV40TCL_COLOR_MASK, 1);
+ so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
+ ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
+ ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) |
+ ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0)));
+
+ so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, cso->logicop_enable ? 2 : 1);
+ so_data (so, cso->logicop_enable ? 1 : 0);
+ if (cso->logicop_enable) /* the op word only follows when logic ops are on */
+ so_data (so, nvgl_logicop_func(cso->logicop_func));
+
+ so_method(so, curie, NV40TCL_DITHER_ENABLE, 1);
+ so_data (so, cso->dither ? 1 : 0);
+ so_ref(so, &bso->so);
+ bso->pipe = *cso;
+ return (void *)bso;
+}
+
+static void
+nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *ctx = nv40_context(pipe);
+ /* Mark blend state dirty and remember the newly bound CSO. */
+ ctx->dirty |= NV40_NEW_BLEND;
+ ctx->blend = hwcso;
+}
+
+static void
+nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_blend_state *blend = hwcso;
+ /* Drop the CSO's state object reference before freeing the CSO itself. */
+ so_ref(NULL, &blend->so);
+ FREE(blend);
+}
+
+
+/* Convert a PIPE_TEX_WRAP_* mode to the NV40 wrap encoding. The value is
+ * returned shifted down by NV40TCL_TEX_WRAP_S_SHIFT so the caller can shift
+ * it into whichever of the S/T/R fields it needs. An unrecognised mode is
+ * reported via NOUVEAU_ERR and falls back to REPEAT.
+ */
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ const unsigned shift = NV40TCL_TEX_WRAP_S_SHIFT;
+
+ switch (wrap) {
+ /* repeat modes */
+ case PIPE_TEX_WRAP_REPEAT:
+ return NV40TCL_TEX_WRAP_S_REPEAT >> shift;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT >> shift;
+ /* clamp modes */
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE >> shift;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER >> shift;
+ case PIPE_TEX_WRAP_CLAMP:
+ return NV40TCL_TEX_WRAP_S_CLAMP >> shift;
+ /* mirror-clamp modes */
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE >> shift;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER >> shift;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ return NV40TCL_TEX_WRAP_S_MIRROR_CLAMP >> shift;
+ default:
+ break;
+ }
+
+ /* Only reached for modes not handled above. */
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ return NV40TCL_TEX_WRAP_S_REPEAT >> shift;
+}
+
+/* Translate a pipe_sampler_state into the NV40 texture unit words kept in
+ * struct nv40_sampler_state (fmt, wrap, en, filt, bcol). Returns NULL if the
+ * CSO cannot be allocated. */
+static void *
+nv40_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv40_sampler_state *ps;
+ uint32_t filter = 0;
+
+ ps = MALLOC(sizeof(struct nv40_sampler_state));
+ if (!ps)
+ return NULL;
+
+ ps->fmt = 0;
+ if (!cso->normalized_coords)
+ ps->fmt |= NV40TCL_TEX_FORMAT_RECT;
+
+ ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) |
+ (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT));
+
+ ps->en = 0;
+ if (cso->max_anisotropy >= 2.0) {
+ /* no idea, binary driver sets it, works without it.. meh.. */
+ ps->wrap |= (1 << 5);
+
+ if (cso->max_anisotropy >= 16.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X;
+ } else if (cso->max_anisotropy >= 12.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X;
+ } else if (cso->max_anisotropy >= 10.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X;
+ } else if (cso->max_anisotropy >= 8.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X;
+ } else if (cso->max_anisotropy >= 6.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X;
+ } else if (cso->max_anisotropy >= 4.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X;
+ } else {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X;
+ }
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV40TCL_TEX_FILTER_MAG_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV40TCL_TEX_FILTER_MAG_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV40TCL_TEX_FILTER_MIN_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV40TCL_TEX_FILTER_MIN_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ps->filt = filter;
+
+ {
+ float limit;
+
+ /* 13-bit field in 1/256 steps: encode the clamped bias so it cannot wrap. */
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+ ps->filt |= (int)(limit * 256.0) & 0x1fff;
+
+ limit = CLAMP(cso->max_lod, 0.0, 15.0);
+ ps->en |= (int)(limit * 256.0) << 7;
+
+ limit = CLAMP(cso->min_lod, 0.0, 15.0);
+ ps->en |= (int)(limit * 256.0) << 19;
+ }
+
+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ switch (cso->compare_func) {
+ case PIPE_FUNC_NEVER:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER;
+ break;
+ case PIPE_FUNC_GREATER:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER;
+ break;
+ case PIPE_FUNC_EQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL;
+ break;
+ case PIPE_FUNC_LESS:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS;
+ break;
+ default:
+ break;
+ }
+ }
+
+ ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+ (float_to_ubyte(cso->border_color[0]) << 16) |
+ (float_to_ubyte(cso->border_color[1]) << 8) |
+ (float_to_ubyte(cso->border_color[2]) << 0));
+
+ return (void *)ps;
+}
+
+/* Bind nr sampler CSOs, clearing any slots left over from the last bind. */
+static void
+nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv40_context *ctx = nv40_context(pipe);
+ /* Cover every slot that is either newly bound or was bound before. */
+ unsigned span = (nr > ctx->nr_samplers) ? nr : ctx->nr_samplers;
+ unsigned slot;
+
+ for (slot = 0; slot < span; slot++) {
+ /* Slots at or beyond nr are being unbound. */
+ ctx->tex_sampler[slot] = (slot < nr) ? sampler[slot] : NULL;
+ ctx->dirty_samplers |= (1 << slot);
+ }
+
+ /* Record the new count and mark sampler state dirty. */
+ ctx->nr_samplers = nr;
+ ctx->dirty |= NV40_NEW_SAMPLER;
+}
+
+static void
+nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso); /* plain MALLOC'd struct from nv40_sampler_state_create(); no state object to unref */
+}
+
+/* Bind nr textures, rebinding slots left over from the last call to NULL. */
+static void
+nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv40_context *ctx = nv40_context(pipe);
+ unsigned span = (nr > ctx->nr_textures) ? nr : ctx->nr_textures;
+ unsigned slot;
+
+ for (slot = 0; slot < span; slot++) {
+ /* Slots at or beyond nr are rebound to NULL. */
+ struct pipe_texture *tex = (slot < nr) ? miptree[slot] : NULL;
+
+ pipe_texture_reference((struct pipe_texture **)
+ &ctx->tex_miptree[slot], tex);
+ ctx->dirty_samplers |= (1 << slot);
+ }
+
+ /* Record the new count and mark sampler state dirty. */
+ ctx->nr_textures = nr;
+ ctx->dirty |= NV40_NEW_SAMPLER;
+}
+
+/* Build the rasterizer CSO: shade model, line/point, polygon mode, culling,
+ * polygon offset and point-sprite methods are recorded in a state object
+ * referenced from the returned CSO. Returns NULL if the CSO cannot be allocated.
+ */
+static void *
+nv40_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
+ struct nouveau_stateobj *so;
+ struct nouveau_grobj *curie = nv40->screen->curie;
+
+ if (!rsso)
+ return NULL;
+ so = so_new(32, 0);
+
+ /*XXX: ignored:
+ * light_twoside
+ * point_smooth -nohw
+ * multisample
+ */
+
+ so_method(so, curie, NV40TCL_SHADE_MODEL, 1);
+ so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT :
+ NV40TCL_SHADE_MODEL_SMOOTH);
+
+ so_method(so, curie, NV40TCL_LINE_WIDTH, 2);
+ so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff);
+ so_data (so, cso->line_smooth ? 1 : 0);
+ so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2);
+ so_data (so, cso->line_stipple_enable ? 1 : 0);
+ so_data (so, (cso->line_stipple_pattern << 16) |
+ cso->line_stipple_factor);
+
+ so_method(so, curie, NV40TCL_POINT_SIZE, 1);
+ so_data (so, fui(cso->point_size));
+
+ so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6);
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV40TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ }
+ so_data(so, NV40TCL_FRONT_FACE_CCW);
+ } else {
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV40TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ }
+ so_data(so, NV40TCL_FRONT_FACE_CW);
+ }
+ so_data(so, cso->poly_smooth ? 1 : 0);
+ so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);
+
+ so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, cso->poly_stipple_enable ? 1 : 0);
+
+ /* One offset-enable word per fill mode, in order: point, line, fill. */
+ so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ so_data(so, ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) ? 1 : 0);
+ so_data(so, ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) ? 1 : 0);
+ so_data(so, ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) ? 1 : 0);
+ if (cso->offset_cw || cso->offset_ccw) {
+ so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2);
+ so_data (so, fui(cso->offset_scale));
+ so_data (so, fui(cso->offset_units * 2));
+ }
+
+ so_method(so, curie, NV40TCL_POINT_SPRITE, 1);
+ if (cso->point_sprite) {
+ unsigned psctl = (1 << 0), i;
+
+ for (i = 0; i < 8; i++) {
+ if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ psctl |= (1 << (8 + i));
+ }
+
+ so_data(so, psctl);
+ } else {
+ so_data(so, 0);
+ }
+
+ so_ref(so, &rsso->so);
+ rsso->pipe = *cso;
+ return (void *)rsso;
+}
+
+static void
+nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *ctx = nv40_context(pipe);
+ /* Rasterizer changes are flagged in both dirty and draw_dirty. */
+ ctx->draw_dirty |= NV40_NEW_RAST;
+ ctx->dirty |= NV40_NEW_RAST;
+ ctx->rasterizer = hwcso;
+}
+
+static void
+nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_rasterizer_state *rast = hwcso;
+ /* Drop the CSO's state object reference before freeing the CSO itself. */
+ so_ref(NULL, &rast->so);
+ FREE(rast);
+}
+
+/* Build the depth/stencil/alpha CSO. Depth, alpha-test and both stencil faces
+ * are recorded in a state object referenced from the returned CSO.
+ * Returns NULL if the CSO cannot be allocated. */
+static void *
+nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
+ struct nouveau_stateobj *so;
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ unsigned face;
+
+ if (!zsaso)
+ return NULL;
+ so = so_new(32, 0);
+
+ so_method(so, curie, NV40TCL_DEPTH_FUNC, 3);
+ so_data (so, nvgl_comparison_op(cso->depth.func));
+ so_data (so, cso->depth.writemask ? 1 : 0);
+ so_data (so, cso->depth.enabled ? 1 : 0);
+
+ so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3);
+ so_data (so, cso->alpha.enabled ? 1 : 0);
+ so_data (so, nvgl_comparison_op(cso->alpha.func));
+ so_data (so, float_to_ubyte(cso->alpha.ref_value));
+
+ /* stencil[0] drives the FRONT methods, stencil[1] the BACK methods. */
+ for (face = 0; face < 2; face++) {
+ unsigned mthd = face ? NV40TCL_STENCIL_BACK_ENABLE :
+ NV40TCL_STENCIL_FRONT_ENABLE;
+
+ if (!cso->stencil[face].enabled) {
+ so_method(so, curie, mthd, 1);
+ so_data (so, 0);
+ continue;
+ }
+
+ so_method(so, curie, mthd, 8);
+ so_data (so, cso->stencil[face].enabled ? 1 : 0);
+ so_data (so, cso->stencil[face].writemask);
+ so_data (so, nvgl_comparison_op(cso->stencil[face].func));
+ so_data (so, cso->stencil[face].ref_value);
+ so_data (so, cso->stencil[face].valuemask);
+ so_data (so, nvgl_stencil_op(cso->stencil[face].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[face].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[face].zpass_op));
+ }
+
+ so_ref(so, &zsaso->so);
+ zsaso->pipe = *cso;
+ return (void *)zsaso;
+}
+
+/* Select the depth/stencil/alpha CSO to use and flag it for validation. */
+static void
+nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->dirty |= NV40_NEW_ZSA;
+	ctx->zsa = hwcso;
+}
+
+/* Destroy a depth/stencil/alpha CSO: release its state object reference
+ * and free the wrapper.  The context is not consulted.
+ */
+static void
+nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_zsa_state *doomed = hwcso;
+
+	(void)pipe;
+
+	so_ref(NULL, &doomed->so);
+	FREE(doomed);
+}
+
+/* Create a vertex program CSO.
+ *
+ * The TGSI tokens are duplicated so the program owns its own copy, and a
+ * draw-module vertex shader is created from that copy for the software
+ * TNL path.
+ *
+ * Returns the new nv40_vertex_program, or NULL if the program or its
+ * token copy could not be allocated.
+ */
+static void *
+nv40_vp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *cso)
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
+	struct nv40_vertex_program *vp;
+
+	vp = CALLOC(1, sizeof(*vp));
+	if (!vp)
+		return NULL;
+
+	vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+	if (!vp->pipe.tokens) {
+		/* A NULL copy is treated as failure; nothing else to undo. */
+		FREE(vp);
+		return NULL;
+	}
+
+	vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe);
+
+	return (void *)vp;
+}
+
+/* Make hwcso the current vertex program; both the hardware and the
+ * draw-module dirty masks get NV40_NEW_VERTPROG.
+ */
+static void
+nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->vertprog = hwcso;
+	ctx->draw_dirty |= NV40_NEW_VERTPROG;
+	ctx->dirty |= NV40_NEW_VERTPROG;
+}
+
+/* Destroy a vertex program CSO.
+ *
+ * Tear-down order: the draw-module shader, the driver-side program state
+ * (nv40_vertprog_destroy), the duplicated tokens, then the wrapper.
+ */
+static void
+nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_vertex_program *prog = hwcso;
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	draw_delete_vertex_shader(ctx->draw, prog->draw);
+	nv40_vertprog_destroy(ctx, prog);
+
+	/* tokens is const-qualified in pipe_shader_state, hence the cast */
+	FREE((void*)prog->pipe.tokens);
+	FREE(prog);
+}
+
+/* Create a fragment program CSO.
+ *
+ * The TGSI tokens are duplicated and scanned once up front, with the scan
+ * result stored in fp->info.
+ *
+ * Returns the new nv40_fragment_program, or NULL if the program or its
+ * token copy could not be allocated.
+ */
+static void *
+nv40_fp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *cso)
+{
+	struct nv40_fragment_program *fp;
+
+	(void)pipe;
+
+	fp = CALLOC(1, sizeof(*fp));
+	if (!fp)
+		return NULL;
+
+	fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+	if (!fp->pipe.tokens) {
+		/* A NULL copy is treated as failure; do not scan it. */
+		FREE(fp);
+		return NULL;
+	}
+
+	tgsi_scan_shader(fp->pipe.tokens, &fp->info);
+
+	return (void *)fp;
+}
+
+/* Make hwcso the current fragment program and flag it for validation. */
+static void
+nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->dirty |= NV40_NEW_FRAGPROG;
+	ctx->fragprog = hwcso;
+}
+
+/* Destroy a fragment program CSO: driver-side program state first
+ * (nv40_fragprog_destroy), then the duplicated tokens, then the wrapper.
+ */
+static void
+nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_fragment_program *prog = hwcso;
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	nv40_fragprog_destroy(ctx, prog);
+
+	/* tokens is const-qualified in pipe_shader_state, hence the cast */
+	FREE((void*)prog->pipe.tokens);
+	FREE(prog);
+}
+
+/* Store a copy of the blend colour and flag it for validation. */
+static void
+nv40_set_blend_color(struct pipe_context *pipe,
+		     const struct pipe_blend_color *bcol)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->dirty |= NV40_NEW_BCOL;
+	ctx->blend_colour = *bcol;
+}
+
+/* Store a copy of the clip state; NV40_NEW_UCP is raised in both the
+ * hardware and the draw-module dirty masks.
+ */
+static void
+nv40_set_clip_state(struct pipe_context *pipe,
+		    const struct pipe_clip_state *clip)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->clip = *clip;
+	ctx->draw_dirty |= NV40_NEW_UCP;
+	ctx->dirty |= NV40_NEW_UCP;
+}
+
+/* Bind (or unbind) the constant buffer of one shader stage.
+ *
+ * The buffer pointer is recorded together with the number of vec4
+ * constants it holds (size / (4 * sizeof(float))).  A NULL buf or a NULL
+ * buf->buffer unbinds the stage: the slot is cleared and the count set to
+ * zero instead of dereferencing the missing buffer.
+ *
+ * The matching program is flagged dirty so it is re-validated against the
+ * new constants.  `index` is not used by this driver.
+ */
+static void
+nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+			 const struct pipe_constant_buffer *buf )
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
+
+	(void)index;
+
+	if (buf && buf->buffer) {
+		nv40->constbuf[shader] = buf->buffer;
+		nv40->constbuf_nr[shader] =
+			buf->buffer->size / (4 * sizeof(float));
+	} else {
+		nv40->constbuf[shader] = NULL;
+		nv40->constbuf_nr[shader] = 0;
+	}
+
+	if (shader == PIPE_SHADER_VERTEX) {
+		nv40->dirty |= NV40_NEW_VERTPROG;
+	} else
+	if (shader == PIPE_SHADER_FRAGMENT) {
+		nv40->dirty |= NV40_NEW_FRAGPROG;
+	}
+}
+
+/* Store a copy of the framebuffer state and flag it for validation. */
+static void
+nv40_set_framebuffer_state(struct pipe_context *pipe,
+			   const struct pipe_framebuffer_state *fb)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->dirty |= NV40_NEW_FB;
+	ctx->framebuffer = *fb;
+}
+
+/* Copy the polygon stipple pattern (128 bytes) into the context and flag
+ * it for validation.
+ */
+static void
+nv40_set_polygon_stipple(struct pipe_context *pipe,
+			 const struct pipe_poly_stipple *stipple)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->dirty |= NV40_NEW_STIPPLE;
+	memcpy(ctx->stipple, stipple->stipple, 32 * 4);
+}
+
+/* Store a copy of the scissor rectangle and flag it for validation. */
+static void
+nv40_set_scissor_state(struct pipe_context *pipe,
+		       const struct pipe_scissor_state *s)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->dirty |= NV40_NEW_SCISSOR;
+	ctx->scissor = *s;
+}
+
+/* Store a copy of the viewport transform; NV40_NEW_VIEWPORT is raised in
+ * both the hardware and the draw-module dirty masks.
+ */
+static void
+nv40_set_viewport_state(struct pipe_context *pipe,
+			const struct pipe_viewport_state *vpt)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->viewport = *vpt;
+	ctx->draw_dirty |= NV40_NEW_VIEWPORT;
+	ctx->dirty |= NV40_NEW_VIEWPORT;
+}
+
+/* Copy `count` vertex buffer descriptors into the context and flag the
+ * vertex arrays dirty for both the hardware and the draw-module paths.
+ */
+static void
+nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+			const struct pipe_vertex_buffer *vb)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->vtxbuf_nr = count;
+	memcpy(ctx->vtxbuf, vb, count * sizeof(vb[0]));
+
+	ctx->draw_dirty |= NV40_NEW_ARRAYS;
+	ctx->dirty |= NV40_NEW_ARRAYS;
+}
+
+/* Copy `count` vertex element descriptors into the context and flag the
+ * vertex arrays dirty for both the hardware and the draw-module paths.
+ */
+static void
+nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+			 const struct pipe_vertex_element *ve)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->vtxelt_nr = count;
+	memcpy(ctx->vtxelt, ve, count * sizeof(ve[0]));
+
+	ctx->draw_dirty |= NV40_NEW_ARRAYS;
+	ctx->dirty |= NV40_NEW_ARRAYS;
+}
+
+/* Remember the caller's edge flag bitfield.  Only the pointer is kept,
+ * not a copy; the arrays are flagged dirty on both paths.
+ */
+static void
+nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+	struct nv40_context *ctx = nv40_context(pipe);
+
+	ctx->edgeflags = bitfield;
+	ctx->draw_dirty |= NV40_NEW_ARRAYS;
+	ctx->dirty |= NV40_NEW_ARRAYS;
+}
+
+/* Install this file's state callbacks into the context's pipe_context
+ * function table.
+ */
+void
+nv40_init_state_functions(struct nv40_context *nv40)
+{
+	struct pipe_context *pipe = &nv40->pipe;
+
+	/* constant state objects: create / bind / delete */
+	pipe->create_blend_state = nv40_blend_state_create;
+	pipe->bind_blend_state = nv40_blend_state_bind;
+	pipe->delete_blend_state = nv40_blend_state_delete;
+
+	pipe->create_sampler_state = nv40_sampler_state_create;
+	pipe->bind_sampler_states = nv40_sampler_state_bind;
+	pipe->delete_sampler_state = nv40_sampler_state_delete;
+	pipe->set_sampler_textures = nv40_set_sampler_texture;
+
+	pipe->create_rasterizer_state = nv40_rasterizer_state_create;
+	pipe->bind_rasterizer_state = nv40_rasterizer_state_bind;
+	pipe->delete_rasterizer_state = nv40_rasterizer_state_delete;
+
+	pipe->create_depth_stencil_alpha_state =
+		nv40_depth_stencil_alpha_state_create;
+	pipe->bind_depth_stencil_alpha_state =
+		nv40_depth_stencil_alpha_state_bind;
+	pipe->delete_depth_stencil_alpha_state =
+		nv40_depth_stencil_alpha_state_delete;
+
+	pipe->create_vs_state = nv40_vp_state_create;
+	pipe->bind_vs_state = nv40_vp_state_bind;
+	pipe->delete_vs_state = nv40_vp_state_delete;
+
+	pipe->create_fs_state = nv40_fp_state_create;
+	pipe->bind_fs_state = nv40_fp_state_bind;
+	pipe->delete_fs_state = nv40_fp_state_delete;
+
+	/* directly-set state */
+	pipe->set_blend_color = nv40_set_blend_color;
+	pipe->set_clip_state = nv40_set_clip_state;
+	pipe->set_constant_buffer = nv40_set_constant_buffer;
+	pipe->set_framebuffer_state = nv40_set_framebuffer_state;
+	pipe->set_polygon_stipple = nv40_set_polygon_stipple;
+	pipe->set_scissor_state = nv40_set_scissor_state;
+	pipe->set_viewport_state = nv40_set_viewport_state;
+
+	/* vertex input */
+	pipe->set_edgeflags = nv40_set_edgeflags;
+	pipe->set_vertex_buffers = nv40_set_vertex_buffers;
+	pipe->set_vertex_elements = nv40_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h
new file mode 100644
index 0000000000..9c55903ae3
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state.h
@@ -0,0 +1,91 @@
+#ifndef __NV40_STATE_H__
+#define __NV40_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv40_sampler_state { /* NOTE(review): five 32-bit words, presumably pre-encoded hardware sampler values -- confirm against the sampler CSO create code */
+ uint32_t fmt;
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+struct nv40_vertex_program_exec { /* element type of nv40_vertex_program::insns */
+ uint32_t data[4]; /* NOTE(review): presumably one encoded instruction -- confirm in the vertprog translator */
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv40_vertex_program_data { /* element type of nv40_vertex_program::consts */
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nv40_vertex_program { /* vertex program CSO handed out by nv40_vp_state_create() */
+ struct pipe_shader_state pipe; /* pipe.tokens is a private copy made with tgsi_dup_tokens() */
+
+ struct draw_vertex_shader *draw; /* draw-module shader, bound by nv40_state_validate_swtnl() */
+
+ boolean translated;
+
+ struct pipe_clip_state ucp;
+
+ struct nv40_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv40_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+ uint32_t clip_ctrl;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_fragment_program_data { /* element type of nv40_fragment_program::consts */
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv40_fragment_program { /* fragment program CSO handed out by nv40_fp_state_create() */
+ struct pipe_shader_state pipe; /* pipe.tokens is a private copy made with tgsi_dup_tokens() */
+ struct tgsi_shader_info info; /* filled by tgsi_scan_shader() at creation time */
+
+ boolean translated;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv40_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_miptree { /* driver texture; nv40_surface_buffer() casts a pipe_texture pointer to this type */
+ struct pipe_texture base; /* must stay the first member for that cast to be valid */
+
+ struct pipe_buffer *buffer; /* returned by nv40_surface_buffer() for surfaces of this texture */
+ uint total_size;
+
+ struct pipe_texture *shadow_tex;
+ struct pipe_surface *shadow_surface;
+
+ struct { /* one entry per mipmap level */
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c
new file mode 100644
index 0000000000..95e6d7394f
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_blend.c
@@ -0,0 +1,40 @@
+#include "nv40_context.h"
+
+/* Point the hardware blend slot at the bound blend CSO's pre-built state
+ * object.  Always reports a change.
+ */
+static boolean
+nv40_state_blend_validate(struct nv40_context *nv40)
+{
+	struct nouveau_stateobj **slot = &nv40->state.hw[NV40_STATE_BLEND];
+
+	so_ref(nv40->blend->so, slot);
+
+	return TRUE;
+}
+
+/* State table entry: re-run the blend validator when NV40_NEW_BLEND is
+ * dirty; a TRUE result marks NV40_STATE_BLEND for emission.
+ */
+struct nv40_state_entry nv40_state_blend = {
+	.dirty = {
+		.hw = NV40_STATE_BLEND,
+		.pipe = NV40_NEW_BLEND
+	},
+	.validate = nv40_state_blend_validate
+};
+
+/* Build the blend colour state object.
+ *
+ * The four float channels are converted to bytes and packed into one
+ * word as A<<24 | R<<16 | G<<8 | B.  The bytes are widened to uint32_t
+ * before shifting: shifting the promoted (signed) int left by 24 would
+ * overflow into the sign bit for alpha values of 128 and above.
+ *
+ * Always reports a change.
+ */
+static boolean
+nv40_state_blend_colour_validate(struct nv40_context *nv40)
+{
+	struct nouveau_stateobj *so = so_new(2, 0);
+	struct pipe_blend_color *bcol = &nv40->blend_colour;
+	uint32_t a = float_to_ubyte(bcol->color[3]);
+	uint32_t r = float_to_ubyte(bcol->color[0]);
+	uint32_t g = float_to_ubyte(bcol->color[1]);
+	uint32_t b = float_to_ubyte(bcol->color[2]);
+
+	so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1);
+	so_data  (so, (a << 24) | (r << 16) | (g << 8) | (b << 0));
+
+	so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]);
+	return TRUE;
+}
+
+/* State table entry: rebuild the blend colour when NV40_NEW_BCOL is
+ * dirty; a TRUE result marks NV40_STATE_BCOL for emission.
+ */
+struct nv40_state_entry nv40_state_blend_colour = {
+	.dirty = {
+		.hw = NV40_STATE_BCOL,
+		.pipe = NV40_NEW_BCOL
+	},
+	.validate = nv40_state_blend_colour_validate
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
new file mode 100644
index 0000000000..ce859def10
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -0,0 +1,184 @@
+#include "nv40_context.h"
+#include "nv40_state.h"
+#include "draw/draw_context.h"
+
+static struct nv40_state_entry *render_states[] = { /* walked in this order by nv40_state_validate() */
+ &nv40_state_framebuffer,
+ &nv40_state_rasterizer,
+ &nv40_state_scissor,
+ &nv40_state_stipple,
+ &nv40_state_fragprog,
+ &nv40_state_fragtex,
+ &nv40_state_vertprog,
+ &nv40_state_blend,
+ &nv40_state_blend_colour,
+ &nv40_state_zsa,
+ &nv40_state_viewport,
+ &nv40_state_vbo, /* the only entry that differs from swtnl_states */
+ NULL /* terminator: nv40_state_do_validate() stops at the first NULL */
+};
+
+static struct nv40_state_entry *swtnl_states[] = { /* walked in this order by nv40_state_validate_swtnl() */
+ &nv40_state_framebuffer,
+ &nv40_state_rasterizer,
+ &nv40_state_scissor,
+ &nv40_state_stipple,
+ &nv40_state_fragprog,
+ &nv40_state_fragtex,
+ &nv40_state_vertprog,
+ &nv40_state_blend,
+ &nv40_state_blend_colour,
+ &nv40_state_zsa,
+ &nv40_state_viewport,
+ &nv40_state_vtxfmt, /* the only entry that differs from render_states */
+ NULL /* terminator: nv40_state_do_validate() stops at the first NULL */
+};
+
+/* Run every validator in the NULL-terminated table `states` whose pipe
+ * dirty mask intersects nv40->dirty.
+ *
+ * Before that, all bound colour surfaces and the zeta surface are marked
+ * PIPE_SURFACE_STATUS_DEFINED.  A validator returning TRUE gets its
+ * hardware state bit set in nv40->state.dirty.  nv40->dirty is cleared
+ * only after the whole table has run, because validators read it.
+ */
+static void
+nv40_state_do_validate(struct nv40_context *nv40,
+		       struct nv40_state_entry **states)
+{
+	const struct pipe_framebuffer_state *fb = &nv40->framebuffer;
+	struct nv40_state_entry **cursor;
+	unsigned n;
+
+	for (n = 0; n < fb->nr_cbufs; n++)
+		fb->cbufs[n]->status = PIPE_SURFACE_STATUS_DEFINED;
+	if (fb->zsbuf)
+		fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
+
+	for (cursor = states; *cursor; cursor++) {
+		struct nv40_state_entry *entry = *cursor;
+
+		if (!(nv40->dirty & entry->dirty.pipe))
+			continue;
+
+		if (entry->validate(nv40))
+			nv40->state.dirty |= (1ULL << entry->dirty.hw);
+	}
+
+	nv40->dirty = 0;
+}
+
+/* Push all dirty hardware state objects to the channel.
+ *
+ * 1. If another context emitted last (pctx_id differs from the screen's
+ *    cur_pctx), every slot whose object differs from what the screen
+ *    last emitted is flagged dirty and this context becomes current.
+ * 2. Each dirty slot is copied into the screen's table and, if non-NULL,
+ *    emitted.
+ * 3. If the fragment program or texture unit 0 was dirty, two
+ *    TEX_CACHE_CTL writes (2, then 1) are issued.
+ * 4. Relocation markers are re-emitted for the framebuffer, every sampler
+ *    in fp_samplers, the fragment program and, in HW render mode, the
+ *    vertex buffers.
+ */
+void
+nv40_state_emit(struct nv40_context *nv40)
+{
+	struct nv40_screen *screen = nv40->screen;
+	struct nv40_state *state = &nv40->state;
+	uint64_t pending;
+	unsigned i, samplers;
+
+	if (screen->cur_pctx != nv40->pctx_id) {
+		for (i = 0; i < NV40_STATE_MAX; i++) {
+			if (!state->hw[i])
+				continue;
+			if (state->hw[i] != screen->state[i])
+				state->dirty |= (1ULL << i);
+		}
+
+		screen->cur_pctx = nv40->pctx_id;
+	}
+
+	pending = state->dirty;
+	for (i = 0; pending; i++) {
+		const uint64_t bit = 1ULL << i;
+
+		if (pending & bit) {
+			pending &= ~bit;
+			so_ref(state->hw[i], &screen->state[i]);
+			if (state->hw[i])
+				so_emit(nv40->nvws, screen->state[i]);
+		}
+	}
+
+	if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) |
+			    (1ULL << NV40_STATE_FRAGTEX0))) {
+		BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+		OUT_RING  (2);
+		BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+		OUT_RING  (1);
+	}
+
+	state->dirty = 0;
+
+	so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]);
+
+	samplers = state->fp_samplers;
+	for (i = 0; i < 16 && samplers; i++) {
+		const unsigned unit = 1 << i;
+
+		if (!(samplers & unit))
+			continue;
+		so_emit_reloc_markers(nv40->nvws,
+				      state->hw[NV40_STATE_FRAGTEX0 + i]);
+		samplers &= ~unit;
+	}
+
+	so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]);
+	if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW)
+		so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]);
+}
+
+/* Validate state for the hardware TNL path.
+ *
+ * Returns TRUE when rendering can proceed in hardware, FALSE when a
+ * software fallback (swtnl or swrast) is required.
+ */
+boolean
+nv40_state_validate(struct nv40_context *nv40)
+{
+	const boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE;
+
+	if (nv40->render_mode != HW) {
+		/* Stay in software unless every state bit recorded in
+		 * fallback_swtnl has been dirtied again since.
+		 */
+		if ((nv40->dirty & nv40->fallback_swtnl) !=
+		    nv40->fallback_swtnl)
+			return FALSE;
+
+		/* Try the hardware path again: flush, then force the
+		 * mode-dependent state to be rebuilt.
+		 */
+		nv40->pipe.flush(&nv40->pipe, 0, NULL);
+		nv40->dirty |= NV40_NEW_VIEWPORT;
+		nv40->dirty |= NV40_NEW_VERTPROG;
+		nv40->dirty |= NV40_NEW_ARRAYS;
+		nv40->render_mode = HW;
+	}
+
+	nv40_state_do_validate(nv40, render_states);
+
+	if (nv40->fallback_swtnl)
+		return FALSE;
+	if (nv40->fallback_swrast)
+		return FALSE;
+
+	if (was_sw)
+		NOUVEAU_ERR("swtnl->hw\n");
+
+	return TRUE;
+}
+
+/* Validate state for the software TNL path.
+ *
+ * On a switch from HW mode the context is flushed and the viewport,
+ * vertex program and array state are forced dirty.  State flagged in
+ * draw_dirty is then forwarded to the draw module before the swtnl
+ * state table is validated.
+ *
+ * Returns FALSE if a swrast fallback is required (draw_dirty is left
+ * untouched in that case), TRUE otherwise.
+ */
+boolean
+nv40_state_validate_swtnl(struct nv40_context *nv40)
+{
+	struct draw_context *dctx = nv40->draw;
+
+	if (nv40->render_mode == HW) {
+		/* entering swtnl */
+		NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl);
+		nv40->pipe.flush(&nv40->pipe, 0, NULL);
+		nv40->dirty |= NV40_NEW_VIEWPORT;
+		nv40->dirty |= NV40_NEW_VERTPROG;
+		nv40->dirty |= NV40_NEW_ARRAYS;
+		nv40->render_mode = SWTNL;
+	}
+
+	if (nv40->draw_dirty & NV40_NEW_VERTPROG) {
+		draw_bind_vertex_shader(dctx, nv40->vertprog->draw);
+	}
+
+	if (nv40->draw_dirty & NV40_NEW_RAST) {
+		draw_set_rasterizer_state(dctx, &nv40->rasterizer->pipe);
+	}
+
+	if (nv40->draw_dirty & NV40_NEW_UCP) {
+		draw_set_clip_state(dctx, &nv40->clip);
+	}
+
+	if (nv40->draw_dirty & NV40_NEW_VIEWPORT) {
+		draw_set_viewport_state(dctx, &nv40->viewport);
+	}
+
+	if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
+		draw_set_edgeflags(dctx, nv40->edgeflags);
+		draw_set_vertex_buffers(dctx, nv40->vtxbuf_nr, nv40->vtxbuf);
+		draw_set_vertex_elements(dctx, nv40->vtxelt_nr, nv40->vtxelt);
+	}
+
+	nv40_state_do_validate(nv40, swtnl_states);
+
+	if (!nv40->fallback_swrast) {
+		nv40->draw_dirty = 0;
+		return TRUE;
+	}
+
+	NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast);
+	return FALSE;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c
new file mode 100644
index 0000000000..454abad31f
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_fb.c
@@ -0,0 +1,162 @@
+#include "nv40_context.h"
+#include "nouveau/nouveau_util.h"
+
+/* Return the buffer backing a surface, i.e. the buffer of the
+ * nv40_miptree the surface's texture pointer is cast to.
+ */
+static struct pipe_buffer *
+nv40_surface_buffer(struct pipe_surface *surface)
+{
+	return ((struct nv40_miptree *)surface->texture)->buffer;
+}
+
+/* Build the framebuffer state object.
+ *
+ * Emits, for every bound colour buffer and for the zeta buffer, the DMA
+ * object, offset and pitch; then the render-target enable mask, the
+ * surface dimensions, the render-target format word and the viewport /
+ * clip rectangles derived from the framebuffer size.
+ *
+ * Every bound colour buffer is recorded in rt[] and enabled (all must
+ * share one format); MRT is switched on when any of buffers 1..3 is in
+ * use.  The swizzled/linear choice is taken from the first colour buffer,
+ * or from the zeta buffer when no colour buffer is bound.
+ *
+ * Always returns TRUE so NV40_STATE_FB is flagged for emission.
+ */
+static boolean
+nv40_state_framebuffer_validate(struct nv40_context *nv40)
+{
+	struct pipe_framebuffer_state *fb = &nv40->framebuffer;
+	struct pipe_surface *rt[4] = { NULL, NULL, NULL, NULL };
+	struct pipe_surface *zeta = NULL;
+	struct pipe_surface *layout;
+	uint32_t rt_enable = 0, rt_format;
+	unsigned i;
+	int colour_format = 0, zeta_format = 0;
+	struct nouveau_stateobj *so = so_new(64, 10);
+	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+	unsigned w = fb->width;
+	unsigned h = fb->height;
+
+	/* rt[] only has room for the four hardware render targets */
+	assert(fb->nr_cbufs <= 4);
+
+	for (i = 0; i < fb->nr_cbufs; i++) {
+		if (colour_format) {
+			assert(colour_format == fb->cbufs[i]->format);
+		} else {
+			colour_format = fb->cbufs[i]->format;
+		}
+
+		/* record and enable every buffer, not just the first */
+		rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i);
+		rt[i] = fb->cbufs[i];
+	}
+
+	if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 |
+			 NV40TCL_RT_ENABLE_COLOR3))
+		rt_enable |= NV40TCL_RT_ENABLE_MRT;
+
+	if (fb->zsbuf) {
+		zeta_format = fb->zsbuf->format;
+		zeta = fb->zsbuf;
+	}
+
+	/* NOTE(review): with no colour buffer bound the zeta surface decides
+	 * the layout; with nothing bound at all, linear is assumed.  Confirm
+	 * this matches what the hardware expects for depth-only rendering.
+	 */
+	layout = fb->nr_cbufs ? rt[0] : zeta;
+
+	if (layout &&
+	    !(layout->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+		/* the swizzled format word stores log2 of the dimensions */
+		assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+		for (i = 1; i < fb->nr_cbufs; i++)
+			assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
+
+		rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED |
+			    log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT |
+			    log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT;
+	}
+	else
+		rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR;
+
+	switch (colour_format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case 0:
+		rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
+		break;
+	case PIPE_FORMAT_R5G6B5_UNORM:
+		rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5;
+		break;
+	default:
+		assert(0);
+	}
+
+	switch (zeta_format) {
+	case PIPE_FORMAT_Z16_UNORM:
+		rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16;
+		break;
+	case PIPE_FORMAT_Z24S8_UNORM:
+	case 0:
+		rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8;
+		break;
+	default:
+		assert(0);
+	}
+
+	if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
+		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1);
+		so_reloc (so, nv40_surface_buffer(rt[0]), 0, rt_flags | NOUVEAU_BO_OR,
+			  nv40->nvws->channel->vram->handle,
+			  nv40->nvws->channel->gart->handle);
+		/* colour 0 is written pitch first, then offset */
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2);
+		so_data  (so, rt[0]->stride);
+		so_reloc (so, nv40_surface_buffer(rt[0]), rt[0]->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+	}
+
+	if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
+		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1);
+		so_reloc (so, nv40_surface_buffer(rt[1]), 0, rt_flags | NOUVEAU_BO_OR,
+			  nv40->nvws->channel->vram->handle,
+			  nv40->nvws->channel->gart->handle);
+		/* colour 1 is written offset first, then pitch */
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2);
+		so_reloc (so, nv40_surface_buffer(rt[1]), rt[1]->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+		so_data  (so, rt[1]->stride);
+	}
+
+	if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
+		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1);
+		so_reloc (so, nv40_surface_buffer(rt[2]), 0, rt_flags | NOUVEAU_BO_OR,
+			  nv40->nvws->channel->vram->handle,
+			  nv40->nvws->channel->gart->handle);
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1);
+		so_reloc (so, nv40_surface_buffer(rt[2]), rt[2]->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1);
+		so_data  (so, rt[2]->stride);
+	}
+
+	if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
+		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1);
+		so_reloc (so, nv40_surface_buffer(rt[3]), 0, rt_flags | NOUVEAU_BO_OR,
+			  nv40->nvws->channel->vram->handle,
+			  nv40->nvws->channel->gart->handle);
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1);
+		so_reloc (so, nv40_surface_buffer(rt[3]), rt[3]->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1);
+		so_data  (so, rt[3]->stride);
+	}
+
+	if (zeta_format) {
+		so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1);
+		so_reloc (so, nv40_surface_buffer(zeta), 0, rt_flags | NOUVEAU_BO_OR,
+			  nv40->nvws->channel->vram->handle,
+			  nv40->nvws->channel->gart->handle);
+		so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1);
+		so_reloc (so, nv40_surface_buffer(zeta), zeta->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+		so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1);
+		so_data  (so, zeta->stride);
+	}
+
+	so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1);
+	so_data  (so, rt_enable);
+	so_method(so, nv40->screen->curie, NV40TCL_RT_HORIZ, 3);
+	so_data  (so, (w << 16) | 0);
+	so_data  (so, (h << 16) | 0);
+	so_data  (so, rt_format);
+	so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_HORIZ, 2);
+	so_data  (so, (w << 16) | 0);
+	so_data  (so, (h << 16) | 0);
+	so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+	so_data  (so, ((w - 1) << 16) | 0);
+	so_data  (so, ((h - 1) << 16) | 0);
+	so_method(so, nv40->screen->curie, 0x1d88, 1);
+	so_data  (so, (1 << 12) | h);
+
+	so_ref(so, &nv40->state.hw[NV40_STATE_FB]);
+	return TRUE;
+}
+
+/* State table entry: rebuild the framebuffer state when NV40_NEW_FB is
+ * dirty; a TRUE result marks NV40_STATE_FB for emission.
+ */
+struct nv40_state_entry nv40_state_framebuffer = {
+	.dirty = {
+		.hw = NV40_STATE_FB,
+		.pipe = NV40_NEW_FB
+	},
+	.validate = nv40_state_framebuffer_validate
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c
new file mode 100644
index 0000000000..9ecda5990f
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_rasterizer.c
@@ -0,0 +1,17 @@
+#include "nv40_context.h"
+
+/* Point the hardware rasterizer slot at the bound rasterizer CSO's
+ * pre-built state object.  Always reports a change.
+ */
+static boolean
+nv40_state_rasterizer_validate(struct nv40_context *nv40)
+{
+	struct nouveau_stateobj **slot = &nv40->state.hw[NV40_STATE_RAST];
+
+	so_ref(nv40->rasterizer->so, slot);
+
+	return TRUE;
+}
+
+/* State table entry: re-run the rasterizer validator when NV40_NEW_RAST
+ * is dirty; a TRUE result marks NV40_STATE_RAST for emission.
+ */
+struct nv40_state_entry nv40_state_rasterizer = {
+	.dirty = {
+		.hw = NV40_STATE_RAST,
+		.pipe = NV40_NEW_RAST
+	},
+	.validate = nv40_state_rasterizer_validate
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c
new file mode 100644
index 0000000000..285239ef41
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_scissor.c
@@ -0,0 +1,35 @@
+#include "nv40_context.h"
+
+/* Build the scissor state object.
+ *
+ * With scissoring enabled in the rasterizer state the rectangle is
+ * encoded as (extent << 16) | origin per axis; otherwise a 4096-wide
+ * window starting at 0 is programmed on both axes.
+ *
+ * Returns FALSE (nothing to emit) when a scissor object already exists
+ * and scissoring was disabled before and still is; TRUE otherwise.
+ */
+static boolean
+nv40_state_scissor_validate(struct nv40_context *nv40)
+{
+	struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe;
+	struct pipe_scissor_state *rect = &nv40->scissor;
+	struct nouveau_stateobj *so;
+
+	if (nv40->state.hw[NV40_STATE_SCISSOR] &&
+	    !rast->scissor && !nv40->state.scissor_enabled)
+		return FALSE;
+
+	nv40->state.scissor_enabled = rast->scissor;
+
+	so = so_new(3, 0);
+	so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2);
+	if (!nv40->state.scissor_enabled) {
+		so_data  (so, 4096 << 16);
+		so_data  (so, 4096 << 16);
+	} else {
+		so_data  (so, ((rect->maxx - rect->minx) << 16) | rect->minx);
+		so_data  (so, ((rect->maxy - rect->miny) << 16) | rect->miny);
+	}
+
+	so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]);
+	return TRUE;
+}
+
+/* State table entry: the scissor depends on both the scissor rectangle
+ * and the rasterizer's scissor enable, so either dirty bit triggers it.
+ */
+struct nv40_state_entry nv40_state_scissor = {
+	.dirty = {
+		.hw = NV40_STATE_SCISSOR,
+		.pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST
+	},
+	.validate = nv40_state_scissor_validate
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c
new file mode 100644
index 0000000000..b51024ad9b
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_stipple.c
@@ -0,0 +1,39 @@
+#include "nv40_context.h"
+
+/* Build the polygon stipple state object.
+ *
+ * With stippling enabled in the rasterizer state the enable flag and the
+ * 32-word pattern are emitted; otherwise only the enable flag is written,
+ * as zero.
+ *
+ * Returns FALSE (nothing to emit) when a stipple object already exists
+ * and stippling was disabled before and still is; TRUE otherwise.
+ *
+ * state.stipple_enabled is updated on every rebuild, as the scissor
+ * validator does for its flag.  Without that the flag never changes here
+ * and the early-out swallows the enabled -> disabled transition.
+ */
+static boolean
+nv40_state_stipple_validate(struct nv40_context *nv40)
+{
+	struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe;
+	struct nouveau_grobj *curie = nv40->screen->curie;
+	struct nouveau_stateobj *so;
+
+	if (nv40->state.hw[NV40_STATE_STIPPLE] &&
+	   (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0))
+		return FALSE;
+
+	/* remember what is about to be programmed for the next early-out */
+	nv40->state.stipple_enabled = rast->poly_stipple_enable;
+
+	if (rast->poly_stipple_enable) {
+		unsigned i;
+
+		so = so_new(35, 0);
+		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+		so_data  (so, 1);
+		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+		for (i = 0; i < 32; i++)
+			so_data(so, nv40->stipple[i]);
+	} else {
+		so = so_new(2, 0);
+		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]);
+	return TRUE;
+}
+
+/* State table entry: the stipple depends on both the pattern and the
+ * rasterizer's stipple enable, so either dirty bit triggers it.
+ */
+struct nv40_state_entry nv40_state_stipple = {
+	.dirty = {
+		.hw = NV40_STATE_STIPPLE,
+		.pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST
+	},
+	.validate = nv40_state_stipple_validate
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c
new file mode 100644
index 0000000000..869a55b405
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_viewport.c
@@ -0,0 +1,67 @@
+#include "nv40_context.h"
+
+/* Build the viewport transform state object.
+ *
+ * The transform is bypassed (zero translate, scale of 1,1,1,0) unless the
+ * context renders in HW mode and the rasterizer state does not request
+ * bypass_clipping; otherwise the application's translate/scale are used.
+ *
+ * Returns FALSE (nothing to emit) when a viewport object already exists,
+ * the bypass mode is unchanged, and either the transform is bypassed or
+ * the viewport itself is not dirty.  Returns TRUE otherwise.
+ */
+static boolean
+nv40_state_viewport_validate(struct nv40_context *nv40)
+{
+	static const float identity_translate[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
+	static const float identity_scale[4] = { 1.0f, 1.0f, 1.0f, 0.0f };
+	struct pipe_viewport_state *vpt = &nv40->viewport;
+	struct nouveau_stateobj *so;
+	const float *translate, *scale;
+	unsigned bypass, k;
+
+	bypass = (nv40->render_mode != HW ||
+		  nv40->rasterizer->pipe.bypass_clipping) ? 1 : 0;
+
+	if (nv40->state.hw[NV40_STATE_VIEWPORT] &&
+	    (bypass || !(nv40->dirty & NV40_NEW_VIEWPORT)) &&
+	    nv40->state.viewport_bypass == bypass)
+		return FALSE;
+	nv40->state.viewport_bypass = bypass;
+
+	if (bypass) {
+		translate = identity_translate;
+		scale = identity_scale;
+	} else {
+		translate = vpt->translate;
+		scale = vpt->scale;
+	}
+
+	so = so_new(11, 0);
+	so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+	for (k = 0; k < 4; k++)
+		so_data(so, fui(translate[k]));
+	for (k = 0; k < 4; k++)
+		so_data(so, fui(scale[k]));
+
+	/* Method 0x1d78 is not fully understood.  The DDX also writes
+	 * 0x110 here, as it fixes rendering when pre-transformed vertices
+	 * are passed to the GPU.  The best guess is that it bypasses some
+	 * culling/clipping stage.  Points and lines are unaffected by
+	 * whatever this value fixes; only filled polygons are affected.
+	 */
+	so_method(so, nv40->screen->curie, 0x1d78, 1);
+	so_data  (so, bypass ? 0x110 : 1);
+
+	so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]);
+	return TRUE;
+}
+
+/* State table entry: the viewport depends on both the viewport transform
+ * and the rasterizer's bypass_clipping flag, so either dirty bit
+ * triggers it.
+ */
+struct nv40_state_entry nv40_state_viewport = {
+	.dirty = {
+		.hw = NV40_STATE_VIEWPORT,
+		.pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST
+	},
+	.validate = nv40_state_viewport_validate
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c
new file mode 100644
index 0000000000..fb760677c8
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_zsa.c
@@ -0,0 +1,17 @@
+#include "nv40_context.h"
+
+/* Point the hardware depth/stencil/alpha slot at the bound CSO's
+ * pre-built state object.  Always reports a change.
+ */
+static boolean
+nv40_state_zsa_validate(struct nv40_context *nv40)
+{
+	struct nouveau_stateobj **slot = &nv40->state.hw[NV40_STATE_ZSA];
+
+	so_ref(nv40->zsa->so, slot);
+
+	return TRUE;
+}
+
+/* State table entry: re-run the depth/stencil/alpha validator when
+ * NV40_NEW_ZSA is dirty; a TRUE result marks NV40_STATE_ZSA for emission.
+ */
+struct nv40_state_entry nv40_state_zsa = {
+	.dirty = {
+		.hw = NV40_STATE_ZSA,
+		.pipe = NV40_NEW_ZSA
+	},
+	.validate = nv40_state_zsa_validate
+};
diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c
new file mode 100644
index 0000000000..c4a5fb20d9
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_surface.c
@@ -0,0 +1,72 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv40_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+/* Copy a width x height rectangle from src to dest with the 2D engine.
+ *
+ * With do_flip set the rectangle is mirrored vertically: it is copied one
+ * row at a time, source rows top-down and destination rows bottom-up.
+ * The destination rows used are desty .. desty + height - 1, the same
+ * rectangle an unflipped copy writes.  (Starting at desty + height would
+ * shift the result down by one row and write one row past the rectangle.)
+ */
+static void
+nv40_surface_copy(struct pipe_context *pipe, boolean do_flip,
+		  struct pipe_surface *dest, unsigned destx, unsigned desty,
+		  struct pipe_surface *src, unsigned srcx, unsigned srcy,
+		  unsigned width, unsigned height)
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
+	struct nv04_surface_2d *eng2d = nv40->screen->eng2d;
+
+	if (do_flip) {
+		unsigned row;
+
+		for (row = 0; row < height; row++) {
+			eng2d->copy(eng2d, dest, destx,
+				    desty + height - 1 - row, src,
+				    srcx, srcy + row, width, 1);
+		}
+		return;
+	}
+
+	eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height);
+}
+
+/* Fill a width x height rectangle of dest with `value` by handing the
+ * request straight to the screen's 2D engine.
+ */
+static void
+nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+		  unsigned destx, unsigned desty, unsigned width,
+		  unsigned height, unsigned value)
+{
+	struct nv04_surface_2d *blitter = nv40_context(pipe)->screen->eng2d;
+
+	blitter->fill(blitter, dest, destx, desty, width, height, value);
+}
+
+/* Install the surface copy/fill callbacks into the context's
+ * pipe_context function table.
+ */
+void
+nv40_init_surface_functions(struct nv40_context *nv40)
+{
+	struct pipe_context *pipe = &nv40->pipe;
+
+	pipe->surface_fill = nv40_surface_fill;
+	pipe->surface_copy = nv40_surface_copy;
+}
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
new file mode 100644
index 0000000000..8f1834628f
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -0,0 +1,555 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv40_context.h"
+#include "nv40_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nouveau_util.h"
+
+#define FORCE_SWTNL 0
+
+/*
+ * Translate a pipe vertex format into the hardware vertex type (*fmt) and
+ * component count (*ncomp).
+ *
+ * Returns 0 on success.  For an unsupported format an error is logged, 1 is
+ * returned and neither output is written.
+ */
+static INLINE int
+nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+{
+	unsigned hw_type, components;
+
+	switch (pipe) {
+	/* 32-bit float */
+	case PIPE_FORMAT_R32_FLOAT:
+		hw_type = NV40TCL_VTXFMT_TYPE_FLOAT;
+		components = 1;
+		break;
+	case PIPE_FORMAT_R32G32_FLOAT:
+		hw_type = NV40TCL_VTXFMT_TYPE_FLOAT;
+		components = 2;
+		break;
+	case PIPE_FORMAT_R32G32B32_FLOAT:
+		hw_type = NV40TCL_VTXFMT_TYPE_FLOAT;
+		components = 3;
+		break;
+	case PIPE_FORMAT_R32G32B32A32_FLOAT:
+		hw_type = NV40TCL_VTXFMT_TYPE_FLOAT;
+		components = 4;
+		break;
+	/* 8-bit unsigned normalised */
+	case PIPE_FORMAT_R8_UNORM:
+		hw_type = NV40TCL_VTXFMT_TYPE_UBYTE;
+		components = 1;
+		break;
+	case PIPE_FORMAT_R8G8_UNORM:
+		hw_type = NV40TCL_VTXFMT_TYPE_UBYTE;
+		components = 2;
+		break;
+	case PIPE_FORMAT_R8G8B8_UNORM:
+		hw_type = NV40TCL_VTXFMT_TYPE_UBYTE;
+		components = 3;
+		break;
+	case PIPE_FORMAT_R8G8B8A8_UNORM:
+		hw_type = NV40TCL_VTXFMT_TYPE_UBYTE;
+		components = 4;
+		break;
+	/* 16-bit scaled */
+	case PIPE_FORMAT_R16_SSCALED:
+		hw_type = NV40TCL_VTXFMT_TYPE_USHORT;
+		components = 1;
+		break;
+	case PIPE_FORMAT_R16G16_SSCALED:
+		hw_type = NV40TCL_VTXFMT_TYPE_USHORT;
+		components = 2;
+		break;
+	case PIPE_FORMAT_R16G16B16_SSCALED:
+		hw_type = NV40TCL_VTXFMT_TYPE_USHORT;
+		components = 3;
+		break;
+	case PIPE_FORMAT_R16G16B16A16_SSCALED:
+		hw_type = NV40TCL_VTXFMT_TYPE_USHORT;
+		components = 4;
+		break;
+	default:
+		NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe));
+		return 1;
+	}
+
+	*fmt = hw_type;
+	*ncomp = components;
+	return 0;
+}
+
+/*
+ * Record the index buffer to be used by the hardware for the next draw.
+ *
+ * Returns TRUE when the hardware index buffer path can be used.  Returns
+ * FALSE when there is no index buffer (the cached buffer is cleared), when
+ * the screen does not report NOUVEAU_CAP_HW_IDXBUF, or when the index size
+ * is not 2 or 4 bytes.  NV40_NEW_ARRAYS is flagged whenever the buffer or
+ * its format changes.
+ */
+static boolean
+nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib,
+		    unsigned ib_size)
+{
+	struct pipe_screen *pscreen = &nv40->screen->pipe;
+	unsigned hw_type;
+
+	if (ib == NULL) {
+		nv40->idxbuf = NULL;
+		nv40->idxbuf_format = 0xdeadbeef;
+		return FALSE;
+	}
+
+	if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF))
+		return FALSE;
+
+	if (ib_size == 2)
+		hw_type = NV40TCL_IDXBUF_FORMAT_TYPE_U16;
+	else if (ib_size == 4)
+		hw_type = NV40TCL_IDXBUF_FORMAT_TYPE_U32;
+	else
+		return FALSE;
+
+	/* Nothing to do if the same buffer/format is already bound. */
+	if (nv40->idxbuf == ib && nv40->idxbuf_format == hw_type)
+		return TRUE;
+
+	nv40->dirty |= NV40_NEW_ARRAYS;
+	nv40->idxbuf = ib;
+	nv40->idxbuf_format = hw_type;
+	return TRUE;
+}
+
+/*
+ * Emit a constant (zero-stride) vertex attribute as immediate
+ * NV40TCL_VTX_ATTR_nF methods appended to the state object `so`.
+ *
+ * The attribute value is read from the vertex buffer at
+ * vb->buffer_offset + ve->src_offset.  Only float formats with 1-4
+ * components are handled.
+ *
+ * Returns TRUE if the attribute was emitted, FALSE if the format is not
+ * supported or the buffer could not be mapped (the caller then treats the
+ * element as a regular array).
+ */
+static boolean
+nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so,
+		       int attrib, struct pipe_vertex_element *ve,
+		       struct pipe_vertex_buffer *vb)
+{
+	struct pipe_winsys *ws = nv40->pipe.winsys;
+	struct nouveau_grobj *curie = nv40->screen->curie;
+	unsigned type, ncomp;
+	boolean emitted = TRUE;
+	const uint8_t *map;
+	const float *v;
+
+	if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp))
+		return FALSE;
+
+	/* Only float data can be sent as an immediate attribute; there is
+	 * no point mapping the buffer for anything else.
+	 */
+	if (type != NV40TCL_VTXFMT_TYPE_FLOAT)
+		return FALSE;
+
+	map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+	if (!map)
+		return FALSE;
+
+	/* Byte-wise offset; arithmetic on void * is not standard C. */
+	v = (const float *)(map + vb->buffer_offset + ve->src_offset);
+
+	switch (ncomp) {
+	case 4:
+		so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4);
+		so_data  (so, fui(v[0]));
+		so_data  (so, fui(v[1]));
+		so_data  (so, fui(v[2]));
+		so_data  (so, fui(v[3]));
+		break;
+	case 3:
+		so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3);
+		so_data  (so, fui(v[0]));
+		so_data  (so, fui(v[1]));
+		so_data  (so, fui(v[2]));
+		break;
+	case 2:
+		so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2);
+		so_data  (so, fui(v[0]));
+		so_data  (so, fui(v[1]));
+		break;
+	case 1:
+		so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1);
+		so_data  (so, fui(v[0]));
+		break;
+	default:
+		emitted = FALSE;
+		break;
+	}
+
+	ws->buffer_unmap(ws, vb->buffer);
+
+	return emitted;
+}
+
+/*
+ * Draw `count` consecutive vertices starting at `start`, without an index
+ * buffer.
+ *
+ * If FORCE_SWTNL is set or nv40_state_validate() fails, the draw is handed
+ * to nv40_draw_elements_swtnl() and its result is returned.  Otherwise the
+ * vertices are submitted as NV40TCL_VB_VERTEX_BATCH commands, the pipe is
+ * flushed and TRUE is returned.
+ */
+boolean
+nv40_draw_arrays(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_channel *chan = nv40->nvws->channel;
+ unsigned restart;
+
+ /* No index buffer for this draw: clears the cached nv40->idxbuf. */
+ nv40_vbo_set_idxbuf(nv40, NULL, 0);
+ if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
+ return nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ }
+
+ while (count) {
+ unsigned vc, nr;
+
+ nv40_state_emit(nv40);
+
+ /* vc = number of vertices to submit in this pass; restart = first
+ * vertex of the next pass.  NOTE(review): presumably sized from the
+ * remaining push buffer space -- confirm in nouveau_vbuf_split().
+ */
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ /* Nothing could be scheduled: flush the ring and retry. */
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ /* Leftover vertices that do not fill a whole 256-vertex batch.
+ * A batch word is ((vertex count - 1) << 24) | first vertex.
+ */
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ /* The rest goes out as full 256-vertex batches, at most 2047 batch
+ * words per BEGIN_RING_NI packet.
+ */
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ /* Writing 0 to BEGIN_END closes the primitive. */
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+/*
+ * Submit `count` 8-bit indices, starting at element `start` of the mapped
+ * index buffer `ib`, inline in the command stream.
+ *
+ * Indices are packed two per word through NV40TCL_VB_ELEMENT_U16; an odd
+ * leading index is sent on its own through NV40TCL_VB_ELEMENT_U32.
+ */
+static INLINE void
+nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv40->nvws->channel;
+
+ while (count) {
+ uint8_t *elts = (uint8_t *)ib + start;
+ unsigned vc, push, restart;
+
+ nv40_state_emit(nv40);
+
+ /* vc = indices to submit in this pass; restart = where the next
+ * pass begins.
+ */
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ /* Nothing could be scheduled: flush the ring and retry. */
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ /* Odd count: send one index alone so the rest can go in pairs. */
+ if (vc & 1) {
+ BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ /* At most 2047 words (two indices each) per packet. */
+ push = MIN2(vc, 2047 * 2);
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+ /* Low 16 bits = elts[i], high 16 bits = elts[i+1]. */
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ /* Writing 0 to BEGIN_END closes the primitive. */
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+/*
+ * Submit `count` 16-bit indices, starting at element `start` of the mapped
+ * index buffer `ib`, inline in the command stream.
+ *
+ * Indices are packed two per word through NV40TCL_VB_ELEMENT_U16; an odd
+ * leading index is sent on its own through NV40TCL_VB_ELEMENT_U32.
+ */
+static INLINE void
+nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv40->nvws->channel;
+
+ while (count) {
+ uint16_t *elts = (uint16_t *)ib + start;
+ unsigned vc, push, restart;
+
+ nv40_state_emit(nv40);
+
+ /* vc = indices to submit in this pass; restart = where the next
+ * pass begins.
+ */
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ /* Nothing could be scheduled: flush the ring and retry. */
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ /* Odd count: send one index alone so the rest can go in pairs. */
+ if (vc & 1) {
+ BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ /* At most 2047 words (two indices each) per packet. */
+ push = MIN2(vc, 2047 * 2);
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+ /* Low 16 bits = elts[i], high 16 bits = elts[i+1]. */
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ /* Writing 0 to BEGIN_END closes the primitive. */
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+/*
+ * Submit `count` 32-bit indices, starting at element `start` of the mapped
+ * index buffer `ib`, inline in the command stream through
+ * NV40TCL_VB_ELEMENT_U32 (one index per word).
+ */
+static INLINE void
+nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv40->nvws->channel;
+
+ while (count) {
+ uint32_t *elts = (uint32_t *)ib + start;
+ unsigned vc, push, restart;
+
+ nv40_state_emit(nv40);
+
+ /* vc = indices to submit in this pass; restart = where the next
+ * pass begins.
+ */
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ /* Nothing could be scheduled: flush the ring and retry. */
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ while (vc) {
+ /* At most 2047 index words per packet. */
+ push = MIN2(vc, 2047);
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push);
+ OUT_RINGp (elts, push);
+
+ vc -= push;
+ elts += push;
+ }
+
+ /* Writing 0 to BEGIN_END closes the primitive. */
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+/*
+ * Draw indexed primitives by mapping the index buffer on the CPU and
+ * pushing the indices inline in the command stream.
+ *
+ * ib_size is the size of one index in bytes (1, 2 or 4); any other value
+ * is logged and nothing is drawn.  Returns FALSE if the index buffer could
+ * not be mapped, TRUE otherwise.
+ */
+static boolean
+nv40_draw_elements_inline(struct pipe_context *pipe,
+			  struct pipe_buffer *ib, unsigned ib_size,
+			  unsigned mode, unsigned start, unsigned count)
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
+	struct pipe_winsys *ws = pipe->winsys;
+	void *map;
+
+	map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
+	/* Check the mapping itself, not the buffer handle we were given. */
+	if (!map) {
+		NOUVEAU_ERR("failed mapping ib\n");
+		return FALSE;
+	}
+
+	switch (ib_size) {
+	case 1:
+		nv40_draw_elements_u08(nv40, map, mode, start, count);
+		break;
+	case 2:
+		nv40_draw_elements_u16(nv40, map, mode, start, count);
+		break;
+	case 4:
+		nv40_draw_elements_u32(nv40, map, mode, start, count);
+		break;
+	default:
+		NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
+		break;
+	}
+
+	ws->buffer_unmap(ws, ib);
+	return TRUE;
+}
+
+/*
+ * Draw `count` indices starting at `start` from the index buffer that
+ * nv40_vbo_set_idxbuf() recorded in the context, using
+ * NV40TCL_VB_INDEX_BATCH commands.  Always returns TRUE.
+ */
+static boolean
+nv40_draw_elements_vbo(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_channel *chan = nv40->nvws->channel;
+ unsigned restart;
+
+ while (count) {
+ unsigned nr, vc;
+
+ nv40_state_emit(nv40);
+
+ /* vc = number of indices to submit in this pass; restart = first
+ * index of the next pass.
+ */
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ /* Nothing could be scheduled: flush the ring and retry. */
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ /* Leftover indices that do not fill a whole 256-index batch.
+ * A batch word is ((index count - 1) << 24) | first index.
+ */
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ /* The rest goes out as full 256-index batches, at most 2047 batch
+ * words per BEGIN_RING_NI packet.
+ */
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ /* Writing 0 to BEGIN_END closes the primitive. */
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ return TRUE;
+}
+
+/*
+ * Draw indexed primitives.
+ *
+ * indexBuffer/indexSize describe the index buffer (indexSize is the size of
+ * one index in bytes).  If FORCE_SWTNL is set or state validation fails the
+ * draw is handed to the software TNL path; otherwise the hardware index
+ * buffer path is used when nv40_vbo_set_idxbuf() accepts the buffer, and
+ * the indices are pushed inline when it does not.
+ *
+ * Returns the software path's result on fallback, TRUE otherwise.
+ */
+boolean
+nv40_draw_elements(struct pipe_context *pipe,
+		   struct pipe_buffer *indexBuffer, unsigned indexSize,
+		   unsigned mode, unsigned start, unsigned count)
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
+	boolean idxbuf;
+
+	idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
+	if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
+		/* Hand the index buffer to the fallback too; passing
+		 * NULL/0 here would turn the indexed draw into a plain
+		 * array draw over [start, start + count).
+		 */
+		return nv40_draw_elements_swtnl(pipe, indexBuffer, indexSize,
+						mode, start, count);
+	}
+
+	if (idxbuf) {
+		nv40_draw_elements_vbo(pipe, mode, start, count);
+	} else {
+		nv40_draw_elements_inline(pipe, indexBuffer, indexSize,
+					  mode, start, count);
+	}
+
+	pipe->flush(pipe, 0, NULL);
+	return TRUE;
+}
+
+/*
+ * Build the vertex buffer, vertex format and static attribute state
+ * objects from the currently bound vertex elements/buffers and the cached
+ * index buffer.
+ *
+ * Falls back to software TNL (by setting NV40_NEW_ARRAYS in
+ * nv40->fallback_swtnl) when edge flags are in use or an element has a
+ * format the hardware path does not handle.
+ *
+ * Always returns FALSE: on success the function marks the VTXBUF, VTXFMT
+ * and VTXATTR hardware state dirty itself.
+ */
+static boolean
+nv40_vbo_validate(struct nv40_context *nv40)
+{
+	struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL;
+	struct nouveau_grobj *curie = nv40->screen->curie;
+	struct pipe_buffer *ib = nv40->idxbuf;
+	unsigned ib_format = nv40->idxbuf_format;
+	unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+	int hw;
+
+	if (nv40->edgeflags) {
+		nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
+		return FALSE;
+	}
+
+	vtxbuf = so_new(20, 18);
+	so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
+	vtxfmt = so_new(17, 0);
+	so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
+
+	for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
+		struct pipe_vertex_element *ve;
+		struct pipe_vertex_buffer *vb;
+		unsigned type, ncomp;
+
+		ve = &nv40->vtxelt[hw];
+		vb = &nv40->vtxbuf[ve->vertex_buffer_index];
+
+		/* Zero stride: try to send the value as an immediate
+		 * attribute instead of pointing the hardware at a buffer.
+		 */
+		if (!vb->stride) {
+			if (!sattr)
+				sattr = so_new(16 * 5, 0);
+
+			if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) {
+				so_data(vtxbuf, 0);
+				so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT);
+				continue;
+			}
+		}
+
+		if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
+			nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
+			so_ref(NULL, &vtxbuf);
+			so_ref(NULL, &vtxfmt);
+			/* Also drop the static attribute object if one was
+			 * created for an earlier element.
+			 */
+			if (sattr)
+				so_ref(NULL, &sattr);
+			return FALSE;
+		}
+
+		so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset,
+			 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+			 0, NV40TCL_VTXBUF_ADDRESS_DMA1);
+		so_data (vtxfmt, ((vb->stride << NV40TCL_VTXFMT_STRIDE_SHIFT) |
+				  (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type));
+	}
+
+	if (ib) {
+		so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2);
+		so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
+		so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR,
+			  0, NV40TCL_IDXBUF_FORMAT_DMA1);
+	}
+
+	/* NOTE(review): method 0x1710 has no symbolic name here; its
+	 * purpose cannot be determined from this file.
+	 */
+	so_method(vtxbuf, curie, 0x1710, 1);
+	so_data  (vtxbuf, 0);
+
+	so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]);
+	nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF);
+	so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]);
+	nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT);
+	so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]);
+	nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR);
+	return FALSE;
+}
+
+/*
+ * State entry for vertex/index buffer setup: nv40_vbo_validate is run for
+ * NV40_NEW_ARRAYS.  The hw dirty field is 0 here; nv40_vbo_validate sets
+ * the hardware dirty bits itself.
+ */
+struct nv40_state_entry nv40_state_vbo = {
+ .validate = nv40_vbo_validate,
+ .dirty = {
+ .pipe = NV40_NEW_ARRAYS,
+ .hw = 0,
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
new file mode 100644
index 0000000000..0862386638
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -0,0 +1,1070 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv40_context.h"
+#include "nv40_state.h"
+
+/* TODO (at least...):
+ * 1. Indexed consts + ARL
+ * 3. NV_vp11, NV_vp2, NV_vp3 features
+ * - extra arith opcodes
+ * - branching
+ * - texture sampling
+ * - indexed attribs
+ * - indexed results
+ * 4. bugs
+ */
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv40_shader.h"
+
+#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv40_sr_neg((s))
+#define abs(s) nv40_sr_abs((s))
+
+#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
+
+/* Per-translation state used while compiling a TGSI vertex program. */
+struct nv40_vpc {
+ /* Program being built (instructions and constants are appended to it). */
+ struct nv40_vertex_program *vp;
+
+ /* Instruction most recently appended by nv40_vp_arith(). */
+ struct nv40_vertex_program_exec *vpi;
+
+ /* Bitmask of hardware temporaries currently allocated. */
+ unsigned r_temps;
+ /* Bitmask of temporaries that release_temps() will free. */
+ unsigned r_temps_discard;
+ /* Register backing each TGSI output, indexed by TGSI output index. */
+ struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+ /* Temporaries backing TGSI address registers, by TGSI index. */
+ struct nv40_sreg *r_address;
+ /* Temporaries backing TGSI temporaries, by TGSI index. */
+ struct nv40_sreg *r_temp;
+
+ /* Constant registers holding the TGSI immediates, in declaration order. */
+ struct nv40_sreg *imm;
+ /* Number of entries of imm filled in so far. */
+ unsigned nr_imm;
+
+ /* TGSI output index that carries TGSI_SEMANTIC_POSITION. */
+ unsigned hpos_idx;
+};
+
+/*
+ * Allocate the lowest-numbered free hardware temporary.
+ *
+ * The register is also recorded in r_temps_discard so that the next
+ * release_temps() frees it again.  If every bit of r_temps is taken an
+ * error is logged and temporary 0 is returned.
+ */
+static struct nv40_sreg
+temp(struct nv40_vpc *vpc)
+{
+	int idx = ffs(~vpc->r_temps) - 1;
+	unsigned bit;
+
+	if (idx < 0) {
+		NOUVEAU_ERR("out of temps!!\n");
+		assert(0);
+		return nv40_sr(NV40SR_TEMP, 0);
+	}
+
+	/* Unsigned shift: (1 << 31) on a signed int is undefined. */
+	bit = 1u << idx;
+	vpc->r_temps |= bit;
+	vpc->r_temps_discard |= bit;
+	return nv40_sr(NV40SR_TEMP, idx);
+}
+
+/* Free every temporary that is currently marked as discardable. */
+static INLINE void
+release_temps(struct nv40_vpc *vpc)
+{
+	unsigned discard = vpc->r_temps_discard;
+
+	vpc->r_temps_discard = 0;
+	vpc->r_temps &= ~discard;
+}
+
+/*
+ * Return a constant register for the vertex program.
+ *
+ * pipe >= 0 names a pipe constant: if one with that index was already
+ * added, its existing slot is reused.  Otherwise (or when pipe < 0, used
+ * for literal values) a new slot holding {x, y, z, w} is appended to
+ * vp->consts.
+ *
+ * If the constant table cannot be grown an error is logged and constant
+ * slot 0 is returned; the existing table is left intact.
+ */
+static struct nv40_sreg
+constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+	struct nv40_vertex_program *vp = vpc->vp;
+	struct nv40_vertex_program_data *vpd;
+	int idx;
+
+	if (pipe >= 0) {
+		for (idx = 0; idx < vp->nr_consts; idx++) {
+			if (vp->consts[idx].index == pipe)
+				return nv40_sr(NV40SR_CONST, idx);
+		}
+	}
+
+	/* Grow through a temporary so the table is not lost (and the count
+	 * not bumped) if realloc fails.
+	 */
+	vpd = realloc(vp->consts, sizeof(*vpd) * (vp->nr_consts + 1));
+	if (!vpd) {
+		NOUVEAU_ERR("out of memory growing constant table\n");
+		assert(0);
+		return nv40_sr(NV40SR_CONST, 0);
+	}
+	vp->consts = vpd;
+
+	idx = vp->nr_consts++;
+	vpd = &vp->consts[idx];
+
+	vpd->index = pipe;
+	vpd->value[0] = x;
+	vpd->value[1] = y;
+	vpd->value[2] = z;
+	vpd->value[3] = w;
+	return nv40_sr(NV40SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+
+/*
+ * Encode source operand `src` into slot `pos` (0, 1 or 2) of the
+ * four-word instruction `hw`.
+ *
+ * Besides the operand bits this records side information: input
+ * registers are marked in vp->ir and written to the instruction's input
+ * field in hw[1]; constants set the instruction's const_index.
+ */
+static void
+emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV40SR_TEMP:
+ sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT);
+ sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT);
+ break;
+ case NV40SR_INPUT:
+ sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
+ NV40_VP_SRC_REG_TYPE_SHIFT);
+ /* Track which vertex inputs the program reads. */
+ vp->ir |= (1 << src.index);
+ /* The input index lives in the instruction word, not the operand. */
+ hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT);
+ break;
+ case NV40SR_CONST:
+ sr |= (NV40_VP_SRC_REG_TYPE_CONST <<
+ NV40_VP_SRC_REG_TYPE_SHIFT);
+ /* One constant index per instruction: all constant operands of
+ * an instruction must refer to the same slot.
+ */
+ assert(vpc->vpi->const_index == -1 ||
+ vpc->vpi->const_index == src.index);
+ vpc->vpi->const_index = src.index;
+ break;
+ case NV40SR_NONE:
+ /* Unused operand: encoded as an input-type source. */
+ sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
+ NV40_VP_SRC_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV40_VP_SRC_NEGATE;
+
+ /* Absolute-value flags are per-operand bits in hw[0], from bit 21. */
+ if (src.abs)
+ hw[0] |= (1 << (21 + pos));
+
+ sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) |
+ (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT));
+
+ /* Operands 0 and 2 straddle two instruction words and are split into
+ * high/low parts; operand 1 fits entirely in hw[2].
+ */
+ switch (pos) {
+ case 0:
+ hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >>
+ NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT;
+ hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) <<
+ NV40_VP_INST_SRC0L_SHIFT;
+ break;
+ case 1:
+ hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT;
+ break;
+ case 2:
+ hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >>
+ NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT;
+ hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) <<
+ NV40_VP_INST_SRC2L_SHIFT;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+/*
+ * Encode destination `dst` into the four-word instruction `hw`.
+ *
+ * slot 0 selects the vector unit's destination fields, any other value
+ * the scalar unit's.  For output registers the function also records the
+ * written output in vp->or; clip-distance outputs additionally enable the
+ * matching clip plane in vp->clip_ctrl and are redirected to the FOGC
+ * (planes 0-2) or PSZ (planes 3-5) result register.
+ */
+static void
+emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+
+ switch (dst.type) {
+ case NV40SR_TEMP:
+ hw[3] |= NV40_VP_INST_DEST_MASK;
+ if (slot == 0) {
+ hw[0] |= (dst.index <<
+ NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
+ } else {
+ hw[3] |= (dst.index <<
+ NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
+ }
+ break;
+ case NV40SR_OUTPUT:
+ /* Record which outputs the program writes. */
+ switch (dst.index) {
+ case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
+ case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ /* NV40_VP_INST_DEST_CLIP(n) is a driver-internal index; it is
+ * rewritten to a real result register below.
+ */
+ case NV40_VP_INST_DEST_CLIP(0):
+ vp->or |= (1 << 6);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(1):
+ vp->or |= (1 << 7);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(2):
+ vp->or |= (1 << 8);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(3):
+ vp->or |= (1 << 9);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(4):
+ vp->or |= (1 << 10);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(5):
+ vp->or |= (1 << 11);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ default:
+ break;
+ }
+
+ hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
+ /* Flag the unit's result as going to an output register and set
+ * that unit's temp destination field to its all-ones mask.
+ */
+ if (slot == 0) {
+ hw[0] |= NV40_VP_INST_VEC_RESULT;
+ hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+ } else {
+ hw[3] |= NV40_VP_INST_SCA_RESULT;
+ hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+/*
+ * Append one instruction to the vertex program.
+ *
+ * slot 0 issues `op` on the vector unit, any other value on the scalar
+ * unit.  `mask` is the destination write mask (MASK_X..MASK_W bits) and
+ * s0..s2 are the three source operands (pass an NV40SR_NONE register for
+ * unused ones).  vpc->vpi is left pointing at the new instruction.
+ *
+ * NOTE(review): the realloc() result is not checked.
+ */
+static void
+nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
+ struct nv40_sreg dst, int mask,
+ struct nv40_sreg s0, struct nv40_sreg s1,
+ struct nv40_sreg s2)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+ uint32_t *hw;
+
+ /* Grow the instruction array by one and start from a zeroed entry. */
+ vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+ vpc->vpi = &vp->insns[vp->nr_insns - 1];
+ memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+ /* -1 = no constant referenced yet (see emit_src). */
+ vpc->vpi->const_index = -1;
+
+ hw = vpc->vpi->data;
+
+ /* Condition NV40_VP_INST_COND_TR with an identity (x,y,z,w) swizzle. */
+ hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT);
+ hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) |
+ (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) |
+ (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) |
+ (3 << NV40_VP_INST_COND_SWZ_W_SHIFT));
+
+ /* Set opcode and write mask for the selected unit; the other unit's
+ * temp destination field is set to its all-ones mask (presumably
+ * meaning "no destination" -- confirm against the hardware docs).
+ */
+ if (slot == 0) {
+ hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
+ hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+ hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
+ } else {
+ hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
+ hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20));
+ hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
+ }
+
+ emit_dst(vpc, hw, slot, dst);
+ emit_src(vpc, hw, 0, s0);
+ emit_src(vpc, hw, 1, s1);
+ emit_src(vpc, hw, 2, s2);
+}
+
+/*
+ * Convert a TGSI source register into the driver's register description,
+ * carrying over its absolute/negate modifiers and swizzle.
+ *
+ * Inputs, constants, immediates and temporaries are supported.  For any
+ * other register file an error is logged and an NV40SR_NONE register
+ * (with the TGSI modifiers applied) is returned.
+ */
+static INLINE struct nv40_sreg
+tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+	/* Start from a defined register so the unsupported-file path never
+	 * returns uninitialised data.
+	 */
+	struct nv40_sreg src = nv40_sr(NV40SR_NONE, 0);
+
+	switch (fsrc->SrcRegister.File) {
+	case TGSI_FILE_INPUT:
+		src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index);
+		break;
+	case TGSI_FILE_CONSTANT:
+		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+		break;
+	case TGSI_FILE_IMMEDIATE:
+		src = vpc->imm[fsrc->SrcRegister.Index];
+		break;
+	case TGSI_FILE_TEMPORARY:
+		src = vpc->r_temp[fsrc->SrcRegister.Index];
+		break;
+	default:
+		NOUVEAU_ERR("bad src file\n");
+		break;
+	}
+
+	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.negate = fsrc->SrcRegister.Negate;
+	src.swz[0] = fsrc->SrcRegister.SwizzleX;
+	src.swz[1] = fsrc->SrcRegister.SwizzleY;
+	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	return src;
+}
+
+/*
+ * Convert a TGSI destination register into the driver's register
+ * description.
+ *
+ * Outputs, temporaries and address registers are supported.  For any other
+ * register file an error is logged and an NV40SR_NONE register is returned.
+ */
+static INLINE struct nv40_sreg
+tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+	/* Start from a defined register so the unsupported-file path never
+	 * returns uninitialised data.
+	 */
+	struct nv40_sreg dst = nv40_sr(NV40SR_NONE, 0);
+
+	switch (fdst->DstRegister.File) {
+	case TGSI_FILE_OUTPUT:
+		dst = vpc->r_result[fdst->DstRegister.Index];
+		break;
+	case TGSI_FILE_TEMPORARY:
+		dst = vpc->r_temp[fdst->DstRegister.Index];
+		break;
+	case TGSI_FILE_ADDRESS:
+		dst = vpc->r_address[fdst->DstRegister.Index];
+		break;
+	default:
+		NOUVEAU_ERR("bad dst file\n");
+		break;
+	}
+
+	return dst;
+}
+
+/* Translate a TGSI write mask into the MASK_X..MASK_W bit layout. */
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+	return ((tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0) |
+	       ((tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0);
+}
+
+/*
+ * Check whether a TGSI source's extended swizzle can be used directly.
+ *
+ * Returns TRUE when every component selects X/Y/Z/W and no per-component
+ * negate needs handling; *src is left untouched in that case.
+ *
+ * Otherwise the operand is built in a newly allocated temporary stored in
+ * *src (using extra instructions for the ZERO/ONE selectors and the
+ * per-component negates) and FALSE is returned.
+ */
+static boolean
+src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
+ struct nv40_sreg *src)
+{
+ const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ struct nv40_sreg tgsi = tgsi_src(vpc, fsrc);
+ uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+ uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+ fsrc->SrcRegisterExtSwz.NegateY,
+ fsrc->SrcRegisterExtSwz.NegateZ,
+ fsrc->SrcRegisterExtSwz.NegateW };
+ uint c;
+
+ /* Sort the four components into: plain swizzle, constant 0, constant 1,
+ * and (independently) needs-negation.
+ */
+ for (c = 0; c < 4; c++) {
+ switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ mask |= tgsi_mask(1 << c);
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zero_mask |= tgsi_mask(1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ one_mask |= tgsi_mask(1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ default:
+ assert(0);
+ }
+
+ /* A per-component negate only matters if the whole operand is
+ * not already negated.
+ */
+ if (!tgsi.negate && neg[c])
+ neg_mask |= tgsi_mask(1 << c);
+ }
+
+ if (mask == MASK_ALL && !neg_mask)
+ return TRUE;
+
+ *src = temp(vpc);
+
+ /* Copy the normally swizzled components. */
+ if (mask)
+ arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none);
+
+ /* SFL is used to write the ZERO components, STR the ONE components. */
+ if (zero_mask)
+ arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none);
+
+ if (one_mask)
+ arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none);
+
+ /* Negate selected components by multiplying them with -(STR result). */
+ if (neg_mask) {
+ struct nv40_sreg one = temp(vpc);
+ arith(vpc, 0, OP_STR, one, neg_mask, one, none, none);
+ arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none);
+ }
+
+ return FALSE;
+}
+
+/*
+ * Translate one TGSI instruction into hardware instructions appended to
+ * the vertex program.
+ *
+ * Returns TRUE on success (TGSI_OPCODE_END is accepted and emits
+ * nothing), FALSE for an unsupported source register file or opcode.
+ * Temporaries allocated while translating are released before a
+ * successful return.
+ */
+static boolean
+nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nv40_sreg src[3], dst, tmp;
+ struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ int mask;
+ /* Input / constant / immediate index already used directly by this
+ * instruction; -1 means none yet.
+ */
+ int ai = -1, ci = -1, ii = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ /* Temporaries can always be referenced directly: resolve them first. */
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+
+ /* If the swizzle is not native, src_native_swz() has already
+ * placed the operand in a temporary in src[i]; nothing more to do.
+ */
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ if (!src_native_swz(vpc, fsrc, &src[i]))
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ /* Only one distinct input and one distinct constant/immediate are
+ * referenced directly per instruction; any further one is first
+ * copied into a temporary with a MOV.
+ */
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ if ((ci == -1 && ii == -1) ||
+ ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ if ((ci == -1 && ii == -1) ||
+ ii == fsrc->SrcRegister.Index) {
+ ii = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+ /* The second arith() argument selects the unit: 0 = vector, 1 = scalar.
+ * Scalar operations take their operand in the third source slot.
+ */
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ /* pow(a, b) = ex2(b * lg2(a)), computed on the x components. */
+ tmp = temp(vpc);
+ arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, 1, OP_EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RET:
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0]));
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ /* a - b is emitted as a + (-b). */
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ /* Cross product in two steps: tmp = a.zxy * b.yzx, then
+ * dst.xyz = a.yzx * b.zxy - tmp.
+ */
+ tmp = temp(vpc);
+ arith(vpc, 0, OP_MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ release_temps(vpc);
+ return TRUE;
+}
+
+/*
+ * Map a TGSI output declaration to its hardware result register and store
+ * it in vpc->r_result[] at the declaration's first index.  The index of
+ * the position output is remembered in vpc->hpos_idx.
+ *
+ * Returns FALSE (after logging) for a semantic or semantic index that is
+ * not supported.
+ */
+static boolean
+nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
+				const struct tgsi_full_declaration *fdec)
+{
+	const unsigned out = fdec->DeclarationRange.First;
+	const unsigned sem = fdec->Semantic.SemanticIndex;
+	int reg;
+
+	switch (fdec->Semantic.SemanticName) {
+	case TGSI_SEMANTIC_POSITION:
+		reg = NV40_VP_INST_DEST_POS;
+		vpc->hpos_idx = out;
+		break;
+	case TGSI_SEMANTIC_COLOR:
+		switch (sem) {
+		case 0:
+			reg = NV40_VP_INST_DEST_COL0;
+			break;
+		case 1:
+			reg = NV40_VP_INST_DEST_COL1;
+			break;
+		default:
+			NOUVEAU_ERR("bad colour semantic index\n");
+			return FALSE;
+		}
+		break;
+	case TGSI_SEMANTIC_BCOLOR:
+		switch (sem) {
+		case 0:
+			reg = NV40_VP_INST_DEST_BFC0;
+			break;
+		case 1:
+			reg = NV40_VP_INST_DEST_BFC1;
+			break;
+		default:
+			NOUVEAU_ERR("bad bcolour semantic index\n");
+			return FALSE;
+		}
+		break;
+	case TGSI_SEMANTIC_FOG:
+		reg = NV40_VP_INST_DEST_FOGC;
+		break;
+	case TGSI_SEMANTIC_PSIZE:
+		reg = NV40_VP_INST_DEST_PSZ;
+		break;
+	case TGSI_SEMANTIC_GENERIC:
+		if (sem > 7) {
+			NOUVEAU_ERR("bad generic semantic index\n");
+			return FALSE;
+		}
+		reg = NV40_VP_INST_DEST_TC(sem);
+		break;
+	default:
+		NOUVEAU_ERR("bad output semantic\n");
+		return FALSE;
+	}
+
+	vpc->r_result[out] = nv40_sr(NV40SR_OUTPUT, reg);
+	return TRUE;
+}
+
+/*
+ * First pass over the TGSI tokens of vpc->vp.
+ *
+ * Counts immediates, finds the highest temporary and address register
+ * used, and resolves output declarations.  It then allocates the imm,
+ * r_temp and r_address tables and reserves one hardware temporary for
+ * every TGSI temporary and address register.
+ *
+ * Returns FALSE if an output declaration is not supported or a table
+ * cannot be allocated; nothing allocated here is left behind in that case.
+ */
+static boolean
+nv40_vertprog_prepare(struct nv40_vpc *vpc)
+{
+	struct tgsi_parse_context p;
+	int high_temp = -1, high_addr = -1, nr_imm = 0, i;
+
+	tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+	while (!tgsi_parse_end_of_tokens(&p)) {
+		const union tgsi_full_token *tok = &p.FullToken;
+
+		tgsi_parse_token(&p);
+		switch(tok->Token.Type) {
+		case TGSI_TOKEN_TYPE_IMMEDIATE:
+			nr_imm++;
+			break;
+		case TGSI_TOKEN_TYPE_DECLARATION:
+		{
+			const struct tgsi_full_declaration *fdec;
+
+			fdec = &p.FullToken.FullDeclaration;
+			switch (fdec->Declaration.File) {
+			case TGSI_FILE_TEMPORARY:
+				if (fdec->DeclarationRange.Last > high_temp) {
+					high_temp =
+						fdec->DeclarationRange.Last;
+				}
+				break;
+#if 0 /* this would be nice.. except gallium doesn't track it */
+			case TGSI_FILE_ADDRESS:
+				if (fdec->DeclarationRange.Last > high_addr) {
+					high_addr =
+						fdec->DeclarationRange.Last;
+				}
+				break;
+#endif
+			case TGSI_FILE_OUTPUT:
+				if (!nv40_vertprog_parse_decl_output(vpc, fdec)) {
+					/* Don't leak the parse context. */
+					tgsi_parse_free(&p);
+					return FALSE;
+				}
+				break;
+			default:
+				break;
+			}
+		}
+			break;
+#if 1 /* yay, parse instructions looking for address regs instead */
+		case TGSI_TOKEN_TYPE_INSTRUCTION:
+		{
+			const struct tgsi_full_instruction *finst;
+			const struct tgsi_full_dst_register *fdst;
+
+			finst = &p.FullToken.FullInstruction;
+			fdst = &finst->FullDstRegisters[0];
+
+			if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) {
+				if (fdst->DstRegister.Index > high_addr)
+					high_addr = fdst->DstRegister.Index;
+			}
+
+		}
+			break;
+#endif
+		default:
+			break;
+		}
+	}
+	tgsi_parse_free(&p);
+
+	if (nr_imm) {
+		vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg));
+		assert(vpc->imm);
+	}
+
+	/* high_* hold the highest index seen (-1 if none); the
+	 * pre-increment turns them into counts.
+	 */
+	if (++high_temp) {
+		vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+		if (!vpc->r_temp)
+			goto out_nomem;
+		for (i = 0; i < high_temp; i++)
+			vpc->r_temp[i] = temp(vpc);
+	}
+
+	if (++high_addr) {
+		vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg));
+		if (!vpc->r_address)
+			goto out_nomem;
+		for (i = 0; i < high_addr; i++)
+			vpc->r_address[i] = temp(vpc);
+	}
+
+	/* The registers reserved above must survive release_temps(). */
+	vpc->r_temps_discard = 0;
+	return TRUE;
+
+out_nomem:
+	NOUVEAU_ERR("out of memory\n");
+	if (vpc->r_temp) {
+		FREE(vpc->r_temp);
+		vpc->r_temp = NULL;
+	}
+	if (vpc->imm) {
+		FREE(vpc->imm);
+		vpc->imm = NULL;
+	}
+	return FALSE;
+}
+
+/**
+ * Translate a vertex program's TGSI token stream into NV40 instructions.
+ *
+ * vp->translated is set to TRUE only when the whole program was emitted;
+ * on any failure it is left unchanged, which is how the caller detects
+ * the error.  The temporary nv40_vpc and the register arrays hanging off
+ * it are freed before returning.
+ *
+ * \param nv40  context supplying the user clip plane equations
+ * \param vp    program to translate; vp->pipe.tokens is the input
+ */
+static void
+nv40_vertprog_translate(struct nv40_context *nv40,
+			struct nv40_vertex_program *vp)
+{
+	struct tgsi_parse_context parse;
+	struct nv40_vpc *vpc = NULL;
+	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+	int i;
+
+	vpc = CALLOC(1, sizeof(struct nv40_vpc));
+	if (!vpc)
+		return;
+	vpc->vp = vp;
+
+	if (!nv40_vertprog_prepare(vpc)) {
+		FREE(vpc);
+		return;
+	}
+
+	/* Redirect post-transform vertex position to a temp if user clip
+	 * planes are enabled.  We need to append code to the vtxprog
+	 * to handle clip planes later.
+	 */
+	if (vp->ucp.nr) {
+		vpc->r_result[vpc->hpos_idx] = temp(vpc);
+		vpc->r_temps_discard = 0;
+	}
+
+	tgsi_parse_init(&parse, vp->pipe.tokens);
+
+	while (!tgsi_parse_end_of_tokens(&parse)) {
+		tgsi_parse_token(&parse);
+
+		switch (parse.FullToken.Token.Type) {
+		case TGSI_TOKEN_TYPE_IMMEDIATE:
+		{
+			const struct tgsi_full_immediate *imm;
+
+			imm = &parse.FullToken.FullImmediate;
+			/* Only 4-component float immediates are handled. */
+			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+			assert(imm->Immediate.NrTokens == 4 + 1);
+			vpc->imm[vpc->nr_imm++] =
+				constant(vpc, -1,
+					 imm->u.ImmediateFloat32[0].Float,
+					 imm->u.ImmediateFloat32[1].Float,
+					 imm->u.ImmediateFloat32[2].Float,
+					 imm->u.ImmediateFloat32[3].Float);
+		}
+			break;
+		case TGSI_TOKEN_TYPE_INSTRUCTION:
+		{
+			const struct tgsi_full_instruction *finst;
+			finst = &parse.FullToken.FullInstruction;
+			if (!nv40_vertprog_parse_instruction(vpc, finst))
+				goto out_err;
+		}
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Write out HPOS if it was redirected to a temp earlier */
+	if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) {
+		struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT,
+						NV40_VP_INST_DEST_POS);
+		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+
+		arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none);
+	}
+
+	/* Insert code to handle user clip planes */
+	for (i = 0; i < vp->ucp.nr; i++) {
+		struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT,
+						NV40_VP_INST_DEST_CLIP(i));
+		struct nv40_sreg ceqn = constant(vpc, -1,
+						 nv40->clip.ucp[i][0],
+						 nv40->clip.ucp[i][1],
+						 nv40->clip.ucp[i][2],
+						 nv40->clip.ucp[i][3]);
+		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+		unsigned mask;
+
+		switch (i) {
+		case 0: case 3: mask = MASK_Y; break;
+		case 1: case 4: mask = MASK_Z; break;
+		case 2: case 5: mask = MASK_W; break;
+		default:
+			NOUVEAU_ERR("invalid clip dist #%d\n", i);
+			goto out_err;
+		}
+
+		arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none);
+	}
+
+	/* A program that emitted no instructions has no final instruction to
+	 * tag; fail the translation instead of indexing insns[-1].
+	 */
+	if (!vp->nr_insns) {
+		NOUVEAU_ERR("vertex program contains no instructions\n");
+		goto out_err;
+	}
+
+	vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST;
+	vp->translated = TRUE;
+out_err:
+	tgsi_parse_free(&parse);
+	if (vpc->r_temp)
+		FREE(vpc->r_temp);
+	if (vpc->r_address)
+		FREE(vpc->r_address);
+	if (vpc->imm)
+		FREE(vpc->imm);
+	FREE(vpc);
+}
+
+/**
+ * State validation hook for the vertex program.
+ *
+ * Translates the program if it has not been translated yet, makes sure it
+ * owns exec and const slots in the screen's heaps (evicting other programs
+ * when the first allocation attempt fails), patches constant indices when
+ * the data segment moved, then uploads constants and code as required.
+ *
+ * \return TRUE when nv40->state.hw[NV40_STATE_VERTPROG] was switched to
+ *         this program's state object; FALSE otherwise, including when
+ *         translation failed (NV40_NEW_VERTPROG is then set in
+ *         nv40->fallback_swtnl).
+ */
+static boolean
+nv40_vertprog_validate(struct nv40_context *nv40)
+{
+	struct nouveau_winsys *nvws = nv40->nvws;
+	struct pipe_winsys *ws = nv40->pipe.winsys;
+	struct nouveau_grobj *curie = nv40->screen->curie;
+	struct nv40_vertex_program *vp;
+	struct pipe_buffer *constbuf;
+	boolean upload_code = FALSE, upload_data = FALSE;
+	int i;
+
+	if (nv40->render_mode == HW) {
+		vp = nv40->vertprog;
+		constbuf = nv40->constbuf[PIPE_SHADER_VERTEX];
+
+		/* The clip plane code is baked into the program, so a change
+		 * in clip state throws the current translation away.
+		 */
+		if ((nv40->dirty & NV40_NEW_UCP) ||
+		    memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) {
+			nv40_vertprog_destroy(nv40, vp);
+			memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp));
+		}
+	} else {
+		vp = nv40->swtnl.vertprog;
+		constbuf = NULL;
+	}
+
+	/* Translate TGSI shader into hw bytecode */
+	if (vp->translated)
+		goto check_gpu_resources;
+
+	nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG;
+	nv40_vertprog_translate(nv40, vp);
+	if (!vp->translated) {
+		nv40->fallback_swtnl |= NV40_NEW_VERTPROG;
+		return FALSE;
+	}
+
+check_gpu_resources:
+	/* Allocate hw vtxprog exec slots */
+	if (!vp->exec) {
+		struct nouveau_resource *heap = nv40->screen->vp_exec_heap;
+		struct nouveau_stateobj *so;
+		uint vplen = vp->nr_insns;
+
+		if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+			/* Evict other programs until the heap's first node
+			 * is large enough, then retry.
+			 */
+			while (heap->next && heap->size < vplen) {
+				struct nv40_vertex_program *evict;
+
+				evict = heap->next->priv;
+				nvws->res_free(&evict->exec);
+			}
+
+			if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+				assert(0);
+		}
+
+		so = so_new(7, 0);
+		so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
+		so_data  (so, vp->exec->start);
+		so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
+		so_data  (so, vp->ir);
+		so_data  (so, vp->or);
+		so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1);
+		so_data  (so, vp->clip_ctrl);
+		so_ref(so, &vp->so);
+
+		upload_code = TRUE;
+	}
+
+	/* Allocate hw vtxprog const slots */
+	if (vp->nr_consts && !vp->data) {
+		struct nouveau_resource *heap = nv40->screen->vp_data_heap;
+
+		if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+			while (heap->next && heap->size < vp->nr_consts) {
+				struct nv40_vertex_program *evict;
+
+				evict = heap->next->priv;
+				nvws->res_free(&evict->data);
+			}
+
+			if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+				assert(0);
+		}
+
+		/*XXX: handle this some day */
+		assert(vp->data->start >= vp->data_start_min);
+
+		upload_data = TRUE;
+		if (vp->data_start != vp->data->start)
+			upload_code = TRUE;
+	}
+
+	/* If exec or data segments moved we need to patch the program to
+	 * fixup offsets and register IDs.
+	 */
+	if (vp->exec_start != vp->exec->start) {
+		for (i = 0; i < vp->nr_insns; i++) {
+			struct nv40_vertex_program_exec *vpi = &vp->insns[i];
+
+			/* Relocating branch offsets is not implemented. */
+			if (vpi->has_branch_offset) {
+				assert(0);
+			}
+		}
+
+		vp->exec_start = vp->exec->start;
+	}
+
+	if (vp->nr_consts && vp->data_start != vp->data->start) {
+		for (i = 0; i < vp->nr_insns; i++) {
+			struct nv40_vertex_program_exec *vpi = &vp->insns[i];
+
+			if (vpi->const_index >= 0) {
+				vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK;
+				vpi->data[1] |=
+					(vpi->const_index + vp->data->start) <<
+					NV40_VP_INST_CONST_SRC_SHIFT;
+
+			}
+		}
+
+		vp->data_start = vp->data->start;
+	}
+
+	/* Update + Upload constant values */
+	if (vp->nr_consts) {
+		float *map = NULL;
+
+		if (constbuf) {
+			map = ws->buffer_map(ws, constbuf,
+					     PIPE_BUFFER_USAGE_CPU_READ);
+		}
+
+		for (i = 0; i < vp->nr_consts; i++) {
+			struct nv40_vertex_program_data *vpd = &vp->consts[i];
+
+			if (vpd->index >= 0) {
+				if (map) {
+					const float *src = &map[vpd->index * 4];
+
+					/* Skip constants whose value did not
+					 * change, unless the whole segment
+					 * must be re-uploaded.
+					 */
+					if (!upload_data &&
+					    !memcmp(vpd->value, src,
+						    4 * sizeof(float)))
+						continue;
+					memcpy(vpd->value, src,
+					       4 * sizeof(float));
+				} else if (!upload_data) {
+					/* No constant buffer is mapped, so
+					 * there is no new value to fetch;
+					 * never read through a NULL map.
+					 */
+					continue;
+				}
+			}
+
+			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
+			OUT_RING  (i + vp->data->start);
+			OUT_RINGp ((uint32_t *)vpd->value, 4);
+		}
+
+		if (constbuf)
+			ws->buffer_unmap(ws, constbuf);
+	}
+
+	/* Upload vtxprog */
+	if (upload_code) {
+#if 0
+		for (i = 0; i < vp->nr_insns; i++) {
+			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]);
+			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]);
+			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]);
+			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
+		}
+#endif
+		BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
+		OUT_RING  (vp->exec->start);
+		for (i = 0; i < vp->nr_insns; i++) {
+			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4);
+			OUT_RINGp (vp->insns[i].data, 4);
+		}
+	}
+
+	if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) {
+		so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]);
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/**
+ * Drop everything derived from translating a vertex program: the
+ * instruction and constant arrays, its exec/data heap allocations, the
+ * cached register masks and its state object.  Clearing vp->translated
+ * makes nv40_vertprog_validate translate the program again.
+ */
+void
+nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp)
+{
+	struct nouveau_winsys *winsys = nv40->screen->nvws;
+
+	vp->translated = FALSE;
+
+	/* Translated instruction stream. */
+	if (vp->nr_insns != 0) {
+		FREE(vp->insns);
+		vp->nr_insns = 0;
+		vp->insns = NULL;
+	}
+
+	/* Constant table. */
+	if (vp->nr_consts != 0) {
+		FREE(vp->consts);
+		vp->nr_consts = 0;
+		vp->consts = NULL;
+	}
+
+	/* Heap allocations and the offsets remembered for them. */
+	winsys->res_free(&vp->exec);
+	vp->exec_start = 0;
+	winsys->res_free(&vp->data);
+	vp->data_start = 0;
+	vp->data_start_min = 0;
+
+	vp->ir = 0;
+	vp->or = 0;
+	vp->clip_ctrl = 0;
+	so_ref(NULL, &vp->so);
+}
+
+struct nv40_state_entry nv40_state_vertprog = {
+ .validate = nv40_vertprog_validate, /* returns TRUE when the hw state object for the vertex program was replaced */
+ .dirty = {
+ .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP, /* presumably the context dirty bits that cause .validate to run -- TODO confirm against the state validation loop */
+ .hw = NV40_STATE_VERTPROG, /* index of the nv40->state.hw[] slot that nv40_vertprog_validate updates */
+ }
+};
+
diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile
new file mode 100644
index 0000000000..612aea28a3
--- /dev/null
+++ b/src/gallium/drivers/nv50/Makefile
@@ -0,0 +1,21 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+# nv50 driver: library name and C sources; presumably consumed by the Makefile.template included at the bottom -- confirm there.
+LIBNAME = nv50
+
+C_SOURCES = \
+ nv50_clear.c \
+ nv50_context.c \
+ nv50_draw.c \
+ nv50_miptree.c \
+ nv50_query.c \
+ nv50_program.c \
+ nv50_screen.c \
+ nv50_state.c \
+ nv50_state_validate.c \
+ nv50_surface.c \
+ nv50_tex.c \
+ nv50_transfer.c \
+ nv50_vbo.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c
new file mode 100644
index 0000000000..f9bc3b53ca
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_clear.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv50_context.h"
+
+/**
+ * pipe_context::clear hook: clear a whole surface to a packed value.
+ *
+ * The surface is temporarily bound as the only render target (or as the
+ * depth/stencil buffer for Z24S8/Z16 formats) with a scissor covering it,
+ * state is validated, and the clear is issued.  Formats without a case
+ * below go through pipe->surface_fill.  The previous framebuffer, scissor
+ * and dirty flags are put back afterwards.
+ *
+ * \param pipe        context
+ * \param ps          surface to clear
+ * \param clearValue  clear value packed in the surface's format
+ */
+void
+nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+	   unsigned clearValue)
+{
+	struct nv50_context *nv50 = nv50_context(pipe);
+	struct nouveau_channel *chan = nv50->screen->nvws->channel;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct pipe_framebuffer_state saved_fb = nv50->framebuffer, tmp_fb;
+	struct pipe_scissor_state saved_sc = nv50->scissor, tmp_sc;
+	unsigned saved_dirty = nv50->dirty;
+
+	nv50->dirty = 0;
+
+	/* Bind the target on its own: colour buffer 0, or zeta only. */
+	if (ps->format != PIPE_FORMAT_Z24S8_UNORM &&
+	    ps->format != PIPE_FORMAT_Z16_UNORM) {
+		tmp_fb.nr_cbufs = 1;
+		tmp_fb.cbufs[0] = ps;
+		tmp_fb.zsbuf = NULL;
+	} else {
+		tmp_fb.nr_cbufs = 0;
+		tmp_fb.zsbuf = ps;
+	}
+	tmp_fb.width = ps->width;
+	tmp_fb.height = ps->height;
+	pipe->set_framebuffer_state(pipe, &tmp_fb);
+
+	/* Scissor rectangle spanning the full surface. */
+	tmp_sc.minx = 0;
+	tmp_sc.miny = 0;
+	tmp_sc.maxx = tmp_fb.width;
+	tmp_sc.maxy = tmp_fb.height;
+	pipe->set_scissor_state(pipe, &tmp_sc);
+
+	nv50_state_validate(nv50);
+
+	if (ps->format == PIPE_FORMAT_A8R8G8B8_UNORM) {
+		/* Clear colour is emitted in R, G, B, A order. */
+		BEGIN_RING(chan, tesla, 0x0d80, 4);
+		OUT_RINGf (chan, ubyte_to_float((clearValue >> 16) & 0xff));
+		OUT_RINGf (chan, ubyte_to_float((clearValue >>  8) & 0xff));
+		OUT_RINGf (chan, ubyte_to_float((clearValue >>  0) & 0xff));
+		OUT_RINGf (chan, ubyte_to_float((clearValue >> 24) & 0xff));
+		BEGIN_RING(chan, tesla, 0x19d0, 1);
+		OUT_RING  (chan, 0x3c);
+	} else if (ps->format == PIPE_FORMAT_Z24S8_UNORM) {
+		/* Upper 24 bits are depth, low 8 bits are stencil. */
+		BEGIN_RING(chan, tesla, 0x0d90, 1);
+		OUT_RINGf (chan, (float)(clearValue >> 8) * (1.0 / 16777215.0));
+		BEGIN_RING(chan, tesla, 0x0da0, 1);
+		OUT_RING  (chan, clearValue & 0xff);
+		BEGIN_RING(chan, tesla, 0x19d0, 1);
+		OUT_RING  (chan, 0x03);
+	} else {
+		pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height,
+				   clearValue);
+	}
+
+	/* Put the caller's state back and re-raise its dirty flags. */
+	pipe->set_framebuffer_state(pipe, &saved_fb);
+	pipe->set_scissor_state(pipe, &saved_sc);
+	nv50->dirty |= saved_dirty;
+
+	ps->status = PIPE_SURFACE_STATUS_CLEAR;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
new file mode 100644
index 0000000000..565a5da668
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "nv50_context.h"
+#include "nv50_screen.h"
+
+/**
+ * pipe_context::flush hook: issues FIRE_RING on the screen's channel.
+ * Neither flags nor fence is read or written.
+ */
+static void
+nv50_flush(struct pipe_context *pipe, unsigned flags,
+	   struct pipe_fence_handle **fence)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	FIRE_RING(ctx->screen->nvws->channel);
+}
+
+/**
+ * pipe_context::destroy hook: destroys the draw module context created
+ * in nv50_create, then frees the nv50 context itself.
+ */
+static void
+nv50_destroy(struct pipe_context *pipe)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	draw_destroy(ctx->draw);
+	FREE(ctx);
+}
+
+
+/* pipe_context::set_edgeflags hook: intentionally does nothing. */
+static void
+nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+	(void)pipe;
+	(void)bitfield;
+}
+
+/**
+ * Create an nv50 pipe context for a screen.
+ *
+ * \param pscreen  screen the context belongs to
+ * \param pctx_id  stored in nv50->pctx_id
+ * \return the new context's pipe_context, or NULL when the context, the
+ *         draw module context or the render stage cannot be allocated
+ */
+struct pipe_context *
+nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+	struct pipe_winsys *pipe_winsys = pscreen->winsys;
+	struct nv50_screen *screen = nv50_screen(pscreen);
+	struct nv50_context *nv50;
+	struct draw_stage *render;
+
+	nv50 = CALLOC_STRUCT(nv50_context);
+	if (!nv50)
+		return NULL;
+	nv50->screen = screen;
+	nv50->pctx_id = pctx_id;
+
+	nv50->pipe.winsys = pipe_winsys;
+	nv50->pipe.screen = pscreen;
+
+	nv50->pipe.destroy = nv50_destroy;
+
+	nv50->pipe.set_edgeflags = nv50_set_edgeflags;
+	nv50->pipe.draw_arrays = nv50_draw_arrays;
+	nv50->pipe.draw_elements = nv50_draw_elements;
+	nv50->pipe.clear = nv50_clear;
+
+	nv50->pipe.flush = nv50_flush;
+
+	nv50_init_surface_functions(nv50);
+	nv50_init_state_functions(nv50);
+	nv50_init_query_functions(nv50);
+
+	/* An assert alone disappears with NDEBUG and would let a NULL draw
+	 * context through, so fail the creation explicitly.
+	 */
+	nv50->draw = draw_create();
+	if (!nv50->draw) {
+		FREE(nv50);
+		return NULL;
+	}
+
+	render = nv50_draw_render_stage(nv50);
+	if (!render) {
+		draw_destroy(nv50->draw);
+		FREE(nv50);
+		return NULL;
+	}
+	draw_set_rasterize_stage(nv50->draw, render);
+
+	return &nv50->pipe;
+}
+
+
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
new file mode 100644
index 0000000000..313e435e7a
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -0,0 +1,201 @@
+#ifndef __NV50_CONTEXT_H__
+#define __NV50_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+#include "nouveau/nouveau_stateobj.h"
+
+#include "nv50_screen.h"
+#include "nv50_program.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); /* prints to stderr prefixed with the calling function and line; NOTE(review): the expansion already ends in ';', so an unbraced "if (...) NOUVEAU_ERR(...); else" does not compile */
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args); /* prints to stderr with a "nouveau: " prefix; same trailing-';' caveat as NOUVEAU_ERR */
+
+/* Constant buffer assignment */
+#define NV50_CB_PMISC 0
+#define NV50_CB_PVP 1
+#define NV50_CB_PFP 2
+#define NV50_CB_PGP 3
+#define NV50_CB_TIC 4
+#define NV50_CB_TSC 5
+#define NV50_CB_PUPLOAD 6
+
+#define NV50_NEW_BLEND (1 << 0) /* NV50_NEW_*: one bit per piece of state; presumably OR-ed into nv50_context.dirty -- TODO confirm in nv50_state.c */
+#define NV50_NEW_ZSA (1 << 1)
+#define NV50_NEW_BLEND_COLOUR (1 << 2)
+#define NV50_NEW_STIPPLE (1 << 3)
+#define NV50_NEW_SCISSOR (1 << 4)
+#define NV50_NEW_VIEWPORT (1 << 5)
+#define NV50_NEW_RASTERIZER (1 << 6)
+#define NV50_NEW_FRAMEBUFFER (1 << 7)
+#define NV50_NEW_VERTPROG (1 << 8)
+#define NV50_NEW_VERTPROG_CB (1 << 9)
+#define NV50_NEW_FRAGPROG (1 << 10)
+#define NV50_NEW_FRAGPROG_CB (1 << 11)
+#define NV50_NEW_ARRAYS (1 << 12)
+#define NV50_NEW_SAMPLER (1 << 13)
+#define NV50_NEW_TEXTURE (1 << 14)
+
+struct nv50_blend_stateobj {
+ struct pipe_blend_state pipe; /* presumably a copy of the gallium blend state this object was created from -- TODO confirm */
+ struct nouveau_stateobj *so; /* presumably the state object built for it -- TODO confirm */
+};
+
+struct nv50_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state pipe; /* presumably a copy of the gallium depth/stencil/alpha state -- TODO confirm */
+ struct nouveau_stateobj *so; /* presumably the state object built for it -- TODO confirm */
+};
+
+struct nv50_rasterizer_stateobj {
+ struct pipe_rasterizer_state pipe; /* presumably a copy of the gallium rasterizer state -- TODO confirm */
+ struct nouveau_stateobj *so; /* presumably the state object built for it -- TODO confirm */
+};
+
+struct nv50_miptree_level {
+ int *image_offset; /* heap array with one entry per image (image_nr in nv50_miptree_create, 1 in nv50_miptree_blanket): offset of that image at this level, taken from the running total_size */
+ unsigned pitch; /* width * block size aligned to 64 in nv50_miptree_create; the caller's stride in nv50_miptree_blanket */
+};
+
+struct nv50_miptree {
+ struct pipe_texture base; /* must stay the first member: nv50_miptree() casts a pipe_texture pointer to this struct */
+ struct pipe_buffer *buffer; /* backing storage: created in nv50_miptree_create, or a reference to the caller's buffer in nv50_miptree_blanket */
+
+ struct nv50_miptree_level level[PIPE_MAX_TEXTURE_LEVELS]; /* entries 0..base.last_level are filled in by nv50_miptree_create */
+ int image_nr; /* depth[0] for 3D textures, 6 for cube maps, 1 otherwise */
+ int total_size; /* sum of all image sizes; the size passed to buffer_create */
+};
+
+/* Downcast a pipe_texture to the nv50_miptree that embeds it as 'base'. */
+static INLINE struct nv50_miptree *
+nv50_miptree(struct pipe_texture *pt)
+{
+	struct nv50_miptree *mt = (struct nv50_miptree *)pt;
+
+	return mt;
+}
+
+struct nv50_surface {
+ struct pipe_surface base; /* only member; nv50_surface() casts a pipe_surface pointer to this struct */
+};
+
+/* Downcast a pipe_surface to the nv50_surface that embeds it as 'base'. */
+static INLINE struct nv50_surface *
+nv50_surface(struct pipe_surface *pt)
+{
+	struct nv50_surface *surf = (struct nv50_surface *)pt;
+
+	return surf;
+}
+
+/* Return the buffer of the miptree that a surface's texture belongs to. */
+static INLINE struct pipe_buffer *
+nv50_surface_buffer(struct pipe_surface *surface)
+{
+	return nv50_miptree(surface->texture)->buffer;
+}
+
+struct nv50_state { /* embedded in nv50_context as 'state'; presumably the state objects last emitted to the hardware -- TODO confirm in nv50_state_validate.c */
+ unsigned dirty;
+
+ struct nouveau_stateobj *fb;
+ struct nouveau_stateobj *blend;
+ struct nouveau_stateobj *blend_colour;
+ struct nouveau_stateobj *zsa;
+ struct nouveau_stateobj *rast;
+ struct nouveau_stateobj *stipple;
+ struct nouveau_stateobj *scissor;
+ unsigned scissor_enabled;
+ struct nouveau_stateobj *viewport;
+ unsigned viewport_bypass;
+ struct nouveau_stateobj *tsc_upload;
+ struct nouveau_stateobj *tic_upload;
+ struct nouveau_stateobj *vertprog;
+ struct nouveau_stateobj *fragprog;
+ struct nouveau_stateobj *vtxfmt;
+ struct nouveau_stateobj *vtxbuf;
+};
+
+struct nv50_context {
+ struct pipe_context pipe; /* must stay the first member: nv50_context() casts a pipe_context pointer to this struct */
+
+ struct nv50_screen *screen; /* set in nv50_create */
+ unsigned pctx_id; /* value passed to nv50_create */
+
+ struct draw_context *draw; /* created by draw_create() in nv50_create, destroyed in nv50_destroy */
+
+ struct nv50_state state;
+
+ unsigned dirty; /* nv50_clear saves, zeroes and later ORs this back; presumably a mask of NV50_NEW_* bits -- TODO confirm */
+ struct nv50_blend_stateobj *blend;
+ struct nv50_zsa_stateobj *zsa;
+ struct nv50_rasterizer_stateobj *rasterizer;
+ struct pipe_blend_color blend_colour;
+ struct pipe_poly_stipple stipple;
+ struct pipe_scissor_state scissor; /* saved and restored around nv50_clear */
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer; /* saved and restored around nv50_clear */
+ struct nv50_program *vertprog;
+ struct nv50_program *fragprog;
+ struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned vtxbuf_nr;
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+ unsigned vtxelt_nr;
+ unsigned *sampler[PIPE_MAX_SAMPLERS];
+ unsigned sampler_nr;
+ struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
+ unsigned miptree_nr;
+};
+
+/* Downcast a pipe_context to the nv50_context that embeds it as 'pipe'. */
+static INLINE struct nv50_context *
+nv50_context(struct pipe_context *pipe)
+{
+	struct nv50_context *ctx = (struct nv50_context *)pipe;
+
+	return ctx;
+}
+
+extern void nv50_init_surface_functions(struct nv50_context *nv50);
+extern void nv50_init_state_functions(struct nv50_context *nv50);
+extern void nv50_init_query_functions(struct nv50_context *nv50);
+
+extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+extern int
+nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst,
+ int dx, int dy, struct pipe_surface *src, int sx, int sy,
+ int w, int h);
+
+/* nv50_draw.c */
+extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50);
+
+/* nv50_vbo.c */
+extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv50_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count);
+extern void nv50_vbo_validate(struct nv50_context *nv50);
+
+/* nv50_clear.c */
+extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv50_program.c */
+extern void nv50_vertprog_validate(struct nv50_context *nv50);
+extern void nv50_fragprog_validate(struct nv50_context *nv50);
+extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);
+
+/* nv50_state_validate.c */
+extern boolean nv50_state_validate(struct nv50_context *nv50);
+
+/* nv50_tex.c */
+extern void nv50_tex_validate(struct nv50_context *);
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c
new file mode 100644
index 0000000000..2f6f607261
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_draw.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nv50_context.h"
+
+struct nv50_render_stage {
+ struct draw_stage stage; /* must stay the first member: nv50_render_stage() casts a draw_stage pointer to this struct */
+ struct nv50_context *nv50; /* context passed to nv50_draw_render_stage */
+};
+
+/* Downcast a draw_stage to the nv50_render_stage that embeds it. */
+static INLINE struct nv50_render_stage *
+nv50_render_stage(struct draw_stage *stage)
+{
+	struct nv50_render_stage *rs = (struct nv50_render_stage *)stage;
+
+	return rs;
+}
+
+/* draw_stage::point hook: not implemented, only logs that it was reached. */
+static void
+nv50_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+	(void)stage;
+	(void)prim;
+
+	NOUVEAU_ERR("\n");
+}
+
+/* draw_stage::line hook: not implemented, only logs that it was reached. */
+static void
+nv50_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+	(void)stage;
+	(void)prim;
+
+	NOUVEAU_ERR("\n");
+}
+
+/* draw_stage::tri hook: not implemented, only logs that it was reached. */
+static void
+nv50_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+	(void)stage;
+	(void)prim;
+
+	NOUVEAU_ERR("\n");
+}
+
+/* draw_stage::flush hook: intentionally does nothing. */
+static void
+nv50_render_flush(struct draw_stage *stage, unsigned flags)
+{
+	(void)stage;
+	(void)flags;
+}
+
+/* draw_stage::reset_stipple_counter hook: not implemented, only logs. */
+static void
+nv50_render_reset_stipple_counter(struct draw_stage *stage)
+{
+	(void)stage;
+
+	NOUVEAU_ERR("\n");
+}
+
+/* draw_stage::destroy hook: frees the stage allocated by
+ * nv50_draw_render_stage.
+ */
+static void
+nv50_render_destroy(struct draw_stage *stage)
+{
+	struct draw_stage *victim = stage;
+
+	FREE(victim);
+}
+
+/**
+ * Allocate the render stage that terminates the draw module pipeline for
+ * an nv50 context and hook up its callbacks.
+ *
+ * \param nv50  owning context; nv50->draw becomes the stage's draw context
+ * \return the embedded draw_stage, or NULL if the allocation fails
+ */
+struct draw_stage *
+nv50_draw_render_stage(struct nv50_context *nv50)
+{
+	struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage);
+
+	/* Do not write through a failed allocation. */
+	if (!rs)
+		return NULL;
+
+	rs->nv50 = nv50;
+	rs->stage.draw = nv50->draw;
+	rs->stage.destroy = nv50_render_destroy;
+	rs->stage.point = nv50_render_point;
+	rs->stage.line = nv50_render_line;
+	rs->stage.tri = nv50_render_tri;
+	rs->stage.flush = nv50_render_flush;
+	rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter;
+
+	return &rs->stage;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
new file mode 100644
index 0000000000..24a68b7235
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv50_context.h"
+
+/**
+ * pipe_screen::texture_create hook: build an nv50_miptree from a texture
+ * template and allocate its backing buffer.
+ *
+ * Each level gets a pitch and an array with one offset per image (depth
+ * slices for 3D textures, 6 faces for cube maps, otherwise 1).  Images are
+ * laid out image-major: all levels of image 0, then all levels of image 1,
+ * and so on.
+ *
+ * \param pscreen  screen that owns the texture
+ * \param tmp      template describing the texture
+ * \return the new texture with refcount 1, or NULL if any allocation fails
+ */
+static struct pipe_texture *
+nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
+{
+	struct pipe_winsys *ws = pscreen->winsys;
+	struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
+	struct pipe_texture *pt;
+	unsigned usage, width = tmp->width[0], height = tmp->height[0];
+	unsigned depth = tmp->depth[0];
+	int i, l;
+
+	if (!mt)
+		return NULL;
+	pt = &mt->base;
+
+	mt->base = *tmp;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+
+	usage = PIPE_BUFFER_USAGE_PIXEL;
+	switch (pt->format) {
+	case PIPE_FORMAT_Z24S8_UNORM:
+	case PIPE_FORMAT_Z16_UNORM:
+		usage |= NOUVEAU_BUFFER_USAGE_ZETA;
+		break;
+	default:
+		break;
+	}
+
+	switch (pt->target) {
+	case PIPE_TEXTURE_3D:
+		mt->image_nr = pt->depth[0];
+		break;
+	case PIPE_TEXTURE_CUBE:
+		mt->image_nr = 6;
+		break;
+	default:
+		mt->image_nr = 1;
+		break;
+	}
+
+	for (l = 0; l <= pt->last_level; l++) {
+		struct nv50_miptree_level *lvl = &mt->level[l];
+
+		pt->width[l] = width;
+		pt->height[l] = height;
+		pt->depth[l] = depth;
+		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+		lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
+		if (!lvl->image_offset)
+			goto fail;
+		lvl->pitch = align(pt->width[l] * pt->block.size, 64);
+
+		width = MAX2(1, width >> 1);
+		height = MAX2(1, height >> 1);
+		depth = MAX2(1, depth >> 1);
+	}
+
+	for (i = 0; i < mt->image_nr; i++) {
+		for (l = 0; l <= pt->last_level; l++) {
+			struct nv50_miptree_level *lvl = &mt->level[l];
+			int size;
+
+			/* NOTE(review): block.size is multiplied in for both
+			 * the row and the height term; kept exactly as it
+			 * was -- confirm whether this over-allocation is
+			 * intended.
+			 */
+			size  = align(pt->width[l], 8) * pt->block.size;
+			size  = align(size, 64);
+			size *= align(pt->height[l], 8) * pt->block.size;
+
+			lvl->image_offset[i] = mt->total_size;
+
+			mt->total_size += size;
+		}
+	}
+
+	mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size);
+	if (!mt->buffer)
+		goto fail;
+
+	return &mt->base;
+
+fail:
+	/* Levels that were never reached still hold the NULL left by
+	 * CALLOC_STRUCT, so freeing every level is safe.
+	 */
+	for (l = 0; l <= pt->last_level; l++)
+		FREE(mt->level[l].image_offset);
+	FREE(mt);
+	return NULL;
+}
+
+/**
+ * pipe_screen::texture_blanket hook: wrap an existing buffer in a miptree.
+ *
+ * Only 2D textures with a single level and depth 1 are accepted.
+ *
+ * \param pscreen  screen that owns the texture
+ * \param pt       template describing the texture
+ * \param stride   pointer to the pitch of level 0
+ * \param pb       buffer to wrap; a reference is taken on it
+ * \return the new texture with refcount 1, or NULL when the template is
+ *         unsupported or an allocation fails
+ */
+static struct pipe_texture *
+nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
+		     const unsigned *stride, struct pipe_buffer *pb)
+{
+	struct nv50_miptree *mt;
+
+	/* Only supports 2D, non-mipmapped textures for the moment */
+	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
+	    pt->depth[0] != 1)
+		return NULL;
+
+	mt = CALLOC_STRUCT(nv50_miptree);
+	if (!mt)
+		return NULL;
+
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+	mt->image_nr = 1;
+	mt->level[0].pitch = *stride;
+	/* image_offset is an int array, as in nv50_miptree_create; the single
+	 * zeroed entry places the image at offset 0.
+	 */
+	mt->level[0].image_offset = CALLOC(1, sizeof(int));
+	if (!mt->level[0].image_offset) {
+		/* nv50_miptree_surface_new reads image_offset[0]. */
+		FREE(mt);
+		return NULL;
+	}
+
+	pipe_buffer_reference(pscreen, &mt->buffer, pb);
+	return &mt->base;
+}
+
+/**
+ * pipe_screen::texture_release hook: drop one reference to a texture and
+ * clear the caller's pointer.  When the last reference goes away the
+ * buffer reference, the per-level offset arrays and the miptree itself
+ * are released.
+ */
+static void
+nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+	struct pipe_texture *pt = *ppt;
+
+	*ppt = NULL;
+
+	if (--pt->refcount <= 0) {
+		struct nv50_miptree *mt = nv50_miptree(pt);
+		unsigned l;
+
+		pipe_buffer_reference(pscreen, &mt->buffer, NULL);
+		/* Both creation paths allocate image_offset for levels
+		 * 0..last_level; without this loop those arrays leak.
+		 */
+		for (l = 0; l <= pt->last_level; l++)
+			FREE(mt->level[l].image_offset);
+		FREE(mt);
+	}
+}
+
+/**
+ * pipe_screen::get_tex_surface hook: create a surface describing one image
+ * of one mipmap level of a texture.
+ *
+ * The image is selected by face for cube maps, by zslice for 3D textures,
+ * and is image 0 for everything else.  The surface holds a reference on
+ * the texture.
+ *
+ * \return the new surface with refcount 1, or NULL if allocation fails
+ */
+static struct pipe_surface *
+nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+			 unsigned face, unsigned level, unsigned zslice,
+			 unsigned flags)
+{
+	struct nv50_miptree *mt = nv50_miptree(pt);
+	struct nv50_miptree_level *lvl = &mt->level[level];
+	struct pipe_surface *surf;
+	int image;
+
+	switch (pt->target) {
+	case PIPE_TEXTURE_CUBE:
+		image = face;
+		break;
+	case PIPE_TEXTURE_3D:
+		image = zslice;
+		break;
+	default:
+		image = 0;
+		break;
+	}
+
+	surf = CALLOC_STRUCT(pipe_surface);
+	if (surf == NULL)
+		return NULL;
+
+	pipe_texture_reference(&surf->texture, pt);
+	surf->format = pt->format;
+	surf->width = pt->width[level];
+	surf->height = pt->height[level];
+	surf->usage = flags;
+	surf->status = PIPE_SURFACE_STATUS_DEFINED;
+	surf->refcount = 1;
+	surf->face = face;
+	surf->level = level;
+	surf->zslice = zslice;
+	surf->offset = lvl->image_offset[image];
+
+	return surf;
+}
+
+/**
+ * pipe_screen::tex_surface_release hook: drop one reference to a surface
+ * and clear the caller's pointer.  The last reference also releases the
+ * surface's texture reference and frees the surface.
+ */
+static void
+nv50_miptree_surface_del(struct pipe_screen *pscreen,
+			 struct pipe_surface **psurface)
+{
+	struct pipe_surface *surf = *psurface;
+	struct nv50_surface *nv_surf = nv50_surface(surf);
+
+	*psurface = NULL;
+
+	surf->refcount--;
+	if (surf->refcount > 0)
+		return;
+
+	pipe_texture_reference(&surf->texture, NULL);
+	FREE(nv_surf);
+}
+
+/**
+ * Install the nv50 texture and texture-surface entry points on a screen.
+ */
+void
+nv50_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+	/* Texture lifetime. */
+	pscreen->texture_create = nv50_miptree_create;
+	pscreen->texture_blanket = nv50_miptree_blanket;
+	pscreen->texture_release = nv50_miptree_release;
+
+	/* Surfaces onto a texture. */
+	pscreen->get_tex_surface = nv50_miptree_surface_new;
+	pscreen->tex_surface_release = nv50_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
new file mode 100644
index 0000000000..14c5d47e79
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -0,0 +1,1784 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_inlines.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv50_context.h"
+
+#define NV50_SU_MAX_TEMP 64
+//#define NV50_PROGRAM_DUMP
+
+/* ARL - gallium breaks on progs/vp/arl.txt
+ *
+ * MSB - Like MAD, but MUL+SUB
+ * - Get rid of it; instead introduce a way to negate args for ops
+ * that support it.
+ *
+ * Look into inlining IMMD for ops other than MOV (make it general?)
+ * - Maybe even relax restrictions a bit: we can't do P_RESULT + P_IMMD,
+ * but we can emit to P_TEMP first and then MOV later. NVIDIA does this.
+ *
+ * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
+ * case, if the emit_src() causes the inst to suddenly become long.
+ *
+ * Verify half-insns work where expected - and force disable them where they
+ * don't work - MUL has it forcibly disabled atm as it fixes POW..
+ *
+ * WARNING: watch dst==src vectors, they can overwrite components that are
+ * still needed, e.g. SUB R0, R0.yzxw, R0
+ *
+ * Things to check with renouveau:
+ * FP attr/result assignment - how?
+ * attrib
+ * - 0x16bc maps vp output onto fp hpos
+ * - 0x16c0 maps vp output onto fp col0
+ * result
+ * - colr always 0-3
+ * - depr always 4
+ * 0x16bc->0x16e8 --> some binding between vp/fp regs
+ * 0x16b8 --> VP output count
+ *
+ * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
+ * "MOV rcol.x, fcol.y" = 0x00000004
+ * 0x19a8 --> as above but 0x00000100 and 0x00000000
+ * - 0x00100000 used when KIL used
+ * 0x196c --> as above but 0x00000011 and 0x00000000
+ *
+ * 0x1988 --> 0xXXNNNNNN
+ * - XX == FP high something
+ */
+/* One scalar register operand as tracked by the TGSI -> nv50 translator. */
+struct nv50_reg {
+ /* Register class of this operand. */
+ enum {
+ P_TEMP,
+ P_ATTR,
+ P_RESULT,
+ P_CONST,
+ P_IMMD
+ } type;
+ /* TGSI register index; -1 marks a register created by the translator
+ * itself (alloc_temp/alloc_immd), which free_temp() is allowed to free. */
+ int index;
+
+ /* Hardware slot; -1 on a P_TEMP means no slot has been assigned yet
+ * (alloc_reg() assigns one on first use). */
+ int hw;
+ /* NOTE(review): not referenced in the visible part of this file. */
+ int neg;
+};
+
+/* Per-program translation context used while converting TGSI to nv50 code. */
+struct nv50_pc {
+ /* Program being built; instructions and cfg limits are stored here. */
+ struct nv50_program *p;
+
+ /* hw resources */
+ /* Occupancy of hardware temp slots: non-NULL means the slot is in use. */
+ struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
+
+ /* tgsi resources */
+ /* Each array below holds 4 scalar nv50_reg entries per TGSI register
+ * (indexed as reg * 4 + channel); the *_nr fields count TGSI registers. */
+ struct nv50_reg *temp;
+ int temp_nr;
+ struct nv50_reg *attr;
+ int attr_nr;
+ struct nv50_reg *result;
+ int result_nr;
+ struct nv50_reg *param;
+ int param_nr;
+ struct nv50_reg *immd;
+ /* Immediate values, 4 floats per immediate (grown by ctor_immd). */
+ float *immd_buf;
+ int immd_nr;
+
+ /* Scratch temps handed out by temp_temp() and released together by
+ * kill_temp_temp(). */
+ struct nv50_reg *temp_temp[16];
+ unsigned temp_temp_nr;
+};
+
+/*
+ * Make sure 'reg' is usable by an instruction and update the program's
+ * high-water marks.
+ *
+ * P_RESULT registers only bump cfg.high_result.  P_TEMP registers that have
+ * no hardware slot yet (hw < 0) are given the first free slot; in every
+ * P_TEMP case cfg.high_temp is raised to cover the slot.  Other register
+ * types are left untouched.
+ */
+static void
+alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+	int slot;
+
+	switch (reg->type) {
+	case P_RESULT:
+		if (pc->p->cfg.high_result < (reg->hw + 1))
+			pc->p->cfg.high_result = reg->hw + 1;
+		return;
+	case P_TEMP:
+		break;
+	default:
+		return;
+	}
+
+	if (reg->hw < 0) {
+		/* Claim the first unused hardware temp. */
+		for (slot = 0; slot < NV50_SU_MAX_TEMP; slot++) {
+			if (!(pc->r_temp[slot]))
+				break;
+		}
+
+		if (slot == NV50_SU_MAX_TEMP) {
+			/* Out of hardware temps. */
+			assert(0);
+			return;
+		}
+
+		pc->r_temp[slot] = reg;
+		reg->hw = slot;
+	}
+
+	/*XXX: also done for already-assigned temps to catch FP
+	 *     temp-as-attr usage.. not clean, but works */
+	if (pc->p->cfg.high_temp < (reg->hw + 1))
+		pc->p->cfg.high_temp = reg->hw + 1;
+}
+
+/*
+ * Obtain a temporary register.
+ *
+ * If 'dst' is a P_TEMP that has not been assigned a hardware slot yet it is
+ * returned as-is so the caller can write straight into it.  Otherwise a new
+ * translator-owned register (index == -1) is allocated in the first free
+ * hardware slot; release it with free_temp().
+ *
+ * Returns NULL when no hardware slot is free or the register structure cannot
+ * be allocated.
+ */
+static struct nv50_reg *
+alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
+{
+	struct nv50_reg *r;
+	int i;
+
+	if (dst && dst->type == P_TEMP && dst->hw == -1)
+		return dst;
+
+	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
+		if (pc->r_temp[i])
+			continue;
+
+		r = CALLOC_STRUCT(nv50_reg);
+		if (!r)
+			return NULL;	/* don't dereference a failed allocation */
+
+		r->type = P_TEMP;
+		r->index = -1;
+		r->hw = i;
+		pc->r_temp[i] = r;
+		return r;
+	}
+
+	assert(0);
+	return NULL;
+}
+
+/*
+ * Release a translator-owned temporary (index == -1): the register stored in
+ * its hardware slot is freed and the slot is marked unused.  Registers that
+ * belong to a TGSI register file are left alone.
+ */
+static void
+free_temp(struct nv50_pc *pc, struct nv50_reg *r)
+{
+	unsigned slot;
+
+	if (r->index != -1)
+		return;
+
+	slot = r->hw;
+	FREE(pc->r_temp[slot]);
+	pc->r_temp[slot] = NULL;
+}
+
+/*
+ * Allocate four temporaries in consecutive hardware slots, searching upwards
+ * from slot 'idx'.  On success dst[0..3] receive the new translator-owned
+ * registers and 0 is returned; release them with free_temp4().
+ *
+ * Returns 1 (leaving no slots claimed) if no run of four free slots exists or
+ * a register structure cannot be allocated.
+ */
+static int
+alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
+{
+	int i;
+
+	/* The last usable group starts at NV50_SU_MAX_TEMP - 4. */
+	for (; idx + 4 <= NV50_SU_MAX_TEMP; idx++) {
+		if (!pc->r_temp[idx] && !pc->r_temp[idx + 1] &&
+		    !pc->r_temp[idx + 2] && !pc->r_temp[idx + 3])
+			break;
+	}
+
+	if (idx + 4 > NV50_SU_MAX_TEMP)
+		return 1;
+
+	for (i = 0; i < 4; i++) {
+		dst[i] = CALLOC_STRUCT(nv50_reg);
+		if (!dst[i]) {
+			/* Undo the slots claimed so far. */
+			while (i--) {
+				pc->r_temp[idx + i] = NULL;
+				FREE(dst[i]);
+				dst[i] = NULL;
+			}
+			return 1;
+		}
+
+		dst[i]->type = P_TEMP;
+		dst[i]->index = -1;
+		dst[i]->hw = idx + i;
+		pc->r_temp[idx + i] = dst[i];
+	}
+
+	return 0;
+}
+
+/* Release the four temporaries obtained from alloc_temp4(), in order. */
+static void
+free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
+{
+	int n = 0;
+
+	while (n < 4) {
+		free_temp(pc, reg[n]);
+		n++;
+	}
+}
+
+/*
+ * Allocate a short-lived scratch temporary and record it in pc->temp_temp so
+ * that kill_temp_temp() can release it later.
+ */
+static struct nv50_reg *
+temp_temp(struct nv50_pc *pc)
+{
+	struct nv50_reg *scratch;
+
+	/* The tracking array only has room for 16 entries. */
+	if (pc->temp_temp_nr >= 16)
+		assert(0);
+
+	scratch = alloc_temp(pc, NULL);
+	pc->temp_temp[pc->temp_temp_nr] = scratch;
+	pc->temp_temp_nr++;
+
+	return scratch;
+}
+
+/* Release every scratch temporary handed out by temp_temp() so far. */
+static void
+kill_temp_temp(struct nv50_pc *pc)
+{
+	int n = 0;
+
+	while (n < pc->temp_temp_nr) {
+		free_temp(pc, pc->temp_temp[n]);
+		n++;
+	}
+
+	pc->temp_temp_nr = 0;
+}
+
+/*
+ * Append a four-component immediate to pc->immd_buf.
+ *
+ * The buffer holds 4 floats per immediate and is grown by one immediate on
+ * every call.  Returns the index of the new immediate (in units of vec4s).
+ *
+ * NOTE: a failed REALLOC is not handled here; callers have no way to report
+ * it.
+ */
+static int
+ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
+{
+	float *slot;
+
+	/* Old size and new size are both counted in vec4s of floats. */
+	pc->immd_buf = REALLOC(pc->immd_buf,
+			       (pc->immd_nr * 4 * sizeof(float)),
+			       (pc->immd_nr + 1) * 4 * sizeof(float));
+
+	slot = &pc->immd_buf[pc->immd_nr * 4];
+	slot[0] = x;
+	slot[1] = y;
+	slot[2] = z;
+	slot[3] = w;
+
+	return pc->immd_nr++;
+}
+
+/*
+ * Create a translator-owned P_IMMD register holding the scalar 'f'.
+ *
+ * The value is stored as the first component of a new immediate vec4, so the
+ * register's hw field is the vec4 index scaled by 4.  The caller owns the
+ * returned structure.
+ */
+static struct nv50_reg *
+alloc_immd(struct nv50_pc *pc, float f)
+{
+	struct nv50_reg *imm = CALLOC_STRUCT(nv50_reg);
+	unsigned first_comp = ctor_immd(pc, f, 0, 0, 0) * 4;
+
+	imm->type = P_IMMD;
+	imm->hw = first_comp;
+	imm->index = -1;
+
+	return imm;
+}
+
+/*
+ * Allocate a zeroed instruction record.  param.index is set to -1, the value
+ * it keeps unless set_data() attaches a parameter reference.
+ */
+static struct nv50_program_exec *
+exec(struct nv50_pc *pc)
+{
+	struct nv50_program_exec *insn;
+
+	insn = CALLOC_STRUCT(nv50_program_exec);
+	insn->param.index = -1;
+
+	return insn;
+}
+
+/*
+ * Append instruction 'e' to the program's instruction list and account for
+ * its size: two words for a long instruction (bit 0 of inst[0] set), one
+ * word otherwise.
+ */
+static void
+emit(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+	struct nv50_program *prog = pc->p;
+
+	/* Link behind the current tail, or start the list. */
+	if (prog->exec_tail)
+		prog->exec_tail->next = e;
+	if (!prog->exec_head)
+		prog->exec_head = e;
+	prog->exec_tail = e;
+
+	if (e->inst[0] & 1)
+		prog->exec_size += 2;
+	else
+		prog->exec_size += 1;
+}
+
+/* Forward declaration: set_pred()/set_pred_wr() call set_long(), which in
+ * turn calls them back once it has marked the instruction as long. */
+static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
+
+/* TRUE if the instruction is in long form (bit 0 of inst[0] set). */
+static boolean
+is_long(struct nv50_program_exec *e)
+{
+	return (e->inst[0] & 1) ? TRUE : FALSE;
+}
+
+/*
+ * TRUE if the instruction is long and the low two bits of inst[1] are both
+ * set, which is how set_immd() marks an inline immediate.
+ */
+static boolean
+is_immd(struct nv50_program_exec *e)
+{
+	if (!is_long(e))
+		return FALSE;
+
+	return ((e->inst[1] & 3) == 3) ? TRUE : FALSE;
+}
+
+/*
+ * Set the predicate fields of a (long) instruction: 'pred' goes into the
+ * 5-bit field at bit 7 of inst[1], 'idx' into the 2-bit field at bit 12.
+ * The instruction is forced to long form first.
+ */
+static INLINE void
+set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
+	 struct nv50_program_exec *e)
+{
+	set_long(pc, e);
+
+	/* Clear both fields, then fill them in. */
+	e->inst[1] = (e->inst[1] & ~((0x1f << 7) | (0x3 << 12))) |
+		     ((pred << 7) | (idx << 12));
+}
+
+/*
+ * Set the predicate-write fields of a (long) instruction: 'idx' goes into
+ * the 2-bit field at bit 4 of inst[1], 'on' into bit 6.  The instruction is
+ * forced to long form first.
+ */
+static INLINE void
+set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
+	    struct nv50_program_exec *e)
+{
+	set_long(pc, e);
+
+	/* Clear both fields, then fill them in. */
+	e->inst[1] = (e->inst[1] & ~((0x3 << 4) | (1 << 6))) |
+		     ((idx << 4) | (on << 6));
+}
+
+/*
+ * Convert an instruction to long form if it is not already.  The long bit is
+ * set before the default predicate fields are written, so the nested
+ * set_long() calls made by set_pred()/set_pred_wr() return immediately.
+ */
+static INLINE void
+set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+	if (!is_long(e)) {
+		e->inst[0] |= 1;
+		set_pred(pc, 0xf, 0, e);
+		set_pred_wr(pc, 0, 0, e);
+	}
+}
+
+/*
+ * Encode the destination register of an instruction.  P_RESULT destinations
+ * force long form and set bit 3 of inst[1]; the hardware register number is
+ * ORed into inst[0] starting at bit 2.
+ */
+static INLINE void
+set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
+{
+	const boolean to_result = (dst->type == P_RESULT);
+
+	if (to_result) {
+		set_long(pc, e);
+		e->inst[1] |= 0x00000008;
+	}
+
+	/* Make sure the register has a hardware slot before encoding it. */
+	alloc_reg(pc, dst);
+	e->inst[0] |= (dst->hw << 2);
+}
+
+/*
+ * Encode an inline 32-bit immediate into a long instruction.  The low 6 bits
+ * of the value go into inst[0] at bit 16, the remaining bits into inst[1]
+ * from bit 2 upwards, and the two low bits of inst[1] flag the immediate.
+ */
+static INLINE void
+set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
+{
+	unsigned bits = fui(pc->immd_buf[imm->hw]); /* XXX */
+
+	set_long(pc, e);
+	/*XXX: can't be predicated - bits overlap.. catch cases where both
+	 *     are required and avoid them. */
+	set_pred(pc, 0, 0, e);
+	set_pred_wr(pc, 0, 0, e);
+
+	e->inst[0] |= (bits & 0x3f) << 16;
+	e->inst[1] |= 0x00000002 | 0x00000001;
+	e->inst[1] |= (bits >> 6) << 2;
+}
+
+/*
+ * Emit an interpolation instruction (opcode bits 0x80000000) writing 'dst'
+ * from 'src'.  When 'iv' is given, bit 25 is set and iv's register is encoded
+ * at bit 9 of inst[0].
+ */
+static void
+emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
+	    struct nv50_reg *src, struct nv50_reg *iv)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] |= 0x80000000;
+	set_dst(pc, dst, insn);
+
+	alloc_reg(pc, src);
+	insn->inst[0] |= (src->hw << 16);
+
+	if (iv != NULL) {
+		insn->inst[0] |= (1 << 25);
+		alloc_reg(pc, iv);
+		insn->inst[0] |= (iv->hw << 9);
+	}
+
+	emit(pc, insn);
+}
+
+/*
+ * Attach a constant-buffer operand to an instruction.
+ *
+ * The instruction is forced long and a constant-buffer selector is written
+ * at bit 22 of inst[1].  The actual offset is not encoded here; instead the
+ * register's hw value, the field shift 's' and the field mask 'm' (shifted
+ * into place within its 32-bit word) are recorded in e->param.
+ */
+static void
+set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
+	 struct nv50_program_exec *e)
+{
+	set_long(pc, e);
+#if 1
+	e->inst[1] |= (1 << 22);
+#else
+	if (src->type == P_IMMD) {
+		e->inst[1] |= (NV50_CB_PMISC << 22);
+	} else {
+		if (pc->p->type == PIPE_SHADER_VERTEX)
+			e->inst[1] |= (NV50_CB_PVP << 22);
+		else
+			e->inst[1] |= (NV50_CB_PFP << 22);
+	}
+#endif
+
+	e->param.mask = m << (s % 32);
+	e->param.shift = s;
+	e->param.index = src->hw;
+}
+
+/*
+ * Emit "dst = src" (opcode bits 0x10000000).
+ *
+ * Constants and immediates are read through set_data(); attributes set the
+ * attribute-source bit; everything else is encoded as a plain register at
+ * bit 9 of inst[0].  The inline-immediate path is currently compiled out
+ * with "0 &&".  The instruction always ends up in long form.
+ */
+static void
+emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] |= 0x10000000;
+
+	set_dst(pc, dst, insn);
+
+	if (0 && dst->type != P_RESULT && src->type == P_IMMD) {
+		set_immd(pc, src, insn);
+		/*XXX: 32-bit, but steals part of "half" reg space - need to
+		 *     catch and handle this case if/when we do half-regs
+		 */
+		insn->inst[0] |= 0x00008000;
+	} else if (src->type == P_IMMD || src->type == P_CONST) {
+		set_long(pc, insn);
+		set_data(pc, src, 0x7f, 9, insn);
+		insn->inst[1] |= 0x20000000; /* src0 const? */
+	} else {
+		if (src->type == P_ATTR) {
+			set_long(pc, insn);
+			insn->inst[1] |= 0x00200000;
+		}
+
+		alloc_reg(pc, src);
+		insn->inst[0] |= (src->hw << 9);
+	}
+
+	/* We really should support "half" instructions here at some point,
+	 * but I don't feel confident enough about them yet.
+	 */
+	set_long(pc, insn);
+	if (is_long(insn) && !is_immd(insn)) {
+		/* 0x04000000: 32-bit, 0x0003c000: "subsubop" 0xf == mov */
+		insn->inst[1] |= 0x04000000;
+		insn->inst[1] |= 0x0003c000;
+	}
+
+	emit(pc, insn);
+}
+
+/*
+ * Decide whether the first two sources of a commutative-style op should be
+ * exchanged, and exchange them through the pointers if so.
+ *
+ * A P_CONST in slot 0 is swapped with a non-constant in slot 1; otherwise a
+ * P_ATTR in slot 1 is swapped with a non-attribute in slot 0.
+ *
+ * Returns TRUE if the sources were swapped.
+ */
+static boolean
+check_swap_src_0_1(struct nv50_pc *pc,
+		   struct nv50_reg **s0, struct nv50_reg **s1)
+{
+	struct nv50_reg *first = *s0;
+	struct nv50_reg *second = *s1;
+	boolean swap;
+
+	if (first->type == P_CONST)
+		swap = (second->type != P_CONST);
+	else
+		swap = (second->type == P_ATTR && first->type != P_ATTR);
+
+	if (!swap)
+		return FALSE;
+
+	*s0 = second;
+	*s1 = first;
+	return TRUE;
+}
+
+/*
+ * Encode source operand 0 (register field at bit 9 of inst[0]).
+ *
+ * Attributes are read directly by flagging the instruction; constants and
+ * immediates are first copied into a scratch temp.
+ */
+static void
+set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
+{
+	switch (src->type) {
+	case P_ATTR:
+		set_long(pc, e);
+		e->inst[1] |= 0x00200000;
+		break;
+	case P_CONST:
+	case P_IMMD: {
+		struct nv50_reg *scratch = temp_temp(pc);
+
+		emit_mov(pc, scratch, src);
+		src = scratch;
+		break;
+	}
+	default:
+		break;
+	}
+
+	alloc_reg(pc, src);
+	e->inst[0] |= (src->hw << 9);
+}
+
+/*
+ * Encode source operand 1 (register field at bit 16 of inst[0]).
+ *
+ * Attributes are copied into a scratch temp.  A constant/immediate is read
+ * through set_data() and flagged with bit 0x00800000, unless bit 0x01000000
+ * (set by set_src_2() for its own constant operand) is already present, in
+ * which case the value is copied into a scratch temp instead.
+ */
+static void
+set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
+{
+	struct nv50_reg *scratch;
+
+	switch (src->type) {
+	case P_ATTR:
+		scratch = temp_temp(pc);
+		emit_mov(pc, scratch, src);
+		src = scratch;
+		break;
+	case P_CONST:
+	case P_IMMD:
+		assert(!(e->inst[0] & 0x00800000));
+		if (e->inst[0] & 0x01000000) {
+			scratch = temp_temp(pc);
+			emit_mov(pc, scratch, src);
+			src = scratch;
+		} else {
+			set_data(pc, src, 0x7f, 16, e);
+			e->inst[0] |= 0x00800000;
+		}
+		break;
+	default:
+		break;
+	}
+
+	alloc_reg(pc, src);
+	e->inst[0] |= (src->hw << 16);
+}
+
+/*
+ * Encode source operand 2 (register field at bit 14 of inst[1]); the
+ * instruction is always forced to long form.
+ *
+ * Attributes are copied into a scratch temp.  A constant/immediate is read
+ * through set_data() and flagged with bit 0x01000000, unless bit 0x00800000
+ * (set by set_src_1() for its own constant operand) is already present, in
+ * which case the value is copied into a scratch temp instead.
+ */
+static void
+set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
+{
+	struct nv50_reg *scratch;
+
+	set_long(pc, e);
+
+	switch (src->type) {
+	case P_ATTR:
+		scratch = temp_temp(pc);
+		emit_mov(pc, scratch, src);
+		src = scratch;
+		break;
+	case P_CONST:
+	case P_IMMD:
+		assert(!(e->inst[0] & 0x01000000));
+		if (e->inst[0] & 0x00800000) {
+			scratch = temp_temp(pc);
+			emit_mov(pc, scratch, src);
+			src = scratch;
+		} else {
+			set_data(pc, src, 0x7f, 32+14, e);
+			e->inst[0] |= 0x01000000;
+		}
+		break;
+	default:
+		break;
+	}
+
+	alloc_reg(pc, src);
+	e->inst[1] |= (src->hw << 14);
+}
+
+/*
+ * Emit dst = src0 * src1 (opcode bits 0xc0000000), always in long form.
+ * The sources may be exchanged by check_swap_src_0_1().
+ */
+static void
+emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+	 struct nv50_reg *src1)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] |= 0xc0000000;
+	set_long(pc, insn);
+
+	check_swap_src_0_1(pc, &src0, &src1);
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src0, insn);
+	set_src_1(pc, src1, insn);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit dst = src0 + src1 (opcode bits 0xb0000000).
+ *
+ * The second operand is encoded in the src2 field when the instruction has
+ * become long by the time it is encoded, and in the src1 field otherwise.
+ */
+static void
+emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
+	 struct nv50_reg *src0, struct nv50_reg *src1)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] |= 0xb0000000;
+
+	check_swap_src_0_1(pc, &src0, &src1);
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src0, insn);
+
+	if (!is_long(insn))
+		set_src_1(pc, src1, insn);
+	else
+		set_src_2(pc, src1, insn);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit a two-operand op from the 0xb0000000 group with sub-opcode 'sub'
+ * placed at bit 29 of inst[1].  Callers in this file pass 4 for MAX and 5
+ * for MIN.
+ */
+static void
+emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
+	    struct nv50_reg *src0, struct nv50_reg *src1)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	set_long(pc, insn);
+	insn->inst[0] |= 0xb0000000;
+	insn->inst[1] |= (sub << 29);
+
+	check_swap_src_0_1(pc, &src0, &src1);
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src0, insn);
+	set_src_1(pc, src1, insn);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit dst = src0 - src1 using the add opcode (0xb0000000) in long form.
+ *
+ * One of two bits in inst[1] is set depending on whether the operands were
+ * swapped: 0x04000000 after a swap, 0x08000000 otherwise.
+ */
+static void
+emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+	 struct nv50_reg *src1)
+{
+	struct nv50_program_exec *insn = exec(pc);
+	boolean swapped;
+
+	insn->inst[0] |= 0xb0000000;
+
+	set_long(pc, insn);
+
+	swapped = check_swap_src_0_1(pc, &src0, &src1);
+	insn->inst[1] |= swapped ? 0x04000000 : 0x08000000;
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src0, insn);
+	set_src_2(pc, src1, insn);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit dst = src0 * src1 + src2 (opcode bits 0xe0000000).  The two
+ * multiplicands may be exchanged by check_swap_src_0_1().
+ */
+static void
+emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+	 struct nv50_reg *src1, struct nv50_reg *src2)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] |= 0xe0000000;
+
+	check_swap_src_0_1(pc, &src0, &src1);
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src0, insn);
+	set_src_1(pc, src1, insn);
+	set_src_2(pc, src2, insn);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit dst = src0 * src1 - src2: the MAD opcode (0xe0000000) in long form
+ * with bit 0x08000000 of inst[1] selecting the subtract variant.
+ */
+static void
+emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+	 struct nv50_reg *src1, struct nv50_reg *src2)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] |= 0xe0000000;
+	set_long(pc, insn);
+	insn->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
+
+	check_swap_src_0_1(pc, &src0, &src1);
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src0, insn);
+	set_src_1(pc, src1, insn);
+	set_src_2(pc, src2, insn);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit a one-operand op from the 0x90000000 group.  A non-zero 'sub' forces
+ * long form and is placed at bit 29 of inst[1]; sub == 0 leaves the
+ * instruction short unless an operand makes it long.
+ *
+ * Callers in this file use sub 0 for RCP, 2 for RSQ, 3 for LG2, 4 for SIN,
+ * 5 for COS and 6 for EX2.
+ */
+static void
+emit_flop(struct nv50_pc *pc, unsigned sub,
+	  struct nv50_reg *dst, struct nv50_reg *src)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] |= 0x90000000;
+	if (sub != 0) {
+		set_long(pc, insn);
+		insn->inst[1] |= (sub << 29);
+	}
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src, insn);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit the pre-processing instruction that callers issue before the EX2
+ * flop: opcode group 0xb0000000, long form, with (6 << 29) | 0x00004000 in
+ * inst[1].
+ */
+static void
+emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] |= 0xb0000000;
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src, insn);
+
+	set_long(pc, insn);
+	insn->inst[1] |= (6 << 29) | 0x00004000;
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit the pre-processing instruction that callers issue before the SIN/COS
+ * flops: opcode group 0xb0000000, long form, with (6 << 29) in inst[1].
+ */
+static void
+emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] |= 0xb0000000;
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src, insn);
+
+	set_long(pc, insn);
+	insn->inst[1] |= (6 << 29);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit a comparison "dst = (src0 <c_op> src1)" as two instructions: a set
+ * into a temp followed by a cvt.f32.u32 into the real destination.
+ *
+ * 'c_op' is a 3-bit condition code (callers in this file pass 1 for SLT and
+ * 6 for SGE).  If the operands get swapped the condition is remapped through
+ * inv_cop[].  When the destination is not a P_TEMP the set result goes
+ * through a temporary that is released before returning.
+ */
+static void
+emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
+	 struct nv50_reg *src0, struct nv50_reg *src1)
+{
+	unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+	struct nv50_program_exec *insn = exec(pc);
+	struct nv50_reg *final_dst = dst;
+	struct nv50_reg *set_dst_reg;
+
+	assert(c_op <= 7);
+	if (check_swap_src_0_1(pc, &src0, &src1))
+		c_op = inv_cop[c_op];
+
+	if (dst->type == P_TEMP)
+		set_dst_reg = dst;
+	else
+		set_dst_reg = alloc_temp(pc, NULL);
+
+	/* set.u32 */
+	set_long(pc, insn);
+	insn->inst[0] |= 0xb0000000;
+	insn->inst[1] |= (3 << 29);
+	insn->inst[1] |= (c_op << 14);
+	/*XXX: breaks things, .u32 by default?
+	 *     decuda will disasm as .u16 and use .lo/.hi regs, but this
+	 *     doesn't seem to match what the hw actually does.
+	inst[1] |= 0x04000000; << breaks things.. .u32 by default?
+	 */
+	set_dst(pc, set_dst_reg, insn);
+	set_src_0(pc, src0, insn);
+	set_src_1(pc, src1, insn);
+	emit(pc, insn);
+
+	/* cvt.f32.u32 */
+	insn = exec(pc);
+	insn->inst[0] = 0xa0000001;
+	insn->inst[1] = 0x64014780;
+	set_dst(pc, final_dst, insn);
+	set_src_0(pc, set_dst_reg, insn);
+	emit(pc, insn);
+
+	if (set_dst_reg != final_dst)
+		free_temp(pc, set_dst_reg);
+}
+
+/*
+ * Emit dst = floor(src) as a cvt instruction (0xa0000000, sub-op 6) in
+ * integer mode with 32-bit f32 source and the rounding field set to 1.
+ */
+static void
+emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] = 0xa0000000; /* cvt */
+	set_long(pc, insn);
+
+	insn->inst[1] |= (6 << 29) |		/* cvt */
+			 0x08000000 |		/* integer mode */
+			 0x04000000 |		/* 32 bit */
+			 (((0x1 << 3)) << 14) |	/* .rn */
+			 (1 << 14);		/* src .f32 */
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src, insn);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit dst = pow(v, e), computed as ex2(lg2(v) * e) through a private
+ * temporary: flop 3 (LG2), MUL, pre-EX2, then flop 6 (EX2) into dst.
+ */
+static void
+emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
+	 struct nv50_reg *v, struct nv50_reg *e)
+{
+	struct nv50_reg *acc = alloc_temp(pc, NULL);
+
+	emit_flop(pc, 3, acc, v);	/* acc = lg2(v) */
+	emit_mul(pc, acc, acc, e);	/* acc *= e */
+	emit_preex2(pc, acc, acc);
+	emit_flop(pc, 6, dst, acc);	/* dst = ex2(acc) */
+
+	free_temp(pc, acc);
+}
+
+/*
+ * Emit dst = |src| as a 32-bit f32 cvt instruction (0xa0000000, sub-op 6)
+ * with the .abs modifier bit set.
+ */
+static void
+emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	insn->inst[0] = 0xa0000000; /* cvt */
+	set_long(pc, insn);
+
+	insn->inst[1] |= (6 << 29) |		/* cvt */
+			 0x04000000 |		/* 32 bit */
+			 (1 << 14) |		/* src .f32 */
+			 ((1 << 6) << 14);	/* .abs */
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src, insn);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit the TGSI LIT instruction.
+ *
+ * 'dst' and 'src' are the per-channel destination/source registers and
+ * 'mask' the destination write mask:
+ *   x, w: written with 1.0
+ *   y:    max(src.x, 0)
+ *   z:    pow(max(src.y, 0), clamp(src.w, -128, 128)), replaced by 0 under
+ *         the predicate written by the max(src.x, 0) instruction
+ *
+ * The immediate registers created here are only read while the instructions
+ * are encoded (only their hw value is recorded), so they are freed before
+ * returning instead of being leaked.
+ */
+static void
+emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+	 struct nv50_reg **src)
+{
+	struct nv50_reg *one = alloc_immd(pc, 1.0);
+	struct nv50_reg *zero = alloc_immd(pc, 0.0);
+	struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
+	struct nv50_reg *pos128 = alloc_immd(pc,  127.999999);
+	struct nv50_reg *tmp[4];
+
+	if (mask & (1 << 0))
+		emit_mov(pc, dst[0], one);
+
+	if (mask & (1 << 3))
+		emit_mov(pc, dst[3], one);
+
+	if (mask & (3 << 1)) {
+		/* y is also needed (as a predicate source) when only z is
+		 * written, so compute it into a scratch temp in that case. */
+		if (mask & (1 << 1))
+			tmp[0] = dst[1];
+		else
+			tmp[0] = temp_temp(pc);
+		emit_minmax(pc, 4, tmp[0], src[0], zero);
+	}
+
+	if (mask & (1 << 2)) {
+		/* Have the max(src.x, 0) instruction write predicate 0. */
+		set_pred_wr(pc, 1, 0, pc->p->exec_tail);
+
+		tmp[1] = temp_temp(pc);
+		emit_minmax(pc, 4, tmp[1], src[1], zero);
+
+		tmp[3] = temp_temp(pc);
+		emit_minmax(pc, 4, tmp[3], src[3], neg128);
+		emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
+
+		emit_pow(pc, dst[2], tmp[1], tmp[3]);
+		/* Predicated override of z with 0. */
+		emit_mov(pc, dst[2], zero);
+		set_pred(pc, 3, 0, pc->p->exec_tail);
+	}
+
+	FREE(pos128);
+	FREE(neg128);
+	FREE(zero);
+	FREE(one);
+}
+
+/*
+ * Emit dst = -src using the 0xa0000000 opcode group with sub-op 7 in long
+ * form, 32-bit f32 source.
+ */
+static void
+emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+	struct nv50_program_exec *insn = exec(pc);
+
+	set_long(pc, insn);
+	insn->inst[0] |= 0xa0000000; /* delta */
+
+	insn->inst[1] |= (7 << 29) |	/* delta */
+			 0x04000000 |	/* negate arg0? probably not */
+			 (1 << 14);	/* src .f32 */
+
+	set_dst(pc, dst, insn);
+	set_src_0(pc, src, insn);
+
+	emit(pc, insn);
+}
+
+/*
+ * Emit a conditional fragment kill on 'src' as two instructions: one that
+ * writes predicate register 1 from the source, and one (0x000001fe)
+ * predicated on it with condition 1.
+ */
+static void
+emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
+{
+	const int r_pred = 1;
+	struct nv50_program_exec *test_insn;
+	struct nv50_program_exec *kill_insn;
+
+	/* Sets predicate reg ? */
+	test_insn = exec(pc);
+	test_insn->inst[0] = 0xa00001fd;
+	test_insn->inst[1] = 0xc4014788;
+	set_src_0(pc, src, test_insn);
+	set_pred_wr(pc, 1, r_pred, test_insn);
+	emit(pc, test_insn);
+
+	/* This is probably KILP */
+	kill_insn = exec(pc);
+	kill_insn->inst[0] = 0x000001fe;
+	set_long(pc, kill_insn);
+	set_pred(pc, 1 /* LT? */, r_pred, kill_insn);
+	emit(pc, kill_insn);
+}
+
+/*
+ * Map channel 'c' of a TGSI destination register onto the translator's
+ * register arrays.  Only the TEMPORARY and OUTPUT files are supported; NULL
+ * is returned for TGSI_FILE_NULL and for any other file.
+ */
+static struct nv50_reg *
+tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
+{
+	const int slot = dst->DstRegister.Index * 4 + c;
+
+	if (dst->DstRegister.File == TGSI_FILE_TEMPORARY)
+		return &pc->temp[slot];
+
+	if (dst->DstRegister.File == TGSI_FILE_OUTPUT)
+		return &pc->result[slot];
+
+	return NULL;
+}
+
+/*
+ * Resolve channel 'chan' of a TGSI source operand to a register.
+ *
+ * The extended swizzle selects either a component of the referenced register
+ * (INPUT, TEMPORARY, CONSTANT or IMMEDIATE file) or a literal 0.0 / 1.0
+ * immediate.  SAMPLER operands resolve to NULL.  Any sign mode other than
+ * KEEP is applied by emitting code into a scratch temp (released by
+ * kill_temp_temp()), which is then returned in place of the register.
+ */
+static struct nv50_reg *
+tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
+{
+	struct nv50_reg *r = NULL;
+	struct nv50_reg *temp;
+	unsigned c;
+
+	c = tgsi_util_get_full_src_register_extswizzle(src, chan);
+	switch (c) {
+	case TGSI_EXTSWIZZLE_X:
+	case TGSI_EXTSWIZZLE_Y:
+	case TGSI_EXTSWIZZLE_Z:
+	case TGSI_EXTSWIZZLE_W:
+		switch (src->SrcRegister.File) {
+		case TGSI_FILE_INPUT:
+			r = &pc->attr[src->SrcRegister.Index * 4 + c];
+			break;
+		case TGSI_FILE_TEMPORARY:
+			r = &pc->temp[src->SrcRegister.Index * 4 + c];
+			break;
+		case TGSI_FILE_CONSTANT:
+			r = &pc->param[src->SrcRegister.Index * 4 + c];
+			break;
+		case TGSI_FILE_IMMEDIATE:
+			r = &pc->immd[src->SrcRegister.Index * 4 + c];
+			break;
+		case TGSI_FILE_SAMPLER:
+			break;
+		default:
+			assert(0);
+			break;
+		}
+		break;
+	case TGSI_EXTSWIZZLE_ZERO:
+		r = alloc_immd(pc, 0.0);
+		break;
+	case TGSI_EXTSWIZZLE_ONE:
+		r = alloc_immd(pc, 1.0);
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
+	case TGSI_UTIL_SIGN_KEEP:
+		break;
+	case TGSI_UTIL_SIGN_CLEAR:
+		temp = temp_temp(pc);
+		emit_abs(pc, temp, r);
+		r = temp;
+		break;
+	case TGSI_UTIL_SIGN_TOGGLE:
+		temp = temp_temp(pc);
+		emit_neg(pc, temp, r);
+		r = temp;
+		break;
+	case TGSI_UTIL_SIGN_SET:
+		/* -|r|: the negate must read the abs result, not r itself,
+		 * otherwise the abs is simply overwritten with -r. */
+		temp = temp_temp(pc);
+		emit_abs(pc, temp, r);
+		emit_neg(pc, temp, temp);
+		r = temp;
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	return r;
+}
+
+/* Copy the scalar 'src' into every destination channel enabled in 'mask'. */
+static void
+emit_mov_masked(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+		struct nv50_reg *src)
+{
+	int c;
+
+	for (c = 0; c < 4; c++) {
+		if (mask & (1 << c))
+			emit_mov(pc, dst[c], src);
+	}
+}
+
+/*
+ * Translate one TGSI instruction token into nv50 instructions.
+ *
+ * Destination and source operands are first resolved per channel.  If the
+ * instruction saturates, results are written to scratch temps and then
+ * converted with a saturating cvt into the real destinations.  All scratch
+ * temps handed out by temp_temp() are released before returning.
+ *
+ * Returns TRUE on success, FALSE for an unsupported opcode or when the
+ * registers needed for a texture fetch cannot be allocated.
+ */
+static boolean
+nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
+{
+	const struct tgsi_full_instruction *inst = &tok->FullInstruction;
+	struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
+	/* unit stays 0 if no sampler operand is present, rather than being
+	 * read uninitialised by the texture path. */
+	unsigned mask, sat, unit = 0;
+	int i, c;
+
+	mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+	sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
+
+	for (c = 0; c < 4; c++) {
+		if (mask & (1 << c))
+			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
+		else
+			dst[c] = NULL;
+	}
+
+	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+		const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
+
+		if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
+			unit = fs->SrcRegister.Index;
+
+		for (c = 0; c < 4; c++)
+			src[i][c] = tgsi_src(pc, c, fs);
+	}
+
+	if (sat) {
+		/* Redirect results into scratch temps; the real destinations
+		 * are written by the saturating cvt at the end. */
+		for (c = 0; c < 4; c++) {
+			rdst[c] = dst[c];
+			dst[c] = temp_temp(pc);
+		}
+	}
+
+	switch (inst->Instruction.Opcode) {
+	case TGSI_OPCODE_ABS:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_abs(pc, dst[c], src[0][c]);
+		}
+		break;
+	case TGSI_OPCODE_ADD:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_add(pc, dst[c], src[0][c], src[1][c]);
+		}
+		break;
+	case TGSI_OPCODE_COS:
+		temp = alloc_temp(pc, NULL);
+		emit_precossin(pc, temp, src[0][0]);
+		emit_flop(pc, 5, temp, temp);
+		emit_mov_masked(pc, dst, mask, temp);
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_DP3:
+		temp = alloc_temp(pc, NULL);
+		emit_mul(pc, temp, src[0][0], src[1][0]);
+		emit_mad(pc, temp, src[0][1], src[1][1], temp);
+		emit_mad(pc, temp, src[0][2], src[1][2], temp);
+		emit_mov_masked(pc, dst, mask, temp);
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_DP4:
+		temp = alloc_temp(pc, NULL);
+		emit_mul(pc, temp, src[0][0], src[1][0]);
+		emit_mad(pc, temp, src[0][1], src[1][1], temp);
+		emit_mad(pc, temp, src[0][2], src[1][2], temp);
+		emit_mad(pc, temp, src[0][3], src[1][3], temp);
+		emit_mov_masked(pc, dst, mask, temp);
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_DPH:
+		temp = alloc_temp(pc, NULL);
+		emit_mul(pc, temp, src[0][0], src[1][0]);
+		emit_mad(pc, temp, src[0][1], src[1][1], temp);
+		emit_mad(pc, temp, src[0][2], src[1][2], temp);
+		emit_add(pc, temp, src[1][3], temp);
+		emit_mov_masked(pc, dst, mask, temp);
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_DST:
+	{
+		struct nv50_reg *one = alloc_immd(pc, 1.0);
+		if (mask & (1 << 0))
+			emit_mov(pc, dst[0], one);
+		if (mask & (1 << 1))
+			emit_mul(pc, dst[1], src[0][1], src[1][1]);
+		if (mask & (1 << 2))
+			emit_mov(pc, dst[2], src[0][2]);
+		if (mask & (1 << 3))
+			emit_mov(pc, dst[3], src[1][3]);
+		FREE(one);
+	}
+		break;
+	case TGSI_OPCODE_EX2:
+		temp = alloc_temp(pc, NULL);
+		emit_preex2(pc, temp, src[0][0]);
+		emit_flop(pc, 6, temp, temp);
+		emit_mov_masked(pc, dst, mask, temp);
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_FLR:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_flr(pc, dst[c], src[0][c]);
+		}
+		break;
+	case TGSI_OPCODE_FRC:
+		temp = alloc_temp(pc, NULL);
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_flr(pc, temp, src[0][c]);
+			emit_sub(pc, dst[c], src[0][c], temp);
+		}
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_KIL:
+		emit_kil(pc, src[0][0]);
+		emit_kil(pc, src[0][1]);
+		emit_kil(pc, src[0][2]);
+		emit_kil(pc, src[0][3]);
+		break;
+	case TGSI_OPCODE_LIT:
+		emit_lit(pc, &dst[0], mask, &src[0][0]);
+		break;
+	case TGSI_OPCODE_LG2:
+		temp = alloc_temp(pc, NULL);
+		emit_flop(pc, 3, temp, src[0][0]);
+		emit_mov_masked(pc, dst, mask, temp);
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_LRP:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			/*XXX: we can do better than this */
+			temp = alloc_temp(pc, NULL);
+			emit_neg(pc, temp, src[0][c]);
+			emit_mad(pc, temp, temp, src[2][c], src[2][c]);
+			emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
+			free_temp(pc, temp);
+		}
+		break;
+	case TGSI_OPCODE_MAD:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
+		}
+		break;
+	case TGSI_OPCODE_MAX:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
+		}
+		break;
+	case TGSI_OPCODE_MIN:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
+		}
+		break;
+	case TGSI_OPCODE_MOV:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_mov(pc, dst[c], src[0][c]);
+		}
+		break;
+	case TGSI_OPCODE_MUL:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_mul(pc, dst[c], src[0][c], src[1][c]);
+		}
+		break;
+	case TGSI_OPCODE_POW:
+		temp = alloc_temp(pc, NULL);
+		emit_pow(pc, temp, src[0][0], src[1][0]);
+		emit_mov_masked(pc, dst, mask, temp);
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_RCP:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_flop(pc, 0, dst[c], src[0][0]);
+		}
+		break;
+	case TGSI_OPCODE_RSQ:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_flop(pc, 2, dst[c], src[0][0]);
+		}
+		break;
+	case TGSI_OPCODE_SCS:
+		temp = alloc_temp(pc, NULL);
+		emit_precossin(pc, temp, src[0][0]);
+		if (mask & (1 << 0))
+			emit_flop(pc, 5, dst[0], temp);
+		if (mask & (1 << 1))
+			emit_flop(pc, 4, dst[1], temp);
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_SGE:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
+		}
+		break;
+	case TGSI_OPCODE_SIN:
+		temp = alloc_temp(pc, NULL);
+		emit_precossin(pc, temp, src[0][0]);
+		emit_flop(pc, 4, temp, temp);
+		emit_mov_masked(pc, dst, mask, temp);
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_SLT:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
+		}
+		break;
+	case TGSI_OPCODE_SUB:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_sub(pc, dst[c], src[0][c], src[1][c]);
+		}
+		break;
+	case TGSI_OPCODE_TEX:
+	case TGSI_OPCODE_TXP:
+	{
+		struct nv50_reg *t[4];
+		struct nv50_program_exec *e;
+
+		/* The fetch uses four consecutive hardware temps; without
+		 * them t[] would be left uninitialised. */
+		if (alloc_temp4(pc, t, 0)) {
+			kill_temp_temp(pc);
+			return FALSE;
+		}
+		emit_mov(pc, t[0], src[0][0]);
+		emit_mov(pc, t[1], src[0][1]);
+
+		e = exec(pc);
+		e->inst[0] = 0xf6400000;
+		e->inst[0] |= (unit << 9);
+		set_long(pc, e);
+		e->inst[1] |= 0x0000c004;
+		set_dst(pc, t[0], e);
+		emit(pc, e);
+
+		if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
+		if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
+		if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
+		if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
+
+		free_temp4(pc, t);
+	}
+		break;
+	case TGSI_OPCODE_XPD:
+		temp = alloc_temp(pc, NULL);
+		if (mask & (1 << 0)) {
+			emit_mul(pc, temp, src[0][2], src[1][1]);
+			emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
+		}
+		if (mask & (1 << 1)) {
+			emit_mul(pc, temp, src[0][0], src[1][2]);
+			emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
+		}
+		if (mask & (1 << 2)) {
+			emit_mul(pc, temp, src[0][1], src[1][0]);
+			emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
+		}
+		free_temp(pc, temp);
+		break;
+	case TGSI_OPCODE_END:
+		break;
+	default:
+		NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
+		/* Don't leave operand scratch temps allocated on failure. */
+		kill_temp_temp(pc);
+		return FALSE;
+	}
+
+	if (sat) {
+		for (c = 0; c < 4; c++) {
+			struct nv50_program_exec *e;
+
+			if (!(mask & (1 << c)))
+				continue;
+			e = exec(pc);
+
+			e->inst[0] = 0xa0000000; /* cvt */
+			set_long(pc, e);
+			e->inst[1] |= (6 << 29); /* cvt */
+			e->inst[1] |= 0x04000000; /* 32 bit */
+			e->inst[1] |= (1 << 14); /* src .f32 */
+			e->inst[1] |= ((1 << 5) << 14); /* .sat */
+			set_dst(pc, rdst[c], e);
+			set_src_0(pc, dst[c], e);
+			emit(pc, e);
+		}
+	}
+
+	kill_temp_temp(pc);
+	return TRUE;
+}
+
+/*
+ * First pass over a program's TGSI tokens.
+ *
+ * Collects all immediates, determines how many registers of each file the
+ * program declares, and then allocates and initialises the per-channel
+ * nv50_reg arrays in 'pc' (4 entries per TGSI register).  For fragment
+ * programs the input attributes are assigned temps and the interpolation
+ * instructions that load them are emitted here.
+ *
+ * Returns TRUE on success, FALSE on an unsupported declaration file or an
+ * allocation failure.  Arrays already allocated are not freed here on
+ * failure.
+ */
+static boolean
+nv50_program_tx_prep(struct nv50_pc *pc)
+{
+ struct tgsi_parse_context p;
+ boolean ret = FALSE;
+ unsigned i, c;
+
+ /* Pass over all tokens: gather immediates and register counts. */
+ tgsi_parse_init(&p, pc->p->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch (tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm =
+ &p.FullToken.FullImmediate;
+
+ ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
+ imm->u.ImmediateFloat32[1].Float,
+ imm->u.ImmediateFloat32[2].Float,
+ imm->u.ImmediateFloat32[3].Float);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *d;
+ unsigned last;
+
+ d = &p.FullToken.FullDeclaration;
+ last = d->DeclarationRange.Last;
+
+ /* Track the highest declared index + 1 for each file. */
+ switch (d->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (pc->temp_nr < (last + 1))
+ pc->temp_nr = last + 1;
+ break;
+ case TGSI_FILE_OUTPUT:
+ if (pc->result_nr < (last + 1))
+ pc->result_nr = last + 1;
+ break;
+ case TGSI_FILE_INPUT:
+ if (pc->attr_nr < (last + 1))
+ pc->attr_nr = last + 1;
+ break;
+ case TGSI_FILE_CONSTANT:
+ if (pc->param_nr < (last + 1))
+ pc->param_nr = last + 1;
+ break;
+ case TGSI_FILE_SAMPLER:
+ break;
+ default:
+ NOUVEAU_ERR("bad decl file %d\n",
+ d->Declaration.File);
+ goto out_err;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* TGSI temporaries: hardware slots are assigned lazily (hw == -1). */
+ if (pc->temp_nr) {
+ pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->temp)
+ goto out_err;
+
+ for (i = 0; i < pc->temp_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ pc->temp[i*4+c].type = P_TEMP;
+ pc->temp[i*4+c].hw = -1;
+ pc->temp[i*4+c].index = i;
+ }
+ }
+ }
+
+ /* Input attributes. */
+ if (pc->attr_nr) {
+ struct nv50_reg *iv = NULL;
+ int aid = 0;
+
+ pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->attr)
+ goto out_err;
+
+ /* Fragment programs: interpolate a value into 'iv' and take its
+ * reciprocal (flop sub-op 0); it is passed to every attribute
+ * interpolation below. */
+ if (pc->p->type == PIPE_SHADER_FRAGMENT) {
+ iv = alloc_temp(pc, NULL);
+ emit_interp(pc, iv, iv, NULL);
+ emit_flop(pc, 0, iv, iv);
+ aid++;
+ }
+
+ for (i = 0; i < pc->attr_nr; i++) {
+ struct nv50_reg *a = &pc->attr[i*4];
+
+ for (c = 0; c < 4; c++) {
+ if (pc->p->type == PIPE_SHADER_FRAGMENT) {
+ /* Each channel takes over the type/hw/index of a
+ * freshly allocated temp; the temp itself stays
+ * registered in pc->r_temp. */
+ struct nv50_reg *at =
+ alloc_temp(pc, NULL);
+ pc->attr[i*4+c].type = at->type;
+ pc->attr[i*4+c].hw = at->hw;
+ pc->attr[i*4+c].index = at->index;
+ } else {
+ /* Other program types: number attributes sequentially and
+ * flag each one in the cfg.vp.attr bitmask. */
+ pc->p->cfg.vp.attr[aid/32] |=
+ (1 << (aid % 32));
+ pc->attr[i*4+c].type = P_ATTR;
+ pc->attr[i*4+c].hw = aid++;
+ pc->attr[i*4+c].index = i;
+ }
+ }
+
+ if (pc->p->type != PIPE_SHADER_FRAGMENT)
+ continue;
+
+ /* Load the four channels of this attribute in place. */
+ emit_interp(pc, &a[0], &a[0], iv);
+ emit_interp(pc, &a[1], &a[1], iv);
+ emit_interp(pc, &a[2], &a[2], iv);
+ emit_interp(pc, &a[3], &a[3], iv);
+ }
+
+ if (iv)
+ free_temp(pc, iv);
+ }
+
+ /* Outputs: lazily-assigned temps for fragment programs, sequentially
+ * numbered P_RESULT registers otherwise. */
+ if (pc->result_nr) {
+ int rid = 0;
+
+ pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->result)
+ goto out_err;
+
+ for (i = 0; i < pc->result_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ if (pc->p->type == PIPE_SHADER_FRAGMENT) {
+ pc->result[i*4+c].type = P_TEMP;
+ pc->result[i*4+c].hw = -1;
+ } else {
+ pc->result[i*4+c].type = P_RESULT;
+ pc->result[i*4+c].hw = rid++;
+ }
+ pc->result[i*4+c].index = i;
+ }
+ }
+ }
+
+ /* Constants: numbered sequentially from 0. */
+ if (pc->param_nr) {
+ int rid = 0;
+
+ pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->param)
+ goto out_err;
+
+ for (i = 0; i < pc->param_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ pc->param[i*4+c].type = P_CONST;
+ pc->param[i*4+c].hw = rid++;
+ pc->param[i*4+c].index = i;
+ }
+ }
+ }
+
+ /* Immediates: numbered directly after the constants. */
+ if (pc->immd_nr) {
+ int rid = pc->param_nr * 4;
+
+ pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->immd)
+ goto out_err;
+
+ for (i = 0; i < pc->immd_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ pc->immd[i*4+c].type = P_IMMD;
+ pc->immd[i*4+c].hw = rid++;
+ pc->immd[i*4+c].index = i;
+ }
+ }
+ }
+
+ ret = TRUE;
+out_err:
+ tgsi_parse_free(&p);
+ return ret;
+}
+
+/* Translate p's TGSI tokens into nv50 code; returns FALSE on failure. */
+static boolean
+nv50_program_tx(struct nv50_program *p)
+{
+	struct tgsi_parse_context parse;
+	struct nv50_pc *pc = CALLOC_STRUCT(nv50_pc);
+	boolean ret;
+
+	if (!pc)
+		return FALSE;
+	pc->p = p;
+	pc->p->cfg.high_temp = 4;
+	/* Pass 1 (tx_prep): set up the per-file register arrays. */
+	ret = nv50_program_tx_prep(pc);
+	if (ret == FALSE)
+		goto out_cleanup;
+
+	/* Pass 2: emit code for instruction tokens, ignoring all others. */
+	tgsi_parse_init(&parse, pc->p->pipe.tokens);
+	while (!tgsi_parse_end_of_tokens(&parse)) {
+		const union tgsi_full_token *tok = &parse.FullToken;
+
+		tgsi_parse_token(&parse);
+		if (tok->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+			continue;
+		ret = nv50_program_tx_insn(pc, tok);
+		if (ret == FALSE)
+			goto out_err;
+	}
+
+	if (p->type == PIPE_SHADER_FRAGMENT) {
+		struct nv50_reg out;
+
+		out.type = P_TEMP;
+		for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
+			emit_mov(pc, &out, &pc->result[out.hw]);
+	}
+
+	assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
+	pc->p->exec_tail->inst[1] |= 0x00000001;
+	p->param_nr = pc->param_nr * 4;
+	p->immd_nr = pc->immd_nr * 4;
+	p->immd = pc->immd_buf; /* p takes over the immediate value buffer */
+out_err:
+	tgsi_parse_free(&parse);
+out_cleanup:
+	/* Nothing refers to pc after this point: release it and its arrays. */
+	FREE(pc->temp);
+	FREE(pc->attr);
+	FREE(pc->result);
+	FREE(pc->param);
+	FREE(pc->immd);
+	FREE(pc);
+	return ret;
+}
+
+static void
+nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
+{
+	if (!nv50_program_tx(p))
+		assert(0); /* a failed translation is only caught by this assert */
+	p->translated = TRUE; /* set whatever the translation returned */
+}
+
+static void
+nv50_program_upload_data(struct nv50_context *nv50, float *map,
+			 unsigned start, unsigned count)
+{
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_channel *chan = nv50->screen->nvws->channel;
+	unsigned nr;
+
+	/* Push count values from map to offset start, at most 2047 per packet. */
+	for (; count; map += nr, start += nr, count -= nr) {
+		nr = MIN2(count, 2047);
+
+		/* Destination word: NV50_CB_PMISC in the low bits, offset above. */
+		BEGIN_RING(chan, tesla, 0x00000f00, 1);
+		OUT_RING  (chan, (NV50_CB_PMISC << 0) | (start << 8));
+		/* Followed by the values themselves. */
+		BEGIN_RING(chan, tesla, 0x40000f04, nr);
+		OUT_RINGp (chan, map, nr);
+	}
+}
+
+static void
+nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
+{
+	struct pipe_winsys *ws = nv50->pipe.winsys;
+	struct nouveau_winsys *nvws = nv50->screen->nvws;
+	struct nouveau_resource *heap = nv50->screen->vp_data_heap;
+	const unsigned total = p->param_nr + p->immd_nr;
+
+	/* Reserve room for params + immediates the first time it is needed. */
+	if (total && !p->data && nvws->res_alloc(heap, total, p, &p->data)) {
+		/* Failed: free other programs' data until the heap has room. */
+		while (heap->next && heap->size < total) {
+			struct nv50_program *victim = heap->next->priv;
+
+			nvws->res_free(&victim->data);
+		}
+
+		if (nvws->res_alloc(heap, total, p, &p->data))
+			assert(0);
+	}
+
+	/* Params are read from the constant buffer bound for this shader type. */
+	if (p->param_nr) {
+		float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
+					    PIPE_BUFFER_USAGE_CPU_READ);
+
+		nv50_program_upload_data(nv50, map, p->data->start, p->param_nr);
+		ws->buffer_unmap(ws, nv50->constbuf[p->type]);
+	}
+
+	/* Immediates are placed directly after the params. */
+	if (p->immd_nr)
+		nv50_program_upload_data(nv50, p->immd,
+					 p->data->start + p->param_nr, p->immd_nr);
+}
+
+/* Make sure p's code is in its buffer, (re)uploading it when it changed. */
+static void
+nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
+{
+	struct nouveau_channel *chan = nv50->screen->nvws->channel;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct pipe_winsys *ws = nv50->pipe.winsys;
+	struct nv50_program_exec *e;
+	struct nouveau_stateobj *so;
+	const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
+	unsigned start, count, *up, *ptr;
+	boolean upload = FALSE;
+
+	if (!p->buffer) {
+		p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4);
+		upload = TRUE;
+	}
+
+	/* The constant allocation moved: re-patch every instruction using it. */
+	if (p->data && p->data->start != p->data_start) {
+		for (e = p->exec_head; e; e = e->next) {
+			unsigned ei, ci;
+
+			if (e->param.index < 0)
+				continue;
+			ei = e->param.shift >> 5; /* which word of inst[] */
+			ci = e->param.index + p->data->start;
+			e->inst[ei] &= ~e->param.mask;
+			e->inst[ei] |= (ci << (e->param.shift & 31)); /* >= 32 is undefined */
+		}
+
+		p->data_start = p->data->start;
+		upload = TRUE;
+	}
+
+	if (!upload)
+		return;
+
+#ifdef NV50_PROGRAM_DUMP
+	NOUVEAU_ERR("-------\n");
+	for (e = p->exec_head; e; e = e->next) {
+		NOUVEAU_ERR("0x%08x\n", e->inst[0]);
+		if (is_long(e))
+			NOUVEAU_ERR("0x%08x\n", e->inst[1]);
+	}
+#endif
+
+	/* Flatten the instruction list into one staging array (freed below). */
+	up = ptr = MALLOC(p->exec_size * 4);
+	for (e = p->exec_head; e; e = e->next) {
+		*(ptr++) = e->inst[0];
+		if (is_long(e))
+			*(ptr++) = e->inst[1];
+	}
+
+	so = so_new(4,2);
+	so_method(so, nv50->screen->tesla, 0x1280, 3);
+	so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0);
+	so_data  (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
+
+	start = 0; count = p->exec_size;
+	while (count) {
+		struct nouveau_winsys *nvws = nv50->screen->nvws;
+		unsigned nr;
+
+		so_emit(nvws, so);
+
+		nr = MIN2(count, 2047);
+		nr = MIN2(nvws->channel->pushbuf->remaining, nr);
+		if (nvws->channel->pushbuf->remaining < (nr + 3)) {
+			FIRE_RING(chan);
+			continue;
+		}
+
+		BEGIN_RING(chan, tesla, 0x0f00, 1);
+		OUT_RING  (chan, (start << 8) | NV50_CB_PUPLOAD);
+		BEGIN_RING(chan, tesla, 0x40000f04, nr);
+		OUT_RINGp (chan, up + start, nr);
+
+		start += nr;
+		count -= nr;
+	}
+
+	FREE(up);
+	so_ref(NULL, &so);
+}
+
+void
+nv50_vertprog_validate(struct nv50_context *nv50)
+{
+	const unsigned rd = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+	struct nv50_program *p = nv50->vertprog;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *so;
+
+	/* Translate on first use, then bring constants and code up to date. */
+	if (!p->translated) {
+		nv50_program_validate(nv50, p);
+		if (!p->translated)
+			assert(0);
+	}
+	nv50_program_validate_data(nv50, p);
+	nv50_program_validate_code(nv50, p);
+
+	/* Build the state object stored as the context's vertex program state. */
+	so = so_new(13, 2);
+	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
+	so_reloc (so, p->buffer, 0, rd | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, p->buffer, 0, rd | NOUVEAU_BO_LOW, 0, 0);
+	so_method(so, tesla, 0x1650, 2); /* attribute bitmask filled in by translation */
+	so_data  (so, p->cfg.vp.attr[0]);
+	so_data  (so, p->cfg.vp.attr[1]);
+	so_method(so, tesla, 0x16b8, 1);
+	so_data  (so, p->cfg.high_result);
+	so_method(so, tesla, 0x16ac, 2);
+	so_data  (so, p->cfg.high_result); //8);
+	so_data  (so, p->cfg.high_temp);
+	so_method(so, tesla, 0x140c, 1);
+	so_data  (so, 0); /* program start offset */
+	so_ref(so, &nv50->state.vertprog);
+}
+
+void
+nv50_fragprog_validate(struct nv50_context *nv50)
+{
+	const unsigned rd = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+	struct nv50_program *p = nv50->fragprog;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *so;
+
+	/* Translate on first use, then bring constants and code up to date. */
+	if (!p->translated) {
+		nv50_program_validate(nv50, p);
+		if (!p->translated)
+			assert(0);
+	}
+	nv50_program_validate_data(nv50, p);
+	nv50_program_validate_code(nv50, p);
+
+	/* Only the code address and high_temp depend on p; the rest is fixed. */
+	so = so_new(64, 2);
+	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
+	so_reloc (so, p->buffer, 0, rd | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, p->buffer, 0, rd | NOUVEAU_BO_LOW, 0, 0);
+	so_method(so, tesla, 0x1904, 4);
+	so_data  (so, 0x00040404); /* p: 0x01000404 */
+	so_data  (so, 0x00000004);
+	so_data  (so, 0x00000000);
+	so_data  (so, 0x00000000);
+	so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
+	so_data  (so, 0x03020100);
+	so_data  (so, 0x07060504);
+	so_data  (so, 0x0b0a0908);
+	so_method(so, tesla, 0x1988, 2);
+	so_data  (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */
+	so_data  (so, p->cfg.high_temp);
+	so_method(so, tesla, 0x1414, 1);
+	so_data  (so, 0); /* program start offset */
+	so_ref(so, &nv50->state.fragprog);
+}
+
+void
+nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
+{
+	struct pipe_screen *pscreen = nv50->pipe.screen;
+
+	while (p->exec_head) {
+		struct nv50_program_exec *e = p->exec_head;
+		p->exec_head = e->next;
+		FREE(e);
+	}
+	p->exec_tail = NULL;
+	p->exec_size = 0;
+
+	/* nv50_program_tx() hands its immediate buffer to p; release it here. */
+	FREE(p->immd);
+	p->immd = NULL;
+	if (p->buffer)
+		pipe_buffer_reference(pscreen, &p->buffer, NULL);
+	nv50->screen->nvws->res_free(&p->data);
+	p->translated = 0;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
new file mode 100644
index 0000000000..78deed6a38
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -0,0 +1,45 @@
+#ifndef __NV50_PROGRAM_H__
+#define __NV50_PROGRAM_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv50_program_exec { /* one translated instruction in a program's list */
+ struct nv50_program_exec *next; /* next instruction; NULL ends the list */
+
+ unsigned inst[2]; /* instruction words; inst[1] is only emitted for long instructions */
+ struct { /* constant reference patched in by nv50_program_validate_code() */
+ int index; /* added to the program's data start; < 0 means nothing to patch */
+ unsigned mask; /* bits of the instruction word cleared before patching */
+ unsigned shift; /* bit position of the value; shift >> 5 selects the word */
+ } param;
+};
+
+struct nv50_program { /* driver-side state for one shader */
+ struct pipe_shader_state pipe; /* pipe.tokens is the TGSI stream that gets translated */
+ struct tgsi_shader_info info;
+ boolean translated; /* set by nv50_program_validate(), cleared by nv50_program_destroy() */
+
+ unsigned type; /* compared against PIPE_SHADER_FRAGMENT during translation */
+ struct nv50_program_exec *exec_head; /* first translated instruction */
+ struct nv50_program_exec *exec_tail; /* last translated instruction */
+ unsigned exec_size; /* code size in 32-bit words (multiplied by 4 for byte sizes) */
+ struct nouveau_resource *data; /* allocation from the screen's vp_data_heap */
+ unsigned data_start; /* data->start the instructions were last patched for */
+
+ struct pipe_buffer *buffer; /* buffer the code is uploaded to */
+
+ float *immd; /* immediate values, uploaded after the params */
+ unsigned immd_nr; /* number of immediate floats (4 per immediate) */
+ unsigned param_nr; /* number of constant floats (4 per constant register) */
+
+ struct { /* values emitted with the program's state object */
+ unsigned high_temp; /* initialised to 4 before translation */
+ unsigned high_result;
+ struct {
+ unsigned attr[2]; /* one bit per vertex input component in use */
+ } vp;
+ } cfg;
+};
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
new file mode 100644
index 0000000000..7c8831a46d
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_inlines.h"
+
+#include "nv50_context.h"
+
+struct nv50_query { /* driver object behind an opaque struct pipe_query */
+ struct pipe_buffer *buffer; /* 16-byte buffer targeted by the method emitted in nv50_query_end() */
+ unsigned type; /* query type passed to nv50_query_create() */
+ boolean ready; /* TRUE once result has been read back from buffer */
+ uint64_t result; /* cached value returned by nv50_query_result() */
+};
+
+static INLINE struct nv50_query *
+nv50_query(struct pipe_query *pipe)
+{
+ return (struct nv50_query *)pipe; /* handles returned by nv50_query_create() are nv50_query objects */
+}
+
+static struct pipe_query *
+nv50_query_create(struct pipe_context *pipe, unsigned type)
+{
+	struct pipe_winsys *ws = pipe->winsys;
+	struct nv50_query *q = CALLOC_STRUCT(nv50_query);
+
+	assert(type == PIPE_QUERY_OCCLUSION_COUNTER); /* check the argument, not q */
+	if (!q)
+		return NULL;
+	q->type = type;
+	q->buffer = ws->buffer_create(ws, 256, 0, 16);
+	if (!q->buffer) {
+		FREE(q);
+		return NULL;
+	}
+	return (struct pipe_query *)q;
+}
+
+static void
+nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+	struct nv50_query *q = nv50_query(pq);
+
+	if (!q)
+		return; /* destroying a NULL query is a no-op */
+	pipe_buffer_reference(pipe->screen, &q->buffer, NULL);
+	FREE(q);
+}
+
+static void
+nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+	struct nv50_context *nv50 = nv50_context(pipe);
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_channel *chan = nv50->screen->nvws->channel;
+
+	/* Whatever result was cached belongs to the previous run. */
+	nv50_query(pq)->ready = FALSE;
+
+	BEGIN_RING(chan, tesla, 0x1530, 1);
+	OUT_RING  (chan, 1);
+	BEGIN_RING(chan, tesla, 0x1514, 1);
+	OUT_RING  (chan, 1);
+}
+
+static void
+nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+	struct nv50_context *nv50 = nv50_context(pipe);
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_channel *chan = nv50->screen->nvws->channel;
+	struct nouveau_bo *bo = nv50->screen->nvws->get_bo(nv50_query(pq)->buffer);
+
+	/* One 4-word method aimed at the query buffer, submitted immediately. */
+	WAIT_RING (chan, 5);
+	BEGIN_RING(chan, tesla, 0x1b00, 4);
+	OUT_RELOCh(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	OUT_RING  (chan, 0x00000000);
+	OUT_RING  (chan, 0x0100f002);
+	FIRE_RING (chan);
+}
+
+static boolean
+nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+		  boolean wait, uint64_t *result)
+{
+	struct nv50_query *q = nv50_query(pq);
+	struct pipe_winsys *ws = pipe->winsys;
+	uint32_t *map;
+
+	/*XXX: Want to be able to return FALSE here instead of blocking
+	 * until the result is available..
+	 */
+	if (q->ready == FALSE) {
+		/* Not cached yet: read word 1 of the buffer and remember it. */
+		map = ws->buffer_map(ws, q->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+		q->result = map[1];
+		ws->buffer_unmap(ws, q->buffer);
+		q->ready = TRUE;
+	}
+
+	*result = q->result;
+	return q->ready;
+}
+
+void
+nv50_init_query_functions(struct nv50_context *nv50)
+{
+ nv50->pipe.create_query = nv50_query_create; /* install this file's query callbacks on the context */
+ nv50->pipe.destroy_query = nv50_query_destroy;
+ nv50->pipe.begin_query = nv50_query_begin;
+ nv50->pipe.end_query = nv50_query_end;
+ nv50->pipe.get_query_result = nv50_query_result;
+}
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
new file mode 100644
index 0000000000..ee24405d36
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_screen.h"
+
+#include "util/u_simple_screen.h"
+
+#include "nv50_context.h"
+#include "nv50_screen.h"
+
+#include "nouveau/nouveau_stateobj.h"
+
+#define NV5X_GRCLASS5097_CHIPSETS 0x00000001 /* bit n set: chipset 0x5n gets tesla class 0x5097 */
+#define NV8X_GRCLASS8297_CHIPSETS 0x00000050 /* bit n set: chipset 0x8n gets tesla class 0x8297 */
+#define NV9X_GRCLASS8297_CHIPSETS 0x00000014 /* bit n set: chipset 0x9n gets tesla class 0x8297 */
+
+static boolean
+nv50_screen_is_format_supported(struct pipe_screen *pscreen,
+				enum pipe_format format,
+				enum pipe_texture_target target,
+				unsigned tex_usage, unsigned geom_flags)
+{
+	/* Only format and the render-target usage bit decide the answer. */
+	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+		switch (format) {
+		case PIPE_FORMAT_A8R8G8B8_UNORM:
+		case PIPE_FORMAT_R5G6B5_UNORM:
+		case PIPE_FORMAT_Z24S8_UNORM:
+		case PIPE_FORMAT_Z16_UNORM:
+			return TRUE;
+		default:
+			return FALSE;
+		}
+	}
+
+	switch (format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case PIPE_FORMAT_A1R5G5B5_UNORM:
+	case PIPE_FORMAT_A4R4G4B4_UNORM:
+	case PIPE_FORMAT_R5G6B5_UNORM:
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_I8_UNORM:
+	case PIPE_FORMAT_A8L8_UNORM:
+	case PIPE_FORMAT_DXT1_RGB:
+	case PIPE_FORMAT_DXT1_RGBA:
+	case PIPE_FORMAT_DXT3_RGBA:
+	case PIPE_FORMAT_DXT5_RGBA:
+		return TRUE;
+	default:
+		break;
+	}
+	return FALSE;
+}
+
+static const char *
+nv50_screen_get_name(struct pipe_screen *pscreen)
+{
+	static char name[128]; /* one static buffer shared by all callers */
+	struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws;
+	unsigned chipset = nvws->channel->device->chipset;
+
+	snprintf(name, sizeof(name), "NV%02X", chipset);
+	return name;
+}
+
+static const char *
+nv50_screen_get_vendor(struct pipe_screen *pscreen)
+{
+ return "nouveau"; /* constant string; pscreen is not consulted */
+}
+
+/* Report the integer capabilities and limits of the nv50 driver. */
+static int
+nv50_screen_get_param(struct pipe_screen *pscreen, int param)
+{
+	switch (param) {
+	/* Unit and render-target counts. */
+	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+		return 32;
+	case PIPE_CAP_MAX_RENDER_TARGETS:
+		return 8;
+	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+		return 0;
+
+	/* Texture level counts. */
+	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+		return 13;
+	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+		return 10;
+
+	/* Features reported as available. */
+	case PIPE_CAP_NPOT_TEXTURES:
+	case PIPE_CAP_TWO_SIDED_STENCIL:
+	case PIPE_CAP_S3TC:
+	case PIPE_CAP_ANISOTROPIC_FILTER:
+	case PIPE_CAP_OCCLUSION_QUERY:
+	case PIPE_CAP_TEXTURE_SHADOW_MAP:
+	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+	case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+	case NOUVEAU_CAP_HW_VTXBUF:
+		return 1;
+
+	/* Features reported as unavailable. */
+	case PIPE_CAP_GLSL:
+	case PIPE_CAP_POINT_SPRITE:
+	case NOUVEAU_CAP_HW_IDXBUF:
+		return 0;
+
+	/* Unknown caps are logged and answered with 0. */
+	default:
+		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+		return 0;
+	}
+}
+
+static float
+nv50_screen_get_paramf(struct pipe_screen *pscreen, int param)
+{
+	/* Width limits are the same for the plain and the _AA variants. */
+	if (param == PIPE_CAP_MAX_LINE_WIDTH ||
+	    param == PIPE_CAP_MAX_LINE_WIDTH_AA)
+		return 10.0;
+	if (param == PIPE_CAP_MAX_POINT_WIDTH ||
+	    param == PIPE_CAP_MAX_POINT_WIDTH_AA)
+		return 64.0;
+	if (param == PIPE_CAP_MAX_TEXTURE_ANISOTROPY)
+		return 16.0;
+	if (param == PIPE_CAP_MAX_TEXTURE_LOD_BIAS)
+		return 4.0;
+
+	/* Anything else is logged and answered with 0. */
+	NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+	return 0.0;
+}
+
+static void
+nv50_screen_destroy(struct pipe_screen *pscreen)
+{
+ FREE(pscreen); /* NOTE(review): only the struct is freed; objects and buffers made by nv50_screen_create() are not released here */
+}
+
+/* Create the nv50 screen: allocate the engine objects and emit static state. */
+struct pipe_screen *
+nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+	struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
+	struct nouveau_stateobj *so;
+	unsigned tesla_class = 0, ret;
+	unsigned chipset = nvws->channel->device->chipset;
+	int i;
+
+	if (!screen)
+		return NULL;
+	screen->nvws = nvws;
+
+	/* DMA engine object */
+	ret = nvws->grobj_alloc(nvws, 0x5039, &screen->m2mf);
+	if (ret) {
+		NOUVEAU_ERR("Error creating M2MF object: %d\n", ret);
+		nv50_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* 2D object */
+	ret = nvws->grobj_alloc(nvws, NV50_2D, &screen->eng2d);
+	if (ret) {
+		NOUVEAU_ERR("Error creating 2D object: %d\n", ret);
+		nv50_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* 3D object */
+	/* Class is chosen by chipset family plus a per-family revision bitmask. */
+	switch (chipset & 0xf0) {
+	case 0x50:
+		if (NV5X_GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f)))
+			tesla_class = 0x5097;
+		break;
+	case 0x80:
+		if (NV8X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f)))
+			tesla_class = 0x8297;
+		break;
+	case 0x90:
+		if (NV9X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f)))
+			tesla_class = 0x8297;
+		break;
+	default:
+		NOUVEAU_ERR("Not a G8x chipset\n");
+		nv50_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	if (tesla_class == 0) {
+		NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset);
+		nv50_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	ret = nvws->grobj_alloc(nvws, tesla_class, &screen->tesla);
+	if (ret) {
+		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+		nv50_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Sync notifier */
+	ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+	if (ret) {
+		NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+		nv50_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Static M2MF init */
+	so = so_new(32, 0);
+	so_method(so, screen->m2mf, 0x0180, 3);
+	so_data  (so, screen->sync->handle);
+	so_data  (so, screen->nvws->channel->vram->handle);
+	so_data  (so, screen->nvws->channel->vram->handle);
+	so_emit(nvws, so);
+	so_ref (NULL, &so);
+
+	/* Static 2D init */
+	so = so_new(64, 0);
+	so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4);
+	so_data  (so, screen->sync->handle);
+	so_data  (so, screen->nvws->channel->vram->handle);
+	so_data  (so, screen->nvws->channel->vram->handle);
+	so_data  (so, screen->nvws->channel->vram->handle);
+	so_method(so, screen->eng2d, NV50_2D_OPERATION, 1);
+	so_data  (so, NV50_2D_OPERATION_SRCCOPY);
+	so_method(so, screen->eng2d, 0x0290, 1);
+	so_data  (so, 0);
+	so_method(so, screen->eng2d, 0x0888, 1);
+	so_data  (so, 1);
+	so_emit(nvws, so);
+	so_ref(NULL, &so);
+
+	/* Static tesla init */
+	so = so_new(256, 20);
+
+	so_method(so, screen->tesla, 0x1558, 1);
+	so_data  (so, 1);
+	so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
+	so_data  (so, screen->sync->handle);
+	so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0),
+		  NV50TCL_DMA_UNK0__SIZE);
+	for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++)
+		so_data(so, nvws->channel->vram->handle);
+	so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0),
+		  NV50TCL_DMA_UNK1__SIZE);
+	for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++)
+		so_data(so, nvws->channel->vram->handle);
+	so_method(so, screen->tesla, 0x121c, 1);
+	so_data  (so, 1);
+
+	so_method(so, screen->tesla, 0x13bc, 1);
+	so_data  (so, 0x54);
+	so_method(so, screen->tesla, 0x13ac, 1);
+	so_data  (so, 1);
+	so_method(so, screen->tesla, 0x16b8, 1);
+	so_data  (so, 8);
+
+	/* Shared constant buffer */
+	screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4);
+	if (nvws->res_init(&screen->vp_data_heap, 0, 128)) {
+		NOUVEAU_ERR("Error initialising constant buffer\n");
+		/* Drop the half-built state object before bailing out. */
+		so_ref(NULL, &so);
+		nv50_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	so_method(so, screen->tesla, 0x1280, 3);
+	so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+	so_data  (so, (NV50_CB_PMISC << 16) | 0x00001000);
+
+	/* Texture sampler/image unit setup - we abuse the constant buffer
+	 * upload mechanism for the moment to upload data to the tex config
+	 * blocks.  At some point we *may* want to go the NVIDIA way of doing
+	 * things?
+	 */
+	screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4);
+	so_method(so, screen->tesla, 0x1280, 3);
+	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+	so_data  (so, (NV50_CB_TIC << 16) | 0x0800);
+	so_method(so, screen->tesla, 0x1574, 3);
+	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+	so_data  (so, 0x00000800);
+
+	screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4);
+	so_method(so, screen->tesla, 0x1280, 3);
+	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+	so_data  (so, (NV50_CB_TSC << 16) | 0x0800);
+	so_method(so, screen->tesla, 0x155c, 3);
+	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+	so_data  (so, 0x00000800);
+
+
+	/* Vertex array limits - max them out */
+	for (i = 0; i < 16; i++) {
+		so_method(so, screen->tesla, 0x1080 + (i * 8), 2);
+		so_data  (so, 0x000000ff);
+		so_data  (so, 0xffffffff);
+	}
+
+	so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2);
+	so_data  (so, fui(0.0));
+	so_data  (so, fui(1.0));
+
+	so_method(so, screen->tesla, 0x1234, 1);
+	so_data  (so, 1);
+	so_method(so, screen->tesla, 0x1458, 1);
+	so_data  (so, 1);
+
+	so_emit(nvws, so);
+	so_ref(so, &screen->static_init);
+	nvws->push_flush(nvws, 0, NULL);
+
+	screen->pipe.winsys = ws;
+
+	screen->pipe.destroy = nv50_screen_destroy;
+
+	screen->pipe.get_name = nv50_screen_get_name;
+	screen->pipe.get_vendor = nv50_screen_get_vendor;
+	screen->pipe.get_param = nv50_screen_get_param;
+	screen->pipe.get_paramf = nv50_screen_get_paramf;
+
+	screen->pipe.is_format_supported = nv50_screen_is_format_supported;
+
+	nv50_screen_init_miptree_functions(&screen->pipe);
+	nv50_transfer_init_screen_functions(&screen->pipe);
+	u_simple_screen_init(&screen->pipe);
+
+	return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
new file mode 100644
index 0000000000..db567aaac8
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -0,0 +1,35 @@
+#ifndef __NV50_SCREEN_H__
+#define __NV50_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv50_screen { /* per-device state created by nv50_screen_create() */
+ struct pipe_screen pipe; /* first member: nv50_screen() casts a pipe_screen pointer to this struct */
+
+ struct nouveau_winsys *nvws; /* winsys used for object, notifier and heap allocation */
+
+ unsigned cur_pctx;
+
+ struct nouveau_grobj *tesla; /* 3D object (class 0x5097 or 0x8297) */
+ struct nouveau_grobj *eng2d; /* 2D object (NV50_2D) */
+ struct nouveau_grobj *m2mf; /* DMA engine object (class 0x5039) */
+ struct nouveau_notifier *sync; /* sync notifier handed to all three objects */
+
+ struct pipe_buffer *constbuf; /* shared constant buffer, 128 * 4 * 4 bytes */
+ struct nouveau_resource *vp_data_heap; /* heap of 128 units used for shader constant allocations */
+
+ struct pipe_buffer *tic; /* buffer bound as NV50_CB_TIC (texture config block) */
+ struct pipe_buffer *tsc; /* buffer bound as NV50_CB_TSC (texture config block) */
+
+ struct nouveau_stateobj *static_init; /* state object holding the static tesla setup */
+};
+
+static INLINE struct nv50_screen *
+nv50_screen(struct pipe_screen *screen)
+{
+ return (struct nv50_screen *)screen; /* valid because pipe is the first member of struct nv50_screen */
+}
+
+void nv50_transfer_init_screen_functions(struct pipe_screen *);
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
new file mode 100644
index 0000000000..787ff958ec
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -0,0 +1,664 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv50_context.h"
+#include "nv50_texture.h"
+
+#include "nouveau/nouveau_stateobj.h"
+
+/*
+ * Create a blend CSO.
+ *
+ * Translates the pipe_blend_state into a state object holding the blend
+ * enables, blend equation/factors, logic op and colour mask methods, and
+ * keeps a copy of the original pipe state alongside it.
+ *
+ * Returns the new nv50_blend_stateobj, or NULL if an allocation fails.
+ */
+static void *
+nv50_blend_state_create(struct pipe_context *pipe,
+			const struct pipe_blend_state *cso)
+{
+	struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
+	struct nv50_blend_stateobj *bso;
+	struct nouveau_stateobj *so;
+	unsigned cmask = 0, i;
+
+	/* Allocate the wrapper first so a failure needs no stateobj cleanup. */
+	bso = CALLOC_STRUCT(nv50_blend_stateobj);
+	if (!bso)
+		return NULL;
+
+	so = so_new(64, 0);
+	if (!so) {
+		FREE(bso);
+		return NULL;
+	}
+
+	/*XXX ignored:
+	 * 	- dither
+	 */
+
+	/* The single pipe-level enable is replicated to all 8 enable slots. */
+	so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8);
+	for (i = 0; i < 8; i++)
+		so_data(so, cso->blend_enable ? 1 : 0);
+
+	if (cso->blend_enable) {
+		so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5);
+		so_data  (so, nvgl_blend_eqn(cso->rgb_func));
+		so_data  (so, 0x4000 | nvgl_blend_func(cso->rgb_src_factor));
+		so_data  (so, 0x4000 | nvgl_blend_func(cso->rgb_dst_factor));
+		so_data  (so, nvgl_blend_eqn(cso->alpha_func));
+		so_data  (so, 0x4000 | nvgl_blend_func(cso->alpha_src_factor));
+		so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1);
+		so_data  (so, 0x4000 | nvgl_blend_func(cso->alpha_dst_factor));
+	}
+
+	if (cso->logicop_enable == 0) {
+		so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 1);
+		so_data  (so, 0);
+	} else {
+		so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 2);
+		so_data  (so, 1);
+		so_data  (so, nvgl_logicop_func(cso->logicop_func));
+	}
+
+	/* One enable bit per channel, spaced 4 bits apart in the mask word. */
+	if (cso->colormask & PIPE_MASK_R)
+		cmask |= (1 << 0);
+	if (cso->colormask & PIPE_MASK_G)
+		cmask |= (1 << 4);
+	if (cso->colormask & PIPE_MASK_B)
+		cmask |= (1 << 8);
+	if (cso->colormask & PIPE_MASK_A)
+		cmask |= (1 << 12);
+	so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8);
+	for (i = 0; i < 8; i++)
+		so_data(so, cmask);
+
+	bso->pipe = *cso;
+	so_ref(so, &bso->so);
+	return (void *)bso;
+}
+
+/* Make hwcso the current blend CSO and flag blend state for revalidation. */
+static void
+nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->dirty |= NV50_NEW_BLEND;
+	ctx->blend = hwcso;
+}
+
+/* Drop the CSO's reference on its state object, then free the CSO itself. */
+static void
+nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_blend_stateobj *blend_cso = hwcso;
+
+	so_ref(NULL, &blend_cso->so);
+	FREE(blend_cso);
+}
+
+/*
+ * Map a PIPE_TEX_WRAP_* value to the matching NV50TSC_1_0_WRAPS_* value.
+ * Unknown modes are reported and treated as REPEAT.
+ */
+static INLINE unsigned
+wrap_mode(unsigned wrap)
+{
+	unsigned hw_wrap = NV50TSC_1_0_WRAPS_REPEAT;
+
+	switch (wrap) {
+	case PIPE_TEX_WRAP_REPEAT:
+		break;
+	case PIPE_TEX_WRAP_MIRROR_REPEAT:
+		hw_wrap = NV50TSC_1_0_WRAPS_MIRROR_REPEAT;
+		break;
+	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+		hw_wrap = NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE;
+		break;
+	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+		hw_wrap = NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER;
+		break;
+	case PIPE_TEX_WRAP_CLAMP:
+		hw_wrap = NV50TSC_1_0_WRAPS_CLAMP;
+		break;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+		hw_wrap = NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE;
+		break;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+		hw_wrap = NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER;
+		break;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP:
+		hw_wrap = NV50TSC_1_0_WRAPS_MIRROR_CLAMP;
+		break;
+	default:
+		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+		break;
+	}
+
+	return hw_wrap;
+}
+/*
+ * Create a sampler CSO.
+ *
+ * The CSO is a zero-initialised array of 8 words; word 0 receives the wrap
+ * modes, anisotropy level and shadow-compare setup, word 1 the mag/min/mip
+ * filters. The remaining words stay zero.
+ *
+ * Returns the array, or NULL if the allocation fails.
+ */
+static void *
+nv50_sampler_state_create(struct pipe_context *pipe,
+			  const struct pipe_sampler_state *cso)
+{
+	unsigned *tsc = CALLOC(8, sizeof(unsigned));
+
+	if (!tsc)
+		return NULL;
+
+	tsc[0] = (0x00024000 |
+		  (wrap_mode(cso->wrap_s) << 0) |
+		  (wrap_mode(cso->wrap_t) << 3) |
+		  (wrap_mode(cso->wrap_r) << 6));
+
+	switch (cso->mag_img_filter) {
+	case PIPE_TEX_FILTER_LINEAR:
+		tsc[1] |= NV50TSC_1_1_MAGF_LINEAR;
+		break;
+	case PIPE_TEX_FILTER_NEAREST:
+	default:
+		tsc[1] |= NV50TSC_1_1_MAGF_NEAREST;
+		break;
+	}
+
+	switch (cso->min_img_filter) {
+	case PIPE_TEX_FILTER_LINEAR:
+		tsc[1] |= NV50TSC_1_1_MINF_LINEAR;
+		break;
+	case PIPE_TEX_FILTER_NEAREST:
+	default:
+		tsc[1] |= NV50TSC_1_1_MINF_NEAREST;
+		break;
+	}
+
+	switch (cso->min_mip_filter) {
+	case PIPE_TEX_MIPFILTER_LINEAR:
+		tsc[1] |= NV50TSC_1_1_MIPF_LINEAR;
+		break;
+	case PIPE_TEX_MIPFILTER_NEAREST:
+		tsc[1] |= NV50TSC_1_1_MIPF_NEAREST;
+		break;
+	case PIPE_TEX_MIPFILTER_NONE:
+	default:
+		tsc[1] |= NV50TSC_1_1_MIPF_NONE;
+		break;
+	}
+
+	/* Quantise max_anisotropy into a 0..7 level stored at bit 20. */
+	if (cso->max_anisotropy >= 16.0)
+		tsc[0] |= (7 << 20);
+	else if (cso->max_anisotropy >= 12.0)
+		tsc[0] |= (6 << 20);
+	else if (cso->max_anisotropy >= 10.0)
+		tsc[0] |= (5 << 20);
+	else if (cso->max_anisotropy >= 8.0)
+		tsc[0] |= (4 << 20);
+	else if (cso->max_anisotropy >= 6.0)
+		tsc[0] |= (3 << 20);
+	else if (cso->max_anisotropy >= 4.0)
+		tsc[0] |= (2 << 20);
+	else if (cso->max_anisotropy >= 2.0)
+		tsc[0] |= (1 << 20);
+
+	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+		/* NOTE(review): bit 8 and bits 0-2 fall inside the wrap_r and
+		 * wrap_s fields packed above (3 bits each at shifts 6 and 0),
+		 * so enabling shadow compare can alter the wrap modes.
+		 * Confirm the intended bit positions against the hardware
+		 * documentation before relying on shadow samplers. */
+		tsc[0] |= (1 << 8);
+		tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7);
+	}
+
+	return (void *)tsc;
+}
+
+/*
+ * Bind nr sampler CSOs: record the count, copy the CSO pointers into the
+ * context and flag sampler state for revalidation.
+ */
+static void
+nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+	int slot = 0;
+
+	ctx->sampler_nr = nr;
+	while (slot < ctx->sampler_nr) {
+		ctx->sampler[slot] = sampler[slot];
+		slot++;
+	}
+
+	ctx->dirty |= NV50_NEW_SAMPLER;
+}
+
+/* Free a sampler CSO; it is the plain word array made by the create hook. */
+static void
+nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	void *tsc_words = hwcso;
+
+	FREE(tsc_words);
+}
+
+/*
+ * Bind nr textures: take a reference on each new texture, release every
+ * previously bound slot at index nr and above, then record the new count and
+ * flag texture state for revalidation.
+ */
+static void
+nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+			 struct pipe_texture **pt)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+	int slot;
+
+	slot = 0;
+	while (slot < nr) {
+		pipe_texture_reference((void *)&ctx->miptree[slot], pt[slot]);
+		slot++;
+	}
+
+	slot = nr;
+	while (slot < ctx->miptree_nr) {
+		pipe_texture_reference((void *)&ctx->miptree[slot], NULL);
+		slot++;
+	}
+
+	ctx->miptree_nr = nr;
+	ctx->dirty |= NV50_NEW_TEXTURE;
+}
+
+/*
+ * Create a rasterizer CSO.
+ *
+ * Translates the pipe_rasterizer_state into a state object covering shade
+ * model, line width/smoothing/stipple, point size, polygon modes, face
+ * culling, polygon stipple enable and polygon offset, and keeps a copy of
+ * the original pipe state alongside it.
+ *
+ * Returns the new nv50_rasterizer_stateobj, or NULL if an allocation fails.
+ */
+static void *
+nv50_rasterizer_state_create(struct pipe_context *pipe,
+			     const struct pipe_rasterizer_state *cso)
+{
+	struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
+	struct nv50_rasterizer_stateobj *rso;
+	struct nouveau_stateobj *so;
+
+	/* Allocate the wrapper first so a failure needs no stateobj cleanup. */
+	rso = CALLOC_STRUCT(nv50_rasterizer_stateobj);
+	if (!rso)
+		return NULL;
+
+	so = so_new(64, 0);
+	if (!so) {
+		FREE(rso);
+		return NULL;
+	}
+
+	/*XXX: ignored
+	 * 	- light_twoside
+	 * 	- point_smooth
+	 * 	- multisample
+	 * 	- point_sprite / sprite_coord_mode
+	 */
+
+	so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
+	so_data  (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
+				       NV50TCL_SHADE_MODEL_SMOOTH);
+
+	so_method(so, tesla, NV50TCL_LINE_WIDTH, 1);
+	so_data  (so, fui(cso->line_width));
+	so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1);
+	so_data  (so, cso->line_smooth ? 1 : 0);
+	if (cso->line_stipple_enable) {
+		so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1);
+		so_data  (so, 1);
+		so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1);
+		so_data  (so, (cso->line_stipple_pattern << 8) |
+			       cso->line_stipple_factor);
+	} else {
+		so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	so_method(so, tesla, NV50TCL_POINT_SIZE, 1);
+	so_data  (so, fui(cso->point_size));
+
+	/* Front mode is emitted first, then back mode: which of fill_cw and
+	 * fill_ccw is "front" depends on the front-face winding. */
+	so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3);
+	if (cso->front_winding == PIPE_WINDING_CCW) {
+		so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+		so_data(so, nvgl_polygon_mode(cso->fill_cw));
+	} else {
+		so_data(so, nvgl_polygon_mode(cso->fill_cw));
+		so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+	}
+	so_data(so, cso->poly_smooth ? 1 : 0);
+
+	/* Gallium expresses culling by winding; translate it to front/back
+	 * relative to the front-face winding. */
+	so_method(so, tesla, NV50TCL_CULL_FACE_ENABLE, 3);
+	so_data  (so, cso->cull_mode != PIPE_WINDING_NONE);
+	if (cso->front_winding == PIPE_WINDING_CCW) {
+		so_data(so, NV50TCL_FRONT_FACE_CCW);
+		switch (cso->cull_mode) {
+		case PIPE_WINDING_CCW:
+			so_data(so, NV50TCL_CULL_FACE_FRONT);
+			break;
+		case PIPE_WINDING_CW:
+			so_data(so, NV50TCL_CULL_FACE_BACK);
+			break;
+		case PIPE_WINDING_BOTH:
+			so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK);
+			break;
+		default:
+			so_data(so, NV50TCL_CULL_FACE_BACK);
+			break;
+		}
+	} else {
+		so_data(so, NV50TCL_FRONT_FACE_CW);
+		switch (cso->cull_mode) {
+		case PIPE_WINDING_CCW:
+			so_data(so, NV50TCL_CULL_FACE_BACK);
+			break;
+		case PIPE_WINDING_CW:
+			so_data(so, NV50TCL_CULL_FACE_FRONT);
+			break;
+		case PIPE_WINDING_BOTH:
+			so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK);
+			break;
+		default:
+			so_data(so, NV50TCL_CULL_FACE_BACK);
+			break;
+		}
+	}
+
+	so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_ENABLE, 1);
+	so_data  (so, cso->poly_stipple_enable ? 1 : 0);
+
+	/* Three offset enables in order point, line, fill: each is set when
+	 * either winding both requests offset and uses that fill mode. */
+	so_method(so, tesla, NV50TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
+	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
+		so_data(so, 1);
+	else
+		so_data(so, 0);
+	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
+	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
+		so_data(so, 1);
+	else
+		so_data(so, 0);
+	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
+	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
+		so_data(so, 1);
+	else
+		so_data(so, 0);
+
+	if (cso->offset_cw || cso->offset_ccw) {
+		so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1);
+		so_data  (so, fui(cso->offset_scale));
+		so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1);
+		so_data  (so, fui(cso->offset_units));
+	}
+
+	rso->pipe = *cso;
+	so_ref(so, &rso->so);
+	return (void *)rso;
+}
+
+/* Make hwcso the current rasterizer CSO and flag it for revalidation. */
+static void
+nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->dirty |= NV50_NEW_RASTERIZER;
+	ctx->rasterizer = hwcso;
+}
+
+/* Drop the CSO's reference on its state object, then free the CSO itself. */
+static void
+nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_rasterizer_stateobj *rast_cso = hwcso;
+
+	so_ref(NULL, &rast_cso->so);
+	FREE(rast_cso);
+}
+
+/*
+ * Create a depth/stencil/alpha CSO.
+ *
+ * Translates the pipe state into a state object covering depth write and
+ * test, both stencil faces and the alpha test, and keeps a copy of the
+ * original pipe state alongside it.
+ *
+ * Returns the new nv50_zsa_stateobj, or NULL if an allocation fails.
+ */
+static void *
+nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+			const struct pipe_depth_stencil_alpha_state *cso)
+{
+	struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
+	struct nv50_zsa_stateobj *zsa;
+	struct nouveau_stateobj *so;
+
+	/* Allocate the wrapper first so a failure needs no stateobj cleanup. */
+	zsa = CALLOC_STRUCT(nv50_zsa_stateobj);
+	if (!zsa)
+		return NULL;
+
+	so = so_new(64, 0);
+	if (!so) {
+		FREE(zsa);
+		return NULL;
+	}
+
+	so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1);
+	so_data  (so, cso->depth.writemask ? 1 : 0);
+	if (cso->depth.enabled) {
+		so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1);
+		so_data  (so, 1);
+		so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1);
+		so_data  (so, nvgl_comparison_op(cso->depth.func));
+	} else {
+		so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	/*XXX: yes, I know they're backwards.. header needs fixing */
+	if (cso->stencil[0].enabled) {
+		so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5);
+		so_data  (so, 1);
+		so_data  (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+		so_data  (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+		so_data  (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+		so_data  (so, nvgl_comparison_op(cso->stencil[0].func));
+		so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3);
+		so_data  (so, cso->stencil[0].ref_value);
+		so_data  (so, cso->stencil[0].writemask);
+		so_data  (so, cso->stencil[0].valuemask);
+	} else {
+		so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	if (cso->stencil[1].enabled) {
+		/* Enable, ops, func, ref and masks go out as one 8-word run. */
+		so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8);
+		so_data  (so, 1);
+		so_data  (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+		so_data  (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+		so_data  (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+		so_data  (so, nvgl_comparison_op(cso->stencil[1].func));
+		so_data  (so, cso->stencil[1].ref_value);
+		so_data  (so, cso->stencil[1].writemask);
+		so_data  (so, cso->stencil[1].valuemask);
+	} else {
+		so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	if (cso->alpha.enabled) {
+		so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1);
+		so_data  (so, 1);
+		so_method(so, tesla, NV50TCL_ALPHA_TEST_REF, 2);
+		so_data  (so, fui(cso->alpha.ref_value));
+		so_data  (so, nvgl_comparison_op(cso->alpha.func));
+	} else {
+		so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	zsa->pipe = *cso;
+	so_ref(so, &zsa->so);
+	return (void *)zsa;
+}
+
+/* Make hwcso the current depth/stencil/alpha CSO and flag it dirty. */
+static void
+nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->dirty |= NV50_NEW_ZSA;
+	ctx->zsa = hwcso;
+}
+
+/* Drop the CSO's reference on its state object, then free the CSO itself. */
+static void
+nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_zsa_stateobj *zsa_cso = hwcso;
+
+	so_ref(NULL, &zsa_cso->so);
+	FREE(zsa_cso);
+}
+
+/*
+ * Create a vertex shader CSO: an nv50_program holding a private copy of the
+ * TGSI tokens plus the scan results for them.
+ *
+ * Returns the program, or NULL if either allocation fails.
+ */
+static void *
+nv50_vp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *cso)
+{
+	struct nv50_program *p = CALLOC_STRUCT(nv50_program);
+
+	if (!p)
+		return NULL;
+
+	p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+	if (!p->pipe.tokens) {
+		/* Never hand tgsi_scan_shader() a NULL token stream. */
+		FREE(p);
+		return NULL;
+	}
+
+	p->type = PIPE_SHADER_VERTEX;
+	tgsi_scan_shader(p->pipe.tokens, &p->info);
+	return (void *)p;
+}
+
+/* Make hwcso the current vertex program and flag it for revalidation. */
+static void
+nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->dirty |= NV50_NEW_VERTPROG;
+	ctx->vertprog = hwcso;
+}
+
+/*
+ * Destroy a vertex program CSO: tear down the program's driver-side state,
+ * then free the duplicated token stream and the program structure.
+ */
+static void
+nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_program *prog = hwcso;
+
+	nv50_program_destroy(nv50_context(pipe), prog);
+	FREE((void*)prog->pipe.tokens);
+	FREE(prog);
+}
+
+/*
+ * Create a fragment shader CSO: an nv50_program holding a private copy of
+ * the TGSI tokens plus the scan results for them.
+ *
+ * Returns the program, or NULL if either allocation fails.
+ */
+static void *
+nv50_fp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *cso)
+{
+	struct nv50_program *p = CALLOC_STRUCT(nv50_program);
+
+	if (!p)
+		return NULL;
+
+	p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+	if (!p->pipe.tokens) {
+		/* Never hand tgsi_scan_shader() a NULL token stream. */
+		FREE(p);
+		return NULL;
+	}
+
+	p->type = PIPE_SHADER_FRAGMENT;
+	tgsi_scan_shader(p->pipe.tokens, &p->info);
+	return (void *)p;
+}
+
+/* Make hwcso the current fragment program and flag it for revalidation. */
+static void
+nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->dirty |= NV50_NEW_FRAGPROG;
+	ctx->fragprog = hwcso;
+}
+
+/*
+ * Destroy a fragment program CSO: tear down the program's driver-side state,
+ * then free the duplicated token stream and the program structure.
+ */
+static void
+nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_program *prog = hwcso;
+
+	nv50_program_destroy(nv50_context(pipe), prog);
+	FREE((void*)prog->pipe.tokens);
+	FREE(prog);
+}
+
+/* Store a copy of the blend colour and flag it for revalidation. */
+static void
+nv50_set_blend_color(struct pipe_context *pipe,
+		     const struct pipe_blend_color *bcol)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->dirty |= NV50_NEW_BLEND_COLOUR;
+	ctx->blend_colour = *bcol;
+}
+
+/*
+ * Clip state hook. Intentionally empty: the clip state passed in is neither
+ * stored nor flagged dirty, so user clip planes have no effect here.
+ */
+static void
+nv50_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+}
+
+/*
+ * Record the constant buffer for the vertex or fragment stage and flag the
+ * matching constant-buffer dirty bit. Other shader stages are ignored, and
+ * the index argument is not used.
+ */
+static void
+nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+			 const struct pipe_constant_buffer *buf )
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	switch (shader) {
+	case PIPE_SHADER_VERTEX:
+		ctx->constbuf[PIPE_SHADER_VERTEX] = buf->buffer;
+		ctx->dirty |= NV50_NEW_VERTPROG_CB;
+		break;
+	case PIPE_SHADER_FRAGMENT:
+		ctx->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer;
+		ctx->dirty |= NV50_NEW_FRAGPROG_CB;
+		break;
+	default:
+		break;
+	}
+}
+
+/* Store a copy of the framebuffer state and flag it for revalidation. */
+static void
+nv50_set_framebuffer_state(struct pipe_context *pipe,
+			   const struct pipe_framebuffer_state *fb)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->dirty |= NV50_NEW_FRAMEBUFFER;
+	ctx->framebuffer = *fb;
+}
+
+/* Store a copy of the polygon stipple pattern and flag it dirty. */
+static void
+nv50_set_polygon_stipple(struct pipe_context *pipe,
+			 const struct pipe_poly_stipple *stipple)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->dirty |= NV50_NEW_STIPPLE;
+	ctx->stipple = *stipple;
+}
+
+/* Store a copy of the scissor rectangle and flag it for revalidation. */
+static void
+nv50_set_scissor_state(struct pipe_context *pipe,
+		       const struct pipe_scissor_state *s)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->dirty |= NV50_NEW_SCISSOR;
+	ctx->scissor = *s;
+}
+
+/* Store a copy of the viewport transform and flag it for revalidation. */
+static void
+nv50_set_viewport_state(struct pipe_context *pipe,
+			const struct pipe_viewport_state *vpt)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->dirty |= NV50_NEW_VIEWPORT;
+	ctx->viewport = *vpt;
+}
+
+/*
+ * Copy count vertex buffer descriptors into the context, record the count
+ * and flag vertex array state for revalidation.
+ */
+static void
+nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+			const struct pipe_vertex_buffer *vb)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->vtxbuf_nr = count;
+	memcpy(ctx->vtxbuf, vb, sizeof(*vb) * count);
+
+	ctx->dirty |= NV50_NEW_ARRAYS;
+}
+
+/*
+ * Copy count vertex element descriptors into the context, record the count
+ * and flag vertex array state for revalidation.
+ */
+static void
+nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+			 const struct pipe_vertex_element *ve)
+{
+	struct nv50_context *ctx = nv50_context(pipe);
+
+	ctx->vtxelt_nr = count;
+	memcpy(ctx->vtxelt, ve, sizeof(*ve) * count);
+
+	ctx->dirty |= NV50_NEW_ARRAYS;
+}
+
+/*
+ * Install this file's state hooks (CSO create/bind/delete and the set_*
+ * entry points) into the context's pipe_context function table.
+ */
+void
+nv50_init_state_functions(struct nv50_context *nv50)
+{
+	struct pipe_context *pipe = &nv50->pipe;
+
+	/* Constant state objects. */
+	pipe->create_blend_state = nv50_blend_state_create;
+	pipe->bind_blend_state = nv50_blend_state_bind;
+	pipe->delete_blend_state = nv50_blend_state_delete;
+
+	pipe->create_sampler_state = nv50_sampler_state_create;
+	pipe->bind_sampler_states = nv50_sampler_state_bind;
+	pipe->delete_sampler_state = nv50_sampler_state_delete;
+	pipe->set_sampler_textures = nv50_set_sampler_texture;
+
+	pipe->create_rasterizer_state = nv50_rasterizer_state_create;
+	pipe->bind_rasterizer_state = nv50_rasterizer_state_bind;
+	pipe->delete_rasterizer_state = nv50_rasterizer_state_delete;
+
+	pipe->create_depth_stencil_alpha_state =
+		nv50_depth_stencil_alpha_state_create;
+	pipe->bind_depth_stencil_alpha_state =
+		nv50_depth_stencil_alpha_state_bind;
+	pipe->delete_depth_stencil_alpha_state =
+		nv50_depth_stencil_alpha_state_delete;
+
+	/* Shaders. */
+	pipe->create_vs_state = nv50_vp_state_create;
+	pipe->bind_vs_state = nv50_vp_state_bind;
+	pipe->delete_vs_state = nv50_vp_state_delete;
+
+	pipe->create_fs_state = nv50_fp_state_create;
+	pipe->bind_fs_state = nv50_fp_state_bind;
+	pipe->delete_fs_state = nv50_fp_state_delete;
+
+	/* Directly-set state. */
+	pipe->set_blend_color = nv50_set_blend_color;
+	pipe->set_clip_state = nv50_set_clip_state;
+	pipe->set_constant_buffer = nv50_set_constant_buffer;
+	pipe->set_framebuffer_state = nv50_set_framebuffer_state;
+	pipe->set_polygon_stipple = nv50_set_polygon_stipple;
+	pipe->set_scissor_state = nv50_set_scissor_state;
+	pipe->set_viewport_state = nv50_set_viewport_state;
+
+	pipe->set_vertex_buffers = nv50_set_vertex_buffers;
+	pipe->set_vertex_elements = nv50_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
new file mode 100644
index 0000000000..948112ffa9
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nouveau/nouveau_stateobj.h"
+
+/*
+ * Build the framebuffer state object from nv50->framebuffer and store it in
+ * nv50->state.fb.
+ *
+ * For every colour buffer it emits the size, the buffer address (as a
+ * high/low relocation pair) and a format code; the depth/stencil buffer, if
+ * present, is set up the same way. The first surface seen supplies the
+ * width/height used for the viewport methods at the end; all further
+ * surfaces are asserted to match it.
+ */
+static void
+nv50_state_validate_fb(struct nv50_context *nv50)
+{
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *so = so_new(128, 18);
+	struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+	/* w/h start at 0 so that a framebuffer with neither colour nor
+	 * depth/stencil surfaces programs a defined (empty) extent instead
+	 * of reading uninitialised values below. gw records whether the
+	 * reference size has been captured yet. */
+	unsigned i, w = 0, h = 0, gw = 0;
+
+	for (i = 0; i < fb->nr_cbufs; i++) {
+		if (!gw) {
+			w = fb->cbufs[i]->width;
+			h = fb->cbufs[i]->height;
+			gw = 1;
+		} else {
+			assert(w == fb->cbufs[i]->width);
+			assert(h == fb->cbufs[i]->height);
+		}
+
+		so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2);
+		so_data  (so, fb->cbufs[i]->width);
+		so_data  (so, fb->cbufs[i]->height);
+
+		so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5);
+		so_reloc (so, nv50_surface_buffer(fb->cbufs[i]),
+			  fb->cbufs[i]->offset,
+			  NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH |
+			  NOUVEAU_BO_RDWR, 0, 0);
+		so_reloc (so, nv50_surface_buffer(fb->cbufs[i]),
+			  fb->cbufs[i]->offset,
+			  NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+			  NOUVEAU_BO_RDWR, 0, 0);
+		switch (fb->cbufs[i]->format) {
+		case PIPE_FORMAT_A8R8G8B8_UNORM:
+			so_data(so, 0xcf);
+			break;
+		case PIPE_FORMAT_R5G6B5_UNORM:
+			so_data(so, 0xe8);
+			break;
+		default:
+			NOUVEAU_ERR("AIIII unknown format %s\n",
+				    pf_name(fb->cbufs[i]->format));
+			so_data(so, 0xe6);
+			break;
+		}
+		so_data(so, 0x00000000);
+		so_data(so, 0x00000000);
+
+		so_method(so, tesla, 0x1224, 1);
+		so_data  (so, 1);
+	}
+
+	if (fb->zsbuf) {
+		if (!gw) {
+			w = fb->zsbuf->width;
+			h = fb->zsbuf->height;
+			gw = 1;
+		} else {
+			assert(w == fb->zsbuf->width);
+			assert(h == fb->zsbuf->height);
+		}
+
+		so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
+		so_reloc (so, nv50_surface_buffer(fb->zsbuf),
+			  fb->zsbuf->offset,
+			  NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH |
+			  NOUVEAU_BO_RDWR, 0, 0);
+		so_reloc (so, nv50_surface_buffer(fb->zsbuf),
+			  fb->zsbuf->offset,
+			  NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+			  NOUVEAU_BO_RDWR, 0, 0);
+		switch (fb->zsbuf->format) {
+		case PIPE_FORMAT_Z24S8_UNORM:
+			so_data(so, 0x16);
+			break;
+		case PIPE_FORMAT_Z16_UNORM:
+			so_data(so, 0x15);
+			break;
+		default:
+			NOUVEAU_ERR("AIIII unknown format %s\n",
+				    pf_name(fb->zsbuf->format));
+			so_data(so, 0x16);
+			break;
+		}
+		so_data(so, 0x00000000);
+		so_data(so, 0x00000000);
+
+		so_method(so, tesla, 0x1538, 1);
+		so_data  (so, 1);
+		so_method(so, tesla, 0x1228, 3);
+		so_data  (so, fb->zsbuf->width);
+		so_data  (so, fb->zsbuf->height);
+		so_data  (so, 0x00010001);
+	}
+
+	so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2);
+	so_data  (so, w << 16);
+	so_data  (so, h << 16);
+	so_method(so, tesla, 0x0e04, 2);
+	so_data  (so, w << 16);
+	so_data  (so, h << 16);
+	so_method(so, tesla, 0xdf8, 2);
+	so_data  (so, 0);
+	so_data  (so, h);
+
+	so_ref(so, &nv50->state.fb);
+}
+
+/*
+ * Emit every state object whose bit is set in nv50->state.dirty, then clear
+ * the dirty mask.
+ *
+ * The screen remembers which context emitted last (cur_pctx). When a
+ * different context emits, all of its state bits are forced dirty so that
+ * everything is sent again.
+ */
+static void
+nv50_state_emit(struct nv50_context *nv50)
+{
+ struct nv50_screen *screen = nv50->screen;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ /* Another context emitted last: resend all of this context's state. */
+ if (nv50->pctx_id != screen->cur_pctx) {
+ nv50->state.dirty |= 0xffffffff;
+ screen->cur_pctx = nv50->pctx_id;
+ }
+
+ if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER)
+ so_emit(nvws, nv50->state.fb);
+ if (nv50->state.dirty & NV50_NEW_BLEND)
+ so_emit(nvws, nv50->state.blend);
+ if (nv50->state.dirty & NV50_NEW_ZSA)
+ so_emit(nvws, nv50->state.zsa);
+ if (nv50->state.dirty & NV50_NEW_VERTPROG)
+ so_emit(nvws, nv50->state.vertprog);
+ if (nv50->state.dirty & NV50_NEW_FRAGPROG)
+ so_emit(nvws, nv50->state.fragprog);
+ if (nv50->state.dirty & NV50_NEW_RASTERIZER)
+ so_emit(nvws, nv50->state.rast);
+ if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
+ so_emit(nvws, nv50->state.blend_colour);
+ if (nv50->state.dirty & NV50_NEW_STIPPLE)
+ so_emit(nvws, nv50->state.stipple);
+ if (nv50->state.dirty & NV50_NEW_SCISSOR)
+ so_emit(nvws, nv50->state.scissor);
+ if (nv50->state.dirty & NV50_NEW_VIEWPORT)
+ so_emit(nvws, nv50->state.viewport);
+ if (nv50->state.dirty & NV50_NEW_SAMPLER)
+ so_emit(nvws, nv50->state.tsc_upload);
+ if (nv50->state.dirty & NV50_NEW_TEXTURE)
+ so_emit(nvws, nv50->state.tic_upload);
+ if (nv50->state.dirty & NV50_NEW_ARRAYS) {
+ so_emit(nvws, nv50->state.vtxfmt);
+ so_emit(nvws, nv50->state.vtxbuf);
+ }
+ nv50->state.dirty = 0;
+
+ /* Relocation markers are emitted on every call, dirty or not.
+  * NOTE(review): presumably these are the state objects that reference
+  * buffer objects - confirm against so_emit_reloc_markers(). */
+ so_emit_reloc_markers(nvws, nv50->state.fb);
+ so_emit_reloc_markers(nvws, nv50->state.vertprog);
+ so_emit_reloc_markers(nvws, nv50->state.fragprog);
+ so_emit_reloc_markers(nvws, nv50->state.vtxbuf);
+ so_emit_reloc_markers(nvws, nv50->screen->static_init);
+}
+
+/*
+ * Bring the derived hardware state in nv50->state up to date with the
+ * pipe-level state flagged in nv50->dirty, then emit it.
+ *
+ * For each dirty bit the matching state object is rebuilt or re-referenced.
+ * The pipe-level dirty bits are then merged into nv50->state.dirty, cleared,
+ * and nv50_state_emit() is called. Always returns TRUE.
+ */
+boolean
+nv50_state_validate(struct nv50_context *nv50)
+{
+	const struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *so;
+	unsigned i;
+
+	for (i = 0; i < fb->nr_cbufs; i++)
+		fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
+
+	if (fb->zsbuf)
+		fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
+
+	if (nv50->dirty & NV50_NEW_FRAMEBUFFER)
+		nv50_state_validate_fb(nv50);
+
+	if (nv50->dirty & NV50_NEW_BLEND)
+		so_ref(nv50->blend->so, &nv50->state.blend);
+
+	if (nv50->dirty & NV50_NEW_ZSA)
+		so_ref(nv50->zsa->so, &nv50->state.zsa);
+
+	if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB))
+		nv50_vertprog_validate(nv50);
+
+	if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
+		nv50_fragprog_validate(nv50);
+
+	if (nv50->dirty & NV50_NEW_RASTERIZER)
+		so_ref(nv50->rasterizer->so, &nv50->state.rast);
+
+	if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
+		so = so_new(5, 0);
+		so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
+		so_data  (so, fui(nv50->blend_colour.color[0]));
+		so_data  (so, fui(nv50->blend_colour.color[1]));
+		so_data  (so, fui(nv50->blend_colour.color[2]));
+		so_data  (so, fui(nv50->blend_colour.color[3]));
+		so_ref(so, &nv50->state.blend_colour);
+	}
+
+	if (nv50->dirty & NV50_NEW_STIPPLE) {
+		so = so_new(33, 0);
+		so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+		for (i = 0; i < 32; i++)
+			so_data(so, nv50->stipple.stipple[i]);
+		so_ref(so, &nv50->state.stipple);
+	}
+
+	/* The scissor depends on the rasterizer's scissor enable as well as
+	 * on the rectangle, so either change lands here. */
+	if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) {
+		struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe;
+		struct pipe_scissor_state *s = &nv50->scissor;
+
+		/* Disabled before and still disabled: nothing to rebuild. */
+		if (nv50->state.scissor &&
+		    (rast->scissor == 0 && nv50->state.scissor_enabled == 0))
+			goto scissor_uptodate;
+		nv50->state.scissor_enabled = rast->scissor;
+
+		so = so_new(3, 0);
+		so_method(so, tesla, 0x0ff4, 2);
+		if (nv50->state.scissor_enabled) {
+			so_data(so, ((s->maxx - s->minx) << 16) | s->minx);
+			so_data(so, ((s->maxy - s->miny) << 16) | s->miny);
+		} else {
+			so_data(so, (8192 << 16));
+			so_data(so, (8192 << 16));
+		}
+		so_ref(so, &nv50->state.scissor);
+		nv50->state.dirty |= NV50_NEW_SCISSOR;
+	}
+scissor_uptodate:
+
+	/* The viewport state also encodes the rasterizer's bypass_clipping
+	 * flag, so a rasterizer change must be considered here too. With a
+	 * guard on NV50_NEW_VIEWPORT alone, the "viewport not dirty" test
+	 * below could never be true and a bypass change was never picked
+	 * up. */
+	if (nv50->dirty & (NV50_NEW_VIEWPORT | NV50_NEW_RASTERIZER)) {
+		unsigned bypass;
+
+		if (!nv50->rasterizer->pipe.bypass_clipping)
+			bypass = 0;
+		else
+			bypass = 1;
+
+		/* Skip the rebuild when the bypass setting is unchanged and
+		 * the transform either is unused (bypass) or did not change. */
+		if (nv50->state.viewport &&
+		    (bypass || !(nv50->dirty & NV50_NEW_VIEWPORT)) &&
+		    nv50->state.viewport_bypass == bypass)
+			goto viewport_uptodate;
+		nv50->state.viewport_bypass = bypass;
+
+		so = so_new(12, 0);
+		if (!bypass) {
+			so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3);
+			so_data  (so, fui(nv50->viewport.translate[0]));
+			so_data  (so, fui(nv50->viewport.translate[1]));
+			so_data  (so, fui(nv50->viewport.translate[2]));
+			so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3);
+			so_data  (so, fui(nv50->viewport.scale[0]));
+			so_data  (so, fui(-nv50->viewport.scale[1]));
+			so_data  (so, fui(nv50->viewport.scale[2]));
+			so_method(so, tesla, 0x192c, 1);
+			so_data  (so, 1);
+			so_method(so, tesla, 0x0f90, 1);
+			so_data  (so, 0);
+		} else {
+			so_method(so, tesla, 0x192c, 1);
+			so_data  (so, 0);
+			so_method(so, tesla, 0x0f90, 1);
+			so_data  (so, 1);
+		}
+
+		so_ref(so, &nv50->state.viewport);
+		/* Flag explicitly: when only the rasterizer was dirty, the
+		 * merge of nv50->dirty below would not mark the viewport for
+		 * emission. */
+		nv50->state.dirty |= NV50_NEW_VIEWPORT;
+	}
+viewport_uptodate:
+
+	if (nv50->dirty & NV50_NEW_SAMPLER) {
+		/* 2 words to select the buffer, 1 method header, 8 words per
+		 * sampler. */
+		so = so_new(nv50->sampler_nr * 8 + 3, 0);
+		so_method(so, tesla, 0x0f00, 1);
+		so_data  (so, NV50_CB_TSC);
+		so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8);
+		for (i = 0; i < nv50->sampler_nr; i++)
+			so_datap (so, nv50->sampler[i], 8);
+		so_ref(so, &nv50->state.tsc_upload);
+	}
+
+	if (nv50->dirty & NV50_NEW_TEXTURE)
+		nv50_tex_validate(nv50);
+
+	if (nv50->dirty & NV50_NEW_ARRAYS)
+		nv50_vbo_validate(nv50);
+
+	nv50->state.dirty |= nv50->dirty;
+	nv50->dirty = 0;
+	nv50_state_emit(nv50);
+
+	return TRUE;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
new file mode 100644
index 0000000000..b0936518b0
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define __NOUVEAU_PUSH_H__
+#include <stdint.h>
+#include "nouveau/nouveau_pushbuf.h"
+#include "nv50_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_inlines.h"
+
+#include "util/u_tile.h"
+
+/*
+ * Map a pipe format to the NV50_2D_DST_FORMAT_* value used by the 2D
+ * engine paths in this file. Returns -1 for formats without a mapping.
+ */
+static INLINE int
+nv50_format(enum pipe_format format)
+{
+	int hw_format = -1;
+
+	switch (format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case PIPE_FORMAT_Z24S8_UNORM:
+		hw_format = NV50_2D_DST_FORMAT_32BPP;
+		break;
+	case PIPE_FORMAT_X8R8G8B8_UNORM:
+		hw_format = NV50_2D_DST_FORMAT_24BPP;
+		break;
+	case PIPE_FORMAT_R5G6B5_UNORM:
+		hw_format = NV50_2D_DST_FORMAT_16BPP;
+		break;
+	case PIPE_FORMAT_A8_UNORM:
+		hw_format = NV50_2D_DST_FORMAT_8BPP;
+		break;
+	default:
+		break;
+	}
+
+	return hw_format;
+}
+
+/*
+ * Program the 2D engine's source or destination surface.
+ *
+ * screen: screen owning the channel and 2D object
+ * ps:     surface to program
+ * dst:    non-zero to program the destination, zero for the source
+ *
+ * Returns 0 on success, 1 if the surface has no buffer object or its format
+ * has no 2D engine mapping. Nothing is pushed on failure.
+ */
+static int
+nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
+{
+	struct nv50_miptree *mt = nv50_miptree(ps->texture);
+	struct nouveau_channel *chan = screen->nvws->channel;
+	struct nouveau_grobj *eng2d = screen->eng2d;
+	struct nouveau_bo *bo;
+	int format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT;
+	int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
+
+	bo = screen->nvws->get_bo(mt->buffer);
+	if (!bo)
+		return 1;
+
+	format = nv50_format(ps->format);
+	if (format < 0)
+		return 1;
+
+	/* Linear and tiled buffers use different method layouts: only the
+	 * linear path passes a pitch. */
+	if (!bo->tiled) {
+		BEGIN_RING(chan, eng2d, mthd, 2);
+		OUT_RING  (chan, format);
+		OUT_RING  (chan, 1);
+		BEGIN_RING(chan, eng2d, mthd + 0x14, 5);
+		OUT_RING  (chan, mt->level[0].pitch);
+		OUT_RING  (chan, ps->width);
+		OUT_RING  (chan, ps->height);
+		OUT_RELOCh(chan, bo, ps->offset, flags);
+		OUT_RELOCl(chan, bo, ps->offset, flags);
+	} else {
+		BEGIN_RING(chan, eng2d, mthd, 5);
+		OUT_RING  (chan, format);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0);
+		BEGIN_RING(chan, eng2d, mthd + 0x18, 4);
+		OUT_RING  (chan, ps->width);
+		OUT_RING  (chan, ps->height);
+		OUT_RELOCh(chan, bo, ps->offset, flags);
+		OUT_RELOCl(chan, bo, ps->offset, flags);
+	}
+
+	return 0;
+}
+
+/*
+ * Queue a 2D engine blit of a w x h rectangle from (sx, sy) in src to
+ * (dx, dy) in dst.
+ *
+ * Returns 0 on success, or the non-zero result of nv50_surface_set() if
+ * either surface could not be programmed (in which case no blit is queued).
+ */
+int
+nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst,
+ int dx, int dy, struct pipe_surface *src, int sx, int sy,
+ int w, int h)
+{
+ struct nouveau_channel *chan = screen->nvws->channel;
+ struct nouveau_grobj *eng2d = screen->eng2d;
+ int ret;
+
+ WAIT_RING (chan, 32);
+
+ ret = nv50_surface_set(screen, dst, 1);
+ if (ret)
+ return ret;
+
+ ret = nv50_surface_set(screen, src, 0);
+ if (ret)
+ return ret;
+
+ BEGIN_RING(chan, eng2d, 0x088c, 1);
+ OUT_RING (chan, 0);
+ /* Destination rectangle: x, y, width, height. */
+ BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 4);
+ OUT_RING (chan, dx);
+ OUT_RING (chan, dy);
+ OUT_RING (chan, w);
+ OUT_RING (chan, h);
+ /* NOTE(review): 0x08c0 looks like a pair of (fraction, integer) scale
+  * factors fixed at 1:1 - confirm against the 2D class documentation. */
+ BEGIN_RING(chan, eng2d, 0x08c0, 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ /* Source origin; sx and sy are each preceded by a zero word. */
+ BEGIN_RING(chan, eng2d, 0x08d0, 4);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sx);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, sy);
+
+ return 0;
+}
+
+/*
+ * pipe_context::surface_copy hook.
+ *
+ * Copies a width x height rectangle from (srcx, srcy) in src to
+ * (destx, desty) in dest. With flip set, the rectangle is mirrored
+ * vertically by copying one row at a time: source rows are read top-down
+ * and written to the destination rectangle bottom-up.
+ */
+static void
+nv50_surface_copy(struct pipe_context *pipe, boolean flip,
+		  struct pipe_surface *dest, unsigned destx, unsigned desty,
+		  struct pipe_surface *src, unsigned srcx, unsigned srcy,
+		  unsigned width, unsigned height)
+{
+	struct nv50_context *nv50 = (struct nv50_context *)pipe;
+	struct nv50_screen *screen = nv50->screen;
+
+	assert(src->format == dest->format);
+
+	if (flip) {
+		/* The destination rectangle covers rows desty through
+		 * desty + height - 1. Start one past the end and step back
+		 * before each copy, so the first row written is the last row
+		 * of the rectangle rather than the row below it. */
+		desty += height;
+		while (height--) {
+			/* Every row programs the same two surfaces, so a
+			 * failure here would repeat for each remaining row. */
+			if (nv50_surface_do_copy(screen, dest, destx, --desty,
+						 src, srcx, srcy++, width, 1))
+				break;
+		}
+	} else {
+		nv50_surface_do_copy(screen, dest, destx, desty, src, srcx,
+				     srcy, width, height);
+	}
+}
+
+/*
+ * pipe_context::surface_fill hook.
+ *
+ * Queues a 2D engine solid fill of a width x height rectangle at
+ * (destx, desty) in dest with the raw value given. Returns silently without
+ * queuing anything if the surface format has no 2D engine mapping or the
+ * destination surface cannot be programmed.
+ */
+static void
+nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)pipe;
+ struct nv50_screen *screen = nv50->screen;
+ struct nouveau_channel *chan = screen->nvws->channel;
+ struct nouveau_grobj *eng2d = screen->eng2d;
+ int format, ret;
+
+ /* Checked up front because the format is also pushed below. */
+ format = nv50_format(dest->format);
+ if (format < 0)
+ return;
+
+ WAIT_RING (chan, 32);
+
+ ret = nv50_surface_set(screen, dest, 1);
+ if (ret)
+ return;
+
+ BEGIN_RING(chan, eng2d, 0x0580, 3);
+ OUT_RING (chan, 4);
+ OUT_RING (chan, format);
+ OUT_RING (chan, value);
+ /* NOTE(review): the method is named RECT_X1 but receives width/height
+  * as its 3rd/4th words rather than x2/y2 - confirm which the hardware
+  * expects. */
+ BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4);
+ OUT_RING (chan, destx);
+ OUT_RING (chan, desty);
+ OUT_RING (chan, width);
+ OUT_RING (chan, height);
+}
+
+/* Install the 2D engine copy and fill hooks into the context. */
+void
+nv50_init_surface_functions(struct nv50_context *nv50)
+{
+	struct pipe_context *pipe = &nv50->pipe;
+
+	pipe->surface_fill = nv50_surface_fill;
+	pipe->surface_copy = nv50_surface_copy;
+}
+
+
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
new file mode 100644
index 0000000000..31bf59675e
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nv50_texture.h"
+
+#include "nouveau/nouveau_stateobj.h"
+
+/*
+ * Append one 8-word texture image control (TIC) entry for `mt` to `so`.
+ *
+ * Word 0 encodes the component swizzle, per-component type and storage
+ * format; it is followed by the low address relocation, two fixed words,
+ * the level-0 width, depth/height, another fixed word and the high
+ * address relocation.
+ *
+ * Returns 0 on success, or 1 (with nothing appended) when the miptree
+ * format is not handled.
+ */
+static int
+nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt)
+{
+	/* Every handled format reads all four components as UNORM. */
+	const unsigned unorm = NV50TIC_0_0_TYPEA_UNORM |
+			       NV50TIC_0_0_TYPER_UNORM |
+			       NV50TIC_0_0_TYPEG_UNORM |
+			       NV50TIC_0_0_TYPEB_UNORM;
+	/* R/G/B taken from components 0/1/2 respectively. */
+	const unsigned rgb = NV50TIC_0_0_MAPR_C0 |
+			     NV50TIC_0_0_MAPG_C1 |
+			     NV50TIC_0_0_MAPB_C2;
+	/* R/G/B all replicated from component 0. */
+	const unsigned rep = NV50TIC_0_0_MAPR_C0 |
+			     NV50TIC_0_0_MAPG_C0 |
+			     NV50TIC_0_0_MAPB_C0;
+	/* R/G/B forced to zero. */
+	const unsigned zero = NV50TIC_0_0_MAPR_ZERO |
+			      NV50TIC_0_0_MAPG_ZERO |
+			      NV50TIC_0_0_MAPB_ZERO;
+	unsigned tic0;
+
+	switch (mt->base.format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+		tic0 = NV50TIC_0_0_MAPA_C3 | rgb | NV50TIC_0_0_FMT_8_8_8_8;
+		break;
+	case PIPE_FORMAT_A1R5G5B5_UNORM:
+		tic0 = NV50TIC_0_0_MAPA_C3 | rgb | NV50TIC_0_0_FMT_1_5_5_5;
+		break;
+	case PIPE_FORMAT_A4R4G4B4_UNORM:
+		tic0 = NV50TIC_0_0_MAPA_C3 | rgb | NV50TIC_0_0_FMT_4_4_4_4;
+		break;
+	case PIPE_FORMAT_R5G6B5_UNORM:
+		tic0 = NV50TIC_0_0_MAPA_ONE | rgb | NV50TIC_0_0_FMT_5_6_5;
+		break;
+	case PIPE_FORMAT_L8_UNORM:
+		tic0 = NV50TIC_0_0_MAPA_ONE | rep | NV50TIC_0_0_FMT_8;
+		break;
+	case PIPE_FORMAT_A8_UNORM:
+		tic0 = NV50TIC_0_0_MAPA_C0 | zero | NV50TIC_0_0_FMT_8;
+		break;
+	case PIPE_FORMAT_I8_UNORM:
+		tic0 = NV50TIC_0_0_MAPA_C0 | rep | NV50TIC_0_0_FMT_8;
+		break;
+	case PIPE_FORMAT_A8L8_UNORM:
+		tic0 = NV50TIC_0_0_MAPA_C1 | rep | NV50TIC_0_0_FMT_8_8;
+		break;
+	case PIPE_FORMAT_DXT1_RGB:
+		tic0 = NV50TIC_0_0_MAPA_ONE | rgb | NV50TIC_0_0_FMT_DXT1;
+		break;
+	case PIPE_FORMAT_DXT1_RGBA:
+		tic0 = NV50TIC_0_0_MAPA_C3 | rgb | NV50TIC_0_0_FMT_DXT1;
+		break;
+	case PIPE_FORMAT_DXT3_RGBA:
+		tic0 = NV50TIC_0_0_MAPA_C3 | rgb | NV50TIC_0_0_FMT_DXT3;
+		break;
+	case PIPE_FORMAT_DXT5_RGBA:
+		tic0 = NV50TIC_0_0_MAPA_C3 | rgb | NV50TIC_0_0_FMT_DXT5;
+		break;
+	default:
+		return 1;
+	}
+
+	so_data (so, tic0 | unorm);
+	so_reloc(so, mt->buffer, 0,
+		 NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0);
+	so_data (so, 0xd0005000);
+	so_data (so, 0x00300000);
+	so_data (so, mt->base.width[0]);
+	so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]);
+	so_data (so, 0x03000000);
+	so_reloc(so, mt->buffer, 0,
+		 NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | NOUVEAU_BO_RD, 0, 0);
+
+	return 0;
+}
+
+/*
+ * Build the state object that uploads one TIC entry per bound miptree and
+ * store it in nv50->state.tic_upload.
+ *
+ * The object is sized for 3 header words plus 8 words and 2 relocations
+ * per miptree.  If any miptree cannot be described, an error is logged,
+ * the partially built object is released and state.tic_upload is left
+ * as it was.
+ */
+void
+nv50_tex_validate(struct nv50_context *nv50)
+{
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *tic;
+	int i;
+
+	tic = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2);
+	so_method(tic, tesla, 0x0f00, 1);
+	so_data  (tic, NV50_CB_TIC);
+	so_method(tic, tesla, 0x40000f04, nv50->miptree_nr * 8);
+
+	for (i = 0; i < nv50->miptree_nr; i++) {
+		if (!nv50_tex_construct(tic, nv50->miptree[i]))
+			continue;
+
+		NOUVEAU_ERR("failed tex validate\n");
+		so_ref(NULL, &tic);
+		return;
+	}
+
+	so_ref(tic, &nv50->state.tic_upload);
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
new file mode 100644
index 0000000000..aca622c73b
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -0,0 +1,129 @@
+#ifndef __NV50_TEXTURE_H__
+#define __NV50_TEXTURE_H__
+
+/* It'd be really nice to have these in nouveau_class.h generated by
+ * renouveau like the rest of the object header - but not sure it can
+ * handle non-object stuff nicely - need to look into it.
+ */
+
+/* Texture image control block */
+#define NV50TIC_0_0_MAPA_MASK 0x38000000
+#define NV50TIC_0_0_MAPA_ZERO 0x00000000
+#define NV50TIC_0_0_MAPA_C0 0x10000000
+#define NV50TIC_0_0_MAPA_C1 0x18000000
+#define NV50TIC_0_0_MAPA_C2 0x20000000
+#define NV50TIC_0_0_MAPA_C3 0x28000000
+#define NV50TIC_0_0_MAPA_ONE 0x38000000
+#define NV50TIC_0_0_MAPR_MASK 0x07000000
+#define NV50TIC_0_0_MAPR_ZERO 0x00000000
+#define NV50TIC_0_0_MAPR_C0 0x02000000
+#define NV50TIC_0_0_MAPR_C1 0x03000000
+#define NV50TIC_0_0_MAPR_C2 0x04000000
+#define NV50TIC_0_0_MAPR_C3 0x05000000
+#define NV50TIC_0_0_MAPR_ONE 0x07000000
+#define NV50TIC_0_0_MAPG_MASK 0x00e00000
+#define NV50TIC_0_0_MAPG_ZERO 0x00000000
+#define NV50TIC_0_0_MAPG_C0 0x00400000
+#define NV50TIC_0_0_MAPG_C1 0x00600000
+#define NV50TIC_0_0_MAPG_C2 0x00800000
+#define NV50TIC_0_0_MAPG_C3 0x00a00000
+#define NV50TIC_0_0_MAPG_ONE 0x00e00000
+#define NV50TIC_0_0_MAPB_MASK 0x001c0000
+#define NV50TIC_0_0_MAPB_ZERO 0x00000000
+#define NV50TIC_0_0_MAPB_C0 0x00080000
+#define NV50TIC_0_0_MAPB_C1 0x000c0000
+#define NV50TIC_0_0_MAPB_C2 0x00100000
+#define NV50TIC_0_0_MAPB_C3 0x00140000
+#define NV50TIC_0_0_MAPB_ONE 0x001c0000
+#define NV50TIC_0_0_TYPEA_MASK 0x00038000
+#define NV50TIC_0_0_TYPEA_UNORM 0x00010000
+#define NV50TIC_0_0_TYPER_MASK 0x00007000
+#define NV50TIC_0_0_TYPER_UNORM 0x00002000
+#define NV50TIC_0_0_TYPEG_MASK 0x00000e00
+#define NV50TIC_0_0_TYPEG_UNORM 0x00000400
+#define NV50TIC_0_0_TYPEB_MASK 0x000001c0
+#define NV50TIC_0_0_TYPEB_UNORM 0x00000080
+#define NV50TIC_0_0_FMT_MASK 0x0000003c
+#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008
+#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012
+#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013
+#define NV50TIC_0_0_FMT_5_6_5 0x00000015
+#define NV50TIC_0_0_FMT_8_8 0x00000018
+#define NV50TIC_0_0_FMT_8 0x0000001d
+#define NV50TIC_0_0_FMT_DXT1 0x00000024
+#define NV50TIC_0_0_FMT_DXT3 0x00000025
+#define NV50TIC_0_0_FMT_DXT5 0x00000026
+
+#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
+#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0
+
+#define NV50TIC_0_2_UNKNOWN_MASK 0xffffffff
+
+#define NV50TIC_0_3_UNKNOWN_MASK 0xffffffff
+
+#define NV50TIC_0_4_WIDTH_MASK 0x0000ffff
+#define NV50TIC_0_4_WIDTH_SHIFT 0
+
+#define NV50TIC_0_5_DEPTH_MASK 0xffff0000
+#define NV50TIC_0_5_DEPTH_SHIFT 16
+#define NV50TIC_0_5_HEIGHT_MASK 0x0000ffff
+#define NV50TIC_0_5_HEIGHT_SHIFT 0
+
+#define NV50TIC_0_6_UNKNOWN_MASK 0xffffffff
+
+#define NV50TIC_0_7_OFFSET_HIGH_MASK 0xffffffff
+#define NV50TIC_0_7_OFFSET_HIGH_SHIFT 0
+
+/* Texture sampler control block */
+#define NV50TSC_1_0_WRAPS_MASK 0x00000007
+#define NV50TSC_1_0_WRAPS_REPEAT 0x00000000
+#define NV50TSC_1_0_WRAPS_MIRROR_REPEAT 0x00000001
+#define NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE 0x00000002
+#define NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER 0x00000003
+#define NV50TSC_1_0_WRAPS_CLAMP 0x00000004
+#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE 0x00000005
+#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER 0x00000006
+#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP 0x00000007
+#define NV50TSC_1_0_WRAPT_MASK 0x00000038
+#define NV50TSC_1_0_WRAPT_REPEAT 0x00000000
+#define NV50TSC_1_0_WRAPT_MIRROR_REPEAT 0x00000008
+#define NV50TSC_1_0_WRAPT_CLAMP_TO_EDGE 0x00000010
+#define NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER 0x00000018
+#define NV50TSC_1_0_WRAPT_CLAMP 0x00000020
+#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_EDGE 0x00000028
+#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_BORDER 0x00000030
+#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP 0x00000038
+#define NV50TSC_1_0_WRAPR_MASK 0x000001c0
+#define NV50TSC_1_0_WRAPR_REPEAT 0x00000000
+#define NV50TSC_1_0_WRAPR_MIRROR_REPEAT 0x00000040
+#define NV50TSC_1_0_WRAPR_CLAMP_TO_EDGE 0x00000080
+#define NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER 0x000000c0
+#define NV50TSC_1_0_WRAPR_CLAMP 0x00000100
+#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140
+#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180
+#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0
+
+#define NV50TSC_1_1_MAGF_MASK 0x00000003
+#define NV50TSC_1_1_MAGF_NEAREST 0x00000001
+#define NV50TSC_1_1_MAGF_LINEAR 0x00000002
+#define NV50TSC_1_1_MINF_MASK 0x00000030
+#define NV50TSC_1_1_MINF_NEAREST 0x00000010
+#define NV50TSC_1_1_MINF_LINEAR 0x00000020
+#define NV50TSC_1_1_MIPF_MASK 0x000000c0
+#define NV50TSC_1_1_MIPF_NONE 0x00000040
+#define NV50TSC_1_1_MIPF_NEAREST 0x00000080
+#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0
+
+#define NV50TSC_1_2_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_4_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_5_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_6_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_7_UNKNOWN_MASK 0xffffffff
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
new file mode 100644
index 0000000000..a00c999510
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -0,0 +1,216 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_inlines.h"
+
+#include "nv50_context.h"
+
+/*
+ * Driver-private transfer object.  Code in this file casts between
+ * struct pipe_transfer * and struct nv50_transfer *, so `base` has to stay
+ * the first member.
+ */
+struct nv50_transfer {
+ struct pipe_transfer base;
+ /* Staging buffer created per transfer; this is what map/unmap expose. */
+ struct pipe_buffer *buffer;
+ /* Miptree level the transfer was created for. */
+ struct nv50_miptree_level *level;
+ /* Pitch, width and height of that level, captured at creation time. */
+ int level_pitch;
+ int level_width;
+ int level_height;
+ /* x/y position of the transfer rectangle as passed at creation time. */
+ int level_x;
+ int level_y;
+};
+
+/*
+ * Copy a width x height rectangle between two buffers with the M2MF object.
+ *
+ * src/dst              buffers to copy between
+ * src_pitch/dst_pitch  line pitch, used when the buffer is not tiled
+ * sx,sy / dx,dy        position of the rectangle in source / destination
+ * sw,sh / dw,dh        full surface size, emitted when the buffer is tiled
+ * cpp                  multiplier applied to x positions and widths
+ *                      (presumably bytes per element - confirm with callers)
+ * src_reloc/dst_reloc  relocation flags; read / write access is OR'ed in here
+ *
+ * The copy is issued in chunks of at most 2047 lines.  Linear buffers are
+ * addressed by advancing a byte offset per chunk; tiled buffers keep offset
+ * 0 and receive the current (y << 16) | x position instead.  The ring is
+ * fired after every chunk.
+ */
+static void
+nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct pipe_buffer *src,
+			int src_pitch, int sx, int sy, int sw, int sh,
+			struct pipe_buffer *dst, int dst_pitch, int dx, int dy,
+			int dw, int dh, int cpp, int width, int height,
+			unsigned src_reloc, unsigned dst_reloc)
+{
+	struct nv50_screen *screen = nv50_screen(pscreen);
+	struct nouveau_winsys *nvws = screen->nvws;
+	struct nouveau_channel *chan = nvws->channel;
+	struct nouveau_grobj *m2mf = screen->m2mf;
+	struct nouveau_bo *src_bo = nvws->get_bo(src);
+	struct nouveau_bo *dst_bo = nvws->get_bo(dst);
+	unsigned src_offset = 0, dst_offset = 0;
+
+	src_reloc |= NOUVEAU_BO_RD;
+	dst_reloc |= NOUVEAU_BO_WR;
+
+	/* Worst case below is 7 words for each side (both tiled). */
+	WAIT_RING (chan, 14);
+
+	if (!src_bo->tiled) {
+		BEGIN_RING(chan, m2mf, 0x0200, 1);
+		OUT_RING  (chan, 1);
+		BEGIN_RING(chan, m2mf, 0x0314, 1);
+		OUT_RING  (chan, src_pitch);
+		src_offset = (sy * src_pitch) + (sx * cpp);
+	} else {
+		BEGIN_RING(chan, m2mf, 0x0200, 6);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, sw * cpp);
+		OUT_RING  (chan, sh);
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0);
+	}
+
+	if (!dst_bo->tiled) {
+		BEGIN_RING(chan, m2mf, 0x021c, 1);
+		OUT_RING  (chan, 1);
+		BEGIN_RING(chan, m2mf, 0x0318, 1);
+		OUT_RING  (chan, dst_pitch);
+		dst_offset = (dy * dst_pitch) + (dx * cpp);
+	} else {
+		BEGIN_RING(chan, m2mf, 0x021c, 6);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, dw * cpp);
+		OUT_RING  (chan, dh);
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0);
+	}
+
+	while (height) {
+		int line_count = height > 2047 ? 2047 : height;
+
+		WAIT_RING (chan, 15);
+		BEGIN_RING(chan, m2mf, 0x0238, 2);
+		OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
+		OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc);
+		BEGIN_RING(chan, m2mf, 0x030c, 2);
+		OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
+		OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
+		if (src_bo->tiled) {
+			/* Position inside the source uses the source
+			 * coordinates (sy, sx); sy advances per chunk below.
+			 */
+			BEGIN_RING(chan, m2mf, 0x0218, 1);
+			OUT_RING  (chan, (sy << 16) | sx);
+		} else {
+			src_offset += (line_count * src_pitch);
+		}
+		if (dst_bo->tiled) {
+			/* Likewise the destination uses (dy, dx). */
+			BEGIN_RING(chan, m2mf, 0x0234, 1);
+			OUT_RING  (chan, (dy << 16) | dx);
+		} else {
+			dst_offset += (line_count * dst_pitch);
+		}
+		BEGIN_RING(chan, m2mf, 0x031c, 4);
+		OUT_RING  (chan, width * cpp);
+		OUT_RING  (chan, line_count);
+		OUT_RING  (chan, 0x00000101);
+		OUT_RING  (chan, 0);
+		FIRE_RING (chan);
+
+		height -= line_count;
+		sy += line_count;
+		dy += line_count;
+	}
+}
+
+/*
+ * get_tex_transfer hook: create a transfer covering the w x h rectangle at
+ * (x, y) of mipmap level `level` of texture `pt`.
+ *
+ * A staging buffer of w * h * block.size bytes is allocated for the
+ * transfer.  Unless usage is exactly PIPE_TRANSFER_WRITE, the rectangle is
+ * first copied from the miptree into the staging buffer.
+ *
+ * Returns the new transfer (holding a reference on pt), or NULL if either
+ * the transfer object or the staging buffer could not be allocated.
+ *
+ * NOTE(review): face and zslice are currently not used to select an image
+ * inside the miptree; the copy always addresses mt->buffer from offset 0.
+ */
+static struct pipe_transfer *
+nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+		  unsigned face, unsigned level, unsigned zslice,
+		  enum pipe_transfer_usage usage,
+		  unsigned x, unsigned y, unsigned w, unsigned h)
+{
+	struct nv50_miptree *mt = nv50_miptree(pt);
+	struct nv50_miptree_level *lvl = &mt->level[level];
+	struct nv50_transfer *tx;
+
+	tx = CALLOC_STRUCT(nv50_transfer);
+	if (!tx)
+		return NULL;
+
+	tx->base.refcount = 1;
+	pipe_texture_reference(&tx->base.texture, pt);
+	tx->base.format = pt->format;
+	tx->base.width = w;
+	tx->base.height = h;
+	tx->base.block = pt->block;
+	tx->base.nblocksx = pt->nblocksx[level];
+	tx->base.nblocksy = pt->nblocksy[level];
+	tx->base.stride = (w * pt->block.size);
+	tx->base.usage = usage;
+
+	tx->level = lvl;
+	tx->level_pitch = lvl->pitch;
+	tx->level_width = mt->base.width[level];
+	tx->level_height = mt->base.height[level];
+	tx->level_x = x;
+	tx->level_y = y;
+	tx->buffer =
+		pipe_buffer_create(pscreen, 0, NOUVEAU_BUFFER_USAGE_TRANSFER,
+				   w * tx->base.block.size * h);
+	if (!tx->buffer) {
+		/* Undo the texture reference taken above before bailing. */
+		pipe_texture_reference(&tx->base.texture, NULL);
+		FREE(tx);
+		return NULL;
+	}
+
+	if (usage != PIPE_TRANSFER_WRITE) {
+		/* Pre-load the staging buffer with the current contents. */
+		nv50_transfer_rect_m2mf(pscreen, mt->buffer, tx->level_pitch,
+					x, y, tx->level_width, tx->level_height,
+					tx->buffer, tx->base.stride, 0, 0,
+					tx->base.width, tx->base.height,
+					tx->base.block.size, w, h,
+					NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
+					NOUVEAU_BO_GART);
+	}
+
+	return &tx->base;
+}
+
+/*
+ * tex_transfer_release hook: clear the caller's pointer and drop one
+ * reference on the transfer.  When the last reference goes away the staging
+ * buffer is copied back into the miptree (unless usage is exactly
+ * PIPE_TRANSFER_READ), and the staging buffer, the texture reference and
+ * the transfer itself are released.
+ */
+static void
+nv50_transfer_del(struct pipe_screen *pscreen, struct pipe_transfer **pptx)
+{
+	struct pipe_transfer *ptx = *pptx;
+	struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
+	struct nv50_miptree *mt = nv50_miptree(ptx->texture);
+
+	*pptx = NULL;
+
+	ptx->refcount--;
+	if (ptx->refcount != 0)
+		return;
+
+	if (ptx->usage != PIPE_TRANSFER_READ) {
+		/* Write the staged rectangle back at its original position. */
+		nv50_transfer_rect_m2mf(pscreen,
+					tx->buffer, tx->base.stride, 0, 0,
+					tx->base.width, tx->base.height,
+					mt->buffer, tx->level_pitch,
+					tx->level_x, tx->level_y,
+					tx->level_width, tx->level_height,
+					tx->base.block.size,
+					tx->base.width, tx->base.height,
+					NOUVEAU_BO_GART,
+					NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+	}
+
+	pipe_buffer_reference(pscreen, &tx->buffer, NULL);
+	pipe_texture_reference(&ptx->texture, NULL);
+	FREE(ptx);
+}
+
+/*
+ * transfer_map hook: map the transfer's staging buffer for CPU access.
+ * The buffer usage flags are derived from the READ/WRITE bits of the
+ * transfer usage.  Returns whatever pipe_buffer_map() returns.
+ */
+static void *
+nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
+{
+	struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
+	const unsigned wr = (ptx->usage & PIPE_TRANSFER_WRITE) ?
+			    PIPE_BUFFER_USAGE_CPU_WRITE : 0;
+	const unsigned rd = (ptx->usage & PIPE_TRANSFER_READ) ?
+			    PIPE_BUFFER_USAGE_CPU_READ : 0;
+
+	return pipe_buffer_map(pscreen, tx->buffer, wr | rd);
+}
+
+/* transfer_unmap hook: unmap the transfer's staging buffer. */
+static void
+nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
+{
+	pipe_buffer_unmap(pscreen, ((struct nv50_transfer *)ptx)->buffer);
+}
+
+/* Install the nv50 texture transfer hooks on the screen. */
+void
+nv50_transfer_init_screen_functions(struct pipe_screen *pscreen)
+{
+	/* Lifetime. */
+	pscreen->get_tex_transfer = nv50_transfer_new;
+	pscreen->tex_transfer_release = nv50_transfer_del;
+
+	/* CPU access. */
+	pscreen->transfer_unmap = nv50_transfer_unmap;
+	pscreen->transfer_map = nv50_transfer_map;
+}
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
new file mode 100644
index 0000000000..08d751dddb
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv50_context.h"
+
+/*
+ * Translate a PIPE_PRIM_* primitive type into the matching
+ * NV50TCL_VERTEX_BEGIN_* value.  Unknown types are logged and fall back
+ * to POINTS.
+ */
+static INLINE unsigned
+nv50_prim(unsigned mode)
+{
+	unsigned hw_prim = NV50TCL_VERTEX_BEGIN_POINTS;
+
+	switch (mode) {
+	case PIPE_PRIM_POINTS:
+		break;
+	case PIPE_PRIM_LINES:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINES;
+		break;
+	case PIPE_PRIM_LINE_LOOP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINE_LOOP;
+		break;
+	case PIPE_PRIM_LINE_STRIP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINE_STRIP;
+		break;
+	case PIPE_PRIM_TRIANGLES:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLES;
+		break;
+	case PIPE_PRIM_TRIANGLE_STRIP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
+		break;
+	case PIPE_PRIM_TRIANGLE_FAN:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
+		break;
+	case PIPE_PRIM_QUADS:
+		hw_prim = NV50TCL_VERTEX_BEGIN_QUADS;
+		break;
+	case PIPE_PRIM_QUAD_STRIP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
+		break;
+	case PIPE_PRIM_POLYGON:
+		hw_prim = NV50TCL_VERTEX_BEGIN_POLYGON;
+		break;
+	default:
+		NOUVEAU_ERR("invalid primitive type %d\n", mode);
+		break;
+	}
+
+	return hw_prim;
+}
+
+/*
+ * Draw `count` vertices starting at `start` from the currently bound vertex
+ * buffers as primitive type `mode`.
+ *
+ * State is validated first, a few fixed methods are written with 0, then
+ * the BEGIN / (first, count) / END sequence is emitted and the context is
+ * flushed.  Always returns TRUE.
+ */
+boolean
+nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
+		 unsigned count)
+{
+	struct nv50_context *nv50 = nv50_context(pipe);
+	struct nv50_screen *screen = nv50->screen;
+	struct nouveau_grobj *tesla = screen->tesla;
+	struct nouveau_channel *chan = screen->nvws->channel;
+
+	nv50_state_validate(nv50);
+
+	/* Method 0x142c is written twice, as in the original sequence. */
+	BEGIN_RING(chan, tesla, 0x142c, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, tesla, 0x142c, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, tesla, 0x1440, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, tesla, 0x1334, 1);
+	OUT_RING  (chan, 0);
+
+	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+	OUT_RING  (chan, nv50_prim(mode));
+
+	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+	OUT_RING  (chan, start);
+	OUT_RING  (chan, count);
+
+	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+	OUT_RING  (chan, 0);
+
+	pipe->flush(pipe, 0, NULL);
+	return TRUE;
+}
+
+/*
+ * Emit `count` 8-bit indices, starting at element `start` of `map`, inline
+ * into the command stream.
+ *
+ * An odd leading index is sent on its own through method 0x15e8; the
+ * remaining (even) number of indices is sent two per word through method
+ * 0x15f0, in batches of at most 2046 indices.
+ */
+static INLINE void
+nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
+			      unsigned start, unsigned count)
+{
+	struct nouveau_channel *chan = nv50->screen->nvws->channel;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+
+	map += start;
+
+	if (count & 1) {
+		BEGIN_RING(chan, tesla, 0x15e8, 1);
+		OUT_RING  (chan, map[0]);
+		map++;
+		count--;
+	}
+
+	while (count) {
+		unsigned nr = count > 2046 ? 2046 : count;
+		unsigned i;
+
+		BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1);
+		/* Pack consecutive index pairs; indexing by i is required,
+		 * map itself only advances once per batch.
+		 */
+		for (i = 0; i < nr; i += 2)
+			OUT_RING  (chan, (map[i + 1] << 16) | map[i]);
+
+		count -= nr;
+		map += nr;
+	}
+}
+
+/*
+ * Emit `count` 16-bit indices, starting at element `start` of `map`, inline
+ * into the command stream.
+ *
+ * An odd leading index is sent on its own through method 0x15e8; the
+ * remaining (even) number of indices is sent two per word through method
+ * 0x15f0, in batches of at most 2046 indices.
+ */
+static INLINE void
+nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
+			      unsigned start, unsigned count)
+{
+	struct nouveau_channel *chan = nv50->screen->nvws->channel;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+
+	map += start;
+
+	if (count & 1) {
+		BEGIN_RING(chan, tesla, 0x15e8, 1);
+		OUT_RING  (chan, map[0]);
+		map++;
+		count--;
+	}
+
+	while (count) {
+		unsigned nr = count > 2046 ? 2046 : count;
+		unsigned i;
+
+		BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1);
+		/* Pack consecutive index pairs; indexing by i is required,
+		 * map itself only advances once per batch.
+		 */
+		for (i = 0; i < nr; i += 2)
+			OUT_RING  (chan, (map[i + 1] << 16) | map[i]);
+
+		count -= nr;
+		map += nr;
+	}
+}
+
+/*
+ * Emit `count` 32-bit indices, starting at element `start`, inline into
+ * the command stream through method 0x15e8, in batches of at most 2047
+ * words.
+ *
+ * `map` is a byte pointer to the start of the index data, so every step
+ * through it is scaled by the 4-byte index size.
+ */
+static INLINE void
+nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map,
+			      unsigned start, unsigned count)
+{
+	struct nouveau_channel *chan = nv50->screen->nvws->channel;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+
+	map += start * sizeof(uint32_t);
+
+	while (count) {
+		unsigned nr = count > 2047 ? 2047 : count;
+
+		BEGIN_RING(chan, tesla, 0x400015e8, nr);
+		OUT_RINGp (chan, map, nr);
+
+		count -= nr;
+		map += nr * sizeof(uint32_t);
+	}
+}
+
+/*
+ * Draw `count` indexed vertices as primitive type `mode`.
+ *
+ * The index buffer is mapped for CPU reads and its indices (1, 2 or 4
+ * bytes each, per indexSize) are copied inline into the command stream
+ * between VERTEX_BEGIN and VERTEX_END.  The buffer is unmapped again once
+ * the indices have been emitted, and the context is flushed.
+ *
+ * Returns TRUE on success, FALSE if the index buffer could not be mapped.
+ */
+boolean
+nv50_draw_elements(struct pipe_context *pipe,
+		   struct pipe_buffer *indexBuffer, unsigned indexSize,
+		   unsigned mode, unsigned start, unsigned count)
+{
+	struct nv50_context *nv50 = nv50_context(pipe);
+	struct nouveau_channel *chan = nv50->screen->nvws->channel;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct pipe_winsys *ws = pipe->winsys;
+	void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+
+	if (!map)
+		return FALSE;
+
+	nv50_state_validate(nv50);
+
+	BEGIN_RING(chan, tesla, 0x142c, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, tesla, 0x142c, 1);
+	OUT_RING  (chan, 0);
+
+	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+	OUT_RING  (chan, nv50_prim(mode));
+	switch (indexSize) {
+	case 1:
+		nv50_draw_elements_inline_u08(nv50, map, start, count);
+		break;
+	case 2:
+		nv50_draw_elements_inline_u16(nv50, map, start, count);
+		break;
+	case 4:
+		nv50_draw_elements_inline_u32(nv50, map, start, count);
+		break;
+	default:
+		assert(0);
+	}
+	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+	OUT_RING  (chan, 0);
+
+	/* The indices now live in the command stream; release the mapping. */
+	ws->buffer_unmap(ws, indexBuffer);
+
+	pipe->flush(pipe, 0, NULL);
+	return TRUE;
+}
+
+/*
+ * Build the vertex format and vertex buffer state objects from the bound
+ * vertex elements and store them in nv50->state.vtxfmt / state.vtxbuf.
+ *
+ * For each element one format word (format code OR'ed with the element
+ * index) is appended to vtxfmt, and a 3-word block (stride word plus the
+ * high and low halves of the buffer address) is appended to vtxbuf.
+ *
+ * An unsupported source format is logged and asserted on; both partially
+ * built objects are then released and the previous state is left in place.
+ */
+void
+nv50_vbo_validate(struct nv50_context *nv50)
+{
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *vtxbuf, *vtxfmt;
+	int i;
+
+	vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2);
+	vtxfmt = so_new(nv50->vtxelt_nr + 1, 0);
+	so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr);
+
+	for (i = 0; i < nv50->vtxelt_nr; i++) {
+		struct pipe_vertex_element *ve = &nv50->vtxelt[i];
+		struct pipe_vertex_buffer *vb =
+			&nv50->vtxbuf[ve->vertex_buffer_index];
+
+		switch (ve->src_format) {
+		case PIPE_FORMAT_R32G32B32A32_FLOAT:
+			so_data(vtxfmt, 0x7e080000 | i);
+			break;
+		case PIPE_FORMAT_R32G32B32_FLOAT:
+			so_data(vtxfmt, 0x7e100000 | i);
+			break;
+		case PIPE_FORMAT_R32G32_FLOAT:
+			so_data(vtxfmt, 0x7e200000 | i);
+			break;
+		case PIPE_FORMAT_R32_FLOAT:
+			so_data(vtxfmt, 0x7e900000 | i);
+			break;
+		case PIPE_FORMAT_R8G8B8A8_UNORM:
+			so_data(vtxfmt, 0x24500000 | i);
+			break;
+		default:
+		{
+			NOUVEAU_ERR("invalid vbo format %s\n",
+				    pf_name(ve->src_format));
+			assert(0);
+			/* Don't leak the half-built state objects. */
+			so_ref(NULL, &vtxfmt);
+			so_ref(NULL, &vtxbuf);
+			return;
+		}
+		}
+
+		so_method(vtxbuf, tesla, 0x900 + (i * 16), 3);
+		so_data  (vtxbuf, 0x20000000 | vb->stride);
+		so_reloc (vtxbuf, vb->buffer, vb->buffer_offset +
+			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+			  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+		so_reloc (vtxbuf, vb->buffer, vb->buffer_offset +
+			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+			  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+	}
+
+	so_ref (vtxfmt, &nv50->state.vtxfmt);
+	so_ref (vtxbuf, &nv50->state.vtxbuf);
+}
+
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
new file mode 100644
index 0000000000..9b7524b523
--- /dev/null
+++ b/src/gallium/drivers/r300/Makefile
@@ -0,0 +1,20 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = r300
+
+C_SOURCES = \
+ r300_chipset.c \
+ r300_clear.c \
+ r300_context.c \
+ r300_emit.c \
+ r300_flush.c \
+ r300_screen.c \
+ r300_state.c \
+ r300_state_derived.c \
+ r300_state_shader.c \
+ r300_surface.c \
+ r300_swtcl_emit.c \
+ r300_texture.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
new file mode 100644
index 0000000000..18684c3e7f
--- /dev/null
+++ b/src/gallium/drivers/r300/SConscript
@@ -0,0 +1,17 @@
+# SCons build description for the r300 driver convenience library.
+#
+# NOTE(review): this source list does not match C_SOURCES in the r300
+# Makefile: it names r300_blit.c, which the Makefile does not list, and it
+# omits r300_chipset.c, r300_emit.c, r300_flush.c, r300_state_derived.c,
+# r300_state_shader.c, r300_swtcl_emit.c and r300_texture.c. Confirm which
+# list is current.
+Import('*')
+
+env = env.Clone()
+
+r300 = env.ConvenienceLibrary(
+ target = 'r300',
+ source = [
+ 'r300_blit.c',
+ 'r300_clear.c',
+ 'r300_context.c',
+ 'r300_screen.c',
+ 'r300_state.c',
+ 'r300_surface.c',
+ ])
+
+Export('r300')
+
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
new file mode 100644
index 0000000000..196537a432
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_chipset.h"
+#include "util/u_debug.h"
+
+/* r300_chipset: A file all to itself for deducing the various properties of
+ * Radeons. */
+
+/* Parse a PCI ID and fill an r300_capabilities struct with information.
+ *
+ * Reads caps->pci_id and writes caps->family, caps->has_tcl, caps->is_r500
+ * and caps->num_vert_fpus. caps->num_frag_pipes is not written here.
+ *
+ * For an unrecognised PCI ID a warning is printed, caps->family is left
+ * untouched, and the other three fields keep the defaults set below. */
+void r300_parse_chipset(struct r300_capabilities* caps)
+{
+ /* Reasonable defaults */
+ caps->has_tcl = TRUE;
+ caps->is_r500 = FALSE;
+ caps->num_vert_fpus = 4;
+
+
+ /* Note: These are not ordered by PCI ID. I leave that task to GCC,
+ * which will perform the ordering while collating jump tables. Instead,
+ * I've tried to group them according to capabilities and age. */
+ switch (caps->pci_id) {
+ /* NOTE(review): 0x4144 selects the same family as the group that
+ * follows, so this separate case is redundant as written. */
+ case 0x4144:
+ caps->family = CHIP_FAMILY_R300;
+ break;
+
+ case 0x4145:
+ case 0x4146:
+ case 0x4147:
+ case 0x4E44:
+ case 0x4E45:
+ case 0x4E46:
+ case 0x4E47:
+ caps->family = CHIP_FAMILY_R300;
+ break;
+
+ case 0x4150:
+ case 0x4151:
+ case 0x4152:
+ case 0x4153:
+ case 0x4154:
+ case 0x4155:
+ case 0x4156:
+ case 0x4E50:
+ case 0x4E51:
+ case 0x4E52:
+ case 0x4E53:
+ case 0x4E54:
+ case 0x4E56:
+ caps->family = CHIP_FAMILY_RV350;
+ break;
+
+ case 0x4148:
+ case 0x4149:
+ case 0x414A:
+ case 0x414B:
+ case 0x4E48:
+ case 0x4E49:
+ case 0x4E4B:
+ caps->family = CHIP_FAMILY_R350;
+ break;
+
+ case 0x4E4A:
+ caps->family = CHIP_FAMILY_R360;
+ break;
+
+ case 0x5460:
+ case 0x5462:
+ case 0x5464:
+ case 0x5B60:
+ case 0x5B62:
+ case 0x5B63:
+ case 0x5B64:
+ case 0x5B65:
+ caps->family = CHIP_FAMILY_RV370;
+ break;
+
+ case 0x3150:
+ case 0x3152:
+ case 0x3154:
+ case 0x3E50:
+ case 0x3E54:
+ caps->family = CHIP_FAMILY_RV380;
+ break;
+
+ /* The R420 through RV410 groups below override the vertex FPU
+ * count to 6. */
+ case 0x4A48:
+ case 0x4A49:
+ case 0x4A4A:
+ case 0x4A4B:
+ case 0x4A4C:
+ case 0x4A4D:
+ case 0x4A4E:
+ case 0x4A4F:
+ case 0x4A50:
+ case 0x4A54:
+ caps->family = CHIP_FAMILY_R420;
+ caps->num_vert_fpus = 6;
+ break;
+
+ case 0x5548:
+ case 0x5549:
+ case 0x554A:
+ case 0x554B:
+ case 0x5550:
+ case 0x5551:
+ case 0x5552:
+ case 0x5554:
+ case 0x5D57:
+ caps->family = CHIP_FAMILY_R423;
+ caps->num_vert_fpus = 6;
+ break;
+
+ case 0x554C:
+ case 0x554D:
+ case 0x554E:
+ case 0x554F:
+ case 0x5D48:
+ case 0x5D49:
+ case 0x5D4A:
+ caps->family = CHIP_FAMILY_R430;
+ caps->num_vert_fpus = 6;
+ break;
+
+ case 0x5D4C:
+ case 0x5D4D:
+ case 0x5D4E:
+ case 0x5D4F:
+ case 0x5D50:
+ case 0x5D52:
+ caps->family = CHIP_FAMILY_R480;
+ caps->num_vert_fpus = 6;
+ break;
+
+ case 0x4B49:
+ case 0x4B4A:
+ case 0x4B4B:
+ case 0x4B4C:
+ caps->family = CHIP_FAMILY_R481;
+ caps->num_vert_fpus = 6;
+ break;
+
+ case 0x5E4C:
+ case 0x5E4F:
+ case 0x564A:
+ case 0x564B:
+ case 0x564F:
+ case 0x5652:
+ case 0x5653:
+ case 0x5657:
+ case 0x5E48:
+ case 0x5E4A:
+ case 0x5E4B:
+ case 0x5E4D:
+ caps->family = CHIP_FAMILY_RV410;
+ caps->num_vert_fpus = 6;
+ break;
+
+ /* The RS480 through RS740 groups below are marked as having no
+ * TCL. */
+ case 0x5954:
+ case 0x5955:
+ caps->family = CHIP_FAMILY_RS480;
+ caps->has_tcl = FALSE;
+ break;
+
+ case 0x5974:
+ case 0x5975:
+ caps->family = CHIP_FAMILY_RS482;
+ caps->has_tcl = FALSE;
+ break;
+
+ case 0x5A41:
+ case 0x5A42:
+ caps->family = CHIP_FAMILY_RS400;
+ caps->has_tcl = FALSE;
+ break;
+
+ case 0x5A61:
+ case 0x5A62:
+ caps->family = CHIP_FAMILY_RC410;
+ caps->has_tcl = FALSE;
+ break;
+
+ case 0x791E:
+ case 0x791F:
+ caps->family = CHIP_FAMILY_RS690;
+ caps->has_tcl = FALSE;
+ break;
+
+ case 0x796C:
+ case 0x796D:
+ case 0x796E:
+ case 0x796F:
+ caps->family = CHIP_FAMILY_RS740;
+ caps->has_tcl = FALSE;
+ break;
+
+ /* Every remaining group sets is_r500 and its own vertex FPU
+ * count. */
+ case 0x7100:
+ case 0x7101:
+ case 0x7102:
+ case 0x7103:
+ case 0x7104:
+ case 0x7105:
+ case 0x7106:
+ case 0x7108:
+ case 0x7109:
+ case 0x710A:
+ case 0x710B:
+ case 0x710C:
+ case 0x710E:
+ case 0x710F:
+ caps->family = CHIP_FAMILY_R520;
+ caps->num_vert_fpus = 8;
+ caps->is_r500 = TRUE;
+ break;
+
+ case 0x7140:
+ case 0x7141:
+ case 0x7142:
+ case 0x7143:
+ case 0x7144:
+ case 0x7145:
+ case 0x7146:
+ case 0x7147:
+ case 0x7149:
+ case 0x714A:
+ case 0x714B:
+ case 0x714C:
+ case 0x714D:
+ case 0x714E:
+ case 0x714F:
+ case 0x7151:
+ case 0x7152:
+ case 0x7153:
+ case 0x715E:
+ case 0x715F:
+ case 0x7180:
+ case 0x7181:
+ case 0x7183:
+ case 0x7186:
+ case 0x7187:
+ case 0x7188:
+ case 0x718A:
+ case 0x718B:
+ case 0x718C:
+ case 0x718D:
+ case 0x718F:
+ case 0x7193:
+ case 0x7196:
+ case 0x719B:
+ case 0x719F:
+ case 0x7200:
+ case 0x7210:
+ case 0x7211:
+ caps->family = CHIP_FAMILY_RV515;
+ caps->num_vert_fpus = 2;
+ caps->is_r500 = TRUE;
+ break;
+
+ case 0x71C0:
+ case 0x71C1:
+ case 0x71C2:
+ case 0x71C3:
+ case 0x71C4:
+ case 0x71C5:
+ case 0x71C6:
+ case 0x71C7:
+ case 0x71CD:
+ case 0x71CE:
+ case 0x71D2:
+ case 0x71D4:
+ case 0x71D5:
+ case 0x71D6:
+ case 0x71DA:
+ case 0x71DE:
+ caps->family = CHIP_FAMILY_RV530;
+ caps->num_vert_fpus = 5;
+ caps->is_r500 = TRUE;
+ break;
+
+ case 0x7240:
+ case 0x7243:
+ case 0x7244:
+ case 0x7245:
+ case 0x7246:
+ case 0x7247:
+ case 0x7248:
+ case 0x7249:
+ case 0x724A:
+ case 0x724B:
+ case 0x724C:
+ case 0x724D:
+ case 0x724E:
+ case 0x724F:
+ case 0x7284:
+ caps->family = CHIP_FAMILY_R580;
+ caps->num_vert_fpus = 8;
+ caps->is_r500 = TRUE;
+ break;
+
+ case 0x7280:
+ caps->family = CHIP_FAMILY_RV570;
+ caps->num_vert_fpus = 5;
+ caps->is_r500 = TRUE;
+ break;
+
+ case 0x7281:
+ case 0x7283:
+ case 0x7287:
+ case 0x7288:
+ case 0x7289:
+ case 0x728B:
+ case 0x728C:
+ case 0x7290:
+ case 0x7291:
+ case 0x7293:
+ case 0x7297:
+ caps->family = CHIP_FAMILY_RV560;
+ caps->num_vert_fpus = 5;
+ caps->is_r500 = TRUE;
+ break;
+
+ /* Unknown ID: warn only; caps->family is not assigned. */
+ default:
+ debug_printf("r300: Warning: Unknown chipset 0x%x\n",
+ caps->pci_id);
+ break;
+ }
+
+ /* XXX SW TCL is broken so no forcing it off right now
+ caps->has_tcl = FALSE; */
+}
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
new file mode 100644
index 0000000000..a9cd372ec5
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_CHIPSET_H
+#define R300_CHIPSET_H
+
+#include "pipe/p_compiler.h"
+
+/* Structure containing all the possible information about a specific Radeon
+ * in the R3xx, R4xx, and R5xx families. */
+struct r300_capabilities {
+ /* PCI ID */
+ uint32_t pci_id;
+ /* Chipset family; holds one of the CHIP_FAMILY_* enumerators declared
+ * below (stored as a plain int because that enum is anonymous) */
+ int family;
+ /* The number of vertex floating-point units */
+ int num_vert_fpus;
+ /* The number of fragment pipes */
+ int num_frag_pipes;
+ /* Whether or not TCL is physically present */
+ boolean has_tcl;
+ /* Whether or not this is an RV515 or newer; R500s have many differences
+ * that require extra consideration, compared to their R3xx cousins:
+ * - Extra bit of width and height on texture sizes
+ * - Blend color is split across two registers
+ * - Universal Shader (US) block used for fragment shaders */
+ boolean is_r500;
+};
+
+/* Enumerations for legibility and telling which card we're running on.
+ * The enum is anonymous and numbered consecutively from 0; values are stored
+ * in r300_capabilities::family. */
+enum {
+ CHIP_FAMILY_R300 = 0,
+ CHIP_FAMILY_R350,
+ CHIP_FAMILY_R360,
+ CHIP_FAMILY_RV350,
+ CHIP_FAMILY_RV370,
+ CHIP_FAMILY_RV380,
+ CHIP_FAMILY_R420,
+ CHIP_FAMILY_R423,
+ CHIP_FAMILY_R430,
+ CHIP_FAMILY_R480,
+ CHIP_FAMILY_R481,
+ CHIP_FAMILY_RV410,
+ CHIP_FAMILY_RS400,
+ CHIP_FAMILY_RC410,
+ CHIP_FAMILY_RS480,
+ CHIP_FAMILY_RS482,
+ CHIP_FAMILY_RS690,
+ CHIP_FAMILY_RS740,
+ CHIP_FAMILY_RV515,
+ CHIP_FAMILY_R520,
+ CHIP_FAMILY_RV530,
+ CHIP_FAMILY_R580,
+ CHIP_FAMILY_RV560,
+ CHIP_FAMILY_RV570
+};
+
+/* Fill in the capability fields of `caps` for the chip being driven.
+ * The implementation assigns family, num_vert_fpus and is_r500 from a table
+ * of hexadecimal IDs and prints a warning quoting caps->pci_id for unknown
+ * chips.
+ * NOTE(review): presumably caps->pci_id must be set by the caller before the
+ * call -- confirm against the top of r300_parse_chipset(). */
+void r300_parse_chipset(struct r300_capabilities* caps);
+
+#endif /* R300_CHIPSET_H */
diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c
new file mode 100644
index 0000000000..fd28437aaa
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_clear.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_clear.h"
+
+/* This gets its own file because Intel's is in its own file.
+ * I assume there's a good reason. */
+/* Clear a whole surface: fills the rectangle (0, 0)-(ps->width, ps->height)
+ * with `color` through the context's surface_fill hook, then marks the
+ * surface as PIPE_SURFACE_STATUS_DEFINED. */
+void r300_clear(struct pipe_context* pipe,
+ struct pipe_surface* ps,
+ unsigned color)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color);
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+} \ No newline at end of file
diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_clear.h
new file mode 100644
index 0000000000..e24a0690c9
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_clear.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_CLEAR_H
+#define R300_CLEAR_H
+
+#include "pipe/p_context.h"
+
+/* Fill the whole of surface `ps` with `color` and mark it as defined.
+ * The include guard matches the other r300 headers, so that pulling this
+ * file in both directly and through r300_context.h stays harmless. */
+void r300_clear(struct pipe_context* pipe,
+                struct pipe_surface* ps,
+                unsigned color);
+
+#endif /* R300_CLEAR_H */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
new file mode 100644
index 0000000000..15a8751549
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_context.h"
+
+/* Draw through the draw module: emit pending state, map the vertex (and
+ * optional index) buffers for CPU reads, hand them and the vertex shader
+ * constants to the draw context, run draw_arrays(), then unmap everything.
+ * Always returns true. */
+static boolean r300_draw_range_elements(struct pipe_context* pipe,
+                                        struct pipe_buffer* indexBuffer,
+                                        unsigned indexSize,
+                                        unsigned minIndex,
+                                        unsigned maxIndex,
+                                        unsigned mode,
+                                        unsigned start,
+                                        unsigned count)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    struct draw_context* draw = r300->draw;
+    int vb;
+
+    if (r300->dirty_state) {
+        r300_emit_dirty_state(r300);
+    }
+
+    /* Expose every bound vertex buffer to the draw module. */
+    for (vb = 0; vb < r300->vertex_buffer_count; vb++) {
+        void* mapped = pipe_buffer_map(pipe->screen,
+                                       r300->vertex_buffers[vb].buffer,
+                                       PIPE_BUFFER_USAGE_CPU_READ);
+        draw_set_mapped_vertex_buffer(draw, vb, mapped);
+    }
+
+    /* Indexed draws map the index buffer; plain draws clear the binding. */
+    if (!indexBuffer) {
+        draw_set_mapped_element_buffer(draw, 0, NULL);
+    } else {
+        void* elts = pipe_buffer_map(pipe->screen, indexBuffer,
+                                     PIPE_BUFFER_USAGE_CPU_READ);
+        draw_set_mapped_element_buffer_range(draw, indexSize,
+                                             minIndex, maxIndex, elts);
+    }
+
+    /* Each user constant is a vector of four floats. */
+    draw_set_mapped_constant_buffer(draw,
+        r300->shader_constants[PIPE_SHADER_VERTEX].constants,
+        r300->shader_constants[PIPE_SHADER_VERTEX].user_count *
+            (sizeof(float) * 4));
+
+    /* Abandon all hope, ye who enter here. */
+    draw_arrays(draw, mode, start, count);
+
+    /* Undo the mappings in the same order they were made. */
+    for (vb = 0; vb < r300->vertex_buffer_count; vb++) {
+        pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[vb].buffer);
+        draw_set_mapped_vertex_buffer(draw, vb, NULL);
+    }
+
+    if (indexBuffer) {
+        pipe_buffer_unmap(pipe->screen, indexBuffer);
+        draw_set_mapped_element_buffer_range(draw, 0, start,
+                                             start + count - 1, NULL);
+    }
+
+    return true;
+}
+
+/* Indexed draw without a caller-supplied index range: forwards to
+ * r300_draw_range_elements() with minIndex 0 and maxIndex ~0 (all bits set
+ * once converted to unsigned). */
+static boolean r300_draw_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize, unsigned mode,
+ unsigned start, unsigned count)
+{
+ return r300_draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
+ mode, start, count);
+}
+
+/* Non-indexed draw: forwards to r300_draw_elements() with a NULL index
+ * buffer and an index size of 0. */
+static boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+ unsigned start, unsigned count)
+{
+ return r300_draw_elements(pipe, NULL, 0, mode, start, count);
+}
+
+/* pipe_context::destroy hook. Destroys the draw module context, then frees
+ * the two state objects allocated by r300_create_context() and finally the
+ * context itself.
+ * NOTE(review): the screen stored in context.screen by r300_create_context()
+ * is not released here -- confirm who owns it. */
+static void r300_destroy_context(struct pipe_context* context) {
+ struct r300_context* r300 = r300_context(context);
+
+ draw_destroy(r300->draw);
+
+ FREE(r300->blend_color_state);
+ FREE(r300->scissor_state);
+ FREE(r300);
+}
+
+/* Create and initialise an r300 pipe context.
+ *
+ * screen:      currently unused; a screen is obtained from
+ *              r300_create_screen() instead.
+ * winsys:      generic Gallium winsys, stored in the pipe context.
+ * r300_winsys: r300-specific winsys, stored in the r300 context.
+ *
+ * Returns the embedded pipe_context, or NULL if any allocation fails. On
+ * failure everything allocated here is released again.
+ * NOTE(review): the screen returned by r300_create_screen() is left alone on
+ * the failure paths because its ownership is not visible here -- confirm. */
+struct pipe_context* r300_create_context(struct pipe_screen* screen,
+                                         struct pipe_winsys* winsys,
+                                         struct r300_winsys* r300_winsys)
+{
+    struct r300_context* r300 = CALLOC_STRUCT(r300_context);
+
+    if (!r300)
+        return NULL;
+
+    r300->winsys = r300_winsys;
+    r300->context.winsys = winsys;
+    r300->context.screen = r300_create_screen(winsys, r300_winsys);
+
+    r300->context.destroy = r300_destroy_context;
+
+    r300->context.clear = r300_clear;
+
+    r300->context.draw_arrays = r300_draw_arrays;
+    r300->context.draw_elements = r300_draw_elements;
+    r300->context.draw_range_elements = r300_draw_range_elements;
+
+    /* The draw module is dereferenced right away, so bail out if it could
+     * not be created. */
+    r300->draw = draw_create();
+    if (!r300->draw)
+        goto fail_context;
+    draw_set_rasterize_stage(r300->draw, r300_draw_swtcl_stage(r300));
+
+    /* These state objects are passed to the emit functions later on. */
+    r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state);
+    if (!r300->blend_color_state)
+        goto fail_draw;
+
+    r300->scissor_state = CALLOC_STRUCT(r300_scissor_state);
+    if (!r300->scissor_state)
+        goto fail_blend_color;
+
+    r300_init_flush_functions(r300);
+
+    r300_init_surface_functions(r300);
+
+    r300_init_state_functions(r300);
+
+    /* Force every piece of state to be emitted on the first draw. */
+    r300->dirty_state = R300_NEW_KITCHEN_SINK;
+    r300->dirty_hw++;
+
+    return &r300->context;
+
+fail_blend_color:
+    FREE(r300->blend_color_state);
+fail_draw:
+    draw_destroy(r300->draw);
+fail_context:
+    FREE(r300);
+    return NULL;
+}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
new file mode 100644
index 0000000000..aaab1dd2bc
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_CONTEXT_H
+#define R300_CONTEXT_H
+
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "pipe/p_context.h"
+#include "tgsi/tgsi_scan.h"
+#include "util/u_memory.h"
+
+#include "r300_clear.h"
+#include "r300_screen.h"
+#include "r300_winsys.h"
+
+/* Precomputed blend register values; written to the command stream by
+ * r300_emit_blend_state(). */
+struct r300_blend_state {
+ uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */
+ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */
+ uint32_t rop; /* R300_RB3D_ROPCNTL: 0x4e18 */
+ uint32_t dither; /* R300_RB3D_DITHER_CTL: 0x4e50 */
+};
+
+/* Blend constant color in both register layouts. r300_emit_blend_color_state()
+ * picks one based on r300_capabilities::is_r500. */
+struct r300_blend_color_state {
+ /* Chips without is_r500 set: single packed register */
+ uint32_t blend_color; /* R300_RB3D_BLEND_COLOR: 0x4e10 */
+ /* Chips with is_r500 set (RV515 and newer): split across two registers */
+ uint32_t blend_color_red_alpha; /* R500_RB3D_CONSTANT_COLOR_AR: 0x4ef8 */
+ uint32_t blend_color_green_blue; /* R500_RB3D_CONSTANT_COLOR_GB: 0x4efc */
+};
+
+/* Depth, stencil and alpha-test register values; consumed by
+ * r300_emit_dsa_state(). alpha_reference and stencil_ref_bf are stored here
+ * but their register writes are currently commented out in that function. */
+struct r300_dsa_state {
+ uint32_t alpha_function; /* R300_FG_ALPHA_FUNC: 0x4bd4 */
+ uint32_t alpha_reference; /* R500_FG_ALPHA_VALUE: 0x4be0 */
+ uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */
+ uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */
+ uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */
+ uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */
+ uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */
+};
+
+/* Rasterizer register values; consumed by r300_emit_rs_state(). point_size
+ * and line_control are stored here but not written by that function. */
+struct r300_rs_state {
+ /* XXX ugly: carries a full copy of the Gallium rasterizer state */
+ struct pipe_rasterizer_state rs;
+
+ uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */
+ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */
+ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */
+ uint32_t depth_scale_front; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */
+ uint32_t depth_offset_front;/* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */
+ uint32_t depth_scale_back; /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */
+ uint32_t depth_offset_back; /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */
+ uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
+ uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */
+ uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
+ uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
+};
+
+/* Per-sampler texture filter and border color register values. */
+struct r300_sampler_state {
+ uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
+ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */
+ uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
+};
+
+/* Scissor rectangle register values; written as a two-register sequence by
+ * r300_emit_scissor_state(). */
+struct r300_scissor_state {
+ uint32_t scissor_top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */
+ uint32_t scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
+};
+
+/* Placeholder for per-texture register state; it has no members yet.
+ * NOTE(review): a struct without members is a compiler extension in C. */
+struct r300_texture_state {
+};
+
+/* Bits for r300_context::dirty_state; each one marks a state object that
+ * must be (re-)emitted. r300_emit_dirty_state() clears a bit once the
+ * matching state has been written. */
+#define R300_NEW_BLEND 0x0000001
+#define R300_NEW_BLEND_COLOR 0x0000002
+#define R300_NEW_CONSTANTS 0x0000004
+#define R300_NEW_DSA 0x0000008
+#define R300_NEW_FRAMEBUFFERS 0x0000010
+#define R300_NEW_FRAGMENT_SHADER 0x0000020
+#define R300_NEW_RASTERIZER 0x0000040
+#define R300_NEW_SAMPLER 0x0000080
+#define R300_NEW_SCISSOR 0x0008000
+#define R300_NEW_TEXTURE 0x0010000
+#define R300_NEW_VERTEX_FORMAT 0x1000000
+#define R300_NEW_VERTEX_SHADER 0x2000000
+/* Bits 0-25 all set, which covers every flag above. */
+#define R300_NEW_KITCHEN_SINK 0x3ffffff
+
+/* The next several objects are not pure Radeon state; they inherit from
+ * various Gallium classes. */
+
+/* Shader constants for one shader stage, stored as vectors of four floats. */
+struct r300_constant_buffer {
+ /* Buffer of constants */
+ /* XXX first number should be raised */
+ float constants[8][4];
+ /* Number of user-defined constants; the draw path hands
+ * user_count * 4 * sizeof(float) bytes to the draw module */
+ int user_count;
+ /* Total number of constants */
+ int count;
+};
+
+/* Common leading part of r300_fragment_shader and r500_fragment_shader.
+ * r300_context::fs points at this type and is cast to one of the two
+ * derived structs when the shader is emitted. */
+struct r3xx_fragment_shader {
+ /* Parent class */
+ struct pipe_shader_state state;
+ struct tgsi_shader_info info;
+
+ /* Has this shader been translated yet? */
+ boolean translated;
+
+ /* Pixel stack size; written to the US_PIXSIZE register on emit */
+ int stack_size;
+};
+
+/* Fragment shader for chips without is_r500 set; emitted by
+ * r300_emit_fragment_shader(). */
+struct r300_fragment_shader {
+ /* Parent class */
+ struct r3xx_fragment_shader shader;
+
+ /* Number of ALU instructions; bounds the emit loop over instructions[] */
+ int alu_instruction_count;
+
+ /* Number of texture instructions */
+ int tex_instruction_count;
+
+ /* Number of texture indirections */
+ int indirections;
+
+ /* Indirection node offsets */
+ int offset0;
+ int offset1;
+ int offset2;
+ int offset3;
+
+ /* Machine instructions: four register words per ALU instruction */
+ struct {
+ uint32_t alu_rgb_inst;
+ uint32_t alu_rgb_addr;
+ uint32_t alu_alpha_inst;
+ uint32_t alu_alpha_addr;
+ } instructions[64]; /* XXX magic num */
+};
+
+/* Fragment shader for chips with is_r500 set; emitted by
+ * r500_emit_fragment_shader(). */
+struct r500_fragment_shader {
+ /* Parent class */
+ struct r3xx_fragment_shader shader;
+
+ /* Number of used instructions */
+ int instruction_count;
+
+ /* Machine instructions: six dwords each, emitted in inst0..inst5 order */
+ struct {
+ uint32_t inst0;
+ uint32_t inst1;
+ uint32_t inst2;
+ uint32_t inst3;
+ uint32_t inst4;
+ uint32_t inst5;
+ } instructions[256]; /*< XXX magic number */
+};
+
+/* Driver-side texture. Framebuffer surfaces are cast to this type in
+ * r300_emit_fb_state() to reach the backing buffer. */
+struct r300_texture {
+ /* Parent class */
+ struct pipe_texture tex;
+
+ /* Offsets into the buffer, one per mipmap level. */
+ unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* Stride (pitch?) of this texture in bytes */
+ unsigned stride;
+
+ /* Total size of this texture, in bytes. */
+ unsigned size;
+
+ /* Pipe buffer backing this texture. */
+ struct pipe_buffer* buffer;
+};
+
+/* Vertex layout plus the register values derived from it; written by
+ * r300_emit_vertex_format_state(). */
+struct r300_vertex_format {
+ /* Parent class */
+ struct vertex_info vinfo;
+ /* R300_VAP_PROG_STREAM_CNTL_[0-7] */
+ uint32_t vap_prog_stream_cntl[8];
+ /* R300_VAP_PROG_STREAM_CNTL_EXT_[0-7] */
+ uint32_t vap_prog_stream_cntl_ext[8];
+};
+
+/* Driver context. The pipe_context must stay the first member because
+ * r300_context() converts between the two with a plain pointer cast. */
+struct r300_context {
+ /* Parent class */
+ struct pipe_context context;
+
+ /* The interface to the windowing system, etc. */
+ struct r300_winsys* winsys;
+ /* Draw module. Used mostly for SW TCL. */
+ struct draw_context* draw;
+
+ /* Various CSO state objects. */
+ /* Blend state. */
+ struct r300_blend_state* blend_state;
+ /* Blend color state. Allocated in r300_create_context(). */
+ struct r300_blend_color_state* blend_color_state;
+ /* Shader constants. */
+ struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES];
+ /* Depth, stencil, and alpha state. */
+ struct r300_dsa_state* dsa_state;
+ /* Fragment shader. Really an r300_fragment_shader or an
+ * r500_fragment_shader, depending on is_r500. */
+ struct r3xx_fragment_shader* fs;
+ /* Framebuffer state. We currently don't need our own version of this. */
+ struct pipe_framebuffer_state framebuffer_state;
+ /* Rasterizer state. */
+ struct r300_rs_state* rs_state;
+ /* Sampler states. */
+ struct r300_sampler_state* sampler_states[8];
+ int sampler_count;
+ /* Scissor state. Allocated in r300_create_context(). */
+ struct r300_scissor_state* scissor_state;
+ /* Texture states. */
+ struct r300_texture* textures[8];
+ struct r300_texture_state* texture_states[8];
+ int texture_count;
+ /* Vertex buffers. */
+ struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
+ int vertex_buffer_count;
+ /* Vertex information. */
+ struct r300_vertex_format vertex_info;
+ /* Bitmask of dirty state objects (R300_NEW_* flags). */
+ uint32_t dirty_state;
+ /* Flag indicating whether or not the HW is dirty. */
+ uint32_t dirty_hw;
+};
+
+/* Convenience cast wrapper: converts a pipe_context pointer to the
+ * r300_context that embeds it as its first member. Marked INLINE because
+ * this is a static function defined in a header; without it every file that
+ * includes the header but never calls the wrapper carries an unused static
+ * function. */
+static INLINE struct r300_context* r300_context(struct pipe_context* context)
+{
+    return (struct r300_context*)context;
+}
+
+/* Context initialization. All three are called from r300_create_context();
+ * the result of r300_draw_swtcl_stage() is installed as the draw module's
+ * rasterize stage. */
+struct draw_stage* r300_draw_swtcl_stage(struct r300_context* r300);
+void r300_init_state_functions(struct r300_context* r300);
+void r300_init_surface_functions(struct r300_context* r300);
+
+/* Fun with includes: r300_winsys also declares this prototype.
+ * We'll just step out in that case...
+ * NOTE(review): this header includes r300_winsys.h near its top, so if
+ * R300_WINSYS_H is that header's include guard the declaration below is
+ * never compiled -- confirm. */
+#ifndef R300_WINSYS_H
+struct pipe_context* r300_create_context(struct pipe_screen* screen,
+ struct pipe_winsys* winsys,
+ struct r300_winsys* r300_winsys);
+#endif
+
+#endif /* R300_CONTEXT_H */
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
new file mode 100644
index 0000000000..d8038ff1e1
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_CS_H
+#define R300_CS_H
+
+#include "util/u_math.h"
+
+#include "r300_reg.h"
+#include "r300_winsys.h"
+
+/* Yes, I know macros are ugly. However, they are much prettier than the code
+ * that they neatly hide away, and don't have the cost of function setup, so
+ * we're going to use them. */
+
+/* Maximum command stream size (64 * 1024 / 4 = 16384). Parenthesised so the
+ * macro can be used safely inside larger expressions. */
+#define MAX_CS_SIZE (64 * 1024 / 4)
+
+/* Set to non-zero to make the register-write macros log every write. */
+#define VERY_VERBOSE_REGISTERS 0
+
+/* XXX stolen from radeon_drm.h */
+#define RADEON_GEM_DOMAIN_CPU 0x1
+#define RADEON_GEM_DOMAIN_GTT 0x2
+#define RADEON_GEM_DOMAIN_VRAM 0x4
+
+/* XXX stolen from radeon_reg.h */
+#define RADEON_CP_PACKET0 0x0
+
+/* XXX stolen from radeon_reg.h. CP_PACKET3() below needs this value, which
+ * is not defined anywhere else in this header; the guard keeps any
+ * definition made by an earlier header. */
+#ifndef RADEON_CP_PACKET3
+#define RADEON_CP_PACKET3 0xC0000000
+#endif
+
+/* Header dword for a type-0 packet: `count` goes in bits 16 and up, the
+ * register byte offset divided by four in the low bits. */
+#define CP_PACKET0(register, count) \
+    (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2))
+
+/* Header dword for a type-3 packet carrying opcode `op`. */
+#define CP_PACKET3(op, count) \
+    (RADEON_CP_PACKET3 | (op) | ((count) << 16))
+
+/* Declares the locals every other CS macro relies on: cs_winsys, cs and the
+ * running dword budget cs_count. `context` is parenthesised so that any
+ * pointer expression can be passed. */
+#define CS_LOCALS(context) \
+    struct r300_winsys* cs_winsys = (context)->winsys; \
+    struct radeon_cs* cs = cs_winsys->cs; \
+    int cs_count = 0;
+
+/* Forwards to the winsys check_cs hook with the number of dwords about to be
+ * written. Requires CS_LOCALS in scope. */
+#define CHECK_CS(size) \
+ cs_winsys->check_cs(cs, (size))
+
+/* Opens a command stream section of `size` dwords and loads cs_count with
+ * that budget; each OUT_CS* decrements it and END_CS warns if it is not back
+ * at zero. `size` is parenthesised everywhere so that compound expressions
+ * (e.g. a ?: expression) expand safely. Requires CS_LOCALS in scope. */
+#define BEGIN_CS(size) do { \
+    CHECK_CS(size); \
+    debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \
+        (size), __FUNCTION__, __FILE__, __LINE__); \
+    cs_winsys->begin_cs(cs, (size), __FILE__, __FUNCTION__, __LINE__); \
+    cs_count = (size); \
+} while (0)
+
+/* Writes one dword to the command stream and consumes one unit of cs_count. */
+#define OUT_CS(value) do { \
+ cs_winsys->write_cs_dword(cs, (value)); \
+ cs_count--; \
+} while (0)
+
+/* Like OUT_CS, but passes `value` through fui() first; meant for float
+ * arguments. */
+#define OUT_CS_32F(value) do { \
+ cs_winsys->write_cs_dword(cs, fui(value)); \
+ cs_count--; \
+} while (0)
+
+/* Writes a single register: one packet0 header plus one value, i.e. two
+ * dwords of cs_count. Arguments are parenthesised so compound expressions
+ * expand safely inside debug_printf() and assert(). */
+#define OUT_CS_REG(register, value) do { \
+    if (VERY_VERBOSE_REGISTERS) \
+        debug_printf("r300: writing 0x%08X to register 0x%04X\n", \
+            (value), (register)); \
+    assert((register)); \
+    OUT_CS(CP_PACKET0((register), 0)); \
+    OUT_CS(value); \
+} while (0)
+
+/* Writes only the packet0 header for `count` consecutive registers starting
+ * at `register` (one dword of cs_count); the caller follows up with `count`
+ * OUT_CS values.
+ * Note: This expects count to be the number of registers,
+ * not the actual packet0 count! */
+#define OUT_CS_REG_SEQ(register, count) do { \
+    if (VERY_VERBOSE_REGISTERS) \
+        debug_printf("r300: writing register sequence of %d to 0x%04X\n", \
+            (count), (register)); \
+    assert((register)); \
+    OUT_CS(CP_PACKET0((register), ((count) - 1))); \
+} while (0)
+
+/* Writes `offset` followed by a relocation for buffer `bo`. Consumes three
+ * units of cs_count in total: one through OUT_CS and two for the relocation
+ * itself. Arguments are parenthesised so compound expressions expand
+ * safely. */
+#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \
+    debug_printf("r300: writing relocation for buffer %p, offset %d\n", \
+        (bo), (offset)); \
+    assert((bo)); \
+    OUT_CS(offset); \
+    cs_winsys->write_cs_reloc(cs, (bo), (rd), (wd), (flags)); \
+    cs_count -= 2; \
+} while (0)
+
+/* Closes the section opened by BEGIN_CS. Prints a warning when cs_count is
+ * not zero, i.e. when the number of dwords written differs from the budget
+ * given to BEGIN_CS. */
+#define END_CS do { \
+ debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, __FILE__, \
+ __LINE__); \
+ if (cs_count != 0) \
+ debug_printf("r300: Warning: cs_count off by %d\n", cs_count); \
+ cs_winsys->end_cs(cs, __FILE__, __FUNCTION__, __LINE__); \
+} while (0)
+
+/* Logs the call site and calls the winsys flush_cs hook. */
+#define FLUSH_CS do { \
+ debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, __FILE__, \
+ __LINE__); \
+ cs_winsys->flush_cs(cs); \
+} while (0)
+
+#include "r300_cs_inlines.h"
+
+#endif /* R300_CS_H */
diff --git a/src/gallium/drivers/r300/r300_cs_inlines.h b/src/gallium/drivers/r300/r300_cs_inlines.h
new file mode 100644
index 0000000000..03bb608eb9
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_cs_inlines.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* r300_cs_inlines: This is just a handful of useful inlines for sending
+ * (very) common instructions to the CS buffer. Should only be included from
+ * r300_cs.h, probably. */
+
+#ifdef R300_CS_H
+
+/* Packet0 header flag (bit 15), OR-ed into the header by OUT_CS_ONE_REG. */
+#define RADEON_ONE_REG_WR (1 << 15)
+
+/* Writes the packet0 header for `count` dwords with RADEON_ONE_REG_WR set
+ * (one dword of cs_count); the caller follows up with `count` OUT_CS values.
+ * Arguments are parenthesised so compound expressions expand safely. */
+#define OUT_CS_ONE_REG(register, count) do { \
+    if (VERY_VERBOSE_REGISTERS) \
+        debug_printf("r300: writing data sequence of %d to 0x%04X\n", \
+            (count), (register)); \
+    assert((register)); \
+    OUT_CS(CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \
+} while (0)
+
+/* Writes RADEON_WAIT_UNTIL with bits 14-18 set; costs two dwords of
+ * cs_count. NOTE(review): the meaning of the individual bits is not named
+ * here -- see the register reference. */
+#define R300_PACIFY do { \
+ OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 14) | (1 << 15) | (1 << 16) | (1 << 17) | \
+ (1 << 18)); \
+} while (0)
+
+/* Writes 0 to R300_SC_SCREENDOOR, issues R300_PACIFY, then writes 0xffffff
+ * to the same register; costs six dwords of cs_count in total. */
+#define R300_SCREENDOOR do { \
+ OUT_CS_REG(R300_SC_SCREENDOOR, 0x0); \
+ R300_PACIFY; \
+ OUT_CS_REG(R300_SC_SCREENDOOR, 0xffffff); \
+} while (0)
+
+#endif /* R300_CS_H */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
new file mode 100644
index 0000000000..960f45f651
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* r300_emit: Functions for emitting state. */
+
+#include "r300_emit.h"
+
+/* Emit blend state: CBLEND and ABLEND as a two-register sequence, then
+ * ROPCNTL and DITHER_CTL as single writes (1 + 2 + 2 + 2 = 7 dwords). */
+void r300_emit_blend_state(struct r300_context* r300,
+ struct r300_blend_state* blend)
+{
+ CS_LOCALS(r300);
+ BEGIN_CS(7);
+ OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 2);
+ OUT_CS(blend->blend_control);
+ OUT_CS(blend->alpha_blend_control);
+ OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
+ OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither);
+ END_CS;
+}
+
+/* Emit the blend constant color. Chips with is_r500 set take it as two
+ * registers (AR, GB; 3 dwords), all others as a single packed register
+ * (2 dwords). */
+void r300_emit_blend_color_state(struct r300_context* r300,
+ struct r300_blend_color_state* bc)
+{
+ struct r300_screen* r300screen =
+ (struct r300_screen*)r300->context.screen;
+ CS_LOCALS(r300);
+ if (r300screen->caps->is_r500) {
+ BEGIN_CS(3);
+ OUT_CS_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
+ OUT_CS(bc->blend_color_red_alpha);
+ OUT_CS(bc->blend_color_green_blue);
+ END_CS;
+ } else {
+ BEGIN_CS(2);
+ OUT_CS_REG(R300_RB3D_BLEND_COLOR, bc->blend_color);
+ END_CS;
+ }
+}
+
+/* Emit depth/stencil/alpha state: FG_ALPHA_FUNC, a three-register sequence
+ * starting at ZB_CNTL, and ZB_ZTOP (2 + 1 + 3 + 2 = 8 dwords). */
+void r300_emit_dsa_state(struct r300_context* r300,
+ struct r300_dsa_state* dsa)
+{
+ struct r300_screen* r300screen =
+ (struct r300_screen*)r300->context.screen;
+ CS_LOCALS(r300);
+ /* Both arms are 8 because the R500-only writes below are commented out;
+ * the R500 arm has to grow by 2 for each one that gets enabled. */
+ BEGIN_CS(r300screen->caps->is_r500 ? 8 : 8);
+ OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
+ /* XXX figure out the r300 counterpart for this */
+ if (r300screen->caps->is_r500) {
+ /* OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); */
+ }
+ OUT_CS_REG_SEQ(R300_ZB_CNTL, 3);
+ OUT_CS(dsa->z_buffer_control);
+ OUT_CS(dsa->z_stencil_control);
+ OUT_CS(dsa->stencil_ref_mask);
+ OUT_CS_REG(R300_ZB_ZTOP, dsa->z_buffer_top);
+ if (r300screen->caps->is_r500) {
+ /* OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); */
+ }
+ END_CS;
+}
+
+/* Emit an R3xx/R4xx fragment shader: seven configuration registers followed
+ * by the four register words of every ALU instruction.
+ *
+ * r300: context whose command stream is written.
+ * fs:   translated shader; alu_instruction_count entries of instructions[]
+ *       are emitted. */
+void r300_emit_fragment_shader(struct r300_context* r300,
+                               struct r300_fragment_shader* fs)
+{
+    CS_LOCALS(r300);
+    int i;
+
+    /* 7 single-register writes (2 dwords each) up front, then 4 such writes
+     * per ALU instruction. The budget used to be a fixed 22, which is only
+     * right for shaders with exactly one ALU instruction. */
+    BEGIN_CS(14 + (fs->alu_instruction_count * 8));
+
+    OUT_CS_REG(R300_US_CONFIG, MAX2(fs->indirections - 1, 0));
+    OUT_CS_REG(R300_US_PIXSIZE, fs->shader.stack_size);
+    /* XXX figure out exactly how big the sizes are on this reg */
+    OUT_CS_REG(R300_US_CODE_OFFSET, 0x0);
+    /* XXX figure these ones out a bit better kthnx */
+    OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0);
+    OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0);
+    OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0);
+    OUT_CS_REG(R300_US_CODE_ADDR_3, R300_RGBA_OUT);
+
+    /* Instruction i lives 4 * i bytes past each *_0 register. */
+    for (i = 0; i < fs->alu_instruction_count; i++) {
+        OUT_CS_REG(R300_US_ALU_RGB_INST_0 + (4 * i),
+            fs->instructions[i].alu_rgb_inst);
+        OUT_CS_REG(R300_US_ALU_RGB_ADDR_0 + (4 * i),
+            fs->instructions[i].alu_rgb_addr);
+        OUT_CS_REG(R300_US_ALU_ALPHA_INST_0 + (4 * i),
+            fs->instructions[i].alu_alpha_inst);
+        OUT_CS_REG(R300_US_ALU_ALPHA_ADDR_0 + (4 * i),
+            fs->instructions[i].alu_alpha_addr);
+    }
+
+    END_CS;
+}
+
+/* Emit an R5xx fragment shader: four single-register writes (8 dwords), one
+ * OUT_CS_ONE_REG header (1 dword) and six dwords per instruction, which is
+ * where the 9 + 6 * instruction_count budget comes from. */
+void r500_emit_fragment_shader(struct r300_context* r300,
+ struct r500_fragment_shader* fs)
+{
+ CS_LOCALS(r300);
+ int i = 0;
+ BEGIN_CS(9 + (fs->instruction_count * 6));
+ OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+ OUT_CS_REG(R500_US_PIXSIZE, fs->shader.stack_size);
+ /* NOTE(review): the end address is given as instruction_count rather than
+ * instruction_count - 1 -- confirm against the register reference. */
+ OUT_CS_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(fs->instruction_count));
+
+ /* Select the instruction vector, then stream all instruction dwords
+ * through the single data register. */
+ OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR);
+ OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA,
+ fs->instruction_count * 6);
+ for (i = 0; i < fs->instruction_count; i++) {
+ OUT_CS(fs->instructions[i].inst0);
+ OUT_CS(fs->instructions[i].inst1);
+ OUT_CS(fs->instructions[i].inst2);
+ OUT_CS(fs->instructions[i].inst3);
+ OUT_CS(fs->instructions[i].inst4);
+ OUT_CS(fs->instructions[i].inst5);
+ }
+ END_CS;
+}
+
+/* Translate pipe_format into US_OUT_FMT. Note that formats are stored from
+ * C3 to C0.
+ *
+ * Only PIPE_FORMAT_A8R8G8B8_UNORM is handled; every other format yields
+ * R300_US_OUT_FMT_UNUSED. Every path of the switch returns, so the
+ * unreachable trailing `return 0;` has been dropped. */
+uint32_t translate_out_fmt(enum pipe_format format)
+{
+    switch (format) {
+        case PIPE_FORMAT_A8R8G8B8_UNORM:
+            return R300_US_OUT_FMT_C4_8 |
+                R300_C0_SEL_B | R300_C1_SEL_G |
+                R300_C2_SEL_R | R300_C3_SEL_A;
+        default:
+            return R300_US_OUT_FMT_UNUSED;
+    }
+}
+
+/* XXX add pitch, stride */
+/* Emit framebuffer state: for every color buffer its offset (as a
+ * relocation) and output format, for the depth/stencil buffer its offset
+ * and format, then a flush of the destination and Z caches.
+ *
+ * r300: context whose command stream is written.
+ * fb:   framebuffer state; nr_cbufs entries of cbufs[] and the optional
+ *       zsbuf are read. */
+void r300_emit_fb_state(struct r300_context* r300,
+                        struct pipe_framebuffer_state* fb)
+{
+    CS_LOCALS(r300);
+    struct r300_texture* tex;
+    int i;
+
+    /* Per buffer: OUT_CS_REG_SEQ header (1) + OUT_CS_RELOC (3) +
+     * OUT_CS_REG (2) = 6 dwords, not 5; the two trailing cache writes add
+     * 4 more. */
+    BEGIN_CS((6 * fb->nr_cbufs) + (fb->zsbuf ? 6 : 0) + 4);
+    for (i = 0; i < fb->nr_cbufs; i++) {
+        tex = (struct r300_texture*)fb->cbufs[i]->texture;
+        OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
+        OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+        OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i),
+            translate_out_fmt(fb->cbufs[i]->format));
+    }
+
+    if (fb->zsbuf) {
+        tex = (struct r300_texture*)fb->zsbuf->texture;
+        OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
+        OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+        /* Only Z24S8 has a dedicated format value; anything else writes 0. */
+        if (fb->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) {
+            OUT_CS_REG(R300_ZB_FORMAT,
+                R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL);
+        } else {
+            OUT_CS_REG(R300_ZB_FORMAT, 0x0);
+        }
+    }
+
+    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+        R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+        R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+    END_CS;
+}
+
+/* Emit rasterizer state: VAP_CNTL_STATUS, a six-register sequence starting
+ * at SU_POLY_OFFSET_FRONT_SCALE (polygon offset values, enable and cull
+ * mode), then the two line stipple registers (2 + 1 + 6 + 2 + 2 = 13
+ * dwords). rs->point_size and rs->line_control are not written here.
+ * The unused r300screen local has been removed. */
+void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs)
+{
+    CS_LOCALS(r300);
+    BEGIN_CS(13);
+    OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status);
+    OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 6);
+    OUT_CS(rs->depth_scale_front);
+    OUT_CS(rs->depth_offset_front);
+    OUT_CS(rs->depth_scale_back);
+    OUT_CS(rs->depth_offset_back);
+    OUT_CS(rs->polygon_offset_enable);
+    OUT_CS(rs->cull_mode);
+    OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config);
+    OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value);
+    END_CS;
+}
+
+/* Emit the scissor rectangle as a two-register sequence starting at
+ * SC_SCISSORS_TL (1 + 2 = 3 dwords). */
+void r300_emit_scissor_state(struct r300_context* r300,
+ struct r300_scissor_state* scissor)
+{
+ CS_LOCALS(r300);
+ BEGIN_CS(3);
+ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_CS(scissor->scissor_top_left);
+ OUT_CS(scissor->scissor_bottom_right);
+ END_CS;
+}
+
+/* Emit the vertex format held in r300->vertex_info in two sections: first
+ * the four hwfmt words (two 2-register sequences, 6 dwords), then the eight
+ * PROG_STREAM_CNTL and eight PROG_STREAM_CNTL_EXT words (18 dwords). */
+void r300_emit_vertex_format_state(struct r300_context* r300)
+{
+    CS_LOCALS(r300);
+    struct r300_vertex_format* vf = &r300->vertex_info;
+    int reg;
+
+    /* Section 1: hardware vertex format words. */
+    BEGIN_CS(6);
+    OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
+    OUT_CS(vf->vinfo.hwfmt[0]);
+    OUT_CS(vf->vinfo.hwfmt[1]);
+    OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+    OUT_CS(vf->vinfo.hwfmt[2]);
+    OUT_CS(vf->vinfo.hwfmt[3]);
+    END_CS;
+
+    /* Section 2: programmable stream control, base then extended. */
+    BEGIN_CS(18);
+    OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8);
+    for (reg = 0; reg < 8; reg++) {
+        OUT_CS(vf->vap_prog_stream_cntl[reg]);
+    }
+    OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8);
+    for (reg = 0; reg < 8; reg++) {
+        OUT_CS(vf->vap_prog_stream_cntl_ext[reg]);
+    }
+    END_CS;
+}
+
+/* Emit all dirty state.
+ * Returns early when neither dirty_state nor dirty_hw is set. Otherwise it
+ * refreshes derived state and then, for each R300_NEW_* bit handled below,
+ * emits the matching state object and clears the bit.
+ * NOTE(review): R300_NEW_FRAMEBUFFERS, R300_NEW_CONSTANTS, R300_NEW_SAMPLER,
+ * R300_NEW_TEXTURE and R300_NEW_VERTEX_SHADER are never handled or cleared
+ * here, and dirty_hw is never reset in this function -- confirm whether
+ * that happens elsewhere. */
+void r300_emit_dirty_state(struct r300_context* r300)
+{
+ struct r300_screen* r300screen =
+ (struct r300_screen*)r300->context.screen;
+ /* The CS locals are not used yet; see the size-check XXX below. */
+ CS_LOCALS(r300);
+
+ if (!(r300->dirty_state) && !(r300->dirty_hw)) {
+ return;
+ }
+
+ r300_update_derived_state(r300);
+
+ /* XXX check size */
+
+ if (r300->dirty_state & R300_NEW_BLEND) {
+ r300_emit_blend_state(r300, r300->blend_state);
+ r300->dirty_state &= ~R300_NEW_BLEND;
+ }
+
+ if (r300->dirty_state & R300_NEW_BLEND_COLOR) {
+ r300_emit_blend_color_state(r300, r300->blend_color_state);
+ r300->dirty_state &= ~R300_NEW_BLEND_COLOR;
+ }
+
+ if (r300->dirty_state & R300_NEW_DSA) {
+ r300_emit_dsa_state(r300, r300->dsa_state);
+ r300->dirty_state &= ~R300_NEW_DSA;
+ }
+
+ /* r300->fs is the common base type; the chip generation decides which
+ * derived shader struct it really points at. */
+ if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) {
+ if (r300screen->caps->is_r500) {
+ r500_emit_fragment_shader(r300,
+ (struct r500_fragment_shader*)r300->fs);
+ } else {
+ r300_emit_fragment_shader(r300,
+ (struct r300_fragment_shader*)r300->fs);
+ }
+ r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER;
+ }
+
+ if (r300->dirty_state & R300_NEW_RASTERIZER) {
+ r300_emit_rs_state(r300, r300->rs_state);
+ r300->dirty_state &= ~R300_NEW_RASTERIZER;
+ }
+
+ if (r300->dirty_state & R300_NEW_SCISSOR) {
+ r300_emit_scissor_state(r300, r300->scissor_state);
+ r300->dirty_state &= ~R300_NEW_SCISSOR;
+ }
+
+ if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) {
+ r300_emit_vertex_format_state(r300);
+ r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT;
+ }
+}
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
new file mode 100644
index 0000000000..f21ca33171
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_EMIT_H
+#define R300_EMIT_H
+
+#include "util/u_math.h"
+
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_screen.h"
+
+void r300_emit_blend_state(struct r300_context* r300,
+ struct r300_blend_state* blend);
+
+void r300_emit_blend_color_state(struct r300_context* r300,
+ struct r300_blend_color_state* bc);
+
+void r300_emit_dsa_state(struct r300_context* r300,
+ struct r300_dsa_state* dsa);
+
+void r300_emit_fragment_shader(struct r300_context* r300,
+ struct r300_fragment_shader* fs);
+
+void r500_emit_fragment_shader(struct r300_context* r300,
+ struct r500_fragment_shader* fs);
+
+void r300_emit_fb_state(struct r300_context* r300,
+ struct pipe_framebuffer_state* fb);
+
+void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs);
+
+void r300_emit_scissor_state(struct r300_context* r300,
+ struct r300_scissor_state* scissor);
+
+
+/* Emit all dirty state. */
+void r300_emit_dirty_state(struct r300_context* r300);
+
+#endif /* R300_EMIT_H */
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
new file mode 100644
index 0000000000..3766f0a0a7
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_flush.h"
+
+static void r300_flush(struct pipe_context* pipe, /* flush callback: flushes the CS only when dirty_hw is set */
+ unsigned flags, /* not referenced in this body */
+ struct pipe_fence_handle** fence) /* not referenced in this body; no fence is written */
+{
+ struct r300_context* r300 = r300_context(pipe);
+ CS_LOCALS(r300);
+
+ if (r300->dirty_hw) { /* when dirty_hw is zero the call does nothing */
+ FLUSH_CS;
+ r300->dirty_state = R300_NEW_KITCHEN_SINK; /* presumably flags every state group for re-emission -- confirm against the R300_NEW_* definitions */
+ r300->dirty_hw = 0;
+ }
+}
+
+void r300_init_flush_functions(struct r300_context* r300) /* Install r300_flush as this context's flush callback. */
+{
+ r300->context.flush = r300_flush;
+}
diff --git a/src/gallium/drivers/r300/r300_flush.h b/src/gallium/drivers/r300/r300_flush.h
new file mode 100644
index 0000000000..a1b224b39c
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_flush.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_FLUSH_H
+#define R300_FLUSH_H
+
+#include "pipe/p_context.h"
+
+#include "r300_context.h"
+#include "r300_cs.h"
+
+void r300_init_flush_functions(struct r300_context* r300);
+
+#endif /* R300_FLUSH_H */
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
new file mode 100644
index 0000000000..8888b39a2f
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -0,0 +1,3263 @@
+/**************************************************************************
+
+Copyright (C) 2004-2005 Nicolai Haehnle et al.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/* *INDENT-OFF* */
+
+#ifndef _R300_REG_H
+#define _R300_REG_H
+
+#define R300_MC_INIT_MISC_LAT_TIMER 0x180
+# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0
+# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4
+# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8
+# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12
+# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16
+# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20
+# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24
+# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28
+
+
+#define R300_MC_INIT_GFX_LAT_TIMER 0x154
+# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0
+# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4
+# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8
+# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12
+# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16
+# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20
+# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24
+# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28
+
+/*
+ * This file contains registers and constants for the R300. They have been
+ * found mostly by examining command buffers captured using glxtest, as well
+ * as by extrapolating some known registers and constants from the R200.
+ * I am fairly certain that they are correct unless stated otherwise
+ * in comments.
+ */
+
+#define R300_SE_VPORT_XSCALE 0x1D98
+#define R300_SE_VPORT_XOFFSET 0x1D9C
+#define R300_SE_VPORT_YSCALE 0x1DA0
+#define R300_SE_VPORT_YOFFSET 0x1DA4
+#define R300_SE_VPORT_ZSCALE 0x1DA8
+#define R300_SE_VPORT_ZOFFSET 0x1DAC
+
+#define R300_VAP_PORT_IDX0 0x2040
+/*
+ * Vertex Array Processing (VAP) Control
+ */
+#define R300_VAP_CNTL 0x2080
+# define R300_PVS_NUM_SLOTS_SHIFT 0
+# define R300_PVS_NUM_CNTLRS_SHIFT 4
+# define R300_PVS_NUM_FPUS_SHIFT 8
+# define R300_VF_MAX_VTX_NUM_SHIFT 18
+# define R300_GL_CLIP_SPACE_DEF (0 << 22)
+# define R300_DX_CLIP_SPACE_DEF (1 << 22)
+# define R500_TCL_STATE_OPTIMIZATION (1 << 23)
+
+/* This register is written directly and also starts data section
+ * in many 3d CP_PACKET3's
+ */
+#define R300_VAP_VF_CNTL 0x2084
+# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0
+# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0)
+# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0)
+# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0)
+# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0)
+# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0)
+
+# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4
+ /* State based - direct writes to registers trigger vertex
+ generation */
+# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4)
+
+ /* I don't think I saw these three used.. */
+# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6
+# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9
+# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10
+
+ /* index size - when not set the indices are assumed to be 16 bit */
+# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11)
+ /* number of vertices */
+# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16
+
+#define R500_VAP_INDEX_OFFSET 0x208c
+
+#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090
+# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
+# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16)
+
+#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094
+ /* each of the following is 3 bits wide, specifies number
+ of components */
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
+# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0
+# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1
+# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2
+# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3
+# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4
+
+#define R300_SE_VTE_CNTL 0x20b0
+# define R300_VPORT_X_SCALE_ENA (1 << 0)
+# define R300_VPORT_X_OFFSET_ENA (1 << 1)
+# define R300_VPORT_Y_SCALE_ENA (1 << 2)
+# define R300_VPORT_Y_OFFSET_ENA (1 << 3)
+# define R300_VPORT_Z_SCALE_ENA (1 << 4)
+# define R300_VPORT_Z_OFFSET_ENA (1 << 5)
+# define R300_VTX_XY_FMT (1 << 8)
+# define R300_VTX_Z_FMT (1 << 9)
+# define R300_VTX_W0_FMT (1 << 10)
+# define R300_SERIAL_PROC_ENA (1 << 11)
+
+#define R300_VAP_VTX_SIZE 0x20b4
+
+/* BEGIN: Vertex data assembly - lots of uncertainties */
+
+/* gap */
+
+/* Maximum Vertex Indx Clamp */
+#define R300_VAP_VF_MAX_VTX_INDX 0x2134
+/* Minimum Vertex Indx Clamp */
+#define R300_VAP_VF_MIN_VTX_INDX 0x2138
+
+/** Vertex assembler/processor control status */
+#define R300_VAP_CNTL_STATUS 0x2140
+/* No swap at all (default) */
+# define R300_VC_NO_SWAP (0 << 0)
+/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */
+# define R300_VC_16BIT_SWAP (1 << 0)
+/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */
+# define R300_VC_32BIT_SWAP (2 << 0)
+/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */
+# define R300_VC_HALF_DWORD_SWAP (3 << 0)
+/* The TCL engine will not be used (as it is logically or even physically removed) */
+# define R300_VAP_TCL_BYPASS (1 << 8)
+/* Read only flag if TCL engine is busy. */
+# define R300_VAP_PVS_BUSY (1 << 11)
+/* TODO: gap for MAX_MPS */
+/* Read only flag if the vertex store is busy. */
+# define R300_VAP_VS_BUSY (1 << 24)
+/* Read only flag if the reciprocal engine is busy. */
+# define R300_VAP_RCP_BUSY (1 << 25)
+/* Read only flag if the viewport transform engine is busy. */
+# define R300_VAP_VTE_BUSY (1 << 26)
+/* Read only flag if the memory interface unit is busy. */
+# define R300_VAP_MUI_BUSY (1 << 27)
+/* Read only flag if the vertex cache is busy. */
+# define R300_VAP_VC_BUSY (1 << 28)
+/* Read only flag if the vertex fetcher is busy. */
+# define R300_VAP_VF_BUSY (1 << 29)
+/* Read only flag if the register pipeline is busy. */
+# define R300_VAP_REGPIPE_BUSY (1 << 30)
+/* Read only flag if the VAP engine is busy. */
+# define R300_VAP_VAP_BUSY (1 << 31)
+
+/* gap */
+
+/* Where do we get our vertex data?
+ *
+ * Vertex data comes either from immediate mode registers or from
+ * vertex arrays.
+ * There appears to be no mixed mode (though we can force the pitch of
+ * vertex arrays to 0, effectively reusing the same element over and over
+ * again).
+ *
+ * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure
+ * if these registers influence vertex array processing.
+ *
+ * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3.
+ *
+ * In both cases, vertex attributes are then passed through INPUT_ROUTE.
+ *
+ * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data
+ * into the vertex processor's input registers.
+ * The first word routes the first input, the second word the second, etc.
+ * The corresponding input is routed into the register with the given index.
+ * The list is ended by a word with INPUT_ROUTE_END set.
+ *
+ * Always set COMPONENTS_4 in immediate mode.
+ */
+
+#define R300_VAP_PROG_STREAM_CNTL_0 0x2150
+# define R300_DATA_TYPE_0_SHIFT 0
+# define R300_DATA_TYPE_FLOAT_1 0
+# define R300_DATA_TYPE_FLOAT_2 1
+# define R300_DATA_TYPE_FLOAT_3 2
+# define R300_DATA_TYPE_FLOAT_4 3
+# define R300_DATA_TYPE_BYTE 4
+# define R300_DATA_TYPE_D3DCOLOR 5
+# define R300_DATA_TYPE_SHORT_2 6
+# define R300_DATA_TYPE_SHORT_4 7
+# define R300_DATA_TYPE_VECTOR_3_TTT 8
+# define R300_DATA_TYPE_VECTOR_3_EET 9
+# define R300_SKIP_DWORDS_SHIFT 4
+# define R300_DST_VEC_LOC_SHIFT 8
+# define R300_LAST_VEC (1 << 13)
+# define R300_SIGNED (1 << 14)
+# define R300_NORMALIZE (1 << 15)
+# define R300_DATA_TYPE_1_SHIFT 16
+#define R300_VAP_PROG_STREAM_CNTL_1 0x2154
+#define R300_VAP_PROG_STREAM_CNTL_2 0x2158
+#define R300_VAP_PROG_STREAM_CNTL_3 0x215C
+#define R300_VAP_PROG_STREAM_CNTL_4 0x2160
+#define R300_VAP_PROG_STREAM_CNTL_5 0x2164
+#define R300_VAP_PROG_STREAM_CNTL_6 0x2168
+#define R300_VAP_PROG_STREAM_CNTL_7 0x216C
+/* gap */
+
+/* Notes:
+ * - always set up to produce at least two attributes:
+ * if vertex program uses only position, fglrx will set normal, too
+ * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal.
+ */
+#define R300_VAP_VTX_STATE_CNTL 0x2180
+# define R300_COLOR_0_ASSEMBLY_SHIFT 0
+# define R300_SEL_COLOR 0
+# define R300_SEL_USER_COLOR_0 1
+# define R300_SEL_USER_COLOR_1 2
+# define R300_COLOR_1_ASSEMBLY_SHIFT 2
+# define R300_COLOR_2_ASSEMBLY_SHIFT 4
+# define R300_COLOR_3_ASSEMBLY_SHIFT 6
+# define R300_COLOR_4_ASSEMBLY_SHIFT 8
+# define R300_COLOR_5_ASSEMBLY_SHIFT 10
+# define R300_COLOR_6_ASSEMBLY_SHIFT 12
+# define R300_COLOR_7_ASSEMBLY_SHIFT 14
+# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16)
+
+/*
+ * Each bit in this field applies to the corresponding vector in the VSM
+ * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit
+ * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream.
+ */
+#define R300_VAP_VSM_VTX_ASSM 0x2184
+# define R300_INPUT_CNTL_POS 0x00000001
+# define R300_INPUT_CNTL_NORMAL 0x00000002
+# define R300_INPUT_CNTL_COLOR 0x00000004
+# define R300_INPUT_CNTL_TC0 0x00000400
+# define R300_INPUT_CNTL_TC1 0x00000800
+# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */
+# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */
+# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */
+# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */
+# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */
+# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */
+
+/* Programmable Stream Control Signed Normalize Control */
+#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc
+# define SGN_NORM_ZERO 0
+# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1
+# define SGN_NORM_NO_ZERO 2
+
+/* gap */
+
+/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0
+ * are set to a swizzling bit pattern, other words are 0.
+ *
+ * In immediate mode, the pattern is always set to xyzw. In vertex array
+ * mode, the swizzling pattern is e.g. used to set zw components in texture
+ * coordinates with only two components.
+ */
+#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0
+# define R300_SWIZZLE0_SHIFT 0
+# define R300_SWIZZLE_SELECT_X_SHIFT 0
+# define R300_SWIZZLE_SELECT_Y_SHIFT 3
+# define R300_SWIZZLE_SELECT_Z_SHIFT 6
+# define R300_SWIZZLE_SELECT_W_SHIFT 9
+
+# define R300_SWIZZLE_SELECT_X 0
+# define R300_SWIZZLE_SELECT_Y 1
+# define R300_SWIZZLE_SELECT_Z 2
+# define R300_SWIZZLE_SELECT_W 3
+# define R300_SWIZZLE_SELECT_FP_ZERO 4
+# define R300_SWIZZLE_SELECT_FP_ONE 5
+/* alternate forms for r300_emit.c */
+# define R300_INPUT_ROUTE_SELECT_X 0
+# define R300_INPUT_ROUTE_SELECT_Y 1
+# define R300_INPUT_ROUTE_SELECT_Z 2
+# define R300_INPUT_ROUTE_SELECT_W 3
+# define R300_INPUT_ROUTE_SELECT_ZERO 4
+# define R300_INPUT_ROUTE_SELECT_ONE 5
+
+# define R300_WRITE_ENA_SHIFT 12
+# define R300_WRITE_ENA_X 1
+# define R300_WRITE_ENA_Y 2
+# define R300_WRITE_ENA_Z 4
+# define R300_WRITE_ENA_W 8
+# define R300_SWIZZLE1_SHIFT 16
+#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4
+#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8
+#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec
+#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0
+#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4
+#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8
+#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc
+
+/* END: Vertex data assembly */
+
+/* gap */
+
+/* BEGIN: Upload vertex program and data */
+
+/*
+ * The programmable vertex shader unit has a memory bank of unknown size
+ * that can be written to in 16 byte units by writing the address into
+ * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs).
+ *
+ * Pointers into the memory bank are always in multiples of 16 bytes.
+ *
+ * The memory bank is divided into areas with fixed meaning.
+ *
+ * Starting at address UPLOAD_PROGRAM: Vertex program instructions.
+ * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB),
+ * whereas the difference between known addresses suggests size 512.
+ *
+ * Starting at address UPLOAD_PARAMETERS: Vertex program parameters.
+ * Native reported limits and the VPI layout suggest size 256, whereas
+ * difference between known addresses suggests size 512.
+ *
+ * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the
+ * floating point pointsize. The exact purpose of this state is uncertain,
+ * as there is also the R300_RE_POINTSIZE register.
+ *
+ * Multiple vertex programs and parameter sets can be loaded at once,
+ * which could explain the size discrepancy.
+ */
+#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200
+# define R300_PVS_CODE_START 0
+# define R300_MAX_PVS_CODE_LINES 256
+# define R500_MAX_PVS_CODE_LINES 1024
+# define R300_PVS_CONST_START 512
+# define R500_PVS_CONST_START 1024
+# define R300_MAX_PVS_CONST_VECS 256
+# define R500_MAX_PVS_CONST_VECS 1024
+# define R300_PVS_UCP_START 1024
+# define R500_PVS_UCP_START 1536
+# define R300_POINT_VPORT_SCALE_OFFSET 1030
+# define R500_POINT_VPORT_SCALE_OFFSET 1542
+# define R300_POINT_GEN_TEX_OFFSET 1031
+# define R500_POINT_GEN_TEX_OFFSET 1543
+
+/*
+ * These are obsolete defines from r300_context.h, but they might give some
+ * clues when investigating the addresses further...
+ */
+#if 0
+#define VSF_DEST_PROGRAM 0x0
+#define VSF_DEST_MATRIX0 0x200
+#define VSF_DEST_MATRIX1 0x204
+#define VSF_DEST_MATRIX2 0x208
+#define VSF_DEST_VECTOR0 0x20c
+#define VSF_DEST_VECTOR1 0x20d
+#define VSF_DEST_UNKNOWN1 0x400
+#define VSF_DEST_UNKNOWN2 0x406
+#endif
+
+/* gap */
+
+#define R300_VAP_PVS_UPLOAD_DATA 0x2208
+
+/* END: Upload vertex program and data */
+
+/* gap */
+
+/* I do not know the purpose of this register. However, I do know that
+ * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL
+ * for normal rendering.
+ *
+ * 2007-11-05: This register is the user clip plane control register, but there
+ * also seems to be a rendering mode control; the NORMAL/CLEAR defines.
+ *
+ * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view
+ */
+#define R300_VAP_CLIP_CNTL 0x221C
+# define R300_VAP_UCP_ENABLE_0 (1 << 0)
+# define R300_VAP_UCP_ENABLE_1 (1 << 1)
+# define R300_VAP_UCP_ENABLE_2 (1 << 2)
+# define R300_VAP_UCP_ENABLE_3 (1 << 3)
+# define R300_VAP_UCP_ENABLE_4 (1 << 4)
+# define R300_VAP_UCP_ENABLE_5 (1 << 5)
+# define R300_PS_UCP_MODE_DIST_COP (0 << 14)
+# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14)
+# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14)
+# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14)
+# define R300_CLIP_DISABLE (1 << 16)
+# define R300_UCP_CULL_ONLY_ENABLE (1 << 17)
+# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18)
+# define R500_COLOR2_IS_TEXTURE (1 << 20)
+# define R500_COLOR3_IS_TEXTURE (1 << 21)
+
+/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first
+ * plane is per-pixel and the second plane is per-vertex.
+ *
+ * This was determined by experimentation alone but I believe it is correct.
+ *
+ * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest.
+ */
+#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220
+#define R300_VAP_GB_VERT_DISC_ADJ 0x2224
+#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
+#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
+
+/* gap */
+
+/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
+ * rendering commands and overwriting vertex program parameters.
+ * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and
+ * avoids bugs caused by still running shaders reading bad data from memory.
+ */
+#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284
+
+/* This register is used to define the number of core clocks to wait for a
+ * vertex to be received by the VAP input controller (while the primitive
+ * path is backed up) before forcing any accumulated vertices to be submitted
+ * to the vertex processing path.
+ */
+#define VAP_PVS_VTX_TIMEOUT_REG 0x2288
+# define R300_2288_R300 0x00750000 /* -- nh */
+# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+
+/* gap */
+
+/* Addresses are relative to the vertex program instruction area of the
+ * memory bank. PROGRAM_END points to the last instruction of the active
+ * program
+ *
+ * The meaning of the two UNKNOWN fields is obviously not known. However,
+ * experiments so far have shown that both *must* point to an instruction
+ * inside the vertex program, otherwise the GPU locks up.
+ *
+ * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and
+ * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to
+ * position takes place.
+ *
+ * Most likely this is used to ignore the rest of the program in cases
+ * where a group of verts isn't visible. For some reason this "section"
+ * is sometimes accepted other instruction that have no relationship with
+ * position calculations.
+ */
+#define R300_VAP_PVS_CODE_CNTL_0 0x22D0
+# define R300_PVS_FIRST_INST_SHIFT 0
+# define R300_PVS_XYZW_VALID_INST_SHIFT 10
+# define R300_PVS_LAST_INST_SHIFT 20
+/* Addresses are relative to the vertex program parameters area. */
+#define R300_VAP_PVS_CONST_CNTL 0x22D4
+# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0
+# define R300_PVS_MAX_CONST_ADDR_SHIFT 16
+#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
+# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
+#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
+
+/* The entire range from 0x2300 to 0x24AC inclusive seems to be used for
+ * immediate vertices
+ */
+#define R300_VAP_VTX_COLOR_R 0x2464
+#define R300_VAP_VTX_COLOR_G 0x2468
+#define R300_VAP_VTX_COLOR_B 0x246C
+#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */
+#define R300_VAP_VTX_POS_0_Y_1 0x2494
+#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */
+#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */
+#define R300_VAP_VTX_POS_0_Y_2 0x24A4
+#define R300_VAP_VTX_POS_0_Z_2 0x24A8
+/* write 0 to indicate end of packet? */
+#define R300_VAP_VTX_END_OF_PKT 0x24AC
+
+/* gap */
+
+/* These are values from r300_reg/r300_reg.h - they are known to be correct
+ * and are here so we can use one register file instead of several
+ * - Vladimir
+ */
+#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000
+# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16)
+
+#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004
+ /* each of the following is 3 bits wide, specifies number
+ of components */
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
+
+/* UNK30 seems to enable point to quad transformation on textures
+ * (or something closely related to that).
+ * This bit is rather fatal for the time being due to shortcomings on the
+ * pixel shader side.
+ * Specifies top of Raster pipe specific enable controls.
+ */
+#define R300_GB_ENABLE 0x4008
+# define R300_GB_POINT_STUFF_DISABLE (0 << 0)
+# define R300_GB_POINT_STUFF_ENABLE (1 << 0) /* Specifies if points will have stuffed texture coordinates. */
+# define R300_GB_LINE_STUFF_DISABLE (0 << 1)
+# define R300_GB_LINE_STUFF_ENABLE (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */
+# define R300_GB_TRIANGLE_STUFF_DISABLE (0 << 2)
+# define R300_GB_TRIANGLE_STUFF_ENABLE (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */
+# define R300_GB_STENCIL_AUTO_DISABLE (0 << 4)
+# define R300_GB_STENCIL_AUTO_ENABLE (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */
+# define R300_GB_STENCIL_AUTO_FORCE (2 << 4) /* Force 0 into dzy low bit. */
+
+ /* each of the following is 2 bits wide */
+#define R300_GB_TEX_REPLICATE 0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */
+#define R300_GB_TEX_ST 1 /* Stuff with source texture coordinates (S,T). */
+#define R300_GB_TEX_STR 2 /* Stuff with source texture coordinates (S,T,R). */
+# define R300_GB_TEX0_SOURCE_SHIFT 16
+# define R300_GB_TEX1_SOURCE_SHIFT 18
+# define R300_GB_TEX2_SOURCE_SHIFT 20
+# define R300_GB_TEX3_SOURCE_SHIFT 22
+# define R300_GB_TEX4_SOURCE_SHIFT 24
+# define R300_GB_TEX5_SOURCE_SHIFT 26
+# define R300_GB_TEX6_SOURCE_SHIFT 28
+# define R300_GB_TEX7_SOURCE_SHIFT 30
+
+/* MSPOS - positions for multisample antialiasing (?) */
+#define R300_GB_MSPOS0 0x4010
+ /* shifts - each of the fields is 4 bits */
+# define R300_GB_MSPOS0__MS_X0_SHIFT 0
+# define R300_GB_MSPOS0__MS_Y0_SHIFT 4
+# define R300_GB_MSPOS0__MS_X1_SHIFT 8
+# define R300_GB_MSPOS0__MS_Y1_SHIFT 12
+# define R300_GB_MSPOS0__MS_X2_SHIFT 16
+# define R300_GB_MSPOS0__MS_Y2_SHIFT 20
+# define R300_GB_MSPOS0__MSBD0_Y 24
+# define R300_GB_MSPOS0__MSBD0_X 28
+
+#define R300_GB_MSPOS1 0x4014
+# define R300_GB_MSPOS1__MS_X3_SHIFT 0
+# define R300_GB_MSPOS1__MS_Y3_SHIFT 4
+# define R300_GB_MSPOS1__MS_X4_SHIFT 8
+# define R300_GB_MSPOS1__MS_Y4_SHIFT 12
+# define R300_GB_MSPOS1__MS_X5_SHIFT 16
+# define R300_GB_MSPOS1__MS_Y5_SHIFT 20
+# define R300_GB_MSPOS1__MSBD1 24
+
+/* Specifies the graphics pipeline configuration for rasterization. */
+#define R300_GB_TILE_CONFIG 0x4018
+# define R300_GB_TILE_DISABLE (0 << 0)
+# define R300_GB_TILE_ENABLE (1 << 0)
+# define R300_GB_TILE_PIPE_COUNT_RV300 (0 << 1) /* RV350 (1 pipe, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R300 (3 << 1) /* R300 (2 pipes, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R420_3P (6 << 1) /* R420-3P (3 pipes, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R420 (7 << 1) /* R420 (4 pipes, 1 ctx) */
+# define R300_GB_TILE_SIZE_8 (0 << 4)
+# define R300_GB_TILE_SIZE_16 (1 << 4)
+# define R300_GB_TILE_SIZE_32 (2 << 4)
+# define R300_GB_SUPER_SIZE_1 (0 << 6)
+# define R300_GB_SUPER_SIZE_2 (1 << 6)
+# define R300_GB_SUPER_SIZE_4 (2 << 6)
+# define R300_GB_SUPER_SIZE_8 (3 << 6)
+# define R300_GB_SUPER_SIZE_16 (4 << 6)
+# define R300_GB_SUPER_SIZE_32 (5 << 6)
+# define R300_GB_SUPER_SIZE_64 (6 << 6)
+# define R300_GB_SUPER_SIZE_128 (7 << 6)
+# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */
+# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */
+# define R300_GB_SUPER_TILE_A (0 << 15)
+# define R300_GB_SUPER_TILE_B (1 << 15)
+# define R300_GB_SUBPIXEL_1_12 (0 << 16)
+# define R300_GB_SUBPIXEL_1_16 (1 << 16)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17)
+# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19)
+# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19)
+# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20)
+# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20)
+# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21)
+# define GB_TILE_CONFIG_SUBPRECISION (0 << 22)
+# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23)
+# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23)
+# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24)
+# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24)
+
+/* Specifies the sizes of the various FIFOs in the sc/rs/us. This register must be the first one written. */
+#define R300_GB_FIFO_SIZE 0x4024
+ /* each of the following is 2 bits wide */
+#define R300_GB_FIFO_SIZE_32 0
+#define R300_GB_FIFO_SIZE_64 1
+#define R300_GB_FIFO_SIZE_128 2
+#define R300_GB_FIFO_SIZE_256 3
+# define R300_SC_IFIFO_SIZE_SHIFT 0
+# define R300_SC_TZFIFO_SIZE_SHIFT 2
+# define R300_SC_BFIFO_SIZE_SHIFT 4
+
+# define R300_US_OFIFO_SIZE_SHIFT 12
+# define R300_US_WFIFO_SIZE_SHIFT 14
+ /* the following use the same constants as above, but the meaning
+ is times 2 (i.e. instead of 32 words it means 64) */
+# define R300_RS_TFIFO_SIZE_SHIFT 6
+# define R300_RS_CFIFO_SIZE_SHIFT 8
+# define R300_US_RAM_SIZE_SHIFT 10
+ /* watermarks, 3 bits wide */
+# define R300_RS_HIGHWATER_COL_SHIFT 16
+# define R300_RS_HIGHWATER_TEX_SHIFT 19
+# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */
+# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24
+
+/* GB_Z_PEQ_CONFIG: bit 0 selects between the 4_4 and 8_8 Z_PEQ_SIZE encodings defined below. */
+#define GB_Z_PEQ_CONFIG 0x4028
+# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0)
+# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0)
+
+/* Specifies various polygon specific selects (fog, depth, perspective). */
+#define R300_GB_SELECT 0x401c
+# define R300_GB_FOG_SELECT_C0A (0 << 0)
+# define R300_GB_FOG_SELECT_C1A (1 << 0)
+# define R300_GB_FOG_SELECT_C2A (2 << 0)
+# define R300_GB_FOG_SELECT_C3A (3 << 0)
+# define R300_GB_FOG_SELECT_1_1_W (4 << 0)
+# define R300_GB_FOG_SELECT_Z (5 << 0)
+# define R300_GB_DEPTH_SELECT_Z (0 << 3)
+# define R300_GB_DEPTH_SELECT_1_1_W (1 << 3)
+# define R300_GB_W_SELECT_1_W (0 << 4)
+# define R300_GB_W_SELECT_1 (1 << 4)
+# define R300_GB_FOG_STUFF_DISABLE (0 << 5)
+# define R300_GB_FOG_STUFF_ENABLE (1 << 5)
+# define R300_GB_FOG_STUFF_TEX_SHIFT 6
+# define R300_GB_FOG_STUFF_TEX_MASK 0x000003c0
+# define R300_GB_FOG_STUFF_COMP_SHIFT 10
+# define R300_GB_FOG_STUFF_COMP_MASK 0x00000c00
+
+/* Specifies the graphics pipeline configuration for antialiasing. */
+#define R300_GB_AA_CONFIG 0x4020
+# define GB_AA_CONFIG_AA_DISABLE (0 << 0)
+# define GB_AA_CONFIG_AA_ENABLE (1 << 0)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1)
+
+/* Selects which of 4 pipes are active. */
+#define GB_PIPE_SELECT 0x402c
+# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0
+# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2
+# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4
+# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6
+# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8
+# define GB_PIPE_SELECT_MAX_PIPE 12
+# define GB_PIPE_SELECT_BAD_PIPES 14
+# define GB_PIPE_SELECT_CONFIG_PIPES 18
+
+
+/* Specifies the sizes of the various FIFOs in the sc/rs. */
+#define GB_FIFO_SIZE1 0x4070
+/* High water mark for SC input fifo */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0
+# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f
+/* High water mark for SC input fifo (B) */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6
+# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0
+/* High water mark for RS colors' fifo */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12
+# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000
+/* High water mark for RS textures' fifo */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18
+# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000
+
+/* This table specifies the source location and format for up to 16 texture
+ * addresses (i[0]:i[15]) and four colors (c[0]:c[3])
+ */
+#define R500_RS_IP_0 0x4074
+#define R500_RS_IP_1 0x4078
+#define R500_RS_IP_2 0x407C
+#define R500_RS_IP_3 0x4080
+#define R500_RS_IP_4 0x4084
+#define R500_RS_IP_5 0x4088
+#define R500_RS_IP_6 0x408C
+#define R500_RS_IP_7 0x4090
+#define R500_RS_IP_8 0x4094
+#define R500_RS_IP_9 0x4098
+#define R500_RS_IP_10 0x409C
+#define R500_RS_IP_11 0x40A0
+#define R500_RS_IP_12 0x40A4
+#define R500_RS_IP_13 0x40A8
+#define R500_RS_IP_14 0x40AC
+#define R500_RS_IP_15 0x40B0
+#define R500_RS_IP_PTR_K0 62
+#define R500_RS_IP_PTR_K1 63
+#define R500_RS_IP_TEX_PTR_S_SHIFT 0
+#define R500_RS_IP_TEX_PTR_T_SHIFT 6
+#define R500_RS_IP_TEX_PTR_R_SHIFT 12
+#define R500_RS_IP_TEX_PTR_Q_SHIFT 18
+#define R500_RS_IP_COL_PTR_SHIFT 24
+#define R500_RS_IP_COL_FMT_SHIFT 27
+# define R500_RS_COL_PTR(x) ((x) << 24)
+# define R500_RS_COL_FMT(x) ((x) << 27)
+/* gap */
+/* Bit 31 flag pair. Unsigned literals are used because left-shifting a signed 1
+ * into bit 31 is undefined behavior in C and sign-extends when widened. */
+#define R500_RS_IP_OFFSET_DIS (0u << 31)
+#define R500_RS_IP_OFFSET_EN (1u << 31)
+
+/* gap */
+
+/* Zero to flush caches. */
+#define R300_TX_INVALTAGS 0x4100
+#define R300_TX_FLUSH 0x0
+
+/* The upper enable bits are guessed, based on fglrx reported limits. */
+#define R300_TX_ENABLE 0x4104
+# define R300_TX_ENABLE_0 (1 << 0)
+# define R300_TX_ENABLE_1 (1 << 1)
+# define R300_TX_ENABLE_2 (1 << 2)
+# define R300_TX_ENABLE_3 (1 << 3)
+# define R300_TX_ENABLE_4 (1 << 4)
+# define R300_TX_ENABLE_5 (1 << 5)
+# define R300_TX_ENABLE_6 (1 << 6)
+# define R300_TX_ENABLE_7 (1 << 7)
+# define R300_TX_ENABLE_8 (1 << 8)
+# define R300_TX_ENABLE_9 (1 << 9)
+# define R300_TX_ENABLE_10 (1 << 10)
+# define R300_TX_ENABLE_11 (1 << 11)
+# define R300_TX_ENABLE_12 (1 << 12)
+# define R300_TX_ENABLE_13 (1 << 13)
+# define R300_TX_ENABLE_14 (1 << 14)
+# define R300_TX_ENABLE_15 (1 << 15)
+
+#define R500_TX_FILTER_4 0x4110
+# define R500_TX_WEIGHT_1_SHIFT (0)
+# define R500_TX_WEIGHT_0_SHIFT (11)
+# define R500_TX_WEIGHT_PAIR (1<<22)
+# define R500_TX_PHASE_SHIFT (23)
+# define R500_TX_DIRECTION_HORIZONTAL (0<<27)
+# define R500_TX_DIRECTION_VERTICAL (1<<27)
+# define R500_TX_DIRECTION_VERITCAL R500_TX_DIRECTION_VERTICAL /* misspelled name kept as an alias for existing users */
+
+/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
+#define R300_GA_POINT_S0 0x4200
+
+/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
+#define R300_GA_POINT_T0 0x4204
+
+/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
+#define R300_GA_POINT_S1 0x4208
+
+/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
+#define R300_GA_POINT_T1 0x420c
+
+/* Specifies amount to shift integer position of vertex (screen space) before
+ * converting to float for triangle stipple.
+ */
+#define R300_GA_TRIANGLE_STIPPLE 0x4214
+# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0
+# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f
+# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16
+# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000
+
+/* The pointsize is given in multiples of 6. The pointsize can be enormous:
+ * Clear() renders a single point that fills the entire framebuffer.
+ * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in
+ * 8b precision).
+ */
+#define R300_GA_POINT_SIZE 0x421C
+# define R300_POINTSIZE_Y_SHIFT 0
+# define R300_POINTSIZE_Y_MASK 0x0000ffff
+# define R300_POINTSIZE_X_SHIFT 16
+# define R300_POINTSIZE_X_MASK 0xffff0000
+# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6)
+
+/* Red fill color */
+#define R500_GA_FILL_R 0x4220
+
+/* Green fill color */
+#define R500_GA_FILL_G 0x4224
+
+/* Blue fill color */
+#define R500_GA_FILL_B 0x4228
+
+/* Alpha fill color */
+#define R500_GA_FILL_A 0x422c
+
+
+/* Specifies maximum and minimum point & sprite sizes for per vertex size
+ * specification. The lower part (15:0) is MIN and (31:16) is max.
+ */
+#define R300_GA_POINT_MINMAX 0x4230
+# define R300_GA_POINT_MINMAX_MIN_SHIFT 0
+# define R300_GA_POINT_MINMAX_MIN_MASK (0xFFFF << 0)
+# define R300_GA_POINT_MINMAX_MAX_SHIFT 16
+# define R300_GA_POINT_MINMAX_MAX_MASK (0xFFFFu << 16) /* unsigned: 0xFFFF << 16 does not fit in a 32-bit signed int */
+
+/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b
+ * subprecision); (16.0) fixed format.
+ *
+ * The line width is given in multiples of 6.
+ * In default mode lines are classified as vertical lines.
+ * HO: horizontal
+ * VE: vertical or horizontal
+ * HO & VE: no classification
+ */
+#define R300_GA_LINE_CNTL 0x4234
+# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0
+# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff
+# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16)
+# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16)
+# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */
+# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */
+# define R500_GA_LINE_CNTL_SORT_NO (0 << 18)
+# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18)
+/** TODO: looks wrong */
+# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6)
+/** TODO: looks wrong */
+# define R300_LINE_CNT_HO (1 << 16)
+/** TODO: looks wrong */
+# define R300_LINE_CNT_VE (1 << 17)
+
+/* Line Stipple configuration information. */
+#define R300_GA_LINE_STIPPLE_CONFIG 0x4238
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2
+# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc
+
+/* Used to load US instructions and constants */
+#define R500_GA_US_VECTOR_INDEX 0x4250
+# define R500_GA_US_VECTOR_INDEX_SHIFT 0
+# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff
+# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16)
+# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16)
+# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17)
+# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17)
+
+/* Data register for loading US instructions and constants */
+#define R500_GA_US_VECTOR_DATA 0x4254
+
+/* Specifies color properties and mappings of textures. */
+#define R500_GA_COLOR_CONTROL_PS3 0x4258
+# define R500_TEX0_SHADING_PS3_SOLID (0 << 0)
+# define R500_TEX0_SHADING_PS3_FLAT (1 << 0)
+# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0)
+# define R500_TEX1_SHADING_PS3_SOLID (0 << 2)
+# define R500_TEX1_SHADING_PS3_FLAT (1 << 2)
+# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2)
+# define R500_TEX2_SHADING_PS3_SOLID (0 << 4)
+# define R500_TEX2_SHADING_PS3_FLAT (1 << 4)
+# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4)
+# define R500_TEX3_SHADING_PS3_SOLID (0 << 6)
+# define R500_TEX3_SHADING_PS3_FLAT (1 << 6)
+# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6)
+# define R500_TEX4_SHADING_PS3_SOLID (0 << 8)
+# define R500_TEX4_SHADING_PS3_FLAT (1 << 8)
+# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8)
+# define R500_TEX5_SHADING_PS3_SOLID (0 << 10)
+# define R500_TEX5_SHADING_PS3_FLAT (1 << 10)
+# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10)
+# define R500_TEX6_SHADING_PS3_SOLID (0 << 12)
+# define R500_TEX6_SHADING_PS3_FLAT (1 << 12)
+# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12)
+# define R500_TEX7_SHADING_PS3_SOLID (0 << 14)
+# define R500_TEX7_SHADING_PS3_FLAT (1 << 14)
+# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14)
+# define R500_TEX8_SHADING_PS3_SOLID (0 << 16)
+# define R500_TEX8_SHADING_PS3_FLAT (1 << 16)
+# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16)
+# define R500_TEX9_SHADING_PS3_SOLID (0 << 18)
+# define R500_TEX9_SHADING_PS3_FLAT (1 << 18)
+# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18)
+# define R500_TEX10_SHADING_PS3_SOLID (0 << 20)
+# define R500_TEX10_SHADING_PS3_FLAT (1 << 20)
+# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20)
+# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22)
+# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26)
+
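+/* Returns idle status of various G3D blocks, captured when GA_IDLE written or
+ * when hard or soft reset asserted.
+ * NOTE(review): every field constant below is written as (0 << n) and so
+ * evaluates to 0; confirm whether (1 << n) bit masks were intended.
+ */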
+#define R500_GA_IDLE 0x425c
+# define R500_GA_IDLE_PIPE3_Z_IDLE (0 << 0)
+# define R500_GA_IDLE_PIPE2_Z_IDLE (0 << 1)
+# define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2)
+# define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3)
+# define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4)
+# define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5)
+# define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6)
+# define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7)
+# define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8)
+# define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9)
+# define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10)
+# define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11)
+# define R500_GA_IDLE_PIPE1_Z_IDLE (0 << 12)
+# define R500_GA_IDLE_PIPE0_Z_IDLE (0 << 13)
+# define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14)
+# define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15)
+# define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16)
+# define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17)
+# define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18)
+# define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19)
+# define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20)
+# define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21)
+# define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22)
+# define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23)
+# define R500_GA_IDLE_SU_IDLE (0 << 24)
+# define R500_GA_IDLE_GA_IDLE (0 << 25)
+# define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26)
+
+/* Current value of stipple accumulator. */
+#define R300_GA_LINE_STIPPLE_VALUE 0x4260
+
+/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */
+#define R300_GA_LINE_S0 0x4264
+/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */
+#define R300_GA_LINE_S1 0x4268
+
+/* GA Input fifo high water marks */
+#define R500_GA_FIFO_CNTL 0x4270
+# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007
+# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0
+# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038
+# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3
+# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0
+# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6
+
+/* GA enhance/tweaks */
+#define R300_GA_ENHANCE 0x4274
+# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0)
+# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */
+# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1)
+# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */
+# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */
+# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */
+# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3)
+# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */
+
+#define R300_GA_COLOR_CONTROL 0x4278
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0)
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT (1 << 0)
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD (2 << 0)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID (0 << 2)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT (1 << 2)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD (2 << 2)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID (0 << 4)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT (1 << 4)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD (2 << 4)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID (0 << 6)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT (1 << 6)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD (2 << 6)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID (0 << 8)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT (1 << 8)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD (2 << 8)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID (0 << 10)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT (1 << 10)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD (2 << 10)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID (0 << 12)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT (1 << 12)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD (2 << 12)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID (0 << 14)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT (1 << 14)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD (2 << 14)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST (0 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16)
+
+/** TODO: might be candidate for removal */
+# define R300_RE_SHADE_MODEL_SMOOTH ( \
+ R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST )
+/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */
+# define R300_RE_SHADE_MODEL_FLAT ( \
+ R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST )
+
+/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */
+#define R300_GA_SOLID_RG 0x427c
+# define GA_SOLID_RG_COLOR_GREEN_SHIFT 0
+# define GA_SOLID_RG_COLOR_GREEN_MASK 0x0000ffff
+# define GA_SOLID_RG_COLOR_RED_SHIFT 16
+# define GA_SOLID_RG_COLOR_RED_MASK 0xffff0000
+/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. */
+#define R300_GA_SOLID_BA 0x4280
+# define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0
+# define GA_SOLID_BA_COLOR_ALPHA_MASK 0x0000ffff
+# define GA_SOLID_BA_COLOR_BLUE_SHIFT 16
+# define GA_SOLID_BA_COLOR_BLUE_MASK 0xffff0000
+
+/* Polygon Mode
+ * Dangerous
+ */
+#define R300_GA_POLY_MODE 0x4288
+# define R300_GA_POLY_MODE_DISABLE (0 << 0)
+# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */
+/* reserved */
+# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4)
+# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4)
+# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4)
+/* reserved */
+# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7)
+# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7)
+# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7)
+/* reserved */
+
+/* Specifies the rounding mode for geometry & color SPFP to FP conversions. */
+#define R300_GA_ROUND_MODE 0x428c
+# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0)
+# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0)
+# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2)
+# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2)
+# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4)
+# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4)
+# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5)
+# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5)
+# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6
+# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0
+
+/* Specifies x & y offsets for vertex data after conversion to FP.
+ * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b
+ * subprecision).
+ */
+#define R300_GA_OFFSET 0x4290
+# define R300_GA_OFFSET_X_OFFSET_SHIFT 0
+# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff
+# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16
+# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000
+
+/* Specifies the scale to apply to fog. */
+#define R300_GA_FOG_SCALE 0x4294
+/* Specifies the offset to apply to fog. */
+#define R300_GA_FOG_OFFSET 0x4298
+/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */
+#define R300_GA_SOFT_RESET 0x429c
+
+/* Not sure why there are duplicates of factor and constant values.
+ * My best guess so far is that there are separate zbiases for test and write.
+ * Ordering might be wrong.
+ * Some of the tests indicate that fgl has a fallback implementation of zbias
+ * via pixel shaders.
+ */
+#define R300_SU_TEX_WRAP 0x42A0
+#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4
+#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8
+#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC
+#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0
+
+/* This register needs to be set to (1<<1) for RV350 to correctly
+ * perform depth test (see --vb-triangles in r300_demo)
+ * Don't know about other chips. - Vladimir
+ * This is set to 3 when GL_POLYGON_OFFSET_FILL is on.
+ * My guess is that there are two bits for each zbias primitive
+ * (FILL, LINE, POINT).
+ * One to enable depth test and one for depth write.
+ * Yet this doesn't explain why depth writes work ...
+ */
+#define R300_SU_POLY_OFFSET_ENABLE 0x42B4
+# define R300_FRONT_ENABLE (1 << 0)
+# define R300_BACK_ENABLE (1 << 1)
+# define R300_PARA_ENABLE (1 << 2)
+
+/* Cull mode: bits 0 and 1 are the front/back cull flags, bit 2 selects the front-face winding (0 = CCW, 1 = CW). */
+#define R300_SU_CULL_MODE 0x42B8
+# define R300_CULL_FRONT (1 << 0)
+# define R300_CULL_BACK (1 << 1)
+# define R300_FRONT_FACE_CCW (0 << 2)
+# define R300_FRONT_FACE_CW (1 << 2)
+
+/* SU Depth Scale value */
+#define R300_SU_DEPTH_SCALE 0x42c0
+/* SU Depth Offset value */
+#define R300_SU_DEPTH_OFFSET 0x42c4
+
+
+/* BEGIN: Rasterization / Interpolators - many guesses */
+
+/*
+ * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends
+ * on the vertex program, *not* the fragment program)
+ */
+#define R300_RS_COUNT 0x4300
+# define R300_IT_COUNT_SHIFT 0
+# define R300_IT_COUNT_MASK 0x0000007f
+# define R300_IC_COUNT_SHIFT 7
+# define R300_IC_COUNT_MASK 0x00000780
+# define R300_W_ADDR_SHIFT 12
+# define R300_W_ADDR_MASK 0x0003f000
+# define R300_HIRES_DIS (0 << 18)
+# define R300_HIRES_EN (1 << 18)
+
+/* Per the shift/mask pairs below: the instruction count field occupies bits 3:0 and the TX offset field bits 7:5. */
+#define R300_RS_INST_COUNT 0x4304
+# define R300_RS_INST_COUNT_SHIFT 0
+# define R300_RS_INST_COUNT_MASK 0x0000000f
+# define R300_RS_TX_OFFSET_SHIFT 5
+# define R300_RS_TX_OFFSET_MASK 0x000000e0
+
+/* gap */
+
+/* Only used for texture coordinates.
+ * Use the source field to route texture coordinate input from the
+ * vertex program to the desired interpolator. Note that the source
+ * field is relative to the outputs the vertex program *actually*
+ * writes. If a vertex program only writes texcoord[1], this will
+ * be source index 0.
+ * Set INTERP_USED on all interpolators that produce data used by
+ * the fragment program. INTERP_USED looks like a swizzling mask,
+ * but I haven't seen it used that way.
+ *
+ * Note: The _UNKNOWN constants are always set in their respective
+ * register. I don't know if this is necessary.
+ */
+#define R300_RS_IP_0 0x4310
+#define R300_RS_IP_1 0x4314
+#define R300_RS_IP_2 0x4318
+#define R300_RS_IP_3 0x431C
+# define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */
+# define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */
+# define R300_RS_TEX_PTR(x) ((x) << 0) /* argument parenthesized so compound expressions expand safely */
+# define R300_RS_COL_PTR(x) ((x) << 6)
+# define R300_RS_COL_FMT(x) ((x) << 9)
+# define R300_RS_COL_FMT_RGBA 0
+# define R300_RS_COL_FMT_RGB0 1
+# define R300_RS_COL_FMT_RGB1 2
+# define R300_RS_COL_FMT_000A 4
+# define R300_RS_COL_FMT_0000 5
+# define R300_RS_COL_FMT_0001 6
+# define R300_RS_COL_FMT_111A 8
+# define R300_RS_COL_FMT_1110 9
+# define R300_RS_COL_FMT_1111 10
+# define R300_RS_SEL_S(x) ((x) << 13)
+# define R300_RS_SEL_T(x) ((x) << 16)
+# define R300_RS_SEL_R(x) ((x) << 19)
+# define R300_RS_SEL_Q(x) ((x) << 22)
+# define R300_RS_SEL_C0 0
+# define R300_RS_SEL_C1 1
+# define R300_RS_SEL_C2 2
+# define R300_RS_SEL_C3 3
+# define R300_RS_SEL_K0 4
+# define R300_RS_SEL_K1 5
+
+
+/* */
+#define R500_RS_INST_0 0x4320
+#define R500_RS_INST_1 0x4324
+#define R500_RS_INST_2 0x4328
+#define R500_RS_INST_3 0x432c
+#define R500_RS_INST_4 0x4330
+#define R500_RS_INST_5 0x4334
+#define R500_RS_INST_6 0x4338
+#define R500_RS_INST_7 0x433c
+#define R500_RS_INST_8 0x4340
+#define R500_RS_INST_9 0x4344
+#define R500_RS_INST_10 0x4348
+#define R500_RS_INST_11 0x434c
+#define R500_RS_INST_12 0x4350
+#define R500_RS_INST_13 0x4354
+#define R500_RS_INST_14 0x4358
+#define R500_RS_INST_15 0x435c
+#define R500_RS_INST_TEX_ID_SHIFT 0
+#define R500_RS_INST_TEX_CN_WRITE (1 << 4)
+#define R500_RS_INST_TEX_ADDR_SHIFT 5
+#define R500_RS_INST_COL_ID_SHIFT 12
+#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16)
+#define R500_RS_INST_COL_CN_WRITE (1 << 16)
+#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16)
+#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16)
+#define R500_RS_INST_COL_ADDR_SHIFT 18
+#define R500_RS_INST_TEX_ADJ (1 << 25)
+#define R500_RS_INST_W_CN (1 << 26)
+
+/* These DWORDs control how vertex data is routed into fragment program
+ * registers, after interpolators.
+ */
+#define R300_RS_INST_0 0x4330
+#define R300_RS_INST_1 0x4334
+#define R300_RS_INST_2 0x4338
+#define R300_RS_INST_3 0x433C
+#define R300_RS_INST_4 0x4340
+#define R300_RS_INST_5 0x4344
+#define R300_RS_INST_6 0x4348
+#define R300_RS_INST_7 0x434C
+# define R300_RS_INST_TEX_ID(x) ((x) << 0)
+# define R300_RS_INST_TEX_CN_WRITE (1 << 3)
+# define R300_RS_INST_TEX_ADDR_SHIFT 6
+# define R300_RS_INST_COL_ID(x) ((x) << 11)
+# define R300_RS_INST_COL_CN_WRITE (1 << 14)
+# define R300_RS_INST_COL_ADDR_SHIFT 17
+# define R300_RS_INST_TEX_ADJ (1 << 22)
+# define R300_RS_COL_BIAS_UNUSED_SHIFT 23
+
+/* END: Rasterization / Interpolators - many guesses */
+
+/* Hierarchical Z Enable */
+#define R300_SC_HYPERZ 0x43a4
+# define R300_SC_HYPERZ_DISABLE (0 << 0)
+# define R300_SC_HYPERZ_ENABLE (1 << 0)
+# define R300_SC_HYPERZ_MIN (0 << 1)
+# define R300_SC_HYPERZ_MAX (1 << 1)
+# define R300_SC_HYPERZ_ADJ_256 (0 << 2)
+# define R300_SC_HYPERZ_ADJ_128 (1 << 2)
+# define R300_SC_HYPERZ_ADJ_64 (2 << 2)
+# define R300_SC_HYPERZ_ADJ_32 (3 << 2)
+# define R300_SC_HYPERZ_ADJ_16 (4 << 2)
+# define R300_SC_HYPERZ_ADJ_8 (5 << 2)
+# define R300_SC_HYPERZ_ADJ_4 (6 << 2)
+# define R300_SC_HYPERZ_ADJ_2 (7 << 2)
+# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5)
+# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5)
+# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6)
+# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6)
+
+#define R300_SC_EDGERULE 0x43a8
+
+/* BEGIN: Scissors and cliprects */
+
+/* There are four clipping rectangles. Their corner coordinates are inclusive.
+ * Every pixel is assigned a number from 0 to 15 by setting bits 0-3 depending
+ * on whether the pixel is inside cliprects 0-3, respectively. For example,
+ * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
+ * the number 3 (binary 0011).
+ * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set,
+ * the pixel is rasterized.
+ *
+ * In addition to this, there is a scissors rectangle. Only pixels inside the
+ * scissors rectangle are drawn. (coordinates are inclusive)
+ *
+ * For some reason, the top-left corner of the framebuffer is at (1440, 1440)
+ * for the purpose of clipping and scissors.
+ */
+#define R300_SC_CLIPRECT_TL_0 0x43B0
+#define R300_SC_CLIPRECT_BR_0 0x43B4
+#define R300_SC_CLIPRECT_TL_1 0x43B8
+#define R300_SC_CLIPRECT_BR_1 0x43BC
+#define R300_SC_CLIPRECT_TL_2 0x43C0
+#define R300_SC_CLIPRECT_BR_2 0x43C4
+#define R300_SC_CLIPRECT_TL_3 0x43C8
+#define R300_SC_CLIPRECT_BR_3 0x43CC
+# define R300_CLIPRECT_OFFSET 1440
+# define R300_CLIPRECT_MASK 0x1FFF
+# define R300_CLIPRECT_X_SHIFT 0
+# define R300_CLIPRECT_X_MASK (0x1FFF << 0)
+# define R300_CLIPRECT_Y_SHIFT 13
+# define R300_CLIPRECT_Y_MASK (0x1FFF << 13)
+#define R300_SC_CLIP_RULE 0x43D0
+# define R300_CLIP_OUT (1 << 0)
+# define R300_CLIP_0 (1 << 1)
+# define R300_CLIP_1 (1 << 2)
+# define R300_CLIP_10 (1 << 3)
+# define R300_CLIP_2 (1 << 4)
+# define R300_CLIP_20 (1 << 5)
+# define R300_CLIP_21 (1 << 6)
+# define R300_CLIP_210 (1 << 7)
+# define R300_CLIP_3 (1 << 8)
+# define R300_CLIP_30 (1 << 9)
+# define R300_CLIP_31 (1 << 10)
+# define R300_CLIP_310 (1 << 11)
+# define R300_CLIP_32 (1 << 12)
+# define R300_CLIP_320 (1 << 13)
+# define R300_CLIP_321 (1 << 14)
+# define R300_CLIP_3210 (1 << 15)
+
+/* gap */
+
+#define R300_SC_SCISSORS_TL 0x43E0
+#define R300_SC_SCISSORS_BR 0x43E4
+# define R300_SCISSORS_OFFSET 1440
+# define R300_SCISSORS_X_SHIFT 0
+# define R300_SCISSORS_X_MASK (0x1FFF << 0)
+# define R300_SCISSORS_Y_SHIFT 13
+# define R300_SCISSORS_Y_MASK (0x1FFF << 13)
+
+/* Screen door sample mask */
+#define R300_SC_SCREENDOOR 0x43e8
+
+/* END: Scissors and cliprects */
+
+/* BEGIN: Texture specification */
+
+/*
+ * The texture specification dwords are grouped by meaning and not by texture
+ * unit. This means that e.g. the offset for texture image unit N is found in
+ * register TX_OFFSET_0 + (4*N)
+ */
+#define R300_TX_FILTER0_0 0x4400
+#define R300_TX_FILTER0_1 0x4404
+#define R300_TX_FILTER0_2 0x4408
+#define R300_TX_FILTER0_3 0x440c
+#define R300_TX_FILTER0_4 0x4410
+#define R300_TX_FILTER0_5 0x4414
+#define R300_TX_FILTER0_6 0x4418
+#define R300_TX_FILTER0_7 0x441c
+#define R300_TX_FILTER0_8 0x4420
+#define R300_TX_FILTER0_9 0x4424
+#define R300_TX_FILTER0_10 0x4428
+#define R300_TX_FILTER0_11 0x442c
+#define R300_TX_FILTER0_12 0x4430
+#define R300_TX_FILTER0_13 0x4434
+#define R300_TX_FILTER0_14 0x4438
+#define R300_TX_FILTER0_15 0x443c
+# define R300_TX_REPEAT 0
+# define R300_TX_MIRRORED 1
+# define R300_TX_CLAMP_TO_EDGE 2
+# define R300_TX_MIRROR_ONCE_TO_EDGE 3
+# define R300_TX_CLAMP 4
+# define R300_TX_MIRROR_ONCE 5
+# define R300_TX_CLAMP_TO_BORDER 6
+# define R300_TX_MIRROR_ONCE_TO_BORDER 7
+# define R300_TX_WRAP_S_SHIFT 0
+# define R300_TX_WRAP_S_MASK (7 << 0)
+# define R300_TX_WRAP_T_SHIFT 3
+# define R300_TX_WRAP_T_MASK (7 << 3)
+# define R300_TX_WRAP_R_SHIFT 6
+# define R300_TX_WRAP_R_MASK (7 << 6)
+# define R300_TX_MAG_FILTER_4 (0 << 9)
+# define R300_TX_MAG_FILTER_NEAREST (1 << 9)
+# define R300_TX_MAG_FILTER_LINEAR (2 << 9)
+# define R300_TX_MAG_FILTER_ANISO (3 << 9)
+# define R300_TX_MAG_FILTER_MASK (3 << 9)
+# define R300_TX_MIN_FILTER_NEAREST (1 << 11)
+# define R300_TX_MIN_FILTER_LINEAR (2 << 11)
+# define R300_TX_MIN_FILTER_ANISO (3 << 11)
+# define R300_TX_MIN_FILTER_MASK (3 << 11)
+# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13)
+# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13)
+# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13)
+# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13)
+# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21)
+# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21)
+# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21)
+# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21)
+# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21)
+# define R300_TX_MAX_ANISO_MASK (7 << 21)
+
+#define R300_TX_FILTER1_0 0x4440
+# define R300_CHROMA_KEY_MODE_DISABLE 0
+# define R300_CHROMA_KEY_FORCE 1
+# define R300_CHROMA_KEY_BLEND 2
+# define R300_MC_ROUND_NORMAL (0<<2)
+# define R300_MC_ROUND_MPEG4 (1<<2)
+# define R300_LOD_BIAS_SHIFT 3
+# define R300_LOD_BIAS_MASK 0x1ff8
+# define R300_EDGE_ANISO_EDGE_DIAG (0<<13)
+# define R300_EDGE_ANISO_EDGE_ONLY (1<<13)
+# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14)
+# define R300_MC_COORD_TRUNCATE_MPEG (1<<14)
+# define R300_TX_TRI_PERF_0_8 (0<<15)
+# define R300_TX_TRI_PERF_1_8 (1<<15)
+# define R300_TX_TRI_PERF_1_4 (2<<15)
+# define R300_TX_TRI_PERF_3_8 (3<<15)
+# define R300_ANISO_THRESHOLD_MASK (7<<17)
+
+# define R500_MACRO_SWITCH (1<<22)
+# define R500_BORDER_FIX (1u<<31) /* unsigned: shifting a signed 1 into bit 31 is undefined behavior */
+
+#define R300_TX_SIZE_0 0x4480
+# define R300_TX_WIDTHMASK_SHIFT 0
+# define R300_TX_WIDTHMASK_MASK (2047 << 0)
+# define R300_TX_HEIGHTMASK_SHIFT 11
+# define R300_TX_HEIGHTMASK_MASK (2047 << 11)
+# define R300_TX_DEPTHMASK_SHIFT 22
+# define R300_TX_DEPTHMASK_MASK (0xf << 22)
+# define R300_TX_MAX_MIP_LEVEL_SHIFT 26
+# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26)
+# define R300_TX_SIZE_PROJECTED (1<<30)
+# define R300_TX_SIZE_TXPITCH_EN (1u<<31) /* unsigned: shifting a signed 1 into bit 31 is undefined behavior */
+#define R300_TX_FORMAT_0 0x44C0
+ /* The interpretation of the format word by Wladimir van der Laan */
+ /* The X, Y, Z and W refer to the layout of the components.
+ They are given meanings as R, G, B and Alpha by the swizzle
+ specification */
+# define R300_TX_FORMAT_X8 0x0
+# define R500_TX_FORMAT_X1 0x0 // bit set in format 2
+# define R300_TX_FORMAT_X16 0x1
+# define R500_TX_FORMAT_X1_REV 0x0 // bit set in format 2
+# define R300_TX_FORMAT_Y4X4 0x2
+# define R300_TX_FORMAT_Y8X8 0x3
+# define R300_TX_FORMAT_Y16X16 0x4
+# define R300_TX_FORMAT_Z3Y3X2 0x5
+# define R300_TX_FORMAT_Z5Y6X5 0x6
+# define R300_TX_FORMAT_Z6Y5X5 0x7
+# define R300_TX_FORMAT_Z11Y11X10 0x8
+# define R300_TX_FORMAT_Z10Y11X11 0x9
+# define R300_TX_FORMAT_W4Z4Y4X4 0xA
+# define R300_TX_FORMAT_W1Z5Y5X5 0xB
+# define R300_TX_FORMAT_W8Z8Y8X8 0xC
+# define R300_TX_FORMAT_W2Z10Y10X10 0xD
+# define R300_TX_FORMAT_W16Z16Y16X16 0xE
+# define R300_TX_FORMAT_DXT1 0xF
+# define R300_TX_FORMAT_DXT3 0x10
+# define R300_TX_FORMAT_DXT5 0x11
+# define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */
+# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */
+# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */
+# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */
+
+ /* These two values are wrong, but they're the only values that
+ * produce any even vaguely correct results. Can r300 only do 16-bit
+ * depth textures?
+ */
+# define R300_TX_FORMAT_X24_Y8 0x1e
+# define R300_TX_FORMAT_X32 0x1e
+
+ /* 0x16 - some 16 bit green format.. ?? */
+# define R300_TX_FORMAT_3D (1 << 25)
+# define R300_TX_FORMAT_CUBIC_MAP (2 << 25)
+
+ /* gap */
+ /* Floating point formats */
+ /* Note - hardware supports both 16 and 32 bit floating point */
+# define R300_TX_FORMAT_FL_I16 0x18
+# define R300_TX_FORMAT_FL_I16A16 0x19
+# define R300_TX_FORMAT_FL_R16G16B16A16 0x1A
+# define R300_TX_FORMAT_FL_I32 0x1B
+# define R300_TX_FORMAT_FL_I32A32 0x1C
+# define R300_TX_FORMAT_FL_R32G32B32A32 0x1D
+ /* alpha modes, convenience mostly */
+ /* If the format has alpha, pick the constant appropriate to the
+ number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc.) */
+# define R300_TX_FORMAT_ALPHA_1CH 0x000
+# define R300_TX_FORMAT_ALPHA_2CH 0x200
+# define R300_TX_FORMAT_ALPHA_4CH 0x600
+# define R300_TX_FORMAT_ALPHA_NONE 0xA00
+ /* Swizzling */
+ /* constants */
+# define R300_TX_FORMAT_X 0
+# define R300_TX_FORMAT_Y 1
+# define R300_TX_FORMAT_Z 2
+# define R300_TX_FORMAT_W 3
+# define R300_TX_FORMAT_ZERO 4
+# define R300_TX_FORMAT_ONE 5
+ /* 2.0*Z, everything above 1.0 is set to 0.0 */
+# define R300_TX_FORMAT_CUT_Z 6
+ /* 2.0*W, everything above 1.0 is set to 0.0 */
+# define R300_TX_FORMAT_CUT_W 7
+
+# define R300_TX_FORMAT_B_SHIFT 18
+# define R300_TX_FORMAT_G_SHIFT 15
+# define R300_TX_FORMAT_R_SHIFT 12
+# define R300_TX_FORMAT_A_SHIFT 9
+ /* Builds a TX_FORMAT word in one step. Every argument is token-pasted
+ onto the R300_TX_FORMAT_ prefix: B, G, R and A select what is placed
+ in the corresponding *_SHIFT field (e.g. X, Y, Z, W, ZERO, ONE), and
+ FMT is the suffix of the layout constant ORed into the low bits. */
+# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) \
+ ( (R300_TX_FORMAT_##FMT) \
+ | ((R300_TX_FORMAT_##A) << R300_TX_FORMAT_A_SHIFT) \
+ | ((R300_TX_FORMAT_##R) << R300_TX_FORMAT_R_SHIFT) \
+ | ((R300_TX_FORMAT_##G) << R300_TX_FORMAT_G_SHIFT) \
+ | ((R300_TX_FORMAT_##B) << R300_TX_FORMAT_B_SHIFT) )
+ /* These can be ORed with result of R300_EASY_TX_FORMAT()
+ We don't really know what they do. Take values from a
+ constant color ? */
+# define R300_TX_FORMAT_CONST_X (1<<5)
+# define R300_TX_FORMAT_CONST_Y (2<<5)
+# define R300_TX_FORMAT_CONST_Z (4<<5)
+# define R300_TX_FORMAT_CONST_W (8<<5)
+
+# define R300_TX_FORMAT_YUV_MODE 0x00800000
+
+#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */
+# define R300_TX_PITCHMASK_SHIFT 0
+# define R300_TX_PITCHMASK_MASK (2047 << 0)
+# define R500_TXFORMAT_MSB (1 << 14)
+# define R500_TXWIDTH_BIT11 (1 << 15)
+# define R500_TXHEIGHT_BIT11 (1 << 16)
+# define R500_POW2FIX2FLT (1 << 17)
+# define R500_SEL_FILTER4_TC0 (0 << 18)
+# define R500_SEL_FILTER4_TC1 (1 << 18)
+# define R500_SEL_FILTER4_TC2 (2 << 18)
+# define R500_SEL_FILTER4_TC3 (3 << 18)
+
+#define R300_TX_OFFSET_0 0x4540
+#define R300_TX_OFFSET_1 0x4544
+#define R300_TX_OFFSET_2 0x4548
+#define R300_TX_OFFSET_3 0x454C
+#define R300_TX_OFFSET_4 0x4550
+#define R300_TX_OFFSET_5 0x4554
+#define R300_TX_OFFSET_6 0x4558
+#define R300_TX_OFFSET_7 0x455C
+ /* BEGIN: Guess from R200 */
+# define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
+# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0)
+# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0)
+# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
+# define R300_TXO_MACRO_TILE (1 << 2)
+# define R300_TXO_MICRO_TILE_LINEAR (0 << 3)
+# define R300_TXO_MICRO_TILE (1 << 3)
+# define R300_TXO_MICRO_TILE_SQUARE (2 << 3)
+# define R300_TXO_OFFSET_MASK 0xffffffe0
+# define R300_TXO_OFFSET_SHIFT 5
+ /* END: Guess from R200 */
+
+/* 32 bit chroma key */
+#define R300_TX_CHROMA_KEY_0 0x4580
+#define R300_TX_CHROMA_KEY_1 0x4584
+#define R300_TX_CHROMA_KEY_2 0x4588
+#define R300_TX_CHROMA_KEY_3 0x458c
+#define R300_TX_CHROMA_KEY_4 0x4590
+#define R300_TX_CHROMA_KEY_5 0x4594
+#define R300_TX_CHROMA_KEY_6 0x4598
+#define R300_TX_CHROMA_KEY_7 0x459c
+#define R300_TX_CHROMA_KEY_8 0x45a0
+#define R300_TX_CHROMA_KEY_9 0x45a4
+#define R300_TX_CHROMA_KEY_10 0x45a8
+#define R300_TX_CHROMA_KEY_11 0x45ac
+#define R300_TX_CHROMA_KEY_12 0x45b0
+#define R300_TX_CHROMA_KEY_13 0x45b4
+#define R300_TX_CHROMA_KEY_14 0x45b8
+#define R300_TX_CHROMA_KEY_15 0x45bc
+/* ff00ff00 == { 0, 1.0, 0, 1.0 } */
+
+/* Border Color */
+#define R300_TX_BORDER_COLOR_0 0x45c0
+#define R300_TX_BORDER_COLOR_1 0x45c4
+#define R300_TX_BORDER_COLOR_2 0x45c8
+#define R300_TX_BORDER_COLOR_3 0x45cc
+#define R300_TX_BORDER_COLOR_4 0x45d0
+#define R300_TX_BORDER_COLOR_5 0x45d4
+#define R300_TX_BORDER_COLOR_6 0x45d8
+#define R300_TX_BORDER_COLOR_7 0x45dc
+#define R300_TX_BORDER_COLOR_8 0x45e0
+#define R300_TX_BORDER_COLOR_9 0x45e4
+#define R300_TX_BORDER_COLOR_10 0x45e8
+#define R300_TX_BORDER_COLOR_11 0x45ec
+#define R300_TX_BORDER_COLOR_12 0x45f0
+#define R300_TX_BORDER_COLOR_13 0x45f4
+#define R300_TX_BORDER_COLOR_14 0x45f8
+#define R300_TX_BORDER_COLOR_15 0x45fc
+
+
+/* END: Texture specification */
+
+/* BEGIN: Fragment program instruction set */
+
+/* Fragment programs are written directly into register space.
+ * There are separate instruction streams for texture instructions and ALU
+ * instructions.
+ * In order to synchronize these streams, the program is divided into up
+ * to 4 nodes. Each node begins with a number of TEX operations, followed
+ * by a number of ALU operations.
+ * The first node can have zero TEX ops; all subsequent nodes must have at
+ * least one TEX op.
+ * All nodes must have at least one ALU op.
+ *
+ * The index of the last node is stored in PFS_CNTL_0: A value of 0 means
+ * 1 node, a value of 3 means 4 nodes.
+ * The total amount of instructions is defined in PFS_CNTL_2. The offsets are
+ * offsets into the respective instruction streams, while *_END points to the
+ * last instruction relative to this offset.
+ */
+#define R300_US_CONFIG 0x4600
+# define R300_PFS_CNTL_LAST_NODES_SHIFT 0
+# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0)
+# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3)
+#define R300_US_PIXSIZE 0x4604
+/* There is an unshifted value here which has so far always been equal to the
+ * index of the highest used temporary register.
+ */
+#define R300_US_CODE_OFFSET 0x4608
+# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0
+# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0)
+# define R300_PFS_CNTL_ALU_END_SHIFT 6
+# define R300_PFS_CNTL_ALU_END_MASK (63 << 6)
+# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13
+# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13)
+# define R300_PFS_CNTL_TEX_END_SHIFT 18
+# define R300_PFS_CNTL_TEX_END_MASK (31 << 18)
+
+/* gap */
+
+/* Nodes are stored backwards. The last active node is always stored in
+ * PFS_NODE_3.
+ * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The
+ * first node is stored in NODE_2, the second node is stored in NODE_3.
+ *
+ * Offsets are relative to the master offset from PFS_CNTL_2.
+ */
+#define R300_US_CODE_ADDR_0 0x4610
+#define R300_US_CODE_ADDR_1 0x4614
+#define R300_US_CODE_ADDR_2 0x4618
+#define R300_US_CODE_ADDR_3 0x461C
+# define R300_ALU_START_SHIFT 0
+# define R300_ALU_START_MASK (63 << 0)
+# define R300_ALU_SIZE_SHIFT 6
+# define R300_ALU_SIZE_MASK (63 << 6)
+# define R300_TEX_START_SHIFT 12
+# define R300_TEX_START_MASK (31 << 12)
+# define R300_TEX_SIZE_SHIFT 17
+# define R300_TEX_SIZE_MASK (31 << 17)
+# define R300_RGBA_OUT (1 << 22)
+# define R300_W_OUT (1 << 23)
+
+/* TEX
+ * As far as I can tell, texture instructions cannot write into output
+ * registers directly. A subsequent ALU instruction is always necessary,
+ * even if it's just MAD o0, r0, 1, 0
+ */
+#define R300_US_TEX_INST_0 0x4620
+# define R300_SRC_ADDR_SHIFT 0
+# define R300_SRC_ADDR_MASK (31 << 0)
+# define R300_DST_ADDR_SHIFT 6
+# define R300_DST_ADDR_MASK (31 << 6)
+# define R300_TEX_ID_SHIFT 11
+# define R300_TEX_ID_MASK (15 << 11)
+# define R300_TEX_INST_SHIFT 15
+# define R300_TEX_OP_NOP 0
+# define R300_TEX_OP_LD 1
+# define R300_TEX_OP_KIL 2
+# define R300_TEX_OP_TXP 3
+# define R300_TEX_OP_TXB 4
+# define R300_TEX_INST_MASK (7 << 15)
+
+/* Output format from the unified shader */
+#define R300_US_OUT_FMT_0 0x46A4
+# define R300_US_OUT_FMT_C4_8 (0 << 0)
+# define R300_US_OUT_FMT_C4_10 (1 << 0)
+# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0)
+# define R300_US_OUT_FMT_C_16 (3 << 0)
+# define R300_US_OUT_FMT_C2_16 (4 << 0)
+# define R300_US_OUT_FMT_C4_16 (5 << 0)
+# define R300_US_OUT_FMT_C_16_MPEG (6 << 0)
+# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0)
+# define R300_US_OUT_FMT_C2_4 (8 << 0)
+# define R300_US_OUT_FMT_C_3_3_2 (9 << 0)
+# define R300_US_OUT_FMT_C_6_5_6 (10 << 0)
+# define R300_US_OUT_FMT_C_11_11_10 (11 << 0)
+# define R300_US_OUT_FMT_C_10_11_11 (12 << 0)
+# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0)
+/* reserved */
+# define R300_US_OUT_FMT_UNUSED (15 << 0)
+# define R300_US_OUT_FMT_C_16_FP (16 << 0)
+# define R300_US_OUT_FMT_C2_16_FP (17 << 0)
+# define R300_US_OUT_FMT_C4_16_FP (18 << 0)
+# define R300_US_OUT_FMT_C_32_FP (19 << 0)
+# define R300_US_OUT_FMT_C2_32_FP (20 << 0)
+# define R300_US_OUT_FMT_C4_32_FP (21 << 0)
+# define R300_C0_SEL_A (0 << 8)
+# define R300_C0_SEL_R (1 << 8)
+# define R300_C0_SEL_G (2 << 8)
+# define R300_C0_SEL_B (3 << 8)
+# define R300_C1_SEL_A (0 << 10)
+# define R300_C1_SEL_R (1 << 10)
+# define R300_C1_SEL_G (2 << 10)
+# define R300_C1_SEL_B (3 << 10)
+# define R300_C2_SEL_A (0 << 12)
+# define R300_C2_SEL_R (1 << 12)
+# define R300_C2_SEL_G (2 << 12)
+# define R300_C2_SEL_B (3 << 12)
+# define R300_C3_SEL_A (0 << 14)
+# define R300_C3_SEL_R (1 << 14)
+# define R300_C3_SEL_G (2 << 14)
+# define R300_C3_SEL_B (3 << 14)
+# define R300_OUT_SIGN(x) ((x) << 16)
+# define R500_ROUND_ADJ (1 << 20)
+
+/* ALU
+ * The ALU instruction register blocks are enumerated according to the order
+ * in which fglrx (presumably) writes them. I assume there is space for 64
+ * instructions, since each block has space for a maximum of 64 DWORDs, and
+ * this matches reported native limits.
+ *
+ * The basic functional block seems to be one MAD for each color and alpha,
+ * and an adder that adds all components after the MUL.
+ * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands
+ * - DP4: Use OUTC_DP4, OUTA_DP4
+ * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands
+ * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands
+ * - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1
+ * - CMP: If ARG2 < 0, return ARG1, else return ARG0
+ * - FLR: use FRC+MAD
+ * - XPD: use MAD+MAD
+ * - SGE, SLT: use MAD+CMP
+ * - RSQ: use ABS modifier for argument
+ * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation
+ * (e.g. RCP) into color register
+ * - apparently, there's no quick DST operation
+ * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2"
+ * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0"
+ * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1"
+ *
+ * Operand selection
+ * First stage selects three sources from the available registers and
+ * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha).
+ * fglrx sorts the three source fields: Registers before constants,
+ * lower indices before higher indices; I do not know whether this is
+ * necessary.
+ *
+ * fglrx fills unused sources with "read constant 0"
+ * According to specs, you cannot select more than two different constants.
+ *
+ * Second stage selects the operands from the sources. This is defined in
+ * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants
+ * zero and one.
+ * Swizzling and negation happens in this stage, as well.
+ *
+ * Important: Color and alpha seem to be mostly separate, i.e. their sources
+ * selection appears to be fully independent (the register storage is probably
+ * physically split into a color and an alpha section).
+ * However (because of the apparent physical split), there is some interaction
+ * WRT swizzling. If, for example, you want to load an R component into an
+ * Alpha operand, this R component is taken from a *color* source, not from
+ * an alpha source. The corresponding register doesn't even have to appear in
+ * the alpha sources list. (I hope this all makes sense to you)
+ *
+ * Destination selection
+ * The destination register index is in FPI1 (color) and FPI3 (alpha)
+ * together with enable bits.
+ * There are separate enable bits for writing into temporary registers
+ * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_*
+ * /DSTA_OUTPUT). You can write to both at once, or not write at all (the
+ * same index must be used for both).
+ *
+ * Note: There is a special form for LRP
+ * - Argument order is the same as in ARB_fragment_program.
+ * - Operation is MAD
+ * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP
+ * - Set FPI0/FPI2_SPECIAL_LRP
+ * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD
+ */
+#define R300_US_ALU_RGB_ADDR_0 0x46C0
+# define R300_ALU_SRC0C_SHIFT 0
+# define R300_ALU_SRC0C_MASK (31 << 0)
+# define R300_ALU_SRC0C_CONST (1 << 5)
+# define R300_ALU_SRC1C_SHIFT 6
+# define R300_ALU_SRC1C_MASK (31 << 6)
+# define R300_ALU_SRC1C_CONST (1 << 11)
+# define R300_ALU_SRC2C_SHIFT 12
+# define R300_ALU_SRC2C_MASK (31 << 12)
+# define R300_ALU_SRC2C_CONST (1 << 17)
+# define R300_ALU_SRC_MASK 0x0003ffff
+# define R300_ALU_DSTC_SHIFT 18
+# define R300_ALU_DSTC_MASK (31 << 18)
+# define R300_ALU_DSTC_REG_MASK_SHIFT 23
+# define R300_ALU_DSTC_REG_X (1 << 23)
+# define R300_ALU_DSTC_REG_Y (1 << 24)
+# define R300_ALU_DSTC_REG_Z (1 << 25)
+# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26
+# define R300_ALU_DSTC_OUTPUT_X (1 << 26)
+# define R300_ALU_DSTC_OUTPUT_Y (1 << 27)
+# define R300_ALU_DSTC_OUTPUT_Z (1 << 28)
+# define R300_ALU_DSTC_OUTPUT_XYZ (7 << 26)
+# define R300_RGB_ADDR0(x) ((x) << 0)
+# define R300_RGB_ADDR1(x) ((x) << 6)
+# define R300_RGB_ADDR2(x) ((x) << 12)
+
+#define R300_US_ALU_ALPHA_ADDR_0 0x47C0
+# define R300_ALU_SRC0A_SHIFT 0
+# define R300_ALU_SRC0A_MASK (31 << 0)
+# define R300_ALU_SRC0A_CONST (1 << 5)
+# define R300_ALU_SRC1A_SHIFT 6
+# define R300_ALU_SRC1A_MASK (31 << 6)
+# define R300_ALU_SRC1A_CONST (1 << 11)
+# define R300_ALU_SRC2A_SHIFT 12
+# define R300_ALU_SRC2A_MASK (31 << 12)
+# define R300_ALU_SRC2A_CONST (1 << 17)
+# define R300_ALU_SRC_MASK 0x0003ffff
+# define R300_ALU_DSTA_SHIFT 18
+# define R300_ALU_DSTA_MASK (31 << 18)
+# define R300_ALU_DSTA_REG (1 << 23)
+# define R300_ALU_DSTA_OUTPUT (1 << 24)
+# define R300_ALU_DSTA_DEPTH (1 << 27)
+# define R300_ALPHA_ADDR0(x) ((x) << 0)
+# define R300_ALPHA_ADDR1(x) ((x) << 6)
+# define R300_ALPHA_ADDR2(x) ((x) << 12)
+
+#define R300_US_ALU_RGB_INST_0 0x48C0
+# define R300_ALU_ARGC_SRC0C_XYZ 0
+# define R300_ALU_ARGC_SRC0C_XXX 1
+# define R300_ALU_ARGC_SRC0C_YYY 2
+# define R300_ALU_ARGC_SRC0C_ZZZ 3
+# define R300_ALU_ARGC_SRC1C_XYZ 4
+# define R300_ALU_ARGC_SRC1C_XXX 5
+# define R300_ALU_ARGC_SRC1C_YYY 6
+# define R300_ALU_ARGC_SRC1C_ZZZ 7
+# define R300_ALU_ARGC_SRC2C_XYZ 8
+# define R300_ALU_ARGC_SRC2C_XXX 9
+# define R300_ALU_ARGC_SRC2C_YYY 10
+# define R300_ALU_ARGC_SRC2C_ZZZ 11
+# define R300_ALU_ARGC_SRC0A 12
+# define R300_ALU_ARGC_SRC1A 13
+# define R300_ALU_ARGC_SRC2A 14
+# define R300_ALU_ARGC_SRCP_XYZ 15
+# define R300_ALU_ARGC_SRCP_XXX 16
+# define R300_ALU_ARGC_SRCP_YYY 17
+# define R300_ALU_ARGC_SRCP_ZZZ 18
+# define R300_ALU_ARGC_SRCP_WWW 19
+# define R300_ALU_ARGC_ZERO 20
+# define R300_ALU_ARGC_ONE 21
+# define R300_ALU_ARGC_HALF 22
+# define R300_ALU_ARGC_SRC0C_YZX 23
+# define R300_ALU_ARGC_SRC1C_YZX 24
+# define R300_ALU_ARGC_SRC2C_YZX 25
+# define R300_ALU_ARGC_SRC0C_ZXY 26
+# define R300_ALU_ARGC_SRC1C_ZXY 27
+# define R300_ALU_ARGC_SRC2C_ZXY 28
+# define R300_ALU_ARGC_SRC0CA_WZY 29
+# define R300_ALU_ARGC_SRC1CA_WZY 30
+# define R300_ALU_ARGC_SRC2CA_WZY 31
+# define R300_RGB_SWIZA(x) ((x) << 0)
+# define R300_RGB_SWIZB(x) ((x) << 7)
+# define R300_RGB_SWIZC(x) ((x) << 14)
+
+# define R300_ALU_ARG0C_SHIFT 0
+# define R300_ALU_ARG0C_MASK (31 << 0)
+# define R300_ALU_ARG0C_NOP (0 << 5)
+# define R300_ALU_ARG0C_NEG (1 << 5)
+# define R300_ALU_ARG0C_ABS (2 << 5)
+# define R300_ALU_ARG0C_NAB (3 << 5)
+# define R300_ALU_ARG1C_SHIFT 7
+# define R300_ALU_ARG1C_MASK (31 << 7)
+# define R300_ALU_ARG1C_NOP (0 << 12)
+# define R300_ALU_ARG1C_NEG (1 << 12)
+# define R300_ALU_ARG1C_ABS (2 << 12)
+# define R300_ALU_ARG1C_NAB (3 << 12)
+# define R300_ALU_ARG2C_SHIFT 14
+# define R300_ALU_ARG2C_MASK (31 << 14)
+# define R300_ALU_ARG2C_NOP (0 << 19)
+# define R300_ALU_ARG2C_NEG (1 << 19)
+# define R300_ALU_ARG2C_ABS (2 << 19)
+# define R300_ALU_ARG2C_NAB (3 << 19)
+# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21)
+# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21)
+# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21)
+# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21)
+
+# define R300_ALU_OUTC_MAD (0 << 23)
+# define R300_ALU_OUTC_DP3 (1 << 23)
+# define R300_ALU_OUTC_DP4 (2 << 23)
+# define R300_ALU_OUTC_D2A (3 << 23)
+# define R300_ALU_OUTC_MIN (4 << 23)
+# define R300_ALU_OUTC_MAX (5 << 23)
+# define R300_ALU_OUTC_CMPH (7 << 23)
+# define R300_ALU_OUTC_CMP (8 << 23)
+# define R300_ALU_OUTC_FRC (9 << 23)
+# define R300_ALU_OUTC_REPL_ALPHA (10 << 23)
+
+# define R300_ALU_OUTC_MOD_NOP (0 << 27)
+# define R300_ALU_OUTC_MOD_MUL2 (1 << 27)
+# define R300_ALU_OUTC_MOD_MUL4 (2 << 27)
+# define R300_ALU_OUTC_MOD_MUL8 (3 << 27)
+# define R300_ALU_OUTC_MOD_DIV2 (4 << 27)
+# define R300_ALU_OUTC_MOD_DIV4 (5 << 27)
+# define R300_ALU_OUTC_MOD_DIV8 (6 << 27)
+
+ /* Top two bits of the RGB instruction word. Bit 31 is built from an
+ unsigned literal: left-shifting a signed 1 into the sign bit of a
+ 32-bit int is undefined behaviour in C. */
+# define R300_ALU_OUTC_CLAMP (1 << 30)
+# define R300_ALU_INSERT_NOP (1u << 31)
+
+#define R300_US_ALU_ALPHA_INST_0 0x49C0
+# define R300_ALU_ARGA_SRC0C_X 0
+# define R300_ALU_ARGA_SRC0C_Y 1
+# define R300_ALU_ARGA_SRC0C_Z 2
+# define R300_ALU_ARGA_SRC1C_X 3
+# define R300_ALU_ARGA_SRC1C_Y 4
+# define R300_ALU_ARGA_SRC1C_Z 5
+# define R300_ALU_ARGA_SRC2C_X 6
+# define R300_ALU_ARGA_SRC2C_Y 7
+# define R300_ALU_ARGA_SRC2C_Z 8
+# define R300_ALU_ARGA_SRC0A 9
+# define R300_ALU_ARGA_SRC1A 10
+# define R300_ALU_ARGA_SRC2A 11
+# define R300_ALU_ARGA_SRCP_X 12
+# define R300_ALU_ARGA_SRCP_Y 13
+# define R300_ALU_ARGA_SRCP_Z 14
+# define R300_ALU_ARGA_SRCP_W 15
+# define R300_ALU_ARGA_ZERO 16
+# define R300_ALU_ARGA_ONE 17
+# define R300_ALU_ARGA_HALF 18
+# define R300_ALPHA_SWIZA(x) ((x) << 0)
+# define R300_ALPHA_SWIZB(x) ((x) << 7)
+# define R300_ALPHA_SWIZC(x) ((x) << 14)
+
+# define R300_ALU_ARG0A_SHIFT 0
+# define R300_ALU_ARG0A_MASK (31 << 0)
+# define R300_ALU_ARG0A_NOP (0 << 5)
+# define R300_ALU_ARG0A_NEG (1 << 5)
+# define R300_ALU_ARG0A_ABS (2 << 5)
+# define R300_ALU_ARG0A_NAB (3 << 5)
+# define R300_ALU_ARG1A_SHIFT 7
+# define R300_ALU_ARG1A_MASK (31 << 7)
+# define R300_ALU_ARG1A_NOP (0 << 12)
+# define R300_ALU_ARG1A_NEG (1 << 12)
+# define R300_ALU_ARG1A_ABS (2 << 12)
+# define R300_ALU_ARG1A_NAB (3 << 12)
+# define R300_ALU_ARG2A_SHIFT 14
+# define R300_ALU_ARG2A_MASK (31 << 14)
+# define R300_ALU_ARG2A_NOP (0 << 19)
+# define R300_ALU_ARG2A_NEG (1 << 19)
+# define R300_ALU_ARG2A_ABS (2 << 19)
+# define R300_ALU_ARG2A_NAB (3 << 19)
+ /* These four SRCP selectors repeat, with identical names and values,
+ the definitions already given under R300_US_ALU_RGB_INST_0. An
+ identical macro redefinition is legal C, so this is redundant but
+ harmless. */
+# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21)
+# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21)
+# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21)
+# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21)
+
+# define R300_ALU_OUTA_MAD (0 << 23)
+# define R300_ALU_OUTA_DP4 (1 << 23)
+# define R300_ALU_OUTA_MIN (2 << 23)
+# define R300_ALU_OUTA_MAX (3 << 23)
+# define R300_ALU_OUTA_CND (5 << 23)
+# define R300_ALU_OUTA_CMP (6 << 23)
+# define R300_ALU_OUTA_FRC (7 << 23)
+# define R300_ALU_OUTA_EX2 (8 << 23)
+# define R300_ALU_OUTA_LG2 (9 << 23)
+# define R300_ALU_OUTA_RCP (10 << 23)
+# define R300_ALU_OUTA_RSQ (11 << 23)
+
+# define R300_ALU_OUTA_MOD_NOP (0 << 27)
+# define R300_ALU_OUTA_MOD_MUL2 (1 << 27)
+# define R300_ALU_OUTA_MOD_MUL4 (2 << 27)
+# define R300_ALU_OUTA_MOD_MUL8 (3 << 27)
+# define R300_ALU_OUTA_MOD_DIV2 (4 << 27)
+# define R300_ALU_OUTA_MOD_DIV4 (5 << 27)
+# define R300_ALU_OUTA_MOD_DIV8 (6 << 27)
+
+# define R300_ALU_OUTA_CLAMP (1 << 30)
+/* END: Fragment program instruction set */
+
+/* Fog: Fog Blending Enable */
+#define R300_FG_FOG_BLEND 0x4bc0
+# define R300_FG_FOG_BLEND_DISABLE (0 << 0)
+# define R300_FG_FOG_BLEND_ENABLE (1 << 0)
+# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1)
+# define R300_FG_FOG_BLEND_FN_EXP (1 << 1)
+# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1)
+# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1)
+# define R300_FG_FOG_BLEND_FN_MASK (3 << 1)
+
+/* Fog: Red Component of Fog Color */
+#define R300_FG_FOG_COLOR_R 0x4bc8
+/* Fog: Green Component of Fog Color */
+#define R300_FG_FOG_COLOR_G 0x4bcc
+/* Fog: Blue Component of Fog Color */
+#define R300_FG_FOG_COLOR_B 0x4bd0
+# define R300_FG_FOG_COLOR_MASK 0x000003ff
+
+/* Fog: Constant Factor for Fog Blending */
+#define R300_FG_FOG_FACTOR 0x4bc4
+# define FG_FOG_FACTOR_MASK 0x000003ff
+
+/* Fog: Alpha function */
+#define R300_FG_ALPHA_FUNC 0x4bd4
+# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff
+# define R300_FG_ALPHA_FUNC_NEVER (0 << 8)
+# define R300_FG_ALPHA_FUNC_LESS (1 << 8)
+# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8)
+# define R300_FG_ALPHA_FUNC_LE (3 << 8)
+# define R300_FG_ALPHA_FUNC_GREATER (4 << 8)
+# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8)
+# define R300_FG_ALPHA_FUNC_GE (6 << 8)
+# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8)
+# define R300_ALPHA_TEST_OP_MASK (7 << 8)
+# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11)
+# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11)
+
+# define R500_FG_ALPHA_FUNC_10BIT (0 << 12)
+# define R500_FG_ALPHA_FUNC_8BIT (1 << 12)
+
+# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16)
+# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16)
+# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17)
+# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17)
+
+# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20)
+# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20)
+
+# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24)
+# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */
+# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25)
+# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25)
+
+# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28)
+# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28)
+
+
+/* Fog: Where does the depth come from? */
+#define R300_FG_DEPTH_SRC 0x4bd8
+# define R300_FG_DEPTH_SRC_SCAN (0 << 0)
+# define R300_FG_DEPTH_SRC_SHADER (1 << 0)
+
+/* Fog: Alpha Compare Value */
+#define R500_FG_ALPHA_VALUE 0x4be0
+# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff
+
+/* gap */
+
+/* Fragment program parameters in 7.16 floating point */
+#define R300_PFS_PARAM_0_X 0x4C00
+#define R300_PFS_PARAM_0_Y 0x4C04
+#define R300_PFS_PARAM_0_Z 0x4C08
+#define R300_PFS_PARAM_0_W 0x4C0C
+/* last consts */
+#define R300_PFS_PARAM_31_X 0x4DF0
+#define R300_PFS_PARAM_31_Y 0x4DF4
+#define R300_PFS_PARAM_31_Z 0x4DF8
+#define R300_PFS_PARAM_31_W 0x4DFC
+
+/* Unpipelined. */
+#define R300_RB3D_CCTL 0x4e00
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5)
+# define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7)
+# define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7)
+# define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9)
+# define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9)
+# define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10)
+# define R300_RB3D_CCTL_CMASK_ENABLE (1 << 10)
+/* reserved */
+# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE (0 << 12)
+# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE (1 << 12)
+# define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE (0 << 13)
+# define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE (1 << 13)
+# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE (0 << 14)
+# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE (1 << 14)
+
+
+/* Notes:
+ * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in
+ * the application
+ * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND
+ * are set to the same function (both registers are always set up
+ * completely in any case)
+ * - Most blend flags are simply copied from R200 and not tested yet
+ */
+#define R300_RB3D_CBLEND 0x4E04
+#define R300_RB3D_ABLEND 0x4E08
+/* the following only appear in CBLEND */
+# define R300_ALPHA_BLEND_ENABLE (1 << 0)
+# define R300_SEPARATE_ALPHA_ENABLE (1 << 1)
+# define R300_READ_ENABLE (1 << 2)
+# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3)
+
+/* the following are shared between CBLEND and ABLEND */
+/* Combine-function field, starting at bit 12. The values below run from
+ * 0 to 7, so the field is three bits wide and the mask must be 7 << 12;
+ * a two-bit mask would drop the top bit of MIN, MAX and both RSUB modes.
+ */
+# define R300_FCN_MASK (7 << 12)
+# define R300_COMB_FCN_ADD_CLAMP (0 << 12)
+# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12)
+# define R300_COMB_FCN_SUB_CLAMP (2 << 12)
+# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12)
+# define R300_COMB_FCN_MIN (4 << 12)
+# define R300_COMB_FCN_MAX (5 << 12)
+# define R300_COMB_FCN_RSUB_CLAMP (6 << 12)
+# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12)
+# define R300_BLEND_GL_ZERO (32)
+# define R300_BLEND_GL_ONE (33)
+# define R300_BLEND_GL_SRC_COLOR (34)
+# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35)
+# define R300_BLEND_GL_DST_COLOR (36)
+# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37)
+# define R300_BLEND_GL_SRC_ALPHA (38)
+# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39)
+# define R300_BLEND_GL_DST_ALPHA (40)
+# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41)
+# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42)
+# define R300_BLEND_GL_CONST_COLOR (43)
+# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44)
+# define R300_BLEND_GL_CONST_ALPHA (45)
+# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46)
+# define R300_BLEND_MASK (63)
+# define R300_SRC_BLEND_SHIFT (16)
+# define R300_DST_BLEND_SHIFT (24)
+
+/* Constant color used by the blender. Pipelined through the blender.
+ * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE,
+ * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead.
+ */
+#define R300_RB3D_BLEND_COLOR 0x4E10
+
+
+/* 3D Color Channel Mask. If all the channels used in the current color format
+ * are disabled, then the cb will discard all the incoming quads. Pipelined
+ * through the blender.
+ */
+#define RB3D_COLOR_CHANNEL_MASK 0x4E0C
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 (1 << 0)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK0 (1 << 2)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1 (1 << 4)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK1 (1 << 6)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2 (1 << 8)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK2 (1 << 10)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3 (1 << 12)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK3 (1 << 14)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15)
+
+/* Clear color that is used when the color mask is set to 00. Unpipelined.
+ * Program this register with a 32-bit value in ARGB8888 or ARGB2101010
+ * formats, ignoring the fields.
+ */
+#define RB3D_COLOR_CLEAR_VALUE 0x4e14
+
+/* gap */
+
+/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */
+#define RB3D_CLRCMP_CLR 0x4e20
+
+/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */
+#define RB3D_CLRCMP_MSK 0x4e24
+
+/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */
+#define R300_RB3D_COLOROFFSET0 0x4E28
+# define R300_COLOROFFSET_MASK 0xFFFFFFE0
+/* Color Buffer Address Offset of multibuffer 1. Unpipelined. */
+#define R300_RB3D_COLOROFFSET1 0x4E2C
+/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */
+#define R300_RB3D_COLOROFFSET2 0x4E30
+/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */
+#define R300_RB3D_COLOROFFSET3 0x4E34
+
+/* Color buffer format and tiling control for all the multibuffers and the
+ * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any
+ * of the registers are changed.
+ *
+ * Bit 16: Larger tiles
+ * Bit 17: 4x2 tiles
+ * Bit 18: Extremely weird tile like, but some pixels duplicated?
+ */
+#define R300_RB3D_COLORPITCH0 0x4E38
+# define R300_COLORPITCH_MASK 0x00003FFE
+# define R300_COLOR_TILE_DISABLE (0 << 16)
+# define R300_COLOR_TILE_ENABLE (1 << 16)
+# define R300_COLOR_MICROTILE_DISABLE (0 << 17)
+# define R300_COLOR_MICROTILE_ENABLE (1 << 17)
+# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */
+# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19)
+# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19)
+# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19)
+# define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19)
+# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21)
+# define R500_COLOR_FORMAT_UV1010 (1 << 21)
+# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */
+# define R300_COLOR_FORMAT_ARGB1555 (3 << 21)
+# define R300_COLOR_FORMAT_RGB565 (4 << 21)
+# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21)
+# define R300_COLOR_FORMAT_ARGB8888 (6 << 21)
+# define R300_COLOR_FORMAT_ARGB32323232 (7 << 21)
+/* reserved */
+# define R300_COLOR_FORMAT_I8 (9 << 21)
+# define R300_COLOR_FORMAT_ARGB16161616 (10 << 21)
+# define R300_COLOR_FORMAT_VYUY (11 << 21)
+# define R300_COLOR_FORMAT_YVYU (12 << 21)
+# define R300_COLOR_FORMAT_UV88 (13 << 21)
+# define R500_COLOR_FORMAT_I10 (14 << 21)
+# define R300_COLOR_FORMAT_ARGB4444 (15 << 21)
+#define R300_RB3D_COLORPITCH1 0x4E3C
+#define R300_RB3D_COLORPITCH2 0x4E40
+#define R300_RB3D_COLORPITCH3 0x4E44
+
+/* gap */
+
+/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then
+ * a flush or free will not occur upon a write to this register, but a sync
+ * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE
+ * are zero but DC_FINISH is one, then a sync will be sent immediately -- the
+ * cb will not wait for all the previous operations to complete before sending
+ * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to
+ * zero.
+ *
+ * Set to 0A before 3D operations, set to 02 afterwards.
+ * (0x0A == DC_FLUSH_FLUSH_DIRTY_3D | DC_FREE_FREE_3D_TAGS,
+ *  0x02 == DC_FLUSH_FLUSH_DIRTY_3D.)
+ *
+ * Field layout: DC_FLUSH in bits 1:0, DC_FREE in bits 3:2, DC_FINISH in
+ * bit 4.
+ */
+#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4)
+
+/* Dither control: color dither mode in bits 1:0 and alpha dither mode in
+ * bits 3:2.  Value 3 of either field is marked reserved below.
+ */
+#define R300_RB3D_DITHER_CTL 0x4E50
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0)
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND (1 << 0)
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT (2 << 0)
+/* reserved */
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE (0 << 2)
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND (1 << 2)
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2)
+/* reserved */
+
+/* Resolve buffer destination address. The cache must be empty before changing
+ * this register if the cb is in resolve mode. Unpipelined
+ *
+ * The mask selects bits 31:5, which matches the shift of 5.
+ */
+#define R300_RB3D_AARESOLVE_OFFSET 0x4e80
+# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5
+# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */
+
+/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before
+ * changing this register if the cb is in resolve mode. Unpipelined
+ *
+ * The mask selects bits 13:1, which matches the shift of 1.
+ */
+#define R300_RB3D_AARESOLVE_PITCH 0x4e84
+# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1
+# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */
+
+/* Resolve Buffer Control. Unpipelined
+ *
+ * Bit 0 selects normal/resolve mode, bit 1 the gamma setting and bit 2
+ * how alpha is resolved (sample 0 or average).
+ */
+#define R300_RB3D_AARESOLVE_CTL 0x4e88
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2)
+
+
+/* Discard src pixels less than or equal to threshold. */
+#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0
+/* Discard src pixels greater than or equal to threshold. */
+#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4
+/* One 8-bit threshold per channel: blue 7:0, green 15:8, red 23:16,
+ * alpha 31:24.  NOTE(review): the field names carry neither LTE nor GTE,
+ * so they presumably apply to both threshold registers; confirm.
+ */
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000
+
+/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle.
+ *
+ * ROP_ENABLE is bit 2; the ROP code is a 4-bit field in bits 11:8
+ * (ROP_MASK is already shifted into place).
+ */
+#define R300_RB3D_ROPCNTL 0x4e18
+# define R300_RB3D_ROPCNTL_ROP_ENABLE 0x00000004
+# define R300_RB3D_ROPCNTL_ROP_MASK (15 << 8)
+# define R300_RB3D_ROPCNTL_ROP_SHIFT 8
+
+/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */
+#define R300_RB3D_CLRCMP_FLIPE 0x4e1c
+
+/* Sets the fifo sizes.
+ *
+ * OP_FIFO_SIZE is a 2-bit selector in bits 1:0 (values 0..3).  The
+ * spellings "QUATER" and "EIGTHS" are part of the existing macro names
+ * and are kept as they are.
+ */
+#define R500_RB3D_FIFO_SIZE 0x4ef4
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0)
+
+/* Constant color used by the blender. Pipelined through the blender.
+ *
+ * Red occupies bits 15:0 and alpha bits 31:16; both masks are already
+ * shifted into place.
+ */
+#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8
+# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff
+# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0
+# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000
+# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16
+
+/* Constant color used by the blender. Pipelined through the blender.
+ *
+ * Blue occupies bits 15:0 and green bits 31:16.  The field macros were
+ * originally named ..._AR_BLUE_* / ..._AR_GREEN_* (copied from the AR
+ * register); the ..._GB_... names are the consistent spelling, and the
+ * old names are kept as aliases so existing users keep compiling.
+ */
+#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc
+# define R500_RB3D_CONSTANT_COLOR_GB_BLUE_MASK 0x0000ffff
+# define R500_RB3D_CONSTANT_COLOR_GB_BLUE_SHIFT 0
+# define R500_RB3D_CONSTANT_COLOR_GB_GREEN_MASK 0xffff0000
+# define R500_RB3D_CONSTANT_COLOR_GB_GREEN_SHIFT 16
+/* Backward-compatible aliases (historical, misleading "_AR_" prefix). */
+# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK R500_RB3D_CONSTANT_COLOR_GB_BLUE_MASK
+# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT R500_RB3D_CONSTANT_COLOR_GB_BLUE_SHIFT
+# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK R500_RB3D_CONSTANT_COLOR_GB_GREEN_MASK
+# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT R500_RB3D_CONSTANT_COLOR_GB_GREEN_SHIFT
+
+/* gap */
+/* There seems to be no "write only" setting, so use Z-test = ALWAYS
+ * for this.
+ * Bit (1<<8) is the "test" bit. so plain write is 6 - vd
+ * (6 == R300_Z_ENABLE | R300_Z_WRITE_ENABLE with the values below.
+ * NOTE(review): no bit 8 is defined for this register here, so the
+ * "test bit" remark may describe an older layout; confirm.)
+ */
+#define R300_ZB_CNTL 0x4F00
+# define R300_STENCIL_ENABLE (1 << 0)
+# define R300_Z_ENABLE (1 << 1)
+# define R300_Z_WRITE_ENABLE (1 << 2)
+# define R300_Z_SIGNED_COMPARE (1 << 3)
+# define R300_STENCIL_FRONT_BACK (1 << 4)
+
+/* Depth/stencil function and operation control.  Every function or
+ * operation selector below is a 3-bit code (0..7); the codes are
+ * unshifted and are moved into place with one of the *_SHIFT values at
+ * the end of this list (the shifts are spaced three bits apart).
+ */
+#define R300_ZB_ZSTENCILCNTL 0x4f04
+ /* functions */
+# define R300_ZS_NEVER 0
+# define R300_ZS_LESS 1
+# define R300_ZS_LEQUAL 2
+# define R300_ZS_EQUAL 3
+# define R300_ZS_GEQUAL 4
+# define R300_ZS_GREATER 5
+# define R300_ZS_NOTEQUAL 6
+# define R300_ZS_ALWAYS 7
+# define R300_ZS_MASK 7
+ /* operations */
+# define R300_ZS_KEEP 0
+# define R300_ZS_ZERO 1
+# define R300_ZS_REPLACE 2
+# define R300_ZS_INCR 3
+# define R300_ZS_DECR 4
+# define R300_ZS_INVERT 5
+# define R300_ZS_INCR_WRAP 6
+# define R300_ZS_DECR_WRAP 7
+# define R300_Z_FUNC_SHIFT 0
+ /* front and back refer to operations done for front
+ and back faces, i.e. separate stencil function support */
+# define R300_S_FRONT_FUNC_SHIFT 3
+# define R300_S_FRONT_SFAIL_OP_SHIFT 6
+# define R300_S_FRONT_ZPASS_OP_SHIFT 9
+# define R300_S_FRONT_ZFAIL_OP_SHIFT 12
+# define R300_S_BACK_FUNC_SHIFT 15
+# define R300_S_BACK_SFAIL_OP_SHIFT 18
+# define R300_S_BACK_ZPASS_OP_SHIFT 21
+# define R300_S_BACK_ZFAIL_OP_SHIFT 24
+
+/* Stencil reference value, compare mask and write mask: three 8-bit
+ * fields in bits 7:0, 15:8 and 23:16.  The *_MASK values are already
+ * shifted into place.
+ */
+#define R300_ZB_STENCILREFMASK 0x4f08
+# define R300_STENCILREF_SHIFT 0
+# define R300_STENCILREF_MASK 0x000000ff
+# define R300_STENCILMASK_SHIFT 8
+# define R300_STENCILMASK_MASK 0x0000ff00
+# define R300_STENCILWRITEMASK_SHIFT 16
+# define R300_STENCILWRITEMASK_MASK 0x00ff0000
+
+/* gap */
+
+/* Depth buffer format: format code in bits 3:0 (only 0..2 are defined,
+ * the rest is reserved) and a 13E3 inversion selector in bit 4.
+ */
+#define R300_ZB_FORMAT 0x4f10
+# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0)
+# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0)
+# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0)
+/* reserved up to (15 << 0) */
+# define R300_INVERT_13E3_LEADING_ONES (0 << 4)
+# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4)
+
+/* ZTOP control: a single enable flag in bit 0. */
+#define R300_ZB_ZTOP 0x4F14
+# define R300_ZTOP_DISABLE (0 << 0)
+# define R300_ZTOP_ENABLE (1 << 0)
+
+/* gap */
+
+/* Z cache control/status: flush request in bit 0, free request in bit 1
+ * and a busy indicator in bit 31.  The busy flag uses an unsigned
+ * constant because (1 << 31) overflows a signed int, which is undefined
+ * behaviour in C.
+ */
+#define R300_ZB_ZCACHE_CTLSTAT 0x4f18
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0u << 31)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1u << 31)
+
+/* Z buffer bandwidth control.  The R300_* flags use bits 0-6 and the
+ * R500_* flags bits 7-19.  Each DISABLE/ENABLE pair (or numbered group)
+ * describes one field; note that several R500 pairs are active-low, i.e.
+ * their ENABLE value is 0.
+ *
+ * COVERED_PTR_MASKING is bit 19.  It was previously defined on bit 18,
+ * where it collided with PEQ_PACKING.
+ */
+#define R300_ZB_BW_CNTL 0x4f1c
+# define R300_HIZ_DISABLE (0 << 0)
+# define R300_HIZ_ENABLE (1 << 0)
+# define R300_HIZ_MIN (0 << 1)
+# define R300_HIZ_MAX (1 << 1)
+# define R300_FAST_FILL_DISABLE (0 << 2)
+# define R300_FAST_FILL_ENABLE (1 << 2)
+# define R300_RD_COMP_DISABLE (0 << 3)
+# define R300_RD_COMP_ENABLE (1 << 3)
+# define R300_WR_COMP_DISABLE (0 << 4)
+# define R300_WR_COMP_ENABLE (1 << 4)
+# define R300_ZB_CB_CLEAR_RMW (0 << 5)
+# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5)
+# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6)
+# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6)
+
+# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7)
+# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7)
+# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8)
+# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8)
+
+# define R500_BMASK_ENABLE (0 << 10)
+# define R500_BMASK_DISABLE (1 << 10)
+# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11)
+# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11)
+# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12)
+# define R500_HIZ_FP_EXP_BITS_1 (1 << 12)
+# define R500_HIZ_FP_EXP_BITS_2 (2 << 12)
+# define R500_HIZ_FP_EXP_BITS_3 (3 << 12)
+# define R500_HIZ_FP_EXP_BITS_4 (4 << 12)
+# define R500_HIZ_FP_EXP_BITS_5 (5 << 12)
+# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15)
+# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15)
+# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16)
+# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16)
+# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17)
+# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17)
+# define R500_PEQ_PACKING_DISABLE (0 << 18)
+# define R500_PEQ_PACKING_ENABLE (1 << 18)
+# define R500_COVERED_PTR_MASKING_DISABLE (0 << 19)
+# define R500_COVERED_PTR_MASKING_ENABLE (1 << 19)
+
+
+/* gap */
+
+/* Z Buffer Address Offset.
+ * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles.
+ * (The low five bits are therefore not part of the offset field.)
+ */
+#define R300_ZB_DEPTHOFFSET 0x4f20
+
+/* Z Buffer Pitch and Endian Control
+ *
+ * Layout: pitch in bits 13:2 (mask already in place), macro-tile enable
+ * in bit 16, micro-tile mode in bits 18:17 and endian swap mode in bits
+ * 20:19.  The endian field starts at bit 19, not 18: the micro-tile field
+ * is two bits wide (TILED_SQUARE is 2 << 17), so an endian field at bit 18
+ * would overlap it.  This mirrors the color pitch register, whose
+ * micro-tile field is at bit 17 and whose endian field is at bit 19.
+ */
+#define R300_ZB_DEPTHPITCH 0x4f24
+# define R300_DEPTHPITCH_MASK 0x00003FFC
+# define R300_DEPTHMACROTILE_DISABLE (0 << 16)
+# define R300_DEPTHMACROTILE_ENABLE (1 << 16)
+# define R300_DEPTHMICROTILE_LINEAR (0 << 17)
+# define R300_DEPTHMICROTILE_TILED (1 << 17)
+# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17)
+# define R300_DEPTHENDIAN_NO_SWAP (0 << 19)
+# define R300_DEPTHENDIAN_WORD_SWAP (1 << 19)
+# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 19)
+# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 19)
+
+/* Z Buffer Clear Value */
+#define R300_ZB_DEPTHCLEARVALUE 0x4f28
+
+/* Offsets 0x4f2c..0x4f40 are not defined in this part of the file; the
+ * hierarchical Z and Z pass counter registers below start at 0x4f44 and
+ * are contiguous up to 0x4f5c.  No bit fields are defined for them here.
+ */
+
+/* Hierarchical Z Memory Offset */
+#define R300_ZB_HIZ_OFFSET 0x4f44
+
+/* Hierarchical Z Write Index */
+#define R300_ZB_HIZ_WRINDEX 0x4f48
+
+/* Hierarchical Z Data */
+#define R300_ZB_HIZ_DWORD 0x4f4c
+
+/* Hierarchical Z Read Index */
+#define R300_ZB_HIZ_RDINDEX 0x4f50
+
+/* Hierarchical Z Pitch */
+#define R300_ZB_HIZ_PITCH 0x4f54
+
+/* Z Buffer Z Pass Counter Data */
+#define R300_ZB_ZPASS_DATA 0x4f58
+
+/* Z Buffer Z Pass Counter Address */
+#define R300_ZB_ZPASS_ADDR 0x4f5c
+
+/* Depth buffer X and Y coordinate offset
+ *
+ * The X offset field is bits 10:1 and the Y offset field bits 26:17; the
+ * masks are already shifted into place and bit 0 of each 16-bit half is
+ * excluded from the field.
+ */
+#define R300_ZB_DEPTHXY_OFFSET 0x4f60
+# define R300_DEPTHX_OFFSET_SHIFT 1
+# define R300_DEPTHX_OFFSET_MASK 0x000007FE
+# define R300_DEPTHY_OFFSET_SHIFT 17
+# define R300_DEPTHY_OFFSET_MASK 0x07FE0000
+
+/* Sets the fifo sizes.
+ *
+ * OP_FIFO_SIZE is a 2-bit selector in bits 1:0, so the four settings are
+ * 0..3.  EIGTHS was previously defined as 4, which does not fit in the
+ * field and disagreed with R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3).
+ * The spellings "QUATER"/"EIGTHS" are kept because they are the
+ * established macro names.
+ */
+#define R500_ZB_FIFO_SIZE 0x4fd0
+# define R500_OP_FIFO_SIZE_FULL (0 << 0)
+# define R500_OP_FIFO_SIZE_HALF (1 << 0)
+# define R500_OP_FIFO_SIZE_QUATER (2 << 0)
+# define R500_OP_FIFO_SIZE_EIGTHS (3 << 0)
+
+/* Stencil Reference Value and Mask for backfacing quads */
+/* R300_ZB_STENCILREFMASK handles front face */
+/* Three 8-bit fields in bits 7:0, 15:8 and 23:16; the shift and mask
+ * values are the same as the R300_STENCIL* ones of the front-face
+ * register.
+ */
+#define R500_ZB_STENCILREFMASK_BF 0x4fd4
+# define R500_STENCILREF_SHIFT 0
+# define R500_STENCILREF_MASK 0x000000ff
+# define R500_STENCILMASK_SHIFT 8
+# define R500_STENCILMASK_MASK 0x0000ff00
+# define R500_STENCILWRITEMASK_SHIFT 16
+# define R500_STENCILWRITEMASK_MASK 0x00ff0000
+
+/**
+ * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION
+ *
+ * The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector
+ * Engine instruction or a Math Engine instruction.
+ */
+
+/*\{*/
+
+/* Vector Engine (VE) opcodes.  Values 0-14 are listed as R3XX opcodes and
+ * 15-28 as R5XX additions.
+ * NOTE(review): presumably these go into the PVS_DST_OPCODE field with
+ * PVS_DST_MATH_INST cleared; confirm against the PVS documentation.
+ */
+enum {
+ /* R3XX */
+ VECTOR_NO_OP = 0,
+ VE_DOT_PRODUCT = 1,
+ VE_MULTIPLY = 2,
+ VE_ADD = 3,
+ VE_MULTIPLY_ADD = 4,
+ VE_DISTANCE_VECTOR = 5,
+ VE_FRACTION = 6,
+ VE_MAXIMUM = 7,
+ VE_MINIMUM = 8,
+ VE_SET_GREATER_THAN_EQUAL = 9,
+ VE_SET_LESS_THAN = 10,
+ VE_MULTIPLYX2_ADD = 11,
+ VE_MULTIPLY_CLAMP = 12,
+ VE_FLT2FIX_DX = 13,
+ VE_FLT2FIX_DX_RND = 14,
+ /* R5XX */
+ VE_PRED_SET_EQ_PUSH = 15,
+ VE_PRED_SET_GT_PUSH = 16,
+ VE_PRED_SET_GTE_PUSH = 17,
+ VE_PRED_SET_NEQ_PUSH = 18,
+ VE_COND_WRITE_EQ = 19,
+ VE_COND_WRITE_GT = 20,
+ VE_COND_WRITE_GTE = 21,
+ VE_COND_WRITE_NEQ = 22,
+ VE_COND_MUX_EQ = 23,
+ VE_COND_MUX_GT = 24,
+ VE_COND_MUX_GTE = 25,
+ VE_SET_GREATER_THAN = 26,
+ VE_SET_EQUAL = 27,
+ VE_SET_NOT_EQUAL = 28,
+};
+
+/* Math Engine (ME) opcodes.  Values 0-17 are listed as R3XX opcodes and
+ * 18-28 as R5XX additions.  The numeric range overlaps the Vector Engine
+ * opcode values, so the two sets are only distinguishable by context.
+ * NOTE(review): presumably PVS_DST_MATH_INST is what selects this set;
+ * confirm against the PVS documentation.
+ */
+enum {
+ /* R3XX */
+ MATH_NO_OP = 0,
+ ME_EXP_BASE2_DX = 1,
+ ME_LOG_BASE2_DX = 2,
+ ME_EXP_BASEE_FF = 3,
+ ME_LIGHT_COEFF_DX = 4,
+ ME_POWER_FUNC_FF = 5,
+ ME_RECIP_DX = 6,
+ ME_RECIP_FF = 7,
+ ME_RECIP_SQRT_DX = 8,
+ ME_RECIP_SQRT_FF = 9,
+ ME_MULTIPLY = 10,
+ ME_EXP_BASE2_FULL_DX = 11,
+ ME_LOG_BASE2_FULL_DX = 12,
+ ME_POWER_FUNC_FF_CLAMP_B = 13,
+ ME_POWER_FUNC_FF_CLAMP_B1 = 14,
+ ME_POWER_FUNC_FF_CLAMP_01 = 15,
+ ME_SIN = 16,
+ ME_COS = 17,
+ /* R5XX */
+ ME_LOG_BASE2_IEEE = 18,
+ ME_RECIP_IEEE = 19,
+ ME_RECIP_SQRT_IEEE = 20,
+ ME_PRED_SET_EQ = 21,
+ ME_PRED_SET_GT = 22,
+ ME_PRED_SET_GTE = 23,
+ ME_PRED_SET_NEQ = 24,
+ ME_PRED_SET_CLR = 25,
+ ME_PRED_SET_INV = 26,
+ ME_PRED_SET_POP = 27,
+ ME_PRED_SET_RESTORE = 28,
+};
+
+/* Macro opcodes.
+ * NOTE(review): presumably used as the opcode when PVS_DST_MACRO_INST is
+ * set; confirm against the PVS documentation.
+ */
+enum {
+ /* R3XX */
+ PVS_MACRO_OP_2CLK_MADD = 0,
+ PVS_MACRO_OP_2CLK_M2X_ADD = 1,
+};
+
+/* Source register classes.  The four values fit the 2-bit
+ * PVS_SRC_REG_TYPE field (mask 0x3) of the source operand.
+ */
+enum {
+ PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */
+ PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */
+ PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */
+ PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */
+};
+
+/* Destination register classes.  The values fit the 4-bit
+ * PVS_DST_REG_TYPE field (mask 0xf) of the destination operand.
+ */
+enum {
+ PVS_DST_REG_TEMPORARY = 0, /* Intermediate Storage */
+ PVS_DST_REG_A0 = 1, /* Address Register Storage */
+ PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */
+ PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */
+ PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */
+ PVS_DST_REG_INPUT = 5, /* NOTE(review): presumably Input Vertex Storage; the previous comment was a verbatim copy of the OUT_REPL_X one -- confirm */
+};
+
+/* Per-channel source swizzle selectors.  The values 0..5 fit the 3-bit
+ * PVS_SRC_SWIZZLE_{X,Y,Z,W} fields (mask 0x7) of the source operand.
+ */
+enum {
+ PVS_SRC_SELECT_X = 0, /* Select X Component */
+ PVS_SRC_SELECT_Y = 1, /* Select Y Component */
+ PVS_SRC_SELECT_Z = 2, /* Select Z Component */
+ PVS_SRC_SELECT_W = 3, /* Select W Component */
+ PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */
+ PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */
+};
+
+/* PVS Opcode & Destination Operand Description */
+
+/* Each field is given as an unshifted *_MASK (the field width) plus the
+ * *_SHIFT of its lowest bit, so a field is placed with
+ * (value & MASK) << SHIFT.
+ * NOTE(review): PRED_SENSE (bit 27) and DUAL_MATH_OP (bits 28:27) overlap
+ * as defined here; confirm whether that is intended.
+ */
+enum {
+ PVS_DST_OPCODE_MASK = 0x3f,
+ PVS_DST_OPCODE_SHIFT = 0,
+ PVS_DST_MATH_INST_MASK = 0x1,
+ PVS_DST_MATH_INST_SHIFT = 6,
+ PVS_DST_MACRO_INST_MASK = 0x1,
+ PVS_DST_MACRO_INST_SHIFT = 7,
+ PVS_DST_REG_TYPE_MASK = 0xf,
+ PVS_DST_REG_TYPE_SHIFT = 8,
+ PVS_DST_ADDR_MODE_1_MASK = 0x1,
+ PVS_DST_ADDR_MODE_1_SHIFT = 12,
+ PVS_DST_OFFSET_MASK = 0x7f,
+ PVS_DST_OFFSET_SHIFT = 13,
+ PVS_DST_WE_X_MASK = 0x1,
+ PVS_DST_WE_X_SHIFT = 20,
+ PVS_DST_WE_Y_MASK = 0x1,
+ PVS_DST_WE_Y_SHIFT = 21,
+ PVS_DST_WE_Z_MASK = 0x1,
+ PVS_DST_WE_Z_SHIFT = 22,
+ PVS_DST_WE_W_MASK = 0x1,
+ PVS_DST_WE_W_SHIFT = 23,
+ PVS_DST_VE_SAT_MASK = 0x1,
+ PVS_DST_VE_SAT_SHIFT = 24,
+ PVS_DST_ME_SAT_MASK = 0x1,
+ PVS_DST_ME_SAT_SHIFT = 25,
+ PVS_DST_PRED_ENABLE_MASK = 0x1,
+ PVS_DST_PRED_ENABLE_SHIFT = 26,
+ PVS_DST_PRED_SENSE_MASK = 0x1,
+ PVS_DST_PRED_SENSE_SHIFT = 27,
+ PVS_DST_DUAL_MATH_OP_MASK = 0x3,
+ PVS_DST_DUAL_MATH_OP_SHIFT = 27,
+ PVS_DST_ADDR_SEL_MASK = 0x3,
+ PVS_DST_ADDR_SEL_SHIFT = 29,
+ PVS_DST_ADDR_MODE_0_MASK = 0x1,
+ PVS_DST_ADDR_MODE_0_SHIFT = 31,
+};
+
+/* PVS Source Operand Description */
+
+/* Each field is given as an unshifted *_MASK (the field width) plus the
+ * *_SHIFT of its lowest bit, so a field is placed with
+ * (value & MASK) << SHIFT.  The fields tile the 32-bit operand word:
+ * register type 1:0, spare 2, abs 3, addr mode 0 at 4, offset 12:5,
+ * swizzles 24:13, per-channel modifiers 28:25, addr select 30:29 and
+ * addr mode 1 in bit 31.
+ *
+ * ADDR_MODE_1 was previously mask 0x0 / shift 32, which made the field
+ * unusable and turned any shift by it into undefined behaviour on a
+ * 32-bit word; bit 31 is the only bit left after ADDR_SEL.
+ */
+enum {
+ PVS_SRC_REG_TYPE_MASK = 0x3,
+ PVS_SRC_REG_TYPE_SHIFT = 0,
+ SPARE_0_MASK = 0x1,
+ SPARE_0_SHIFT = 2,
+ PVS_SRC_ABS_XYZW_MASK = 0x1,
+ PVS_SRC_ABS_XYZW_SHIFT = 3,
+ PVS_SRC_ADDR_MODE_0_MASK = 0x1,
+ PVS_SRC_ADDR_MODE_0_SHIFT = 4,
+ PVS_SRC_OFFSET_MASK = 0xff,
+ PVS_SRC_OFFSET_SHIFT = 5,
+ PVS_SRC_SWIZZLE_X_MASK = 0x7,
+ PVS_SRC_SWIZZLE_X_SHIFT = 13,
+ PVS_SRC_SWIZZLE_Y_MASK = 0x7,
+ PVS_SRC_SWIZZLE_Y_SHIFT = 16,
+ PVS_SRC_SWIZZLE_Z_MASK = 0x7,
+ PVS_SRC_SWIZZLE_Z_SHIFT = 19,
+ PVS_SRC_SWIZZLE_W_MASK = 0x7,
+ PVS_SRC_SWIZZLE_W_SHIFT = 22,
+ PVS_SRC_MODIFIER_X_MASK = 0x1,
+ PVS_SRC_MODIFIER_X_SHIFT = 25,
+ PVS_SRC_MODIFIER_Y_MASK = 0x1,
+ PVS_SRC_MODIFIER_Y_SHIFT = 26,
+ PVS_SRC_MODIFIER_Z_MASK = 0x1,
+ PVS_SRC_MODIFIER_Z_SHIFT = 27,
+ PVS_SRC_MODIFIER_W_MASK = 0x1,
+ PVS_SRC_MODIFIER_W_SHIFT = 28,
+ PVS_SRC_ADDR_SEL_MASK = 0x3,
+ PVS_SRC_ADDR_SEL_SHIFT = 29,
+ PVS_SRC_ADDR_MODE_1_MASK = 0x1,
+ PVS_SRC_ADDR_MODE_1_SHIFT = 31,
+};
+
+/*\}*/
+
+/* BEGIN: Packet 3 commands */
+
+/* A primitive emission dword.
+ *
+ * Primitive type in bits 3:0, vertex walk mode in bits 5:4, color order
+ * in bit 6 and the vertex count from bit 16 upwards.  TYPE_MASK and
+ * WALK_MASK are already in place; NUM_VERTICES_MASK (0xffff) is the
+ * unshifted field width and has to be combined with
+ * R300_PRIM_NUM_VERTICES_SHIFT.
+ */
+#define R300_PRIM_TYPE_NONE (0 << 0)
+#define R300_PRIM_TYPE_POINT (1 << 0)
+#define R300_PRIM_TYPE_LINE (2 << 0)
+#define R300_PRIM_TYPE_LINE_STRIP (3 << 0)
+#define R300_PRIM_TYPE_TRI_LIST (4 << 0)
+#define R300_PRIM_TYPE_TRI_FAN (5 << 0)
+#define R300_PRIM_TYPE_TRI_STRIP (6 << 0)
+#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0)
+#define R300_PRIM_TYPE_RECT_LIST (8 << 0)
+#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0)
+#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0)
+ /* GUESS (based on r200) */
+#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0)
+#define R300_PRIM_TYPE_LINE_LOOP (12 << 0)
+#define R300_PRIM_TYPE_QUADS (13 << 0)
+#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0)
+#define R300_PRIM_TYPE_POLYGON (15 << 0)
+#define R300_PRIM_TYPE_MASK 0xF
+#define R300_PRIM_WALK_IND (1 << 4)
+#define R300_PRIM_WALK_LIST (2 << 4)
+#define R300_PRIM_WALK_RING (3 << 4)
+#define R300_PRIM_WALK_MASK (3 << 4)
+ /* GUESS (based on r200) */
+#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6)
+#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6)
+#define R300_PRIM_NUM_VERTICES_SHIFT 16
+#define R300_PRIM_NUM_VERTICES_MASK 0xffff
+
+
+
+/*
+ * The R500 unified shader (US) registers come in banks of 512 each, one
+ * for each instruction slot in the shader. You can't touch them directly.
+ * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive
+ * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the
+ * instruction is fully specified.
+ */
+/* ALU alpha instruction word.  Layout: opcode 3:0, destination address
+ * from bit 4 with its REL flag in bit 11, source A select 13:12, swizzle
+ * 16:14 and modifier 18:17, source B select 20:19, swizzle 23:21 and
+ * modifier 25:24, output modifier 28:26, target from bit 29 and W_OMASK
+ * in bit 31.  W_OMASK uses an unsigned constant because (1 << 31)
+ * overflows a signed int, which is undefined behaviour in C.
+ */
+#define R500_US_ALU_ALPHA_INST_0 0xa800
+# define R500_ALPHA_OP_MAD 0
+# define R500_ALPHA_OP_DP 1
+# define R500_ALPHA_OP_MIN 2
+# define R500_ALPHA_OP_MAX 3
+/* #define R500_ALPHA_OP_RESERVED 4 */
+# define R500_ALPHA_OP_CND 5
+# define R500_ALPHA_OP_CMP 6
+# define R500_ALPHA_OP_FRC 7
+# define R500_ALPHA_OP_EX2 8
+# define R500_ALPHA_OP_LN2 9
+# define R500_ALPHA_OP_RCP 10
+# define R500_ALPHA_OP_RSQ 11
+# define R500_ALPHA_OP_SIN 12
+# define R500_ALPHA_OP_COS 13
+# define R500_ALPHA_OP_MDH 14
+# define R500_ALPHA_OP_MDV 15
+# define R500_ALPHA_ADDRD(x) ((x) << 4)
+# define R500_ALPHA_ADDRD_REL (1 << 11)
+# define R500_ALPHA_SEL_A_SHIFT 12
+# define R500_ALPHA_SEL_A_SRC0 (0 << 12)
+# define R500_ALPHA_SEL_A_SRC1 (1 << 12)
+# define R500_ALPHA_SEL_A_SRC2 (2 << 12)
+# define R500_ALPHA_SEL_A_SRCP (3 << 12)
+# define R500_ALPHA_SWIZ_A_R (0 << 14)
+# define R500_ALPHA_SWIZ_A_G (1 << 14)
+# define R500_ALPHA_SWIZ_A_B (2 << 14)
+# define R500_ALPHA_SWIZ_A_A (3 << 14)
+# define R500_ALPHA_SWIZ_A_0 (4 << 14)
+# define R500_ALPHA_SWIZ_A_HALF (5 << 14)
+# define R500_ALPHA_SWIZ_A_1 (6 << 14)
+/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */
+# define R500_ALPHA_MOD_A_NOP (0 << 17)
+# define R500_ALPHA_MOD_A_NEG (1 << 17)
+# define R500_ALPHA_MOD_A_ABS (2 << 17)
+# define R500_ALPHA_MOD_A_NAB (3 << 17)
+# define R500_ALPHA_SEL_B_SHIFT 19
+# define R500_ALPHA_SEL_B_SRC0 (0 << 19)
+# define R500_ALPHA_SEL_B_SRC1 (1 << 19)
+# define R500_ALPHA_SEL_B_SRC2 (2 << 19)
+# define R500_ALPHA_SEL_B_SRCP (3 << 19)
+# define R500_ALPHA_SWIZ_B_R (0 << 21)
+# define R500_ALPHA_SWIZ_B_G (1 << 21)
+# define R500_ALPHA_SWIZ_B_B (2 << 21)
+# define R500_ALPHA_SWIZ_B_A (3 << 21)
+# define R500_ALPHA_SWIZ_B_0 (4 << 21)
+# define R500_ALPHA_SWIZ_B_HALF (5 << 21)
+# define R500_ALPHA_SWIZ_B_1 (6 << 21)
+/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */
+# define R500_ALPHA_MOD_B_NOP (0 << 24)
+# define R500_ALPHA_MOD_B_NEG (1 << 24)
+# define R500_ALPHA_MOD_B_ABS (2 << 24)
+# define R500_ALPHA_MOD_B_NAB (3 << 24)
+# define R500_ALPHA_OMOD_IDENTITY (0 << 26)
+# define R500_ALPHA_OMOD_MUL_2 (1 << 26)
+# define R500_ALPHA_OMOD_MUL_4 (2 << 26)
+# define R500_ALPHA_OMOD_MUL_8 (3 << 26)
+# define R500_ALPHA_OMOD_DIV_2 (4 << 26)
+# define R500_ALPHA_OMOD_DIV_4 (5 << 26)
+# define R500_ALPHA_OMOD_DIV_8 (6 << 26)
+# define R500_ALPHA_OMOD_DISABLE (7 << 26)
+# define R500_ALPHA_TARGET(x) ((x) << 29)
+# define R500_ALPHA_W_OMASK (1u << 31)
+/* ALU alpha source addresses.  Three address fields start at bits 0, 10
+ * and 20, each followed by a CONST flag (bits 8, 18, 28) and a REL flag
+ * (bits 9, 19, 29).  SRCP_OP is the 2-bit field in bits 31:30; its values
+ * use unsigned constants because (2 << 30) and (3 << 30) overflow a
+ * signed int, which is undefined behaviour in C.
+ */
+#define R500_US_ALU_ALPHA_ADDR_0 0x9800
+# define R500_ALPHA_ADDR0(x) ((x) << 0)
+# define R500_ALPHA_ADDR0_CONST (1 << 8)
+# define R500_ALPHA_ADDR0_REL (1 << 9)
+# define R500_ALPHA_ADDR1(x) ((x) << 10)
+# define R500_ALPHA_ADDR1_CONST (1 << 18)
+# define R500_ALPHA_ADDR1_REL (1 << 19)
+# define R500_ALPHA_ADDR2(x) ((x) << 20)
+# define R500_ALPHA_ADDR2_CONST (1 << 28)
+# define R500_ALPHA_ADDR2_REL (1 << 29)
+# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0u << 30)
+# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1u << 30)
+# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2u << 30)
+# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3u << 30)
+/* ALU RGBA instruction word.  Layout: opcode 3:0, destination address
+ * from bit 4 with its REL flag in bit 11, RGB source C select 13:12,
+ * R/G/B swizzles 16:14, 19:17 and 22:20, RGB modifier 24:23, alpha source
+ * C select 26:25, alpha swizzle 29:27 and alpha modifier 31:30.  The
+ * alpha modifier values use unsigned constants because (2 << 30) and
+ * (3 << 30) overflow a signed int, which is undefined behaviour in C.
+ */
+#define R500_US_ALU_RGBA_INST_0 0xb000
+# define R500_ALU_RGBA_OP_MAD (0 << 0)
+# define R500_ALU_RGBA_OP_DP3 (1 << 0)
+# define R500_ALU_RGBA_OP_DP4 (2 << 0)
+# define R500_ALU_RGBA_OP_D2A (3 << 0)
+# define R500_ALU_RGBA_OP_MIN (4 << 0)
+# define R500_ALU_RGBA_OP_MAX (5 << 0)
+/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */
+# define R500_ALU_RGBA_OP_CND (7 << 0)
+# define R500_ALU_RGBA_OP_CMP (8 << 0)
+# define R500_ALU_RGBA_OP_FRC (9 << 0)
+# define R500_ALU_RGBA_OP_SOP (10 << 0)
+# define R500_ALU_RGBA_OP_MDH (11 << 0)
+# define R500_ALU_RGBA_OP_MDV (12 << 0)
+# define R500_ALU_RGBA_ADDRD(x) ((x) << 4)
+# define R500_ALU_RGBA_ADDRD_REL (1 << 11)
+# define R500_ALU_RGBA_SEL_C_SHIFT 12
+# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12)
+# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12)
+# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12)
+# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12)
+# define R500_ALU_RGBA_R_SWIZ_R (0 << 14)
+# define R500_ALU_RGBA_R_SWIZ_G (1 << 14)
+# define R500_ALU_RGBA_R_SWIZ_B (2 << 14)
+# define R500_ALU_RGBA_R_SWIZ_A (3 << 14)
+# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14)
+# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14)
+# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14)
+/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */
+# define R500_ALU_RGBA_G_SWIZ_R (0 << 17)
+# define R500_ALU_RGBA_G_SWIZ_G (1 << 17)
+# define R500_ALU_RGBA_G_SWIZ_B (2 << 17)
+# define R500_ALU_RGBA_G_SWIZ_A (3 << 17)
+# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17)
+# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17)
+# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17)
+/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */
+# define R500_ALU_RGBA_B_SWIZ_R (0 << 20)
+# define R500_ALU_RGBA_B_SWIZ_G (1 << 20)
+# define R500_ALU_RGBA_B_SWIZ_B (2 << 20)
+# define R500_ALU_RGBA_B_SWIZ_A (3 << 20)
+# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20)
+# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20)
+# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20)
+/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */
+# define R500_ALU_RGBA_MOD_C_NOP (0 << 23)
+# define R500_ALU_RGBA_MOD_C_NEG (1 << 23)
+# define R500_ALU_RGBA_MOD_C_ABS (2 << 23)
+# define R500_ALU_RGBA_MOD_C_NAB (3 << 23)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25)
+# define R500_ALU_RGBA_A_SWIZ_R (0 << 27)
+# define R500_ALU_RGBA_A_SWIZ_G (1 << 27)
+# define R500_ALU_RGBA_A_SWIZ_B (2 << 27)
+# define R500_ALU_RGBA_A_SWIZ_A (3 << 27)
+# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27)
+# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27)
+# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27)
+/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */
+# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0u << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1u << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2u << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3u << 30)
+/* ALU RGB instruction word.  Layout: source A select 1:0, R/G/B swizzles
+ * 4:2, 7:5 and 10:8, modifier 12:11; source B select 14:13, R/G/B
+ * swizzles 17:15, 20:18 and 23:21, modifier 25:24; output modifier 28:26,
+ * target from bit 29 and WMASK in bit 31.  WMASK uses an unsigned
+ * constant because (1 << 31) overflows a signed int, which is undefined
+ * behaviour in C.
+ */
+#define R500_US_ALU_RGB_INST_0 0xa000
+# define R500_ALU_RGB_SEL_A_SHIFT 0
+# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0)
+# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0)
+# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0)
+# define R500_ALU_RGB_SEL_A_SRCP (3 << 0)
+# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2)
+/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */
+# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5)
+/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */
+# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8)
+/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */
+# define R500_ALU_RGB_MOD_A_NOP (0 << 11)
+# define R500_ALU_RGB_MOD_A_NEG (1 << 11)
+# define R500_ALU_RGB_MOD_A_ABS (2 << 11)
+# define R500_ALU_RGB_MOD_A_NAB (3 << 11)
+# define R500_ALU_RGB_SEL_B_SHIFT 13
+# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13)
+# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13)
+# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13)
+# define R500_ALU_RGB_SEL_B_SRCP (3 << 13)
+# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15)
+/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */
+# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_A (3 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18)
+/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */
+# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21)
+/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */
+# define R500_ALU_RGB_MOD_B_NOP (0 << 24)
+# define R500_ALU_RGB_MOD_B_NEG (1 << 24)
+# define R500_ALU_RGB_MOD_B_ABS (2 << 24)
+# define R500_ALU_RGB_MOD_B_NAB (3 << 24)
+# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26)
+# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26)
+# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26)
+# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26)
+# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26)
+# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26)
+# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26)
+# define R500_ALU_RGB_OMOD_DISABLE (7 << 26)
+# define R500_ALU_RGB_TARGET(x) ((x) << 29)
+# define R500_ALU_RGB_WMASK (1u << 31)
+/* ALU RGB source addresses.  Three address fields start at bits 0, 10
+ * and 20, each followed by a CONST flag (bits 8, 18, 28) and a REL flag
+ * (bits 9, 19, 29).  SRCP_OP is the 2-bit field in bits 31:30; its values
+ * use unsigned constants because (2 << 30) and (3 << 30) overflow a
+ * signed int, which is undefined behaviour in C.
+ */
+#define R500_US_ALU_RGB_ADDR_0 0x9000
+# define R500_RGB_ADDR0(x) ((x) << 0)
+# define R500_RGB_ADDR0_CONST (1 << 8)
+# define R500_RGB_ADDR0_REL (1 << 9)
+# define R500_RGB_ADDR1(x) ((x) << 10)
+# define R500_RGB_ADDR1_CONST (1 << 18)
+# define R500_RGB_ADDR1_REL (1 << 19)
+# define R500_RGB_ADDR2(x) ((x) << 20)
+# define R500_RGB_ADDR2_CONST (1 << 28)
+# define R500_RGB_ADDR2_REL (1 << 29)
+# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0u << 30)
+# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1u << 30)
+# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2u << 30)
+# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3u << 30)
+/* Common instruction word.  Layout: instruction type 1:0, TEX_SEM_WAIT
+ * bit 2, RGB predicate select 5:3 and invert bit 6, WRITE_INACTIVE 7,
+ * LAST 8, NOP 9, ALU_WAIT 10, register write masks 14:11, output masks
+ * 18:15, clamps 19/20, ALU result select 21, alpha predicate invert 22,
+ * ALU result op 24:23, alpha predicate select 27:25 and the STAT_WE
+ * flags 31:28.  STAT_WE_A uses an unsigned constant because (1 << 31)
+ * overflows a signed int, which is undefined behaviour in C.
+ */
+#define R500_US_CMN_INST_0 0xb800
+# define R500_INST_TYPE_MASK (3 << 0)
+# define R500_INST_TYPE_ALU (0 << 0)
+# define R500_INST_TYPE_OUT (1 << 0)
+# define R500_INST_TYPE_FC (2 << 0)
+# define R500_INST_TYPE_TEX (3 << 0)
+# define R500_INST_TEX_SEM_WAIT (1 << 2)
+# define R500_INST_RGB_PRED_SEL_NONE (0 << 3)
+# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3)
+# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3)
+# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3)
+# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3)
+# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3)
+# define R500_INST_RGB_PRED_INV (1 << 6)
+# define R500_INST_WRITE_INACTIVE (1 << 7)
+# define R500_INST_LAST (1 << 8)
+# define R500_INST_NOP (1 << 9)
+# define R500_INST_ALU_WAIT (1 << 10)
+# define R500_INST_RGB_WMASK_R (1 << 11)
+# define R500_INST_RGB_WMASK_G (1 << 12)
+# define R500_INST_RGB_WMASK_B (1 << 13)
+# define R500_INST_ALPHA_WMASK (1 << 14)
+# define R500_INST_RGB_OMASK_R (1 << 15)
+# define R500_INST_RGB_OMASK_G (1 << 16)
+# define R500_INST_RGB_OMASK_B (1 << 17)
+# define R500_INST_RGB_OMASK_RGB (7 << 15)
+# define R500_INST_ALPHA_OMASK (1 << 18)
+# define R500_INST_RGB_CLAMP (1 << 19)
+# define R500_INST_ALPHA_CLAMP (1 << 20)
+# define R500_INST_ALU_RESULT_SEL (1 << 21)
+# define R500_INST_ALPHA_PRED_INV (1 << 22)
+# define R500_INST_ALU_RESULT_OP_EQ (0 << 23)
+# define R500_INST_ALU_RESULT_OP_LT (1 << 23)
+# define R500_INST_ALU_RESULT_OP_GE (2 << 23)
+# define R500_INST_ALU_RESULT_OP_NE (3 << 23)
+# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25)
+# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25)
+# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25)
+# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25)
+# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25)
+# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25)
+/* XXX next four are kind of guessed */
+# define R500_INST_STAT_WE_R (1 << 28)
+# define R500_INST_STAT_WE_G (1 << 29)
+# define R500_INST_STAT_WE_B (1 << 30)
+# define R500_INST_STAT_WE_A (1u << 31)
+
+/* note that these are 8 bit lengths, despite the offsets, at least for R500 */
+/* The start/end and addr/size pairs are packed with the second value in
+ * the upper half of the register (shift 16); the helper macros only shift
+ * and do not mask their argument.
+ */
+#define R500_US_CODE_ADDR 0x4630
+# define R500_US_CODE_START_ADDR(x) ((x) << 0)
+# define R500_US_CODE_END_ADDR(x) ((x) << 16)
+#define R500_US_CODE_OFFSET 0x4638
+# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0)
+#define R500_US_CODE_RANGE 0x4634
+# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0)
+# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16)
+#define R500_US_CONFIG 0x4600
+# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
+/* Flow control address word: bool address from bit 0, int address from
+ * bit 8, jump address from bit 16 and the JUMP_GLOBAL flag in bit 31.
+ * The flag uses an unsigned constant because (1 << 31) overflows a signed
+ * int, which is undefined behaviour in C.  The register offset equals
+ * R500_US_ALU_RGB_INST_0 (0xa000).
+ */
+#define R500_US_FC_ADDR_0 0xa000
+# define R500_FC_BOOL_ADDR(x) ((x) << 0)
+# define R500_FC_INT_ADDR(x) ((x) << 8)
+# define R500_FC_JUMP_ADDR(x) ((x) << 16)
+# define R500_FC_JUMP_GLOBAL (1u << 31)
+/* Flow control boolean constants (value passed through unchanged) and
+ * flow control enable register: TEST_EN in bit 30, FULL_FC_EN in bit 31.
+ * FULL_FC_EN uses an unsigned constant because (1 << 31) overflows a
+ * signed int, which is undefined behaviour in C.
+ */
+#define R500_US_FC_BOOL_CONST 0x4620
+# define R500_FC_KBOOL(x) (x)
+#define R500_US_FC_CTRL 0x4624
+# define R500_FC_TEST_EN (1 << 30)
+# define R500_FC_FULL_FC_EN (1u << 31)
+/* Flow control instruction word.  Layout: opcode 2:0, B_ELSE bit 4,
+ * JUMP_ANY bit 5, A_OP 7:6, jump function from bit 8, B pop count from
+ * bit 16, B_OP0 25:24, B_OP1 27:26 and IGNORE_UNCOVERED bit 28.
+ *
+ * B_OP1 uses the same encoding as B_OP0 (0 = none, 1 = decrement,
+ * 2 = increment).  NONE and DECR were previously swapped for B_OP1 only.
+ * NOTE(review): the corrected encoding is inferred from the B_OP0 field;
+ * confirm against the R5xx register reference.
+ */
+#define R500_US_FC_INST_0 0x9800
+# define R500_FC_OP_JUMP (0 << 0)
+# define R500_FC_OP_LOOP (1 << 0)
+# define R500_FC_OP_ENDLOOP (2 << 0)
+# define R500_FC_OP_REP (3 << 0)
+# define R500_FC_OP_ENDREP (4 << 0)
+# define R500_FC_OP_BREAKLOOP (5 << 0)
+# define R500_FC_OP_BREAKREP (6 << 0)
+# define R500_FC_OP_CONTINUE (7 << 0)
+# define R500_FC_B_ELSE (1 << 4)
+# define R500_FC_JUMP_ANY (1 << 5)
+# define R500_FC_A_OP_NONE (0 << 6)
+# define R500_FC_A_OP_POP (1 << 6)
+# define R500_FC_A_OP_PUSH (2 << 6)
+# define R500_FC_JUMP_FUNC(x) ((x) << 8)
+# define R500_FC_B_POP_CNT(x) ((x) << 16)
+# define R500_FC_B_OP0_NONE (0 << 24)
+# define R500_FC_B_OP0_DECR (1 << 24)
+# define R500_FC_B_OP0_INCR (2 << 24)
+# define R500_FC_B_OP1_NONE (0 << 26)
+# define R500_FC_B_OP1_DECR (1 << 26)
+# define R500_FC_B_OP1_INCR (2 << 26)
+# define R500_FC_IGNORE_UNCOVERED (1 << 28)
+/* Flow control integer constants: three components packed at bits 0, 8
+ * and 16.  The helper macros only shift and do not mask their argument.
+ */
+#define R500_US_FC_INT_CONST_0 0x4c00
+# define R500_FC_INT_CONST_KR(x) ((x) << 0)
+# define R500_FC_INT_CONST_KG(x) ((x) << 8)
+# define R500_FC_INT_CONST_KB(x) ((x) << 16)
+/* _0 through _15 */
+/* Texture format word 0: width from bit 0, height from bit 11 and depth
+ * from bit 22 (again shift-only helpers).
+ */
+#define R500_US_FORMAT0_0 0x4640
+# define R500_FORMAT_TXWIDTH(x) ((x) << 0)
+# define R500_FORMAT_TXHEIGHT(x) ((x) << 11)
+# define R500_FORMAT_TXDEPTH(x) ((x) << 22)
+#define R500_US_PIXSIZE 0x4604
+# define R500_PIX_SIZE(x) (x)
+/* Texture instruction address word.  Layout: source address from bit 0
+ * with its REL flag in bit 7, source S/T/R/Q swizzles 9:8, 11:10, 13:12
+ * and 15:14; destination address from bit 16 with its REL flag in bit 23,
+ * destination R/G/B/A swizzles 25:24, 27:26, 29:28 and 31:30.  The
+ * destination alpha swizzle values use unsigned constants because
+ * (2 << 30) and (3 << 30) overflow a signed int, which is undefined
+ * behaviour in C.  The register offset equals R500_US_ALU_ALPHA_ADDR_0
+ * (0x9800).
+ */
+#define R500_US_TEX_ADDR_0 0x9800
+# define R500_TEX_SRC_ADDR(x) ((x) << 0)
+# define R500_TEX_SRC_ADDR_REL (1 << 7)
+# define R500_TEX_SRC_S_SWIZ_R (0 << 8)
+# define R500_TEX_SRC_S_SWIZ_G (1 << 8)
+# define R500_TEX_SRC_S_SWIZ_B (2 << 8)
+# define R500_TEX_SRC_S_SWIZ_A (3 << 8)
+# define R500_TEX_SRC_T_SWIZ_R (0 << 10)
+# define R500_TEX_SRC_T_SWIZ_G (1 << 10)
+# define R500_TEX_SRC_T_SWIZ_B (2 << 10)
+# define R500_TEX_SRC_T_SWIZ_A (3 << 10)
+# define R500_TEX_SRC_R_SWIZ_R (0 << 12)
+# define R500_TEX_SRC_R_SWIZ_G (1 << 12)
+# define R500_TEX_SRC_R_SWIZ_B (2 << 12)
+# define R500_TEX_SRC_R_SWIZ_A (3 << 12)
+# define R500_TEX_SRC_Q_SWIZ_R (0 << 14)
+# define R500_TEX_SRC_Q_SWIZ_G (1 << 14)
+# define R500_TEX_SRC_Q_SWIZ_B (2 << 14)
+# define R500_TEX_SRC_Q_SWIZ_A (3 << 14)
+# define R500_TEX_DST_ADDR(x) ((x) << 16)
+# define R500_TEX_DST_ADDR_REL (1 << 23)
+# define R500_TEX_DST_R_SWIZ_R (0 << 24)
+# define R500_TEX_DST_R_SWIZ_G (1 << 24)
+# define R500_TEX_DST_R_SWIZ_B (2 << 24)
+# define R500_TEX_DST_R_SWIZ_A (3 << 24)
+# define R500_TEX_DST_G_SWIZ_R (0 << 26)
+# define R500_TEX_DST_G_SWIZ_G (1 << 26)
+# define R500_TEX_DST_G_SWIZ_B (2 << 26)
+# define R500_TEX_DST_G_SWIZ_A (3 << 26)
+# define R500_TEX_DST_B_SWIZ_R (0 << 28)
+# define R500_TEX_DST_B_SWIZ_G (1 << 28)
+# define R500_TEX_DST_B_SWIZ_B (2 << 28)
+# define R500_TEX_DST_B_SWIZ_A (3 << 28)
+# define R500_TEX_DST_A_SWIZ_R (0u << 30)
+# define R500_TEX_DST_A_SWIZ_G (1u << 30)
+# define R500_TEX_DST_A_SWIZ_B (2u << 30)
+# define R500_TEX_DST_A_SWIZ_A (3u << 30)
+/* Per-instruction DX/DY source addressing; the DX fields sit in the low
+ * half of the dword and the DY fields mirror them 16 bits higher. */
+#define R500_US_TEX_ADDR_DXDY_0 0xa000
+# define R500_DX_ADDR(x) ((x) << 0)
+# define R500_DX_ADDR_REL (1 << 7)
+# define R500_DX_S_SWIZ_R (0 << 8)
+# define R500_DX_S_SWIZ_G (1 << 8)
+# define R500_DX_S_SWIZ_B (2 << 8)
+# define R500_DX_S_SWIZ_A (3 << 8)
+# define R500_DX_T_SWIZ_R (0 << 10)
+# define R500_DX_T_SWIZ_G (1 << 10)
+# define R500_DX_T_SWIZ_B (2 << 10)
+# define R500_DX_T_SWIZ_A (3 << 10)
+# define R500_DX_R_SWIZ_R (0 << 12)
+# define R500_DX_R_SWIZ_G (1 << 12)
+# define R500_DX_R_SWIZ_B (2 << 12)
+# define R500_DX_R_SWIZ_A (3 << 12)
+# define R500_DX_Q_SWIZ_R (0 << 14)
+# define R500_DX_Q_SWIZ_G (1 << 14)
+# define R500_DX_Q_SWIZ_B (2 << 14)
+# define R500_DX_Q_SWIZ_A (3 << 14)
+# define R500_DY_ADDR(x) ((x) << 16)
+/* Bit 23, i.e. R500_DX_ADDR_REL shifted up by 16; a value of (1 << 17)
+ * would fall inside the DY_ADDR field itself. */
+# define R500_DY_ADDR_REL (1 << 23)
+# define R500_DY_S_SWIZ_R (0 << 24)
+# define R500_DY_S_SWIZ_G (1 << 24)
+# define R500_DY_S_SWIZ_B (2 << 24)
+# define R500_DY_S_SWIZ_A (3 << 24)
+# define R500_DY_T_SWIZ_R (0 << 26)
+# define R500_DY_T_SWIZ_G (1 << 26)
+# define R500_DY_T_SWIZ_B (2 << 26)
+# define R500_DY_T_SWIZ_A (3 << 26)
+# define R500_DY_R_SWIZ_R (0 << 28)
+# define R500_DY_R_SWIZ_G (1 << 28)
+# define R500_DY_R_SWIZ_B (2 << 28)
+# define R500_DY_R_SWIZ_A (3 << 28)
+# define R500_DY_Q_SWIZ_R (0 << 30)
+# define R500_DY_Q_SWIZ_G (1 << 30)
+# define R500_DY_Q_SWIZ_B (2 << 30)
+# define R500_DY_Q_SWIZ_A (3 << 30)
+#define R500_US_TEX_INST_0 0x9000
+# define R500_TEX_ID(x) ((x) << 16)
+# define R500_TEX_INST_NOP (0 << 22)
+# define R500_TEX_INST_LD (1 << 22)
+# define R500_TEX_INST_TEXKILL (2 << 22)
+# define R500_TEX_INST_PROJ (3 << 22)
+# define R500_TEX_INST_LODBIAS (4 << 22)
+# define R500_TEX_INST_LOD (5 << 22)
+# define R500_TEX_INST_DXDY (6 << 22)
+# define R500_TEX_SEM_ACQUIRE (1 << 25)
+# define R500_TEX_IGNORE_UNCOVERED (1 << 26)
+# define R500_TEX_UNSCALED (1 << 27)
+#define R300_US_W_FMT 0x46b4
+# define R300_W_FMT_W0 (0 << 0)
+# define R300_W_FMT_W24 (1 << 0)
+# define R300_W_FMT_W24FP (2 << 0)
+# define R300_W_SRC_US (0 << 2)
+# define R300_W_SRC_RAS (1 << 2)
+
+
+/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
+ * Two parameter dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ */
+#define R300_PACKET3_3D_DRAW_VBUF 0x00002800
+
+/* Draw a primitive from immediate vertices in this packet
+ * Up to 16382 dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ * 2 to end: Up to 16380 dwords of vertex data.
+ */
+#define R300_PACKET3_3D_DRAW_IMMD 0x00002900
+
+/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and
+ * immediate vertices in this packet
+ * Up to 16382 dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ * 2 to end: Up to 16380 dwords of vertex data.
+ */
+#define R300_PACKET3_3D_DRAW_INDX 0x00002A00
+
+
+/* Specify the full set of vertex arrays as (address, stride).
+ * The first parameter is the number of vertex arrays specified.
+ * The rest of the command is a variable length list of blocks, where
+ * each block is three dwords long and specifies two arrays.
+ * The first dword of a block is split into two words, the lower significant
+ * word refers to the first array, the more significant word to the second
+ * array in the block.
+ * The low byte of each word contains the size of an array entry in dwords,
+ * the high byte contains the stride of the array.
+ * The second dword of a block contains the pointer to the first array,
+ * the third dword of a block contains the pointer to the second array.
+ * Note that if the total number of arrays is odd, the third dword of
+ * the last block is omitted.
+ */
+#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00
+
+#define R300_PACKET3_INDX_BUFFER 0x00003300
+# define R300_INDX_BUFFER_DST_SHIFT 0
+# define R300_INDX_BUFFER_SKIP_SHIFT 16
+# define R300_INDX_BUFFER_ONE_REG_WR (1<<31)
+
+/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400
+/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_IMMD_2 0x00003500
+/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600
+
+/* Clears a portion of hierarchical Z RAM
+ * 3 dword parameters
+ * 0. START
+ * 1. COUNT: 13:0 (max is 0x3FFF)
+ * 2. CLEAR_VALUE: Value to write into HIZ RAM.
+ */
+#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700
+
+/* Draws a set of primitives using vertex buffers pointed to by the state data.
+ * At least 2 parameters:
+ * 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword.
+ * 1 to end: Data or indices (see other 3D_DRAW_* packets for details)
+ */
+#define R300_PACKET3_3D_DRAW_128 0x00003900
+
+/* END: Packet 3 commands */
+
+
+/* Color formats for 2d packets
+ */
+#define R300_CP_COLOR_FORMAT_CI8 2
+#define R300_CP_COLOR_FORMAT_ARGB1555 3
+#define R300_CP_COLOR_FORMAT_RGB565 4
+#define R300_CP_COLOR_FORMAT_ARGB8888 6
+#define R300_CP_COLOR_FORMAT_RGB332 7
+#define R300_CP_COLOR_FORMAT_RGB8 9
+#define R300_CP_COLOR_FORMAT_ARGB4444 15
+
+/*
+ * CP type-3 packets
+ */
+#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00
+
+/* XXX Corbin's stuff from radeon and r200 */
+
+#define RADEON_WAIT_UNTIL 0x1720
+# define RADEON_WAIT_CRTC_PFLIP (1 << 0)
+# define RADEON_WAIT_2D_IDLECLEAN (1 << 16)
+# define RADEON_WAIT_3D_IDLECLEAN (1 << 17)
+# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18)
+
+#define RADEON_CP_PACKET3 0xC0000000
+
+#define R200_3D_DRAW_IMMD_2 0xC0003500
+
+#endif /* _R300_REG_H */
+
+/* *INDENT-ON* */
+
+/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
new file mode 100644
index 0000000000..5ff9015a7b
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_screen.h"
+
+/* Report the vendor string advertised by this driver.
+ *
+ * The screen argument is not consulted; every screen gets the same
+ * statically allocated, NUL-terminated string, which the caller must not
+ * free or modify. */
+static const char* r300_get_vendor(struct pipe_screen* pscreen)
+{
+    static const char vendor_name[] = "X.Org R300 Project";
+
+    return vendor_name;
+}
+
+/* Human-readable chip family names. r300_get_name() indexes this table
+ * directly with caps->family, without any bounds check.
+ * NOTE(review): the entry order presumably has to match the chip family
+ * enumeration declared elsewhere (r300_chipset.h?) -- confirm before
+ * adding or reordering entries. */
+static const char* chip_families[] = {
+ "R300",
+ "R350",
+ "R360",
+ "RV350",
+ "RV370",
+ "RV380",
+ "R420",
+ "R423",
+ "R430",
+ "R480",
+ "R481",
+ "RV410",
+ "RS400",
+ "RC410",
+ "RS480",
+ "RS482",
+ "RS690",
+ "RS740",
+ "RV515",
+ "R520",
+ "RV530",
+ "R580",
+ "RV560",
+ "RV570"
+};
+
+/* Return the name of the chip family recorded in the screen's capability
+ * block. The returned string is an entry of chip_families and must not be
+ * freed. */
+static const char* r300_get_name(struct pipe_screen* pscreen)
+{
+    /* caps->family is used as-is as the table index. */
+    return chip_families[r300_screen(pscreen)->caps->family];
+}
+
+/* Answer an integer capability query for this screen.
+ *
+ * Several limits depend on whether the chip is an R500 (caps->is_r500).
+ * Parameters the driver does not know about are reported via debug_printf
+ * and answered with 0. */
+static int r300_get_param(struct pipe_screen* pscreen, int param)
+{
+    const struct r300_capabilities* caps = r300_screen(pscreen)->caps;
+
+    switch (param) {
+        /* "IN THEORY" capabilities: possible on the hardware, but not
+         * implemented by the driver yet, hence reported as absent. */
+        case PIPE_CAP_NPOT_TEXTURES:
+        case PIPE_CAP_GLSL:
+        case PIPE_CAP_S3TC:
+        case PIPE_CAP_ANISOTROPIC_FILTER:
+        case PIPE_CAP_POINT_SPRITE:
+        case PIPE_CAP_OCCLUSION_QUERY:
+        case PIPE_CAP_TEXTURE_SHADOW_MAP:
+            return 0;
+
+        case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+            /* XXX reportedly the hardware goes up to 16 */
+            return 8;
+
+        case PIPE_CAP_TWO_SIDED_STENCIL:
+            return caps->is_r500 ? 1 : 0;
+
+        case PIPE_CAP_MAX_RENDER_TARGETS:
+            return 4;
+
+        case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+        case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+            /* 13 levels == 4096x4096, 12 levels == 2048x2048 */
+            return caps->is_r500 ? 13 : 12;
+
+        case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+            /* Technically the limit matches the 2D one, but a
+             * 4096x4096x4096 volume holds 64 Gi texels, which is far
+             * beyond anything practical. Raise or remove this cap if an
+             * application ever really needs more.
+             * 9 levels == 256x256x256, 8 levels == 128x128x128 */
+            return caps->is_r500 ? 9 : 8;
+
+        case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+        case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+            return 1;
+
+        case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+            /* XXX guessing (what a terrible guess) */
+            return 2;
+
+        default:
+            debug_printf("r300: Implementation error: Bad param %d\n",
+                         param);
+            return 0;
+    }
+}
+
+/* Answer a floating-point capability query for this screen.
+ *
+ * The screen argument is not consulted. Unknown parameters are reported
+ * via debug_printf and answered with 0.0f. */
+static float r300_get_paramf(struct pipe_screen* pscreen, int param)
+{
+    switch (param) {
+        case PIPE_CAP_MAX_LINE_WIDTH:
+        case PIPE_CAP_MAX_LINE_WIDTH_AA:
+        case PIPE_CAP_MAX_POINT_WIDTH:
+        case PIPE_CAP_MAX_POINT_WIDTH_AA:
+            /* XXX this is the biggest value that fits in the size
+             * register; the real rendering limits may be lower. */
+            return 10922.0f;
+
+        case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+        case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+            return 16.0f;
+
+        default:
+            debug_printf("r300: Implementation error: Bad paramf %d\n",
+                         param);
+            return 0.0f;
+    }
+}
+
+/* Decide whether a pipe format can be used for a 2D texture or surface.
+ *
+ * Returns TRUE for the formats the driver implements. Formats the
+ * hardware supports but the driver does not implement yet, and formats
+ * that are not supported at all, both yield FALSE after a debug message.
+ *
+ * is_r500 is currently unused: the r5xx-only formats have no pipe_format
+ * counterpart yet, so there is nothing chip-specific to check. */
+static boolean check_tex_2d_format(enum pipe_format format, boolean is_r500)
+{
+    switch (format) {
+        /* Colorbuffer */
+        case PIPE_FORMAT_A8R8G8B8_UNORM:
+        /* Texture */
+        case PIPE_FORMAT_I8_UNORM:
+        /* Z buffer */
+        case PIPE_FORMAT_Z16_UNORM:
+        /* Z buffer with stencil */
+        case PIPE_FORMAT_Z24S8_UNORM:
+            return TRUE;
+
+        /* XXX Supported yet unimplemented formats: */
+        case PIPE_FORMAT_A1R5G5B5_UNORM:
+        case PIPE_FORMAT_R5G6B5_UNORM:
+        /* XXX These don't even exist
+        case PIPE_FORMAT_A32R32G32B32:
+        case PIPE_FORMAT_A16R16G16B16: */
+        /* XXX Insert YUV422 packed VYUY and YVYU here */
+        /* XXX What the deuce is UV88? (r3xx accel page 14) */
+        case PIPE_FORMAT_A4R4G4B4_UNORM:
+            debug_printf("r300: Warning: Got unimplemented format: %s in %s\n",
+                         pf_name(format), __FUNCTION__);
+            return FALSE;
+
+        /* XXX Supported yet unimplemented r5xx formats go here once they
+         * exist as pipe formats; they should warn about an "unimplemented
+         * r500 format" and return FALSE.
+         * XXX Again, what is UV1010 this time? (r5xx accel page 148)
+        case PIPE_FORMAT_A10R10G10B10_UNORM:
+        case PIPE_FORMAT_A2R10G10B10_UNORM:
+        case PIPE_FORMAT_I10_UNORM: */
+
+        default:
+            debug_printf("r300: Warning: Got unsupported format: %s in %s\n",
+                         pf_name(format), __FUNCTION__);
+            break;
+    }
+
+    return FALSE;
+}
+
+/* Tell the state tracker whether a format is usable for a texture target.
+ *
+ * Only PIPE_TEXTURE_2D is handled so far (XXX moar targets); any other
+ * target is reported via debug_printf and rejected. tex_usage and
+ * geom_flags are currently ignored. */
+static boolean r300_is_format_supported(struct pipe_screen* pscreen,
+                                        enum pipe_format format,
+                                        enum pipe_texture_target target,
+                                        unsigned tex_usage,
+                                        unsigned geom_flags)
+{
+    switch (target) {
+        case PIPE_TEXTURE_2D:
+            return check_tex_2d_format(format,
+                r300_screen(pscreen)->caps->is_r500);
+        default:
+            /* Print the offending target, not the format. */
+            debug_printf("r300: Warning: Got unknown format target: %d\n",
+                         target);
+            break;
+    }
+
+    return FALSE;
+}
+
+/* Create a transfer object describing a w x h window at (x, y) of one
+ * mipmap level of a texture.
+ *
+ * Only plain (non-cube, non-3D) textures are supported; for those, face
+ * and zslice must be 0. Cube and 3D textures only produce a debug message
+ * and fall back to offset 0.
+ *
+ * Returns the new transfer with a reference count of 1 and a reference
+ * held on the texture, or NULL if allocation fails. */
+static struct pipe_transfer*
+r300_get_tex_transfer(struct pipe_screen *screen,
+                      struct pipe_texture *texture,
+                      unsigned face, unsigned level, unsigned zslice,
+                      enum pipe_transfer_usage usage, unsigned x, unsigned y,
+                      unsigned w, unsigned h)
+{
+    struct r300_texture *tex = (struct r300_texture *)texture;
+    struct r300_transfer *trans;
+    /* In bytes. Initialized so the unsupported targets below do not
+     * propagate an indeterminate value. */
+    unsigned offset = 0;
+
+    /* XXX Add support for these things */
+    if (texture->target == PIPE_TEXTURE_CUBE) {
+        debug_printf("PIPE_TEXTURE_CUBE is not yet supported.\n");
+        /* offset = tex->image_offset[level][face]; */
+    }
+    else if (texture->target == PIPE_TEXTURE_3D) {
+        debug_printf("PIPE_TEXTURE_3D is not yet supported.\n");
+        /* offset = tex->image_offset[level][zslice]; */
+    }
+    else {
+        offset = tex->offset[level];
+        assert(face == 0);
+        assert(zslice == 0);
+    }
+
+    trans = CALLOC_STRUCT(r300_transfer);
+    if (!trans) {
+        return NULL;
+    }
+
+    trans->transfer.refcount = 1;
+    pipe_texture_reference(&trans->transfer.texture, texture);
+    /* The format comes from the texture; the freshly zeroed transfer has
+     * nothing meaningful to copy from itself. */
+    trans->transfer.format = texture->format;
+    /* r300_transfer_map() derives the mapped address from x and y, so the
+     * window origin has to be recorded here. */
+    trans->transfer.x = x;
+    trans->transfer.y = y;
+    trans->transfer.width = w;
+    trans->transfer.height = h;
+    trans->transfer.block = texture->block;
+    trans->transfer.nblocksx = texture->nblocksx[level];
+    trans->transfer.nblocksy = texture->nblocksy[level];
+    trans->transfer.stride = tex->stride;
+    trans->transfer.usage = usage;
+    trans->offset = offset;
+
+    return &trans->transfer;
+}
+
+/* Drop one reference to a transfer and clear the caller's pointer.
+ *
+ * When the last reference goes away, the texture reference held by the
+ * transfer is released and the transfer itself is freed. */
+static void
+r300_tex_transfer_release(struct pipe_screen *screen,
+                          struct pipe_transfer **transfer)
+{
+    struct pipe_transfer *xfer = *transfer;
+
+    xfer->refcount--;
+    if (xfer->refcount == 0) {
+        pipe_texture_reference(&xfer->texture, NULL);
+        FREE(xfer);
+    }
+
+    /* The caller's handle is invalidated in every case. */
+    *transfer = NULL;
+}
+
+/* Map the buffer behind a transfer's texture and return a pointer to the
+ * start of the transfer window, or NULL if the buffer cannot be mapped.
+ *
+ * CPU read access is requested unless the transfer is write-only, and CPU
+ * write access unless it is read-only. */
+static void* r300_transfer_map(struct pipe_screen* screen,
+                               struct pipe_transfer* transfer)
+{
+    struct r300_texture* tex = (struct r300_texture*)transfer->texture;
+    unsigned flags = 0;
+    char* ptr;
+
+    if (transfer->usage != PIPE_TRANSFER_WRITE) {
+        flags |= PIPE_BUFFER_USAGE_CPU_READ;
+    }
+    if (transfer->usage != PIPE_TRANSFER_READ) {
+        flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
+    }
+
+    ptr = pipe_buffer_map(screen, tex->buffer, flags);
+    if (!ptr) {
+        return NULL;
+    }
+
+    /* Start of the mipmap level within the buffer... */
+    ptr += r300_transfer(transfer)->offset;
+    /* ...then down to the block row containing y... */
+    ptr += transfer->y / transfer->block.height * transfer->stride;
+    /* ...and across to the block containing x. */
+    ptr += transfer->x / transfer->block.width * transfer->block.size;
+
+    return ptr;
+}
+
+/* Unmap the buffer backing the texture that a transfer refers to. */
+static void r300_transfer_unmap(struct pipe_screen* screen,
+                                struct pipe_transfer* transfer)
+{
+    pipe_buffer_unmap(screen,
+                      ((struct r300_texture*)transfer->texture)->buffer);
+}
+
+/* Tear down a screen: free its capability block, then the screen. */
+static void r300_destroy_screen(struct pipe_screen* pscreen)
+{
+    struct r300_screen* doomed = r300_screen(pscreen);
+
+    /* caps must go first; it is reached through the screen. */
+    FREE(doomed->caps);
+    FREE(doomed);
+}
+
+/* Create a new r300 screen.
+ *
+ * Allocates the screen and its capability block, seeds the capabilities
+ * with the PCI id and pipe count reported by the r300 winsys, lets
+ * r300_parse_chipset() fill in the rest, and installs the screen entry
+ * points.
+ *
+ * Returns the embedded pipe_screen, or NULL if either allocation fails. */
+struct pipe_screen* r300_create_screen(struct pipe_winsys* winsys,
+                                       struct r300_winsys* r300_winsys)
+{
+    struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen);
+    struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities);
+
+    if (!r300screen || !caps) {
+        /* One of the two allocations may have succeeded; release it so a
+         * partial failure does not leak. */
+        if (r300screen) {
+            FREE(r300screen);
+        }
+        if (caps) {
+            FREE(caps);
+        }
+        return NULL;
+    }
+
+    caps->pci_id = r300_winsys->pci_id;
+    caps->num_frag_pipes = r300_winsys->gb_pipes;
+
+    r300_parse_chipset(caps);
+
+    r300screen->caps = caps;
+    r300screen->screen.winsys = winsys;
+    r300screen->screen.destroy = r300_destroy_screen;
+    r300screen->screen.get_name = r300_get_name;
+    r300screen->screen.get_vendor = r300_get_vendor;
+    r300screen->screen.get_param = r300_get_param;
+    r300screen->screen.get_paramf = r300_get_paramf;
+    r300screen->screen.is_format_supported = r300_is_format_supported;
+    r300screen->screen.get_tex_transfer = r300_get_tex_transfer;
+    r300screen->screen.tex_transfer_release = r300_tex_transfer_release;
+    r300screen->screen.transfer_map = r300_transfer_map;
+    r300screen->screen.transfer_unmap = r300_transfer_unmap;
+
+    r300_init_screen_texture_functions(&r300screen->screen);
+    u_simple_screen_init(&r300screen->screen);
+
+    return &r300screen->screen;
+}
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
new file mode 100644
index 0000000000..6c845144cb
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_SCREEN_H
+#define R300_SCREEN_H
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+#include "util/u_simple_screen.h"
+
+#include "r300_chipset.h"
+#include "r300_texture.h"
+#include "r300_winsys.h"
+
+/* Driver-private screen object. The pipe_screen is the first member so
+ * that a struct pipe_screen* can be cast to a struct r300_screen* (see the
+ * r300_screen() cast wrapper). */
+struct r300_screen {
+ /* Parent class */
+ struct pipe_screen screen;
+
+ /* Chipset capabilities */
+ struct r300_capabilities* caps;
+};
+
+/* Driver-private transfer object. The pipe_transfer is the first member
+ * so that a struct pipe_transfer* can be cast to a struct r300_transfer*
+ * (see the r300_transfer() cast wrapper). */
+struct r300_transfer {
+ /* Parent class */
+ struct pipe_transfer transfer;
+
+ /* Offset from start of buffer. */
+ unsigned offset;
+};
+
+/* Convenience cast wrapper: view a pipe_screen as the r300_screen that
+ * embeds it. Marked INLINE, like r300_transfer(), so that translation
+ * units which include this header without calling it do not get an
+ * unused static function. */
+static INLINE struct r300_screen*
+r300_screen(struct pipe_screen* screen)
+{
+    return (struct r300_screen*)screen;
+}
+
+/* Convenience cast wrapper: view a pipe_transfer as the r300_transfer
+ * that embeds it. */
+static INLINE struct r300_transfer*
+r300_transfer(struct pipe_transfer* transfer)
+{
+    struct r300_transfer* wrapped = (struct r300_transfer*)transfer;
+
+    return wrapped;
+}
+
+/* Creates a new r300 screen. */
+struct pipe_screen* r300_create_screen(struct pipe_winsys* winsys,
+ struct r300_winsys* r300_winsys);
+
+#endif /* R300_SCREEN_H */
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
new file mode 100644
index 0000000000..da99a3be6b
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -0,0 +1,854 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "util/u_math.h"
+#include "util/u_pack_color.h"
+
+#include "util/u_debug.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "r300_context.h"
+#include "r300_reg.h"
+#include "r300_state_shader.h"
+
+/* r300_state: Functions used to initialize state context by translating
+ * Gallium state objects into semi-native r300 state objects.
+ *
+ * XXX break this file up into pieces if it gets too big! */
+
+/* Reinterpret the bits of a float as a 32-bit unsigned integer.
+ * No numeric conversion takes place; the value is type-punned through a
+ * union. */
+static uint32_t pack_float_32(float f)
+{
+    union {
+        uint32_t bits;
+        float value;
+    } pun;
+
+    pun.value = f;
+
+    return pun.bits;
+}
+
+/* Map a PIPE_BLEND_* equation onto the matching R300_COMB_FCN_* value.
+ * Unknown equations are reported via debug_printf and map to 0. */
+static uint32_t translate_blend_function(int blend_func) {
+    switch (blend_func) {
+        case PIPE_BLEND_ADD:
+            return R300_COMB_FCN_ADD_CLAMP;
+        case PIPE_BLEND_SUBTRACT:
+            return R300_COMB_FCN_SUB_CLAMP;
+        case PIPE_BLEND_REVERSE_SUBTRACT:
+            return R300_COMB_FCN_RSUB_CLAMP;
+        case PIPE_BLEND_MIN:
+            return R300_COMB_FCN_MIN;
+        case PIPE_BLEND_MAX:
+            return R300_COMB_FCN_MAX;
+        default:
+            debug_printf("r300: Unknown blend function %d\n", blend_func);
+            return 0;
+    }
+}
+
+/* Map a PIPE_BLENDFACTOR_* value onto the matching R300_BLEND_GL_* value.
+ * Unknown factors are reported via debug_printf and map to 0.
+ *
+ * XXX we can also offer the D3D versions of some of these...
+ * XXX the SRC1_COLOR / SRC1_ALPHA factors and their inverses are not
+ * handled (WTF are these?); they end up in the default case. */
+static uint32_t translate_blend_factor(int blend_fact) {
+    uint32_t hw_factor = 0;
+
+    switch (blend_fact) {
+        case PIPE_BLENDFACTOR_ZERO:
+            hw_factor = R300_BLEND_GL_ZERO;
+            break;
+        case PIPE_BLENDFACTOR_ONE:
+            hw_factor = R300_BLEND_GL_ONE;
+            break;
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+            hw_factor = R300_BLEND_GL_SRC_COLOR;
+            break;
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+            hw_factor = R300_BLEND_GL_ONE_MINUS_SRC_COLOR;
+            break;
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+            hw_factor = R300_BLEND_GL_SRC_ALPHA;
+            break;
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+            hw_factor = R300_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+            break;
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+            hw_factor = R300_BLEND_GL_DST_ALPHA;
+            break;
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+            hw_factor = R300_BLEND_GL_ONE_MINUS_DST_ALPHA;
+            break;
+        case PIPE_BLENDFACTOR_DST_COLOR:
+            hw_factor = R300_BLEND_GL_DST_COLOR;
+            break;
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+            hw_factor = R300_BLEND_GL_ONE_MINUS_DST_COLOR;
+            break;
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+            hw_factor = R300_BLEND_GL_SRC_ALPHA_SATURATE;
+            break;
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+            hw_factor = R300_BLEND_GL_CONST_COLOR;
+            break;
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+            hw_factor = R300_BLEND_GL_ONE_MINUS_CONST_COLOR;
+            break;
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+            hw_factor = R300_BLEND_GL_CONST_ALPHA;
+            break;
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+            hw_factor = R300_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+            break;
+        default:
+            debug_printf("r300: Unknown blend factor %d\n", blend_fact);
+            break;
+    }
+
+    return hw_factor;
+}
+
+/* Create a new blend state based on the CSO blend state.
+ *
+ * This encompasses alpha blending, logic/raster ops, and blend dithering.
+ * Registers for features that are disabled in the CSO stay zero.
+ *
+ * Returns the new r300_blend_state, or NULL if allocation fails. */
+static void* r300_create_blend_state(struct pipe_context* pipe,
+                                     const struct pipe_blend_state* state)
+{
+    struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
+
+    if (!blend) {
+        return NULL;
+    }
+
+    if (state->blend_enable) {
+        /* XXX for now, always do separate alpha...
+         * is it faster to do it with one reg? */
+        blend->blend_control = R300_ALPHA_BLEND_ENABLE |
+                R300_SEPARATE_ALPHA_ENABLE |
+                R300_READ_ENABLE |
+                translate_blend_function(state->rgb_func) |
+                (translate_blend_factor(state->rgb_src_factor) <<
+                    R300_SRC_BLEND_SHIFT) |
+                (translate_blend_factor(state->rgb_dst_factor) <<
+                    R300_DST_BLEND_SHIFT);
+        blend->alpha_blend_control =
+                translate_blend_function(state->alpha_func) |
+                (translate_blend_factor(state->alpha_src_factor) <<
+                    R300_SRC_BLEND_SHIFT) |
+                (translate_blend_factor(state->alpha_dst_factor) <<
+                    R300_DST_BLEND_SHIFT);
+    }
+
+    /* PIPE_LOGICOP_* don't need to be translated, fortunately. */
+    /* XXX are logicops still allowed if blending's disabled?
+     * Does Gallium take care of it for us? */
+    if (state->logicop_enable) {
+        blend->rop = R300_RB3D_ROPCNTL_ROP_ENABLE |
+                ((state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT);
+    }
+
+    if (state->dither) {
+        blend->dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT |
+                R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT;
+    }
+
+    return (void*)blend;
+}
+
+/* Make the given blend state current and flag it for re-emission. */
+static void r300_bind_blend_state(struct pipe_context* pipe,
+                                  void* state)
+{
+    struct r300_context* ctx = r300_context(pipe);
+
+    ctx->dirty_state |= R300_NEW_BLEND;
+    ctx->blend_state = (struct r300_blend_state*)state;
+}
+
+/* Free blend state.
+ * Releases the state object with FREE(); the pipe argument is unused. */
+static void r300_delete_blend_state(struct pipe_context* pipe,
+ void* state)
+{
+ FREE(state);
+}
+
+/* Set blend color.
+ * Setup both R300 and R500 registers, figure out later which one to write.
+ *
+ * The single packed word takes 8 bits per channel (A, R, G, B from the top
+ * byte down), so it is built from the 8-bit conversions. The split
+ * red/alpha and green/blue words take the wider 10-bit values, one per
+ * 16-bit half. Packing 10-bit values into the 8-bit fields would spill
+ * each channel into its neighbour.
+ * NOTE(review): presumably blend_color is the R300 register and the two
+ * split words are the R500 ones -- confirm against the emit code. */
+static void r300_set_blend_color(struct pipe_context* pipe,
+                                 const struct pipe_blend_color* color)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    uint32_t r10, g10, b10, a10;
+    uint32_t r8, g8, b8, a8;
+
+    /* Clamp before scaling so an out-of-range colour cannot overflow its
+     * 10-bit field. */
+    r10 = util_iround(CLAMP(color->color[0], 0.0f, 1.0f) * 1023.0f);
+    g10 = util_iround(CLAMP(color->color[1], 0.0f, 1.0f) * 1023.0f);
+    b10 = util_iround(CLAMP(color->color[2], 0.0f, 1.0f) * 1023.0f);
+    a10 = util_iround(CLAMP(color->color[3], 0.0f, 1.0f) * 1023.0f);
+
+    /* Widen to uint32_t before shifting; shifting a promoted ubyte left
+     * by 24 could overflow a signed int. */
+    r8 = float_to_ubyte(color->color[0]);
+    g8 = float_to_ubyte(color->color[1]);
+    b8 = float_to_ubyte(color->color[2]);
+    a8 = float_to_ubyte(color->color[3]);
+
+    r300->blend_color_state->blend_color =
+        (a8 << 24) | (r8 << 16) | (g8 << 8) | b8;
+
+    r300->blend_color_state->blend_color_red_alpha = r10 | (a10 << 16);
+    r300->blend_color_state->blend_color_green_blue = b10 | (g10 << 16);
+
+    r300->dirty_state |= R300_NEW_BLEND_COLOR;
+}
+
+/* Hand the clip state to the draw module (XXX Draw).
+ * Pending draw work is flushed before the new state is installed. */
+static void r300_set_clip_state(struct pipe_context* pipe,
+                                const struct pipe_clip_state* state)
+{
+    struct r300_context* ctx = r300_context(pipe);
+
+    draw_flush(ctx->draw);
+    draw_set_clip_state(ctx->draw, state);
+}
+
+/* Copy the contents of a constant buffer into the context's shadow copy
+ * for the given shader stage and mark the constants dirty.
+ *
+ * user_count is set to the number of 4-float vectors copied, or to 0 when
+ * there is no buffer, the buffer is empty, or it cannot be mapped.
+ * index is unused.
+ *
+ * NOTE(review): the copy length is the buffer's size; nothing here checks
+ * it against the capacity of shader_constants[shader].constants --
+ * confirm the caller guarantees that. */
+static void
+    r300_set_constant_buffer(struct pipe_context* pipe,
+                             uint shader, uint index,
+                             const struct pipe_constant_buffer* buffer)
+{
+    struct r300_context* r300 = r300_context(pipe);
+
+    /* Until a successful copy says otherwise, there are no constants. */
+    r300->shader_constants[shader].user_count = 0;
+
+    /* This entire chunk of code seems ever-so-slightly baked.
+     * It's as if I've got pipe_buffer* matryoshkas... */
+    if (buffer && buffer->buffer && buffer->buffer->size) {
+        void* map = pipe->winsys->buffer_map(pipe->winsys, buffer->buffer,
+                                             PIPE_BUFFER_USAGE_CPU_READ);
+
+        /* A failed map must not be handed to memcpy. */
+        if (map) {
+            memcpy(r300->shader_constants[shader].constants,
+                   map, buffer->buffer->size);
+            pipe->winsys->buffer_unmap(pipe->winsys, buffer->buffer);
+
+            r300->shader_constants[shader].user_count =
+                buffer->buffer->size / (sizeof(float) * 4);
+        }
+    }
+
+    r300->dirty_state |= R300_NEW_CONSTANTS;
+}
+
+/* Map a PIPE_FUNC_* comparison onto the matching R300_ZS_* value used for
+ * depth and stencil tests. Unknown functions are reported via
+ * debug_printf and map to 0. */
+static uint32_t translate_depth_stencil_function(int zs_func) {
+    uint32_t hw_func = 0;
+
+    switch (zs_func) {
+        case PIPE_FUNC_NEVER:
+            hw_func = R300_ZS_NEVER;
+            break;
+        case PIPE_FUNC_LESS:
+            hw_func = R300_ZS_LESS;
+            break;
+        case PIPE_FUNC_EQUAL:
+            hw_func = R300_ZS_EQUAL;
+            break;
+        case PIPE_FUNC_LEQUAL:
+            hw_func = R300_ZS_LEQUAL;
+            break;
+        case PIPE_FUNC_GREATER:
+            hw_func = R300_ZS_GREATER;
+            break;
+        case PIPE_FUNC_NOTEQUAL:
+            hw_func = R300_ZS_NOTEQUAL;
+            break;
+        case PIPE_FUNC_GEQUAL:
+            hw_func = R300_ZS_GEQUAL;
+            break;
+        case PIPE_FUNC_ALWAYS:
+            hw_func = R300_ZS_ALWAYS;
+            break;
+        default:
+            debug_printf("r300: Unknown depth/stencil function %d\n",
+                         zs_func);
+            break;
+    }
+
+    return hw_func;
+}
+
+/* Map a PIPE_STENCIL_OP_* value onto the matching R300_ZS_* operation.
+ * Unknown operations are reported via debug_printf and map to 0. */
+static uint32_t translate_stencil_op(int s_op) {
+    switch (s_op) {
+        case PIPE_STENCIL_OP_KEEP:
+            return R300_ZS_KEEP;
+        case PIPE_STENCIL_OP_ZERO:
+            return R300_ZS_ZERO;
+        case PIPE_STENCIL_OP_REPLACE:
+            return R300_ZS_REPLACE;
+        case PIPE_STENCIL_OP_INCR:
+            return R300_ZS_INCR;
+        case PIPE_STENCIL_OP_DECR:
+            return R300_ZS_DECR;
+        case PIPE_STENCIL_OP_INCR_WRAP:
+            return R300_ZS_INCR_WRAP;
+        case PIPE_STENCIL_OP_DECR_WRAP:
+            return R300_ZS_DECR_WRAP;
+        case PIPE_STENCIL_OP_INVERT:
+            return R300_ZS_INVERT;
+        default:
+            /* Newline-terminated like the other translate_* warnings, so
+             * the next debug message does not run into this one. */
+            debug_printf("r300: Unknown stencil op %d\n", s_op);
+            break;
+    }
+    return 0;
+}
+
+/* Map a PIPE_FUNC_* comparison onto the matching R300_FG_ALPHA_FUNC_*
+ * value used for the alpha test. Unknown functions are reported via
+ * debug_printf and map to 0. */
+static uint32_t translate_alpha_function(int alpha_func) {
+    switch (alpha_func) {
+        case PIPE_FUNC_NEVER:
+            return R300_FG_ALPHA_FUNC_NEVER;
+        case PIPE_FUNC_LESS:
+            return R300_FG_ALPHA_FUNC_LESS;
+        case PIPE_FUNC_EQUAL:
+            return R300_FG_ALPHA_FUNC_EQUAL;
+        case PIPE_FUNC_LEQUAL:
+            return R300_FG_ALPHA_FUNC_LE;
+        case PIPE_FUNC_GREATER:
+            return R300_FG_ALPHA_FUNC_GREATER;
+        case PIPE_FUNC_NOTEQUAL:
+            return R300_FG_ALPHA_FUNC_NOTEQUAL;
+        case PIPE_FUNC_GEQUAL:
+            return R300_FG_ALPHA_FUNC_GE;
+        case PIPE_FUNC_ALWAYS:
+            return R300_FG_ALPHA_FUNC_ALWAYS;
+        default:
+            /* Newline-terminated like the other translate_* warnings, so
+             * the next debug message does not run into this one. */
+            debug_printf("r300: Unknown alpha function %d\n", alpha_func);
+            break;
+    }
+    return 0;
+}
+
+/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
+ *
+ * This contains the depth buffer, stencil buffer, alpha test, and such.
+ * On the Radeon, depth and stencil buffer setup are intertwined, which is
+ * the reason for some of the strange-looking assignments across registers.
+ *
+ * Back-face stencil state is only looked at when front-face stencil is
+ * enabled. R300_ZTOP_ENABLE is set only when the alpha test is disabled.
+ *
+ * Returns the new r300_dsa_state, or NULL if allocation fails. */
+static void*
+    r300_create_dsa_state(struct pipe_context* pipe,
+                          const struct pipe_depth_stencil_alpha_state* state)
+{
+    struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
+
+    if (!dsa) {
+        return NULL;
+    }
+
+    /* Depth test setup. */
+    if (state->depth.enabled) {
+        dsa->z_buffer_control |= R300_Z_ENABLE;
+
+        if (state->depth.writemask) {
+            dsa->z_buffer_control |= R300_Z_WRITE_ENABLE;
+        }
+
+        dsa->z_stencil_control |=
+            (translate_depth_stencil_function(state->depth.func) <<
+                R300_Z_FUNC_SHIFT);
+    }
+
+    /* Stencil buffer setup. */
+    if (state->stencil[0].enabled) {
+        dsa->z_buffer_control |= R300_STENCIL_ENABLE;
+        dsa->z_stencil_control |=
+            (translate_depth_stencil_function(state->stencil[0].func) <<
+                R300_S_FRONT_FUNC_SHIFT) |
+            (translate_stencil_op(state->stencil[0].fail_op) <<
+                R300_S_FRONT_SFAIL_OP_SHIFT) |
+            (translate_stencil_op(state->stencil[0].zpass_op) <<
+                R300_S_FRONT_ZPASS_OP_SHIFT) |
+            (translate_stencil_op(state->stencil[0].zfail_op) <<
+                R300_S_FRONT_ZFAIL_OP_SHIFT);
+
+        dsa->stencil_ref_mask = (state->stencil[0].ref_value) |
+            (state->stencil[0].valuemask << R300_STENCILMASK_SHIFT) |
+            (state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT);
+
+        if (state->stencil[1].enabled) {
+            dsa->z_buffer_control |= R300_STENCIL_FRONT_BACK;
+            dsa->z_stencil_control |=
+                (translate_depth_stencil_function(state->stencil[1].func) <<
+                    R300_S_BACK_FUNC_SHIFT) |
+                (translate_stencil_op(state->stencil[1].fail_op) <<
+                    R300_S_BACK_SFAIL_OP_SHIFT) |
+                (translate_stencil_op(state->stencil[1].zpass_op) <<
+                    R300_S_BACK_ZPASS_OP_SHIFT) |
+                (translate_stencil_op(state->stencil[1].zfail_op) <<
+                    R300_S_BACK_ZFAIL_OP_SHIFT);
+
+            dsa->stencil_ref_bf = (state->stencil[1].ref_value) |
+                (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) |
+                (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT);
+        }
+    }
+
+    /* Alpha test setup. */
+    if (state->alpha.enabled) {
+        dsa->alpha_function = translate_alpha_function(state->alpha.func) |
+            R300_FG_ALPHA_FUNC_ENABLE;
+        /* The reference value is scaled to the 0..1023 range. */
+        dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f,
+                                     0, 1023);
+    } else {
+        dsa->z_buffer_top = R300_ZTOP_ENABLE;
+    }
+
+    return (void*)dsa;
+}
+
+/* Make the given depth/stencil/alpha state current and flag it for
+ * re-emission. */
+static void r300_bind_dsa_state(struct pipe_context* pipe,
+                                void* state)
+{
+    struct r300_context* ctx = r300_context(pipe);
+
+    ctx->dirty_state |= R300_NEW_DSA;
+    ctx->dsa_state = (struct r300_dsa_state*)state;
+}
+
+/* Free DSA state. The context is not consulted; the CSO is released with
+ * FREE(). */
+static void r300_delete_dsa_state(struct pipe_context* pipe,
+                                  void* state)
+{
+    void* dsa = state;
+
+    (void)pipe;
+    FREE(dsa);
+}
+
+/* Edge flags are not implemented: the call is accepted and ignored. */
+static void r300_set_edgeflags(struct pipe_context* pipe,
+                               const unsigned* bitfield)
+{
+    /* XXX you know it's bad when i915 has this blank too */
+    (void)pipe;
+    (void)bitfield;
+}
+
+/* Set framebuffer state: flush the draw module, copy the new state by value
+ * into the context, and mark R300_NEW_FRAMEBUFFERS dirty. */
+static void
+r300_set_framebuffer_state(struct pipe_context* pipe,
+                           const struct pipe_framebuffer_state* state)
+{
+    struct r300_context* ctx = r300_context(pipe);
+
+    draw_flush(ctx->draw);
+
+    ctx->framebuffer_state = *state;
+    ctx->dirty_state |= R300_NEW_FRAMEBUFFERS;
+}
+
+/* Create fragment shader state.
+ *
+ * Allocates (via CALLOC_STRUCT) an r500_fragment_shader or an
+ * r300_fragment_shader depending on the screen's is_r500 cap, copies the
+ * pipe_shader_state into it, and scans the TGSI tokens into fs->info.
+ *
+ * Returns the new shader as an opaque pointer, or NULL if the allocation
+ * failed. */
+static void* r300_create_fs_state(struct pipe_context* pipe,
+                                  const struct pipe_shader_state* shader)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    struct r3xx_fragment_shader* fs = NULL;
+
+    if (r300_screen(r300->context.screen)->caps->is_r500) {
+        fs =
+        (struct r3xx_fragment_shader*)CALLOC_STRUCT(r500_fragment_shader);
+    } else {
+        fs =
+        (struct r3xx_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader);
+    }
+
+    /* The allocation can fail; do not write through a NULL shader. */
+    if (fs == NULL) {
+        return NULL;
+    }
+
+    /* Copy state directly into shader. */
+    fs->state = *shader;
+
+    tgsi_scan_shader(shader->tokens, &fs->info);
+
+    return (void*)fs;
+}
+
+/* Bind fragment shader state.
+ *
+ * A NULL shader only clears r300->fs (no dirty flag is raised). Otherwise
+ * the shader is translated the first time it is bound, using the R500 or
+ * R300 translator according to the screen caps, then stored on the context
+ * and R300_NEW_FRAGMENT_SHADER is marked dirty. */
+static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
+{
+    struct r300_context* ctx = r300_context(pipe);
+    struct r3xx_fragment_shader* frag = (struct r3xx_fragment_shader*)shader;
+
+    if (!frag) {
+        ctx->fs = NULL;
+        return;
+    }
+
+    if (!frag->translated) {
+        if (!r300_screen(ctx->context.screen)->caps->is_r500) {
+            r300_translate_fragment_shader(ctx, (struct r300_fragment_shader*)frag);
+        } else {
+            r500_translate_fragment_shader(ctx, (struct r500_fragment_shader*)frag);
+        }
+    }
+
+    frag->translated = true;
+    ctx->fs = frag;
+
+    ctx->dirty_state |= R300_NEW_FRAGMENT_SHADER;
+}
+
+/* Delete fragment shader state: the shader object is released with FREE();
+ * the context is not touched. */
+static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
+{
+    void* frag = shader;
+
+    (void)pipe;
+    FREE(frag);
+}
+
+/* Polygon stipple is not implemented: the call is accepted and ignored. */
+static void r300_set_polygon_stipple(struct pipe_context* pipe,
+                                     const struct pipe_poly_stipple* state)
+{
+    /* XXX */
+    (void)pipe;
+    (void)state;
+}
+
+/* Multiply a float by 6, truncate it to an int, and keep the low 16 bits. */
+static INLINE int pack_float_16_6x(float f) {
+    int scaled = (int)(f * 6.0);
+
+    return scaled & 0xffff;
+}
+
+/* Create a new rasterizer state based on the CSO rasterizer state.
+ *
+ * This is a very large chunk of state, and covers most of the graphics
+ * backend (GB), geometry assembly (GA), and setup unit (SU) blocks.
+ *
+ * In a not entirely unironic sidenote, this state has nearly nothing to do
+ * with the actual block on the Radeon called the rasterizer (RS).
+ *
+ * Returns the new r300_rs_state as an opaque pointer (it also keeps a copy
+ * of the pipe_rasterizer_state in rs->rs, which bind hands to Draw), or
+ * NULL if the allocation failed. */
+static void* r300_create_rs_state(struct pipe_context* pipe,
+                                  const struct pipe_rasterizer_state* state)
+{
+    struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state);
+
+    /* The allocation can fail; every store below would go through NULL. */
+    if (rs == NULL) {
+        return NULL;
+    }
+
+    /* XXX this is part of HW TCL */
+    /* XXX endian control */
+    rs->vap_control_status = R300_VAP_TCL_BYPASS;
+
+    /* The same packed point size is used for both halves of the register. */
+    rs->point_size = pack_float_16_6x(state->point_size) |
+        (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT);
+
+    rs->line_control = pack_float_16_6x(state->line_width) |
+        R300_GA_LINE_CNTL_END_TYPE_COMP;
+
+    /* Radeons don't think in "CW/CCW", they think in "front/back". */
+    if (state->front_winding == PIPE_WINDING_CW) {
+        rs->cull_mode = R300_FRONT_FACE_CW;
+
+        if (state->offset_cw) {
+            rs->polygon_offset_enable |= R300_FRONT_ENABLE;
+        }
+        if (state->offset_ccw) {
+            rs->polygon_offset_enable |= R300_BACK_ENABLE;
+        }
+    } else {
+        rs->cull_mode = R300_FRONT_FACE_CCW;
+
+        if (state->offset_ccw) {
+            rs->polygon_offset_enable |= R300_FRONT_ENABLE;
+        }
+        if (state->offset_cw) {
+            rs->polygon_offset_enable |= R300_BACK_ENABLE;
+        }
+    }
+    /* cull_mode is tested as a winding bitmask: bits shared with the front
+     * winding cull front faces, the remaining bits cull back faces. */
+    if (state->front_winding & state->cull_mode) {
+        rs->cull_mode |= R300_CULL_FRONT;
+    }
+    if (~(state->front_winding) & state->cull_mode) {
+        rs->cull_mode |= R300_CULL_BACK;
+    }
+
+    /* Front and back faces share the same offset units and scale. */
+    if (rs->polygon_offset_enable) {
+        rs->depth_offset_front = rs->depth_offset_back =
+            pack_float_32(state->offset_units);
+        rs->depth_scale_front = rs->depth_scale_back =
+            pack_float_32(state->offset_scale);
+    }
+
+    if (state->line_stipple_enable) {
+        rs->line_stipple_config =
+            R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE |
+            (pack_float_32((float)state->line_stipple_factor) &
+             R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK);
+        /* XXX this might need to be scaled up */
+        rs->line_stipple_value = state->line_stipple_pattern;
+    }
+
+    rs->rs = *state;
+
+    return (void*)rs;
+}
+
+/* Bind rasterizer state: pass the embedded pipe_rasterizer_state to the
+ * draw module, then store the CSO on the context and mark
+ * R300_NEW_RASTERIZER dirty. */
+static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
+{
+    struct r300_rs_state* raster = (struct r300_rs_state*)state;
+    struct r300_context* ctx = r300_context(pipe);
+
+    draw_set_rasterizer_state(ctx->draw, &raster->rs);
+
+    ctx->dirty_state |= R300_NEW_RASTERIZER;
+    ctx->rs_state = raster;
+}
+
+/* Free rasterizer state: the CSO is released with FREE(); the context is
+ * not touched. */
+static void r300_delete_rs_state(struct pipe_context* pipe, void* state)
+{
+    void* raster = state;
+
+    (void)pipe;
+    FREE(raster);
+}
+
+/* Translate a PIPE_TEX_WRAP_* mode into the matching R300_TX_* wrap value.
+ *
+ * Each mirrored mode is built as the base mode OR'd with R300_TX_MIRRORED.
+ * Unknown modes are reported with debug_printf and translate to 0. */
+static uint32_t translate_wrap(int wrap) {
+    switch (wrap) {
+        case PIPE_TEX_WRAP_REPEAT:
+            return R300_TX_REPEAT;
+        case PIPE_TEX_WRAP_CLAMP:
+            return R300_TX_CLAMP;
+        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+            return R300_TX_CLAMP_TO_EDGE;
+        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+            return R300_TX_CLAMP_TO_BORDER;
+        case PIPE_TEX_WRAP_MIRROR_REPEAT:
+            return R300_TX_REPEAT | R300_TX_MIRRORED;
+        case PIPE_TEX_WRAP_MIRROR_CLAMP:
+            return R300_TX_CLAMP | R300_TX_MIRRORED;
+        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+            return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
+        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+            /* Was CLAMP_TO_EDGE, which duplicated the case above. */
+            return R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
+        default:
+            debug_printf("r300: Unknown texture wrap %d\n", wrap);
+            return 0;
+    }
+}
+
+/* Translate pipe min/mag/mip filter enums into R300_TX_* filter bits.
+ *
+ * min, mag: PIPE_TEX_FILTER_* values
+ * mip:      PIPE_TEX_MIPFILTER_* value
+ *
+ * Each switch contributes exactly one hardware value to the result; an
+ * unknown enum is reported with debug_printf and contributes no bits.
+ * Every case ends in break: without it the cases fell through, OR'ing all
+ * filter values together and always hitting the "unknown" message. */
+static uint32_t translate_tex_filters(int min, int mag, int mip) {
+    uint32_t retval = 0;
+    switch (min) {
+        case PIPE_TEX_FILTER_NEAREST:
+            retval |= R300_TX_MIN_FILTER_NEAREST;
+            break;
+        case PIPE_TEX_FILTER_LINEAR:
+            retval |= R300_TX_MIN_FILTER_LINEAR;
+            break;
+        case PIPE_TEX_FILTER_ANISO:
+            retval |= R300_TX_MIN_FILTER_ANISO;
+            break;
+        default:
+            debug_printf("r300: Unknown texture filter %d\n", min);
+            break;
+    }
+    switch (mag) {
+        case PIPE_TEX_FILTER_NEAREST:
+            retval |= R300_TX_MAG_FILTER_NEAREST;
+            break;
+        case PIPE_TEX_FILTER_LINEAR:
+            retval |= R300_TX_MAG_FILTER_LINEAR;
+            break;
+        case PIPE_TEX_FILTER_ANISO:
+            retval |= R300_TX_MAG_FILTER_ANISO;
+            break;
+        default:
+            debug_printf("r300: Unknown texture filter %d\n", mag);
+            break;
+    }
+    switch (mip) {
+        case PIPE_TEX_MIPFILTER_NONE:
+            retval |= R300_TX_MIN_FILTER_MIP_NONE;
+            break;
+        case PIPE_TEX_MIPFILTER_NEAREST:
+            retval |= R300_TX_MIN_FILTER_MIP_NEAREST;
+            break;
+        case PIPE_TEX_MIPFILTER_LINEAR:
+            retval |= R300_TX_MIN_FILTER_MIP_LINEAR;
+            break;
+        default:
+            debug_printf("r300: Unknown texture filter %d\n", mip);
+            break;
+    }
+
+    return retval;
+}
+
+/* Pick the R300_TX_MAX_ANISO_* value for a maximum anisotropy: the largest
+ * of 16, 8, 4 or 2 that max_aniso reaches, otherwise 1:1. */
+static uint32_t anisotropy(float max_aniso) {
+    if (max_aniso >= 16.0f)
+        return R300_TX_MAX_ANISO_16_TO_1;
+    if (max_aniso >= 8.0f)
+        return R300_TX_MAX_ANISO_8_TO_1;
+    if (max_aniso >= 4.0f)
+        return R300_TX_MAX_ANISO_4_TO_1;
+    if (max_aniso >= 2.0f)
+        return R300_TX_MAX_ANISO_2_TO_1;
+
+    return R300_TX_MAX_ANISO_1_TO_1;
+}
+
+/* Create sampler state from a pipe_sampler_state.
+ *
+ * filter0 receives the translated S/T/R wrap modes and the min/mag/mip
+ * filter bits; filter1 receives the LOD bias and the anisotropy level;
+ * border_color is packed as A8R8G8B8. On R500 the R500_BORDER_FIX bit is
+ * also set in filter1.
+ *
+ * Returns the new r300_sampler_state as an opaque pointer, or NULL if the
+ * allocation failed. */
+static void*
+r300_create_sampler_state(struct pipe_context* pipe,
+                          const struct pipe_sampler_state* state)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state);
+    int lod_bias;
+
+    /* The allocation can fail; do not write through a NULL sampler. */
+    if (sampler == NULL) {
+        return NULL;
+    }
+
+    sampler->filter0 |=
+        (translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) |
+        (translate_wrap(state->wrap_t) << R300_TX_WRAP_T_SHIFT) |
+        (translate_wrap(state->wrap_r) << R300_TX_WRAP_R_SHIFT);
+
+    sampler->filter0 |= translate_tex_filters(state->min_img_filter,
+                                              state->mag_img_filter,
+                                              state->min_mip_filter);
+
+    /* The bias is scaled by 32 and clamped to [-512, 511]. */
+    lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1);
+
+    /* NOTE(review): a negative lod_bias is shifted as a signed int and is
+     * not masked to the field width -- confirm against the register
+     * layout. */
+    sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT;
+
+    sampler->filter1 |= anisotropy(state->max_anisotropy);
+
+    util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM,
+                    &sampler->border_color);
+
+    /* R500-specific fixups and optimizations */
+    if (r300_screen(r300->context.screen)->caps->is_r500) {
+        sampler->filter1 |= R500_BORDER_FIX;
+    }
+
+    return (void*)sampler;
+}
+
+/* Bind sampler states. Requests for more than 8 samplers are ignored.
+ * Each slot whose pointer changes is updated and gets its own dirty bit
+ * (R300_NEW_SAMPLER shifted by the slot index); the count is then stored. */
+static void r300_bind_sampler_states(struct pipe_context* pipe,
+                                     unsigned count,
+                                     void** states)
+{
+    struct r300_context* ctx = r300_context(pipe);
+    unsigned slot;
+
+    if (count > 8) {
+        return;
+    }
+
+    for (slot = 0; slot < count; slot++) {
+        if (ctx->sampler_states[slot] == states[slot]) {
+            continue;
+        }
+
+        ctx->sampler_states[slot] = (struct r300_sampler_state*)states[slot];
+        ctx->dirty_state |= (R300_NEW_SAMPLER << slot);
+    }
+
+    ctx->sampler_count = count;
+}
+
+/* Free sampler state: the CSO is released with FREE(); the context is not
+ * touched. */
+static void r300_delete_sampler_state(struct pipe_context* pipe, void* state)
+{
+    void* sampler = state;
+
+    (void)pipe;
+    FREE(sampler);
+}
+
+/* Set the sampler textures. Requests for more than 8 textures are ignored.
+ * Units below count take a reference on the supplied texture, units from
+ * count up to 8 are unreferenced; every unit whose pointer changes gets its
+ * own dirty bit (R300_NEW_TEXTURE shifted by the unit index). */
+static void r300_set_sampler_textures(struct pipe_context* pipe,
+                                      unsigned count,
+                                      struct pipe_texture** texture)
+{
+    struct r300_context* ctx = r300_context(pipe);
+    unsigned unit;
+
+    /* XXX magic num */
+    if (count > 8) {
+        return;
+    }
+
+    for (unit = 0; unit < 8; unit++) {
+        /* Units past the requested count are cleared. */
+        struct pipe_texture* wanted = (unit < count) ? texture[unit] : NULL;
+
+        if (ctx->textures[unit] == (struct r300_texture*)wanted) {
+            continue;
+        }
+
+        pipe_texture_reference((struct pipe_texture**)&ctx->textures[unit],
+                               wanted);
+        ctx->dirty_state |= (R300_NEW_TEXTURE << unit);
+    }
+
+    ctx->texture_count = count;
+}
+
+/* Set scissor state: flush the draw module, pack the min and max corners
+ * into the two scissor register values, and mark R300_NEW_SCISSOR dirty. */
+static void r300_set_scissor_state(struct pipe_context* pipe,
+                                   const struct pipe_scissor_state* state)
+{
+    struct r300_context* ctx = r300_context(pipe);
+
+    draw_flush(ctx->draw);
+
+    ctx->scissor_state->scissor_bottom_right =
+        (state->maxx << R300_SCISSORS_X_SHIFT) |
+        (state->maxy << R300_SCISSORS_Y_SHIFT);
+    ctx->scissor_state->scissor_top_left =
+        (state->minx << R300_SCISSORS_X_SHIFT) |
+        (state->miny << R300_SCISSORS_Y_SHIFT);
+
+    ctx->dirty_state |= R300_NEW_SCISSOR;
+}
+
+/* Set viewport state: forwarded unchanged to the draw module; nothing is
+ * stored on the context. */
+static void r300_set_viewport_state(struct pipe_context* pipe,
+                                    const struct pipe_viewport_state* state)
+{
+    /* XXX handing this off to Draw for now */
+    draw_set_viewport_state(r300_context(pipe)->draw, state);
+}
+
+/* Set vertex buffers: copy the buffer descriptions into the context, store
+ * the count, then flush the draw module and pass the same buffers to it.
+ * NOTE(review): count is not checked against the capacity of
+ * r300->vertex_buffers -- confirm that callers bound it. */
+static void r300_set_vertex_buffers(struct pipe_context* pipe,
+                                    unsigned count,
+                                    const struct pipe_vertex_buffer* buffers)
+{
+    struct r300_context* ctx = r300_context(pipe);
+
+    memcpy(ctx->vertex_buffers, buffers, count * sizeof(*buffers));
+    ctx->vertex_buffer_count = count;
+
+    draw_flush(ctx->draw);
+    draw_set_vertex_buffers(ctx->draw, count, buffers);
+}
+
+/* Set vertex elements: flush the draw module, then forward the elements to
+ * it. Nothing is stored on the context. */
+static void r300_set_vertex_elements(struct pipe_context* pipe,
+                                     unsigned count,
+                                     const struct pipe_vertex_element* elements)
+{
+    struct draw_context* draw = r300_context(pipe)->draw;
+
+    /* XXX Draw */
+    draw_flush(draw);
+    draw_set_vertex_elements(draw, count, elements);
+}
+
+/* Create vertex shader state: delegated to the draw module, whose return
+ * value is passed back to the caller. */
+static void* r300_create_vs_state(struct pipe_context* pipe,
+                                  const struct pipe_shader_state* state)
+{
+    /* XXX handing this off to Draw for now */
+    return draw_create_vertex_shader(r300_context(pipe)->draw, state);
+}
+
+/* Bind vertex shader state: delegated to the draw module. */
+static void r300_bind_vs_state(struct pipe_context* pipe, void* state) {
+    struct draw_vertex_shader* vs = (struct draw_vertex_shader*)state;
+
+    /* XXX handing this off to Draw for now */
+    draw_bind_vertex_shader(r300_context(pipe)->draw, vs);
+}
+
+/* Delete vertex shader state: delegated to the draw module. */
+static void r300_delete_vs_state(struct pipe_context* pipe, void* state)
+{
+    struct draw_vertex_shader* vs = (struct draw_vertex_shader*)state;
+
+    /* XXX handing this off to Draw for now */
+    draw_delete_vertex_shader(r300_context(pipe)->draw, vs);
+}
+
+/* Install this file's state callbacks into the pipe_context embedded in the
+ * r300 context. */
+void r300_init_state_functions(struct r300_context* r300)
+{
+    struct pipe_context* ctx = &r300->context;
+
+    /* Blend */
+    ctx->create_blend_state = r300_create_blend_state;
+    ctx->bind_blend_state = r300_bind_blend_state;
+    ctx->delete_blend_state = r300_delete_blend_state;
+    ctx->set_blend_color = r300_set_blend_color;
+
+    /* Clip planes and constants */
+    ctx->set_clip_state = r300_set_clip_state;
+    ctx->set_constant_buffer = r300_set_constant_buffer;
+
+    /* Depth/stencil/alpha */
+    ctx->create_depth_stencil_alpha_state = r300_create_dsa_state;
+    ctx->bind_depth_stencil_alpha_state = r300_bind_dsa_state;
+    ctx->delete_depth_stencil_alpha_state = r300_delete_dsa_state;
+
+    ctx->set_edgeflags = r300_set_edgeflags;
+    ctx->set_framebuffer_state = r300_set_framebuffer_state;
+
+    /* Fragment shader */
+    ctx->create_fs_state = r300_create_fs_state;
+    ctx->bind_fs_state = r300_bind_fs_state;
+    ctx->delete_fs_state = r300_delete_fs_state;
+
+    ctx->set_polygon_stipple = r300_set_polygon_stipple;
+
+    /* Rasterizer */
+    ctx->create_rasterizer_state = r300_create_rs_state;
+    ctx->bind_rasterizer_state = r300_bind_rs_state;
+    ctx->delete_rasterizer_state = r300_delete_rs_state;
+
+    /* Samplers and textures */
+    ctx->create_sampler_state = r300_create_sampler_state;
+    ctx->bind_sampler_states = r300_bind_sampler_states;
+    ctx->delete_sampler_state = r300_delete_sampler_state;
+    ctx->set_sampler_textures = r300_set_sampler_textures;
+
+    ctx->set_scissor_state = r300_set_scissor_state;
+    ctx->set_viewport_state = r300_set_viewport_state;
+
+    /* Vertex input */
+    ctx->set_vertex_buffers = r300_set_vertex_buffers;
+    ctx->set_vertex_elements = r300_set_vertex_elements;
+
+    /* Vertex shader */
+    ctx->create_vs_state = r300_create_vs_state;
+    ctx->bind_vs_state = r300_bind_vs_state;
+    ctx->delete_vs_state = r300_delete_vs_state;
+}
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
new file mode 100644
index 0000000000..a51904096f
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_state_derived.h"
+
+/* r300_state_derived: Various bits of state which are dependent upon
+ * currently bound CSO data. */
+
+/* Map a draw-module EMIT_* attribute type to the R300_DATA_TYPE_FLOAT_*
+ * value with the same component count. Unhandled types are reported with
+ * debug_printf and translate to 0. */
+static uint32_t translate_vertex_data_type(int type) {
+    uint32_t hw_type = 0;
+
+    switch (type) {
+        case EMIT_1F:
+        case EMIT_1F_PSIZE:
+            hw_type = R300_DATA_TYPE_FLOAT_1;
+            break;
+        case EMIT_2F:
+            hw_type = R300_DATA_TYPE_FLOAT_2;
+            break;
+        case EMIT_3F:
+            hw_type = R300_DATA_TYPE_FLOAT_3;
+            break;
+        case EMIT_4F:
+            hw_type = R300_DATA_TYPE_FLOAT_4;
+            break;
+        default:
+            debug_printf("r300: Implementation error: "
+                         "Bad vertex data type!\n");
+            break;
+    }
+
+    return hw_type;
+}
+
+/* Update the vertex_info struct in our r300_context.
+ *
+ * The vertex_info struct describes the post-TCL format of vertices. It is
+ * required for Draw when doing SW TCL, and also for describing the
+ * dreaded RS block on R300 chipsets.
+ *
+ * The layout is derived from the inputs of the currently bound fragment
+ * shader (r300->fs->info). If it differs from the stored one, the
+ * vap_prog_stream_cntl words are rebuilt, the new vertex_info is copied
+ * into the context, and R300_NEW_VERTEX_FORMAT is marked dirty. */
+/* XXX this function should be able to handle vert shaders as well as draw */
+static void r300_update_vertex_layout(struct r300_context* r300)
+{
+    struct vertex_info vinfo;
+    boolean pos = false, psize = false, fog = false;
+    int i, texs = 0, cols = 0;
+
+    struct tgsi_shader_info* info = &r300->fs->info;
+    memset(&vinfo, 0, sizeof(vinfo));
+
+    assert(info->num_inputs <= 16);
+
+    /* This is rather lame. Since draw_find_vs_output doesn't return an error
+     * when it can't find an output, we have to pre-iterate and count each
+     * output ourselves. */
+    for (i = 0; i < info->num_inputs; i++) {
+        switch (info->input_semantic_name[i]) {
+            case TGSI_SEMANTIC_POSITION:
+                pos = true;
+                break;
+            case TGSI_SEMANTIC_COLOR:
+                cols++;
+                break;
+            case TGSI_SEMANTIC_FOG:
+                fog = true;
+                break;
+            case TGSI_SEMANTIC_PSIZE:
+                psize = true;
+                break;
+            case TGSI_SEMANTIC_GENERIC:
+                texs++;
+                break;
+            default:
+                debug_printf("r300: Unknown vertex input %d\n",
+                             info->input_semantic_name[i]);
+                break;
+        }
+    }
+
+    /* Do the actual vertex_info setup.
+     *
+     * vertex_info has four uints of hardware-specific data in it.
+     * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL
+     * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM
+     * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0
+     * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */
+
+    vinfo.hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */
+
+    /* Position is always emitted, whether or not the shader reads it. */
+    if (!pos) {
+        debug_printf("r300: Forcing vertex position attribute emit...\n");
+    }
+
+    draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_POS,
+        draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0));
+    vinfo.hwfmt[1] |= R300_INPUT_CNTL_POS;
+    vinfo.hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+
+    if (psize) {
+        draw_emit_vertex_attr(&vinfo, EMIT_1F_PSIZE, INTERP_LINEAR,
+            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0));
+        vinfo.hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+    }
+
+    for (i = 0; i < cols; i++) {
+        draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR,
+            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i));
+        vinfo.hwfmt[1] |= R300_INPUT_CNTL_COLOR;
+        vinfo.hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i);
+    }
+
+    /* Fog takes the colour slot that follows the real colours. */
+    if (fog) {
+        draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE,
+            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0));
+        vinfo.hwfmt[2] |=
+            (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << cols);
+    }
+
+    for (i = 0; i < texs; i++) {
+        draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR,
+            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i));
+        vinfo.hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
+        vinfo.hwfmt[3] |= (4 << (3 * i));
+    }
+
+    draw_compute_vertex_size(&vinfo);
+
+    if (memcmp(&r300->vertex_info, &vinfo, sizeof(struct vertex_info))) {
+        uint32_t temp;
+
+#define BORING_SWIZZLE \
+        ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+         (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+         (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+         (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | \
+         (0xf << R300_WRITE_ENA_SHIFT))
+
+        /* Two attributes share each 32-bit word: even attributes use the
+         * low half, odd attributes the high half. Clear only the half that
+         * is about to be written so the neighbouring entry survives. */
+        for (i = 0; i < vinfo.num_attribs; i++) {
+            temp = translate_vertex_data_type(vinfo.attrib[i].emit) |
+                R300_SIGNED;
+            if (i & 1) {
+                r300->vertex_info.vap_prog_stream_cntl[i >> 1] &= 0x0000ffff;
+                r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= temp << 16;
+            } else {
+                r300->vertex_info.vap_prog_stream_cntl[i >> 1] &= 0xffff0000;
+                r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= temp;
+            }
+
+            r300->vertex_info.vap_prog_stream_cntl_ext[i >> 1] |=
+                (BORING_SWIZZLE << (i & 1 ? 16 : 0));
+        }
+
+        /* R300_LAST_VEC goes on the final emitted attribute, not on the
+         * slot after it (i equals num_attribs when the loop ends). */
+        if (i > 0) {
+            i--;
+            r300->vertex_info.vap_prog_stream_cntl[i >> 1] |=
+                (R300_LAST_VEC << (i & 1 ? 16 : 0));
+        }
+
+        /* NOTE(review): this copies sizeof(struct vertex_info) bytes over
+         * the start of r300->vertex_info, which also carries the
+         * vap_prog_stream_cntl arrays -- presumably the vertex_info member
+         * comes first; confirm against r300_context.h. */
+        memcpy(&r300->vertex_info, &vinfo, sizeof(struct vertex_info));
+        r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
+    }
+}
+
+/* Set up the RS block. This is the part of the chipset that actually does
+ * the rasterization of vertices into fragments. This is also the part of the
+ * chipset that locks up if any part of it is even slightly wrong.
+ *
+ * Not implemented yet: the function body is empty. */
+void r300_update_rs_block(struct r300_context* r300)
+{
+    (void)r300;
+}
+
+/* Recompute state derived from bound CSOs. A new fragment shader triggers a
+ * vertex layout update; the vertex-format flag is tested afterwards, so a
+ * layout change made by that update also triggers the RS block update. */
+void r300_update_derived_state(struct r300_context* r300)
+{
+    if ((r300->dirty_state & R300_NEW_FRAGMENT_SHADER) != 0) {
+        r300_update_vertex_layout(r300);
+    }
+
+    if ((r300->dirty_state & R300_NEW_VERTEX_FORMAT) != 0) {
+        r300_update_rs_block(r300);
+    }
+}
diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h
new file mode 100644
index 0000000000..72ba6b928d
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_derived.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_STATE_DERIVED_H
+#define R300_STATE_DERIVED_H
+
+#include "draw/draw_vertex.h"
+
+#include "r300_context.h"
+#include "r300_reg.h"
+
+void r300_update_derived_state(struct r300_context* r300);
+
+#endif /* R300_STATE_DERIVED_H */
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
new file mode 100644
index 0000000000..005fb74ed6
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2009 Joakim Sindholt <opensource@zhasha.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_STATE_INLINES_H
+#define R300_STATE_INLINES_H
+
+#include "pipe/p_format.h"
+
+#include "r300_reg.h"
+
+/* Translate a pipe colour format into the R300_COLOR_FORMAT_* value.
+ * Unsupported formats are reported with debug_printf and translate to 0. */
+static INLINE uint32_t r300_translate_colorformat(enum pipe_format format)
+{
+    uint32_t hw_format = 0;
+
+    switch (format) {
+        case PIPE_FORMAT_A8R8G8B8_UNORM:
+            hw_format = R300_COLOR_FORMAT_ARGB8888;
+            break;
+        case PIPE_FORMAT_I8_UNORM:
+            hw_format = R300_COLOR_FORMAT_I8;
+            break;
+        case PIPE_FORMAT_A1R5G5B5_UNORM:
+            hw_format = R300_COLOR_FORMAT_ARGB1555;
+            break;
+        case PIPE_FORMAT_R5G6B5_UNORM:
+            hw_format = R300_COLOR_FORMAT_RGB565;
+            break;
+        /* XXX Not in pipe_format
+        case PIPE_FORMAT_A32R32G32B32:
+            return R300_COLOR_FORMAT_ARGB32323232;
+        case PIPE_FORMAT_A16R16G16B16:
+            return R300_COLOR_FORMAT_ARGB16161616; */
+        case PIPE_FORMAT_A4R4G4B4_UNORM:
+            hw_format = R300_COLOR_FORMAT_ARGB4444;
+            break;
+        /* XXX Not in pipe_format
+        case PIPE_FORMAT_A10R10G10B10_UNORM:
+            return R500_COLOR_FORMAT_ARGB10101010;
+        case PIPE_FORMAT_A2R10G10B10_UNORM:
+            return R500_COLOR_FORMAT_ARGB2101010;
+        case PIPE_FORMAT_I10_UNORM:
+            return R500_COLOR_FORMAT_I10; */
+        default:
+            debug_printf("r300: Implementation error: "
+                         "Got unsupported color format %s in %s\n",
+                         pf_name(format), __FUNCTION__);
+            break;
+    }
+
+    return hw_format;
+}
+
+/* Translate a pipe depth/stencil format into the R300_DEPTHFORMAT_* value.
+ * Unsupported formats are reported with debug_printf and translate to 0. */
+static INLINE uint32_t r300_translate_zsformat(enum pipe_format format)
+{
+    uint32_t hw_format = 0;
+
+    switch (format) {
+        case PIPE_FORMAT_Z16_UNORM:
+            hw_format = R300_DEPTHFORMAT_16BIT_INT_Z;
+            break;
+        /* XXX R300_DEPTHFORMAT_16BIT_13E3 anyone? */
+        case PIPE_FORMAT_Z24S8_UNORM:
+            hw_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+            break;
+        default:
+            debug_printf("r300: Implementation error: "
+                         "Got unsupported ZS format %s in %s\n",
+                         pf_name(format), __FUNCTION__);
+            break;
+    }
+
+    return hw_format;
+}
+
+#endif /* R300_STATE_INLINES_H */
diff --git a/src/gallium/drivers/r300/r300_state_shader.c b/src/gallium/drivers/r300/r300_state_shader.c
new file mode 100644
index 0000000000..d10ac55580
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_shader.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_state_shader.h"
+
+/* Copy the built-in R300 passthrough program into fs: stack size,
+ * ALU/TEX instruction counts, indirection count, and the first
+ * instruction. */
+static void r300_copy_passthrough_shader(struct r300_fragment_shader* fs)
+{
+    /* The template is declared static const, so read it through a
+     * const-qualified pointer rather than discarding the qualifier. */
+    const struct r300_fragment_shader* pt = &r300_passthrough_fragment_shader;
+
+    fs->shader.stack_size = pt->shader.stack_size;
+    fs->alu_instruction_count = pt->alu_instruction_count;
+    fs->tex_instruction_count = pt->tex_instruction_count;
+    fs->indirections = pt->indirections;
+    fs->instructions[0] = pt->instructions[0];
+}
+
+/* Copy the built-in R500 passthrough program into fs: stack size,
+ * instruction count, and the first instruction. */
+static void r500_copy_passthrough_shader(struct r500_fragment_shader* fs)
+{
+    /* The template is declared static const, so read it through a
+     * const-qualified pointer rather than discarding the qualifier. */
+    const struct r500_fragment_shader* pt = &r500_passthrough_fragment_shader;
+
+    fs->shader.stack_size = pt->shader.stack_size;
+    fs->instruction_count = pt->instruction_count;
+    fs->instructions[0] = pt->instructions[0];
+}
+
+/* Translate a fragment shader for R300. For now the shader's contents are
+ * ignored and the built-in passthrough program is copied in; the context
+ * argument is unused. */
+void r300_translate_fragment_shader(struct r300_context* r300,
+                                    struct r300_fragment_shader* fs)
+{
+    (void)r300;
+    r300_copy_passthrough_shader(fs);
+}
+
+/* Translate a fragment shader for R500. For now the shader's contents are
+ * ignored and the built-in passthrough program is copied in; the context
+ * argument is unused. */
+void r500_translate_fragment_shader(struct r300_context* r300,
+                                    struct r500_fragment_shader* fs)
+{
+    (void)r300;
+    r500_copy_passthrough_shader(fs);
+}
diff --git a/src/gallium/drivers/r300/r300_state_shader.h b/src/gallium/drivers/r300/r300_state_shader.h
new file mode 100644
index 0000000000..73025b2dcc
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_shader.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_STATE_SHADER_H
+#define R300_STATE_SHADER_H
+
+#include "r300_context.h"
+#include "r300_reg.h"
+#include "r300_screen.h"
+
+void r300_translate_fragment_shader(struct r300_context* r300,
+ struct r300_fragment_shader* fs);
+
+void r500_translate_fragment_shader(struct r300_context* r300,
+ struct r500_fragment_shader* fs);
+
+/* Built-in passthrough fragment program for R300-class chips: one ALU
+ * instruction and no texture instructions. Both the RGB and the alpha half
+ * use the MAD output op with the B operand set to ONE and the C operand set
+ * to ZERO, which presumably passes source 0 through unchanged -- confirm
+ * against the register documentation. */
+static const struct r300_fragment_shader r300_passthrough_fragment_shader = {
+ /* XXX This is the emission code. TODO: decode
+ OUT_CS_REG(R300_US_CONFIG, 0);
+ OUT_CS_REG(R300_US_CODE_OFFSET, 0x0);
+ OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0);
+ OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0);
+ OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0);
+ OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000);
+*/
+ .alu_instruction_count = 1,
+ .tex_instruction_count = 0,
+ .indirections = 1,
+ .shader.stack_size = 2,
+
+ /* RGB half: operands A = src0.xyz, B = one, C = zero; MAD output op. */
+ .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
+ R300_RGB_SWIZB(R300_ALU_ARGC_ONE) |
+ R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
+ R300_ALU_OUTC_MAD,
+ /* All three RGB source addresses are 0; the result goes to output XYZ. */
+ .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
+ R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ,
+ /* Alpha half: operands A = src0.a, B = one, C = zero; MAD output op. */
+ .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
+ R300_ALPHA_SWIZB(R300_ALU_ARGA_ONE) |
+ R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
+ R300_ALU_OUTA_MAD,
+ /* All three alpha source addresses are 0; the result goes to output. */
+ .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) |
+ R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT,
+};
+
+/* Built-in passthrough fragment program for R500-class chips: a single
+ * instruction of type OUT, flagged as the last one, with the RGB and alpha
+ * output masks and clamps enabled. */
+static const struct r500_fragment_shader r500_passthrough_fragment_shader = {
+ .shader.stack_size = 0,
+ .instruction_count = 1,
+ .instructions[0].inst0 = R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
+ R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
+ /* RGB source addresses: all 0; sources 1 and 2 carry the CONST flag. */
+ .instructions[0].inst1 =
+ R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
+ R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST,
+ /* Alpha source addresses: same pattern as the RGB addresses. */
+ .instructions[0].inst2 =
+ R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
+ R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST,
+ /* RGB operands A and B both select source 0.
+ * NOTE(review): operand B pairs B_SWIZ_B_G with G_SWIZ_B_B, i.e. the green
+ * and blue swizzles look crossed compared with operand A -- confirm whether
+ * that is intended. */
+ .instructions[0].inst3 =
+ R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B,
+ /* Alpha op is CMP, with operands A and B both swizzled from alpha. */
+ .instructions[0].inst4 =
+ R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A,
+ /* RGBA op is CMP; all four of its R/G/B/A swizzles select 0. */
+ .instructions[0].inst5 =
+ R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0,
+};
+
+#endif /* R300_STATE_SHADER_H */
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
new file mode 100644
index 0000000000..49e4a96f83
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Joakim Sindholt <opensource@zhasha.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_surface.h"
+
+/* Provides pipe_context's "surface_fill". Commonly used for clearing
+ * buffers.
+ *
+ * The fill is done on the GPU: a large block of fixed-function state is
+ * emitted, a passthrough fragment shader is loaded, and a single point
+ * primitive is drawn whose size is programmed from w and h. The one vertex
+ * is emitted at (w/2, h/2) and x/y are written as the viewport offsets.
+ *
+ * pipe: context; cast to r300_context for command-stream access.
+ * dest: destination surface; its texture is treated as an r300_texture
+ * whose buffer, stride and format are programmed into colorbuffer 0.
+ * x, y: written as the viewport X/Y offsets and the first scissor dword.
+ * w, h: fill extent; used for the point size and the second scissor dword.
+ * color: packed colour; bits 16-23, 8-15 and 0-7 are read as red, green
+ * and blue respectively.
+ *
+ * NOTE(review): `a` is declared but never assigned, and the vertex is
+ * emitted with a constant alpha of 1.0, so bits 24-31 of `color` are
+ * ignored. Confirm whether that is intended. */
+static void r300_surface_fill(struct pipe_context* pipe,
+ struct pipe_surface* dest,
+ unsigned x, unsigned y,
+ unsigned w, unsigned h,
+ unsigned color)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ CS_LOCALS(r300);
+ struct r300_capabilities* caps = ((struct r300_screen*)pipe->screen)->caps;
+ struct r300_texture* tex = (struct r300_texture*)dest->texture;
+ int i;
+ float r, g, b, a;
+ /* Pitch expressed in blocks rather than bytes. */
+ unsigned pixpitch = tex->stride / tex->tex.block.size;
+ /* Unpack the 8-bit channels into [0, 1] floats. */
+ r = (float)((color >> 16) & 0xff) / 255.0f;
+ g = (float)((color >> 8) & 0xff) / 255.0f;
+ b = (float)((color >> 0) & 0xff) / 255.0f;
+ debug_printf("r300: Filling surface %p at (%d,%d),"
+ " dimensions %dx%d (pixel pitch %d), color 0x%x\n",
+ dest, x, y, w, h, pixpitch, color);
+
+ /* Fallback? */
+ /*if (0) {
+ debug_printf("r300: Falling back on surface clear...");
+ void* map = pipe->screen->surface_map(pipe->screen, dest,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ pipe_fill_rect(map, &dest->block, &dest->stride, x, y, w, h, color);
+ pipe->screen->surface_unmap(pipe->screen, dest);
+ return;
+ }*/
+
+ /* NOTE(review): the BEGIN_CS argument is presumably the number of dwords
+ * written before the matching END_CS, with the conditional terms covering
+ * the is_r500 and has_tcl branches below -- keep it in sync when adding
+ * or removing writes (confirm against r300_cs.h). */
+ BEGIN_CS(163 + (caps->is_r500 ? 22 : 14) + (caps->has_tcl ? 4 : 2));
+ /* Flush PVS. */
+ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
+
+ OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
+ R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
+ R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
+ R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT);
+ /* Vertex size. */
+ OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8);
+ /* Max and min vertex index clamp. */
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xFFFFFF);
+ OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0);
+ /* XXX endian */
+ OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP);
+ OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, 0x0);
+ /* XXX magic number not in r300_reg */
+ OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA);
+ OUT_CS_REG(R300_VAP_CLIP_CNTL, 0x0);
+ OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F(1.0);
+ /* XXX is this too long? */
+ OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xFFFF);
+ OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
+ R300_GB_LINE_STUFF_ENABLE | R300_GB_TRIANGLE_STUFF_ENABLE);
+ /* XXX more magic numbers */
+ OUT_CS_REG(R300_GB_MSPOS0, 0x66666666);
+ OUT_CS_REG(R300_GB_MSPOS1, 0x66666666);
+ /* XXX why doesn't classic Mesa write the number of pipes, too? */
+ OUT_CS_REG(R300_GB_TILE_CONFIG, R300_GB_TILE_ENABLE |
+ R300_GB_TILE_SIZE_16);
+ OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W);
+ OUT_CS_REG(R300_GB_AA_CONFIG, 0x0);
+ /* XXX point tex stuffing */
+ OUT_CS_REG_SEQ(R300_GA_POINT_S0, 1);
+ OUT_CS_32F(0.0);
+ OUT_CS_REG_SEQ(R300_GA_POINT_S1, 1);
+ OUT_CS_32F(1.0);
+ OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 |
+ (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT));
+ /* XXX should this be related to the actual point size? */
+ OUT_CS_REG(R300_GA_POINT_MINMAX, 0x6 |
+ (0x1800 << R300_GA_POINT_MINMAX_MAX_SHIFT));
+ /* XXX this big chunk should be refactored into rs_state */
+ OUT_CS_REG(R300_GA_LINE_CNTL, 0x00030006);
+ OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, 0x3BAAAAAB);
+ OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, 0x00000000);
+ OUT_CS_REG(R300_GA_LINE_S0, 0x00000000);
+ OUT_CS_REG(R300_GA_LINE_S1, 0x3F800000);
+ OUT_CS_REG(R300_GA_ENHANCE, 0x00000002);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL, 0x0003AAAA);
+ OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000);
+ OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000);
+ OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000);
+ OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001);
+ OUT_CS_REG(R300_GA_OFFSET, 0x00000000);
+ OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412);
+ OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000);
+ OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000);
+ OUT_CS_REG(R300_SU_POLY_OFFSET_FRONT_SCALE, 0x00000000);
+ OUT_CS_REG(R300_SU_POLY_OFFSET_FRONT_OFFSET, 0x00000000);
+ OUT_CS_REG(R300_SU_POLY_OFFSET_BACK_SCALE, 0x00000000);
+ OUT_CS_REG(R300_SU_POLY_OFFSET_BACK_OFFSET, 0x00000000);
+ OUT_CS_REG(R300_SU_POLY_OFFSET_ENABLE, 0x00000000);
+ OUT_CS_REG(R300_SU_CULL_MODE, 0x00000000);
+ OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
+ OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000);
+ OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
+ OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
+ OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000002);
+ OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x00000000);
+ OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x00000000);
+ OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x00000000);
+ /* NOTE(review): R300_FG_DEPTH_SRC is written twice in a row with the
+ * same value; the second write looks redundant, but removing it would
+ * also change the dword total passed to BEGIN_CS above. */
+ OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000);
+ OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000);
+ OUT_CS_REG(R300_RB3D_CCTL, 0x00000000);
+ OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F);
+
+ /* XXX: Oh the wonderful unknown.
+ * Not writing these 8 regs seems to make no difference at all and seeing
+ * as how they're not documented, we should leave them out for now.
+ OUT_CS_REG_SEQ(0x4E54, 8);
+ for (i = 0; i < 8; i++) {
+ OUT_CS(0x00000000);
+ } */
+ OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
+ OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000);
+ OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF);
+ OUT_CS_REG(R300_ZB_FORMAT, 0x00000002);
+ OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003);
+ OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
+ OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
+ /* XXX Moar unknown that should probably be left out.
+ OUT_CS_REG(0x4F30, 0x00000000);
+ OUT_CS_REG(0x4F34, 0x00000000); */
+ OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
+ OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
+ /* Two FLOAT_4 vertex streams; only the destination location of the
+ * second (last) one differs between the TCL and non-TCL paths. */
+ if (caps->has_tcl) {
+ OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
+ ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) |
+ R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT));
+ } else {
+ OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
+ ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) |
+ R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT));
+ }
+ /* Several registers written earlier in this stream (FG_FOG_BLEND,
+ * SE_VTE_CNTL, VAP_VTX_SIZE, VAP_PSC_SGN_NORM_CNTL) are written again
+ * below; the later values are the ones in effect for the draw. */
+ OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000000);
+ OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0xF688F688);
+ OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1);
+ OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405);
+ OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F);
+ OUT_CS_REG(R300_VAP_VTX_SIZE, 0x00000008);
+ OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA);
+ OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, 0x00000003);
+ OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x00000000);
+ OUT_CS_REG(R300_TX_ENABLE, 0x0);
+ /* XXX viewport setup */
+ /* Six consecutive registers starting at XSCALE: unit scale on each axis,
+ * with x and y supplied as the X and Y offsets and 0 for the last. */
+ OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F((float)x);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F((float)y);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F(0.0);
+
+ if (caps->has_tcl) {
+ OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE |
+ R300_PS_UCP_MODE_CLIP_AS_TRIFAN);
+ }
+
+ /* The size of the point we're about to draw, in sixths of pixels */
+ OUT_CS_REG(R300_GA_POINT_SIZE,
+ ((h * 6) & R300_POINTSIZE_Y_MASK) |
+ ((w * 6) << R300_POINTSIZE_X_SHIFT));
+
+ /* XXX */
+ OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa);
+
+ /* Pixel scissors */
+ /* NOTE(review): the second dword is built from w and h alone; if the
+ * register expects an absolute bottom-right corner this is only right
+ * when x == 0 and y == 0 -- confirm against the register docs. */
+ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT));
+ OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT));
+
+ /* RS block setup */
+ if (caps->is_r500) {
+ /* XXX We seem to be in disagreement about how many of these we have
+ * RS:RS_IP_[0-15] [R/W] 32 bits Access: 8/16/32 MMReg:0x4074-0x40b0
+ * Now that's from the docs. I don't care what the mesa driver says */
+ OUT_CS_REG_SEQ(R500_RS_IP_0, 16);
+ for (i = 0; i < 16; i++) {
+ OUT_CS((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+ }
+ OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
+ OUT_CS((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+ OUT_CS(0x00000000);
+ OUT_CS_REG(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
+ } else {
+ OUT_CS_REG_SEQ(R300_RS_IP_0, 8);
+ for (i = 0; i < 8; i++) {
+ OUT_CS(R300_RS_SEL_T(R300_RS_SEL_K0) |
+ R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1));
+ }
+ OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
+ OUT_CS((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+ /* XXX Shouldn't this be 0? */
+ OUT_CS(1);
+ OUT_CS_REG(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
+ }
+ END_CS;
+
+ /* Fragment shader setup */
+ if (caps->is_r500) {
+ r500_emit_fragment_shader(r300, &r500_passthrough_fragment_shader);
+ } else {
+ r300_emit_fragment_shader(r300, &r300_passthrough_fragment_shader);
+ }
+
+ BEGIN_CS(8 + (caps->has_tcl ? 20 : 2));
+ OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
+ OUT_CS(R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A);
+ OUT_CS(R300_US_OUT_FMT_UNUSED);
+ OUT_CS(R300_US_OUT_FMT_UNUSED);
+ OUT_CS(R300_US_OUT_FMT_UNUSED);
+ OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0);
+ /* XXX these magic numbers should be explained when
+ * this becomes a cached state object */
+ if (caps->has_tcl) {
+ OUT_CS_REG(R300_VAP_CNTL, 0xA |
+ (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (0xB << R300_VF_MAX_VTX_NUM_SHIFT) |
+ (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT));
+ OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, 0x00100000);
+ OUT_CS_REG(R300_VAP_PVS_CONST_CNTL, 0x00000000);
+ OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, 0x00000001);
+ /* XXX translate these back into normal instructions */
+ /* Eight raw dwords uploaded through the PVS data port. */
+ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x1);
+ OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0x0);
+ OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 8);
+ OUT_CS(0x00F00203);
+ OUT_CS(0x00D10001);
+ OUT_CS(0x01248001);
+ OUT_CS(0x00000000);
+ OUT_CS(0x00F02203);
+ OUT_CS(0x00D10021);
+ OUT_CS(0x01248021);
+ OUT_CS(0x00000000);
+ } else {
+ OUT_CS_REG(R300_VAP_CNTL, 0xA |
+ (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (0x5 << R300_VF_MAX_VTX_NUM_SHIFT) |
+ (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT));
+ }
+ END_CS;
+
+ /* Fixed blend, blend-colour and depth/stencil/alpha states for the fill. */
+ r300_emit_blend_state(r300, &blend_clear_state);
+ r300_emit_blend_color_state(r300, &blend_color_clear_state);
+ r300_emit_dsa_state(r300, &dsa_clear_state);
+
+ BEGIN_CS(24);
+ /* Flush colorbuffer and blend caches. */
+ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D |
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL);
+ OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+
+ /* Point colorbuffer 0 at the destination texture's buffer. */
+ OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1);
+ OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch |
+ r300_translate_colorformat(tex->tex.format));
+ OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F);
+ /* XXX Packet3 */
+ /* Immediate-mode draw of one point: 4 position floats followed by
+ * 4 colour floats. */
+ OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
+ OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
+ (1 << R300_PRIM_NUM_VERTICES_SHIFT));
+ OUT_CS_32F(w / 2.0);
+ OUT_CS_32F(h / 2.0);
+ /* XXX this should be the depth value to clear to */
+ OUT_CS_32F(1.0);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F(r);
+ OUT_CS_32F(g);
+ OUT_CS_32F(b);
+ OUT_CS_32F(1.0);
+
+ /* XXX figure out why this is 0xA and not 0x2 */
+ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
+ /* XXX OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); */
+
+ END_CS;
+
+ /* The fill overwrote hardware state behind the state tracker's back. */
+ r300->dirty_hw++;
+}
+
+/* Install the surface entry points on the context's pipe_context vtable.
+ * Only surface_fill is provided by this file. */
+void r300_init_surface_functions(struct r300_context* r300)
+{
+    struct pipe_context* pipe = &r300->context;
+
+    pipe->surface_fill = r300_surface_fill;
+}
diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h
new file mode 100644
index 0000000000..442eac2cf2
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_surface.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_SURFACE_H
+#define R300_SURFACE_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+
+#include "util/u_rect.h"
+
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_emit.h"
+#include "r300_state_shader.h"
+#include "r300_state_inlines.h"
+
+/* Blend state emitted by r300_surface_fill: every blend register is zeroed.
+ * Declared static because this object is defined in a header; a non-static
+ * definition would produce duplicate symbols as soon as a second
+ * translation unit includes r300_surface.h. */
+static const struct r300_blend_state blend_clear_state = {
+    .blend_control = 0x0,
+    .alpha_blend_control = 0x0,
+    .rop = 0x0,
+    .dither = 0x0,
+};
+
+/* Blend-colour state emitted by r300_surface_fill: all components zero.
+ * Declared static because this object is defined in a header; a non-static
+ * definition would produce duplicate symbols as soon as a second
+ * translation unit includes r300_surface.h. */
+static const struct r300_blend_color_state blend_color_clear_state = {
+    .blend_color = 0x0,
+    .blend_color_red_alpha = 0x0,
+    .blend_color_green_blue = 0x0,
+};
+
+/* Depth/stencil/alpha state emitted by r300_surface_fill. Everything is
+ * zeroed except the stencil write mask and the ZTOP enable.
+ * Declared static because this object is defined in a header; a non-static
+ * definition would produce duplicate symbols as soon as a second
+ * translation unit includes r300_surface.h. */
+static const struct r300_dsa_state dsa_clear_state = {
+    .alpha_function = 0x0,
+    .alpha_reference = 0x0,
+    .z_buffer_control = 0x0,
+    .z_stencil_control = 0x0,
+    .stencil_ref_mask = R300_STENCILWRITEMASK_MASK,
+    .z_buffer_top = R300_ZTOP_ENABLE,
+    .stencil_ref_bf = 0x0,
+};
+
+#endif /* R300_SURFACE_H */
diff --git a/src/gallium/drivers/r300/r300_swtcl_emit.c b/src/gallium/drivers/r300/r300_swtcl_emit.c
new file mode 100644
index 0000000000..5b028aaf7b
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_swtcl_emit.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "draw/draw_pipe.h"
+#include "draw/draw_vbuf.h"
+#include "util/u_memory.h"
+
+#include "r300_cs.h"
+#include "r300_context.h"
+#include "r300_reg.h"
+
+/* r300_swtcl_emit: Vertex and index buffer primitive emission. No HW TCL. */
+
+/* Software-TCL vertex buffer renderer: the vbuf_render implementation that
+ * the draw module's vbuf stage calls into, plus the r300 bookkeeping for
+ * the vertex buffer currently in use. */
+struct r300_swtcl_render {
+ /* Parent class. Must stay the first member: r300_swtcl_render() casts a
+ * vbuf_render pointer straight to this struct. */
+ struct vbuf_render base;
+
+ /* Pipe context */
+ struct r300_context* r300;
+
+ /* Vertex information */
+ /* Size of one vertex as passed to allocate_vertices. */
+ size_t vertex_size;
+ /* Primitive type given to set_primitive, and the matching
+ * R300_VAP_VF_CNTL__PRIM_* value used when drawing. */
+ unsigned prim;
+ unsigned hwprim;
+
+ /* VBO */
+ /* Current vertex buffer and the size it was created with. */
+ struct pipe_buffer* vbo;
+ size_t vbo_size;
+ /* Offset added to the mapping returned by map_vertices; reset to 0 on
+ * every allocation. */
+ size_t vbo_offset;
+ /* Pointer returned by the most recent pipe_buffer_map of vbo. */
+ void* vbo_map;
+ /* Lower bound applied to vbo_size when allocating. */
+ size_t vbo_alloc_size;
+ /* High-water mark updated in unmap_vertices. */
+ size_t vbo_max_used;
+};
+
+/* Downcast a generic vbuf_render to the r300 renderer that embeds it as
+ * its first member. */
+static INLINE struct r300_swtcl_render*
+r300_swtcl_render(struct vbuf_render* render)
+{
+    struct r300_swtcl_render* r300render = (struct r300_swtcl_render*)render;
+
+    return r300render;
+}
+
+/* vbuf_render::get_vertex_info. Brings the context's derived state up to
+ * date and returns the vertex layout the draw module should emit.
+ *
+ * The context's vertex_info member is a wrapper whose `vinfo` field holds
+ * the draw-module vertex_info (see the `.vinfo.size` accesses in
+ * prepare_render), so that field is what gets returned; returning the
+ * wrapper itself has the wrong pointer type. */
+static const struct vertex_info*
+r300_swtcl_render_get_vertex_info(struct vbuf_render* render)
+{
+    struct r300_swtcl_render* r300render = r300_swtcl_render(render);
+    struct r300_context* r300 = r300render->r300;
+
+    r300_update_derived_state(r300);
+
+    return &r300->vertex_info.vinfo;
+}
+
+/* vbuf_render::allocate_vertices. Drops any previous vertex buffer and
+ * creates a new one large enough for `count` vertices of `vertex_size`
+ * each, but never smaller than vbo_alloc_size. Returns true when the
+ * buffer was created, false otherwise. */
+static boolean r300_swtcl_render_allocate_vertices(struct vbuf_render* render,
+                                                   ushort vertex_size,
+                                                   ushort count)
+{
+    struct r300_swtcl_render* r300render = r300_swtcl_render(render);
+    struct pipe_screen* screen = r300render->r300->context.screen;
+    size_t wanted = (size_t)vertex_size * (size_t)count;
+
+    /* Release the buffer from the previous allocation, if any. */
+    if (r300render->vbo) {
+        pipe_buffer_reference(screen, &r300render->vbo, NULL);
+    }
+
+    r300render->vbo_offset = 0;
+    r300render->vbo_size = MAX2(wanted, r300render->vbo_alloc_size);
+    r300render->vertex_size = vertex_size;
+
+    r300render->vbo = pipe_buffer_create(screen,
+                                         64,
+                                         PIPE_BUFFER_USAGE_VERTEX,
+                                         r300render->vbo_size);
+
+    return r300render->vbo ? true : false;
+}
+
+/* vbuf_render::map_vertices. Maps the current vertex buffer for CPU writes,
+ * remembers the mapping in vbo_map and returns it advanced by vbo_offset. */
+static void* r300_swtcl_render_map_vertices(struct vbuf_render* render)
+{
+    struct r300_swtcl_render* r300render = r300_swtcl_render(render);
+    struct pipe_screen* screen = r300render->r300->context.screen;
+    unsigned char* start;
+
+    r300render->vbo_map = pipe_buffer_map(screen, r300render->vbo,
+                                          PIPE_BUFFER_USAGE_CPU_WRITE);
+
+    start = (unsigned char*)r300render->vbo_map;
+
+    return start + r300render->vbo_offset;
+}
+
+/* vbuf_render::unmap_vertices. Records how far into the buffer vertices
+ * were written (up to and including index `max`) and unmaps the buffer.
+ * `min` is not used. */
+static void r300_swtcl_render_unmap_vertices(struct vbuf_render* render,
+                                             ushort min,
+                                             ushort max)
+{
+    struct r300_swtcl_render* r300render = r300_swtcl_render(render);
+    struct pipe_screen* screen = r300render->r300->context.screen;
+    size_t used = r300render->vertex_size * (max + 1);
+
+    /* Keep the high-water mark. */
+    if (!(r300render->vbo_max_used > used)) {
+        r300render->vbo_max_used = used;
+    }
+
+    pipe_buffer_unmap(screen, r300render->vbo);
+}
+
+/* vbuf_render::release_vertices. Drops the renderer's reference to the
+ * current vertex buffer. */
+static void r300_swtcl_render_release_vertices(struct vbuf_render* render)
+{
+    struct r300_swtcl_render* r300render = r300_swtcl_render(render);
+
+    pipe_buffer_reference(r300render->r300->context.screen,
+                          &r300render->vbo, NULL);
+}
+
+/* vbuf_render::set_primitive. Stores the pipe primitive type and, when it
+ * is one the hardware can draw, the matching R300_VAP_VF_CNTL__PRIM_* value.
+ * Returns false for unknown primitives; in that case `prim` has already
+ * been recorded but hwprim is left untouched. */
+static boolean r300_swtcl_render_set_primitive(struct vbuf_render* render,
+                                               unsigned prim)
+{
+    struct r300_swtcl_render* r300render = r300_swtcl_render(render);
+    unsigned hw;
+
+    r300render->prim = prim;
+
+    switch (prim) {
+        case PIPE_PRIM_POINTS:
+            hw = R300_VAP_VF_CNTL__PRIM_POINTS;
+            break;
+        case PIPE_PRIM_LINES:
+            hw = R300_VAP_VF_CNTL__PRIM_LINES;
+            break;
+        case PIPE_PRIM_LINE_LOOP:
+            hw = R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
+            break;
+        case PIPE_PRIM_LINE_STRIP:
+            hw = R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
+            break;
+        case PIPE_PRIM_TRIANGLES:
+            hw = R300_VAP_VF_CNTL__PRIM_TRIANGLES;
+            break;
+        case PIPE_PRIM_TRIANGLE_STRIP:
+            hw = R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
+            break;
+        case PIPE_PRIM_TRIANGLE_FAN:
+            hw = R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
+            break;
+        case PIPE_PRIM_QUADS:
+            hw = R300_VAP_VF_CNTL__PRIM_QUADS;
+            break;
+        case PIPE_PRIM_QUAD_STRIP:
+            hw = R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
+            break;
+        case PIPE_PRIM_POLYGON:
+            hw = R300_VAP_VF_CNTL__PRIM_POLYGON;
+            break;
+        default:
+            /* Not a primitive we know how to translate. */
+            return false;
+    }
+
+    r300render->hwprim = hw;
+
+    return true;
+}
+
+/* Common prologue for both draw paths: emit all dirty state, then point the
+ * hardware at the renderer's current vertex buffer. */
+static void prepare_render(struct r300_swtcl_render* render)
+{
+    struct r300_context* r300 = render->r300;
+
+    CS_LOCALS(r300);
+
+    /* Make sure that all possible state is emitted. */
+    r300_emit_dirty_state(r300);
+
+    /* vbo_size is a size_t holding the allocated buffer size, not a vertex
+     * count; cast explicitly so the arguments match the format specifiers. */
+    debug_printf("r300: Preparing vertex buffer %p for render, "
+            "vertex size %u, buffer size %u\n", render->vbo,
+            (unsigned)r300->vertex_info.vinfo.size,
+            (unsigned)render->vbo_size);
+    /* Set the pointer to our vertex buffer. The emitted values are this:
+     * PACKET3 [3D_LOAD_VBPNTR]
+     * COUNT [1]
+     * FORMAT [size | stride << 8]
+     * VBPNTR [relocated BO]
+     */
+    BEGIN_CS(5);
+    OUT_CS(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 3));
+    OUT_CS(1);
+    OUT_CS(r300->vertex_info.vinfo.size | (r300->vertex_info.vinfo.size << 8));
+    OUT_CS_RELOC(render->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_CS;
+}
+
+/* vbuf_render::draw_arrays. Emits a non-indexed draw of `count` vertices
+ * from the current vertex buffer using the primitive chosen by
+ * set_primitive.
+ *
+ * NOTE(review): `start` is not used; the draw packet carries only the
+ * vertex count -- confirm callers always pass 0. */
+static void r300_swtcl_render_draw_arrays(struct vbuf_render* render,
+                                          unsigned start,
+                                          unsigned count)
+{
+    struct r300_swtcl_render* r300render = r300_swtcl_render(render);
+    struct r300_context* r300 = r300render->r300;
+    unsigned vf_cntl;
+
+    CS_LOCALS(r300);
+
+    prepare_render(r300render);
+
+    debug_printf("r300: Doing vbuf render, count %d\n", count);
+
+    vf_cntl = R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
+              r300render->hwprim | R300_VAP_VF_CNTL__INDEX_SIZE_32bit;
+
+    BEGIN_CS(2);
+    OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0));
+    OUT_CS(vf_cntl);
+    END_CS;
+}
+
+/* vbuf_render::draw. Emits an indexed draw of `count` indices.
+ *
+ * The draw module hands us 16-bit indices, but the draw packet is flagged
+ * INDEX_SIZE_32bit and the index buffer is sized at four bytes per index,
+ * so each index is widened to 32 bits while being copied. (A plain memcpy
+ * of count * 4 bytes would read past the end of `indices` and upload pairs
+ * of 16-bit values as single indices.)
+ *
+ * NOTE(review): this function never drops its reference on index_buffer
+ * after emitting the relocation; whether the winsys keeps the buffer
+ * alive is not visible here, so that is left unchanged -- confirm. */
+static void r300_swtcl_render_draw(struct vbuf_render* render,
+                                   const ushort* indices,
+                                   uint count)
+{
+    struct r300_swtcl_render* r300render = r300_swtcl_render(render);
+    struct r300_context* r300 = r300render->r300;
+    struct pipe_screen* screen = r300->context.screen;
+    struct pipe_buffer* index_buffer;
+    uint32_t* index_map;
+    uint i;
+
+    CS_LOCALS(r300);
+
+    prepare_render(r300render);
+
+    /* Send our indices into an index buffer. */
+    index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX,
+                                      count * 4);
+    if (!index_buffer) {
+        return;
+    }
+
+    index_map = pipe_buffer_map(screen, index_buffer,
+                                PIPE_BUFFER_USAGE_CPU_WRITE);
+    if (!index_map) {
+        /* Nothing was emitted that refers to the buffer yet, so it can
+         * simply be released. */
+        pipe_buffer_reference(screen, &index_buffer, NULL);
+        return;
+    }
+
+    /* Widen 16-bit indices to the 32-bit layout the hardware is told
+     * to expect. */
+    for (i = 0; i < count; i++) {
+        index_map[i] = indices[i];
+    }
+    pipe_buffer_unmap(screen, index_buffer);
+
+    debug_printf("r300: Doing indexbuf render, count %d\n", count);
+
+    BEGIN_CS(5);
+    OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0));
+    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+           r300render->hwprim | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+
+    OUT_CS(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2));
+    OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2));
+    OUT_CS_RELOC(index_buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_CS;
+}
+
+/* vbuf_render::destroy. Frees the renderer object itself.
+ * NOTE(review): any vertex buffer still referenced through `vbo` is not
+ * released here -- confirm release_vertices always runs first. */
+static void r300_swtcl_render_destroy(struct vbuf_render* render)
+{
+    struct r300_swtcl_render* r300render = r300_swtcl_render(render);
+
+    FREE(r300render);
+}
+
+/* Allocate a software-TCL renderer bound to `r300` and fill in its
+ * vbuf_render callbacks. Returns the embedded vbuf_render, or NULL when the
+ * allocation fails (the caller already checks for NULL). */
+static struct vbuf_render* r300_swtcl_render_create(struct r300_context* r300)
+{
+    struct r300_swtcl_render* r300render = CALLOC_STRUCT(r300_swtcl_render);
+
+    /* Bail out before touching the struct if the allocation failed. */
+    if (!r300render) {
+        return NULL;
+    }
+
+    r300render->r300 = r300;
+
+    /* XXX find real numbers plz */
+    r300render->base.max_vertex_buffer_bytes = 128 * 1024;
+    r300render->base.max_indices = 16 * 1024;
+
+    r300render->base.get_vertex_info = r300_swtcl_render_get_vertex_info;
+    r300render->base.allocate_vertices = r300_swtcl_render_allocate_vertices;
+    r300render->base.map_vertices = r300_swtcl_render_map_vertices;
+    r300render->base.unmap_vertices = r300_swtcl_render_unmap_vertices;
+    r300render->base.set_primitive = r300_swtcl_render_set_primitive;
+    r300render->base.draw = r300_swtcl_render_draw;
+    r300render->base.draw_arrays = r300_swtcl_render_draw_arrays;
+    r300render->base.release_vertices = r300_swtcl_render_release_vertices;
+    r300render->base.destroy = r300_swtcl_render_destroy;
+
+    /* XXX bonghits ahead
+     * (disabled pre-allocation; would need screen = r300->context.screen)
+    r300render->vbo_alloc_size = 128 * 4096;
+    r300render->vbo_size = r300render->vbo_alloc_size;
+    r300render->vbo_offset = 0;
+    r300render->vbo = pipe_buffer_create(screen,
+                                         64,
+                                         PIPE_BUFFER_USAGE_VERTEX,
+                                         r300render->vbo_size);
+    r300render->vbo_map = pipe_buffer_map(screen,
+                                          r300render->vbo,
+                                          PIPE_BUFFER_USAGE_CPU_WRITE);
+    pipe_buffer_unmap(screen, r300render->vbo); */
+
+    return &r300render->base;
+}
+
+/* Build the draw-module stage that feeds software-transformed vertices to
+ * the hardware: create the r300 renderer, wrap it in a vbuf stage and
+ * register it with the draw context. Returns NULL if either step fails. */
+struct draw_stage* r300_draw_swtcl_stage(struct r300_context* r300)
+{
+    struct draw_stage* stage = NULL;
+    struct vbuf_render* render = r300_swtcl_render_create(r300);
+
+    if (render) {
+        stage = draw_vbuf_stage(r300->draw, render);
+
+        if (stage) {
+            draw_set_render(r300->draw, render);
+        } else {
+            /* No stage took ownership of the renderer; free it here. */
+            render->destroy(render);
+        }
+    }
+
+    return stage;
+}
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
new file mode 100644
index 0000000000..edd4370663
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_texture.h"
+
+/* Size of the next smaller mip level: half of i, but never below 1. */
+static int minify(int i)
+{
+    int half = i >> 1;
+
+    return (1 > half) ? 1 : half;
+}
+
+/* Lay out all mip levels of `tex` back to back in one buffer.
+ *
+ * For every level up to last_level this fills in the level dimensions
+ * (levels above 0 are derived from the previous level), the block counts,
+ * and the level's byte offset, and accumulates the total size in
+ * tex->size. The pitch of level 0 is stored in tex->stride, which
+ * r300_surface_fill reads to program the colorbuffer pitch. */
+static void r300_setup_miptree(struct r300_texture* tex)
+{
+    struct pipe_texture* base = &tex->tex;
+    int stride, size;
+
+    for (int i = 0; i <= base->last_level; i++) {
+        if (i > 0) {
+            base->width[i] = minify(base->width[i-1]);
+            base->height[i] = minify(base->height[i-1]);
+            base->depth[i] = minify(base->depth[i-1]);
+        }
+
+        base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]);
+        /* The number of block rows comes from the height, not the width. */
+        base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]);
+
+        /* Row pitch in bytes, rounded up to a multiple of 64. */
+        /* XXX NPOT -> 64, not 32 */
+        stride = (base->nblocksx[i] * base->block.size + 63) & ~63;
+        size = stride * base->nblocksy[i] * base->depth[i];
+
+        /* Each level starts on a 64-byte boundary. */
+        /* XXX 64 for NPOT */
+        tex->offset[i] = (tex->size + 63) & ~63;
+        tex->size = tex->offset[i] + size;
+
+        /* Remember the base level's pitch; without this a texture made by
+         * r300_texture_create keeps the zero stride left by calloc. */
+        if (i == 0) {
+            tex->stride = stride;
+        }
+    }
+}
+
+/* Create a new texture: copy the template, compute the mip layout and
+ * allocate one pixel buffer large enough for every level. Returns NULL if
+ * either the texture struct or its buffer cannot be allocated. */
+static struct pipe_texture*
+    r300_texture_create(struct pipe_screen* screen,
+                        const struct pipe_texture* template)
+{
+    /* XXX struct r300_screen* r300screen = r300_screen(screen); */
+
+    struct r300_texture* tex = CALLOC_STRUCT(r300_texture);
+
+    if (tex) {
+        tex->tex = *template;
+        tex->tex.refcount = 1;
+        tex->tex.screen = screen;
+
+        /* Fills in per-level sizes, offsets and the total tex->size. */
+        r300_setup_miptree(tex);
+
+        tex->buffer = screen->buffer_create(screen, 64,
+                                            PIPE_BUFFER_USAGE_PIXEL,
+                                            tex->size);
+
+        if (!tex->buffer) {
+            FREE(tex);
+            tex = NULL;
+        }
+    }
+
+    return (struct pipe_texture*)tex;
+}
+
+/* Drop one reference to *texture and clear the caller's pointer. When the
+ * count reaches zero the backing buffer reference is released and the
+ * texture is freed. A NULL *texture is ignored. */
+static void r300_texture_release(struct pipe_screen* screen,
+                                 struct pipe_texture** texture)
+{
+    struct pipe_texture* pt = *texture;
+
+    if (!pt) {
+        return;
+    }
+
+    pt->refcount--;
+
+    if (pt->refcount <= 0) {
+        struct r300_texture* tex = (struct r300_texture*)pt;
+
+        pipe_buffer_reference(screen, &tex->buffer, NULL);
+
+        FREE(tex);
+    }
+
+    *texture = NULL;
+}
+
+/* Create a pipe_surface describing one mip level of `texture`. The surface
+ * takes a reference on the texture and records the level's format,
+ * dimensions and byte offset. `face` and `zslice` are not used. Returns
+ * NULL if the surface cannot be allocated. */
+static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
+                                                 struct pipe_texture* texture,
+                                                 unsigned face,
+                                                 unsigned level,
+                                                 unsigned zslice,
+                                                 unsigned flags)
+{
+    struct r300_texture* tex = (struct r300_texture*)texture;
+    struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface);
+    /* XXX this is certainly dependent on tex target */
+    const unsigned level_offset = tex->offset[level];
+
+    if (!surface) {
+        return surface;
+    }
+
+    surface->refcount = 1;
+    pipe_texture_reference(&surface->texture, texture);
+    surface->format = texture->format;
+    surface->width = texture->width[level];
+    surface->height = texture->height[level];
+    surface->offset = level_offset;
+    surface->usage = flags;
+    surface->status = PIPE_SURFACE_STATUS_DEFINED;
+
+    return surface;
+}
+
+/* Drop one reference to *surface and clear the caller's pointer. When the
+ * count reaches zero the surface's texture reference is released and the
+ * surface is freed. A NULL *surface is ignored, matching
+ * r300_texture_release. */
+static void r300_tex_surface_release(struct pipe_screen* screen,
+                                     struct pipe_surface** surface)
+{
+    struct pipe_surface* s = *surface;
+
+    /* Nothing to release; avoid dereferencing a NULL surface. */
+    if (!s) {
+        return;
+    }
+
+    s->refcount--;
+
+    if (s->refcount <= 0) {
+        pipe_texture_reference(&s->texture, NULL);
+        FREE(s);
+    }
+
+    *surface = NULL;
+}
+
+/* Wrap an existing buffer in a texture object without allocating storage.
+ * Only single-level 2D textures of depth 1 are accepted. The new texture
+ * copies `base`, adopts the caller's stride and takes a reference on
+ * `buffer`. Returns NULL for unsupported layouts or allocation failure. */
+static struct pipe_texture*
+    r300_texture_blanket(struct pipe_screen* screen,
+                         const struct pipe_texture* base,
+                         const unsigned* stride,
+                         struct pipe_buffer* buffer)
+{
+    struct r300_texture* tex = NULL;
+    const int supported = base->target == PIPE_TEXTURE_2D &&
+                          base->last_level == 0 &&
+                          base->depth[0] == 1;
+
+    if (supported) {
+        tex = CALLOC_STRUCT(r300_texture);
+    }
+
+    if (!tex) {
+        return NULL;
+    }
+
+    tex->tex = *base;
+    tex->tex.refcount = 1;
+    tex->tex.screen = screen;
+
+    tex->stride = *stride;
+
+    pipe_buffer_reference(screen, &tex->buffer, buffer);
+
+    return (struct pipe_texture*)tex;
+}
+
+/* Install this file's texture and texture-surface entry points on the
+ * screen's function table. */
+void r300_init_screen_texture_functions(struct pipe_screen* screen)
+{
+    /* Texture lifetime. */
+    screen->texture_create = r300_texture_create;
+    screen->texture_blanket = r300_texture_blanket;
+    screen->texture_release = r300_texture_release;
+
+    /* Surfaces onto texture levels. */
+    screen->get_tex_surface = r300_get_tex_surface;
+    screen->tex_surface_release = r300_tex_surface_release;
+}
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
new file mode 100644
index 0000000000..7964229a94
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_TEXTURE_H
+#define R300_TEXTURE_H
+
+#include "pipe/p_screen.h"
+
+#include "util/u_math.h"
+
+#include "r300_context.h"
+
+void r300_init_screen_texture_functions(struct pipe_screen* screen);
+
+#endif /* R300_TEXTURE_H */
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
new file mode 100644
index 0000000000..5a3a212892
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_WINSYS_H
+#define R300_WINSYS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The public interface header for the r300 pipe driver.
+ * Any winsys hosting this pipe needs to implement r300_winsys and then
+ * call r300_create_context to start things. */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+struct radeon_cs;
+
+struct r300_winsys {
+
+ /* PCI ID */
+ uint32_t pci_id;
+
+ /* GB pipe count */
+ uint32_t gb_pipes;
+
+ /* CS object. This is very much like Intel's batchbuffer.
+ * Fill it full of dwords and relocs and then submit.
+ * Repeat as needed. */
+ /* Note: Unlike Mesa's version of this, we don't keep a copy of the CSM
+ * that was used to create this CS. Is this a good idea? */
+ /* Note: The pipe driver doesn't know how to use this. This is purely
+ * for the winsys. */
+ struct radeon_cs* cs;
+
+ /* Check to see if there's room for commands. */
+ boolean (*check_cs)(struct radeon_cs* cs, int size);
+
+ /* Start a command emit. */
+ void (*begin_cs)(struct radeon_cs* cs,
+ int size,
+ const char* file,
+ const char* function,
+ int line);
+
+ /* Write a dword to the command buffer. */
+ void (*write_cs_dword)(struct radeon_cs* cs, uint32_t dword);
+
+ /* Write a relocated dword to the command buffer. */
+ void (*write_cs_reloc)(struct radeon_cs* cs,
+ struct pipe_buffer* bo,
+ uint32_t rd,
+ uint32_t wd,
+ uint32_t flags);
+
+ /* Finish a command emit. */
+ void (*end_cs)(struct radeon_cs* cs,
+ const char* file,
+ const char* function,
+ int line);
+
+ /* Flush the CS. */
+ void (*flush_cs)(struct radeon_cs* cs);
+};
+
+struct pipe_context* r300_create_context(struct pipe_screen* screen,
+ struct pipe_winsys* winsys,
+ struct r300_winsys* r300_winsys);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* R300_WINSYS_H */
diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile
index 120bdfd9dd..516e3992fd 100644
--- a/src/gallium/drivers/softpipe/Makefile
+++ b/src/gallium/drivers/softpipe/Makefile
@@ -14,7 +14,7 @@ C_SOURCES = \
sp_draw_arrays.c \
sp_prim_setup.c \
sp_prim_vbuf.c \
- sp_quad.c \
+ sp_quad_pipe.c \
sp_quad_alpha_test.c \
sp_quad_blend.c \
sp_quad_colormask.c \
@@ -42,6 +42,3 @@ C_SOURCES = \
sp_surface.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript
index c1f7daa8ab..f8720638a7 100644
--- a/src/gallium/drivers/softpipe/SConscript
+++ b/src/gallium/drivers/softpipe/SConscript
@@ -17,7 +17,7 @@ softpipe = env.ConvenienceLibrary(
'sp_setup.c',
'sp_quad_alpha_test.c',
'sp_quad_blend.c',
- 'sp_quad.c',
+ 'sp_quad_pipe.c',
'sp_quad_colormask.c',
'sp_quad_coverage.c',
'sp_quad_depth_test.c',
diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index dfa46c9fb7..ad108ec446 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -85,7 +85,7 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps,
#endif
}
- for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) {
+ for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) {
unsigned cv;
if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) {
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index cd1e6663d8..ff5d1b54a4 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -2,6 +2,7 @@
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -52,15 +53,15 @@
* Map any drawing surfaces which aren't already mapped
*/
void
-softpipe_map_surfaces(struct softpipe_context *sp)
+softpipe_map_transfers(struct softpipe_context *sp)
{
unsigned i;
- for (i = 0; i < sp->framebuffer.num_cbufs; i++) {
- sp_tile_cache_map_surfaces(sp->cbuf_cache[i]);
+ for (i = 0; i < sp->framebuffer.nr_cbufs; i++) {
+ sp_tile_cache_map_transfers(sp->cbuf_cache[i]);
}
- sp_tile_cache_map_surfaces(sp->zsbuf_cache);
+ sp_tile_cache_map_transfers(sp->zsbuf_cache);
}
@@ -68,25 +69,25 @@ softpipe_map_surfaces(struct softpipe_context *sp)
* Unmap any mapped drawing surfaces
*/
void
-softpipe_unmap_surfaces(struct softpipe_context *sp)
+softpipe_unmap_transfers(struct softpipe_context *sp)
{
uint i;
- for (i = 0; i < sp->framebuffer.num_cbufs; i++)
+ for (i = 0; i < sp->framebuffer.nr_cbufs; i++)
sp_flush_tile_cache(sp, sp->cbuf_cache[i]);
sp_flush_tile_cache(sp, sp->zsbuf_cache);
- for (i = 0; i < sp->framebuffer.num_cbufs; i++) {
- sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]);
+ for (i = 0; i < sp->framebuffer.nr_cbufs; i++) {
+ sp_tile_cache_unmap_transfers(sp->cbuf_cache[i]);
}
- sp_tile_cache_unmap_surfaces(sp->zsbuf_cache);
+ sp_tile_cache_unmap_transfers(sp->zsbuf_cache);
}
static void softpipe_destroy( struct pipe_context *pipe )
{
struct softpipe_context *softpipe = softpipe_context( pipe );
- struct pipe_winsys *ws = pipe->winsys;
+ struct pipe_screen *screen = pipe->screen;
uint i;
if (softpipe->draw)
@@ -115,7 +116,7 @@ static void softpipe_destroy( struct pipe_context *pipe )
for (i = 0; i < Elements(softpipe->constants); i++) {
if (softpipe->constants[i].buffer) {
- winsys_buffer_reference(ws, &softpipe->constants[i].buffer, NULL);
+ pipe_buffer_reference(screen, &softpipe->constants[i].buffer, NULL);
}
}
@@ -221,6 +222,24 @@ softpipe_create( struct pipe_screen *screen,
softpipe->quad[i].output = sp_quad_output_stage(softpipe);
}
+ /* vertex shader samplers */
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples_vertex;
+ softpipe->tgsi.vert_samplers[i].unit = i;
+ softpipe->tgsi.vert_samplers[i].sp = softpipe;
+ softpipe->tgsi.vert_samplers[i].cache = softpipe->tex_cache[i];
+ softpipe->tgsi.vert_samplers_list[i] = &softpipe->tgsi.vert_samplers[i];
+ }
+
+ /* fragment shader samplers */
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment;
+ softpipe->tgsi.frag_samplers[i].unit = i;
+ softpipe->tgsi.frag_samplers[i].sp = softpipe;
+ softpipe->tgsi.frag_samplers[i].cache = softpipe->tex_cache[i];
+ softpipe->tgsi.frag_samplers_list[i] = &softpipe->tgsi.frag_samplers[i];
+ }
+
/*
* Create drawing context and plug our rendering stage into it.
*/
@@ -228,6 +247,11 @@ softpipe_create( struct pipe_screen *screen,
if (!softpipe->draw)
goto fail;
+ draw_texture_samplers(softpipe->draw,
+ PIPE_MAX_SAMPLERS,
+ (struct tgsi_sampler **)
+ softpipe->tgsi.vert_samplers_list);
+
softpipe->setup = sp_draw_render_stage(softpipe);
if (!softpipe->setup)
goto fail;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 2b9a2a8ee5..59d6df8f2d 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -32,11 +32,11 @@
#define SP_CONTEXT_H
#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
#include "draw/draw_vertex.h"
-#include "sp_quad.h"
+#include "sp_quad_pipe.h"
+#include "sp_tex_sample.h"
/**
@@ -50,7 +50,6 @@
*/
#define SP_NUM_QUAD_THREADS 1
-struct softpipe_winsys;
struct softpipe_vbuf_render;
struct draw_context;
struct draw_stage;
@@ -62,15 +61,15 @@ struct sp_vertex_shader;
struct softpipe_context {
struct pipe_context pipe; /**< base class */
- /* The most recent drawing state as set by the driver:
- */
- const struct pipe_blend_state *blend;
+ /** Constant state objects */
+ const struct pipe_blend_state *blend;
const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
- const struct pipe_depth_stencil_alpha_state *depth_stencil;
+ const struct pipe_depth_stencil_alpha_state *depth_stencil;
const struct pipe_rasterizer_state *rasterizer;
const struct sp_fragment_shader *fs;
const struct sp_vertex_shader *vs;
+ /** Other rendering state */
struct pipe_blend_color blend_color;
struct pipe_clip_state clip;
struct pipe_constant_buffer constants[PIPE_SHADER_TYPES];
@@ -81,23 +80,20 @@ struct softpipe_context {
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
- unsigned dirty;
unsigned num_samplers;
unsigned num_textures;
unsigned num_vertex_elements;
unsigned num_vertex_buffers;
- boolean no_rast;
+ unsigned dirty; /**< Mask of SP_NEW_x flags */
/* Counter for occlusion queries. Note this supports overlapping
* queries.
*/
- uint64 occlusion_count;
+ uint64_t occlusion_count;
- /*
- * Mapped vertex buffers
- */
+ /** Mapped vertex buffers */
ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS];
/** Mapped constant buffers */
@@ -107,16 +103,11 @@ struct softpipe_context {
struct vertex_info vertex_info;
struct vertex_info vertex_info_vbuf;
+ /** Which vertex shader output slot contains point size */
int psize_slot;
unsigned reduced_api_prim; /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */
-#if 0
- /* Stipple derived state:
- */
- ubyte stipple_masks[16][16];
-#endif
-
/** Derived from scissor and surface bounds: */
struct pipe_scissor_state cliprect;
@@ -139,6 +130,14 @@ struct softpipe_context {
struct quad_stage *first; /**< points to one of the above stages */
} quad[SP_NUM_QUAD_THREADS];
+ /** TGSI exec things */
+ struct {
+ struct sp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS];
+ struct sp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS];
+ struct sp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS];
+ struct sp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS];
+ } tgsi;
+
/** The primitive drawing context */
struct draw_context *draw;
struct draw_stage *setup;
@@ -150,8 +149,9 @@ struct softpipe_context {
struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
- int use_sse : 1;
- int dump_fs : 1;
+ unsigned use_sse : 1;
+ unsigned dump_fs : 1;
+ unsigned no_rast : 1;
};
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 424bd56846..f117096bf7 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -33,7 +33,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_context.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_inlines.h"
#include "sp_context.h"
@@ -47,16 +47,22 @@ static void
softpipe_map_constant_buffers(struct softpipe_context *sp)
{
struct pipe_winsys *ws = sp->pipe.winsys;
- uint i;
+ uint i, size;
+
for (i = 0; i < PIPE_SHADER_TYPES; i++) {
- if (sp->constants[i].size)
+ if (sp->constants[i].buffer && sp->constants[i].buffer->size)
sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer,
PIPE_BUFFER_USAGE_CPU_READ);
}
+ if (sp->constants[PIPE_SHADER_VERTEX].buffer)
+ size = sp->constants[PIPE_SHADER_VERTEX].buffer->size;
+ else
+ size = 0;
+
draw_set_mapped_constant_buffer(sp->draw,
sp->mapped_constants[PIPE_SHADER_VERTEX],
- sp->constants[PIPE_SHADER_VERTEX].size);
+ size);
}
static void
@@ -73,7 +79,7 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
draw_set_mapped_constant_buffer(sp->draw, NULL, 0);
for (i = 0; i < 2; i++) {
- if (sp->constants[i].size)
+ if (sp->constants[i].buffer && sp->constants[i].buffer->size)
ws->buffer_unmap(ws, sp->constants[i].buffer);
sp->mapped_constants[i] = NULL;
}
@@ -128,7 +134,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
if (sp->dirty)
softpipe_update_derived( sp );
- softpipe_map_surfaces(sp);
+ softpipe_map_transfers(sp);
softpipe_map_constant_buffers(sp);
/*
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index 401764bb43..035f4b963e 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -57,7 +57,7 @@ softpipe_flush( struct pipe_context *pipe,
}
if (flags & PIPE_FLUSH_RENDER_CACHE) {
- for (i = 0; i < softpipe->framebuffer.num_cbufs; i++)
+ for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++)
if (softpipe->cbuf_cache[i])
sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]);
@@ -70,7 +70,7 @@ softpipe_flush( struct pipe_context *pipe,
* that's called before swapbuffers because we don't always want
* to unmap surfaces when flushing.
*/
- softpipe_unmap_surfaces(softpipe);
+ softpipe_unmap_transfers(softpipe);
}
/* Enable to dump BMPs of the color/depth buffers each frame */
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 701ee4c72f..3c7ba565d6 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -29,7 +29,7 @@
#include "sp_context.h"
#include "sp_state.h"
#include "sp_fs.h"
-#include "sp_headers.h"
+#include "sp_quad.h"
#include "pipe/p_state.h"
@@ -39,11 +39,19 @@
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_parse.h"
-struct sp_exec_fragment_shader {
+struct sp_exec_fragment_shader
+{
struct sp_fragment_shader base;
};
+/** cast wrapper */
+static INLINE struct sp_exec_fragment_shader *
+sp_exec_fragment_shader(const struct sp_fragment_shader *base)
+{
+ return (struct sp_exec_fragment_shader *) base;
+}
+
/**
* Compute quad X,Y,Z,W for the four fragments in a quad.
@@ -84,12 +92,18 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef,
static void
exec_prepare( const struct sp_fragment_shader *base,
struct tgsi_exec_machine *machine,
- struct tgsi_sampler *samplers )
+ struct tgsi_sampler **samplers )
{
- tgsi_exec_machine_bind_shader( machine,
- base->shader.tokens,
- PIPE_MAX_SAMPLERS,
- samplers );
+ /*
+ * Bind tokens/shader to the interpreter's machine state.
+ * Avoid redundant binding.
+ */
+ if (machine->Tokens != base->shader.tokens) {
+ tgsi_exec_machine_bind_shader( machine,
+ base->shader.tokens,
+ PIPE_MAX_SAMPLERS,
+ samplers );
+ }
}
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 50eb2c07bc..7e22081132 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -29,7 +29,7 @@
#include "sp_context.h"
#include "sp_state.h"
#include "sp_fs.h"
-#include "sp_headers.h"
+#include "sp_quad.h"
#include "pipe/p_state.h"
@@ -40,7 +40,7 @@
#include "tgsi/tgsi_sse2.h"
-#ifdef PIPE_ARCH_X86
+#if defined(PIPE_ARCH_X86)
#include "rtasm/rtasm_x86sse.h"
@@ -69,7 +69,7 @@ struct sp_sse_fragment_shader {
static void
fs_sse_prepare( const struct sp_fragment_shader *base,
struct tgsi_exec_machine *machine,
- struct tgsi_sampler *samplers )
+ struct tgsi_sampler **samplers )
{
}
diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h
deleted file mode 100644
index 4a42cb3c19..0000000000
--- a/src/gallium/drivers/softpipe/sp_headers.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#ifndef SP_HEADERS_H
-#define SP_HEADERS_H
-
-#include "pipe/p_state.h"
-#include "tgsi/tgsi_exec.h"
-
-#define PRIM_POINT 1
-#define PRIM_LINE 2
-#define PRIM_TRI 3
-
-
-/* The rasterizer generates 2x2 quads of fragment and feeds them to
- * the current fp_machine (see below).
- * Remember that Y=0=top with Y increasing down the window.
- */
-#define QUAD_TOP_LEFT 0
-#define QUAD_TOP_RIGHT 1
-#define QUAD_BOTTOM_LEFT 2
-#define QUAD_BOTTOM_RIGHT 3
-
-#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
-#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
-#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
-#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
-#define MASK_ALL 0xf
-
-
-/**
- * Encodes everything we need to know about a 2x2 pixel block. Uses
- * "Channel-Serial" or "SoA" layout.
- */
-struct quad_header_input
-{
- int x0;
- int y0;
- float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */
- unsigned facing:1; /**< Front (0) or back (1) facing? */
- unsigned prim:2; /**< PRIM_POINT, LINE, TRI */
-};
-
-struct quad_header_inout
-{
- unsigned mask:4;
-};
-
-struct quad_header_output
-{
- /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
- float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
- float depth[QUAD_SIZE];
-};
-
-struct quad_header {
- struct quad_header_input input;
- struct quad_header_inout inout;
- struct quad_header_output output;
-
- const struct tgsi_interp_coef *coef;
- const struct tgsi_interp_coef *posCoef;
-
- unsigned nr_attrs;
-};
-
-#endif /* SP_HEADERS_H */
-
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 425e13cd28..d56eed80a4 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -26,10 +26,10 @@
**************************************************************************/
/**
- * Post-transform vertex buffering. This is an optional part of the
- * softpipe rendering pipeline.
- * Probably not desired in general, but useful for testing/debuggin.
- * Enabled/Disabled with SP_VBUF env var.
+ * Interface between 'draw' module's output and the softpipe rasterizer/setup
+ * code. When the 'draw' module has finished filling a vertex buffer, the
+ * draw_arrays() functions below will be called. Loop over the vertices and
+ * call the point/line/tri setup functions.
*
* Authors
* Brian Paul
@@ -60,6 +60,7 @@ struct softpipe_vbuf_render
struct softpipe_context *softpipe;
uint prim;
uint vertex_size;
+ uint vertex_buffer_size;
void *vertex_buffer;
};
@@ -80,26 +81,44 @@ sp_vbuf_get_vertex_info(struct vbuf_render *vbr)
}
-static void *
+static boolean
sp_vbuf_allocate_vertices(struct vbuf_render *vbr,
- ushort vertex_size, ushort nr_vertices)
+ ushort vertex_size, ushort nr_vertices)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
- assert(!cvbr->vertex_buffer);
- cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16);
+ unsigned size = vertex_size * nr_vertices;
+
+ if (cvbr->vertex_buffer_size < size) {
+ align_free(cvbr->vertex_buffer);
+ cvbr->vertex_buffer = align_malloc(size, 16);
+ cvbr->vertex_buffer_size = size;
+ }
+
cvbr->vertex_size = vertex_size;
- return cvbr->vertex_buffer;
+ return cvbr->vertex_buffer != NULL;
}
-
static void
-sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
- unsigned vertex_size, unsigned vertices_used)
+sp_vbuf_release_vertices(struct vbuf_render *vbr)
+{
+ /* keep the old allocation for next time */
+}
+
+static void *
+sp_vbuf_map_vertices(struct vbuf_render *vbr)
+{
+ struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
+ return cvbr->vertex_buffer;
+}
+
+static void
+sp_vbuf_unmap_vertices(struct vbuf_render *vbr,
+ ushort min_index,
+ ushort max_index )
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
- align_free(vertices);
- assert(vertices == cvbr->vertex_buffer);
- cvbr->vertex_buffer = NULL;
+ assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
+ /* do nothing */
}
@@ -115,8 +134,6 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
setup_prepare( setup_ctx );
-
-
cvbr->prim = prim;
return TRUE;
@@ -131,21 +148,23 @@ static INLINE cptrf4 get_vert( const void *vertex_buffer,
}
+/**
+ * draw elements / indexed primitives
+ */
static void
sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
- unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
- unsigned i;
+ const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
const void *vertex_buffer = cvbr->vertex_buffer;
+ unsigned i;
/* XXX: break this dependency - make setup_context live under
* softpipe, rename the old "setup" draw stage to something else.
*/
struct draw_stage *setup = softpipe->setup;
- struct setup_context *setup_ctx = sp_draw_setup_context(softpipe->setup);
-
+ struct setup_context *setup_ctx = sp_draw_setup_context(setup);
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
@@ -258,13 +277,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
- struct draw_stage *setup = softpipe->setup;
- const void *vertex_buffer = NULL;
const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
+ const void *vertex_buffer =
+ (void *) get_vert(cvbr->vertex_buffer, start, stride);
unsigned i;
- struct setup_context *setup_ctx = sp_draw_setup_context(setup);
- vertex_buffer = (void *)get_vert(cvbr->vertex_buffer, start, stride);
+ /* XXX: break this dependency - make setup_context live under
+ * softpipe, rename the old "setup" draw stage to something else.
+ */
+ struct draw_stage *setup = softpipe->setup;
+ struct setup_context *setup_ctx = sp_draw_setup_context(setup);
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
@@ -389,6 +411,8 @@ sp_init_vbuf(struct softpipe_context *sp)
sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info;
sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices;
+ sp->vbuf_render->base.map_vertices = sp_vbuf_map_vertices;
+ sp->vbuf_render->base.unmap_vertices = sp_vbuf_unmap_vertices;
sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive;
sp->vbuf_render->base.draw = sp_vbuf_draw;
sp->vbuf_render->base.draw_arrays = sp_vbuf_draw_arrays;
diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h
index 08513cb95f..bd6c6cb912 100644
--- a/src/gallium/drivers/softpipe/sp_quad.h
+++ b/src/gallium/drivers/softpipe/sp_quad.h
@@ -31,39 +31,76 @@
#ifndef SP_QUAD_H
#define SP_QUAD_H
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_exec.h"
-struct softpipe_context;
-struct quad_header;
+#define QUAD_PRIM_POINT 1
+#define QUAD_PRIM_LINE 2
+#define QUAD_PRIM_TRI 3
-struct quad_stage {
- struct softpipe_context *softpipe;
- struct quad_stage *next;
+/* The rasterizer generates 2x2 quads of fragment and feeds them to
+ * the current fp_machine (see below).
+ * Remember that Y=0=top with Y increasing down the window.
+ */
+#define QUAD_TOP_LEFT 0
+#define QUAD_TOP_RIGHT 1
+#define QUAD_BOTTOM_LEFT 2
+#define QUAD_BOTTOM_RIGHT 3
+
+#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
+#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
+#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
+#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
+#define MASK_ALL 0xf
+
+
+/**
+ * Quad stage inputs (pos, coverage, front/back face, etc)
+ */
+struct quad_header_input
+{
+ int x0, y0; /**< quad window pos, always even */
+ float coverage[QUAD_SIZE]; /**< fragment coverage for antialiasing */
+ unsigned facing:1; /**< Front (0) or back (1) facing? */
+ unsigned prim:2; /**< QUAD_PRIM_POINT, LINE, TRI */
+};
- void (*begin)(struct quad_stage *qs);
- /** the stage action */
- void (*run)(struct quad_stage *qs, struct quad_header *quad);
+/**
+ * Quad stage inputs/outputs.
+ */
+struct quad_header_inout
+{
+ unsigned mask:4;
+};
+
- void (*destroy)(struct quad_stage *qs);
+/**
+ * Quad stage outputs (color & depth).
+ */
+struct quad_header_output
+{
+ /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
+ float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
+ float depth[QUAD_SIZE];
};
-struct quad_stage *sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe );
-struct quad_stage *sp_quad_earlyz_stage( struct softpipe_context *softpipe );
-struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe );
-struct quad_stage *sp_quad_alpha_test_stage( struct softpipe_context *softpipe );
-struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe );
-struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe );
-struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe );
-struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe );
-struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe );
-struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe );
-struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe );
+/**
+ * Encodes everything we need to know about a 2x2 pixel block. Uses
+ * "Channel-Serial" or "SoA" layout.
+ */
+struct quad_header {
+ struct quad_header_input input;
+ struct quad_header_inout inout;
+ struct quad_header_output output;
-void sp_build_quad_pipeline(struct softpipe_context *sp);
+ const struct tgsi_interp_coef *coef;
+ const struct tgsi_interp_coef *posCoef;
-void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad);
+ unsigned nr_attrs;
+};
#endif /* SP_QUAD_H */
diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
index 5bebd141e9..0845bae0e6 100644
--- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
@@ -4,8 +4,8 @@
*/
#include "sp_context.h"
-#include "sp_headers.h"
#include "sp_quad.h"
+#include "sp_quad_pipe.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
@@ -14,7 +14,7 @@ static void
alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct softpipe_context *softpipe = qs->softpipe;
- const float ref = softpipe->depth_stencil->alpha.ref;
+ const float ref = softpipe->depth_stencil->alpha.ref_value;
unsigned passMask = 0x0, j;
const uint cbuf = 0; /* only output[0].alpha is tested */
const float *aaaa = quad->output.color[cbuf][3];
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 6f64c6e584..e134e44337 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -34,10 +34,10 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "sp_context.h"
-#include "sp_headers.h"
+#include "sp_quad.h"
#include "sp_surface.h"
#include "sp_tile_cache.h"
-#include "sp_quad.h"
+#include "sp_quad_pipe.h"
#define VEC4_COPY(DST, SRC) \
@@ -105,7 +105,7 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad)
uint cbuf;
/* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
float dest[4][QUAD_SIZE];
ubyte src[4][4], dst[4][4], res[4][4];
uint *src4 = (uint *) src;
@@ -239,7 +239,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
}
/* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
float source[4][QUAD_SIZE], dest[4][QUAD_SIZE];
struct softpipe_cached_tile *tile
= sp_get_cached_tile(softpipe,
diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c
index 92e9af09c1..953d8516b9 100644
--- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c
+++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c
@@ -1,9 +1,9 @@
#include "util/u_memory.h"
#include "sp_context.h"
-#include "sp_headers.h"
-#include "sp_surface.h"
#include "sp_quad.h"
+#include "sp_surface.h"
+#include "sp_quad_pipe.h"
/**
@@ -17,7 +17,7 @@ cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad)
unsigned i;
assert(sizeof(quad->outputs.color) == sizeof(tmp));
- assert(softpipe->framebuffer.num_cbufs <= PIPE_MAX_COLOR_BUFS);
+ assert(softpipe->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
/* make copy of original colors since they can get modified
* by blending and masking.
@@ -28,7 +28,7 @@ cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad)
*/
memcpy(tmp, quad->outputs.color, sizeof(tmp));
- for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) {
+ for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
/* set current cbuffer */
#if 0 /* obsolete & going away */
softpipe->current_cbuf = i;
diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c
index f32bdfab78..dc90e5d5e9 100644
--- a/src/gallium/drivers/softpipe/sp_quad_colormask.c
+++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c
@@ -34,9 +34,9 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "sp_context.h"
-#include "sp_headers.h"
-#include "sp_surface.h"
#include "sp_quad.h"
+#include "sp_surface.h"
+#include "sp_quad_pipe.h"
#include "sp_tile_cache.h"
@@ -51,7 +51,7 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad)
uint cbuf;
/* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
float dest[4][QUAD_SIZE];
struct softpipe_cached_tile *tile
= sp_get_cached_tile(softpipe,
diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c
index ee29aa7dfe..4aeee85870 100644
--- a/src/gallium/drivers/softpipe/sp_quad_coverage.c
+++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c
@@ -35,8 +35,8 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "sp_context.h"
-#include "sp_headers.h"
#include "sp_quad.h"
+#include "sp_quad_pipe.h"
/**
@@ -46,14 +46,15 @@ static void
coverage_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct softpipe_context *softpipe = qs->softpipe;
+ const uint prim = quad->input.prim;
- if ((softpipe->rasterizer->poly_smooth && quad->input.prim == PRIM_TRI) ||
- (softpipe->rasterizer->line_smooth && quad->input.prim == PRIM_LINE) ||
- (softpipe->rasterizer->point_smooth && quad->input.prim == PRIM_POINT)) {
+ if ((softpipe->rasterizer->poly_smooth && prim == QUAD_PRIM_TRI) ||
+ (softpipe->rasterizer->line_smooth && prim == QUAD_PRIM_LINE) ||
+ (softpipe->rasterizer->point_smooth && prim == QUAD_PRIM_POINT)) {
uint cbuf;
/* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
float (*quadColor)[4] = quad->output.color[cbuf];
unsigned j;
for (j = 0; j < QUAD_SIZE; j++) {
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index 523bd3e080..d463930bae 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -32,9 +32,9 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "sp_context.h"
-#include "sp_headers.h"
-#include "sp_surface.h"
#include "sp_quad.h"
+#include "sp_surface.h"
+#include "sp_quad_pipe.h"
#include "sp_tile_cache.h"
diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
index 6e2dde304e..496fd39ed1 100644
--- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c
+++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
@@ -31,8 +31,8 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
-#include "sp_headers.h"
#include "sp_quad.h"
+#include "sp_quad_pipe.h"
/**
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 1f0cb3e035..adca5df73d 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -2,6 +2,7 @@
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -42,16 +43,15 @@
#include "sp_context.h"
#include "sp_state.h"
-#include "sp_headers.h"
#include "sp_quad.h"
+#include "sp_quad_pipe.h"
#include "sp_texture.h"
#include "sp_tex_sample.h"
struct quad_shade_stage
{
- struct quad_stage stage;
- struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];
+ struct quad_stage stage; /**< base class */
struct tgsi_exec_machine machine;
struct tgsi_exec_vector *inputs, *outputs;
};
@@ -147,18 +147,11 @@ static void shade_begin(struct quad_stage *qs)
{
struct quad_shade_stage *qss = quad_shade_stage(qs);
struct softpipe_context *softpipe = qs->softpipe;
- unsigned i;
- unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers);
-
- /* set TGSI sampler state that varies */
- for (i = 0; i < num; i++) {
- qss->samplers[i].state = softpipe->sampler[i];
- qss->samplers[i].texture = softpipe->texture[i];
- }
softpipe->fs->prepare( softpipe->fs,
&qss->machine,
- qss->samplers );
+ (struct tgsi_sampler **)
+ softpipe->tgsi.frag_samplers_list );
qs->next->begin(qs->next);
}
@@ -178,7 +171,6 @@ static void shade_destroy(struct quad_stage *qs)
struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
{
struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
- uint i;
/* allocate storage for program inputs/outputs, aligned to 16 bytes */
qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16);
@@ -191,14 +183,6 @@ struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
qss->stage.run = shade_quad;
qss->stage.destroy = shade_destroy;
- /* set TGSI sampler state that's constant */
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- assert(softpipe->tex_cache[i]);
- qss->samplers[i].get_samples = sp_get_samples;
- qss->samplers[i].pipe = &softpipe->pipe;
- qss->samplers[i].cache = softpipe->tex_cache[i];
- }
-
tgsi_exec_machine_init( &qss->machine );
return &qss->stage;
diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c
index 169bd82876..dfa7ff3b1d 100644
--- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c
+++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c
@@ -35,9 +35,9 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "sp_context.h"
-#include "sp_headers.h"
-#include "sp_surface.h"
#include "sp_quad.h"
+#include "sp_surface.h"
+#include "sp_quad_pipe.h"
static unsigned count_bits( unsigned val )
{
diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c
index b7aac7f84a..92d5f9f3c1 100644
--- a/src/gallium/drivers/softpipe/sp_quad_output.c
+++ b/src/gallium/drivers/softpipe/sp_quad_output.c
@@ -27,9 +27,9 @@
#include "util/u_memory.h"
#include "sp_context.h"
-#include "sp_headers.h"
-#include "sp_surface.h"
#include "sp_quad.h"
+#include "sp_surface.h"
+#include "sp_quad_pipe.h"
#include "sp_tile_cache.h"
@@ -48,7 +48,7 @@ output_quad(struct quad_stage *qs, struct quad_header *quad)
uint cbuf;
/* loop over colorbuffer outputs */
- for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
struct softpipe_cached_tile *tile
= sp_get_cached_tile(softpipe,
softpipe->cbuf_cache[cbuf],
diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c
index 892ef87ee9..892ef87ee9 100644
--- a/src/gallium/drivers/softpipe/sp_quad.c
+++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c
diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.h b/src/gallium/drivers/softpipe/sp_quad_pipe.h
new file mode 100644
index 0000000000..0e40586ffc
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_pipe.h
@@ -0,0 +1,74 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SP_QUAD_PIPE_H
+#define SP_QUAD_PIPE_H
+
+
+struct softpipe_context;
+struct quad_header;
+
+
+/**
+ * Fragment processing is performed on 2x2 blocks of pixels called "quads".
+ * Quad processing is performed with a pipeline of stages represented by
+ * this type.
+ */
+struct quad_stage {
+ struct softpipe_context *softpipe; /**< context the stage reads its state from */
+
+ struct quad_stage *next; /**< following stage in the pipeline */
+
+ void (*begin)(struct quad_stage *qs); /**< stage setup hook; implementations chain to next->begin() */
+
+ /** the stage action: process one quad, then hand it on through next->run() unless it is discarded */
+ void (*run)(struct quad_stage *qs, struct quad_header *quad);
+
+ void (*destroy)(struct quad_stage *qs); /**< stage teardown hook (presumably frees the stage -- TODO confirm) */
+};
+
+
+struct quad_stage *sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_earlyz_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_alpha_test_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe );
+
+void sp_build_quad_pipeline(struct softpipe_context *sp);
+
+void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad);
+
+#endif /* SP_QUAD_PIPE_H */
diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c
index abb5487748..5e9d447737 100644
--- a/src/gallium/drivers/softpipe/sp_quad_stencil.c
+++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c
@@ -5,10 +5,10 @@
#include "sp_context.h"
-#include "sp_headers.h"
+#include "sp_quad.h"
#include "sp_surface.h"
#include "sp_tile_cache.h"
-#include "sp_quad.h"
+#include "sp_quad_pipe.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
@@ -222,8 +222,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
ref = softpipe->depth_stencil->stencil[face].ref_value;
- wrtMask = softpipe->depth_stencil->stencil[face].write_mask;
- valMask = softpipe->depth_stencil->stencil[face].value_mask;
+ wrtMask = softpipe->depth_stencil->stencil[face].writemask;
+ valMask = softpipe->depth_stencil->stencil[face].valuemask;
assert(ps); /* shouldn't get here if there's no stencil buffer */
diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c
index ccf37f6be5..05e862f097 100644
--- a/src/gallium/drivers/softpipe/sp_quad_stipple.c
+++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c
@@ -4,8 +4,8 @@
*/
#include "sp_context.h"
-#include "sp_headers.h"
#include "sp_quad.h"
+#include "sp_quad_pipe.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
@@ -19,11 +19,13 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad)
static const uint bit31 = 1 << 31;
static const uint bit30 = 1 << 30;
- if (quad->input.prim == PRIM_TRI) {
+ if (quad->input.prim == QUAD_PRIM_TRI) {
struct softpipe_context *softpipe = qs->softpipe;
/* need to invert Y to index into OpenGL's stipple pattern */
int y0, y1;
uint stipple0, stipple1;
+ const int col0 = quad->input.x0 % 32;
+
if (softpipe->rasterizer->origin_lower_left) {
y0 = softpipe->framebuffer.height - 1 - quad->input.y0;
y1 = y0 - 1;
@@ -32,12 +34,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad)
y0 = quad->input.y0;
y1 = y0 + 1;
}
+
stipple0 = softpipe->poly_stipple.stipple[y0 % 32];
stipple1 = softpipe->poly_stipple.stipple[y1 % 32];
-#if 1
- {
- const int col0 = quad->input.x0 % 32;
+ /* turn off quad mask bits that fail the stipple test */
if ((stipple0 & (bit31 >> col0)) == 0)
quad->inout.mask &= ~MASK_TOP_LEFT;
@@ -49,19 +50,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad)
if ((stipple1 & (bit30 >> col0)) == 0)
quad->inout.mask &= ~MASK_BOTTOM_RIGHT;
- }
-#else
- /* We'd like to use this code, but we'd need to redefine
- * MASK_TOP_LEFT to be (1 << 1) and MASK_TOP_RIGHT to be (1 << 0),
- * and similarly for the BOTTOM bits. But that may have undesirable
- * side effects elsewhere.
- */
- const int col0 = 30 - (quad->input.x0 % 32);
- quad->inout.mask &= (((stipple0 >> col0) & 0x3) |
- (((stipple1 >> col0) & 0x3) << 2));
-#endif
- if (!quad->inout.mask)
+
+ if (!quad->inout.mask) {
+ /* all fragments failed stipple test, end of quad pipeline */
return;
+ }
}
qs->next->run(qs->next, quad);
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index 2106ee1d23..b0d8e01426 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -37,8 +37,8 @@
#include "sp_query.h"
struct softpipe_query {
- uint64 start;
- uint64 end;
+ uint64_t start;
+ uint64_t end;
};
@@ -87,7 +87,7 @@ static boolean
softpipe_get_query_result(struct pipe_context *pipe,
struct pipe_query *q,
boolean wait,
- uint64 *result )
+ uint64_t *result )
{
struct softpipe_query *sq = softpipe_query(q);
*result = sq->end - sq->start;
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 9644dbd168..7380a6ae2b 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -27,7 +27,8 @@
#include "util/u_memory.h"
-#include "pipe/p_winsys.h"
+#include "util/u_simple_screen.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
@@ -55,7 +56,9 @@ softpipe_get_param(struct pipe_screen *screen, int param)
{
switch (param) {
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
- return 8;
+ return PIPE_MAX_SAMPLERS;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return PIPE_MAX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -172,6 +175,7 @@ softpipe_create_screen(struct pipe_winsys *winsys)
screen->base.is_format_supported = softpipe_is_format_supported;
softpipe_init_screen_texture_funcs(&screen->base);
+ u_simple_screen_init(&screen->base);
return &screen->base;
}
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 13d8017393..0925653b5d 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -32,13 +32,12 @@
* \author Brian Paul
*/
-#include "sp_setup.h"
-
#include "sp_context.h"
-#include "sp_headers.h"
+#include "sp_prim_setup.h"
#include "sp_quad.h"
+#include "sp_quad_pipe.h"
+#include "sp_setup.h"
#include "sp_state.h"
-#include "sp_prim_setup.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_vertex.h"
@@ -265,17 +264,20 @@ is_inf_or_nan(float x)
}
-static boolean cull_tri( struct setup_context *setup,
- float det )
+/**
+ * Do triangle cull test using tri determinant (sign indicates orientation)
+ * \return true if triangle is to be culled.
+ */
+static INLINE boolean
+cull_tri(const struct setup_context *setup, float det)
{
- if (det != 0)
- {
+ if (det != 0) {
/* if (det < 0) then Z points toward camera and triangle is
* counter-clockwise winding.
*/
unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
-
- if ((winding & setup->winding) == 0)
+
+ if ((winding & setup->winding) == 0)
return FALSE;
}
@@ -968,7 +970,7 @@ void setup_tri( struct setup_context *setup,
setup_tri_coefficients( setup );
setup_tri_edges( setup );
- setup->quad.input.prim = PRIM_TRI;
+ setup->quad.input.prim = QUAD_PRIM_TRI;
setup->span.y = 0;
setup->span.y_flags = 0;
@@ -1009,7 +1011,7 @@ void setup_tri( struct setup_context *setup,
* for a line.
*/
static void
-line_linear_coeff(struct setup_context *setup,
+line_linear_coeff(const struct setup_context *setup,
struct tgsi_interp_coef *coef,
uint vertSlot, uint i)
{
@@ -1029,9 +1031,9 @@ line_linear_coeff(struct setup_context *setup,
* for a line.
*/
static void
-line_persp_coeff(struct setup_context *setup,
- struct tgsi_interp_coef *coef,
- uint vertSlot, uint i)
+line_persp_coeff(const struct setup_context *setup,
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
{
/* XXX double-check/verify this arithmetic */
const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
@@ -1206,7 +1208,7 @@ setup_line(struct setup_context *setup,
setup->quad.input.x0 = setup->quad.input.y0 = -1;
setup->quad.inout.mask = 0x0;
- setup->quad.input.prim = PRIM_LINE;
+ setup->quad.input.prim = QUAD_PRIM_LINE;
/* XXX temporary: set coverage to 1.0 so the line appears
* if AA mode happens to be enabled.
*/
@@ -1266,7 +1268,7 @@ setup_line(struct setup_context *setup,
static void
-point_persp_coeff(struct setup_context *setup,
+point_persp_coeff(const struct setup_context *setup,
const float (*vert)[4],
struct tgsi_interp_coef *coef,
uint vertSlot, uint i)
@@ -1361,7 +1363,7 @@ setup_point( struct setup_context *setup,
}
}
- setup->quad.input.prim = PRIM_POINT;
+ setup->quad.input.prim = QUAD_PRIM_POINT;
if (halfSize <= 0.5 && !round) {
/* special case for 1-pixel points */
@@ -1497,7 +1499,7 @@ void setup_prepare( struct setup_context *setup )
}
/* Mark surfaces as defined now */
- for (i = 0; i < sp->framebuffer.num_cbufs; i++){
+ for (i = 0; i < sp->framebuffer.nr_cbufs; i++){
if (sp->framebuffer.cbufs[i]) {
sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
}
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 476ef3dc8f..6f558e6da5 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -69,7 +69,7 @@ struct sp_fragment_shader {
void (*prepare)( const struct sp_fragment_shader *shader,
struct tgsi_exec_machine *machine,
- struct tgsi_sampler *samplers);
+ struct tgsi_sampler **samplers);
/* Run the shader - this interface will get cleaned up in the
* future:
@@ -184,10 +184,10 @@ softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
void
-softpipe_map_surfaces(struct softpipe_context *sp);
+softpipe_map_transfers(struct softpipe_context *sp);
void
-softpipe_unmap_surfaces(struct softpipe_context *sp);
+softpipe_unmap_transfers(struct softpipe_context *sp);
void
softpipe_map_texture_surfaces(struct softpipe_context *sp);
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index e5b609cf6c..4d01a9dbe1 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -32,7 +32,7 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "tgsi/tgsi_dump.h"
@@ -146,16 +146,15 @@ softpipe_set_constant_buffer(struct pipe_context *pipe,
const struct pipe_constant_buffer *buf)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
- struct pipe_winsys *ws = pipe->winsys;
+ struct pipe_screen *screen = pipe->screen;
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
/* note: reference counting */
- winsys_buffer_reference(ws,
+ pipe_buffer_reference(screen,
&softpipe->constants[shader].buffer,
buf ? buf->buffer : NULL);
- softpipe->constants[shader].size = buf ? buf->size : 0;
softpipe->dirty |= SP_NEW_CONSTANTS;
}
diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c
index b5376e522d..1493c65884 100644
--- a/src/gallium/drivers/softpipe/sp_state_surface.c
+++ b/src/gallium/drivers/softpipe/sp_state_surface.c
@@ -64,7 +64,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
}
}
- sp->framebuffer.num_cbufs = fb->num_cbufs;
+ sp->framebuffer.nr_cbufs = fb->nr_cbufs;
/* zbuf changing? */
if (sp->framebuffer.zsbuf != fb->zsbuf) {
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 49250ec084..adbd0cb7f0 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2,6 +2,7 @@
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -33,17 +34,18 @@
*/
#include "sp_context.h"
-#include "sp_headers.h"
+#include "sp_quad.h"
#include "sp_surface.h"
+#include "sp_texture.h"
#include "sp_tex_sample.h"
#include "sp_tile_cache.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
-#include "tgsi/tgsi_exec.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+
/*
* Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes
* see 1-pixel bands of improperly weighted linear-filtered textures.
@@ -57,7 +59,11 @@
/**
* Linear interpolation: yields v0 when a is 0 and v1 when a is 1
*/
-#define LERP(T, A, B) ( (A) + (T) * ((B) - (A)) )
+static INLINE float
+lerp(float a, float v0, float v1)
+{
+ return v0 + a * (v1 - v0);
+}
/**
@@ -72,13 +78,28 @@ static INLINE float
lerp_2d(float a, float b,
float v00, float v10, float v01, float v11)
{
- const float temp0 = LERP(a, v00, v10);
- const float temp1 = LERP(a, v01, v11);
- return LERP(b, temp0, temp1);
+ const float temp0 = lerp(a, v00, v10);
+ const float temp1 = lerp(a, v01, v11);
+ return lerp(b, temp0, temp1);
}
/**
+ * Trilinear interpolation of 8 values; a, b, c are the weights along the three axes.
+ */
+static INLINE float
+lerp_3d(float a, float b, float c,
+ float v000, float v100, float v010, float v110,
+ float v001, float v101, float v011, float v111)
+{
+ const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); /* bilinear result of the four v**0 values */
+ const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); /* bilinear result of the four v**1 values */
+ return lerp(c, temp0, temp1); /* c == 0 selects temp0, c == 1 selects temp1 */
+}
+
+
+
+/**
* If A is a signed integer, A % B doesn't give the right value for A < 0
* (in terms of texture repeat). Just casting to unsigned fixes that.
*/
@@ -86,250 +107,275 @@ lerp_2d(float a, float b,
/**
- * Apply texture coord wrapping mode and return integer texture index.
+ * Apply texture coord wrapping mode and return integer texture indexes
+ * for a vector of four texcoords (S or T or P).
* \param wrapMode PIPE_TEX_WRAP_x
- * \param s the texcoord
+ * \param s the incoming texcoords
* \param size the texture image size
+ * \param icoord returns the integer texcoords
* \note nothing is returned; the integer texture indexes are written to icoord
*/
-static INLINE int
-nearest_texcoord(unsigned wrapMode, float s, unsigned size)
+static INLINE void
+nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord[4])
{
- int i;
+ uint ch;
switch (wrapMode) {
case PIPE_TEX_WRAP_REPEAT:
/* s limited to [0,1) */
/* i limited to [0,size-1] */
- i = util_ifloor(s * size);
- i = REMAINDER(i, size);
- return i;
+ for (ch = 0; ch < 4; ch++) {
+ int i = util_ifloor(s[ch] * size);
+ icoord[ch] = REMAINDER(i, size);
+ }
+ return;
case PIPE_TEX_WRAP_CLAMP:
/* s limited to [0,1] */
/* i limited to [0,size-1] */
- if (s <= 0.0F)
- i = 0;
- else if (s >= 1.0F)
- i = size - 1;
- else
- i = util_ifloor(s * size);
- return i;
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] <= 0.0F)
+ icoord[ch] = 0;
+ else if (s[ch] >= 1.0F)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
+ return;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
{
/* s limited to [min,max] */
/* i limited to [0, size-1] */
const float min = 1.0F / (2.0F * size);
const float max = 1.0F - min;
- if (s < min)
- i = 0;
- else if (s > max)
- i = size - 1;
- else
- i = util_ifloor(s * size);
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] < min)
+ icoord[ch] = 0;
+ else if (s[ch] > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
}
- return i;
+ return;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
{
/* s limited to [min,max] */
/* i limited to [-1, size] */
const float min = -1.0F / (2.0F * size);
const float max = 1.0F - min;
- if (s <= min)
- i = -1;
- else if (s >= max)
- i = size;
- else
- i = util_ifloor(s * size);
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] <= min)
+ icoord[ch] = -1;
+ else if (s[ch] >= max)
+ icoord[ch] = size;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
}
- return i;
+ return;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
{
const float min = 1.0F / (2.0F * size);
const float max = 1.0F - min;
- const int flr = util_ifloor(s);
- float u;
- if (flr & 1)
- u = 1.0F - (s - (float) flr);
- else
- u = s - (float) flr;
- if (u < min)
- i = 0;
- else if (u > max)
- i = size - 1;
- else
- i = util_ifloor(u * size);
+ for (ch = 0; ch < 4; ch++) {
+ const int flr = util_ifloor(s[ch]);
+ float u;
+ if (flr & 1)
+ u = 1.0F - (s[ch] - (float) flr);
+ else
+ u = s[ch] - (float) flr;
+ if (u < min)
+ icoord[ch] = 0;
+ else if (u > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
}
- return i;
+ return;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
- {
+ for (ch = 0; ch < 4; ch++) {
/* s limited to [0,1] */
/* i limited to [0,size-1] */
- const float u = fabsf(s);
+ const float u = fabsf(s[ch]);
if (u <= 0.0F)
- i = 0;
+ icoord[ch] = 0;
else if (u >= 1.0F)
- i = size - 1;
+ icoord[ch] = size - 1;
else
- i = util_ifloor(u * size);
+ icoord[ch] = util_ifloor(u * size);
}
- return i;
+ return;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
{
/* s limited to [min,max] */
/* i limited to [0, size-1] */
const float min = 1.0F / (2.0F * size);
const float max = 1.0F - min;
- const float u = fabsf(s);
- if (u < min)
- i = 0;
- else if (u > max)
- i = size - 1;
- else
- i = util_ifloor(u * size);
+ for (ch = 0; ch < 4; ch++) {
+ const float u = fabsf(s[ch]);
+ if (u < min)
+ icoord[ch] = 0;
+ else if (u > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
}
- return i;
+ return;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
{
/* s limited to [min,max] */
/* i limited to [-1, size] */
const float min = -1.0F / (2.0F * size);
const float max = 1.0F - min;
- const float u = fabsf(s);
- if (u < min)
- i = -1;
- else if (u > max)
- i = size;
- else
- i = util_ifloor(u * size);
+ for (ch = 0; ch < 4; ch++) {
+ const float u = fabsf(s[ch]);
+ if (u < min)
+ icoord[ch] = -1;
+ else if (u > max)
+ icoord[ch] = size;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
}
- return i;
+ return;
default:
assert(0);
- return 0;
}
}
/**
- * Used to compute texel locations for linear sampling.
+ * Used to compute texel locations for linear sampling for four texcoords.
* \param wrapMode PIPE_TEX_WRAP_x
- * \param s the texcoord
+ * \param s the texcoords
* \param size the texture image size
- * \param i0 returns first texture index
- * \param i1 returns second texture index (usually *i0 + 1)
- * \param a returns blend factor/weight between texture indexes
+ * \param icoord0 returns first texture indexes
+ * \param icoord1 returns second texture indexes (usually icoord0 + 1)
+ * \param w returns blend factor/weight between texture indexes
+ * \note nothing is returned; results are written to icoord0, icoord1 and w
*/
static INLINE void
-linear_texcoord(unsigned wrapMode, float s, unsigned size,
- int *i0, int *i1, float *a)
+linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
{
- float u;
+ uint ch;
+
switch (wrapMode) {
case PIPE_TEX_WRAP_REPEAT:
- u = s * size - 0.5F;
- *i0 = REMAINDER(util_ifloor(u), size);
- *i1 = REMAINDER(*i0 + 1, size);
- break;
+ for (ch = 0; ch < 4; ch++) {
+ float u = s[ch] * size - 0.5F;
+ icoord0[ch] = REMAINDER(util_ifloor(u), size);
+ icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
+ w[ch] = FRAC(u);
+ }
+ break;;
case PIPE_TEX_WRAP_CLAMP:
- if (s <= 0.0F)
- u = 0.0F;
- else if (s >= 1.0F)
- u = (float) size;
- else
- u = s * size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
- break;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.0F, 1.0F);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- if (s <= 0.0F)
- u = 0.0F;
- else if (s >= 1.0F)
- u = (float) size;
- else
- u = s * size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
- if (*i0 < 0)
- *i0 = 0;
- if (*i1 >= (int) size)
- *i1 = size - 1;
- break;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.0F, 1.0F);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
{
const float min = -1.0F / (2.0F * size);
const float max = 1.0F - min;
- if (s <= min)
- u = min * size;
- else if (s >= max)
- u = max * size;
- else
- u = s * size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], min, max);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
}
- break;
+ break;;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
- {
- const int flr = util_ifloor(s);
+ for (ch = 0; ch < 4; ch++) {
+ const int flr = util_ifloor(s[ch]);
+ float u;
if (flr & 1)
- u = 1.0F - (s - (float) flr);
+ u = 1.0F - (s[ch] - (float) flr);
else
- u = s - (float) flr;
- u = (u * size) - 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
- if (*i0 < 0)
- *i0 = 0;
- if (*i1 >= (int) size)
- *i1 = size - 1;
+ u = s[ch] - (float) flr;
+ u = u * size - 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
}
- break;
+ break;;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
- u = fabsf(s);
- if (u >= 1.0F)
- u = (float) size;
- else
- u *= size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
- break;
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u >= 1.0F)
+ u = (float) size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- u = fabsf(s);
- if (u >= 1.0F)
- u = (float) size;
- else
- u *= size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
- if (*i0 < 0)
- *i0 = 0;
- if (*i1 >= (int) size)
- *i1 = size - 1;
- break;
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u >= 1.0F)
+ u = (float) size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
{
const float min = -1.0F / (2.0F * size);
const float max = 1.0F - min;
- u = fabsf(s);
- if (u <= min)
- u = min * size;
- else if (u >= max)
- u = max * size;
- else
- u *= size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u <= min)
+ u = min * size;
+ else if (u >= max)
+ u = max * size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
}
- break;
+ break;;
default:
assert(0);
}
- *a = FRAC(u);
}
@@ -337,21 +383,27 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size,
* For RECT textures / unnormalized texcoords
* Only a subset of wrap modes supported.
*/
-static INLINE int
-nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size)
+static INLINE void
+nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord[4])
{
- int i;
+ uint ch;
switch (wrapMode) {
case PIPE_TEX_WRAP_CLAMP:
- i = util_ifloor(s);
- return CLAMP(i, 0, (int) size-1);
+ for (ch = 0; ch < 4; ch++) {
+ int i = util_ifloor(s[ch]);
+ icoord[ch]= CLAMP(i, 0, (int) size-1);
+ }
+ return;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
/* fall-through */
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- return util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) );
+ for (ch = 0; ch < 4; ch++) {
+ icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
+ }
+ return;
default:
assert(0);
- return 0;
}
}
@@ -361,30 +413,36 @@ nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size)
* Only a subset of wrap modes supported.
*/
static INLINE void
-linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size,
- int *i0, int *i1, float *a)
+linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
{
+ uint ch;
switch (wrapMode) {
case PIPE_TEX_WRAP_CLAMP:
- /* Not exactly what the spec says, but it matches NVIDIA output */
- s = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
- *i0 = util_ifloor(s);
- *i1 = *i0 + 1;
- break;
+ for (ch = 0; ch < 4; ch++) {
+ /* Not exactly what the spec says, but it matches NVIDIA output */
+ float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ return;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
/* fall-through */
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- s = CLAMP(s, 0.5F, (float) size - 0.5F);
- s -= 0.5F;
- *i0 = util_ifloor(s);
- *i1 = *i0 + 1;
- if (*i1 > (int) size - 1)
- *i1 = size - 1;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord1[ch] > (int) size - 1)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
+ }
break;
default:
assert(0);
}
- *a = FRAC(s);
}
@@ -463,7 +521,8 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
* This is only done for fragment shaders, not vertex shaders.
*/
static float
-compute_lambda(struct tgsi_sampler *sampler,
+compute_lambda(const struct pipe_texture *tex,
+ const struct pipe_sampler_state *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
@@ -471,7 +530,7 @@ compute_lambda(struct tgsi_sampler *sampler,
{
float rho, lambda;
- assert(sampler->state->normalized_coords);
+ assert(sampler->normalized_coords);
assert(s);
{
@@ -479,7 +538,7 @@ compute_lambda(struct tgsi_sampler *sampler,
float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT];
dsdx = fabsf(dsdx);
dsdy = fabsf(dsdy);
- rho = MAX2(dsdx, dsdy) * sampler->texture->width[0];
+ rho = MAX2(dsdx, dsdy) * tex->width[0];
}
if (t) {
float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
@@ -487,7 +546,7 @@ compute_lambda(struct tgsi_sampler *sampler,
float max;
dtdx = fabsf(dtdx);
dtdy = fabsf(dtdy);
- max = MAX2(dtdx, dtdy) * sampler->texture->height[0];
+ max = MAX2(dtdx, dtdy) * tex->height[0];
rho = MAX2(rho, max);
}
if (p) {
@@ -496,13 +555,13 @@ compute_lambda(struct tgsi_sampler *sampler,
float max;
dpdx = fabsf(dpdx);
dpdy = fabsf(dpdy);
- max = MAX2(dpdx, dpdy) * sampler->texture->depth[0];
+ max = MAX2(dpdx, dpdy) * tex->depth[0];
rho = MAX2(rho, max);
}
lambda = util_fast_log2(rho);
- lambda += lodbias + sampler->state->lod_bias;
- lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod);
+ lambda += lodbias + sampler->lod_bias;
+ lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
return lambda;
}
@@ -514,68 +573,74 @@ compute_lambda(struct tgsi_sampler *sampler,
* 2. Determine if we're minifying or magnifying
* 3. If minifying, choose mipmap levels
* 4. Return image filter to use within mipmap images
+ * \param level0 Returns first mipmap level to sample from
+ * \param level1 Returns second mipmap level to sample from
+ * \param levelBlend Returns blend factor between levels, in [0,1]
+ * \param imgFilter Returns either the min or mag filter, depending on lambda
*/
static void
-choose_mipmap_levels(struct tgsi_sampler *sampler,
+choose_mipmap_levels(const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
+ boolean computeLambda,
float lodbias,
unsigned *level0, unsigned *level1, float *levelBlend,
unsigned *imgFilter)
{
- if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
/* no mipmap selection needed */
- *level0 = *level1 = CLAMP((int) sampler->state->min_lod,
- 0, (int) sampler->texture->last_level);
+ *level0 = *level1 = CLAMP((int) sampler->min_lod,
+ 0, (int) texture->last_level);
- if (sampler->state->min_img_filter != sampler->state->mag_img_filter) {
+ if (sampler->min_img_filter != sampler->mag_img_filter) {
/* non-mipmapped texture, but still need to determine if doing
* minification or magnification.
*/
- float lambda = compute_lambda(sampler, s, t, p, lodbias);
+ float lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
if (lambda <= 0.0) {
- *imgFilter = sampler->state->mag_img_filter;
+ *imgFilter = sampler->mag_img_filter;
}
else {
- *imgFilter = sampler->state->min_img_filter;
+ *imgFilter = sampler->min_img_filter;
}
}
else {
- *imgFilter = sampler->state->mag_img_filter;
+ *imgFilter = sampler->mag_img_filter;
}
}
else {
float lambda;
- if (1)
+ if (computeLambda)
/* fragment shader */
- lambda = compute_lambda(sampler, s, t, p, lodbias);
+ lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
else
/* vertex shader */
lambda = lodbias; /* not really a bias, but absolute LOD */
if (lambda <= 0.0) { /* XXX threshold depends on the filter */
/* magnifying */
- *imgFilter = sampler->state->mag_img_filter;
+ *imgFilter = sampler->mag_img_filter;
*level0 = *level1 = 0;
}
else {
/* minifying */
- *imgFilter = sampler->state->min_img_filter;
+ *imgFilter = sampler->min_img_filter;
/* choose mipmap level(s) and compute the blend factor between them */
- if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+ if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
/* Nearest mipmap level */
const int lvl = (int) (lambda + 0.5);
*level0 =
- *level1 = CLAMP(lvl, 0, (int) sampler->texture->last_level);
+ *level1 = CLAMP(lvl, 0, (int) texture->last_level);
}
else {
/* Linear interpolation between mipmap levels */
const int lvl = (int) lambda;
- *level0 = CLAMP(lvl, 0, (int) sampler->texture->last_level);
- *level1 = CLAMP(lvl + 1, 0, (int) sampler->texture->last_level);
+ *level0 = CLAMP(lvl, 0, (int) texture->last_level);
+ *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
*levelBlend = FRAC(lambda); /* blending weight between levels */
}
}
@@ -598,23 +663,29 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
* sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1...
*/
static void
-get_texel(struct tgsi_sampler *sampler,
+get_texel(const struct tgsi_sampler *tgsi_sampler,
unsigned face, unsigned level, int x, int y, int z,
float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
{
- if (x < 0 || x >= (int) sampler->texture->width[level] ||
- y < 0 || y >= (int) sampler->texture->height[level] ||
- z < 0 || z >= (int) sampler->texture->depth[level]) {
- rgba[0][j] = sampler->state->border_color[0];
- rgba[1][j] = sampler->state->border_color[1];
- rgba[2][j] = sampler->state->border_color[2];
- rgba[3][j] = sampler->state->border_color[3];
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+
+ if (x < 0 || x >= (int) texture->width[level] ||
+ y < 0 || y >= (int) texture->height[level] ||
+ z < 0 || z >= (int) texture->depth[level]) {
+ rgba[0][j] = sampler->border_color[0];
+ rgba[1][j] = sampler->border_color[1];
+ rgba[2][j] = sampler->border_color[2];
+ rgba[3][j] = sampler->border_color[3];
}
else {
const int tx = x % TILE_SIZE;
const int ty = y % TILE_SIZE;
const struct softpipe_cached_tile *tile
- = sp_get_cached_tile_tex(sampler->pipe, sampler->cache,
+ = sp_get_cached_tile_tex(sp, samp->cache,
x, y, z, face, level);
rgba[0][j] = tile->data.color[ty][tx][0];
rgba[1][j] = tile->data.color[ty][tx][1];
@@ -624,7 +695,7 @@ get_texel(struct tgsi_sampler *sampler,
{
debug_printf("Get texel %f %f %f %f from %s\n",
rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
- pf_name(sampler->texture->format));
+ pf_name(texture->format));
}
}
}
@@ -682,103 +753,124 @@ shadow_compare(uint compare_func,
* Could probably extend for 3D...
*/
static void
-sp_get_samples_2d_common(struct tgsi_sampler *sampler,
+sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
+ boolean computeLambda,
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE],
const unsigned faces[4])
{
- const uint compare_func = sampler->state->compare_func;
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+ const uint compare_func = sampler->compare_func;
unsigned level0, level1, j, imgFilter;
int width, height;
float levelBlend;
- choose_mipmap_levels(sampler, s, t, p, lodbias,
+ choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
&level0, &level1, &levelBlend, &imgFilter);
- assert(sampler->state->normalized_coords);
+ assert(sampler->normalized_coords);
- width = sampler->texture->width[level0];
- height = sampler->texture->height[level0];
+ width = texture->width[level0];
+ height = texture->height[level0];
assert(width > 0);
switch (imgFilter) {
case PIPE_TEX_FILTER_NEAREST:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = nearest_texcoord(sampler->state->wrap_s, s[j], width);
- int y = nearest_texcoord(sampler->state->wrap_t, t[j], height);
- get_texel(sampler, faces[j], level0, x, y, 0, rgba, j);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(compare_func, rgba, p, j);
- }
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- unsigned c;
- x = x / 2;
- y = y / 2;
- get_texel(sampler, faces[j], level1, x, y, 0, rgba2, j);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
- shadow_compare(compare_func, rgba2, p, j);
+ {
+ int x[4], y[4];
+ nearest_texcoord_4(sampler->wrap_s, s, width, x);
+ nearest_texcoord_4(sampler->wrap_t, t, height, y);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ shadow_compare(compare_func, rgba, p, j);
}
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]);
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ unsigned c;
+ x[j] /= 2;
+ y[j] /= 2;
+ get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
+ rgba2, j);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+ shadow_compare(compare_func, rgba2, p, j);
+ }
+
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+ }
}
}
}
break;
case PIPE_TEX_FILTER_LINEAR:
case PIPE_TEX_FILTER_ANISO:
- for (j = 0; j < QUAD_SIZE; j++) {
- float tx[4][4], a, b;
- int x0, y0, x1, y1, c;
- linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &a);
- linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &b);
- get_texel(sampler, faces[j], level0, x0, y0, 0, tx, 0);
- get_texel(sampler, faces[j], level0, x1, y0, 0, tx, 1);
- get_texel(sampler, faces[j], level0, x0, y1, 0, tx, 2);
- get_texel(sampler, faces[j], level0, x1, y1, 0, tx, 3);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(compare_func, tx, p, 0);
- shadow_compare(compare_func, tx, p, 1);
- shadow_compare(compare_func, tx, p, 2);
- shadow_compare(compare_func, tx, p, 3);
- }
-
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
- }
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- x0 = x0 / 2;
- y0 = y0 / 2;
- x1 = x1 / 2;
- y1 = y1 / 2;
- get_texel(sampler, faces[j], level1, x0, y0, 0, tx, 0);
- get_texel(sampler, faces[j], level1, x1, y0, 0, tx, 1);
- get_texel(sampler, faces[j], level1, x0, y1, 0, tx, 2);
- get_texel(sampler, faces[j], level1, x1, y1, 0, tx, 3);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+ {
+ int x0[4], y0[4], x1[4], y1[4];
+ float xw[4], yw[4]; /* weights */
+
+ linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
+ linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ float tx[4][4]; /* texels */
+ int c;
+ get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
+ get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
+ get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
+ get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
shadow_compare(compare_func, tx, p, 0);
shadow_compare(compare_func, tx, p, 1);
shadow_compare(compare_func, tx, p, 2);
shadow_compare(compare_func, tx, p, 3);
}
+ /* interpolate R, G, B, A */
for (c = 0; c < 4; c++) {
- rgba2[c][j] = lerp_2d(a, b,
- tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ rgba[c][j] = lerp_2d(xw[j], yw[j],
+ tx[c][0], tx[c][1],
+ tx[c][2], tx[c][3]);
}
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]);
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ x0[j] /= 2;
+ y0[j] /= 2;
+ x1[j] /= 2;
+ y1[j] /= 2;
+ get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
+ get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
+ get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
+ get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+ shadow_compare(compare_func, tx, p, 0);
+ shadow_compare(compare_func, tx, p, 1);
+ shadow_compare(compare_func, tx, p, 2);
+ shadow_compare(compare_func, tx, p, 3);
+ }
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba2[c][j] = lerp_2d(xw[j], yw[j],
+ tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ }
+
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+ }
}
}
}
@@ -789,55 +881,65 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler,
}
-static void
-sp_get_samples_1d(struct tgsi_sampler *sampler,
+static INLINE void
+sp_get_samples_1d(const struct tgsi_sampler *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
+ boolean computeLambda,
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
static const unsigned faces[4] = {0, 0, 0, 0};
static const float tzero[4] = {0, 0, 0, 0};
- sp_get_samples_2d_common(sampler, s, tzero, NULL, lodbias, rgba, faces);
+ sp_get_samples_2d_common(sampler, s, tzero, NULL,
+ computeLambda, lodbias, rgba, faces);
}
-static void
-sp_get_samples_2d(struct tgsi_sampler *sampler,
+static INLINE void
+sp_get_samples_2d(const struct tgsi_sampler *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
+ boolean computeLambda,
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
static const unsigned faces[4] = {0, 0, 0, 0};
- sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces);
+ sp_get_samples_2d_common(sampler, s, t, p,
+ computeLambda, lodbias, rgba, faces);
}
-static void
-sp_get_samples_3d(struct tgsi_sampler *sampler,
+static INLINE void
+sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
+ boolean computeLambda,
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
/* get/map pipe_surfaces corresponding to 3D tex slices */
unsigned level0, level1, j, imgFilter;
int width, height, depth;
float levelBlend;
const uint face = 0;
- choose_mipmap_levels(sampler, s, t, p, lodbias,
+ choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
&level0, &level1, &levelBlend, &imgFilter);
- assert(sampler->state->normalized_coords);
+ assert(sampler->normalized_coords);
- width = sampler->texture->width[level0];
- height = sampler->texture->height[level0];
- depth = sampler->texture->depth[level0];
+ width = texture->width[level0];
+ height = texture->height[level0];
+ depth = texture->depth[level0];
assert(width > 0);
assert(height > 0);
@@ -845,89 +947,89 @@ sp_get_samples_3d(struct tgsi_sampler *sampler,
switch (imgFilter) {
case PIPE_TEX_FILTER_NEAREST:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = nearest_texcoord(sampler->state->wrap_s, s[j], width);
- int y = nearest_texcoord(sampler->state->wrap_t, t[j], height);
- int z = nearest_texcoord(sampler->state->wrap_r, p[j], depth);
- get_texel(sampler, face, level0, x, y, z, rgba, j);
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- unsigned c;
- x /= 2;
- y /= 2;
- z /= 2;
- get_texel(sampler, face, level1, x, y, z, rgba2, j);
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = LERP(levelBlend, rgba2[c][j], rgba[c][j]);
+ {
+ int x[4], y[4], z[4];
+ nearest_texcoord_4(sampler->wrap_s, s, width, x);
+ nearest_texcoord_4(sampler->wrap_t, t, height, y);
+ nearest_texcoord_4(sampler->wrap_r, p, depth, z);
+ for (j = 0; j < QUAD_SIZE; j++) {
+ get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ unsigned c;
+ x[j] /= 2;
+ y[j] /= 2;
+ z[j] /= 2;
+ get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
+ }
}
}
}
break;
case PIPE_TEX_FILTER_LINEAR:
case PIPE_TEX_FILTER_ANISO:
- for (j = 0; j < QUAD_SIZE; j++) {
- float texel0[4][4], texel1[4][4];
- float xw, yw, zw; /* interpolation weights */
- int x0, x1, y0, y1, z0, z1, c;
- linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &xw);
- linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &yw);
- linear_texcoord(sampler->state->wrap_r, p[j], depth, &z0, &z1, &zw);
- get_texel(sampler, face, level0, x0, y0, z0, texel0, 0);
- get_texel(sampler, face, level0, x1, y0, z0, texel0, 1);
- get_texel(sampler, face, level0, x0, y1, z0, texel0, 2);
- get_texel(sampler, face, level0, x1, y1, z0, texel0, 3);
- get_texel(sampler, face, level0, x0, y0, z1, texel1, 0);
- get_texel(sampler, face, level0, x1, y0, z1, texel1, 1);
- get_texel(sampler, face, level0, x0, y1, z1, texel1, 2);
- get_texel(sampler, face, level0, x1, y1, z1, texel1, 3);
-
- /* 3D lerp */
- for (c = 0; c < 4; c++) {
- float ctemp0[4][4], ctemp1[4][4];
- ctemp0[c][j] = lerp_2d(xw, yw,
- texel0[c][0], texel0[c][1],
- texel0[c][2], texel0[c][3]);
- ctemp1[c][j] = lerp_2d(xw, yw,
- texel1[c][0], texel1[c][1],
- texel1[c][2], texel1[c][3]);
- rgba[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]);
- }
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- x0 /= 2;
- y0 /= 2;
- z0 /= 2;
- x1 /= 2;
- y1 /= 2;
- z1 /= 2;
- get_texel(sampler, face, level1, x0, y0, z0, texel0, 0);
- get_texel(sampler, face, level1, x1, y0, z0, texel0, 1);
- get_texel(sampler, face, level1, x0, y1, z0, texel0, 2);
- get_texel(sampler, face, level1, x1, y1, z0, texel0, 3);
- get_texel(sampler, face, level1, x0, y0, z1, texel1, 0);
- get_texel(sampler, face, level1, x1, y0, z1, texel1, 1);
- get_texel(sampler, face, level1, x0, y1, z1, texel1, 2);
- get_texel(sampler, face, level1, x1, y1, z1, texel1, 3);
-
- /* 3D lerp */
+ {
+ int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
+ float xw[4], yw[4], zw[4]; /* interpolation weights */
+ linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
+ linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
+ linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int c;
+ float tx0[4][4], tx1[4][4];
+ get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
+ get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
+ get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
+ get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
+ get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
+ get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
+ get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
+ get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
+
+ /* interpolate R, G, B, A */
for (c = 0; c < 4; c++) {
- float ctemp0[4][4], ctemp1[4][4];
- ctemp0[c][j] = lerp_2d(xw, yw,
- texel0[c][0], texel0[c][1],
- texel0[c][2], texel0[c][3]);
- ctemp1[c][j] = lerp_2d(xw, yw,
- texel1[c][0], texel1[c][1],
- texel1[c][2], texel1[c][3]);
- rgba2[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]);
+ rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+ tx0[c][0], tx0[c][1],
+ tx0[c][2], tx0[c][3],
+ tx1[c][0], tx1[c][1],
+ tx1[c][2], tx1[c][3]);
}
- /* blend mipmap levels */
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]);
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ x0[j] /= 2;
+ y0[j] /= 2;
+ z0[j] /= 2;
+ x1[j] /= 2;
+ y1[j] /= 2;
+ z1[j] /= 2;
+ get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
+ get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
+ get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
+ get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
+ get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
+ get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
+ get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
+ get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+ tx0[c][0], tx0[c][1],
+ tx0[c][2], tx0[c][3],
+ tx1[c][0], tx1[c][1],
+ tx1[c][2], tx1[c][3]);
+ }
+
+ /* blend mipmap levels */
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+ }
}
}
}
@@ -939,10 +1041,11 @@ sp_get_samples_3d(struct tgsi_sampler *sampler,
static void
-sp_get_samples_cube(struct tgsi_sampler *sampler,
+sp_get_samples_cube(const struct tgsi_sampler *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
+ boolean computeLambda,
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
@@ -951,67 +1054,80 @@ sp_get_samples_cube(struct tgsi_sampler *sampler,
for (j = 0; j < QUAD_SIZE; j++) {
faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
}
- sp_get_samples_2d_common(sampler, ssss, tttt, NULL, lodbias, rgba, faces);
+ sp_get_samples_2d_common(sampler, ssss, tttt, NULL,
+ computeLambda, lodbias, rgba, faces);
}
static void
-sp_get_samples_rect(struct tgsi_sampler *sampler,
+sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
+ boolean computeLambda,
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- //sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces);
- static const uint face = 0;
- const uint compare_func = sampler->state->compare_func;
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+ const uint face = 0;
+ const uint compare_func = sampler->compare_func;
unsigned level0, level1, j, imgFilter;
int width, height;
float levelBlend;
- choose_mipmap_levels(sampler, s, t, p, lodbias,
+ choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
&level0, &level1, &levelBlend, &imgFilter);
/* texture RECTS cannot be mipmapped */
assert(level0 == level1);
- width = sampler->texture->width[level0];
- height = sampler->texture->height[level0];
+ width = texture->width[level0];
+ height = texture->height[level0];
assert(width > 0);
switch (imgFilter) {
case PIPE_TEX_FILTER_NEAREST:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = nearest_texcoord_unnorm(sampler->state->wrap_s, s[j], width);
- int y = nearest_texcoord_unnorm(sampler->state->wrap_t, t[j], height);
- get_texel(sampler, face, level0, x, y, 0, rgba, j);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(compare_func, rgba, p, j);
+ {
+ int x[4], y[4];
+ nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
+ nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
+ for (j = 0; j < QUAD_SIZE; j++) {
+ get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ shadow_compare(compare_func, rgba, p, j);
+ }
}
}
break;
case PIPE_TEX_FILTER_LINEAR:
case PIPE_TEX_FILTER_ANISO:
- for (j = 0; j < QUAD_SIZE; j++) {
- float tx[4][4], a, b;
- int x0, y0, x1, y1, c;
- linear_texcoord_unnorm(sampler->state->wrap_s, s[j], width, &x0, &x1, &a);
- linear_texcoord_unnorm(sampler->state->wrap_t, t[j], height, &y0, &y1, &b);
- get_texel(sampler, face, level0, x0, y0, 0, tx, 0);
- get_texel(sampler, face, level0, x1, y0, 0, tx, 1);
- get_texel(sampler, face, level0, x0, y1, 0, tx, 2);
- get_texel(sampler, face, level0, x1, y1, 0, tx, 3);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(compare_func, tx, p, 0);
- shadow_compare(compare_func, tx, p, 1);
- shadow_compare(compare_func, tx, p, 2);
- shadow_compare(compare_func, tx, p, 3);
- }
-
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ {
+ int x0[4], y0[4], x1[4], y1[4];
+ float xw[4], yw[4]; /* weights */
+ linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw);
+ linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
+ for (j = 0; j < QUAD_SIZE; j++) {
+ float tx[4][4]; /* texels */
+ int c;
+ get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
+ get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
+ get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
+ get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ shadow_compare(compare_func, tx, p, 0);
+ shadow_compare(compare_func, tx, p, 1);
+ shadow_compare(compare_func, tx, p, 2);
+ shadow_compare(compare_func, tx, p, 3);
+ }
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_2d(xw[j], yw[j],
+ tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ }
}
}
break;
@@ -1021,49 +1137,45 @@ sp_get_samples_rect(struct tgsi_sampler *sampler,
}
-
-
/**
- * Called via tgsi_sampler::get_samples()
- * Use the sampler's state setting to get a filtered RGBA value
- * from the sampler's texture.
- *
- * XXX we can implement many versions of this function, each
- * tightly coded for a specific combination of sampler state
- * (nearest + repeat), (bilinear mipmap + clamp), etc.
- *
- * The update_samplers() function in st_atom_sampler.c could create
- * a new tgsi_sampler object for each state combo it finds....
+ * Common code for vertex/fragment program texture sampling.
*/
-void
-sp_get_samples(struct tgsi_sampler *sampler,
+static INLINE void
+sp_get_samples(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
+ boolean computeLambda,
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- if (!sampler->texture)
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+
+ if (!texture)
return;
- switch (sampler->texture->target) {
+ switch (texture->target) {
case PIPE_TEXTURE_1D:
- assert(sampler->state->normalized_coords);
- sp_get_samples_1d(sampler, s, t, p, lodbias, rgba);
+ assert(sampler->normalized_coords);
+ sp_get_samples_1d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
break;
case PIPE_TEXTURE_2D:
- if (sampler->state->normalized_coords)
- sp_get_samples_2d(sampler, s, t, p, lodbias, rgba);
+ if (sampler->normalized_coords)
+ sp_get_samples_2d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
else
- sp_get_samples_rect(sampler, s, t, p, lodbias, rgba);
+ sp_get_samples_rect(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
break;
case PIPE_TEXTURE_3D:
- assert(sampler->state->normalized_coords);
- sp_get_samples_3d(sampler, s, t, p, lodbias, rgba);
+ assert(sampler->normalized_coords);
+ sp_get_samples_3d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
break;
case PIPE_TEXTURE_CUBE:
- assert(sampler->state->normalized_coords);
- sp_get_samples_cube(sampler, s, t, p, lodbias, rgba);
+ assert(sampler->normalized_coords);
+ sp_get_samples_cube(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
break;
default:
assert(0);
@@ -1084,3 +1196,34 @@ sp_get_samples(struct tgsi_sampler *sampler,
#endif
}
+
+/**
+ * Called via tgsi_sampler::get_samples() when running a fragment shader.
+ * Get four filtered RGBA values from the sampler's texture.
+ */
+void
+sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba);
+}
+
+
+/**
+ * Called via tgsi_sampler::get_samples() when running a vertex shader.
+ * Get four filtered RGBA values from the sampler's texture.
+ */
+void
+sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ sp_get_samples(tgsi_sampler, s, t, p, FALSE, lodbias, rgba);
+}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 404bfd0c36..40d8eb2c2a 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -1,17 +1,73 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#ifndef SP_TEX_SAMPLE_H
#define SP_TEX_SAMPLE_H
-struct tgsi_sampler;
+#include "tgsi/tgsi_exec.h"
+
+
+/**
+ * Subclass of tgsi_sampler that adds softpipe-specific members.
+ */
+struct sp_shader_sampler
+{
+ struct tgsi_sampler base; /**< base class; kept first so sp_shader_sampler() can cast a tgsi_sampler pointer to this type */
+
+ uint unit; /**< presumably the sampler/texture unit index -- TODO confirm */
+ struct softpipe_context *sp; /**< presumably the owning softpipe context -- TODO confirm */
+ struct softpipe_tile_cache *cache; /**< presumably the tile cache texels are fetched through -- TODO confirm */
+};
+
+static INLINE const struct sp_shader_sampler *
+sp_shader_sampler(const struct tgsi_sampler *sampler)
+{ /* cast wrapper: views a tgsi_sampler pointer as its sp_shader_sampler subclass */
+ return (const struct sp_shader_sampler *) sampler; /* plain cast, no type check; the caller must pass a sampler that really is an sp_shader_sampler */
+}
+
+
+extern void
+sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE]);
+
extern void
-sp_get_samples(struct tgsi_sampler *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE]);
+sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE]);
#endif /* SP_TEX_SAMPLE_H */
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index a64dc89f43..28a9784b16 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -33,7 +33,7 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -94,49 +94,23 @@ softpipe_texture_layout(struct pipe_screen *screen,
return spt->buffer != NULL;
}
-/* Hack it up to use the old winsys->surface_alloc_storage()
- * method for now:
- */
static boolean
softpipe_displaytarget_layout(struct pipe_screen *screen,
struct softpipe_texture * spt)
{
struct pipe_winsys *ws = screen->winsys;
- struct pipe_surface surf;
- unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ |
- PIPE_BUFFER_USAGE_CPU_WRITE |
- PIPE_BUFFER_USAGE_GPU_READ |
- PIPE_BUFFER_USAGE_GPU_WRITE);
- int ret;
-
-
- memset(&surf, 0, sizeof(surf));
-
- ret =ws->surface_alloc_storage( ws,
- &surf,
- spt->base.width[0],
- spt->base.height[0],
- spt->base.format,
- flags,
- spt->base.tex_usage);
- if(ret != 0)
- return FALSE;
-
- if (!surf.buffer) {
- /* allocation failed */
- return FALSE;
- }
+ unsigned usage = (PIPE_BUFFER_USAGE_CPU_READ_WRITE |
+ PIPE_BUFFER_USAGE_GPU_READ_WRITE);
- /* Now extract the goodies:
- */
spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);
spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);
- spt->stride[0] = surf.stride;
- /* Transfer the reference:
- */
- spt->buffer = surf.buffer;
- surf.buffer = NULL;
+ spt->buffer = ws->surface_buffer_create( ws,
+ spt->base.width[0],
+ spt->base.height[0],
+ spt->base.format,
+ usage,
+ &spt->stride[0]);
return spt->buffer != NULL;
}
@@ -231,28 +205,21 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
unsigned face, unsigned level, unsigned zslice,
unsigned usage)
{
- struct pipe_winsys *ws = screen->winsys;
struct softpipe_texture *spt = softpipe_texture(pt);
struct pipe_surface *ps;
assert(level <= pt->last_level);
ps = CALLOC_STRUCT(pipe_surface);
- ps->refcount = 1;
if (ps) {
- assert(ps->refcount);
+ ps->refcount = 1;
pipe_texture_reference(&ps->texture, pt);
- pipe_buffer_reference(screen, &ps->buffer, spt->buffer);
ps->format = pt->format;
- ps->block = pt->block;
ps->width = pt->width[level];
ps->height = pt->height[level];
- ps->nblocksx = pt->nblocksx[level];
- ps->nblocksy = pt->nblocksy[level];
- ps->stride = spt->stride[level];
ps->offset = spt->level_offset[level];
ps->usage = usage;
-
+
/* Because we are softpipe, anything that the state tracker
* thought was going to be done with the GPU will actually get
* done with the CPU. Let's adjust the flags to take that into
@@ -278,8 +245,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) {
ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) *
- ps->nblocksy *
- ps->stride;
+ pt->nblocksy[level] * spt->stride[level];
}
else {
assert(face == 0);
@@ -299,37 +265,108 @@ softpipe_tex_surface_release(struct pipe_screen *screen,
* needed post-processing to put them into hardware layout, this is
* where it would happen. For softpipe, nothing to do.
*/
- assert ((*s)->texture);
+ assert(surf->texture);
if (--surf->refcount == 0) {
- pipe_texture_reference(&surf->texture, NULL);
- pipe_buffer_reference(screen, &surf->buffer, NULL);
+ pipe_texture_reference(&surf->texture, NULL);
FREE(surf);
}
*s = NULL;
}
+/**
+ * Create a transfer object describing the (x, y, w, h) region of one
+ * face/level/zslice image of a texture.
+ *
+ * The new transfer starts with a refcount of 1 and takes a reference on
+ * the texture.  No buffer is mapped here; that happens in
+ * softpipe_transfer_map().
+ *
+ * \return the new transfer, or NULL if the allocation failed.
+ */
+static struct pipe_transfer *
+softpipe_get_tex_transfer(struct pipe_screen *screen,
+                          struct pipe_texture *texture,
+                          unsigned face, unsigned level, unsigned zslice,
+                          enum pipe_transfer_usage usage,
+                          unsigned x, unsigned y, unsigned w, unsigned h)
+{
+   struct softpipe_texture *sptex = softpipe_texture(texture);
+   struct softpipe_transfer *spt;
+   struct pipe_transfer *pt;
+
+   assert(texture);
+   assert(level <= texture->last_level);
+
+   spt = CALLOC_STRUCT(softpipe_transfer);
+   if (!spt) {
+      /* Report the failure explicitly rather than forming &spt->base from
+       * a NULL pointer and relying on 'base' being the first member.
+       */
+      return NULL;
+   }
+
+   pt = &spt->base;
+   pt->refcount = 1;
+   pipe_texture_reference(&pt->texture, texture);
+   pt->format = texture->format;
+   pt->block = texture->block;
+   pt->x = x;
+   pt->y = y;
+   pt->width = w;
+   pt->height = h;
+   pt->nblocksx = texture->nblocksx[level];
+   pt->nblocksy = texture->nblocksy[level];
+   pt->stride = sptex->stride[level];
+   spt->offset = sptex->level_offset[level];
+   pt->usage = usage;
+   pt->face = face;
+   pt->level = level;
+   pt->zslice = zslice;
+
+   if (texture->target == PIPE_TEXTURE_CUBE ||
+       texture->target == PIPE_TEXTURE_3D) {
+      /* Cube faces / 3D slices of a level are stacked one after another,
+       * each one nblocksy rows of 'stride' bytes: skip to the requested one.
+       */
+      spt->offset += ((texture->target == PIPE_TEXTURE_CUBE) ? face :
+                      zslice) * pt->nblocksy * pt->stride;
+   }
+   else {
+      assert(face == 0);
+      assert(zslice == 0);
+   }
+
+   return pt;
+}
+
+
+/**
+ * Drop one reference to a transfer and clear the caller's pointer.
+ *
+ * When the last reference goes away the texture reference held by the
+ * transfer is released and the transfer itself is freed.  A NULL \p t or
+ * a NULL \p *t is accepted and ignored, matching what the trace driver's
+ * wrapper for this hook already tolerates.
+ */
+static void
+softpipe_tex_transfer_release(struct pipe_screen *screen,
+                              struct pipe_transfer **t)
+{
+   struct softpipe_transfer *transfer;
+
+   /* Nothing to release. */
+   if (!t || !*t)
+      return;
+
+   transfer = softpipe_transfer(*t);
+
+   /* Softpipe keeps texture data in its final layout, so no write-back or
+    * post-processing is needed when a transfer is released.
+    */
+   assert (transfer->base.texture);
+   if (--transfer->base.refcount == 0) {
+      pipe_texture_reference(&transfer->base.texture, NULL);
+      FREE(transfer);
+   }
+   *t = NULL;
+}
+
+
static void *
-softpipe_surface_map( struct pipe_screen *screen,
- struct pipe_surface *surface,
- unsigned flags )
+softpipe_transfer_map( struct pipe_screen *screen,
+ struct pipe_transfer *transfer )
{
ubyte *map;
+ struct softpipe_texture *spt;
+ unsigned flags = 0;
- if (flags & ~surface->usage) {
- assert(0);
- return NULL;
+ assert(transfer->texture);
+ spt = softpipe_texture(transfer->texture);
+
+ if (transfer->usage != PIPE_TRANSFER_READ) {
+ flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
+ }
+
+ if (transfer->usage != PIPE_TRANSFER_WRITE) {
+ flags |= PIPE_BUFFER_USAGE_CPU_READ;
}
- map = pipe_buffer_map( screen, surface->buffer, flags );
+ map = pipe_buffer_map(screen, spt->buffer, flags);
if (map == NULL)
return NULL;
/* May want to different things here depending on read/write nature
* of the map:
*/
- if (surface->texture &&
- (flags & PIPE_BUFFER_USAGE_CPU_WRITE))
+ if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ)
{
/* Do something to notify sharing contexts of a texture change.
* In softpipe, that would mean flushing the texture cache.
@@ -337,15 +374,22 @@ softpipe_surface_map( struct pipe_screen *screen,
softpipe_screen(screen)->timestamp++;
}
- return map + surface->offset;
+ return map + softpipe_transfer(transfer)->offset +
+ transfer->y / transfer->block.height * transfer->stride +
+ transfer->x / transfer->block.width * transfer->block.size;
}
static void
-softpipe_surface_unmap(struct pipe_screen *screen,
- struct pipe_surface *surface)
+softpipe_transfer_unmap(struct pipe_screen *screen,
+ struct pipe_transfer *transfer)
{
- pipe_buffer_unmap( screen, surface->buffer );
+ struct softpipe_texture *spt;
+
+ assert(transfer->texture);
+ spt = softpipe_texture(transfer->texture);
+
+ pipe_buffer_unmap( screen, spt->buffer );
}
@@ -365,6 +409,8 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
screen->get_tex_surface = softpipe_get_tex_surface;
screen->tex_surface_release = softpipe_tex_surface_release;
- screen->surface_map = softpipe_surface_map;
- screen->surface_unmap = softpipe_surface_unmap;
+ screen->get_tex_transfer = softpipe_get_tex_transfer;
+ screen->tex_transfer_release = softpipe_tex_transfer_release;
+ screen->transfer_map = softpipe_transfer_map;
+ screen->transfer_unmap = softpipe_transfer_unmap;
}
diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h
index bf437a7c61..893aa7d11d 100644
--- a/src/gallium/drivers/softpipe/sp_texture.h
+++ b/src/gallium/drivers/softpipe/sp_texture.h
@@ -42,7 +42,7 @@ struct softpipe_texture
struct pipe_texture base;
unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long stride[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned stride[PIPE_MAX_TEXTURE_LEVELS];
/* The data is held here:
*/
@@ -51,14 +51,27 @@ struct softpipe_texture
boolean modified;
};
+struct softpipe_transfer
+{ /* softpipe's subclass of pipe_transfer */
+ struct pipe_transfer base; /* base class; kept first so softpipe_transfer() can cast a pipe_transfer pointer to this type */
+
+ unsigned long offset; /* byte offset of the selected level/face/zslice image; softpipe_transfer_map() adds it to the mapped buffer address */
+};
+
-/** cast wrapper */
+/** cast wrappers */
static INLINE struct softpipe_texture *
softpipe_texture(struct pipe_texture *pt)
{
return (struct softpipe_texture *) pt;
}
+static INLINE struct softpipe_transfer *
+softpipe_transfer(struct pipe_transfer *pt)
+{ /* cast wrapper: views a pipe_transfer pointer as its softpipe_transfer subclass */
+ return (struct softpipe_transfer *) pt; /* plain cast, no type check; the caller must pass a transfer created by softpipe */
+}
+
extern void
softpipe_init_texture_funcs( struct softpipe_context *softpipe );
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index b50c984513..593360aab0 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -26,7 +26,7 @@
**************************************************************************/
/**
- * Framebuffer/surface tile caching.
+ * Texture tile caching.
*
* Author:
* Brian Paul
@@ -40,7 +40,7 @@
#include "sp_texture.h"
#include "sp_tile_cache.h"
-#define NUM_ENTRIES 32
+#define NUM_ENTRIES 50
/** XXX move these */
@@ -52,7 +52,8 @@ struct softpipe_tile_cache
{
struct pipe_screen *screen;
struct pipe_surface *surface; /**< the surface we're caching */
- void *surface_map;
+ struct pipe_transfer *transfer;
+ void *transfer_map;
struct pipe_texture *texture; /**< if caching a texture */
struct softpipe_cached_tile entries[NUM_ENTRIES];
uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32];
@@ -60,8 +61,8 @@ struct softpipe_tile_cache
uint clear_val;
boolean depth_stencil; /** Is the surface a depth/stencil format? */
- struct pipe_surface *tex_surf;
- void *tex_surf_map;
+ struct pipe_transfer *tex_trans;
+ void *tex_trans_map;
int tex_face, tex_level, tex_z;
struct softpipe_cached_tile tile; /**< scratch tile for clears */
@@ -131,16 +132,19 @@ sp_create_tile_cache( struct pipe_screen *screen )
void
sp_destroy_tile_cache(struct softpipe_tile_cache *tc)
{
+ struct pipe_screen *screen;
uint pos;
for (pos = 0; pos < NUM_ENTRIES; pos++) {
/*assert(tc->entries[pos].x < 0);*/
}
- if (tc->surface) {
- pipe_surface_reference(&tc->surface, NULL);
+ if (tc->transfer) {
+ screen = tc->transfer->texture->screen;
+ screen->tex_transfer_release(screen, &tc->transfer);
}
- if (tc->tex_surf) {
- pipe_surface_reference(&tc->tex_surf, NULL);
+ if (tc->tex_trans) {
+ screen = tc->tex_trans->texture->screen;
+ screen->tex_transfer_release(screen, &tc->tex_trans);
}
FREE( tc );
@@ -156,18 +160,29 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc,
{
assert(!tc->texture);
- if (tc->surface_map) {
- tc->screen->surface_unmap(tc->screen, tc->surface);
- tc->surface_map = NULL;
+ if (tc->transfer) {
+ struct pipe_screen *screen = tc->transfer->texture->screen;
+
+ if (ps == tc->surface)
+ return;
+
+ if (tc->transfer_map) {
+ tc->screen->transfer_unmap(tc->screen, tc->transfer);
+ tc->transfer_map = NULL;
+ }
+
+ screen->tex_transfer_release(screen, &tc->transfer);
}
- pipe_surface_reference(&tc->surface, ps);
+ tc->surface = ps;
+
+ if (ps) {
+ struct pipe_screen *screen = ps->texture->screen;
- if (tc->surface) {
- if (tc->surface_map) /* XXX: this is always NULL!? */
- tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface,
- PIPE_BUFFER_USAGE_CPU_READ |
- PIPE_BUFFER_USAGE_CPU_WRITE);
+ tc->transfer = screen->get_tex_transfer(screen, ps->texture, ps->face,
+ ps->level, ps->zslice,
+ PIPE_TRANSFER_READ_WRITE,
+ 0, 0, ps->width, ps->height);
tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM ||
ps->format == PIPE_FORMAT_X8Z24_UNORM ||
@@ -181,7 +196,7 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc,
/**
- * Return the surface being cached.
+ * Return the surface associated with the tile cache.
*/
struct pipe_surface *
sp_tile_cache_get_surface(struct softpipe_tile_cache *tc)
@@ -191,30 +206,27 @@ sp_tile_cache_get_surface(struct softpipe_tile_cache *tc)
void
-sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc)
+sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc)
{
- if (tc->surface && !tc->surface_map)
- tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface,
- PIPE_BUFFER_USAGE_CPU_WRITE |
- PIPE_BUFFER_USAGE_CPU_READ);
-
- if (tc->tex_surf && !tc->tex_surf_map)
- tc->tex_surf_map = tc->screen->surface_map(tc->screen, tc->tex_surf,
- PIPE_BUFFER_USAGE_CPU_READ);
+ if (tc->transfer && !tc->transfer_map)
+ tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer);
+
+ if (tc->tex_trans && !tc->tex_trans_map)
+ tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans);
}
void
-sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc)
+sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc)
{
- if (tc->surface_map) {
- tc->screen->surface_unmap(tc->screen, tc->surface);
- tc->surface_map = NULL;
+ if (tc->transfer_map) {
+ tc->screen->transfer_unmap(tc->screen, tc->transfer);
+ tc->transfer_map = NULL;
}
- if (tc->tex_surf_map) {
- tc->screen->surface_unmap(tc->screen, tc->tex_surf);
- tc->tex_surf_map = NULL;
+ if (tc->tex_trans_map) {
+ tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
+ tc->tex_trans_map = NULL;
}
}
@@ -229,15 +241,20 @@ sp_tile_cache_set_texture(struct pipe_context *pipe,
{
uint i;
- assert(!tc->surface);
+ assert(!tc->transfer);
pipe_texture_reference(&tc->texture, texture);
- if (tc->tex_surf_map) {
- tc->screen->surface_unmap(tc->screen, tc->tex_surf);
- tc->tex_surf_map = NULL;
+ if (tc->transfer) {
+ struct pipe_screen *screen = tc->transfer->texture->screen;
+
+ if (tc->tex_trans_map) {
+ tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
+ tc->tex_trans_map = NULL;
+ }
+
+ screen->tex_transfer_release(screen, &tc->tex_trans);
}
- pipe_surface_reference(&tc->tex_surf, NULL);
/* mark as entries as invalid/empty */
/* XXX we should try to avoid this when the teximage hasn't changed */
@@ -328,20 +345,20 @@ static void
sp_tile_cache_flush_clear(struct pipe_context *pipe,
struct softpipe_tile_cache *tc)
{
- struct pipe_surface *ps = tc->surface;
- const uint w = tc->surface->width;
- const uint h = tc->surface->height;
+ struct pipe_transfer *pt = tc->transfer;
+ const uint w = tc->transfer->width;
+ const uint h = tc->transfer->height;
uint x, y;
uint numCleared = 0;
/* clear the scratch tile to the clear value */
- clear_tile(&tc->tile, ps->format, tc->clear_val);
+ clear_tile(&tc->tile, pt->format, tc->clear_val);
/* push the tile to all positions marked as clear */
for (y = 0; y < h; y += TILE_SIZE) {
for (x = 0; x < w; x += TILE_SIZE) {
if (is_clear_flag_set(tc->clear_flags, x, y)) {
- pipe_put_tile_raw(ps,
+ pipe_put_tile_raw(pt,
x, y, TILE_SIZE, TILE_SIZE,
tc->tile.data.color32, 0/*STRIDE*/);
@@ -359,28 +376,28 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe,
/**
- * Flush the tile cache: write all dirty tiles back to the surface.
+ * Flush the tile cache: write all dirty tiles back to the transfer.
* any tiles "flagged" as cleared will be "really" cleared.
*/
void
sp_flush_tile_cache(struct softpipe_context *softpipe,
struct softpipe_tile_cache *tc)
{
- struct pipe_surface *ps = tc->surface;
+ struct pipe_transfer *pt = tc->transfer;
int inuse = 0, pos;
- if (ps && ps->buffer) {
- /* caching a drawing surface */
+ if (pt) {
+ /* caching a drawing transfer */
for (pos = 0; pos < NUM_ENTRIES; pos++) {
struct softpipe_cached_tile *tile = tc->entries + pos;
if (tile->x >= 0) {
if (tc->depth_stencil) {
- pipe_put_tile_raw(ps,
+ pipe_put_tile_raw(pt,
tile->x, tile->y, TILE_SIZE, TILE_SIZE,
tile->data.depth32, 0/*STRIDE*/);
}
else {
- pipe_put_tile_rgba(ps,
+ pipe_put_tile_rgba(pt,
tile->x, tile->y, TILE_SIZE, TILE_SIZE,
(float *) tile->data.color);
}
@@ -415,7 +432,7 @@ struct softpipe_cached_tile *
sp_get_cached_tile(struct softpipe_context *softpipe,
struct softpipe_tile_cache *tc, int x, int y)
{
- struct pipe_surface *ps = tc->surface;
+ struct pipe_transfer *pt = tc->transfer;
/* tile pos in framebuffer: */
const int tile_x = x & ~(TILE_SIZE - 1);
@@ -431,12 +448,12 @@ sp_get_cached_tile(struct softpipe_context *softpipe,
if (tile->x != -1) {
/* put dirty tile back in framebuffer */
if (tc->depth_stencil) {
- pipe_put_tile_raw(ps,
+ pipe_put_tile_raw(pt,
tile->x, tile->y, TILE_SIZE, TILE_SIZE,
tile->data.depth32, 0/*STRIDE*/);
}
else {
- pipe_put_tile_rgba(ps,
+ pipe_put_tile_rgba(pt,
tile->x, tile->y, TILE_SIZE, TILE_SIZE,
(float *) tile->data.color);
}
@@ -448,22 +465,22 @@ sp_get_cached_tile(struct softpipe_context *softpipe,
if (is_clear_flag_set(tc->clear_flags, x, y)) {
/* don't get tile from framebuffer, just clear it */
if (tc->depth_stencil) {
- clear_tile(tile, ps->format, tc->clear_val);
+ clear_tile(tile, pt->format, tc->clear_val);
}
else {
- clear_tile_rgba(tile, ps->format, tc->clear_color);
+ clear_tile_rgba(tile, pt->format, tc->clear_color);
}
clear_clear_flag(tc->clear_flags, x, y);
}
else {
- /* get new tile data from surface */
+ /* get new tile data from transfer */
if (tc->depth_stencil) {
- pipe_get_tile_raw(ps,
+ pipe_get_tile_raw(pt,
tile->x, tile->y, TILE_SIZE, TILE_SIZE,
tile->data.depth32, 0/*STRIDE*/);
}
else {
- pipe_get_tile_rgba(ps,
+ pipe_get_tile_rgba(pt,
tile->x, tile->y, TILE_SIZE, TILE_SIZE,
(float *) tile->data.color);
}
@@ -484,7 +501,7 @@ sp_get_cached_tile(struct softpipe_context *softpipe,
static INLINE uint
tex_cache_pos(int x, int y, int z, int face, int level)
{
- uint entry = x + y * 5 + z * 4 + face + level;
+ uint entry = x + y * 9 + z * 3 + face + level * 7;
return entry % NUM_ENTRIES;
}
@@ -494,11 +511,11 @@ tex_cache_pos(int x, int y, int z, int face, int level)
* Tiles are read-only and indexed with more params.
*/
const struct softpipe_cached_tile *
-sp_get_cached_tile_tex(struct pipe_context *pipe,
+sp_get_cached_tile_tex(struct softpipe_context *sp,
struct softpipe_tile_cache *tc, int x, int y, int z,
int face, int level)
{
- struct pipe_screen *screen = pipe->screen;
+ struct pipe_screen *screen = sp->pipe.screen;
/* tile pos in framebuffer: */
const int tile_x = x & ~(TILE_SIZE - 1);
const int tile_y = y & ~(TILE_SIZE - 1);
@@ -510,8 +527,12 @@ sp_get_cached_tile_tex(struct pipe_context *pipe,
if (tc->texture) {
struct softpipe_texture *spt = softpipe_texture(tc->texture);
if (spt->modified) {
- /* texture was modified, force a cache reload */
- tile->x = -1;
+ /* texture was modified, invalidate all cached tiles */
+ uint p;
+ for (p = 0; p < NUM_ENTRIES; p++) {
+ tile = tc->entries + p;
+ tile->x = -1;
+ }
spt->modified = FALSE;
}
}
@@ -523,28 +544,37 @@ sp_get_cached_tile_tex(struct pipe_context *pipe,
level != tile->level) {
/* cache miss */
- /* check if we need to get a new surface */
- if (!tc->tex_surf ||
+#if 0
+ printf("miss at %u x=%d y=%d z=%d face=%d level=%d\n", pos,
+ x/TILE_SIZE, y/TILE_SIZE, z, face, level);
+#endif
+ /* check if we need to get a new transfer */
+ if (!tc->tex_trans ||
tc->tex_face != face ||
tc->tex_level != level ||
tc->tex_z != z) {
- /* get new surface (view into texture) */
+ /* get new transfer (view into texture) */
+
+ if (tc->transfer) {
+ if (tc->tex_trans_map)
+ tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
- if (tc->tex_surf_map)
- tc->screen->surface_unmap(tc->screen, tc->tex_surf);
+ screen->tex_transfer_release(screen, &tc->tex_trans);
+ }
- tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z,
- PIPE_BUFFER_USAGE_CPU_READ);
- tc->tex_surf_map = screen->surface_map(screen, tc->tex_surf,
- PIPE_BUFFER_USAGE_CPU_READ);
+ tc->tex_trans = screen->get_tex_transfer(screen, tc->texture, face, level, z,
+ PIPE_TRANSFER_READ, 0, 0,
+ tc->texture->width[level],
+ tc->texture->height[level]);
+ tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
tc->tex_face = face;
tc->tex_level = level;
tc->tex_z = z;
}
- /* get tile from the surface (view into texture) */
- pipe_get_tile_rgba(tc->tex_surf,
+ /* get tile from the transfer (view into texture) */
+ pipe_get_tile_rgba(tc->tex_trans,
tile_x, tile_y, TILE_SIZE, TILE_SIZE,
(float *) tile->data.color);
tile->x = tile_x;
@@ -571,7 +601,7 @@ sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue)
tc->clear_val = clearValue;
- switch (tc->surface->format) {
+ switch (tc->transfer->format) {
case PIPE_FORMAT_R8G8B8A8_UNORM:
r = (clearValue >> 24) & 0xff;
g = (clearValue >> 16) & 0xff;
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h
index bc96c941f6..9ac3fdda94 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.h
@@ -74,10 +74,10 @@ extern struct pipe_surface *
sp_tile_cache_get_surface(struct softpipe_tile_cache *tc);
extern void
-sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc);
+sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc);
extern void
-sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc);
+sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc);
extern void
sp_tile_cache_set_texture(struct pipe_context *pipe,
@@ -96,7 +96,7 @@ sp_get_cached_tile(struct softpipe_context *softpipe,
struct softpipe_tile_cache *tc, int x, int y);
extern const struct softpipe_cached_tile *
-sp_get_cached_tile_tex(struct pipe_context *pipe,
+sp_get_cached_tile_tex(struct softpipe_context *softpipe,
struct softpipe_tile_cache *tc, int x, int y, int z,
int face, int level);
diff --git a/src/gallium/drivers/trace/Makefile b/src/gallium/drivers/trace/Makefile
new file mode 100644
index 0000000000..e1bd970937
--- /dev/null
+++ b/src/gallium/drivers/trace/Makefile
@@ -0,0 +1,14 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = trace
+
+C_SOURCES = \
+ tr_context.c \
+ tr_dump.c \
+ tr_screen.c \
+ tr_state.c \
+ tr_texture.c \
+ tr_winsys.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 1dd7719379..ec8be27077 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -274,11 +274,11 @@ static INLINE boolean
trace_context_get_query_result(struct pipe_context *_pipe,
struct pipe_query *query,
boolean wait,
- uint64 *presult)
+ uint64_t *presult)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct pipe_context *pipe = tr_ctx->pipe;
- uint64 result;
+ uint64_t result;
boolean _result;
trace_dump_call_begin("pipe_context", "get_query_result");
@@ -722,9 +722,9 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe,
/* Unwrap the input state */
memcpy(&unwrapped_state, state, sizeof(unwrapped_state));
- for(i = 0; i < state->num_cbufs; ++i)
+ for(i = 0; i < state->nr_cbufs; ++i)
unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]);
- for(i = state->num_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i)
+ for(i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i)
unwrapped_state.cbufs[i] = NULL;
unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf);
state = &unwrapped_state;
diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h
index 7831900ec2..6704175964 100644
--- a/src/gallium/drivers/trace/tr_context.h
+++ b/src/gallium/drivers/trace/tr_context.h
@@ -30,7 +30,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_context.h"
diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
index a0ead0ded3..d98cef221b 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -45,7 +45,7 @@
#endif
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "util/u_stream.h"
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index 8789f86b1a..164c6bbc4d 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -315,26 +315,101 @@ trace_screen_tex_surface_release(struct pipe_screen *_screen,
}
+/**
+ * Tracing wrapper for pipe_screen::get_tex_transfer.
+ *
+ * Unwraps the trace texture, records the call (all arguments and the
+ * returned pointer) in the trace dump, forwards it to the wrapped screen
+ * and returns the driver's transfer wrapped via trace_transfer_create().
+ */
+static struct pipe_transfer *
+trace_screen_get_tex_transfer(struct pipe_screen *_screen,
+                              struct pipe_texture *texture,
+                              unsigned face, unsigned level,
+                              unsigned zslice,
+                              enum pipe_transfer_usage usage,
+                              unsigned x, unsigned y, unsigned w, unsigned h)
+{
+   struct trace_screen *tr_scr = trace_screen(_screen);
+   struct pipe_screen *screen = tr_scr->screen;
+   struct trace_texture *tr_tex;
+   struct pipe_transfer *result;
+
+   assert(texture);
+   tr_tex = trace_texture(tr_scr, texture);
+   /* From here on 'texture' is the wrapped driver's texture. */
+   texture = tr_tex->texture;
+   assert(texture->screen == screen);
+
+   trace_dump_call_begin("pipe_screen", "get_tex_transfer");
+
+   trace_dump_arg(ptr, screen);
+   trace_dump_arg(ptr, texture);
+   trace_dump_arg(uint, face);
+   trace_dump_arg(uint, level);
+   trace_dump_arg(uint, zslice);
+   trace_dump_arg(uint, usage);
+
+   /* The requested region is passed to the driver as well, so record it;
+    * without it the dumped call does not describe the full request.
+    */
+   trace_dump_arg(uint, x);
+   trace_dump_arg(uint, y);
+   trace_dump_arg(uint, w);
+   trace_dump_arg(uint, h);
+
+   result = screen->get_tex_transfer(screen, texture, face, level, zslice, usage,
+                                     x, y, w, h);
+
+   trace_dump_ret(ptr, result);
+
+   trace_dump_call_end();
+
+   /* Hand back a trace wrapper around the driver's transfer. */
+   result = trace_transfer_create(tr_tex, result);
+
+   return result;
+}
+
+
+/**
+ * Tracing wrapper for pipe_screen::tex_transfer_release.
+ *
+ * Drops one reference to the wrapped transfer and clears the caller's
+ * pointer.  When the last reference goes away the destruction is recorded
+ * in the trace dump and the wrapper is destroyed with
+ * trace_transfer_destroy().  A NULL *ptransfer is ignored.
+ */
+static void
+trace_screen_tex_transfer_release(struct pipe_screen *_screen,
+                                  struct pipe_transfer **ptransfer)
+{
+   struct trace_screen *tr_scr = trace_screen(_screen);
+   struct pipe_screen *screen = tr_scr->screen;
+   struct trace_texture *tr_tex;
+   struct trace_transfer *tr_trans;
+   struct pipe_transfer *transfer;
+
+   assert(ptransfer);
+
+   /* Nothing to release. */
+   if (!*ptransfer)
+      return;
+
+   /* Look up the driver's transfer behind the trace wrapper. */
+   tr_tex = trace_texture(tr_scr, (*ptransfer)->texture);
+   tr_trans = trace_transfer(tr_tex, *ptransfer);
+   transfer = tr_trans->transfer;
+
+   if (--(*ptransfer)->refcount == 0) {
+      trace_dump_call_begin("pipe_screen", "tex_transfer_destroy");
+
+      trace_dump_arg(ptr, screen);
+      trace_dump_arg(ptr, transfer);
+
+      trace_transfer_destroy(tr_tex, *ptransfer);
+
+      trace_dump_call_end();
+   }
+
+   *ptransfer = NULL;
+}
+
+
static void *
-trace_screen_surface_map(struct pipe_screen *_screen,
- struct pipe_surface *surface,
- unsigned flags)
+trace_screen_transfer_map(struct pipe_screen *_screen,
+ struct pipe_transfer *transfer)
{
struct trace_screen *tr_scr = trace_screen(_screen);
struct pipe_screen *screen = tr_scr->screen;
struct trace_texture *tr_tex;
- struct trace_surface *tr_surf;
+ struct trace_transfer *tr_trans;
void *map;
- tr_tex = trace_texture(tr_scr, surface->texture);
- tr_surf = trace_surface(tr_tex, surface);
- surface = tr_surf->surface;
+ tr_tex = trace_texture(tr_scr, transfer->texture);
+ tr_trans = trace_transfer(tr_tex, transfer);
+ transfer = tr_trans->transfer;
- map = screen->surface_map(screen, surface, flags);
+ map = screen->transfer_map(screen, transfer);
if(map) {
- if(flags & PIPE_BUFFER_USAGE_CPU_WRITE) {
- assert(!tr_surf->map);
- tr_surf->map = map;
+ if(transfer->usage != PIPE_TRANSFER_READ) {
+ assert(!tr_trans->map);
+ tr_trans->map = map;
}
}
@@ -343,33 +418,33 @@ trace_screen_surface_map(struct pipe_screen *_screen,
static void
-trace_screen_surface_unmap(struct pipe_screen *_screen,
- struct pipe_surface *surface)
+trace_screen_transfer_unmap(struct pipe_screen *_screen,
+ struct pipe_transfer *transfer)
{
struct trace_screen *tr_scr = trace_screen(_screen);
struct pipe_screen *screen = tr_scr->screen;
struct trace_texture *tr_tex;
- struct trace_surface *tr_surf;
+ struct trace_transfer *tr_trans;
- tr_tex = trace_texture(tr_scr, surface->texture);
- tr_surf = trace_surface(tr_tex, surface);
- surface = tr_surf->surface;
+ tr_tex = trace_texture(tr_scr, transfer->texture);
+ tr_trans = trace_transfer(tr_tex, transfer);
+ transfer = tr_trans->transfer;
- if(tr_surf->map) {
- size_t size = surface->nblocksy * surface->stride;
+ if(tr_trans->map) {
+ size_t size = transfer->nblocksy * transfer->stride;
- trace_dump_call_begin("pipe_winsys", "surface_write");
+ trace_dump_call_begin("pipe_winsys", "transfer_write");
trace_dump_arg(ptr, screen);
- trace_dump_arg(ptr, surface);
+ trace_dump_arg(ptr, transfer);
trace_dump_arg_begin("data");
- trace_dump_bytes(tr_surf->map, size);
+ trace_dump_bytes(tr_trans->map, size);
trace_dump_arg_end();
trace_dump_arg_begin("stride");
- trace_dump_uint(surface->stride);
+ trace_dump_uint(transfer->stride);
trace_dump_arg_end();
trace_dump_arg_begin("size");
@@ -378,10 +453,10 @@ trace_screen_surface_unmap(struct pipe_screen *_screen,
trace_dump_call_end();
- tr_surf->map = NULL;
+ tr_trans->map = NULL;
}
- screen->surface_unmap(screen, surface);
+ screen->transfer_unmap(screen, transfer);
}
@@ -437,8 +512,10 @@ trace_screen_create(struct pipe_screen *screen)
tr_scr->base.texture_release = trace_screen_texture_release;
tr_scr->base.get_tex_surface = trace_screen_get_tex_surface;
tr_scr->base.tex_surface_release = trace_screen_tex_surface_release;
- tr_scr->base.surface_map = trace_screen_surface_map;
- tr_scr->base.surface_unmap = trace_screen_surface_unmap;
+ tr_scr->base.get_tex_transfer = trace_screen_get_tex_transfer;
+ tr_scr->base.tex_transfer_release = trace_screen_tex_transfer_release;
+ tr_scr->base.transfer_map = trace_screen_transfer_map;
+ tr_scr->base.transfer_unmap = trace_screen_transfer_unmap;
tr_scr->screen = screen;
diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c
index 986d939e0c..81a9e2376e 100644
--- a/src/gallium/drivers/trace/tr_state.c
+++ b/src/gallium/drivers/trace/tr_state.c
@@ -223,7 +223,6 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state)
trace_dump_struct_begin("pipe_constant_buffer");
trace_dump_member(ptr, state, buffer);
- trace_dump_member(uint, state, size);
trace_dump_struct_end();
}
@@ -280,9 +279,9 @@ void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_
trace_dump_member(uint, &state->stencil[i], fail_op);
trace_dump_member(uint, &state->stencil[i], zpass_op);
trace_dump_member(uint, &state->stencil[i], zfail_op);
- trace_dump_member(uint, &state->stencil[i], ref_value);
- trace_dump_member(uint, &state->stencil[i], value_mask);
- trace_dump_member(uint, &state->stencil[i], write_mask);
+ trace_dump_member(uint, &state->stencil[i], ref_value);
+ trace_dump_member(uint, &state->stencil[i], valuemask);
+ trace_dump_member(uint, &state->stencil[i], writemask);
trace_dump_struct_end();
trace_dump_elem_end();
}
@@ -293,7 +292,7 @@ void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_
trace_dump_struct_begin("pipe_alpha_state");
trace_dump_member(bool, &state->alpha, enabled);
trace_dump_member(uint, &state->alpha, func);
- trace_dump_member(float, &state->alpha, ref);
+ trace_dump_member(float, &state->alpha, ref_value);
trace_dump_struct_end();
trace_dump_member_end();
@@ -351,7 +350,7 @@ void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state)
trace_dump_member(uint, state, width);
trace_dump_member(uint, state, height);
- trace_dump_member(uint, state, num_cbufs);
+ trace_dump_member(uint, state, nr_cbufs);
trace_dump_member_array(ptr, state, cbufs);
trace_dump_member(ptr, state, zsbuf);
@@ -398,13 +397,39 @@ void trace_dump_surface(const struct pipe_surface *state)
trace_dump_struct_begin("pipe_surface");
- trace_dump_member(ptr, state, buffer);
trace_dump_member(format, state, format);
trace_dump_member(uint, state, status);
trace_dump_member(uint, state, clear_value);
trace_dump_member(uint, state, width);
trace_dump_member(uint, state, height);
+ trace_dump_member(uint, state, layout);
+ trace_dump_member(uint, state, offset);
+ trace_dump_member(uint, state, refcount);
+ trace_dump_member(uint, state, usage);
+
+ trace_dump_member(ptr, state, texture);
+ trace_dump_member(uint, state, face);
+ trace_dump_member(uint, state, level);
+ trace_dump_member(uint, state, zslice);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_transfer(const struct pipe_transfer *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_transfer");
+
+ trace_dump_member(format, state, format);
+ trace_dump_member(uint, state, width);
+ trace_dump_member(uint, state, height);
+
trace_dump_member_begin("block");
trace_dump_block(&state->block);
trace_dump_member_end();
@@ -412,8 +437,6 @@ void trace_dump_surface(const struct pipe_surface *state)
trace_dump_member(uint, state, nblocksx);
trace_dump_member(uint, state, nblocksy);
trace_dump_member(uint, state, stride);
- trace_dump_member(uint, state, layout);
- trace_dump_member(uint, state, offset);
trace_dump_member(uint, state, refcount);
trace_dump_member(uint, state, usage);
@@ -435,7 +458,7 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state)
trace_dump_struct_begin("pipe_vertex_buffer");
- trace_dump_member(uint, state, pitch);
+ trace_dump_member(uint, state, stride);
trace_dump_member(uint, state, max_index);
trace_dump_member(uint, state, buffer_offset);
trace_dump_member(ptr, state, buffer);
diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h
index 5ae533dc66..513ed0ac98 100644
--- a/src/gallium/drivers/trace/tr_state.h
+++ b/src/gallium/drivers/trace/tr_state.h
@@ -68,6 +68,8 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state);
void trace_dump_surface(const struct pipe_surface *state);
+void trace_dump_transfer(const struct pipe_transfer *state);
+
void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state);
void trace_dump_vertex_element(const struct pipe_vertex_element *state);
diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c
index 440a78704a..120ba0dd31 100644
--- a/src/gallium/drivers/trace/tr_texture.c
+++ b/src/gallium/drivers/trace/tr_texture.c
@@ -87,7 +87,6 @@ trace_surface_create(struct trace_texture *tr_tex,
memcpy(&tr_surf->base, surface, sizeof(struct pipe_surface));
- tr_surf->base.winsys = tr_tex->base.screen->winsys;
tr_surf->base.texture = NULL;
pipe_texture_reference(&tr_surf->base.texture, &tr_tex->base);
tr_surf->surface = surface;
@@ -110,3 +109,43 @@ trace_surface_destroy(struct trace_texture *tr_tex,
FREE(tr_surf);
}
+
+struct pipe_transfer *
+trace_transfer_create(struct trace_texture *tr_tex,
+ struct pipe_transfer *transfer)
+{
+ struct trace_transfer *tr_trans;
+
+ if(!transfer)
+ goto error;
+
+ assert(transfer->texture == tr_tex->texture);
+
+ tr_trans = CALLOC_STRUCT(trace_transfer);
+ if(!tr_trans)
+ goto error;
+
+ memcpy(&tr_trans->base, transfer, sizeof(struct pipe_transfer));
+
+ tr_trans->base.texture = NULL;
+ pipe_texture_reference(&tr_trans->base.texture, &tr_tex->base);
+ tr_trans->transfer = transfer;
+
+ return &tr_trans->base;
+
+error:
+ pipe_transfer_reference(&transfer, NULL);
+ return NULL;
+}
+
+
+void
+trace_transfer_destroy(struct trace_texture *tr_tex,
+ struct pipe_transfer *transfer)
+{
+ struct trace_transfer *tr_trans = trace_transfer(tr_tex, transfer);
+ pipe_texture_reference(&tr_trans->base.texture, NULL);
+ pipe_transfer_reference(&tr_trans->transfer, NULL);
+ FREE(tr_trans);
+}
+
diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h
index 9e72edb8a3..168cefd53d 100644
--- a/src/gallium/drivers/trace/tr_texture.h
+++ b/src/gallium/drivers/trace/tr_texture.h
@@ -48,6 +48,14 @@ struct trace_surface
struct pipe_surface base;
struct pipe_surface *surface;
+};
+
+
+struct trace_transfer
+{
+ struct pipe_transfer base;
+
+ struct pipe_transfer *transfer;
void *map;
};
@@ -75,6 +83,17 @@ trace_surface(struct trace_texture *tr_tex,
}
+static INLINE struct trace_transfer *
+trace_transfer(struct trace_texture *tr_tex,
+ struct pipe_transfer *transfer)
+{
+ if(!transfer)
+ return NULL;
+ assert(transfer->texture == &tr_tex->base);
+ return (struct trace_transfer *)transfer;
+}
+
+
struct pipe_texture *
trace_texture_create(struct trace_screen *tr_scr,
struct pipe_texture *texture);
@@ -91,5 +110,13 @@ void
trace_surface_destroy(struct trace_texture *tr_tex,
struct pipe_surface *surface);
+struct pipe_transfer *
+trace_transfer_create(struct trace_texture *tr_tex,
+ struct pipe_transfer *transfer);
+
+void
+trace_transfer_destroy(struct trace_texture *tr_tex,
+ struct pipe_transfer *transfer);
+
#endif /* TR_TEXTURE_H_ */
diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c
index 177835854e..c4148fe810 100644
--- a/src/gallium/drivers/trace/tr_winsys.c
+++ b/src/gallium/drivers/trace/tr_winsys.c
@@ -98,86 +98,41 @@ trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys,
}
-static struct pipe_surface *
-trace_winsys_surface_alloc(struct pipe_winsys *_winsys)
-{
- struct trace_winsys *tr_ws = trace_winsys(_winsys);
- struct pipe_winsys *winsys = tr_ws->winsys;
- struct pipe_surface *result;
-
- trace_dump_call_begin("pipe_winsys", "surface_alloc");
-
- trace_dump_arg(ptr, winsys);
-
- result = winsys->surface_alloc(winsys);
-
- trace_dump_ret(ptr, result);
-
- trace_dump_call_end();
-
- assert(!result || !result->texture);
-
- return result;
-}
-
-
-static int
-trace_winsys_surface_alloc_storage(struct pipe_winsys *_winsys,
- struct pipe_surface *surface,
+static struct pipe_buffer *
+trace_winsys_surface_buffer_create(struct pipe_winsys *_winsys,
unsigned width, unsigned height,
enum pipe_format format,
- unsigned flags,
- unsigned tex_usage)
+ unsigned usage,
+ unsigned *pstride)
{
struct trace_winsys *tr_ws = trace_winsys(_winsys);
struct pipe_winsys *winsys = tr_ws->winsys;
- int result;
+ unsigned stride;
+ struct pipe_buffer *result;
- assert(surface && !surface->texture);
-
- trace_dump_call_begin("pipe_winsys", "surface_alloc_storage");
+ trace_dump_call_begin("pipe_winsys", "surface_buffer_create");
trace_dump_arg(ptr, winsys);
- trace_dump_arg(ptr, surface);
trace_dump_arg(uint, width);
trace_dump_arg(uint, height);
trace_dump_arg(format, format);
- trace_dump_arg(uint, flags);
- trace_dump_arg(uint, tex_usage);
+ trace_dump_arg(uint, usage);
- result = winsys->surface_alloc_storage(winsys,
- surface,
+ result = winsys->surface_buffer_create(winsys,
width, height,
format,
- flags,
- tex_usage);
+ usage,
+ pstride);
- trace_dump_ret(int, result);
+ stride = *pstride;
- trace_dump_call_end();
+ trace_dump_arg(uint, stride);
- return result;
-}
-
-
-static void
-trace_winsys_surface_release(struct pipe_winsys *_winsys,
- struct pipe_surface **psurface)
-{
- struct trace_winsys *tr_ws = trace_winsys(_winsys);
- struct pipe_winsys *winsys = tr_ws->winsys;
- struct pipe_surface *surface = *psurface;
-
- assert(psurface && *psurface && !(*psurface)->texture);
-
- trace_dump_call_begin("pipe_winsys", "surface_release");
-
- trace_dump_arg(ptr, winsys);
- trace_dump_arg(ptr, surface);
-
- winsys->surface_release(winsys, psurface);
+ trace_dump_ret(ptr, result);
trace_dump_call_end();
+
+ return result;
}
@@ -465,9 +420,7 @@ trace_winsys_create(struct pipe_winsys *winsys)
tr_ws->base.destroy = trace_winsys_destroy;
tr_ws->base.get_name = trace_winsys_get_name;
tr_ws->base.flush_frontbuffer = trace_winsys_flush_frontbuffer;
- tr_ws->base.surface_alloc = trace_winsys_surface_alloc;
- tr_ws->base.surface_alloc_storage = trace_winsys_surface_alloc_storage;
- tr_ws->base.surface_release = trace_winsys_surface_release;
+ tr_ws->base.surface_buffer_create = trace_winsys_surface_buffer_create;
tr_ws->base.buffer_create = trace_winsys_buffer_create;
tr_ws->base.user_buffer_create = trace_winsys_user_buffer_create;
tr_ws->base.buffer_map = trace_winsys_buffer_map;
diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h
index 062ddf66a0..3670cb915e 100644
--- a/src/gallium/drivers/trace/tr_winsys.h
+++ b/src/gallium/drivers/trace/tr_winsys.h
@@ -30,8 +30,8 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_winsys.h"
+#include "util/u_debug.h"
+#include "pipe/internal/p_winsys_screen.h"
/**
diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/internal/p_winsys_screen.h
index 5d18291dc6..ee835578b2 100644
--- a/src/gallium/include/pipe/p_winsys.h
+++ b/src/gallium/include/pipe/internal/p_winsys_screen.h
@@ -36,7 +36,7 @@
#define P_WINSYS_H
-#include "p_format.h"
+#include "pipe/p_format.h"
#ifdef __cplusplus
@@ -76,24 +76,6 @@ struct pipe_winsys
void *context_private );
- /** allocate a new surface (no context dependency) */
- struct pipe_surface *(*surface_alloc)(struct pipe_winsys *ws);
-
- /**
- * Allocate storage for a pipe_surface.
- * \param flags XXX unused, remove someday
- * \return 0 if succeeds.
- */
- int (*surface_alloc_storage)(struct pipe_winsys *ws,
- struct pipe_surface *surf,
- unsigned width, unsigned height,
- enum pipe_format format,
- unsigned flags,
- unsigned tex_usage);
-
- void (*surface_release)(struct pipe_winsys *ws, struct pipe_surface **s);
-
-
/**
* Buffer management. Buffer attributes are mostly fixed over its lifetime.
*
@@ -138,6 +120,24 @@ struct pipe_winsys
void *ptr,
unsigned bytes);
+ /**
+ * Allocate storage for a display target surface.
+ *
+ * Often surfaces which are meant to be blitted to the front screen (i.e.,
+ * display targets) must be allocated with special characteristics, memory
+ * pools, or obtained directly from the windowing system.
+ *
+ * This callback is invoked by the pipe_screen when creating a texture marked
+ * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying
+ * buffer storage.
+ */
+ struct pipe_buffer *(*surface_buffer_create)(struct pipe_winsys *ws,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned usage,
+ unsigned *stride);
+
+
/**
* Map the entire data store of a buffer object into the client's address.
* flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags.
@@ -178,7 +178,6 @@ struct pipe_winsys
};
-
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 4d64c74a4a..bc2a0a7ef3 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -96,7 +96,6 @@ typedef int _Bool;
typedef unsigned int uint;
typedef unsigned char ubyte;
typedef unsigned short ushort;
-typedef uint64_t uint64;
#if 0
#define boolean bool
@@ -112,20 +111,22 @@ typedef unsigned char boolean;
/* Function inlining */
-#ifdef __cplusplus
-# define INLINE inline
-#elif defined(__GNUC__)
-# define INLINE __inline__
-#elif defined(_MSC_VER)
-# define INLINE __inline
-#elif defined(__ICL)
-# define INLINE __inline
-#elif defined(__INTEL_COMPILER)
-# define INLINE inline
-#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
-# define INLINE __inline
-#else
-# define INLINE
+#ifndef INLINE
+# ifdef __cplusplus
+# define INLINE inline
+# elif defined(__GNUC__)
+# define INLINE __inline__
+# elif defined(_MSC_VER)
+# define INLINE __inline
+# elif defined(__ICL)
+# define INLINE __inline
+# elif defined(__INTEL_COMPILER)
+# define INLINE inline
+# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+# define INLINE __inline
+# else
+# define INLINE
+# endif
#endif
@@ -144,10 +145,12 @@ typedef unsigned char boolean;
#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) ))
#define ALIGN16_ASSIGN(NAME) NAME##___aligned
#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) ))
+#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) ))
#else
#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1]
#define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned)
#define ALIGN16_ATTRIB
+#define ALIGN8_ATTRIB
#endif
diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h
index af3746c026..05cbd2fc4d 100644
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -85,8 +85,19 @@
#define PIPE_ARCH_X86_64
#endif
-#if 0 /* FIXME */
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+#if defined(PIPE_CC_GCC) && !defined(__SSE2__)
+/* #warning SSE2 support requires -msse -msse2 compiler options */
+#else
+#define PIPE_ARCH_SSE
+#endif
+#endif
+
+#if defined(__PPC__)
#define PIPE_ARCH_PPC
+#if defined(__PPC64__)
+#define PIPE_ARCH_PPC_64
+#endif
#endif
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 2646706ff2..9454cc87db 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -40,6 +40,7 @@ struct pipe_screen;
struct pipe_fence_handle;
struct pipe_state_cache;
struct pipe_query;
+struct pipe_winsys;
/**
@@ -109,7 +110,7 @@ struct pipe_context {
boolean (*get_query_result)(struct pipe_context *pipe,
struct pipe_query *q,
boolean wait,
- uint64 *result);
+ uint64_t *result);
/*@}*/
/**
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index dc8a92dccb..3cbc93d12b 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -171,6 +171,9 @@ enum pipe_texture_target {
#define PIPE_TEXTURE_USAGE_PRIMARY 0x4 /* ie a frontbuffer */
#define PIPE_TEXTURE_USAGE_DEPTH_STENCIL 0x8
#define PIPE_TEXTURE_USAGE_SAMPLER 0x10
+#define PIPE_TEXTURE_USAGE_DYNAMIC 0x20
+/** Pipe driver custom usage flags should be greater or equal to this value */
+#define PIPE_TEXTURE_USAGE_CUSTOM (1 << 16)
#define PIPE_TEXTURE_GEOM_NON_SQUARE 0x1
#define PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO 0x2
@@ -191,6 +194,16 @@ enum pipe_texture_target {
/**
+ * Transfer object usage flags
+ */
+enum pipe_transfer_usage {
+ PIPE_TRANSFER_READ,
+ PIPE_TRANSFER_WRITE,
+ PIPE_TRANSFER_READ_WRITE /**< Read/modify/write */
+};
+
+
+/**
* Buffer usage flags
*/
#define PIPE_BUFFER_USAGE_CPU_READ (1 << 0)
@@ -201,6 +214,7 @@ enum pipe_texture_target {
#define PIPE_BUFFER_USAGE_VERTEX (1 << 5)
#define PIPE_BUFFER_USAGE_INDEX (1 << 6)
#define PIPE_BUFFER_USAGE_CONSTANT (1 << 7)
+#define PIPE_BUFFER_USAGE_DISCARD (1 << 8)
/** Pipe driver custom usage flags should be greater or equal to this value */
#define PIPE_BUFFER_USAGE_CUSTOM (1 << 16)
@@ -243,6 +257,7 @@ enum pipe_texture_target {
#define PIPE_PRIM_QUADS 7
#define PIPE_PRIM_QUAD_STRIP 8
#define PIPE_PRIM_POLYGON 9
+#define PIPE_PRIM_MAX 10
/**
@@ -292,6 +307,7 @@ enum pipe_texture_target {
#define PIPE_CAP_GUARD_BAND_BOTTOM 23 /*< float */
#define PIPE_CAP_TEXTURE_MIRROR_CLAMP 24
#define PIPE_CAP_TEXTURE_MIRROR_REPEAT 25
+#define PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS 26
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 546cf5d9b4..3f65a60436 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -1,6 +1,7 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (c) 2008 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -29,8 +30,9 @@
#define PIPE_FORMAT_H
#include "p_compiler.h"
-#include "p_debug.h"
+/* FIXME: remove these header dependencies */
+#include "util/u_debug.h"
#include "util/u_string.h"
#ifdef __cplusplus
@@ -245,13 +247,14 @@ static INLINE uint pf_rev(pipe_format_ycbcr_t f)
/**
* Compresssed format layouts (this will probably change)
*/
-#define _PIPE_FORMAT_DXT( LEVEL, RSIZE, GSIZE, BSIZE, ASIZE ) \
+#define _PIPE_FORMAT_DXT( LEVEL, RSIZE, GSIZE, BSIZE, ASIZE, TYPE ) \
((PIPE_FORMAT_LAYOUT_DXT << 0) | \
((LEVEL) << 2) | \
((RSIZE) << 5) | \
((GSIZE) << 8) | \
((BSIZE) << 11) | \
- ((ASIZE) << 14) )
+ ((ASIZE) << 14) | \
+ ((TYPE) << 29))
@@ -360,20 +363,30 @@ enum pipe_format {
PIPE_FORMAT_R32G32B32A32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_FIXED ),
/* sRGB formats */
PIPE_FORMAT_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ),
- PIPE_FORMAT_A8_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ),
+ PIPE_FORMAT_A8L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ),
PIPE_FORMAT_R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ),
PIPE_FORMAT_R8G8B8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ),
PIPE_FORMAT_R8G8B8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ),
+ PIPE_FORMAT_A8R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ARGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ),
+ PIPE_FORMAT_X8R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_1RGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ),
+ PIPE_FORMAT_B8G8R8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGRA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ),
+ PIPE_FORMAT_B8G8R8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ),
/* mixed formats */
PIPE_FORMAT_X8UB8UG8SR8S_NORM = _PIPE_FORMAT_MIXED( _PIPE_FORMAT_1BGR, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 ),
PIPE_FORMAT_B6UG5SR5S_NORM = _PIPE_FORMAT_MIXED( _PIPE_FORMAT_BGR1, 6, 5, 5, 0, 0, 1, 1, 0, 1, 0 ),
/* compressed formats */
- PIPE_FORMAT_DXT1_RGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0 ),
- PIPE_FORMAT_DXT1_RGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8 ),
- PIPE_FORMAT_DXT3_RGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8 ),
- PIPE_FORMAT_DXT5_RGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8 )
+ PIPE_FORMAT_DXT1_RGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0, PIPE_FORMAT_TYPE_UNORM ),
+ PIPE_FORMAT_DXT1_RGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ),
+ PIPE_FORMAT_DXT3_RGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ),
+ PIPE_FORMAT_DXT5_RGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ),
+
+ /* sRGB, compressed */
+ PIPE_FORMAT_DXT1_SRGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0, PIPE_FORMAT_TYPE_SRGB ),
+ PIPE_FORMAT_DXT1_SRGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ),
+ PIPE_FORMAT_DXT3_SRGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ),
+ PIPE_FORMAT_DXT5_SRGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB )
};
/**
@@ -477,12 +490,16 @@ pf_get_block(enum pipe_format format, struct pipe_format_block *block)
switch(format) {
case PIPE_FORMAT_DXT1_RGBA:
case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_SRGBA:
+ case PIPE_FORMAT_DXT1_SRGB:
block->size = 8;
block->width = 4;
block->height = 4;
break;
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
+ case PIPE_FORMAT_DXT3_SRGBA:
+ case PIPE_FORMAT_DXT5_SRGBA:
block->size = 16;
block->width = 4;
block->height = 4;
@@ -540,7 +557,7 @@ pf_has_alpha( enum pipe_format format )
/* FIXME: pf_get_component_bits( PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_COMP_A ) should not return 0 right? */
if(format == PIPE_FORMAT_A8_UNORM ||
format == PIPE_FORMAT_A8L8_UNORM ||
- format == PIPE_FORMAT_A8_L8_SRGB)
+ format == PIPE_FORMAT_A8L8_SRGB)
return TRUE;
return pf_get_component_bits( format, PIPE_FORMAT_COMP_A ) ? TRUE : FALSE;
case PIPE_FORMAT_LAYOUT_YCBCR:
@@ -550,6 +567,9 @@ pf_has_alpha( enum pipe_format format )
case PIPE_FORMAT_DXT1_RGBA:
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
+ case PIPE_FORMAT_DXT1_SRGBA:
+ case PIPE_FORMAT_DXT3_SRGBA:
+ case PIPE_FORMAT_DXT5_SRGBA:
return TRUE;
default:
return FALSE;
diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h
index d70de8e301..ffbe2d7612 100644
--- a/src/gallium/include/pipe/p_inlines.h
+++ b/src/gallium/include/pipe/p_inlines.h
@@ -31,7 +31,6 @@
#include "p_context.h"
#include "p_defines.h"
#include "p_screen.h"
-#include "p_winsys.h"
#ifdef __cplusplus
@@ -39,40 +38,6 @@ extern "C" {
#endif
-/* XXX: these are a kludge. will fix when all surfaces are views into
- * textures, and free-floating winsys surfaces go away.
- */
-static INLINE void *
-pipe_surface_map( struct pipe_surface *surf, unsigned flags )
-{
- if (surf->texture) {
- struct pipe_screen *screen = surf->texture->screen;
- return surf->texture->screen->surface_map( screen, surf, flags );
- }
- else {
- struct pipe_winsys *winsys = surf->winsys;
- char *map = (char *)winsys->buffer_map( winsys, surf->buffer, flags );
- if (map == NULL)
- return NULL;
- return (void *)(map + surf->offset);
- }
-}
-
-static INLINE void
-pipe_surface_unmap( struct pipe_surface *surf )
-{
- if (surf->texture) {
- struct pipe_screen *screen = surf->texture->screen;
- surf->texture->screen->surface_unmap( screen, surf );
- }
- else {
- struct pipe_winsys *winsys = surf->winsys;
- winsys->buffer_unmap( winsys, surf->buffer );
- }
-}
-
-
-
/**
* Set 'ptr' to point to 'surf' and update reference counting.
* The old thing pointed to, if any, will be unreferenced first.
@@ -82,23 +47,17 @@ static INLINE void
pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
{
/* bump the refcount first */
- if (surf)
+ if (surf) {
+ assert(surf->refcount);
surf->refcount++;
+ }
if (*ptr) {
-
- /* There are currently two sorts of surfaces... This needs to be
- * fixed so that all surfaces are views into a texture.
- */
- if ((*ptr)->texture) {
- struct pipe_screen *screen = (*ptr)->texture->screen;
- screen->tex_surface_release( screen, ptr );
- }
- else {
- struct pipe_winsys *winsys = (*ptr)->winsys;
- winsys->surface_release(winsys, ptr);
- }
-
+ struct pipe_screen *screen;
+ assert((*ptr)->refcount);
+ assert((*ptr)->texture);
+ screen = (*ptr)->texture->screen;
+ screen->tex_surface_release( screen, ptr );
assert(!*ptr);
}
@@ -106,24 +65,31 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
}
-/* XXX: thread safety issues!
+/**
+ * \sa pipe_surface_reference
*/
static INLINE void
-winsys_buffer_reference(struct pipe_winsys *winsys,
- struct pipe_buffer **ptr,
- struct pipe_buffer *buf)
+pipe_transfer_reference(struct pipe_transfer **ptr, struct pipe_transfer *trans)
{
- if (buf)
- buf->refcount++;
+ /* bump the refcount first */
+ if (trans) {
+ assert(trans->refcount);
+ trans->refcount++;
+ }
- if (*ptr && --(*ptr)->refcount == 0)
- winsys->buffer_destroy( winsys, *ptr );
+ if (*ptr) {
+ struct pipe_screen *screen;
+ assert((*ptr)->refcount);
+ assert((*ptr)->texture);
+ screen = (*ptr)->texture->screen;
+ screen->tex_transfer_release( screen, ptr );
+ assert(!*ptr);
+ }
- *ptr = buf;
+ *ptr = trans;
}
-
/**
* \sa pipe_surface_reference
*/
@@ -133,12 +99,15 @@ pipe_texture_reference(struct pipe_texture **ptr,
{
assert(ptr);
- if (pt)
+ if (pt) {
+ assert(pt->refcount);
pt->refcount++;
+ }
if (*ptr) {
struct pipe_screen *screen = (*ptr)->screen;
assert(screen);
+ assert((*ptr)->refcount);
screen->texture_release(screen, ptr);
assert(!*ptr);
@@ -154,32 +123,27 @@ pipe_texture_release(struct pipe_texture **ptr)
struct pipe_screen *screen;
assert(ptr);
screen = (*ptr)->screen;
+ assert((*ptr)->refcount);
screen->texture_release(screen, ptr);
*ptr = NULL;
}
/**
- * Convenience wrappers for winsys buffer functions.
+ * Convenience wrappers for screen buffer functions.
*/
static INLINE struct pipe_buffer *
pipe_buffer_create( struct pipe_screen *screen,
unsigned alignment, unsigned usage, unsigned size )
{
- return screen->winsys->buffer_create(screen->winsys, alignment, usage, size);
+ return screen->buffer_create(screen, alignment, usage, size);
}
static INLINE struct pipe_buffer *
pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size )
{
- return screen->winsys->user_buffer_create(screen->winsys, ptr, size);
-}
-
-static INLINE void
-pipe_buffer_destroy( struct pipe_screen *screen, struct pipe_buffer *buf )
-{
- screen->winsys->buffer_destroy(screen->winsys, buf);
+ return screen->user_buffer_create(screen, ptr, size);
}
static INLINE void *
@@ -187,25 +151,36 @@ pipe_buffer_map(struct pipe_screen *screen,
struct pipe_buffer *buf,
unsigned usage)
{
- return screen->winsys->buffer_map(screen->winsys, buf, usage);
+ return screen->buffer_map(screen, buf, usage);
}
static INLINE void
pipe_buffer_unmap(struct pipe_screen *screen,
struct pipe_buffer *buf)
{
- screen->winsys->buffer_unmap(screen->winsys, buf);
+ screen->buffer_unmap(screen, buf);
}
-/* XXX when we're using this everywhere, get rid of
- * winsys_buffer_reference() above.
+/* XXX: thread safety issues!
*/
static INLINE void
pipe_buffer_reference(struct pipe_screen *screen,
struct pipe_buffer **ptr,
struct pipe_buffer *buf)
{
- winsys_buffer_reference(screen->winsys, ptr, buf);
+ if (buf) {
+ assert(buf->refcount);
+ buf->refcount++;
+ }
+
+ if (*ptr) {
+ assert((*ptr)->refcount);
+ if(--(*ptr)->refcount == 0) {
+ screen->buffer_destroy( screen, *ptr );
+ }
+ }
+
+ *ptr = buf;
}
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index b15affef7a..341d1caea0 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -26,6 +26,8 @@
**************************************************************************/
/**
+ * @file
+ *
* Screen, Adapter or GPU
*
* These are driver functions/facilities that are context independent.
@@ -37,7 +39,8 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
+#include "pipe/p_format.h"
+#include "pipe/p_defines.h"
@@ -46,6 +49,12 @@ extern "C" {
#endif
+/** Opaque type */
+struct pipe_fence_handle;
+struct pipe_winsys;
+struct pipe_buffer;
+
+
/**
* Gallium screen/adapter context. Basically everything
@@ -101,7 +110,7 @@ struct pipe_screen {
*/
struct pipe_texture * (*texture_blanket)(struct pipe_screen *,
const struct pipe_texture *templat,
- const unsigned *pitch,
+ const unsigned *stride,
struct pipe_buffer *buffer);
void (*texture_release)(struct pipe_screen *,
@@ -120,13 +129,127 @@ struct pipe_screen {
struct pipe_surface ** );
- void *(*surface_map)( struct pipe_screen *,
- struct pipe_surface *surface,
- unsigned flags );
+ /** Get a transfer object for transferring data to/from a texture */
+ struct pipe_transfer *(*get_tex_transfer)(struct pipe_screen *,
+ struct pipe_texture *texture,
+ unsigned face, unsigned level,
+ unsigned zslice,
+ enum pipe_transfer_usage usage,
+ unsigned x, unsigned y,
+ unsigned w, unsigned h);
- void (*surface_unmap)( struct pipe_screen *,
- struct pipe_surface *surface );
+ /* Transfer objects allocated by the above must be released here:
+ */
+ void (*tex_transfer_release)( struct pipe_screen *,
+ struct pipe_transfer ** );
+ void *(*transfer_map)( struct pipe_screen *,
+ struct pipe_transfer *transfer );
+
+ void (*transfer_unmap)( struct pipe_screen *,
+ struct pipe_transfer *transfer );
+
+
+ /**
+ * Buffer management. Buffer attributes are mostly fixed over its lifetime.
+ *
+ */
+ struct pipe_buffer *(*buffer_create)( struct pipe_screen *screen,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size );
+
+ /**
+ * Create a buffer that wraps user-space data.
+ *
+ * Effectively this schedules a delayed call to buffer_create
+ * followed by an upload of the data at *some point in the future*,
+ * or perhaps never. Basically the allocate/upload is delayed
+ * until the buffer is actually passed to hardware.
+ *
+ * The intention is to provide a quick way to turn regular data
+ * into a buffer, and secondly to avoid a copy operation if that
+ * data subsequently turns out to be only accessed by the CPU.
+ *
+ * Common example is OpenGL vertex buffers that are subsequently
+ * processed either by software TNL in the driver or by passing to
+ * hardware.
+ *
+ * XXX: What happens if the delayed call to buffer_create() fails?
+ *
+ * Note that ptr may be accessed at any time up to the time when the
+ * buffer is destroyed, so the data must not be freed before then.
+ */
+ struct pipe_buffer *(*user_buffer_create)(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes);
+
+ /**
+ * Allocate storage for a display target surface.
+ *
+ * Often surfaces which are meant to be blitted to the front screen (i.e.,
+ * display targets) must be allocated with special characteristics, memory
+ * pools, or obtained directly from the windowing system.
+ *
+ * This callback is invoked by the pipe_screen when creating a texture marked
+ * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying
+ * buffer storage.
+ */
+ struct pipe_buffer *(*surface_buffer_create)(struct pipe_screen *screen,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned usage,
+ unsigned *stride);
+
+
+ /**
+ * Map the entire data store of a buffer object into the client's address.
+ * usage is a bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags.
+ */
+ void *(*buffer_map)( struct pipe_screen *screen,
+ struct pipe_buffer *buf,
+ unsigned usage );
+
+ void (*buffer_unmap)( struct pipe_screen *screen,
+ struct pipe_buffer *buf );
+
+ void (*buffer_destroy)( struct pipe_screen *screen,
+ struct pipe_buffer *buf );
+
+
+ /**
+ * Do any special operations to ensure frontbuffer contents are
+ * displayed, eg copy fake frontbuffer.
+ */
+ void (*flush_frontbuffer)( struct pipe_screen *screen,
+ struct pipe_surface *surf,
+ void *context_private );
+
+
+
+ /** Set ptr = fence, with reference counting */
+ void (*fence_reference)( struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence );
+
+ /**
+ * Checks whether the fence has been signalled.
+ * \param flag driver-specific meaning
+ * \return zero on success.
+ */
+ int (*fence_signalled)( struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag );
+
+ /**
+ * Wait for the fence to finish.
+ * \param flag driver-specific meaning
+ * \return zero on success.
+ */
+ int (*fence_finish)( struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag );
+
};
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index d591f046fb..35df70e7b7 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -1,4 +1,31 @@
-#if !defined TGSI_TOKEN_H
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TGSI_TOKEN_H
#define TGSI_TOKEN_H
#ifdef __cplusplus
@@ -36,10 +63,10 @@ struct tgsi_processor
struct tgsi_token
{
- unsigned Type : 4; /* TGSI_TOKEN_TYPE_ */
- unsigned Size : 8; /* UINT */
+ unsigned Type : 4; /**< TGSI_TOKEN_TYPE_x */
+ unsigned NrTokens : 8; /**< UINT */
unsigned Padding : 19;
- unsigned Extended : 1; /* BOOL */
+ unsigned Extended : 1; /**< BOOL */
};
enum tgsi_file_type {
@@ -79,22 +106,22 @@ enum tgsi_file_type {
struct tgsi_declaration
{
- unsigned Type : 4; /* TGSI_TOKEN_TYPE_DECLARATION */
- unsigned Size : 8; /* UINT */
- unsigned File : 4; /* one of TGSI_FILE_x */
- unsigned UsageMask : 4; /* bitmask of TGSI_WRITEMASK_x flags */
- unsigned Interpolate : 4; /* TGSI_INTERPOLATE_ */
- unsigned Semantic : 1; /* BOOL, any semantic info? */
- unsigned Centroid : 1; /* centroid sampling */
- unsigned Invariant : 1; /* invariant optimization */
+ unsigned Type : 4; /**< TGSI_TOKEN_TYPE_DECLARATION */
+ unsigned NrTokens : 8; /**< UINT */
+ unsigned File : 4; /**< one of TGSI_FILE_x */
+ unsigned UsageMask : 4; /**< bitmask of TGSI_WRITEMASK_x flags */
+ unsigned Interpolate : 4; /**< one of TGSI_INTERPOLATE_x */
+ unsigned Semantic : 1; /**< BOOL, any semantic info? */
+ unsigned Centroid : 1; /**< centroid sampling? */
+ unsigned Invariant : 1; /**< invariant optimization? */
unsigned Padding : 4;
- unsigned Extended : 1; /* BOOL */
+ unsigned Extended : 1; /**< BOOL */
};
struct tgsi_declaration_range
{
- unsigned First : 16; /* UINT */
- unsigned Last : 16; /* UINT */
+ unsigned First : 16; /**< UINT */
+ unsigned Last : 16; /**< UINT */
};
#define TGSI_SEMANTIC_POSITION 0
@@ -108,8 +135,8 @@ struct tgsi_declaration_range
struct tgsi_declaration_semantic
{
- unsigned SemanticName : 8; /* one of TGSI_SEMANTIC_ */
- unsigned SemanticIndex : 16; /* UINT */
+ unsigned SemanticName : 8; /**< one of TGSI_SEMANTIC_x */
+ unsigned SemanticIndex : 16; /**< UINT */
unsigned Padding : 8;
};
@@ -117,11 +144,11 @@ struct tgsi_declaration_semantic
struct tgsi_immediate
{
- unsigned Type : 4; /* TGSI_TOKEN_TYPE_IMMEDIATE */
- unsigned Size : 8; /* UINT */
- unsigned DataType : 4; /* TGSI_IMM_ */
+ unsigned Type : 4; /**< TGSI_TOKEN_TYPE_IMMEDIATE */
+ unsigned NrTokens : 8; /**< UINT */
+ unsigned DataType : 4; /**< one of TGSI_IMM_x */
unsigned Padding : 15;
- unsigned Extended : 1; /* BOOL */
+ unsigned Extended : 1; /**< BOOL */
};
struct tgsi_immediate_float32
@@ -398,7 +425,7 @@ struct tgsi_immediate_float32
#define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */
#define TGSI_SAT_MINUS_PLUS_ONE 2 /* clamp to [-1,1] */
-/*
+/**
* Opcode is the operation code to execute. A given operation defines the
* semantics how the source registers (if any) are interpreted and what is
* written to the destination registers (if any) as a result of execution.
@@ -415,7 +442,7 @@ struct tgsi_immediate_float32
struct tgsi_instruction
{
unsigned Type : 4; /* TGSI_TOKEN_TYPE_INSTRUCTION */
- unsigned Size : 8; /* UINT */
+ unsigned NrTokens : 8; /* UINT */
unsigned Opcode : 8; /* TGSI_OPCODE_ */
unsigned Saturate : 2; /* TGSI_SAT_ */
unsigned NumDstRegs : 2; /* UINT */
@@ -431,7 +458,7 @@ struct tgsi_instruction
*
* Then, tgsi_instruction::NumSrcRegs of tgsi_src_register follow.
*
- * tgsi_instruction::Size contains the total number of words that make the
+ * tgsi_instruction::NrTokens contains the total number of words that make the
* instruction, including the instruction word.
*/
@@ -483,7 +510,7 @@ struct tgsi_instruction_ext
#define TGSI_SWIZZLE_Z 2
#define TGSI_SWIZZLE_W 3
-/*
+/**
* Precision controls the precision at which the operation should be executed.
*
* CondDstUpdate enables condition code register writes. When this field is
@@ -550,7 +577,7 @@ struct tgsi_instruction_ext_predicate
unsigned Extended : 1; /* BOOL */
};
-/*
+/**
* File specifies the register array to access.
*
* Index specifies the element number of a register in the register file.
@@ -582,7 +609,7 @@ struct tgsi_src_register
unsigned Extended : 1; /* BOOL */
};
-/*
+/**
* If tgsi_src_register::Extended is TRUE, tgsi_src_register_ext follows.
*
* Then, if tgsi_src_register::Indirect is TRUE, another tgsi_src_register
@@ -601,7 +628,7 @@ struct tgsi_src_register_ext
unsigned Extended : 1; /* BOOL */
};
-/*
+/**
* If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_SWZ,
* it should be cast to tgsi_src_register_ext_swz.
*
@@ -619,7 +646,7 @@ struct tgsi_src_register_ext
#define TGSI_EXTSWIZZLE_ZERO 4
#define TGSI_EXTSWIZZLE_ONE 5
-/*
+/**
* ExtSwizzleX, ExtSwizzleY, ExtSwizzleZ and ExtSwizzleW swizzle the source
* register in an extended manner.
*
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index da783389da..a2e839da5c 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -27,6 +27,8 @@
/**
+ * @file
+ *
* Abstract graphics pipe state objects.
*
* Basic notes:
@@ -64,7 +66,6 @@ extern "C" {
/* fwd decls */
struct pipe_screen;
struct pipe_surface;
-struct pipe_winsys;
@@ -160,7 +161,6 @@ struct pipe_clip_state
struct pipe_constant_buffer
{
struct pipe_buffer *buffer;
- unsigned size; /** in bytes (XXX: redundant!) */
};
@@ -170,7 +170,8 @@ struct pipe_shader_state
};
-struct pipe_depth_state {
+struct pipe_depth_state
+{
unsigned enabled:1; /**< depth test enabled? */
unsigned writemask:1; /**< allow depth buffer writes? */
unsigned func:3; /**< depth test func (PIPE_FUNC_x) */
@@ -178,22 +179,24 @@ struct pipe_depth_state {
};
-struct pipe_stencil_state {
+struct pipe_stencil_state
+{
unsigned enabled:1; /**< stencil[0]: stencil enabled, stencil[1]: two-side enabled */
unsigned func:3; /**< PIPE_FUNC_x */
unsigned fail_op:3; /**< PIPE_STENCIL_OP_x */
unsigned zpass_op:3; /**< PIPE_STENCIL_OP_x */
unsigned zfail_op:3; /**< PIPE_STENCIL_OP_x */
- ubyte ref_value;
- ubyte value_mask;
- ubyte write_mask;
+ ubyte ref_value;
+ ubyte valuemask;
+ ubyte writemask;
};
-struct pipe_alpha_state {
+struct pipe_alpha_state
+{
unsigned enabled:1;
unsigned func:3; /**< PIPE_FUNC_x */
- float ref; /**< reference value */
+ float ref_value; /**< reference value */
};
@@ -236,7 +239,7 @@ struct pipe_framebuffer_state
unsigned width, height;
/** multiple colorbuffers for multiple render targets */
- unsigned num_cbufs;
+ unsigned nr_cbufs;
struct pipe_surface *cbufs[PIPE_MAX_COLOR_BUFS];
struct pipe_surface *zsbuf; /**< Z/stencil buffer */
@@ -272,24 +275,41 @@ struct pipe_sampler_state
*/
struct pipe_surface
{
- struct pipe_buffer *buffer; /**< surface's buffer/memory */
enum pipe_format format; /**< PIPE_FORMAT_x */
unsigned status; /**< PIPE_SURFACE_STATUS_x */
unsigned clear_value; /**< XXX may be temporary */
unsigned width; /**< logical width in pixels */
unsigned height; /**< logical height in pixels */
- struct pipe_format_block block;
- unsigned nblocksx; /**< allocated width in blocks */
- unsigned nblocksy; /**< allocated height in blocks */
- unsigned stride; /**< stride in bytes between rows of blocks */
unsigned layout; /**< PIPE_SURFACE_LAYOUT_x */
unsigned offset; /**< offset from start of buffer, in bytes */
unsigned refcount;
unsigned usage; /**< PIPE_BUFFER_USAGE_* */
- struct pipe_winsys *winsys; /**< winsys which owns/created the surface */
+ struct pipe_texture *texture; /**< texture into which this is a view */
+ unsigned face;
+ unsigned level;
+ unsigned zslice;
+};
+
+
+/**
+ * Transfer object. For data transfer to/from a texture.
+ */
+struct pipe_transfer
+{
+ enum pipe_format format; /**< PIPE_FORMAT_x */
+ unsigned x; /**< x offset from start of texture image */
+ unsigned y; /**< y offset from start of texture image */
+ unsigned width; /**< logical width in pixels */
+ unsigned height; /**< logical height in pixels */
+ struct pipe_format_block block;
+ unsigned nblocksx; /**< allocated width in blocks */
+ unsigned nblocksy; /**< allocated height in blocks */
+ unsigned stride; /**< stride in bytes between rows of blocks */
+ unsigned refcount;
+ unsigned usage; /**< PIPE_TRANSFER_* */
- struct pipe_texture *texture; /**< optional texture into which this is a view */
+ struct pipe_texture *texture; /**< texture to transfer to/from */
unsigned face;
unsigned level;
unsigned zslice;
@@ -315,9 +335,9 @@ struct pipe_texture
unsigned last_level:8; /**< Index of last mipmap level present/defined */
unsigned compressed:1;
- unsigned nr_samples:8; /**< for multisampled surfaces, nr of samples */
+ unsigned nr_samples:8; /**< for multisampled surfaces, nr of samples */
- unsigned tex_usage; /* PIPE_TEXTURE_USAGE_* */
+ unsigned tex_usage; /* PIPE_TEXTURE_USAGE_* */
/* These are also refcounted:
*/
@@ -334,7 +354,7 @@ struct pipe_texture
*/
struct pipe_vertex_buffer
{
- unsigned pitch; /**< stride to same attrib in next vertex, in bytes */
+ unsigned stride; /**< stride to same attrib in next vertex, in bytes */
unsigned max_index; /**< number of vertices in this buffer */
unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
struct pipe_buffer *buffer; /**< the actual buffer */
diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h
index e01d5a602b..8af3cd958b 100644
--- a/src/gallium/include/pipe/p_thread.h
+++ b/src/gallium/include/pipe/p_thread.h
@@ -25,6 +25,8 @@
/**
+ * @file
+ *
* Thread, mutex, condition var and thread-specific data functions.
*/
diff --git a/src/gallium/include/state_tracker/drm_api.h b/src/gallium/include/state_tracker/drm_api.h
new file mode 100644
index 0000000000..54480fa047
--- /dev/null
+++ b/src/gallium/include/state_tracker/drm_api.h
@@ -0,0 +1,33 @@
+
+#ifndef _DRM_API_H_
+#define _DRM_API_H_
+
+struct pipe_screen;
+struct pipe_winsys;
+struct pipe_context;
+
+struct drm_api
+{
+ /**
+ * Screen and context creation functions.
+ *
+ * egl_tracker.c calls create_screen() with its DRM file descriptor and
+ * the device id it read from sysfs; egl_context.c calls create_context()
+ * with the screen returned by create_screen().
+ */
+ /*@{*/
+ struct pipe_screen* (*create_screen)(int drmFB, int pciID);
+ struct pipe_context* (*create_context)(struct pipe_screen *screen);
+ /*@}*/
+
+ /**
+ * Special buffer functions: map between a pipe_buffer and an integer
+ * handle. egl_surface.c hands the value returned by
+ * handle_from_buffer() to drmModeAddFB().
+ */
+ /*@{*/
+ struct pipe_buffer* (*buffer_from_handle)(struct pipe_winsys *winsys, const char *name, unsigned handle);
+ unsigned (*handle_from_buffer)(struct pipe_winsys *winsys, struct pipe_buffer *buffer);
+ /*@}*/
+};
+
+/**
+ * A driver needs to export this symbol
+ */
+extern struct drm_api drm_api_hocks;
+
+#endif
diff --git a/src/gallium/state_trackers/Makefile b/src/gallium/state_trackers/Makefile
new file mode 100644
index 0000000000..265ca468c2
--- /dev/null
+++ b/src/gallium/state_trackers/Makefile
@@ -0,0 +1,25 @@
+TOP = ../../..
+include $(TOP)/configs/current
+
+
+# Sub-directories to build; the list comes from the included configuration.
+SUBDIRS = $(GALLIUM_STATE_TRACKERS_DIRS)
+
+
+# None of these targets creates a file of the same name, so declare them
+# phony; otherwise a stray file called e.g. "clean" would disable the rule.
+.PHONY: default subdirs clean install
+
+default: subdirs
+
+
+# Run make in every listed directory that exists; stop at the first failure.
+subdirs:
+	@for dir in $(SUBDIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE)) || exit 1 ; \
+		fi \
+	done
+
+
+clean:
+	rm -f `find . -name \*.[oa]`
+	rm -f `find . -name depend`
+
+
+# Dummy install target
+install:
diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile
new file mode 100644
index 0000000000..ea4cec0bb8
--- /dev/null
+++ b/src/gallium/state_trackers/egl/Makefile
@@ -0,0 +1,28 @@
+# Builds the static library holding the EGL/DRM state tracker from every
+# .c file in this directory.
+TARGET = libegldrm.a
+CFILES = $(wildcard ./*.c)
+OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES))
+GALLIUMDIR = ../..
+TOP = ../../../..
+
+include ${TOP}/configs/current
+
+CFLAGS += -g -Wall -Werror-implicit-function-declaration -fPIC \
+	-I${GALLIUMDIR}/include \
+	-I${GALLIUMDIR}/auxiliary \
+	-I${TOP}/src/mesa/drivers/dri/common \
+	-I${TOP}/src/mesa \
+	-I$(TOP)/include \
+	-I$(TOP)/src/egl/main \
+	${LIBDRM_CFLAGS}
+
+#############################################
+
+# ".PHONY" must be written as a target with prerequisites; the previous
+# ".PHONY = all clean" only assigned a variable and declared nothing phony.
+.PHONY: all clean
+
+all: ${TARGET}
+
+${TARGET}: ${OBJECTS}
+	ar rcs $@ $^
+
+clean:
+	rm -rf ${OBJECTS} ${TARGET}
diff --git a/src/gallium/state_trackers/egl/egl_context.c b/src/gallium/state_trackers/egl/egl_context.c
new file mode 100644
index 0000000000..8564972b91
--- /dev/null
+++ b/src/gallium/state_trackers/egl/egl_context.c
@@ -0,0 +1,193 @@
+
+#include "utils.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "egl_tracker.h"
+
+#include "egllog.h"
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+
+#include "state_tracker/st_public.h"
+#include "state_tracker/drm_api.h"
+
+#include "GL/internal/glcore.h"
+
+#define need_GL_ARB_multisample
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
+#define need_GL_ARB_vertex_program
+#define need_GL_ARB_window_pos
+#define need_GL_EXT_blend_color
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_cull_vertex
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_framebuffer_object
+#define need_GL_EXT_multi_draw_arrays
+#define need_GL_EXT_secondary_color
+#define need_GL_NV_vertex_program
+#include "extension_helper.h"
+
+/**
+ * TODO HACK! FUGLY!
+ * Copied for intel extensions.
+ *
+ * Each entry pairs an extension name string with either the matching
+ * GL_*_functions symbol (made available by the need_GL_* defines above
+ * together with extension_helper.h) or NULL. The list ends with a
+ * {NULL, NULL} entry. egl_tracker.c passes this table to
+ * driInitExtensions().
+ */
+const struct dri_extension card_extensions[] = {
+ {"GL_ARB_multisample", GL_ARB_multisample_functions},
+ {"GL_ARB_multitexture", NULL},
+ {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
+ {"GL_ARB_texture_border_clamp", NULL},
+ {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions},
+ {"GL_ARB_texture_cube_map", NULL},
+ {"GL_ARB_texture_env_add", NULL},
+ {"GL_ARB_texture_env_combine", NULL},
+ {"GL_ARB_texture_env_dot3", NULL},
+ {"GL_ARB_texture_mirrored_repeat", NULL},
+ {"GL_ARB_texture_rectangle", NULL},
+ {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
+ {"GL_ARB_pixel_buffer_object", NULL},
+ {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
+ {"GL_ARB_window_pos", GL_ARB_window_pos_functions},
+ {"GL_EXT_blend_color", GL_EXT_blend_color_functions},
+ {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
+ {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
+ {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
+ {"GL_EXT_blend_subtract", NULL},
+ {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions},
+ {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions},
+ {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions},
+ {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions},
+ {"GL_EXT_packed_depth_stencil", NULL},
+ {"GL_EXT_pixel_buffer_object", NULL},
+ {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
+ {"GL_EXT_stencil_wrap", NULL},
+ {"GL_EXT_texture_edge_clamp", NULL},
+ {"GL_EXT_texture_env_combine", NULL},
+ {"GL_EXT_texture_env_dot3", NULL},
+ {"GL_EXT_texture_filter_anisotropic", NULL},
+ {"GL_EXT_texture_lod_bias", NULL},
+ {"GL_3DFX_texture_compression_FXT1", NULL},
+ {"GL_APPLE_client_storage", NULL},
+ {"GL_MESA_pack_invert", NULL},
+ {"GL_MESA_ycbcr_texture", NULL},
+ {"GL_NV_blend_square", NULL},
+ {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
+ {"GL_NV_vertex_program1_1", NULL},
+ {"GL_SGIS_generate_mipmap", NULL },
+ {NULL, NULL}
+};
+
+/**
+ * Create a rendering context for \p config.
+ *
+ * Allocates the drm_context, asks the driver (drm_api_hocks) for a pipe
+ * context on the device's screen and wraps it in a state tracker context.
+ *
+ * \param attrib_list  optional EGL_NONE-terminated list; no attributes are
+ *                     accepted yet, so any entry yields EGL_BAD_ATTRIBUTE.
+ * \return the new context handle, or EGL_NO_CONTEXT with the EGL error set
+ *         (EGL_BAD_CONFIG, EGL_BAD_ATTRIBUTE or EGL_BAD_ALLOC).
+ */
+EGLContext
+drm_create_context(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, EGLContext share_list, const EGLint *attrib_list)
+{
+   struct drm_device *dev = (struct drm_device *)drv;
+   struct drm_context *ctx;
+   /* NOTE(review): share_list is never looked up, so 'share' stays NULL and
+    * contexts are never shared -- confirm whether that is intended. */
+   struct drm_context *share = NULL;
+   struct st_context *st_share = NULL;
+   _EGLConfig *conf;
+   int i;
+   __GLcontextModes *visual;
+
+   conf = _eglLookupConfig(drv, dpy, config);
+   if (!conf) {
+      _eglError(EGL_BAD_CONFIG, "eglCreateContext");
+      return EGL_NO_CONTEXT;
+   }
+
+   for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) {
+      switch (attrib_list[i]) {
+         /* no attribs defined for now */
+      default:
+         _eglError(EGL_BAD_ATTRIBUTE, "eglCreateContext");
+         return EGL_NO_CONTEXT;
+      }
+   }
+
+   ctx = calloc(1, sizeof(*ctx));
+   if (!ctx)
+      goto err_c;
+
+   _eglInitContext(drv, dpy, &ctx->base, config, attrib_list);
+
+   ctx->pipe = drm_api_hocks.create_context(dev->screen);
+   if (!ctx->pipe)
+      goto err_pipe;
+
+   if (share)
+      st_share = share->st;
+
+   /* The visual is only needed while the state tracker context is built. */
+   visual = drm_visual_from_config(conf);
+   ctx->st = st_create_context(ctx->pipe, visual, st_share);
+   drm_visual_modes_destroy(visual);
+
+   if (!ctx->st)
+      goto err_gl;
+
+   /* generate handle and insert into hash table */
+   _eglSaveContext(&ctx->base);
+   assert(_eglGetContextHandle(&ctx->base));
+
+   return _eglGetContextHandle(&ctx->base);
+
+err_gl:
+   ctx->pipe->destroy(ctx->pipe);
+err_pipe:
+   free(ctx);
+err_c:
+   /* Every path reaching here failed to obtain a resource; report it
+    * instead of returning EGL_NO_CONTEXT with no error recorded. */
+   _eglError(EGL_BAD_ALLOC, "eglCreateContext");
+   return EGL_NO_CONTEXT;
+}
+
+/**
+ * Destroy a context.
+ *
+ * The handle is removed from the lookup table right away. If the context
+ * is still bound it is only flagged DeletePending; otherwise the state
+ * tracker context, the pipe context and the wrapper are released.
+ *
+ * \return EGL_TRUE on success, EGL_FALSE (EGL_BAD_CONTEXT) if the handle
+ *         does not resolve to a context.
+ */
+EGLBoolean
+drm_destroy_context(_EGLDriver *drv, EGLDisplay dpy, EGLContext context)
+{
+   struct drm_context *c = lookup_drm_context(context);
+
+   /* The lookup can fail (drm_make_current tests its result for NULL too). */
+   if (!c) {
+      _eglError(EGL_BAD_CONTEXT, "eglDestroyContext");
+      return EGL_FALSE;
+   }
+
+   _eglRemoveContext(&c->base);
+   if (c->base.IsBound) {
+      c->base.DeletePending = EGL_TRUE;
+   } else {
+      st_destroy_context(c->st);
+      c->pipe->destroy(c->pipe);
+      free(c);
+   }
+   return EGL_TRUE;
+}
+
+/**
+ * Bind \p context to the \p draw and \p read surfaces, or release the
+ * current binding when \p context does not resolve to a context.
+ *
+ * \return EGL_TRUE on success; EGL_FALSE if the core _eglMakeCurrent()
+ *         check fails or a context is given without both surfaces.
+ */
+EGLBoolean
+drm_make_current(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext context)
+{
+   struct drm_surface *readSurf = lookup_drm_surface(read);
+   struct drm_surface *drawSurf = lookup_drm_surface(draw);
+   struct drm_context *ctx = lookup_drm_context(context);
+   EGLBoolean b;
+
+   b = _eglMakeCurrent(drv, dpy, draw, read, context);
+   if (!b)
+      return EGL_FALSE;
+
+   if (ctx) {
+      if (!drawSurf || !readSurf)
+         return EGL_FALSE;
+
+      drawSurf->user = ctx;
+      readSurf->user = ctx;
+
+      st_make_current(ctx->st, drawSurf->stfb, readSurf->stfb);
+
+      /* st_resize_framebuffer needs a bound context to work */
+      st_resize_framebuffer(drawSurf->stfb, drawSurf->w, drawSurf->h);
+      st_resize_framebuffer(readSurf->stfb, readSurf->w, readSurf->h);
+   } else {
+      /* Releasing the binding is normally done with EGL_NO_SURFACE for
+       * both surfaces, in which case the lookups yield NULL; only touch
+       * surfaces that actually exist. */
+      if (drawSurf)
+         drawSurf->user = NULL;
+      if (readSurf)
+         readSurf->user = NULL;
+
+      st_make_current(NULL, NULL, NULL);
+   }
+
+   return EGL_TRUE;
+}
diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c
new file mode 100644
index 0000000000..281dff9f8a
--- /dev/null
+++ b/src/gallium/state_trackers/egl/egl_surface.c
@@ -0,0 +1,409 @@
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "egl_tracker.h"
+
+#include "egllog.h"
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+
+#include "state_tracker/drm_api.h"
+
+/*
+ * Util functions
+ */
+
+/**
+ * Pick the connector mode matching \p mode in width, height and refresh
+ * rate. When nothing matches the connector's first mode is returned;
+ * NULL is returned only when the connector lists no modes.
+ */
+static drmModeModeInfoPtr
+drm_find_mode(drmModeConnectorPtr connector, _EGLMode *mode)
+{
+   int idx;
+
+   for (idx = 0; idx < connector->count_modes; idx++) {
+      drmModeModeInfoPtr candidate = &connector->modes[idx];
+
+      if (candidate->hdisplay == mode->Width &&
+          candidate->vdisplay == mode->Height &&
+          candidate->vrefresh == mode->RefreshRate)
+         return candidate;
+   }
+
+   /* if we can't find one, return first */
+   if (connector->count_modes > 0)
+      return &connector->modes[0];
+
+   return NULL;
+}
+
+/**
+ * Create a state tracker framebuffer whose color, depth and stencil
+ * formats are derived from the bit counts in \p visual.
+ *
+ * \param priv  opaque pointer forwarded to st_create_framebuffer()
+ */
+static struct st_framebuffer *
+drm_create_framebuffer(const __GLcontextModes *visual,
+                       unsigned width,
+                       unsigned height,
+                       void *priv)
+{
+   enum pipe_format color;
+   enum pipe_format depth;
+   enum pipe_format stencil;
+
+   /* 5 red bits selects the 16-bit format, anything else 32-bit ARGB */
+   color = (visual->redBits == 5) ? PIPE_FORMAT_R5G6B5_UNORM
+                                  : PIPE_FORMAT_A8R8G8B8_UNORM;
+
+   switch (visual->depthBits) {
+   case 16:
+      depth = PIPE_FORMAT_Z16_UNORM;
+      break;
+   case 24:
+      depth = PIPE_FORMAT_S8Z24_UNORM;
+      break;
+   default:
+      depth = PIPE_FORMAT_NONE;
+      break;
+   }
+
+   stencil = (visual->stencilBits == 8) ? PIPE_FORMAT_S8Z24_UNORM
+                                        : PIPE_FORMAT_NONE;
+
+   return st_create_framebuffer(visual, color, depth, stencil,
+                                width, height, priv);
+}
+
+/**
+ * Allocate the storage a screen is scanned out from: a buffer, a 2D
+ * A8R8G8B8 texture blanketed over it and a surface for level 0.
+ *
+ * On success the objects and the front-buffer description (size, pitch,
+ * handle) are stored in \p scrn. On failure whatever was created is
+ * released and \p scrn is left untouched; the caller detects that through
+ * scrn->buffer.
+ */
+static void
+drm_create_texture(_EGLDriver *drv,
+                   struct drm_screen *scrn,
+                   unsigned w, unsigned h)
+{
+   struct drm_device *dev = (struct drm_device *)drv;
+   struct pipe_screen *screen = dev->screen;
+   struct pipe_texture templat;
+   struct pipe_texture *texture;
+   struct pipe_surface *surface;
+   struct pipe_buffer *buf;
+   /* ugly: row length in pixels is 1024, or 2048 for anything wider */
+   unsigned stride = (w > 1024) ? 2048 : 1024;
+   unsigned pitch = stride * 4;
+   unsigned size = h * 2 * pitch;
+
+   buf = pipe_buffer_create(screen,
+                            0, /* alignment */
+                            PIPE_BUFFER_USAGE_GPU_READ_WRITE |
+                            PIPE_BUFFER_USAGE_CPU_READ_WRITE,
+                            size);
+   if (!buf)
+      goto err_buf;
+
+   memset(&templat, 0, sizeof(templat));
+   templat.target = PIPE_TEXTURE_2D;
+   templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+   templat.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+                       PIPE_TEXTURE_USAGE_RENDER_TARGET;
+   templat.last_level = 0;
+   templat.width[0] = w;
+   templat.height[0] = h;
+   templat.depth[0] = 1;
+   pf_get_block(templat.format, &templat.block);
+
+   texture = screen->texture_blanket(screen, &templat, &pitch, buf);
+   if (!texture)
+      goto err_tex;
+
+   surface = screen->get_tex_surface(screen, texture, 0, 0, 0,
+                                     PIPE_BUFFER_USAGE_GPU_WRITE);
+   if (!surface)
+      goto err_surf;
+
+   scrn->buffer = buf;
+   scrn->tex = texture;
+   scrn->surface = surface;
+
+   scrn->front.width = w;
+   scrn->front.height = h;
+   scrn->front.pitch = pitch;
+   scrn->front.handle = drm_api_hocks.handle_from_buffer(dev->winsys, scrn->buffer);
+   /* never taken (constant condition); keeps the err_handle label in use */
+   if (0)
+      goto err_handle;
+
+   return;
+
+err_handle:
+   pipe_surface_reference(&surface, NULL);
+err_surf:
+   pipe_texture_reference(&texture, NULL);
+err_tex:
+   pipe_buffer_reference(screen, &buf, NULL);
+err_buf:
+   return;
+}
+
+/*
+ * Exported functions
+ */
+
+/**
+ * Undo drm_show_screen_surface_mesa() for \p screen: detach the surface,
+ * reprogram the CRTC without a framebuffer, remove the DRM framebuffer and
+ * drop the references to the scan-out surface, texture and buffer.
+ */
+void
+drm_takedown_shown_screen(_EGLDriver *drv, struct drm_screen *screen)
+{
+   struct drm_device *dev = (struct drm_device *)drv;
+
+   screen->surf = NULL;
+
+   /* framebuffer id 0, position 0/0, no connector ids, no mode */
+   drmModeSetCrtc(dev->drmFD, screen->crtcID, 0, 0, 0, NULL, 0, NULL);
+
+   drmModeRmFB(dev->drmFD, screen->fbID);
+   drmModeFreeFB(screen->fb);
+   screen->fb = NULL;
+
+   /* release in reverse order of creation */
+   pipe_surface_reference(&screen->surface, NULL);
+   pipe_texture_reference(&screen->tex, NULL);
+   pipe_buffer_reference(dev->screen, &screen->buffer, NULL);
+
+   screen->shown = 0;
+}
+
+/**
+ * Window surfaces are not implemented; always yields EGL_NO_SURFACE.
+ */
+EGLSurface
+drm_create_window_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config,
+                          NativeWindowType window, const EGLint *attrib_list)
+{
+   return EGL_NO_SURFACE;
+}
+
+
+/**
+ * Pixmap surfaces are not implemented; always yields EGL_NO_SURFACE.
+ */
+EGLSurface
+drm_create_pixmap_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config,
+                          NativePixmapType pixmap, const EGLint *attrib_list)
+{
+   return EGL_NO_SURFACE;
+}
+
+
+/**
+ * Create an off-screen (pbuffer) surface.
+ *
+ * \param attrib_list  EGL_NONE-terminated list; only EGL_WIDTH and
+ *                     EGL_HEIGHT are accepted and both must be >= 1.
+ * \return the surface handle, or EGL_NO_SURFACE on a bad config, a bad
+ *         attribute or when an allocation fails.
+ */
+EGLSurface
+drm_create_pbuffer_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config,
+                           const EGLint *attrib_list)
+{
+   int i;
+   int width = -1;
+   int height = -1;
+   struct drm_surface *surf = NULL;
+   __GLcontextModes *visual;
+   _EGLConfig *conf;
+
+   conf = _eglLookupConfig(drv, dpy, config);
+   if (!conf) {
+      _eglError(EGL_BAD_CONFIG, "eglCreatePbufferSurface");
+      /* this function returns a surface, not a context */
+      return EGL_NO_SURFACE;
+   }
+
+   for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) {
+      switch (attrib_list[i]) {
+      case EGL_WIDTH:
+         width = attrib_list[++i];
+         break;
+      case EGL_HEIGHT:
+         height = attrib_list[++i];
+         break;
+      default:
+         _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface");
+         return EGL_NO_SURFACE;
+      }
+   }
+
+   if (width < 1 || height < 1) {
+      _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface");
+      return EGL_NO_SURFACE;
+   }
+
+   surf = calloc(1, sizeof(*surf));
+   if (!surf)
+      goto err;
+
+   if (!_eglInitSurface(drv, dpy, &surf->base, EGL_PBUFFER_BIT, config, attrib_list))
+      goto err_surf;
+
+   surf->w = width;
+   surf->h = height;
+
+   /* The visual is only needed while the framebuffer is being created. */
+   visual = drm_visual_from_config(conf);
+   surf->stfb = drm_create_framebuffer(visual,
+                                       width,
+                                       height,
+                                       (void*)surf);
+   drm_visual_modes_destroy(visual);
+
+   /* Do not publish a surface that has no framebuffer behind it. */
+   if (!surf->stfb)
+      goto err_surf;
+
+   _eglSaveSurface(&surf->base);
+   return surf->base.Handle;
+
+err_surf:
+   free(surf);
+err:
+   return EGL_NO_SURFACE;
+}
+
+/**
+ * A screen surface is created exactly like a pbuffer surface; it gets its
+ * scan-out storage later, in drm_show_screen_surface_mesa().
+ */
+EGLSurface
+drm_create_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, EGLConfig cfg,
+                               const EGLint *attrib_list)
+{
+   return drm_create_pbuffer_surface(drv, dpy, cfg, attrib_list);
+}
+
+/**
+ * Show \p surface on \p screen using mode \p m.
+ *
+ * Whatever the screen currently shows is taken down first. Then scan-out
+ * storage of the mode's size is created, registered as a DRM framebuffer
+ * and programmed on the first CRTC that one of the connector's encoders
+ * can drive.
+ *
+ * \return EGL_TRUE on success. EGL_FALSE if the screen, surface or mode
+ *         handle does not resolve, or if any allocation or DRM call fails;
+ *         everything created so far is released in that case.
+ */
+EGLBoolean
+drm_show_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy,
+                             EGLScreenMESA screen,
+                             EGLSurface surface, EGLModeMESA m)
+{
+   struct drm_device *dev = (struct drm_device *)drv;
+   struct drm_surface *surf = lookup_drm_surface(surface);
+   struct drm_screen *scrn = lookup_drm_screen(dpy, screen);
+   _EGLMode *mode = _eglLookupMode(dpy, m);
+   drmModeConnector *con;
+   int found_crtc = 0;
+   int ret;
+   int i, k;
+
+   if (!scrn)
+      return EGL_FALSE;
+
+   if (scrn->shown)
+      drm_takedown_shown_screen(drv, scrn);
+
+   /* NOTE(review): a call with no surface/mode now fails after the
+    * takedown instead of dereferencing NULL; confirm whether that case
+    * should report success. */
+   if (!surf || !mode)
+      return EGL_FALSE;
+
+   drm_create_texture(drv, scrn, mode->Width, mode->Height);
+   if (!scrn->buffer)
+      return EGL_FALSE;
+
+   ret = drmModeAddFB(dev->drmFD,
+                      scrn->front.width, scrn->front.height,
+                      32, 32, scrn->front.pitch,
+                      scrn->front.handle,
+                      &scrn->fbID);
+   if (ret)
+      goto err_bo;
+
+   scrn->fb = drmModeGetFB(dev->drmFD, scrn->fbID);
+   if (!scrn->fb)
+      goto err_rmfb;   /* the framebuffer was added, so it must be removed */
+
+   /* find a fitting crtc */
+   con = scrn->connector;
+
+   scrn->mode = drm_find_mode(con, mode);
+   if (!scrn->mode)
+      goto err_fb;
+
+   for (k = 0; k < con->count_encoders && !found_crtc; k++) {
+      drmModeEncoder *enc = drmModeGetEncoder(dev->drmFD, con->encoders[k]);
+
+      if (!enc)
+         continue;
+
+      for (i = 0; i < dev->res->count_crtcs; i++) {
+         if (enc->possible_crtcs & (1u << i)) {
+            /* save the ID and skip the rest */
+            scrn->crtcID = dev->res->crtcs[i];
+            found_crtc = 1;
+            break;
+         }
+      }
+      drmModeFreeEncoder(enc);
+   }
+
+   if (!found_crtc)
+      goto err_crtc;
+
+   ret = drmModeSetCrtc(dev->drmFD,
+                        scrn->crtcID,
+                        scrn->fbID,
+                        0, 0,
+                        &scrn->connectorID, 1,
+                        scrn->mode);
+   if (ret)
+      goto err_crtc;
+
+   surf->screen = scrn;
+
+   scrn->surf = surf;
+   scrn->shown = 1;
+
+   return EGL_TRUE;
+
+err_crtc:
+   scrn->crtcID = 0;
+
+err_fb:
+   drmModeFreeFB(scrn->fb);
+   scrn->fb = NULL;
+
+err_rmfb:
+   drmModeRmFB(dev->drmFD, scrn->fbID);
+
+err_bo:
+   pipe_surface_reference(&scrn->surface, NULL);
+   pipe_texture_reference(&scrn->tex, NULL);
+   pipe_buffer_reference(dev->screen, &scrn->buffer, NULL);
+
+   return EGL_FALSE;
+}
+
+/**
+ * Destroy a surface.
+ *
+ * The handle is removed from the lookup table right away. A surface that
+ * is still bound is only flagged DeletePending; otherwise the screen
+ * showing it (if any) is taken down and the framebuffer and the wrapper
+ * are released.
+ *
+ * \return EGL_TRUE on success, EGL_FALSE if the handle does not resolve.
+ */
+EGLBoolean
+drm_destroy_surface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface)
+{
+   struct drm_surface *surf = lookup_drm_surface(surface);
+
+   /* same guard as drm_swap_buffers(): the lookup may fail */
+   if (!surf)
+      return EGL_FALSE;
+
+   _eglRemoveSurface(&surf->base);
+   if (surf->base.IsBound) {
+      surf->base.DeletePending = EGL_TRUE;
+   } else {
+      if (surf->screen)
+         drm_takedown_shown_screen(drv, surf->screen);
+      st_unreference_framebuffer(surf->stfb);
+      free(surf);
+   }
+   return EGL_TRUE;
+}
+
+/**
+ * Present the back buffer of \p draw.
+ *
+ * If the surface is shown on a screen, its back-left buffer is copied
+ * into the screen's scan-out surface using the pipe context of the
+ * context the surface is current to, with a flush before and after.
+ *
+ * \return EGL_FALSE if the handle does not resolve or the core
+ *         _eglSwapBuffers() check fails, EGL_TRUE otherwise.
+ */
+EGLBoolean
+drm_swap_buffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw)
+{
+   struct drm_surface *surf = lookup_drm_surface(draw);
+   struct pipe_surface *back_surf;
+
+   if (!surf)
+      return EGL_FALSE;
+
+   /* error checking */
+   if (!_eglSwapBuffers(drv, dpy, draw))
+      return EGL_FALSE;
+
+   st_get_framebuffer_surface(surf->stfb, ST_SURFACE_BACK_LEFT, &back_surf);
+
+   if (back_surf) {
+
+      st_notify_swapbuffers(surf->stfb);
+
+      /* surf->user is reset to NULL by drm_make_current() on release, so
+       * there may be no pipe context to do the copy with. */
+      if (surf->screen && surf->user) {
+         struct pipe_context *pipe = surf->user->pipe;
+
+         pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE, NULL);
+         pipe->surface_copy(pipe,
+                            0,
+                            surf->screen->surface,
+                            0, 0,
+                            back_surf,
+                            0, 0,
+                            surf->w, surf->h);
+         pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE, NULL);
+         /* TODO stuff here */
+      }
+
+      st_notify_swapbuffers_complete(surf->stfb);
+   }
+
+   return EGL_TRUE;
+}
diff --git a/src/gallium/state_trackers/egl/egl_tracker.c b/src/gallium/state_trackers/egl/egl_tracker.c
new file mode 100644
index 0000000000..2813bf4360
--- /dev/null
+++ b/src/gallium/state_trackers/egl/egl_tracker.c
@@ -0,0 +1,217 @@
+
+#include "utils.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "egl_tracker.h"
+
+#include "egllog.h"
+#include "state_tracker/drm_api.h"
+
+#include "pipe/p_screen.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+/** HACK */
+void* driDriverAPI;
+extern const struct dri_extension card_extensions[];
+
+
+/*
+ * Exported functions
+ */
+
+/**
+ * Driver bootstrap entry point.
+ *
+ * Allocates a drm_device, fills its dispatch table with the fallback
+ * implementations and then overrides the entries this driver provides.
+ *
+ * \return the embedded _EGLDriver, or NULL if the allocation fails.
+ */
+_EGLDriver *
+_eglMain(_EGLDisplay *dpy, const char *args)
+{
+   struct drm_device *drm = (struct drm_device *) calloc(1, sizeof(*drm));
+
+   if (drm == NULL)
+      return NULL;
+
+   /* defaults first ... */
+   _eglInitDriverFallbacks(&drm->base);
+
+   /* ... then the DRM-specific entry points: display */
+   drm->base.API.Initialize = drm_initialize;
+   drm->base.API.Terminate = drm_terminate;
+
+   /* contexts */
+   drm->base.API.CreateContext = drm_create_context;
+   drm->base.API.DestroyContext = drm_destroy_context;
+   drm->base.API.MakeCurrent = drm_make_current;
+
+   /* surfaces */
+   drm->base.API.CreateWindowSurface = drm_create_window_surface;
+   drm->base.API.CreatePixmapSurface = drm_create_pixmap_surface;
+   drm->base.API.CreatePbufferSurface = drm_create_pbuffer_surface;
+   drm->base.API.DestroySurface = drm_destroy_surface;
+   drm->base.API.SwapBuffers = drm_swap_buffers;
+
+   /* MESA_screen_surface */
+   drm->base.API.CreateScreenSurfaceMESA = drm_create_screen_surface_mesa;
+   drm->base.API.ShowScreenSurfaceMESA = drm_show_screen_surface_mesa;
+
+   drm->base.Name = "DRM/Gallium/Win";
+   drm->base.ClientAPIsMask = EGL_OPENGL_BIT /*| EGL_OPENGL_ES_BIT*/;
+
+   /* enable supported extensions */
+   drm->base.Extensions.MESA_screen_surface = EGL_TRUE;
+   drm->base.Extensions.MESA_copy_context = EGL_TRUE;
+
+   return &drm->base;
+}
+
+/**
+ * Read the device id of DRM card 0 from sysfs into device->deviceID.
+ *
+ * The file holds the id as a hexadecimal number. If it cannot be opened,
+ * read or parsed a warning is logged and deviceID is left unchanged.
+ */
+static void
+drm_get_device_id(struct drm_device *device)
+{
+   char path[512];
+   FILE *file;
+   /* "%x" needs an unsigned int; deviceID itself is a plain int */
+   unsigned int id = 0;
+
+   /* TODO get the real minor */
+   int minor = 0;
+
+   snprintf(path, sizeof(path), "/sys/class/drm/card%d/device/device", minor);
+   file = fopen(path, "r");
+   if (!file) {
+      _eglLog(_EGL_WARNING, "Could not retrive device ID\n");
+      return;
+   }
+
+   /* the path buffer is reused to hold the line read from the file */
+   if (fgets(path, sizeof(path), file) && sscanf(path, "%x", &id) == 1)
+      device->deviceID = (int) id;
+   else
+      _eglLog(_EGL_WARNING, "Could not retrive device ID\n");
+
+   fclose(file);
+}
+
+/**
+ * Refresh the cached mode-setting resources: drop the previous snapshot
+ * and fetch a new one from the DRM file descriptor.
+ */
+static void
+drm_update_res(struct drm_device *dev)
+{
+   /* release the old snapshot first, then replace it */
+   drmModeFreeResources(dev->res);
+
+   dev->res = drmModeGetResources(dev->drmFD);
+}
+
+/**
+ * Register every mode the connector lists (size, refresh rate and name)
+ * with the EGL screen.
+ */
+static void
+drm_add_modes_from_connector(_EGLScreen *screen, drmModeConnectorPtr connector)
+{
+   int idx;
+
+   for (idx = 0; idx < connector->count_modes; idx++) {
+      drmModeModeInfoPtr info = &connector->modes[idx];
+
+      _eglAddNewMode(screen,
+                     info->hdisplay, info->vdisplay,
+                     info->vrefresh, info->name);
+   }
+}
+
+/**
+ * Initialize the driver for a display.
+ *
+ * Opens the "i915" DRM device, creates the pipe screen through
+ * drm_api_hocks, adds one EGL screen (with its modes) per connected
+ * connector, up to MAX_SCREENS, and registers a single pbuffer-capable
+ * config with 8/8/8/8 color, 24 depth and 8 stencil bits.
+ *
+ * \param major,minor  receive the EGL version, 1.4
+ * \return EGL_TRUE on success; EGL_FALSE if the device cannot be opened,
+ *         the screen cannot be created or the config cannot be allocated.
+ */
+EGLBoolean
+drm_initialize(_EGLDriver *drv, EGLDisplay dpy, EGLint *major, EGLint *minor)
+{
+   _EGLDisplay *disp = _eglLookupDisplay(dpy);
+   struct drm_device *dev = (struct drm_device *)drv;
+   struct drm_screen *screen = NULL;
+   drmModeConnectorPtr connector = NULL;
+   drmModeResPtr res = NULL;
+   _EGLConfig *config = NULL;
+   unsigned count_connectors = 0;
+   unsigned i;
+   int num_screens = 0;
+   int fd;
+
+   fd = drmOpen("i915", NULL);
+   if (fd < 0)
+      goto err_fd;
+
+   dev->drmFD = fd;
+   drm_get_device_id(dev);
+
+   dev->screen = drm_api_hocks.create_screen(dev->drmFD, dev->deviceID);
+   if (!dev->screen)
+      goto err_screen;
+   dev->winsys = dev->screen->winsys;
+
+   /* Allocate the config before anything is registered with the display,
+    * so that a failure can still be unwound completely. */
+   config = calloc(1, sizeof(*config));
+   if (!config)
+      goto err_config;
+
+   /* TODO HACK */
+   driInitExtensions(NULL, card_extensions, GL_FALSE);
+
+   drm_update_res(dev);
+   res = dev->res;
+   if (res)
+      count_connectors = res->count_connectors;
+   else
+      _eglLog(_EGL_WARNING, "Could not retrive kms information\n");
+
+   /* Bound by the number of screens actually stored, not by the connector
+    * index, so disconnected connectors do not use up slots. */
+   for (i = 0; i < count_connectors && num_screens < MAX_SCREENS; i++) {
+      connector = drmModeGetConnector(fd, res->connectors[i]);
+
+      if (!connector)
+         continue;
+
+      if (connector->connection != DRM_MODE_CONNECTED) {
+         drmModeFreeConnector(connector);
+         continue;
+      }
+
+      screen = calloc(1, sizeof(*screen));
+      if (!screen) {
+         _eglLog(_EGL_WARNING, "Could not allocate screen\n");
+         drmModeFreeConnector(connector);
+         break;
+      }
+
+      screen->connector = connector;
+      screen->connectorID = connector->connector_id;
+      _eglInitScreen(&screen->base);
+      _eglAddScreen(disp, &screen->base);
+      drm_add_modes_from_connector(&screen->base, connector);
+      dev->screens[num_screens++] = screen;
+   }
+   dev->count_screens = num_screens;
+
+   /* for now we only have one config */
+   _eglInitConfig(config, 1);
+   _eglSetConfigAttrib(config, EGL_RED_SIZE, 8);
+   _eglSetConfigAttrib(config, EGL_GREEN_SIZE, 8);
+   _eglSetConfigAttrib(config, EGL_BLUE_SIZE, 8);
+   _eglSetConfigAttrib(config, EGL_ALPHA_SIZE, 8);
+   _eglSetConfigAttrib(config, EGL_BUFFER_SIZE, 32);
+   _eglSetConfigAttrib(config, EGL_DEPTH_SIZE, 24);
+   _eglSetConfigAttrib(config, EGL_STENCIL_SIZE, 8);
+   _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, EGL_PBUFFER_BIT);
+   _eglAddConfig(disp, config);
+
+   drv->Initialized = EGL_TRUE;
+
+   *major = 1;
+   *minor = 4;
+
+   return EGL_TRUE;
+
+err_config:
+   dev->screen->destroy(dev->screen);
+   dev->screen = NULL;
+   dev->winsys = NULL;
+err_screen:
+   drmClose(fd);
+err_fd:
+   return EGL_FALSE;
+}
+
+/**
+ * Shut the driver down: take down every shown screen, free the
+ * connectors, screens and cached mode resources, destroy the pipe screen,
+ * close the DRM device and finally free the driver object itself.
+ *
+ * \return always EGL_TRUE
+ */
+EGLBoolean
+drm_terminate(_EGLDriver *drv, EGLDisplay dpy)
+{
+   struct drm_device *dev = (struct drm_device *)drv;
+   struct drm_screen *screen;
+   size_t i;
+
+   drmFreeVersion(dev->version);
+
+   for (i = 0; i < dev->count_screens; i++) {
+      screen = dev->screens[i];
+
+      if (screen->shown)
+         drm_takedown_shown_screen(drv, screen);
+
+      drmModeFreeConnector(screen->connector);
+      _eglDestroyScreen(&screen->base);
+      dev->screens[i] = NULL;
+   }
+
+   /* the snapshot taken in drm_update_res() was never released */
+   drmModeFreeResources(dev->res);
+   dev->res = NULL;
+
+   dev->screen->destroy(dev->screen);
+   dev->winsys = NULL;
+
+   drmClose(dev->drmFD);
+
+   _eglCleanupDisplay(_eglLookupDisplay(dpy));
+   free(dev);
+
+   return EGL_TRUE;
+}
diff --git a/src/gallium/state_trackers/egl/egl_tracker.h b/src/gallium/state_trackers/egl/egl_tracker.h
new file mode 100644
index 0000000000..908bab5f9b
--- /dev/null
+++ b/src/gallium/state_trackers/egl/egl_tracker.h
@@ -0,0 +1,191 @@
+
+#ifndef _EGL_TRACKER_H_
+#define _EGL_TRACKER_H_
+
+#include <stdint.h>
+
+#include "eglconfig.h"
+#include "eglcontext.h"
+#include "egldisplay.h"
+#include "egldriver.h"
+#include "eglglobals.h"
+#include "eglmode.h"
+#include "eglscreen.h"
+#include "eglsurface.h"
+
+#include "xf86drm.h"
+#include "xf86drmMode.h"
+
+#include "pipe/p_compiler.h"
+
+#include "state_tracker/st_public.h"
+
+#define MAX_SCREENS 16
+
+struct pipe_winsys;
+struct pipe_screen;
+struct pipe_context;
+struct state_tracker;
+
+struct drm_screen;
+struct drm_context;
+
+/**
+ * Per-driver state of the DRM EGL state tracker.
+ *
+ * The _EGLDriver base is the first member, so an _EGLDriver pointer can be
+ * cast to struct drm_device (drm_terminate() relies on this).
+ */
+struct drm_device
+{
+ _EGLDriver base; /* base class/object */
+
+ /*
+ * pipe
+ */
+
+ struct pipe_winsys *winsys;
+ /* Destroyed through screen->destroy() in drm_terminate(). */
+ struct pipe_screen *screen;
+
+ /*
+ * drm
+ */
+
+ /* DRM file descriptor; closed with drmClose() in drm_terminate(). */
+ int drmFD;
+ /* Released with drmFreeVersion() in drm_terminate(). */
+ drmVersionPtr version;
+ int deviceID;
+
+ drmModeResPtr res;
+
+ /* Screens registered by drm_initialize(); only the first count_screens
+ * entries are valid. */
+ struct drm_screen *screens[MAX_SCREENS];
+ size_t count_screens;
+};
+
+/**
+ * Driver-side surface object.
+ *
+ * The _EGLSurface base is the first member, so the pointer returned by
+ * _eglLookupSurface() can be cast to struct drm_surface
+ * (see lookup_drm_surface()).
+ */
+struct drm_surface
+{
+ _EGLSurface base; /* base class/object */
+
+ /*
+ * pipe
+ */
+
+
+ struct st_framebuffer *stfb;
+
+ /*
+ * drm
+ */
+
+ /* NOTE(review): presumably the context currently using this surface and
+ * the screen it is shown on -- confirm against egl_surface.c. */
+ struct drm_context *user;
+ struct drm_screen *screen;
+
+ /* NOTE(review): presumably the surface width and height in pixels. */
+ int w;
+ int h;
+};
+
+/**
+ * Driver-side context object.
+ *
+ * The _EGLContext base is the first member, so the pointer returned by
+ * _eglLookupContext() can be cast to struct drm_context
+ * (see lookup_drm_context()).
+ */
+struct drm_context
+{
+ _EGLContext base; /* base class/object */
+
+ /* pipe */
+
+ struct pipe_context *pipe;
+ struct st_context *st;
+};
+
+/**
+ * Driver-side screen object, one per connector registered by
+ * drm_initialize().
+ *
+ * The _EGLScreen base is the first member, so the pointer returned by
+ * _eglLookupScreen() can be cast to struct drm_screen
+ * (see lookup_drm_screen()).
+ */
+struct drm_screen
+{
+ _EGLScreen base;
+
+ /*
+ * pipe
+ */
+
+ struct pipe_buffer *buffer;
+ struct pipe_texture *tex;
+ struct pipe_surface *surface;
+
+ /*
+ * drm
+ */
+
+ /* NOTE(review): presumably describes the front buffer being displayed --
+ * confirm against egl_surface.c. */
+ struct {
+ unsigned height;
+ unsigned width;
+ unsigned pitch;
+ unsigned handle;
+ } front;
+
+ /* currently only support one connector */
+ /* Set in drm_initialize(); freed with drmModeFreeConnector() in
+ * drm_terminate(). connectorID caches connector->connector_id. */
+ drmModeConnectorPtr connector;
+ uint32_t connectorID;
+
+ /* Has this screen been shown */
+ /* When non-zero, drm_terminate() calls drm_takedown_shown_screen(). */
+ int shown;
+
+ /* Surface that is currently attached to this screen */
+ struct drm_surface *surf;
+
+ /* framebuffer */
+ drmModeFBPtr fb;
+ uint32_t fbID;
+
+ /* crtc and mode used */
+ /*drmModeCrtcPtr crtc;*/
+ uint32_t crtcID;
+
+ drmModeModeInfoPtr mode;
+};
+
+
+/**
+ * Resolve an EGLContext handle with _eglLookupContext() and return the
+ * result viewed as the driver's struct drm_context.
+ */
+static INLINE struct drm_context *
+lookup_drm_context(EGLContext context)
+{
+    return (struct drm_context *) _eglLookupContext(context);
+}
+
+
+/**
+ * Resolve an EGLSurface handle with _eglLookupSurface() and return the
+ * result viewed as the driver's struct drm_surface.
+ */
+static INLINE struct drm_surface *
+lookup_drm_surface(EGLSurface surface)
+{
+    return (struct drm_surface *) _eglLookupSurface(surface);
+}
+
+/**
+ * Resolve a screen handle on display \p dpy with _eglLookupScreen() and
+ * return the result viewed as the driver's struct drm_screen.
+ */
+static INLINE struct drm_screen *
+lookup_drm_screen(EGLDisplay dpy, EGLScreenMESA screen)
+{
+    return (struct drm_screen *) _eglLookupScreen(dpy, screen);
+}
+
+/**
+ * Visual helpers, implemented in egl_visual.c.
+ *
+ * drm_visual_modes_create() returns a linked list of \p count mode records
+ * (NULL when an allocation fails); drm_visual_modes_destroy() frees such a
+ * list.
+ */
+/*@{*/
+void drm_visual_modes_destroy(__GLcontextModes *modes);
+__GLcontextModes* drm_visual_modes_create(unsigned count, size_t minimum_size);
+__GLcontextModes* drm_visual_from_config(_EGLConfig *conf);
+/*@}*/
+
+/**
+ * Surface helpers.
+ * NOTE(review): presumably implemented in egl_surface.c -- confirm.
+ */
+/*@{*/
+void drm_takedown_shown_screen(_EGLDriver *drv, struct drm_screen *screen);
+/*@}*/
+
+/**
+ * All functions exported to the egl side.
+ */
+/*@{*/
+EGLBoolean drm_initialize(_EGLDriver *drv, EGLDisplay dpy, EGLint *major, EGLint *minor);
+EGLBoolean drm_terminate(_EGLDriver *drv, EGLDisplay dpy);
+EGLContext drm_create_context(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, EGLContext share_list, const EGLint *attrib_list);
+EGLBoolean drm_destroy_context(_EGLDriver *drv, EGLDisplay dpy, EGLContext context);
+EGLSurface drm_create_window_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativeWindowType window, const EGLint *attrib_list);
+EGLSurface drm_create_pixmap_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativePixmapType pixmap, const EGLint *attrib_list);
+EGLSurface drm_create_pbuffer_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list);
+EGLSurface drm_create_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, EGLConfig cfg, const EGLint *attrib_list);
+EGLBoolean drm_show_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, EGLScreenMESA screen, EGLSurface surface, EGLModeMESA m);
+EGLBoolean drm_destroy_surface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface);
+EGLBoolean drm_make_current(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext context);
+EGLBoolean drm_swap_buffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw);
+/*@}*/
+
+#endif
diff --git a/src/gallium/state_trackers/egl/egl_visual.c b/src/gallium/state_trackers/egl/egl_visual.c
new file mode 100644
index 0000000000..e59f893851
--- /dev/null
+++ b/src/gallium/state_trackers/egl/egl_visual.c
@@ -0,0 +1,85 @@
+
+#include "egl_tracker.h"
+
+#include "egllog.h"
+
+/**
+ * Free every node of a mode list produced by drm_visual_modes_create().
+ * A NULL list is accepted and only produces the debug log line.
+ */
+void
+drm_visual_modes_destroy(__GLcontextModes *modes)
+{
+    __GLcontextModes *cur = modes;
+
+    _eglLog(_EGL_DEBUG, "%s", __FUNCTION__);
+
+    while (cur != NULL) {
+        /* Read the link before the node holding it is released. */
+        __GLcontextModes *following = cur->next;
+
+        free(cur);
+        cur = following;
+    }
+}
+
+/**
+ * Allocate a singly linked list of \p count mode records.
+ *
+ * Each record is zero-initialised by calloc() and then given the
+ * "don't care" defaults set below. Every node is allocated with
+ * max(minimum_size, sizeof(__GLcontextModes)) bytes.
+ *
+ * \param count         number of list nodes to create.
+ * \param minimum_size  minimum allocation size of each node, in bytes.
+ * \return head of the list, or NULL if \p count is 0 or an allocation
+ *         failed (nodes allocated so far are freed in that case).
+ */
+__GLcontextModes *
+drm_visual_modes_create(unsigned count, size_t minimum_size)
+{
+    /* This code copied from libGLX, and modified */
+    const size_t size = (minimum_size > sizeof(__GLcontextModes))
+        ? minimum_size : sizeof(__GLcontextModes);
+    __GLcontextModes * head = NULL;
+    __GLcontextModes ** next;
+    unsigned i;
+
+    /* count is unsigned and minimum_size is size_t: "%d" does not match
+     * either argument, so print them as unsigned / unsigned long. */
+    _eglLog(_EGL_DEBUG, "%s %u %lu", __FUNCTION__, count,
+            (unsigned long) minimum_size);
+
+    next = & head;
+    for (i = 0 ; i < count ; i++) {
+        *next = (__GLcontextModes *) calloc(1, size);
+        if (*next == NULL) {
+            /* Out of memory: drop the partial list and report failure. */
+            drm_visual_modes_destroy(head);
+            head = NULL;
+            break;
+        }
+
+        (*next)->doubleBufferMode = 1;
+        (*next)->visualID = GLX_DONT_CARE;
+        (*next)->visualType = GLX_DONT_CARE;
+        (*next)->visualRating = GLX_NONE;
+        (*next)->transparentPixel = GLX_NONE;
+        (*next)->transparentRed = GLX_DONT_CARE;
+        (*next)->transparentGreen = GLX_DONT_CARE;
+        (*next)->transparentBlue = GLX_DONT_CARE;
+        (*next)->transparentAlpha = GLX_DONT_CARE;
+        (*next)->transparentIndex = GLX_DONT_CARE;
+        (*next)->xRenderable = GLX_DONT_CARE;
+        (*next)->fbconfigID = GLX_DONT_CARE;
+        (*next)->swapMethod = GLX_SWAP_UNDEFINED_OML;
+        (*next)->bindToTextureRgb = GLX_DONT_CARE;
+        (*next)->bindToTextureRgba = GLX_DONT_CARE;
+        (*next)->bindToMipmapTexture = GLX_DONT_CARE;
+        (*next)->bindToTextureTargets = 0;
+        (*next)->yInverted = GLX_DONT_CARE;
+
+        next = & ((*next)->next);
+    }
+
+    return head;
+}
+
+/**
+ * Build the single visual this driver currently supports: RGBA8888,
+ * double buffered, 24-bit depth and 8-bit stencil.
+ *
+ * \param conf  currently ignored; the visual is hard-coded.
+ * \return a one-element mode list owned by the caller (release it with
+ *         drm_visual_modes_destroy()), or NULL if allocation failed.
+ */
+__GLcontextModes *
+drm_visual_from_config(_EGLConfig *conf)
+{
+    __GLcontextModes *visual;
+    (void)conf;
+
+    visual = drm_visual_modes_create(1, sizeof(*visual));
+    /* drm_visual_modes_create() returns NULL when calloc() fails. */
+    if (visual == NULL)
+        return NULL;
+
+    visual->redBits = 8;
+    visual->greenBits = 8;
+    visual->blueBits = 8;
+    visual->alphaBits = 8;
+
+    visual->rgbBits = 32;
+    visual->doubleBufferMode = 1;
+
+    visual->depthBits = 24;
+    visual->haveDepthBuffer = visual->depthBits > 0;
+    visual->stencilBits = 8;
+    visual->haveStencilBuffer = visual->stencilBits > 0;
+
+    return visual;
+}
diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile
new file mode 100644
index 0000000000..f9f4d6be3c
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/Makefile
@@ -0,0 +1,21 @@
+# Static library built from the objects listed below.
+TARGET = libg3dvl.a
+# Object files archived into ${TARGET}.
+OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_shader_build.o vl_util.o vl_basic_csc.o \
+ vl_r16snorm_mc_buf.o
+# Relative path from this directory (state_trackers/g3dvl) to src/gallium.
+GALLIUMDIR = ../..
+
+# Debug info, warnings, position-independent code, and the gallium include paths.
+CFLAGS += -g -Wall -Werror-implicit-function-declaration -fPIC \
+ -I${GALLIUMDIR}/include \
+ -I${GALLIUMDIR}/auxiliary \
+ -I${GALLIUMDIR}/winsys/g3dvl \
+
+#############################################
+
+# "all" and "clean" are not files. .PHONY must be written as a target with
+# prerequisites; ".PHONY = ..." only assigns a variable of that name.
+.PHONY: all clean
+
+# Default goal: build the static library.
+all: ${TARGET}
+
+# Archive all objects ($^) into the library ($@).
+${TARGET}: ${OBJECTS}
+ ar rcs $@ $^
+
+# Remove the objects and the library.
+clean:
+ rm -rf ${OBJECTS} ${TARGET}
diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c
new file mode 100644
index 0000000000..187a13a560
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c
@@ -0,0 +1,715 @@
+#define VL_INTERNAL
+#include "vl_basic_csc.h"
+#include <assert.h>
+#include <pipe/p_context.h>
+#include <pipe/p_state.h>
+#include <pipe/p_inlines.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_build.h>
+#include <util/u_memory.h>
+#include "vl_csc.h"
+#include "vl_surface.h"
+#include "vl_shader_build.h"
+#include "vl_types.h"
+
+/*
+ * Layout of the vertex shader constant buffer (c0..c3).
+ * Filled in by vlPutPictureCSC() for every picture drawn.
+ */
+struct vlVertexShaderConsts
+{
+ /* c0: destination size divided by the frame buffer size */
+ struct vlVertex4f dst_scale;
+ /* c1: destination position divided by the frame buffer size */
+ struct vlVertex4f dst_trans;
+ /* c2: source size divided by the source texture size */
+ struct vlVertex4f src_scale;
+ /* c3: source position divided by the source texture size */
+ struct vlVertex4f src_trans;
+};
+
+/*
+ * Layout of the fragment shader constant buffer (c0..c4).
+ */
+struct vlFragmentShaderConsts
+{
+ /* c0: subtracted from the sampled pixel before the matrix is applied */
+ struct vlVertex4f bias;
+ /* c1..c4: four rows of four floats; the shader dots rows c1..c3 with the
+ * biased pixel to produce output x, y and z */
+ float matrix[16];
+};
+
+/*
+ * Basic colour space converter.
+ * The vlCSC base is the first member, so the struct vlCSC pointer handed to
+ * the callbacks is cast back to struct vlBasicCSC.
+ */
+struct vlBasicCSC
+{
+ struct vlCSC base;
+
+ struct pipe_context *pipe;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer;
+ /* Texture backing framebuffer.cbufs[0]; NULL until vlResizeFrameBuffer() */
+ struct pipe_texture *framebuffer_tex;
+ /* Driver state objects created in vlInit() and deleted in vlDestroy() */
+ void *sampler;
+ void *vertex_shader, *fragment_shader;
+ /* [0] holds positions, [1] holds texcoords (see vlCreateDataBufs()) */
+ struct pipe_vertex_buffer vertex_bufs[2];
+ struct pipe_vertex_element vertex_elems[2];
+ struct pipe_constant_buffer vs_const_buf, fs_const_buf;
+};
+
+/*
+ * (Re)creates the converter's frame buffer texture and colour surface for
+ * a window of width x height and updates the viewport to match.
+ * Does nothing when the current frame buffer already has that size.
+ *
+ * Returns 0 on success, 1 if the texture or its surface could not be
+ * created. On failure no frame buffer is left attached and the cached size
+ * is reset to 0x0, so a later call with the same size tries again.
+ */
+static int vlResizeFrameBuffer
+(
+    struct vlCSC *csc,
+    unsigned int width,
+    unsigned int height
+)
+{
+    struct vlBasicCSC *basic_csc;
+    struct pipe_context *pipe;
+    struct pipe_texture template;
+
+    assert(csc);
+
+    basic_csc = (struct vlBasicCSC*)csc;
+    pipe = basic_csc->pipe;
+
+    if (basic_csc->framebuffer.width == width && basic_csc->framebuffer.height == height)
+        return 0;
+
+    basic_csc->viewport.scale[0] = width;
+    basic_csc->viewport.scale[1] = height;
+    basic_csc->viewport.scale[2] = 1;
+    basic_csc->viewport.scale[3] = 1;
+    basic_csc->viewport.translate[0] = 0;
+    basic_csc->viewport.translate[1] = 0;
+    basic_csc->viewport.translate[2] = 0;
+    basic_csc->viewport.translate[3] = 0;
+
+    if (basic_csc->framebuffer_tex)
+    {
+        pipe_surface_reference(&basic_csc->framebuffer.cbufs[0], NULL);
+        pipe_texture_reference(&basic_csc->framebuffer_tex, NULL);
+    }
+
+    /* The old frame buffer is gone; forget its size until the new one exists */
+    basic_csc->framebuffer.width = 0;
+    basic_csc->framebuffer.height = 0;
+
+    memset(&template, 0, sizeof(struct pipe_texture));
+    template.target = PIPE_TEXTURE_2D;
+    template.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+    template.last_level = 0;
+    template.width[0] = width;
+    template.height[0] = height;
+    template.depth[0] = 1;
+    template.compressed = 0;
+    pf_get_block(template.format, &template.block);
+    template.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
+
+    basic_csc->framebuffer_tex = pipe->screen->texture_create(pipe->screen, &template);
+
+    if (!basic_csc->framebuffer_tex)
+        return 1;
+
+    basic_csc->framebuffer.cbufs[0] = pipe->screen->get_tex_surface
+    (
+        pipe->screen,
+        basic_csc->framebuffer_tex,
+        0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+    );
+
+    if (!basic_csc->framebuffer.cbufs[0])
+    {
+        pipe_texture_reference(&basic_csc->framebuffer_tex, NULL);
+        return 1;
+    }
+
+    basic_csc->framebuffer.width = width;
+    basic_csc->framebuffer.height = height;
+
+    /* Clear to black, in case video doesn't fill the entire window */
+    pipe->clear(pipe, basic_csc->framebuffer.cbufs[0], 0);
+
+    return 0;
+}
+
+/*
+ * Binds all pipe state owned by the converter (frame buffer, viewport,
+ * sampler, shaders, vertex data and constant buffers) before drawing.
+ * Always returns 0.
+ */
+static int vlBegin
+(
+    struct vlCSC *csc
+)
+{
+    struct vlBasicCSC *self;
+    struct pipe_context *ctx;
+
+    assert(csc);
+
+    self = (struct vlBasicCSC*)csc;
+    ctx = self->pipe;
+
+    /* Render target and viewport */
+    ctx->set_framebuffer_state(ctx, &self->framebuffer);
+    ctx->set_viewport_state(ctx, &self->viewport);
+
+    /* Sampler; the source texture itself is set in vlPutPictureCSC() */
+    ctx->bind_sampler_states(ctx, 1, (void**)&self->sampler);
+
+    /* Shaders */
+    ctx->bind_vs_state(ctx, self->vertex_shader);
+    ctx->bind_fs_state(ctx, self->fragment_shader);
+
+    /* Vertex data */
+    ctx->set_vertex_buffers(ctx, 2, self->vertex_bufs);
+    ctx->set_vertex_elements(ctx, 2, self->vertex_elems);
+
+    /* Shader constants */
+    ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, 0, &self->vs_const_buf);
+    ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, 0, &self->fs_const_buf);
+
+    return 0;
+}
+
+/*
+ * Draws the source rectangle (srcx, srcy, srcw, srch) of the surface into
+ * the destination rectangle (destx, desty, destw, desth) of the frame
+ * buffer. Both rectangles are converted to normalized coords and written
+ * to the vertex shader constant buffer, then a 4-vertex triangle strip is
+ * drawn. picture_type is not used here. Always returns 0.
+ */
+static int vlPutPictureCSC
+(
+    struct vlCSC *csc,
+    struct vlSurface *surface,
+    int srcx,
+    int srcy,
+    int srcw,
+    int srch,
+    int destx,
+    int desty,
+    int destw,
+    int desth,
+    enum vlPictureType picture_type
+)
+{
+    struct vlBasicCSC *self;
+    struct pipe_context *ctx;
+    struct vlVertexShaderConsts *consts;
+    float fb_w, fb_h;
+    float tex_w, tex_h;
+
+    assert(csc);
+    assert(surface);
+
+    self = (struct vlBasicCSC*)csc;
+    ctx = self->pipe;
+
+    consts = pipe_buffer_map
+    (
+        ctx->screen,
+        self->vs_const_buf.buffer,
+        PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+    );
+
+    /* Destination rect as a fraction of the frame buffer surface */
+    fb_w = (float)self->framebuffer.cbufs[0]->width;
+    fb_h = (float)self->framebuffer.cbufs[0]->height;
+
+    consts->dst_scale.x = destw / fb_w;
+    consts->dst_scale.y = desth / fb_h;
+    consts->dst_scale.z = 1;
+    consts->dst_scale.w = 1;
+    consts->dst_trans.x = destx / fb_w;
+    consts->dst_trans.y = desty / fb_h;
+    consts->dst_trans.z = 0;
+    consts->dst_trans.w = 0;
+
+    /* Source rect as a fraction of the source texture */
+    tex_w = (float)surface->texture->width[0];
+    tex_h = (float)surface->texture->height[0];
+
+    consts->src_scale.x = srcw / tex_w;
+    consts->src_scale.y = srch / tex_h;
+    consts->src_scale.z = 1;
+    consts->src_scale.w = 1;
+    consts->src_trans.x = srcx / tex_w;
+    consts->src_trans.y = srcy / tex_h;
+    consts->src_trans.z = 0;
+    consts->src_trans.w = 0;
+
+    pipe_buffer_unmap(ctx->screen, self->vs_const_buf.buffer);
+
+    ctx->set_sampler_textures(ctx, 1, &surface->texture);
+    ctx->draw_arrays(ctx, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+
+    return 0;
+}
+
+/*
+ * Counterpart of vlBegin(). Nothing needs to be undone for this converter,
+ * so it only validates its argument. Always returns 0.
+ */
+static int vlEnd
+(
+ struct vlCSC *csc
+)
+{
+ assert(csc);
+
+ return 0;
+}
+
+/*
+ * Returns the colour surface the converter renders into.
+ * This is NULL until vlResizeFrameBuffer() has created it.
+ */
+static struct pipe_surface* vlGetFrameBuffer
+(
+    struct vlCSC *csc
+)
+{
+    assert(csc);
+
+    return ((struct vlBasicCSC*)csc)->framebuffer.cbufs[0];
+}
+
+/*
+ * Releases everything the converter owns: frame buffer surface and
+ * texture (if created), sampler and shader state objects, vertex and
+ * constant buffers, and finally the converter itself.
+ * Always returns 0.
+ */
+static int vlDestroy
+(
+    struct vlCSC *csc
+)
+{
+    struct vlBasicCSC *self;
+    struct pipe_context *ctx;
+    unsigned int buf;
+
+    assert(csc);
+
+    self = (struct vlBasicCSC*)csc;
+    ctx = self->pipe;
+
+    /* The frame buffer only exists once vlResizeFrameBuffer() has run */
+    if (self->framebuffer_tex)
+    {
+        pipe_surface_reference(&self->framebuffer.cbufs[0], NULL);
+        pipe_texture_reference(&self->framebuffer_tex, NULL);
+    }
+
+    ctx->delete_sampler_state(ctx, self->sampler);
+    ctx->delete_vs_state(ctx, self->vertex_shader);
+    ctx->delete_fs_state(ctx, self->fragment_shader);
+
+    for (buf = 0; buf < 2; ++buf)
+    {
+        pipe_buffer_reference(ctx->screen, &self->vertex_bufs[buf].buffer, NULL);
+    }
+
+    pipe_buffer_reference(ctx->screen, &self->vs_const_buf.buffer, NULL);
+    pipe_buffer_reference(ctx->screen, &self->fs_const_buf.buffer, NULL);
+
+    FREE(self);
+
+    return 0;
+}
+
+/*
+ * Represents 2 triangles in a strip in normalized coords.
+ * Used to render the surface onto the frame buffer.
+ * The 4 corners of the unit square are copied into vertex buffer 0 by
+ * vlCreateDataBufs() and drawn with PIPE_PRIM_TRIANGLE_STRIP.
+ */
+static const struct vlVertex2f surface_verts[4] =
+{
+ {0.0f, 0.0f},
+ {0.0f, 1.0f},
+ {1.0f, 0.0f},
+ {1.0f, 1.0f}
+};
+
+/*
+ * Represents texcoords for the above. We can use the position values directly.
+ * This is only an alias of surface_verts; vlCreateDataBufs() still copies
+ * it into a separate vertex buffer (index 1).
+ * TODO: Duplicate these in the shader, no need to create a buffer.
+ */
+static const struct vlVertex2f *surface_texcoords = surface_verts;
+
+/*
+ * Identity color conversion constants, for debugging
+ * Zero bias and an identity matrix, so the sampled pixel passes through
+ * the fragment shader unchanged. Not referenced by the code in this file.
+ */
+static const struct vlFragmentShaderConsts identity =
+{
+ {
+ 0.0f, 0.0f, 0.0f, 0.0f
+ },
+ {
+ 1.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 1.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f
+ }
+};
+
+/*
+ * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [16,235]
+ *
+ * First initializer is the bias subtracted from the sampled pixel
+ * (0.501960784 = 128/255 re-centres Cb and Cr); second is the matrix whose
+ * rows are dotted with the biased pixel to give R, G and B.
+ */
+static const struct vlFragmentShaderConsts bt_601 =
+{
+ {
+ 0.0f, 0.501960784f, 0.501960784f, 0.0f
+ },
+ {
+ 1.0f, 0.0f, 1.371f, 0.0f,
+ 1.0f, -0.336f, -0.698f, 0.0f,
+ 1.0f, 1.732f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f
+ }
+};
+
+/*
+ * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [0,255]
+ *
+ * Bias: 0.062745098 = 16/255 removes the luma offset, 0.501960784 = 128/255
+ * re-centres Cb and Cr. This is the table vlCreateDataBufs() uploads to the
+ * fragment shader constant buffer.
+ */
+static const struct vlFragmentShaderConsts bt_601_full =
+{
+ {
+ 0.062745098f, 0.501960784f, 0.501960784f, 0.0f
+ },
+ {
+ 1.164f, 0.0f, 1.596f, 0.0f,
+ 1.164f, -0.391f, -0.813f, 0.0f,
+ 1.164f, 2.018f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f
+ }
+};
+
+/*
+ * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [16,235]
+ *
+ * Same layout as the other tables: bias first (0.501960784 = 128/255 for
+ * Cb and Cr), then the conversion matrix. Not referenced by the code in
+ * this file.
+ */
+static const struct vlFragmentShaderConsts bt_709 =
+{
+ {
+ 0.0f, 0.501960784f, 0.501960784f, 0.0f
+ },
+ {
+ 1.0f, 0.0f, 1.540f, 0.0f,
+ 1.0f, -0.183f, -0.459f, 0.0f,
+ 1.0f, 1.816f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f
+ }
+};
+
+/*
+ * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [0,255]
+ *
+ * Bias: 0.062745098 = 16/255 for luma, 0.501960784 = 128/255 for Cb and Cr.
+ *
+ * NOTE(review): unlike the other tables in this file this one is not
+ * declared static, so it has external linkage -- confirm whether another
+ * translation unit uses it; if not it should be static as well.
+ */
+const struct vlFragmentShaderConsts bt_709_full =
+{
+ {
+ 0.062745098f, 0.501960784f, 0.501960784f, 0.0f
+ },
+ {
+ 1.164f, 0.0f, 1.793f, 0.0f,
+ 1.164f, -0.213f, -0.534f, 0.0f,
+ 1.164f, 2.115f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 1.0f
+ }
+};
+
+/*
+ * Builds the TGSI vertex shader used to draw the picture quad and stores
+ * the resulting driver state object in csc->vertex_shader.
+ *
+ * The shader scales and translates the unit position rect (i0) and the
+ * unit texcoord rect (i1) with the constants c0..c3.
+ *
+ * Returns 0 on success, 1 if the token buffer could not be allocated.
+ */
+static int vlCreateVertexShader
+(
+    struct vlBasicCSC *csc
+)
+{
+    const unsigned int max_tokens = 50;
+
+    struct pipe_context *pipe;
+    struct pipe_shader_state vs;
+    struct tgsi_token *tokens;
+    struct tgsi_header *header;
+
+    struct tgsi_full_declaration decl;
+    struct tgsi_full_instruction inst;
+
+    unsigned int ti;
+    unsigned int i;
+
+    /* The parameter is csc; there is no "context" in this function */
+    assert(csc);
+
+    pipe = csc->pipe;
+    tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token));
+
+    if (!tokens)
+        return 1;
+
+    /* Version */
+    *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+    /* Header */
+    header = (struct tgsi_header*)&tokens[1];
+    *header = tgsi_build_header();
+    /* Processor */
+    *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+    ti = 3;
+
+    /*
+     * decl i0 ; Vertex pos
+     * decl i1 ; Vertex texcoords
+     */
+    for (i = 0; i < 2; i++)
+    {
+        decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+        ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+    }
+
+    /*
+     * decl c0 ; Scaling vector to scale vertex pos rect to destination size
+     * decl c1 ; Translation vector to move vertex pos rect into position
+     * decl c2 ; Scaling vector to scale texcoord rect to source size
+     * decl c3 ; Translation vector to move texcoord rect into position
+     */
+    decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3);
+    ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+    /*
+     * decl o0 ; Vertex pos
+     * decl o1 ; Vertex texcoords
+     */
+    for (i = 0; i < 2; i++)
+    {
+        decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+        ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+    }
+
+    /* decl t0, t1 */
+    decl = vl_decl_temps(0, 1);
+    ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+    /*
+     * madd o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos
+     * madd o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos
+     */
+    for (i = 0; i < 2; ++i)
+    {
+        inst = vl_inst4(TGSI_OPCODE_MADD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1);
+        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+    }
+
+    /* end */
+    inst = vl_end();
+    ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+    vs.tokens = tokens;
+    csc->vertex_shader = pipe->create_vs_state(pipe, &vs);
+    FREE(tokens);
+
+    return 0;
+}
+
+/*
+ * Builds the TGSI fragment shader that performs the colour space
+ * conversion and stores the resulting driver state object in
+ * csc->fragment_shader.
+ *
+ * The shader samples the source texture, subtracts the bias c0 and dots
+ * the result with matrix rows c1..c3 to produce o0.x, o0.y and o0.z.
+ * o0.w is not written by this program.
+ *
+ * Returns 0 on success, 1 if the token buffer could not be allocated.
+ */
+static int vlCreateFragmentShader
+(
+    struct vlBasicCSC *csc
+)
+{
+    const unsigned int max_tokens = 50;
+
+    struct pipe_context *pipe;
+    struct pipe_shader_state fs;
+    struct tgsi_token *tokens;
+    struct tgsi_header *header;
+
+    struct tgsi_full_declaration decl;
+    struct tgsi_full_instruction inst;
+
+    unsigned int ti;
+    unsigned int i;
+
+    /* The parameter is csc; there is no "context" in this function */
+    assert(csc);
+
+    pipe = csc->pipe;
+    tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token));
+
+    if (!tokens)
+        return 1;
+
+    /* Version */
+    *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+    /* Header */
+    header = (struct tgsi_header*)&tokens[1];
+    *header = tgsi_build_header();
+    /* Processor */
+    *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+    ti = 3;
+
+    /* decl i0 ; Texcoords for s0 */
+    decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
+    ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+    /*
+     * decl c0 ; Bias vector for CSC
+     * decl c1-c4 ; CSC matrix c1-c4
+     */
+    decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4);
+    ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+    /* decl o0 ; Fragment color */
+    decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+    ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+    /* decl t0 */
+    decl = vl_decl_temps(0, 0);
+    ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+    /* decl s0 ; Sampler for tex containing picture to display */
+    decl = vl_decl_samplers(0, 0);
+    ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+    /* tex2d t0, i0, s0 ; Read src pixel */
+    inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
+    ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+    /* sub t0, t0, c0 ; Subtract bias vector from pixel */
+    inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+    ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+    /*
+     * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix
+     * dp4 o0.y, t0, c2
+     * dp4 o0.z, t0, c3
+     */
+    for (i = 0; i < 3; ++i)
+    {
+        inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1);
+        /* Each dot product writes exactly one output channel */
+        inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+    }
+
+    /* end */
+    inst = vl_end();
+    ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+    fs.tokens = tokens;
+    csc->fragment_shader = pipe->create_fs_state(pipe, &fs);
+    FREE(tokens);
+
+    return 0;
+}
+
+/*
+ * Creates and fills the buffers the converter draws with:
+ *   - vertex buffer 0: the 4 positions of the quad (surface_verts)
+ *   - vertex buffer 1: the 4 texcoords of the quad (surface_texcoords)
+ *   - the vertex shader constant buffer (contents written per picture)
+ *   - the fragment shader constant buffer, loaded with bt_601_full
+ * Always returns 0.
+ */
+static int vlCreateDataBufs
+(
+    struct vlBasicCSC *csc
+)
+{
+    const struct vlVertex2f *sources[2];
+    struct pipe_context *pipe;
+    unsigned int i;
+
+    assert(csc);
+
+    pipe = csc->pipe;
+
+    sources[0] = surface_verts;
+    sources[1] = surface_texcoords;
+
+    /*
+     * Both vertex buffers have the same shape: 4 two-component float
+     * vertices, rendered as a tri strip covering the quad. Each buffer
+     * gets a matching vertex element that reads it from offset 0.
+     */
+    for (i = 0; i < 2; ++i)
+    {
+        struct pipe_vertex_buffer *vb = &csc->vertex_bufs[i];
+        struct pipe_vertex_element *ve = &csc->vertex_elems[i];
+
+        vb->stride = sizeof(struct vlVertex2f);
+        vb->max_index = 3;
+        vb->buffer_offset = 0;
+        vb->buffer = pipe_buffer_create
+        (
+            pipe->screen,
+            1,
+            PIPE_BUFFER_USAGE_VERTEX,
+            sizeof(struct vlVertex2f) * 4
+        );
+
+        memcpy
+        (
+            pipe_buffer_map(pipe->screen, vb->buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+            sources[i],
+            sizeof(struct vlVertex2f) * 4
+        );
+
+        pipe_buffer_unmap(pipe->screen, vb->buffer);
+
+        ve->src_offset = 0;
+        ve->vertex_buffer_index = i;
+        ve->nr_components = 2;
+        ve->src_format = PIPE_FORMAT_R32G32_FLOAT;
+    }
+
+    /* Vertex shader constants: scaling and translation vectors */
+    csc->vs_const_buf.buffer = pipe_buffer_create
+    (
+        pipe->screen,
+        1,
+        PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
+        sizeof(struct vlVertexShaderConsts)
+    );
+
+    /* Fragment shader constants: bias vector and color conversion matrix */
+    csc->fs_const_buf.buffer = pipe_buffer_create
+    (
+        pipe->screen,
+        1,
+        PIPE_BUFFER_USAGE_CONSTANT,
+        sizeof(struct vlFragmentShaderConsts)
+    );
+
+    /*
+     * TODO: Refactor this into a separate function,
+     * allow changing the CSC matrix at runtime to switch between regular & full versions
+     */
+    memcpy
+    (
+        pipe_buffer_map(pipe->screen, csc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+        &bt_601_full,
+        sizeof(struct vlFragmentShaderConsts)
+    );
+
+    pipe_buffer_unmap(pipe->screen, csc->fs_const_buf.buffer);
+
+    return 0;
+}
+
+/*
+ * Initializes a freshly allocated converter: resets the frame buffer
+ * description, creates the sampler state, both shaders and all data
+ * buffers. Always returns 0.
+ */
+static int vlInit
+(
+    struct vlBasicCSC *csc
+)
+{
+    struct pipe_context *pipe;
+    struct pipe_sampler_state sampler;
+
+    assert(csc);
+
+    pipe = csc->pipe;
+
+    /* Delay creating the FB until vlResizeFrameBuffer() so we know window size */
+    csc->framebuffer_tex = NULL;
+    csc->framebuffer.width = 0;
+    csc->framebuffer.height = 0;
+    csc->framebuffer.nr_cbufs = 1;
+    csc->framebuffer.cbufs[0] = NULL;
+    csc->framebuffer.zsbuf = NULL;
+
+    /*
+     * Zero the whole template first: the fields listed in the commented-out
+     * assignments below are not set here and must not reach the driver with
+     * indeterminate values.
+     */
+    memset(&sampler, 0, sizeof(sampler));
+    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+    sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+    sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+    sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+    sampler.compare_func = PIPE_FUNC_ALWAYS;
+    sampler.normalized_coords = 1;
+    /*sampler.prefilter = ;*/
+    /*sampler.shadow_ambient = ;*/
+    /*sampler.lod_bias = ;*/
+    /*sampler.min_lod = ;*/
+    /*sampler.max_lod = ;*/
+    /*sampler.border_color[i] = ;*/
+    /*sampler.max_anisotropy = ;*/
+    csc->sampler = pipe->create_sampler_state(pipe, &sampler);
+
+    vlCreateVertexShader(csc);
+    vlCreateFragmentShader(csc);
+    vlCreateDataBufs(csc);
+
+    return 0;
+}
+
+/*
+ * Allocates a basic colour space converter for the given pipe context,
+ * fills in its vlCSC callback table, initializes it and stores a pointer
+ * to its vlCSC base in *csc.
+ *
+ * Returns 0 on success, 1 if the converter could not be allocated
+ * (*csc is left untouched in that case).
+ */
+int vlCreateBasicCSC
+(
+    struct pipe_context *pipe,
+    struct vlCSC **csc
+)
+{
+    struct vlBasicCSC *self;
+    struct vlCSC *iface;
+
+    assert(pipe);
+    assert(csc);
+
+    self = CALLOC_STRUCT(vlBasicCSC);
+
+    if (self == NULL)
+        return 1;
+
+    /* Callback table seen by users of the generic vlCSC interface */
+    iface = &self->base;
+    iface->vlResizeFrameBuffer = &vlResizeFrameBuffer;
+    iface->vlBegin = &vlBegin;
+    iface->vlPutPicture = &vlPutPictureCSC;
+    iface->vlEnd = &vlEnd;
+    iface->vlGetFrameBuffer = &vlGetFrameBuffer;
+    iface->vlDestroy = &vlDestroy;
+
+    self->pipe = pipe;
+
+    vlInit(self);
+
+    *csc = iface;
+
+    return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.h b/src/gallium/state_trackers/g3dvl/vl_basic_csc.h
new file mode 100644
index 0000000000..2e17f1d814
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.h
@@ -0,0 +1,13 @@
+#ifndef vl_basic_csc_h
+#define vl_basic_csc_h
+
+struct pipe_context;
+struct vlCSC;
+
+/*
+ * Creates a basic colour space converter that renders through the given
+ * pipe context and stores it in *csc.
+ * Returns 0 on success, 1 if the converter could not be allocated.
+ * The converter is released through its vlDestroy callback.
+ */
+int vlCreateBasicCSC
+(
+ struct pipe_context *pipe,
+ struct vlCSC **csc
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c
new file mode 100644
index 0000000000..65ddb9f01e
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_context.c
@@ -0,0 +1,208 @@
+#define VL_INTERNAL
+#include "vl_context.h"
+#include <assert.h>
+#include <string.h>
+#include <pipe/p_context.h>
+#include <pipe/p_state.h>
+#include <util/u_memory.h>
+#include "vl_render.h"
+#include "vl_r16snorm_mc_buf.h"
+#include "vl_csc.h"
+#include "vl_basic_csc.h"
+
+/*
+ * Creates and binds the rasterizer, blend and depth/stencil/alpha state
+ * objects shared by all rendering done through this context. The created
+ * objects are stored in context->raster, context->blend and context->dsa.
+ * Always returns 0.
+ */
+static int vlInitCommon(struct vlContext *context)
+{
+    struct pipe_context *pipe;
+    struct pipe_rasterizer_state rast;
+    struct pipe_blend_state blend;
+    struct pipe_depth_stencil_alpha_state dsa;
+    unsigned int i;
+
+    assert(context);
+
+    pipe = context->pipe;
+
+    /*
+     * Zero every template before filling it in, so that members which are
+     * not assigned below (e.g. rast.sprite_coord_mode) are not handed to
+     * the driver with indeterminate values.
+     */
+    memset(&rast, 0, sizeof(rast));
+    memset(&blend, 0, sizeof(blend));
+    memset(&dsa, 0, sizeof(dsa));
+
+    rast.flatshade = 1;
+    rast.flatshade_first = 0;
+    rast.light_twoside = 0;
+    rast.front_winding = PIPE_WINDING_CCW;
+    rast.cull_mode = PIPE_WINDING_CW;
+    rast.fill_cw = PIPE_POLYGON_MODE_FILL;
+    rast.fill_ccw = PIPE_POLYGON_MODE_FILL;
+    rast.offset_cw = 0;
+    rast.offset_ccw = 0;
+    rast.scissor = 0;
+    rast.poly_smooth = 0;
+    rast.poly_stipple_enable = 0;
+    rast.point_sprite = 0;
+    rast.point_size_per_vertex = 0;
+    rast.multisample = 0;
+    rast.line_smooth = 0;
+    rast.line_stipple_enable = 0;
+    rast.line_stipple_factor = 0;
+    rast.line_stipple_pattern = 0;
+    rast.line_last_pixel = 0;
+    /* Don't need clipping, but viewport mapping done here */
+    rast.bypass_clipping = 0;
+    rast.bypass_vs = 0;
+    rast.origin_lower_left = 0;
+    rast.line_width = 1;
+    rast.point_smooth = 0;
+    rast.point_size = 1;
+    rast.offset_units = 1;
+    rast.offset_scale = 1;
+    /*rast.sprite_coord_mode[i] = ;*/
+    context->raster = pipe->create_rasterizer_state(pipe, &rast);
+    pipe->bind_rasterizer_state(pipe, context->raster);
+
+    blend.blend_enable = 0;
+    blend.rgb_func = PIPE_BLEND_ADD;
+    blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+    blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+    blend.alpha_func = PIPE_BLEND_ADD;
+    blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+    blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+    blend.logicop_enable = 0;
+    blend.logicop_func = PIPE_LOGICOP_CLEAR;
+    /* Needed to allow color writes to FB, even if blending disabled */
+    blend.colormask = PIPE_MASK_RGBA;
+    blend.dither = 0;
+    context->blend = pipe->create_blend_state(pipe, &blend);
+    pipe->bind_blend_state(pipe, context->blend);
+
+    dsa.depth.enabled = 0;
+    dsa.depth.writemask = 0;
+    dsa.depth.func = PIPE_FUNC_ALWAYS;
+    dsa.depth.occlusion_count = 0;
+    for (i = 0; i < 2; ++i)
+    {
+        dsa.stencil[i].enabled = 0;
+        dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
+        dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
+        dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
+        dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
+        dsa.stencil[i].ref_value = 0;
+        dsa.stencil[i].valuemask = 0;
+        dsa.stencil[i].writemask = 0;
+    }
+    dsa.alpha.enabled = 0;
+    dsa.alpha.func = PIPE_FUNC_ALWAYS;
+    dsa.alpha.ref_value = 0;
+    context->dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa);
+    pipe->bind_depth_stencil_alpha_state(pipe, context->dsa);
+
+    return 0;
+}
+
+/*
+ * Creates a video context on top of the given pipe context: records the
+ * picture parameters, sets up the common pipe state, then creates the
+ * motion compensation renderer and the colour space converter.
+ *
+ * On success the new context is stored in *context and 0 is returned.
+ * On failure everything created so far is released, *context is left
+ * untouched and 1 is returned.
+ */
+int vlCreateContext
+(
+    struct vlScreen *screen,
+    struct pipe_context *pipe,
+    unsigned int picture_width,
+    unsigned int picture_height,
+    enum vlFormat picture_format,
+    enum vlProfile profile,
+    enum vlEntryPoint entry_point,
+    struct vlContext **context
+)
+{
+    struct vlContext *ctx;
+
+    assert(screen);
+    assert(context);
+    assert(pipe);
+
+    ctx = CALLOC_STRUCT(vlContext);
+
+    if (!ctx)
+        return 1;
+
+    ctx->screen = screen;
+    ctx->pipe = pipe;
+    ctx->picture_width = picture_width;
+    ctx->picture_height = picture_height;
+    ctx->picture_format = picture_format;
+    ctx->profile = profile;
+    ctx->entry_point = entry_point;
+
+    vlInitCommon(ctx);
+
+    /*
+     * NOTE(review): assumes vlCreateR16SNormBufferedMC() follows the same
+     * "0 on success, non-zero on failure" convention as vlCreateBasicCSC()
+     * -- confirm against vl_r16snorm_mc_buf.c.
+     */
+    if (vlCreateR16SNormBufferedMC(pipe, picture_width, picture_height, picture_format, &ctx->render))
+        goto err_states;
+
+    if (vlCreateBasicCSC(pipe, &ctx->csc))
+        goto err_render;
+
+    *context = ctx;
+
+    return 0;
+
+err_render:
+    ctx->render->vlDestroy(ctx->render);
+err_states:
+    /* Same teardown as vlDestroyContext() for the state objects */
+    pipe->delete_blend_state(pipe, ctx->blend);
+    pipe->delete_rasterizer_state(pipe, ctx->raster);
+    pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa);
+    FREE(ctx);
+
+    return 1;
+}
+
+/*
+ * Destroys a context created by vlCreateContext(): unbinds the shaders,
+ * destroys the renderer and the colour space converter, deletes the
+ * common state objects and frees the context. Always returns 0.
+ */
+int vlDestroyContext
+(
+    struct vlContext *context
+)
+{
+    struct pipe_context *pipe;
+
+    assert(context);
+
+    pipe = context->pipe;
+
+    /* XXX: Must unbind shaders before we can delete them for some reason */
+    pipe->bind_vs_state(pipe, NULL);
+    pipe->bind_fs_state(pipe, NULL);
+
+    context->render->vlDestroy(context->render);
+    context->csc->vlDestroy(context->csc);
+
+    pipe->delete_blend_state(pipe, context->blend);
+    pipe->delete_rasterizer_state(pipe, context->raster);
+    pipe->delete_depth_stencil_alpha_state(pipe, context->dsa);
+
+    FREE(context);
+
+    return 0;
+}
+
+/* Returns the screen the context was created with. */
+struct vlScreen* vlContextGetScreen
+(
+ struct vlContext *context
+)
+{
+ assert(context);
+
+ return context->screen;
+}
+
+/* Returns the pipe context the context was created with. */
+struct pipe_context* vlGetPipeContext
+(
+ struct vlContext *context
+)
+{
+ assert(context);
+
+ return context->pipe;
+}
+
+/* Returns the picture width passed to vlCreateContext(). */
+unsigned int vlGetPictureWidth
+(
+ struct vlContext *context
+)
+{
+ assert(context);
+
+ return context->picture_width;
+}
+
+/* Returns the picture height passed to vlCreateContext(). */
+unsigned int vlGetPictureHeight
+(
+ struct vlContext *context
+)
+{
+ assert(context);
+
+ return context->picture_height;
+}
+
+/* Returns the picture format passed to vlCreateContext(). */
+enum vlFormat vlGetPictureFormat
+(
+ struct vlContext *context
+)
+{
+ assert(context);
+
+ return context->picture_format;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h
new file mode 100644
index 0000000000..3d14634c44
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_context.h
@@ -0,0 +1,73 @@
+#ifndef vl_context_h
+#define vl_context_h
+
+#include "vl_types.h"
+
+struct pipe_context;
+
+#ifdef VL_INTERNAL
+struct vlRender;
+struct vlCSC;
+
+/*
+ * Per-context state. The definition is only exposed to translation units
+ * that define VL_INTERNAL before including this header; everyone else
+ * sees an opaque type and uses the accessor functions below.
+ */
+struct vlContext
+{
+ /* Values handed to vlCreateContext() and stored unchanged */
+ struct vlScreen *screen;
+ struct pipe_context *pipe;
+ unsigned int picture_width;
+ unsigned int picture_height;
+ enum vlFormat picture_format;
+ enum vlProfile profile;
+ enum vlEntryPoint entry_point;
+
+ /*
+ * State objects; vlDestroyContext() releases them with
+ * delete_rasterizer_state, delete_depth_stencil_alpha_state and
+ * delete_blend_state respectively.
+ */
+ void *raster;
+ void *dsa;
+ void *blend;
+
+ /* Renderer and CSC objects; vlDestroyContext() calls their vlDestroy hooks */
+ struct vlRender *render;
+ struct vlCSC *csc;
+};
+#endif
+
+/*
+ * Creates a context and stores it in *context.
+ * Returns 0 on success, 1 if the context could not be allocated.
+ */
+int vlCreateContext
+(
+ struct vlScreen *screen,
+ struct pipe_context *pipe,
+ unsigned int picture_width,
+ unsigned int picture_height,
+ enum vlFormat picture_format,
+ enum vlProfile profile,
+ enum vlEntryPoint entry_point,
+ struct vlContext **context
+);
+
+/* Destroys the context and the objects it owns, then frees it. Returns 0. */
+int vlDestroyContext
+(
+ struct vlContext *context
+);
+
+/* Returns the screen the context was created with. */
+struct vlScreen* vlContextGetScreen
+(
+ struct vlContext *context
+);
+
+/* Returns the pipe context the context was created with. */
+struct pipe_context* vlGetPipeContext
+(
+ struct vlContext *context
+);
+
+/* Returns the picture width the context was created with. */
+unsigned int vlGetPictureWidth
+(
+ struct vlContext *context
+);
+
+/* Returns the picture height the context was created with. */
+unsigned int vlGetPictureHeight
+(
+ struct vlContext *context
+);
+
+/* Returns the picture format the context was created with. */
+enum vlFormat vlGetPictureFormat
+(
+ struct vlContext *context
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_csc.h b/src/gallium/state_trackers/g3dvl/vl_csc.h
new file mode 100644
index 0000000000..36417a2792
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_csc.h
@@ -0,0 +1,53 @@
+#ifndef vl_csc_h
+#define vl_csc_h
+
+#include "vl_types.h"
+
+struct pipe_surface;
+
+/*
+ * Function-pointer interface of a CSC object. Every hook receives the
+ * object itself as its first argument and returns an int, except
+ * vlGetFrameBuffer which returns a pipe_surface pointer.
+ *
+ * NOTE(review): no implementation is visible from this header; the
+ * per-hook remarks below are inferred from names and signatures only
+ * and should be confirmed against the implementing file.
+ */
+struct vlCSC
+{
+ /* Presumably resizes the internal frame buffer to width x height -- confirm */
+ int (*vlResizeFrameBuffer)
+ (
+ struct vlCSC *csc,
+ unsigned int width,
+ unsigned int height
+ );
+
+ /* Presumably brackets a sequence of vlPutPicture calls together with vlEnd -- confirm */
+ int (*vlBegin)
+ (
+ struct vlCSC *csc
+ );
+
+ /*
+ * Takes a source rectangle (srcx, srcy, srcw, srch) of `surface`, a
+ * destination rectangle (destx, desty, destw, desth) and a picture type.
+ */
+ int (*vlPutPicture)
+ (
+ struct vlCSC *csc,
+ struct vlSurface *surface,
+ int srcx,
+ int srcy,
+ int srcw,
+ int srch,
+ int destx,
+ int desty,
+ int destw,
+ int desth,
+ enum vlPictureType picture_type
+ );
+
+ int (*vlEnd)
+ (
+ struct vlCSC *csc
+ );
+
+ struct pipe_surface* (*vlGetFrameBuffer)
+ (
+ struct vlCSC *csc
+ );
+
+ /* Called by vlDestroyContext() to dispose of the object */
+ int (*vlDestroy)
+ (
+ struct vlCSC *csc
+ );
+};
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_defs.h b/src/gallium/state_trackers/g3dvl/vl_defs.h
new file mode 100644
index 0000000000..d612d02502
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_defs.h
@@ -0,0 +1,11 @@
+#ifndef vl_defs_h
+#define vl_defs_h
+
+/* Dimensions of one block, in samples, and the number of samples it holds */
+#define VL_BLOCK_WIDTH 8
+#define VL_BLOCK_HEIGHT 8
+#define VL_BLOCK_SIZE (VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT)
+/* Dimensions of one macroblock, in samples, and the number of samples it holds */
+#define VL_MACROBLOCK_WIDTH 16
+#define VL_MACROBLOCK_HEIGHT 16
+#define VL_MACROBLOCK_SIZE (VL_MACROBLOCK_WIDTH * VL_MACROBLOCK_HEIGHT)
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_display.c b/src/gallium/state_trackers/g3dvl/vl_display.c
new file mode 100644
index 0000000000..dce06de758
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_display.c
@@ -0,0 +1,48 @@
+#define VL_INTERNAL
+#include "vl_display.h"
+#include <assert.h>
+#include <util/u_memory.h>
+
+/*
+ * Allocates a vlDisplay wrapping native_display and stores it in *display.
+ * Returns 0 on success; returns 1 and leaves *display untouched when the
+ * allocation fails.
+ */
+int vlCreateDisplay(vlNativeDisplay native_display, struct vlDisplay **display)
+{
+    struct vlDisplay *new_display;
+
+    assert(native_display);
+    assert(display);
+
+    new_display = CALLOC_STRUCT(vlDisplay);
+    if (!new_display)
+    {
+        return 1;
+    }
+
+    new_display->native = native_display;
+    *display = new_display;
+
+    return 0;
+}
+
+/*
+ * Frees a vlDisplay. The wrapped native display is not touched.
+ * Always returns 0.
+ */
+int vlDestroyDisplay(struct vlDisplay *display)
+{
+    assert(display);
+    FREE(display);
+    return 0;
+}
+
+/* Returns the native display handle stored in the vlDisplay. */
+vlNativeDisplay vlGetNativeDisplay(struct vlDisplay *display)
+{
+    vlNativeDisplay native;
+
+    assert(display);
+
+    native = display->native;
+    return native;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_display.h b/src/gallium/state_trackers/g3dvl/vl_display.h
new file mode 100644
index 0000000000..e11fd40799
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_display.h
@@ -0,0 +1,29 @@
+#ifndef vl_display_h
+#define vl_display_h
+
+#include "vl_types.h"
+
+#ifdef VL_INTERNAL
+/* Thin wrapper around a native display handle (VL_INTERNAL users only). */
+struct vlDisplay
+{
+ vlNativeDisplay native; /* handle passed to vlCreateDisplay() */
+};
+#endif
+
+/*
+ * Creates a vlDisplay for native_display and stores it in *display.
+ * Returns 0 on success, 1 if the allocation fails.
+ */
+int vlCreateDisplay
+(
+ vlNativeDisplay native_display,
+ struct vlDisplay **display
+);
+
+/* Frees a vlDisplay. Returns 0. */
+int vlDestroyDisplay
+(
+ struct vlDisplay *display
+);
+
+/* Returns the native display handle the vlDisplay was created with. */
+vlNativeDisplay vlGetNativeDisplay
+(
+ struct vlDisplay *display
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c
new file mode 100644
index 0000000000..7cd753f736
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c
@@ -0,0 +1,1157 @@
+#define VL_INTERNAL
+#include "vl_r16snorm_mc_buf.h"
+#include <assert.h>
+#include <pipe/p_context.h>
+#include <pipe/p_screen.h>
+#include <pipe/p_state.h>
+#include <pipe/p_inlines.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_build.h>
+#include <util/u_math.h>
+#include <util/u_memory.h>
+#include "vl_render.h"
+#include "vl_shader_build.h"
+#include "vl_surface.h"
+#include "vl_util.h"
+#include "vl_types.h"
+#include "vl_defs.h"
+
+/*
+ * Alignment argument passed to every pipe_buffer_create() call in this file.
+ * NOTE(review): this has external linkage; consider `static` if no other
+ * translation unit refers to it -- confirm before changing.
+ */
+const unsigned int DEFAULT_BUF_ALIGNMENT = 1;
+
+/*
+ * Macroblock type combined with motion type (frame or field), as computed
+ * by vlGetMacroBlockTypeEx(). vlFlush() uses the values as array indices
+ * and lays vertices out in this order, so the enumerator order matters.
+ */
+enum vlMacroBlockTypeEx
+{
+ vlMacroBlockExTypeIntra,
+ vlMacroBlockExTypeFwdPredictedFrame,
+ vlMacroBlockExTypeFwdPredictedField,
+ vlMacroBlockExTypeBkwdPredictedFrame,
+ vlMacroBlockExTypeBkwdPredictedField,
+ vlMacroBlockExTypeBiPredictedFrame,
+ vlMacroBlockExTypeBiPredictedField,
+
+ /* Number of extended types; used to size per-type arrays */
+ vlNumMacroBlockExTypes
+};
+
+/* Layout of the vertex shader constant buffer. */
+struct vlVertexShaderConsts
+{
+ struct vlVertex4f denorm; /* vlFlush() writes x/y = width/height of the target texture */
+};
+
+/* Layout of the fragment shader constant buffer; filled from fs_consts. */
+struct vlFragmentShaderConsts
+{
+ struct vlVertex4f multiplier;
+ struct vlVertex4f div;
+};
+
+/*
+ * One vertex of the first vertex buffer (vertex_bufs.ycbcr): a position
+ * followed by one texture coordinate per plane. Vertex elements 0-3 are
+ * set up at consecutive vlVertex2f offsets to match this field order.
+ */
+struct vlMacroBlockVertexStream0
+{
+ struct vlVertex2f pos;
+ struct vlVertex2f luma_tc;
+ struct vlVertex2f cb_tc;
+ struct vlVertex2f cr_tc;
+};
+
+/*
+ * State of the buffered renderer. Macroblocks are accumulated in
+ * `macroblocks` and their block data copied into mapped textures; vlFlush()
+ * turns a full picture's worth of them into draw calls.
+ */
+struct vlR16SnormBufferedMC
+{
+ /* First member: functions in this file cast struct vlRender* to this type */
+ struct vlRender base;
+
+ unsigned int picture_width;
+ unsigned int picture_height;
+ enum vlFormat picture_format;
+ /* Picture size in macroblocks (width x height), set by vlCreateDataBufs() */
+ unsigned int macroblocks_per_picture;
+
+ /* Surface macroblocks are currently being buffered for; NULL when none */
+ struct vlSurface *buffered_surface;
+ /* Reference surfaces taken from the batch that started the current surface */
+ struct vlSurface *past_surface;
+ struct vlSurface *future_surface;
+ /* 1 / width and 1 / height of the buffered surface's texture */
+ struct vlVertex2f surface_tex_inv_size;
+ /* Per plane: coordinates of a zero-filled block; x < 0 means none recorded yet */
+ struct vlVertex2f zero_block[3];
+ /* Number of valid entries in `macroblocks` */
+ unsigned int num_macroblocks;
+ /* Array of macroblocks_per_picture entries */
+ struct vlMpeg2MacroBlock *macroblocks;
+ /* Texture transfers and their mappings for textures.all[0..2] while buffering */
+ struct pipe_transfer *tex_transfer[3];
+ short *texels[3];
+
+ struct pipe_context *pipe;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state render_target;
+
+ /* `all` aliases the named members in the order y, cb, cr, ref[0], ref[1] */
+ union
+ {
+ void *all[5];
+ struct
+ {
+ void *y;
+ void *cb;
+ void *cr;
+ void *ref[2];
+ };
+ } samplers;
+
+ /* Same aliasing as `samplers`; ref[] is pointed at past/future textures in vlFlush() */
+ union
+ {
+ struct pipe_texture *all[5];
+ struct
+ {
+ struct pipe_texture *y;
+ struct pipe_texture *cb;
+ struct pipe_texture *cr;
+ struct pipe_texture *ref[2];
+ };
+ } textures;
+
+ /* `all` aliases ycbcr, ref[0], ref[1] */
+ union
+ {
+ struct pipe_vertex_buffer all[3];
+ struct
+ {
+ struct pipe_vertex_buffer ycbcr;
+ struct pipe_vertex_buffer ref[2];
+ };
+ } vertex_bufs;
+
+ /*
+ * Shaders for intra (i_), single-reference (p_) and bi-predicted (b_)
+ * macroblocks; vlFlush() binds index 0 for frame and index 1 for field
+ * motion types.
+ */
+ void *i_vs, *p_vs[2], *b_vs[2];
+ void *i_fs, *p_fs[2], *b_fs[2];
+ struct pipe_vertex_element vertex_elems[8];
+ struct pipe_constant_buffer vs_const_buf;
+ struct pipe_constant_buffer fs_const_buf;
+};
+
+/* Validates its argument and returns 0; performs no other work. */
+static inline int vlBegin(struct vlRender *render)
+{
+    assert(render);
+    return 0;
+}
+
+/*
+ * Copies VL_BLOCK_HEIGHT rows from the tightly packed block at src to dst,
+ * where consecutive destination rows are dst_pitch shorts apart. Each row
+ * copy moves VL_BLOCK_WIDTH * 2 bytes. Always returns 0.
+ */
+static inline int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
+{
+    unsigned int row;
+
+    for (row = 0; row != VL_BLOCK_HEIGHT; row++)
+    {
+        short *dst_row = dst + row * dst_pitch;
+        short *src_row = src + row * VL_BLOCK_WIDTH;
+
+        memcpy(dst_row, src_row, VL_BLOCK_WIDTH * 2);
+    }
+
+    return 0;
+}
+
+/*
+ * Like vlGrabFrameCodedBlock(), but consecutive source rows land on every
+ * second destination row (destination rows are dst_pitch * 2 shorts
+ * apart). Always returns 0.
+ */
+static inline int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
+{
+    unsigned int row;
+
+    for (row = 0; row != VL_BLOCK_HEIGHT; row++)
+    {
+        short *dst_row = dst + row * dst_pitch * 2;
+        short *src_row = src + row * VL_BLOCK_WIDTH;
+
+        memcpy(dst_row, src_row, VL_BLOCK_WIDTH * 2);
+    }
+
+    return 0;
+}
+
+/*
+ * Zero-fills VL_BLOCK_HEIGHT rows of VL_BLOCK_WIDTH * 2 bytes each at dst,
+ * where consecutive rows are dst_pitch shorts apart. Always returns 0.
+ */
+static inline int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
+{
+    unsigned int row;
+
+    for (row = 0; row != VL_BLOCK_HEIGHT; row++)
+    {
+        short *dst_row = dst + row * dst_pitch;
+
+        memset(dst_row, 0, VL_BLOCK_WIDTH * 2);
+    }
+
+    return 0;
+}
+
+/*
+ * Copies the coded blocks of one macroblock from `blocks` into the mapped
+ * textures (texels[0] for the four luma blocks, texels[1] and texels[2]
+ * for the two chroma blocks).
+ *
+ * mbx, mby: macroblock position, in macroblocks.
+ * dct_type: selects frame or field layout for the luma blocks.
+ * coded_block_pattern: bits 5..2 flag the luma blocks in raster order,
+ * bit 1 the block for texels[1], bit 0 the block for texels[2].
+ * blocks: the coded blocks stored back to back; uncoded blocks occupy
+ * no space (`sb` only advances for coded blocks).
+ *
+ * For an uncoded block nothing is copied. If no zero block has been
+ * recorded yet for that plane (zero_block[n].x < 0), the block's texels are
+ * cleared and its position recorded in zero_block[n]. Always returns 0.
+ */
+static inline int vlGrabBlocks
+(
+ struct vlR16SnormBufferedMC *mc,
+ unsigned int mbx,
+ unsigned int mby,
+ enum vlDCTType dct_type,
+ unsigned int coded_block_pattern,
+ short *blocks
+)
+{
+ short *texels;
+ unsigned int tex_pitch;
+ unsigned int x, y, tb = 0, sb = 0;
+ unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT;
+
+ assert(mc);
+ assert(blocks);
+
+ /* Row pitch of the mapped texture expressed in `short` elements (assumes block.size is the size of one texel -- TODO confirm) */
+ tex_pitch = mc->tex_transfer[0]->stride / mc->tex_transfer[0]->block.size;
+ texels = mc->texels[0] + mbpy * tex_pitch + mbpx;
+
+ /* Luma: 2x2 blocks; tb counts them in raster order */
+ for (y = 0; y < 2; ++y)
+ {
+ for (x = 0; x < 2; ++x, ++tb)
+ {
+ if ((coded_block_pattern >> (5 - tb)) & 1)
+ {
+ short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
+
+ if (dct_type == vlDCTTypeFrameCoded)
+ {
+ vlGrabFrameCodedBlock
+ (
+ cur_block,
+ texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH,
+ tex_pitch
+ );
+ }
+ else
+ {
+ /* Field layout: start on row y and write every second row */
+ vlGrabFieldCodedBlock
+ (
+ cur_block,
+ texels + y * tex_pitch + x * VL_BLOCK_WIDTH,
+ tex_pitch
+ );
+ }
+
+ ++sb;
+ }
+ else if (mc->zero_block[0].x < 0.0f)
+ {
+ vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch);
+
+ mc->zero_block[0].x = (mbpx + x * 8) * mc->surface_tex_inv_size.x;
+ mc->zero_block[0].y = (mbpy + y * 8) * mc->surface_tex_inv_size.y;
+ }
+ }
+ }
+
+ /* TODO: Implement 422, 444 */
+ /* Chroma planes are addressed at half the luma position in both directions */
+ mbpx >>= 1;
+ mbpy >>= 1;
+
+ for (tb = 0; tb < 2; ++tb)
+ {
+ tex_pitch = mc->tex_transfer[tb + 1]->stride / mc->tex_transfer[tb + 1]->block.size;
+ texels = mc->texels[tb + 1] + mbpy * tex_pitch + mbpx;
+
+ if ((coded_block_pattern >> (1 - tb)) & 1)
+ {
+ short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
+
+ vlGrabFrameCodedBlock
+ (
+ cur_block,
+ texels,
+ tex_pitch
+ );
+
+ ++sb;
+ }
+ else if (mc->zero_block[tb + 1].x < 0.0f)
+ {
+ vlGrabNoBlock(texels, tex_pitch);
+
+ /* Position is scaled back up (<< 1) because surface_tex_inv_size refers to the full-size surface */
+ mc->zero_block[tb + 1].x = (mbpx << 1) * mc->surface_tex_inv_size.x;
+ mc->zero_block[tb + 1].y = (mbpy << 1) * mc->surface_tex_inv_size.y;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Maps a macroblock's mb_type and, for predicted macroblocks, its mo_type
+ * (frame vs. anything else, which is treated as field) to the matching
+ * vlMacroBlockTypeEx value. An unknown mb_type trips an assert and, when
+ * asserts are compiled out, yields -1.
+ */
+static inline enum vlMacroBlockTypeEx vlGetMacroBlockTypeEx(struct vlMpeg2MacroBlock *mb)
+{
+    assert(mb);
+
+    switch (mb->mb_type)
+    {
+        case vlMacroBlockTypeIntra:
+            return vlMacroBlockExTypeIntra;
+
+        case vlMacroBlockTypeFwdPredicted:
+            if (mb->mo_type == vlMotionTypeFrame)
+                return vlMacroBlockExTypeFwdPredictedFrame;
+            return vlMacroBlockExTypeFwdPredictedField;
+
+        case vlMacroBlockTypeBkwdPredicted:
+            if (mb->mo_type == vlMotionTypeFrame)
+                return vlMacroBlockExTypeBkwdPredictedFrame;
+            return vlMacroBlockExTypeBkwdPredictedField;
+
+        case vlMacroBlockTypeBiPredicted:
+            if (mb->mo_type == vlMotionTypeFrame)
+                return vlMacroBlockExTypeBiPredictedFrame;
+            return vlMacroBlockExTypeBiPredictedField;
+
+        default:
+            assert(0);
+    }
+
+    /* Unreachable */
+    return -1;
+}
+
+/*
+ * Appends one macroblock to mc->macroblocks (copying its position, types,
+ * motion vectors, coded block pattern and block pointer), copies its block
+ * data into the mapped textures via vlGrabBlocks() and bumps
+ * mc->num_macroblocks. Always returns 0.
+ */
+static inline int vlGrabMacroBlock(struct vlR16SnormBufferedMC *mc, struct vlMpeg2MacroBlock *macroblock)
+{
+    struct vlMpeg2MacroBlock *slot;
+    unsigned int a, b, c;
+
+    assert(mc);
+    assert(macroblock);
+    assert(mc->num_macroblocks < mc->macroblocks_per_picture);
+
+    slot = &mc->macroblocks[mc->num_macroblocks];
+
+    slot->mbx = macroblock->mbx;
+    slot->mby = macroblock->mby;
+    slot->mb_type = macroblock->mb_type;
+    slot->mo_type = macroblock->mo_type;
+    slot->dct_type = macroblock->dct_type;
+
+    /* Copy the eight motion vector components PMV[0..1][0..1][0..1] */
+    for (a = 0; a < 2; ++a)
+        for (b = 0; b < 2; ++b)
+            for (c = 0; c < 2; ++c)
+                slot->PMV[a][b][c] = macroblock->PMV[a][b][c];
+
+    slot->cbp = macroblock->cbp;
+    slot->blocks = macroblock->blocks;
+
+    vlGrabBlocks(mc, macroblock->mbx, macroblock->mby, macroblock->dct_type, macroblock->cbp, macroblock->blocks);
+
+    mc->num_macroblocks++;
+
+    return 0;
+}
+
+/*
+ * Fills six vertices vb[0..5] (two triangles, 0-1-2 and 3-4-5) covering the
+ * rectangle whose corner is (mbx * unitx + ofsx, mby * unity + ofsy) and
+ * whose size is hx by hy.
+ *
+ * For each of the luma, cb and cr texture coordinates: when cbp has the
+ * corresponding mask bit (lm, cbm, crm) set, the coordinates equal the
+ * position rectangle; otherwise they address a rectangle of the same size
+ * whose corner is zb[0], zb[1] or zb[2] respectively.
+ *
+ * Every argument is evaluated many times, so pass only side-effect-free
+ * expressions.
+ */
+#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zb) \
+ do { \
+ (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
+ (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
+ (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
+ (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
+ \
+ if ((cbp) & (lm)) \
+ { \
+ (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ } \
+ else \
+ { \
+ (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \
+ (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \
+ (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \
+ (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \
+ (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \
+ (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \
+ } \
+ \
+ if ((cbp) & (cbm)) \
+ { \
+ (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ } \
+ else \
+ { \
+ (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \
+ (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \
+ (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \
+ (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \
+ (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \
+ (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \
+ } \
+ \
+ if ((cbp) & (crm)) \
+ { \
+ (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
+ (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
+ } \
+ else \
+ { \
+ (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \
+ (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \
+ (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \
+ (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \
+ (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \
+ (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \
+ } \
+ } while (0)
+
+/*
+ * Writes the vertices for one macroblock into slot `pos` of the mapped
+ * vertex buffers.
+ *
+ * Each slot holds 24 vertices (4 blocks x 6 vertices) in ycbcr_vb and
+ * 24 * 2 vlVertex2f entries in each ref_vb[] buffer (two per vertex: the
+ * even entry takes the first motion vector, the odd entry the second one,
+ * which is only written for non-frame motion types).
+ *
+ * The switch deliberately falls through: bi-predicted macroblocks fill
+ * ref_vb[1], then ref_vb[0], then the position/texcoord stream;
+ * forward/backward predicted ones fill ref_vb[0] and the stream; intra
+ * ones only the stream. Always returns 0.
+ */
+static inline int vlGenMacroblockVerts
+(
+ struct vlR16SnormBufferedMC *mc,
+ struct vlMpeg2MacroBlock *macroblock,
+ unsigned int pos,
+ struct vlMacroBlockVertexStream0 *ycbcr_vb,
+ struct vlVertex2f **ref_vb
+)
+{
+ struct vlVertex2f mo_vec[2];
+ unsigned int i;
+
+ assert(mc);
+ assert(macroblock);
+ assert(ycbcr_vb);
+ assert(pos < mc->macroblocks_per_picture);
+
+ switch (macroblock->mb_type)
+ {
+ case vlMacroBlockTypeBiPredicted:
+ {
+ struct vlVertex2f *vb;
+
+ assert(ref_vb && ref_vb[1]);
+
+ vb = ref_vb[1] + pos * 2 * 24;
+
+ /* Second reference uses PMV[..][1]; vectors are halved and scaled by 1/texture size */
+ mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
+ mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;
+
+ if (macroblock->mo_type == vlMotionTypeFrame)
+ {
+ /* Frame motion: only the even entries are written */
+ for (i = 0; i < 24 * 2; i += 2)
+ {
+ vb[i].x = mo_vec[0].x;
+ vb[i].y = mo_vec[0].y;
+ }
+ }
+ else
+ {
+ mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
+ mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;
+
+ for (i = 0; i < 24 * 2; i += 2)
+ {
+ vb[i].x = mo_vec[0].x;
+ vb[i].y = mo_vec[0].y;
+ vb[i + 1].x = mo_vec[1].x;
+ vb[i + 1].y = mo_vec[1].y;
+ }
+ }
+
+ /* fall-through */
+ }
+ case vlMacroBlockTypeFwdPredicted:
+ case vlMacroBlockTypeBkwdPredicted:
+ {
+ struct vlVertex2f *vb;
+
+ assert(ref_vb && ref_vb[0]);
+
+ vb = ref_vb[0] + pos * 2 * 24;
+
+ /* Backward-only macroblocks take PMV[..][1]; forward and bi-predicted ones take PMV[..][0] here */
+ if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted)
+ {
+ mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
+ mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;
+
+ if (macroblock->mo_type == vlMotionTypeField)
+ {
+ mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
+ mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;
+ }
+ }
+ else
+ {
+ mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x;
+ mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y;
+
+ if (macroblock->mo_type == vlMotionTypeField)
+ {
+ mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x;
+ mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y;
+ }
+ }
+
+ if (macroblock->mo_type == vlMotionTypeFrame)
+ {
+ for (i = 0; i < 24 * 2; i += 2)
+ {
+ vb[i].x = mo_vec[0].x;
+ vb[i].y = mo_vec[0].y;
+ }
+ }
+ else
+ {
+ for (i = 0; i < 24 * 2; i += 2)
+ {
+ vb[i].x = mo_vec[0].x;
+ vb[i].y = mo_vec[0].y;
+ vb[i + 1].x = mo_vec[1].x;
+ vb[i + 1].y = mo_vec[1].y;
+ }
+ }
+
+ /* fall-through */
+ }
+ case vlMacroBlockTypeIntra:
+ {
+ /* Size of a whole macroblock and of one quadrant of it, scaled by 1/texture size */
+ const struct vlVertex2f unit =
+ {
+ mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH,
+ mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT
+ };
+ const struct vlVertex2f half =
+ {
+ mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2),
+ mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2)
+ };
+
+ struct vlMacroBlockVertexStream0 *vb;
+
+ vb = ycbcr_vb + pos * 24;
+
+ /*
+ * One quad per quadrant: top-left, top-right, bottom-left,
+ * bottom-right. The luma masks 32, 16, 8, 4 are cbp bits 5..2; the
+ * chroma masks 2 (cb) and 1 (cr) are the same for all four quads.
+ */
+ SET_BLOCK
+ (
+ vb,
+ macroblock->cbp, macroblock->mbx, macroblock->mby,
+ unit.x, unit.y, 0, 0, half.x, half.y,
+ 32, 2, 1, mc->zero_block
+ );
+
+ SET_BLOCK
+ (
+ vb + 6,
+ macroblock->cbp, macroblock->mbx, macroblock->mby,
+ unit.x, unit.y, half.x, 0, half.x, half.y,
+ 16, 2, 1, mc->zero_block
+ );
+
+ SET_BLOCK
+ (
+ vb + 12,
+ macroblock->cbp, macroblock->mbx, macroblock->mby,
+ unit.x, unit.y, 0, half.y, half.x, half.y,
+ 8, 2, 1, mc->zero_block
+ );
+
+ SET_BLOCK
+ (
+ vb + 18,
+ macroblock->cbp, macroblock->mbx, macroblock->mby,
+ unit.x, unit.y, half.x, half.y, half.x, half.y,
+ 4, 2, 1, mc->zero_block
+ );
+
+ break;
+ }
+ default:
+ assert(0);
+ }
+
+ return 0;
+}
+
+/*
+ * Renders the buffered macroblocks to the buffered surface.
+ *
+ * Does nothing (and returns 0) when no surface is being buffered or when
+ * fewer than macroblocks_per_picture macroblocks have been collected.
+ * Otherwise it:
+ * - counts the macroblocks per extended type and generates their vertices
+ * grouped by type, in enum order;
+ * - unmaps and releases the three texture transfers;
+ * - binds the surface's texture as render target and uploads the vertex
+ * shader constants;
+ * - issues one draw call per extended type that has macroblocks;
+ * - flushes the pipe and resets the buffering state.
+ * Always returns 0.
+ */
+static int vlFlush
+(
+ struct vlRender *render
+)
+{
+ struct vlR16SnormBufferedMC *mc;
+ struct pipe_context *pipe;
+ struct vlVertexShaderConsts *vs_consts;
+ unsigned int num_macroblocks[vlNumMacroBlockExTypes] = {0};
+ unsigned int offset[vlNumMacroBlockExTypes];
+ unsigned int vb_start = 0;
+ unsigned int i;
+
+ assert(render);
+
+ mc = (struct vlR16SnormBufferedMC*)render;
+
+ if (!mc->buffered_surface)
+ return 0;
+
+ /* Only a complete picture is rendered; partial pictures stay buffered */
+ if (mc->num_macroblocks < mc->macroblocks_per_picture)
+ return 0;
+
+ assert(mc->num_macroblocks <= mc->macroblocks_per_picture);
+
+ pipe = mc->pipe;
+
+ for (i = 0; i < mc->num_macroblocks; ++i)
+ {
+ enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]);
+
+ num_macroblocks[mb_type_ex]++;
+ }
+
+ /* offset[t] = index of the first vertex buffer slot used by type t (running sum of the counts) */
+ offset[0] = 0;
+
+ for (i = 1; i < vlNumMacroBlockExTypes; ++i)
+ offset[i] = offset[i - 1] + num_macroblocks[i - 1];
+
+ {
+ struct vlMacroBlockVertexStream0 *ycbcr_vb;
+ struct vlVertex2f *ref_vb[2];
+
+ ycbcr_vb = (struct vlMacroBlockVertexStream0*)pipe_buffer_map
+ (
+ pipe->screen,
+ mc->vertex_bufs.ycbcr.buffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+ );
+
+ for (i = 0; i < 2; ++i)
+ ref_vb[i] = (struct vlVertex2f*)pipe_buffer_map
+ (
+ pipe->screen,
+ mc->vertex_bufs.ref[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+ );
+
+ /* Each macroblock goes into the next free slot of its type's range */
+ for (i = 0; i < mc->num_macroblocks; ++i)
+ {
+ enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]);
+
+ vlGenMacroblockVerts(mc, &mc->macroblocks[i], offset[mb_type_ex], ycbcr_vb, ref_vb);
+
+ offset[mb_type_ex]++;
+ }
+
+ pipe_buffer_unmap(pipe->screen, mc->vertex_bufs.ycbcr.buffer);
+ for (i = 0; i < 2; ++i)
+ pipe_buffer_unmap(pipe->screen, mc->vertex_bufs.ref[i].buffer);
+ }
+
+ /* Block data was written through these transfers while buffering; finish them before drawing */
+ for (i = 0; i < 3; ++i)
+ {
+ pipe->screen->transfer_unmap(pipe->screen, mc->tex_transfer[i]);
+ pipe->screen->tex_transfer_release(pipe->screen, &mc->tex_transfer[i]);
+ }
+
+ mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
+ (
+ pipe->screen,
+ mc->buffered_surface->texture,
+ 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+ );
+
+ pipe->set_framebuffer_state(pipe, &mc->render_target);
+ pipe->set_viewport_state(pipe, &mc->viewport);
+ vs_consts = pipe_buffer_map
+ (
+ pipe->screen,
+ mc->vs_const_buf.buffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
+ );
+
+ /* Only x and y are written; z and w of denorm are left as mapped */
+ vs_consts->denorm.x = mc->buffered_surface->texture->width[0];
+ vs_consts->denorm.y = mc->buffered_surface->texture->height[0];
+
+ pipe_buffer_unmap(pipe->screen, mc->vs_const_buf.buffer);
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf);
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf);
+
+ /*
+ * One draw per extended type, in enum order. vb_start advances by 24
+ * vertices per macroblock so each draw picks up where the previous type's
+ * vertices end.
+ */
+ if (num_macroblocks[vlMacroBlockExTypeIntra] > 0)
+ {
+ pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs.all);
+ pipe->set_vertex_elements(pipe, 4, mc->vertex_elems);
+ pipe->set_sampler_textures(pipe, 3, mc->textures.all);
+ pipe->bind_sampler_states(pipe, 3, mc->samplers.all);
+ pipe->bind_vs_state(pipe, mc->i_vs);
+ pipe->bind_fs_state(pipe, mc->i_fs);
+
+ pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeIntra] * 24);
+ vb_start += num_macroblocks[vlMacroBlockExTypeIntra] * 24;
+ }
+
+ /* Forward prediction: the single reference texture is the past surface */
+ if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0)
+ {
+ pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all);
+ pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
+ mc->textures.ref[0] = mc->past_surface->texture;
+ pipe->set_sampler_textures(pipe, 4, mc->textures.all);
+ pipe->bind_sampler_states(pipe, 4, mc->samplers.all);
+ pipe->bind_vs_state(pipe, mc->p_vs[0]);
+ pipe->bind_fs_state(pipe, mc->p_fs[0]);
+
+ pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24);
+ vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24;
+ }
+
+ if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0)
+ {
+ pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all);
+ pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
+ mc->textures.ref[0] = mc->past_surface->texture;
+ pipe->set_sampler_textures(pipe, 4, mc->textures.all);
+ pipe->bind_sampler_states(pipe, 4, mc->samplers.all);
+ pipe->bind_vs_state(pipe, mc->p_vs[1]);
+ pipe->bind_fs_state(pipe, mc->p_fs[1]);
+
+ pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24);
+ vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24;
+ }
+
+ /* Backward prediction: same shaders as forward, but the reference is the future surface */
+ if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0)
+ {
+ pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all);
+ pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
+ mc->textures.ref[0] = mc->future_surface->texture;
+ pipe->set_sampler_textures(pipe, 4, mc->textures.all);
+ pipe->bind_sampler_states(pipe, 4, mc->samplers.all);
+ pipe->bind_vs_state(pipe, mc->p_vs[0]);
+ pipe->bind_fs_state(pipe, mc->p_fs[0]);
+
+ pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24);
+ vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24;
+ }
+
+ if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0)
+ {
+ pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all);
+ pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
+ mc->textures.ref[0] = mc->future_surface->texture;
+ pipe->set_sampler_textures(pipe, 4, mc->textures.all);
+ pipe->bind_sampler_states(pipe, 4, mc->samplers.all);
+ pipe->bind_vs_state(pipe, mc->p_vs[1]);
+ pipe->bind_fs_state(pipe, mc->p_fs[1]);
+
+ pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24);
+ vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24;
+ }
+
+ /* Bi-prediction: both reference textures and all three vertex buffers are bound */
+ if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0)
+ {
+ pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs.all);
+ pipe->set_vertex_elements(pipe, 8, mc->vertex_elems);
+ mc->textures.ref[0] = mc->past_surface->texture;
+ mc->textures.ref[1] = mc->future_surface->texture;
+ pipe->set_sampler_textures(pipe, 5, mc->textures.all);
+ pipe->bind_sampler_states(pipe, 5, mc->samplers.all);
+ pipe->bind_vs_state(pipe, mc->b_vs[0]);
+ pipe->bind_fs_state(pipe, mc->b_fs[0]);
+
+ pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24);
+ vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24;
+ }
+
+ if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0)
+ {
+ pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs.all);
+ pipe->set_vertex_elements(pipe, 8, mc->vertex_elems);
+ mc->textures.ref[0] = mc->past_surface->texture;
+ mc->textures.ref[1] = mc->future_surface->texture;
+ pipe->set_sampler_textures(pipe, 5, mc->textures.all);
+ pipe->bind_sampler_states(pipe, 5, mc->samplers.all);
+ pipe->bind_vs_state(pipe, mc->b_vs[1]);
+ pipe->bind_fs_state(pipe, mc->b_fs[1]);
+
+ pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24);
+ vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24;
+ }
+
+ pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &mc->buffered_surface->render_fence);
+ pipe_surface_reference(&mc->render_target.cbufs[0], NULL);
+
+ /* A negative x marks "no zero block recorded" for the next picture */
+ for (i = 0; i < 3; ++i)
+ mc->zero_block[i].x = -1.0f;
+
+ mc->buffered_surface = NULL;
+ mc->num_macroblocks = 0;
+
+ return 0;
+}
+
+/*
+ * Buffers a batch of MPEG-2 macroblocks destined for `surface`.
+ *
+ * When `surface` differs from the surface currently being buffered (or
+ * nothing is being buffered), vlFlush() is attempted for the old surface,
+ * the new surface and the batch's reference surfaces are recorded, and the
+ * three plane textures are mapped for writing. Every macroblock of the
+ * batch is then handed to vlGrabMacroBlock().
+ *
+ * render:  the renderer; must be a struct vlR16SnormBufferedMC.
+ * batch:   macroblocks plus past/future reference surfaces; must not be NULL.
+ * surface: destination surface; must not be NULL.
+ *
+ * Always returns 0.
+ */
+static int vlRenderMacroBlocksMpeg2R16SnormBuffered
+(
+    struct vlRender *render,
+    struct vlMpeg2MacroBlockBatch *batch,
+    struct vlSurface *surface
+)
+{
+    struct vlR16SnormBufferedMC *mc;
+    bool new_surface = false;
+    unsigned int i;
+
+    assert(render);
+    /* Both are dereferenced unconditionally below; check them like every other pointer argument in this file */
+    assert(batch);
+    assert(surface);
+
+    mc = (struct vlR16SnormBufferedMC*)render;
+
+    if (mc->buffered_surface)
+    {
+        if (mc->buffered_surface != surface)
+        {
+            /*
+             * NOTE(review): vlFlush() returns without rendering or releasing
+             * the texture transfers when fewer than macroblocks_per_picture
+             * macroblocks are buffered. In that case the transfers mapped
+             * for the previous surface appear to be overwritten below
+             * without being released -- confirm whether callers can switch
+             * surfaces mid-picture.
+             */
+            vlFlush(&mc->base);
+            new_surface = true;
+        }
+    }
+    else
+        new_surface = true;
+
+    if (new_surface)
+    {
+        mc->buffered_surface = surface;
+        mc->past_surface = batch->past_surface;
+        mc->future_surface = batch->future_surface;
+        mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
+        mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
+
+        /* Map the three plane textures so vlGrabBlocks() can write block data into them */
+        for (i = 0; i < 3; ++i)
+        {
+            mc->tex_transfer[i] = mc->pipe->screen->get_tex_transfer
+            (
+                mc->pipe->screen,
+                mc->textures.all[i],
+                0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
+                surface->texture->width[0],
+                surface->texture->height[0]
+            );
+
+            mc->texels[i] = mc->pipe->screen->transfer_map(mc->pipe->screen, mc->tex_transfer[i]);
+        }
+    }
+
+    for (i = 0; i < batch->num_macroblocks; ++i)
+        vlGrabMacroBlock(mc, &batch->macroblocks[i]);
+
+    return 0;
+}
+
+/* Validates its argument and returns 0; performs no other work. */
+static inline int vlEnd(struct vlRender *render)
+{
+    assert(render);
+    return 0;
+}
+
+/*
+ * Releases everything the renderer owns: the five sampler states, the
+ * three vertex buffers, the three plane textures, all vertex and fragment
+ * shaders, both constant buffers, the macroblock array and finally the
+ * renderer itself. Always returns 0.
+ */
+static int vlDestroy(struct vlRender *render)
+{
+    struct vlR16SnormBufferedMC *mc;
+    struct pipe_context *pipe;
+    unsigned int idx;
+
+    assert(render);
+
+    mc = (struct vlR16SnormBufferedMC*)render;
+    pipe = mc->pipe;
+
+    for (idx = 0; idx < 5; ++idx)
+    {
+        pipe->delete_sampler_state(pipe, mc->samplers.all[idx]);
+    }
+
+    for (idx = 0; idx < 3; ++idx)
+    {
+        pipe_buffer_reference(pipe->screen, &mc->vertex_bufs.all[idx].buffer, NULL);
+    }
+
+    /* Textures 3 & 4 are not created directly, no need to release them here */
+    for (idx = 0; idx < 3; ++idx)
+    {
+        pipe_texture_reference(&mc->textures.all[idx], NULL);
+    }
+
+    /* Intra shaders first, then the frame/field pairs of the P and B shaders */
+    pipe->delete_vs_state(pipe, mc->i_vs);
+    pipe->delete_fs_state(pipe, mc->i_fs);
+
+    for (idx = 0; idx < 2; ++idx)
+    {
+        pipe->delete_vs_state(pipe, mc->p_vs[idx]);
+        pipe->delete_fs_state(pipe, mc->p_fs[idx]);
+        pipe->delete_vs_state(pipe, mc->b_vs[idx]);
+        pipe->delete_fs_state(pipe, mc->b_fs[idx]);
+    }
+
+    pipe_buffer_reference(pipe->screen, &mc->vs_const_buf.buffer, NULL);
+    pipe_buffer_reference(pipe->screen, &mc->fs_const_buf.buffer, NULL);
+
+    FREE(mc->macroblocks);
+    FREE(mc);
+
+    return 0;
+}
+
+/*
+ * Multiplier renormalizes block samples from 16 bits to 12 bits.
+ * Divider is used when calculating Y % 2 for choosing top or bottom
+ * field for P or B macroblocks.
+ * TODO: Use immediates.
+ */
+static const struct vlFragmentShaderConsts fs_consts =
+{
+ {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, /* multiplier */
+ {0.5f, 2.0f, 0.0f, 0.0f} /* div */
+};
+
+#include "vl_r16snorm_mc_buf_shaders.inc"
+
+/*
+ * Creates the vertex buffers, the vertex element layout, the shader
+ * constant buffers and the macroblock array used by the buffered renderer,
+ * and computes mc->macroblocks_per_picture from the picture size.
+ *
+ * Returns 0 on success, 1 if the fragment shader constant buffer could not
+ * be mapped or the macroblock array could not be allocated. All remaining
+ * setup is still carried out in the failure case so the object stays in a
+ * state vlDestroy() can handle.
+ */
+static int vlCreateDataBufs
+(
+    struct vlR16SnormBufferedMC *mc
+)
+{
+    struct pipe_context *pipe;
+    unsigned int mbw, mbh;
+    void *fs_const_map;
+    unsigned int i;
+    int ret = 0;
+
+    /* Check mc before it is dereferenced for the first time */
+    assert(mc);
+
+    pipe = mc->pipe;
+
+    /* Picture size in macroblocks; align() is expected to round up to a whole macroblock */
+    mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH;
+    mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT;
+    mc->macroblocks_per_picture = mbw * mbh;
+
+    /* Create our vertex buffers; every macroblock uses 24 vertices */
+    mc->vertex_bufs.ycbcr.stride = sizeof(struct vlVertex2f) * 4;
+    mc->vertex_bufs.ycbcr.max_index = 24 * mc->macroblocks_per_picture - 1;
+    mc->vertex_bufs.ycbcr.buffer_offset = 0;
+    mc->vertex_bufs.ycbcr.buffer = pipe_buffer_create
+    (
+        pipe->screen,
+        DEFAULT_BUF_ALIGNMENT,
+        PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
+        sizeof(struct vlVertex2f) * 4 * 24 * mc->macroblocks_per_picture
+    );
+
+    /* all[1] and all[2] are the two reference buffers (two vlVertex2f per vertex) */
+    for (i = 1; i < 3; ++i)
+    {
+        mc->vertex_bufs.all[i].stride = sizeof(struct vlVertex2f) * 2;
+        mc->vertex_bufs.all[i].max_index = 24 * mc->macroblocks_per_picture - 1;
+        mc->vertex_bufs.all[i].buffer_offset = 0;
+        mc->vertex_bufs.all[i].buffer = pipe_buffer_create
+        (
+            pipe->screen,
+            DEFAULT_BUF_ALIGNMENT,
+            PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
+            sizeof(struct vlVertex2f) * 2 * 24 * mc->macroblocks_per_picture
+        );
+    }
+
+    /*
+     * Elements 0-3 follow the field order of struct
+     * vlMacroBlockVertexStream0: pos, luma_tc, cb_tc, cr_tc.
+     */
+
+    /* Position element */
+    mc->vertex_elems[0].src_offset = 0;
+    mc->vertex_elems[0].vertex_buffer_index = 0;
+    mc->vertex_elems[0].nr_components = 2;
+    mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+    /* Luma texcoord element */
+    mc->vertex_elems[1].src_offset = sizeof(struct vlVertex2f);
+    mc->vertex_elems[1].vertex_buffer_index = 0;
+    mc->vertex_elems[1].nr_components = 2;
+    mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+    /* Chroma Cb texcoord element */
+    mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f) * 2;
+    mc->vertex_elems[2].vertex_buffer_index = 0;
+    mc->vertex_elems[2].nr_components = 2;
+    mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+    /* Chroma Cr texcoord element */
+    mc->vertex_elems[3].src_offset = sizeof(struct vlVertex2f) * 3;
+    mc->vertex_elems[3].vertex_buffer_index = 0;
+    mc->vertex_elems[3].nr_components = 2;
+    mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+    /* First ref surface top field texcoord element */
+    mc->vertex_elems[4].src_offset = 0;
+    mc->vertex_elems[4].vertex_buffer_index = 1;
+    mc->vertex_elems[4].nr_components = 2;
+    mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+    /* First ref surface bottom field texcoord element */
+    mc->vertex_elems[5].src_offset = sizeof(struct vlVertex2f);
+    mc->vertex_elems[5].vertex_buffer_index = 1;
+    mc->vertex_elems[5].nr_components = 2;
+    mc->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+    /* Second ref surface top field texcoord element */
+    mc->vertex_elems[6].src_offset = 0;
+    mc->vertex_elems[6].vertex_buffer_index = 2;
+    mc->vertex_elems[6].nr_components = 2;
+    mc->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+    /* Second ref surface bottom field texcoord element */
+    mc->vertex_elems[7].src_offset = sizeof(struct vlVertex2f);
+    mc->vertex_elems[7].vertex_buffer_index = 2;
+    mc->vertex_elems[7].nr_components = 2;
+    mc->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+    /* Create our constant buffers */
+    mc->vs_const_buf.buffer = pipe_buffer_create
+    (
+        pipe->screen,
+        DEFAULT_BUF_ALIGNMENT,
+        PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
+        sizeof(struct vlVertexShaderConsts)
+    );
+
+    mc->fs_const_buf.buffer = pipe_buffer_create
+    (
+        pipe->screen,
+        DEFAULT_BUF_ALIGNMENT,
+        PIPE_BUFFER_USAGE_CONSTANT,
+        sizeof(struct vlFragmentShaderConsts)
+    );
+
+    /* The fragment shader constants never change, so upload them once here */
+    fs_const_map = pipe_buffer_map(pipe->screen, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+    if (fs_const_map)
+    {
+        memcpy(fs_const_map, &fs_consts, sizeof(struct vlFragmentShaderConsts));
+        pipe_buffer_unmap(pipe->screen, mc->fs_const_buf.buffer);
+    }
+    else
+    {
+        /* Do not copy through a NULL mapping; report the failure instead */
+        ret = 1;
+    }
+
+    mc->macroblocks = MALLOC(sizeof(struct vlMpeg2MacroBlock) * mc->macroblocks_per_picture);
+
+    if (!mc->macroblocks)
+        ret = 1;
+
+    return ret;
+}
+
+/*
+ * Initializes the renderer state of mc that does not depend on any
+ * particular frame.
+ *
+ * Reads mc->pipe, mc->picture_width, mc->picture_height and
+ * mc->picture_format, so the caller must have filled those in already.
+ * Sets up the viewport and render target dimensions, creates five sampler
+ * states and the three Y/Cb/Cr source textures, then builds every shader
+ * and the data buffers.
+ *
+ * Always returns 0; results of the individual creation calls are not checked.
+ */
+static int vlInit
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	struct pipe_context *pipe;
+	struct pipe_sampler_state sampler;
+	struct pipe_texture template;
+	unsigned int filters[5];
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+
+	/* No surfaces are bound and no macroblocks are queued yet */
+	mc->buffered_surface = NULL;
+	mc->past_surface = NULL;
+	mc->future_surface = NULL;
+	for (i = 0; i < 3; ++i)
+	{
+		mc->zero_block[i].x = -1.0f;
+	}
+	mc->num_macroblocks = 0;
+
+	/* For MC we render to textures, which are rounded up to nearest POT */
+	mc->viewport.scale[0] = vlRoundUpPOT(mc->picture_width);
+	mc->viewport.scale[1] = vlRoundUpPOT(mc->picture_height);
+	mc->viewport.scale[2] = 1;
+	mc->viewport.scale[3] = 1;
+	mc->viewport.translate[0] = 0;
+	mc->viewport.translate[1] = 0;
+	mc->viewport.translate[2] = 0;
+	mc->viewport.translate[3] = 0;
+
+	mc->render_target.width = vlRoundUpPOT(mc->picture_width);
+	mc->render_target.height = vlRoundUpPOT(mc->picture_height);
+	mc->render_target.nr_cbufs = 1;
+	/* FB for MC stage is a vlSurface created by the user, set at render time */
+	mc->render_target.zsbuf = NULL;
+
+	/* Samplers 0-2 read the Y/Cb/Cr textures, samplers 3-4 the ref surfaces */
+	filters[0] = PIPE_TEX_FILTER_NEAREST;
+	/* FIXME: Linear causes discoloration around block edges */
+	filters[1] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
+	filters[2] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
+	filters[3] = PIPE_TEX_FILTER_LINEAR;
+	filters[4] = PIPE_TEX_FILTER_LINEAR;
+
+	/*
+	 * Only some of the sampler fields are assigned below. Zero the whole
+	 * struct first so the driver never sees uninitialized stack contents in
+	 * the fields left alone (prefilter, shadow_ambient, lod_bias, max_lod,
+	 * border_color, max_anisotropy).
+	 */
+	memset(&sampler, 0, sizeof(struct pipe_sampler_state));
+
+	for (i = 0; i < 5; ++i)
+	{
+		sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+		sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+		sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+		sampler.min_img_filter = filters[i];
+		sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+		sampler.mag_img_filter = filters[i];
+		sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+		sampler.compare_func = PIPE_FUNC_ALWAYS;
+		sampler.normalized_coords = 1;
+		sampler.min_lod = 0;
+		mc->samplers.all[i] = pipe->create_sampler_state(pipe, &sampler);
+	}
+
+	/* Luma texture covers the full (POT-rounded) picture */
+	memset(&template, 0, sizeof(struct pipe_texture));
+	template.target = PIPE_TEXTURE_2D;
+	template.format = PIPE_FORMAT_R16_SNORM;
+	template.last_level = 0;
+	template.width[0] = vlRoundUpPOT(mc->picture_width);
+	template.height[0] = vlRoundUpPOT(mc->picture_height);
+	template.depth[0] = 1;
+	template.compressed = 0;
+	pf_get_block(template.format, &template.block);
+	template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
+
+	mc->textures.y = pipe->screen->texture_create(pipe->screen, &template);
+
+	/* Chroma textures reuse the template with dimensions reduced per format */
+	if (mc->picture_format == vlFormatYCbCr420)
+	{
+		template.width[0] = vlRoundUpPOT(mc->picture_width / 2);
+		template.height[0] = vlRoundUpPOT(mc->picture_height / 2);
+	}
+	else if (mc->picture_format == vlFormatYCbCr422)
+	{
+		/*
+		 * NOTE(review): only the height is halved here; 4:2:2 content is
+		 * usually subsampled horizontally, so confirm this against the
+		 * code that fills these textures.
+		 */
+		template.height[0] = vlRoundUpPOT(mc->picture_height / 2);
+	}
+
+	mc->textures.cb = pipe->screen->texture_create(pipe->screen, &template);
+	mc->textures.cr = pipe->screen->texture_create(pipe->screen, &template);
+
+	/* textures.all[3] & textures.all[4] are assigned from vlSurfaces for P and B macroblocks at render time */
+
+	vlCreateVertexShaderIMB(mc);
+	vlCreateFragmentShaderIMB(mc);
+	vlCreateVertexShaderFramePMB(mc);
+	vlCreateVertexShaderFieldPMB(mc);
+	vlCreateFragmentShaderFramePMB(mc);
+	vlCreateFragmentShaderFieldPMB(mc);
+	vlCreateVertexShaderFrameBMB(mc);
+	vlCreateVertexShaderFieldBMB(mc);
+	vlCreateFragmentShaderFrameBMB(mc);
+	vlCreateFragmentShaderFieldBMB(mc);
+	vlCreateDataBufs(mc);
+
+	return 0;
+}
+
+/*
+ * Allocates a buffered R16 SNORM motion compensation renderer and hands its
+ * vlRender interface back through *render.
+ *
+ * pipe            Context used for all state and resource creation; must not be NULL.
+ * picture_width   Picture width handed to vlInit() for sizing textures and viewport.
+ * picture_height  Picture height, used the same way.
+ * picture_format  Chroma format; vlInit() uses it to size the Cb/Cr textures.
+ * render          Receives the new renderer; must not be NULL.
+ *
+ * Returns 0 on success, nonzero if the renderer could not be allocated
+ * (*render is left untouched in that case).
+ */
+int vlCreateR16SNormBufferedMC
+(
+	struct pipe_context *pipe,
+	unsigned int picture_width,
+	unsigned int picture_height,
+	enum vlFormat picture_format,
+	struct vlRender **render
+)
+{
+	struct vlR16SnormBufferedMC *mc;
+
+	assert(pipe);
+	assert(render);
+
+	mc = CALLOC_STRUCT(vlR16SnormBufferedMC);
+
+	if (!mc)
+		return 1;
+
+	mc->base.vlBegin = &vlBegin;
+	mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered;
+	mc->base.vlEnd = &vlEnd;
+	mc->base.vlFlush = &vlFlush;
+	mc->base.vlDestroy = &vlDestroy;
+	mc->pipe = pipe;
+	mc->picture_width = picture_width;
+	mc->picture_height = picture_height;
+	/* vlInit() branches on this field, so it has to be stored before the call */
+	mc->picture_format = picture_format;
+
+	vlInit(mc);
+
+	*render = &mc->base;
+
+	return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h
new file mode 100644
index 0000000000..27177d64ca
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h
@@ -0,0 +1,18 @@
+#ifndef vl_r16snorm_mc_buf_h
+#define vl_r16snorm_mc_buf_h
+
+#include "vl_types.h"
+
+/* Only pointers to these are needed here, so forward declarations suffice */
+struct pipe_context;
+struct vlRender;
+
+/*
+ * Creates a buffered R16 SNORM motion compensation renderer on the given
+ * pipe context and stores a pointer to its vlRender interface in *render.
+ *
+ * pipe and render must not be NULL (the implementation asserts on both).
+ * picture_width and picture_height give the picture dimensions;
+ * picture_format gives the chroma format of the pictures to be rendered.
+ *
+ * Returns 0 on success.
+ */
+int vlCreateR16SNormBufferedMC
+(
+ struct pipe_context *pipe,
+ unsigned int picture_width,
+ unsigned int picture_height,
+ enum vlFormat picture_format,
+ struct vlRender **render
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc
new file mode 100644
index 0000000000..ef4a4b2add
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc
@@ -0,0 +1,1185 @@
+/*
+ * Builds the pass-through vertex shader stored in mc->i_vs
+ * (presumably the intra-coded macroblock path -- TODO confirm naming).
+ *
+ * The shader declares four inputs and four outputs (position plus three
+ * texcoord sets) and copies each input to the output with the same index.
+ *
+ * Returns 0 on success, nonzero if the temporary token buffer could not be
+ * allocated (mc->i_vs is left untouched in that case).
+ */
+static int vlCreateVertexShaderIMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 50;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state vs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+
+	/* The token stream is written right below; do not dereference a failed allocation */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+	/* Three tokens are in place; everything else is appended at index ti */
+	ti = 3;
+
+	/*
+	 * decl i0		; Vertex pos
+	 * decl i1		; Luma texcoords
+	 * decl i2		; Chroma Cb texcoords
+	 * decl i3		; Chroma Cr texcoords
+	 */
+	for (i = 0; i < 4; ++i)
+	{
+		decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * decl o0		; Vertex pos
+	 * decl o1		; Luma texcoords
+	 * decl o2		; Chroma Cb texcoords
+	 * decl o3		; Chroma Cr texcoords
+	 */
+	for (i = 0; i < 4; ++i)
+	{
+		decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * mov o0, i0		; Move input vertex pos to output
+	 * mov o1, i1		; Move input luma texcoords to output
+	 * mov o2, i2		; Move input chroma Cb texcoords to output
+	 * mov o3, i3		; Move input chroma Cr texcoords to output
+	 */
+	for (i = 0; i < 4; ++i)
+	{
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* The token buffer is released as soon as the driver has created its shader state */
+	vs.tokens = tokens;
+	mc->i_vs = pipe->create_vs_state(pipe, &vs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Builds the fragment shader stored in mc->i_fs
+ * (presumably the intra-coded macroblock path -- TODO confirm naming).
+ *
+ * The shader samples the luma, Cb and Cr textures, packs the .x component
+ * of each sample into t0.x/.y/.z and writes t0 * c0 to the color output.
+ *
+ * Returns 0 on success, nonzero if the temporary token buffer could not be
+ * allocated (mc->i_fs is left untouched in that case).
+ */
+static int vlCreateFragmentShaderIMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 100;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state fs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+
+	/* The token stream is written right below; do not dereference a failed allocation */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+	/* Three tokens are in place; everything else is appended at index ti */
+	ti = 3;
+
+	/*
+	 * decl i0			; Luma texcoords
+	 * decl i1			; Chroma Cb texcoords
+	 * decl i2			; Chroma Cr texcoords
+	 */
+	for (i = 0; i < 3; ++i)
+	{
+		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* decl c0			; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
+	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl o0			; Fragment color */
+	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl t0, t1 */
+	decl = vl_decl_temps(0, 1);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * decl s0			; Sampler for luma texture
+	 * decl s1			; Sampler for chroma Cb texture
+	 * decl s2			; Sampler for chroma Cr texture
+	 */
+	for (i = 0; i < 3; ++i)
+	{
+		decl = vl_decl_samplers(i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * tex2d t1, i0, s0		; Read texel from luma texture
+	 * mov t0.x, t1.x		; Move luma sample into .x component
+	 * tex2d t1, i1, s1		; Read texel from chroma Cb texture
+	 * mov t0.y, t1.x		; Move Cb sample into .y component
+	 * tex2d t1, i2, s2		; Read texel from chroma Cr texture
+	 * mov t0.z, t1.x		; Move Cr sample into .z component
+	 */
+	for (i = 0; i < 3; ++i)
+	{
+		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+		/* Broadcast t1.x and let the write mask pick destination channel i */
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* mul o0, t0, c0		; Rescale texel to correct range */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* The token buffer is released as soon as the driver has created its shader state */
+	fs.tokens = tokens;
+	mc->i_fs = pipe->create_fs_state(pipe, &fs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Builds the vertex shader stored in mc->p_vs[0]
+ * (presumably frame-predicted P macroblocks -- TODO confirm naming).
+ *
+ * The shader copies inputs 0-3 to outputs 0-3 and writes i0 + i4 to o4 as
+ * the reference texcoords. Input 5 is declared but never read.
+ *
+ * Returns 0 on success, nonzero if the temporary token buffer could not be
+ * allocated (mc->p_vs[0] is left untouched in that case).
+ */
+static int vlCreateVertexShaderFramePMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 100;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state vs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+
+	/* The token stream is written right below; do not dereference a failed allocation */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+	/* Three tokens are in place; everything else is appended at index ti */
+	ti = 3;
+
+	/*
+	 * decl i0		; Vertex pos
+	 * decl i1		; Luma texcoords
+	 * decl i2		; Chroma Cb texcoords
+	 * decl i3		; Chroma Cr texcoords
+	 * decl i4		; Ref surface top field texcoords
+	 * decl i5		; Ref surface bottom field texcoords (unused, packed in the same stream)
+	 */
+	for (i = 0; i < 6; ++i)
+	{
+		decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * decl o0		; Vertex pos
+	 * decl o1		; Luma texcoords
+	 * decl o2		; Chroma Cb texcoords
+	 * decl o3		; Chroma Cr texcoords
+	 * decl o4		; Ref macroblock texcoords
+	 */
+	for (i = 0; i < 5; ++i)
+	{
+		decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * mov o0, i0		; Move input vertex pos to output
+	 * mov o1, i1		; Move input luma texcoords to output
+	 * mov o2, i2		; Move input chroma Cb texcoords to output
+	 * mov o3, i3		; Move input chroma Cr texcoords to output
+	 */
+	for (i = 0; i < 4; ++i)
+	{
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* add o4, i0, i4	; Translate vertex pos by motion vec to form ref macroblock texcoords */
+	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* The token buffer is released as soon as the driver has created its shader state */
+	vs.tokens = tokens;
+	mc->p_vs[0] = pipe->create_vs_state(pipe, &vs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Builds the vertex shader stored in mc->p_vs[1]
+ * (presumably field-predicted P macroblocks -- TODO confirm naming).
+ *
+ * The shader copies inputs 0-3 to outputs 0-3, writes i0 + i4 and i0 + i5
+ * to o4 and o5 as the top and bottom field reference texcoords, and writes
+ * i0 * c0 to o6.
+ *
+ * Returns 0 on success, nonzero if the temporary token buffer could not be
+ * allocated (mc->p_vs[1] is left untouched in that case).
+ */
+static int vlCreateVertexShaderFieldPMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 100;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state vs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+
+	/* The token stream is written right below; do not dereference a failed allocation */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+	/* Three tokens are in place; everything else is appended at index ti */
+	ti = 3;
+
+	/*
+	 * decl i0		; Vertex pos
+	 * decl i1		; Luma texcoords
+	 * decl i2		; Chroma Cb texcoords
+	 * decl i3		; Chroma Cr texcoords
+	 * decl i4		; Ref macroblock top field texcoords
+	 * decl i5		; Ref macroblock bottom field texcoords
+	 */
+	for (i = 0; i < 6; ++i)
+	{
+		decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* decl c0		; Render target dimensions */
+	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * decl o0		; Vertex pos
+	 * decl o1		; Luma texcoords
+	 * decl o2		; Chroma Cb texcoords
+	 * decl o3		; Chroma Cr texcoords
+	 * decl o4		; Ref macroblock top field texcoords
+	 * decl o5		; Ref macroblock bottom field texcoords
+	 * decl o6		; Denormalized vertex pos
+	 */
+	for (i = 0; i < 7; ++i)
+	{
+		decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * mov o0, i0		; Move input vertex pos to output
+	 * mov o1, i1		; Move input luma texcoords to output
+	 * mov o2, i2		; Move input chroma Cb texcoords to output
+	 * mov o3, i3		; Move input chroma Cr texcoords to output
+	 */
+	for (i = 0; i < 4; ++i)
+	{
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * add o4, i0, i4	; Translate vertex pos by motion vec to form top field macroblock texcoords
+	 * add o5, i0, i5	; Translate vertex pos by motion vec to form bottom field macroblock texcoords
+	 */
+	for (i = 0; i < 2; ++i)
+	{
+		inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* mul o6, i0, c0	; Denorm vertex pos */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* The token buffer is released as soon as the driver has created its shader state */
+	vs.tokens = tokens;
+	mc->p_vs[1] = pipe->create_vs_state(pipe, &vs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Builds the fragment shader stored in mc->p_fs[0]
+ * (presumably frame-predicted P macroblocks -- TODO confirm naming).
+ *
+ * The shader samples the luma, Cb and Cr textures into t0.x/.y/.z, scales
+ * t0 by c0, samples the reference texture through sampler 3 and writes the
+ * sum of both to the color output.
+ *
+ * Returns 0 on success, nonzero if the temporary token buffer could not be
+ * allocated (mc->p_fs[0] is left untouched in that case).
+ */
+static int vlCreateFragmentShaderFramePMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 100;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state fs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+
+	/* The token stream is written right below; do not dereference a failed allocation */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+	/* Three tokens are in place; everything else is appended at index ti */
+	ti = 3;
+
+	/*
+	 * decl i0			; Luma texcoords
+	 * decl i1			; Chroma Cb texcoords
+	 * decl i2			; Chroma Cr texcoords
+	 * decl i3			; Ref macroblock texcoords
+	 */
+	for (i = 0; i < 4; ++i)
+	{
+		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* decl c0			; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
+	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl o0			; Fragment color */
+	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl t0, t1 */
+	decl = vl_decl_temps(0, 1);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * decl s0			; Sampler for luma texture
+	 * decl s1			; Sampler for chroma Cb texture
+	 * decl s2			; Sampler for chroma Cr texture
+	 * decl s3			; Sampler for ref surface texture
+	 */
+	for (i = 0; i < 4; ++i)
+	{
+		decl = vl_decl_samplers(i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * tex2d t1, i0, s0		; Read texel from luma texture
+	 * mov t0.x, t1.x		; Move luma sample into .x component
+	 * tex2d t1, i1, s1		; Read texel from chroma Cb texture
+	 * mov t0.y, t1.x		; Move Cb sample into .y component
+	 * tex2d t1, i2, s2		; Read texel from chroma Cr texture
+	 * mov t0.z, t1.x		; Move Cr sample into .z component
+	 */
+	for (i = 0; i < 3; ++i)
+	{
+		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+		/* Broadcast t1.x and let the write mask pick destination channel i */
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* mul t0, t0, c0		; Rescale texel to correct range */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* tex2d t1, i3, s3		; Read texel from ref macroblock */
+	inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* add o0, t0, t1		; Add ref and differential to form final output */
+	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* The token buffer is released as soon as the driver has created its shader state */
+	fs.tokens = tokens;
+	mc->p_fs[0] = pipe->create_fs_state(pipe, &fs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Builds the fragment shader stored in mc->p_fs[1]
+ * (presumably field-predicted P macroblocks -- TODO confirm naming).
+ *
+ * The shader samples the luma, Cb and Cr textures into t0.x/.y/.z and
+ * scales t0 by c0. It then samples the reference texture twice through
+ * sampler 3, using the top and bottom field texcoords. From the
+ * denormalized Y position it computes Y % 2, which drives a lerp between
+ * the two reference samples. The result is added to t0 and written to the
+ * color output.
+ *
+ * Returns 0 on success, nonzero if the temporary token buffer could not be
+ * allocated (mc->p_fs[1] is left untouched in that case).
+ */
+static int vlCreateFragmentShaderFieldPMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 200;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state fs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+
+	/* The token stream is written right below; do not dereference a failed allocation */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+	/* Three tokens are in place; everything else is appended at index ti */
+	ti = 3;
+
+	/*
+	 * decl i0			; Luma texcoords
+	 * decl i1			; Chroma Cb texcoords
+	 * decl i2			; Chroma Cr texcoords
+	 * decl i3			; Ref macroblock top field texcoords
+	 * decl i4			; Ref macroblock bottom field texcoords
+	 * decl i5			; Denormalized vertex pos
+	 */
+	for (i = 0; i < 6; ++i)
+	{
+		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * decl c0			; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+	 * decl c1			; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
+	 */
+	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl o0			; Fragment color */
+	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl t0-t4 */
+	decl = vl_decl_temps(0, 4);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * decl s0			; Sampler for luma texture
+	 * decl s1			; Sampler for chroma Cb texture
+	 * decl s2			; Sampler for chroma Cr texture
+	 * decl s3			; Sampler for ref surface texture
+	 */
+	for (i = 0; i < 4; ++i)
+	{
+		decl = vl_decl_samplers(i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * tex2d t1, i0, s0		; Read texel from luma texture
+	 * mov t0.x, t1.x		; Move luma sample into .x component
+	 * tex2d t1, i1, s1		; Read texel from chroma Cb texture
+	 * mov t0.y, t1.x		; Move Cb sample into .y component
+	 * tex2d t1, i2, s2		; Read texel from chroma Cr texture
+	 * mov t0.z, t1.x		; Move Cr sample into .z component
+	 */
+	for (i = 0; i < 3; ++i)
+	{
+		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+		/* Broadcast t1.x and let the write mask pick destination channel i */
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* mul t0, t0, c0		; Rescale texel to correct range */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * tex2d t1, i3, s3		; Read texel from ref macroblock top field
+	 * tex2d t2, i4, s3		; Read texel from ref macroblock bottom field
+	 */
+	for (i = 0; i < 2; ++i)
+	{
+		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* XXX: Pos values off by 0.5? */
+	/* sub t4, i5.y, c1.x		; Sub 0.5 from denormalized pos */
+	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1);
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* mul t3, t4, c1.x		; Multiply pos Y-coord by 1/2 */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* floor t3, t3			; Get rid of fractional part */
+	inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* mul t3, t3, c1.y		; Multiply by 2 */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* sub t3, t4, t3		; Subtract from original Y to get Y % 2 */
+	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
+	/* lerp t1, t3, t1, t2		; Choose between top and bottom fields based on Y % 2 */
+	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* add o0, t0, t1		; Add ref and differential to form final output */
+	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* The token buffer is released as soon as the driver has created its shader state */
+	fs.tokens = tokens;
+	mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Builds the vertex shader stored in mc->b_vs[0]
+ * (presumably frame-predicted B macroblocks -- TODO confirm naming).
+ *
+ * The shader copies inputs 0-3 to outputs 0-3 and writes i0 + i4 and
+ * i0 + i6 to o4 and o5 as the texcoords for the first and second reference.
+ * Inputs 5 and 7 are declared but never read.
+ *
+ * Returns 0 on success, nonzero if the temporary token buffer could not be
+ * allocated (mc->b_vs[0] is left untouched in that case).
+ */
+static int vlCreateVertexShaderFrameBMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 100;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state vs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+
+	/* The token stream is written right below; do not dereference a failed allocation */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+	/* Three tokens are in place; everything else is appended at index ti */
+	ti = 3;
+
+	/*
+	 * decl i0		; Vertex pos
+	 * decl i1		; Luma texcoords
+	 * decl i2		; Chroma Cb texcoords
+	 * decl i3		; Chroma Cr texcoords
+	 * decl i4		; First ref macroblock top field texcoords
+	 * decl i5		; First ref macroblock bottom field texcoords (unused, packed in the same stream)
+	 * decl i6		; Second ref macroblock top field texcoords
+	 * decl i7		; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
+	 */
+	for (i = 0; i < 8; ++i)
+	{
+		decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * decl o0		; Vertex pos
+	 * decl o1		; Luma texcoords
+	 * decl o2		; Chroma Cb texcoords
+	 * decl o3		; Chroma Cr texcoords
+	 * decl o4		; First ref macroblock texcoords
+	 * decl o5		; Second ref macroblock texcoords
+	 */
+	for (i = 0; i < 6; ++i)
+	{
+		decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * mov o0, i0		; Move input vertex pos to output
+	 * mov o1, i1		; Move input luma texcoords to output
+	 * mov o2, i2		; Move input chroma Cb texcoords to output
+	 * mov o3, i3		; Move input chroma Cr texcoords to output
+	 */
+	for (i = 0; i < 4; ++i)
+	{
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * add o4, i0, i4	; Translate vertex pos by motion vec to form first ref macroblock texcoords
+	 * add o5, i0, i6	; Translate vertex pos by motion vec to form second ref macroblock texcoords
+	 */
+	for (i = 0; i < 2; ++i)
+	{
+		/* (i + 2) * 2 selects inputs 4 and 6, skipping the unused bottom field slots */
+		inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* The token buffer is released as soon as the driver has created its shader state */
+	vs.tokens = tokens;
+	mc->b_vs[0] = pipe->create_vs_state(pipe, &vs);
+	free(tokens);
+
+	return 0;
+}
+
+static int vlCreateVertexShaderFieldBMB
+(
+ struct vlR16SnormBufferedMC *mc
+)
+{
+ const unsigned int max_tokens = 100;
+
+ struct pipe_context *pipe;
+ struct pipe_shader_state vs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned int ti;
+ unsigned int i;
+ /* Builds the vertex shader for field-based macroblocks with two reference surfaces and stores it in mc->b_vs[1]. Returns 0, or 1 if the token buffer cannot be allocated. */
+ assert(mc);
+
+ pipe = mc->pipe;
+ tokens = malloc(max_tokens * sizeof(*tokens));
+ if (!tokens) return 1; /* Out of memory: bail out before the buffer is written to */
+ /* Version */
+ *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+ /* Header */
+ header = (struct tgsi_header*)&tokens[1];
+ *header = tgsi_build_header();
+ /* Processor */
+ *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+ ti = 3;
+
+ /*
+ * decl i0 ; Vertex pos
+ * decl i1 ; Luma texcoords
+ * decl i2 ; Chroma Cb texcoords
+ * decl i3 ; Chroma Cr texcoords
+ * decl i4 ; First ref macroblock top field texcoords
+ * decl i5 ; First ref macroblock bottom field texcoords
+ * decl i6 ; Second ref macroblock top field texcoords
+ * decl i7 ; Second ref macroblock bottom field texcoords
+ */
+ for (i = 0; i < 8; i++)
+ {
+ decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* decl c0 ; Render target dimensions */
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * decl o0 ; Vertex pos
+ * decl o1 ; Luma texcoords
+ * decl o2 ; Chroma Cb texcoords
+ * decl o3 ; Chroma Cr texcoords
+ * decl o4 ; First ref macroblock top field texcoords
+ * decl o5 ; First ref macroblock bottom field texcoords
+ * decl o6 ; Second ref macroblock top field texcoords
+ * decl o7 ; Second ref macroblock bottom field texcoords
+ * decl o8 ; Denormalized vertex pos
+ */
+ for (i = 0; i < 9; i++)
+ {
+ decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* decl t0, t1 */
+ decl = vl_decl_temps(0, 1);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * mov o0, i0 ; Move input vertex pos to output
+ * mov o1, i1 ; Move input luma texcoords to output
+ * mov o2, i2 ; Move input chroma Cb texcoords to output
+ * mov o3, i3 ; Move input chroma Cr texcoords to output
+ */
+ for (i = 0; i < 4; ++i)
+ {
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
+ * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
+ * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
+ * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
+ */
+ for (i = 0; i < 4; ++i)
+ {
+ inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* mul o8, i0, c0 ; Denorm vertex pos */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 8, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ vs.tokens = tokens;
+ mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
+ free(tokens);
+
+ return 0;
+}
+
+static int vlCreateFragmentShaderFrameBMB
+(
+ struct vlR16SnormBufferedMC *mc
+)
+{
+ const unsigned int max_tokens = 100;
+
+ struct pipe_context *pipe;
+ struct pipe_shader_state fs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned int ti;
+ unsigned int i;
+ /* Builds the fragment shader for frame-based macroblocks with two reference surfaces and stores it in mc->b_fs[0]. Returns 0, or 1 if the token buffer cannot be allocated. */
+ assert(mc);
+
+ pipe = mc->pipe;
+ tokens = malloc(max_tokens * sizeof(*tokens));
+ if (!tokens) return 1; /* Out of memory: bail out before the buffer is written to */
+ /* Version */
+ *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+ /* Header */
+ header = (struct tgsi_header*)&tokens[1];
+ *header = tgsi_build_header();
+ /* Processor */
+ *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+ ti = 3;
+
+ /*
+ * decl i0 ; Luma texcoords
+ * decl i1 ; Chroma Cb texcoords
+ * decl i2 ; Chroma Cr texcoords
+ * decl i3 ; First ref macroblock texcoords
+ * decl i4 ; Second ref macroblock texcoords
+ */
+ for (i = 0; i < 5; ++i)
+ {
+ decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+ * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
+ */
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl o0 ; Fragment color */
+ decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl t0-t2 */
+ decl = vl_decl_temps(0, 2);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * decl s0 ; Sampler for luma texture
+ * decl s1 ; Sampler for chroma Cb texture
+ * decl s2 ; Sampler for chroma Cr texture
+ * decl s3 ; Sampler for first ref surface texture
+ * decl s4 ; Sampler for second ref surface texture
+ */
+ for (i = 0; i < 5; ++i)
+ {
+ decl = vl_decl_samplers(i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * tex2d t1, i0, s0 ; Read texel from luma texture
+ * mov t0.x, t1.x ; Move luma sample into .x component
+ * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+ * mov t0.y, t1.x ; Move Cb sample into .y component
+ * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
+ * mov t0.z, t1.x ; Move Cr sample into .z component
+ */
+ for (i = 0; i < 3; ++i)
+ {
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* mul t0, t0, c0 ; Rescale texel to correct range */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * tex2d t1, i3, s3 ; Read texel from first ref macroblock
+ * tex2d t2, i4, s4 ; Read texel from second ref macroblock
+ */
+ for (i = 0; i < 2; ++i)
+ {
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
+ inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
+ inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ fs.tokens = tokens;
+ mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
+ free(tokens);
+
+ return 0;
+}
+
+static int vlCreateFragmentShaderFieldBMB
+(
+ struct vlR16SnormBufferedMC *mc
+)
+{
+ const unsigned int max_tokens = 200;
+
+ struct pipe_context *pipe;
+ struct pipe_shader_state fs;
+ struct tgsi_token *tokens;
+ struct tgsi_header *header;
+
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction inst;
+
+ unsigned int ti;
+ unsigned int i;
+ /* Builds the fragment shader for field-based macroblocks with two reference surfaces and stores it in mc->b_fs[1]. Returns 0, or 1 if the token buffer cannot be allocated. */
+ assert(mc);
+
+ pipe = mc->pipe;
+ tokens = malloc(max_tokens * sizeof(*tokens));
+ if (!tokens) return 1; /* Out of memory: bail out before the buffer is written to */
+ /* Version */
+ *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+ /* Header */
+ header = (struct tgsi_header*)&tokens[1];
+ *header = tgsi_build_header();
+ /* Processor */
+ *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+ ti = 3;
+
+ /*
+ * decl i0 ; Luma texcoords
+ * decl i1 ; Chroma Cb texcoords
+ * decl i2 ; Chroma Cr texcoords
+ * decl i3 ; First ref macroblock top field texcoords
+ * decl i4 ; First ref macroblock bottom field texcoords
+ * decl i5 ; Second ref macroblock top field texcoords
+ * decl i6 ; Second ref macroblock bottom field texcoords
+ * decl i7 ; Denormalized vertex pos
+ */
+ for (i = 0; i < 8; ++i)
+ {
+ decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+ * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
+ * ; and for Y-mod-2 top/bottom field selection
+ */
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl o0 ; Fragment color */
+ decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /* decl t0-t5 */
+ decl = vl_decl_temps(0, 5);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * decl s0 ; Sampler for luma texture
+ * decl s1 ; Sampler for chroma Cb texture
+ * decl s2 ; Sampler for chroma Cr texture
+ * decl s3 ; Sampler for first ref surface texture
+ * decl s4 ; Sampler for second ref surface texture
+ */
+ for (i = 0; i < 5; ++i)
+ {
+ decl = vl_decl_samplers(i, i);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /*
+ * tex2d t1, i0, s0 ; Read texel from luma texture
+ * mov t0.x, t1.x ; Move luma sample into .x component
+ * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+ * mov t0.y, t1.x ; Move Cb sample into .y component
+ * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
+ * mov t0.z, t1.x ; Move Cr sample into .z component
+ */
+ for (i = 0; i < 3; ++i)
+ {
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* mul t0, t0, c0 ; Rescale texel to correct range */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* XXX: Pos values off by 0.5? */
+ /* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */
+ inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 7, TGSI_FILE_CONSTANT, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* floor t3, t3 ; Get rid of fractional part */
+ inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* mul t3, t3, c1.y ; Multiply by 2 */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+ inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
+ inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field
+ * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field
+ */
+ for (i = 0; i < 2; ++i)
+ {
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* TODO: Move to conditional tex fetch on t3 instead of lerp */
+ /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
+ inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /*
+ * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field
+ * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field
+ */
+ for (i = 0; i < 2; ++i)
+ {
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 5, TGSI_FILE_SAMPLER, 4);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+ }
+
+ /* TODO: Move to conditional tex fetch on t3 instead of lerp */
+ /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
+ inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
+ inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
+ inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ /* end */
+ inst = vl_end();
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ fs.tokens = tokens;
+ mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
+ free(tokens);
+
+ return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_render.h b/src/gallium/state_trackers/g3dvl/vl_render.h
new file mode 100644
index 0000000000..166030b498
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_render.h
@@ -0,0 +1,38 @@
+#ifndef vl_render_h
+#define vl_render_h
+
+#include "vl_types.h"
+
+struct pipe_surface;
+
+struct vlRender /* Renderer interface: a table of function pointers, each called with the vlRender instance as first argument */
+{
+ int (*vlBegin)
+ (
+ struct vlRender *render
+ );
+ /* Called between vlBegin and vlEnd by vlRenderMacroBlocksMpeg2() in vl_surface.c */
+ int (*vlRenderMacroBlocksMpeg2)
+ (
+ struct vlRender *render,
+ struct vlMpeg2MacroBlockBatch *batch,
+ struct vlSurface *surface
+ );
+
+ int (*vlEnd)
+ (
+ struct vlRender *render
+ );
+ /* Called by vlPutPicture() and vlSurfaceFlush() in vl_surface.c */
+ int (*vlFlush)
+ (
+ struct vlRender *render
+ );
+
+ int (*vlDestroy)
+ (
+ struct vlRender *render
+ );
+};
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.c b/src/gallium/state_trackers/g3dvl/vl_screen.c
new file mode 100644
index 0000000000..ade8643a66
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_screen.c
@@ -0,0 +1,115 @@
+#define VL_INTERNAL
+#include "vl_screen.h"
+#include <assert.h>
+#include <util/u_memory.h>
+
+int vlCreateScreen
+(
+ struct vlDisplay *display,
+ int screen,
+ struct pipe_screen *pscreen,
+ struct vlScreen **vl_screen
+)
+{
+ struct vlScreen *created;
+ /* Allocates a vlScreen with CALLOC_STRUCT and hands it back through vl_screen; returns 0 on success, 1 if allocation fails. */
+ assert(display);
+ assert(pscreen);
+ assert(vl_screen);
+
+ created = CALLOC_STRUCT(vlScreen);
+ if (!created)
+ return 1;
+
+ /* Record the caller's handles before publishing the object through the out-parameter. */
+ created->pscreen = pscreen;
+ created->ordinal = screen;
+ created->display = display;
+
+ *vl_screen = created;
+ return 0;
+}
+
+int vlDestroyScreen
+(
+ struct vlScreen *screen
+)
+{
+ assert(screen);
+ /* Frees only the vlScreen wrapper; the display and pipe_screen it points to are not touched here. Always returns 0. */
+ FREE(screen);
+
+ return 0;
+}
+
+struct vlDisplay* vlGetDisplay
+(
+ struct vlScreen *screen
+)
+{
+ assert(screen);
+ /* Accessor: returns the display pointer stored by vlCreateScreen(). */
+ return screen->display;
+}
+
+struct pipe_screen* vlGetPipeScreen
+(
+ struct vlScreen *screen
+)
+{
+ assert(screen);
+ /* Accessor: returns the pipe_screen pointer stored by vlCreateScreen(). */
+ return screen->pscreen;
+}
+
+unsigned int vlGetMaxProfiles
+(
+ struct vlScreen *screen
+)
+{
+ assert(screen);
+ /* Same value for every screen. NOTE(review): presumably the array size callers should pass to vlQueryProfiles() - confirm against vlProfileCount's definition. */
+ return vlProfileCount;
+}
+
+int vlQueryProfiles
+(
+ struct vlScreen *screen,
+ enum vlProfile *profiles
+)
+{
+ assert(screen);
+ assert(profiles);
+ /* Writes exactly two entries, so the caller's array must hold at least two elements. Always returns 0. */
+ profiles[0] = vlProfileMpeg2Simple;
+ profiles[1] = vlProfileMpeg2Main;
+
+ return 0;
+}
+
+unsigned int vlGetMaxEntryPoints
+(
+ struct vlScreen *screen
+)
+{
+ assert(screen);
+ /* Same value for every screen. NOTE(review): presumably the array size callers should pass to vlQueryEntryPoints() - confirm against vlEntryPointCount's definition. */
+ return vlEntryPointCount;
+}
+
+int vlQueryEntryPoints
+(
+ struct vlScreen *screen,
+ enum vlProfile profile,
+ enum vlEntryPoint *entry_points
+)
+{
+ assert(screen);
+ assert(entry_points);
+ /* `profile` is not consulted: the same three entry points are written for every profile, so the array must hold at least three elements. Always returns 0. */
+ entry_points[0] = vlEntryPointIDCT;
+ entry_points[1] = vlEntryPointMC;
+ entry_points[2] = vlEntryPointCSC;
+
+ return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.h b/src/gallium/state_trackers/g3dvl/vl_screen.h
new file mode 100644
index 0000000000..98f3d429b6
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_screen.h
@@ -0,0 +1,63 @@
+#ifndef vl_screen_h
+#define vl_screen_h
+
+#include "vl_types.h"
+
+struct pipe_screen;
+
+#ifdef VL_INTERNAL
+struct vlScreen
+{
+ struct vlDisplay *display; /* Display passed to vlCreateScreen(); returned by vlGetDisplay() */
+ unsigned int ordinal; /* The `screen` number passed to vlCreateScreen() */
+ struct pipe_screen *pscreen; /* pipe_screen passed to vlCreateScreen(); returned by vlGetPipeScreen() */
+};
+#endif
+
+int vlCreateScreen
+(
+ struct vlDisplay *display,
+ int screen,
+ struct pipe_screen *pscreen,
+ struct vlScreen **vl_screen
+);
+
+int vlDestroyScreen
+(
+ struct vlScreen *screen
+);
+
+struct vlDisplay* vlGetDisplay
+(
+ struct vlScreen *screen
+);
+
+struct pipe_screen* vlGetPipeScreen
+(
+ struct vlScreen *screen
+);
+
+unsigned int vlGetMaxProfiles
+(
+ struct vlScreen *screen
+);
+
+int vlQueryProfiles
+(
+ struct vlScreen *screen,
+ enum vlProfile *profiles
+);
+
+unsigned int vlGetMaxEntryPoints
+(
+ struct vlScreen *screen
+);
+
+int vlQueryEntryPoints
+(
+ struct vlScreen *screen,
+ enum vlProfile profile,
+ enum vlEntryPoint *entry_points
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c
new file mode 100644
index 0000000000..51f1721a33
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c
@@ -0,0 +1,215 @@
+#include "vl_shader_build.h"
+#include <assert.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_build.h>
+
+struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
+{
+ struct tgsi_full_declaration input = tgsi_default_full_declaration();
+ /* Declares input registers first..last, tagged with the given semantic name and index. */
+ input.DeclarationRange.First = first;
+ input.DeclarationRange.Last = last;
+ input.Semantic.SemanticName = name;
+ input.Semantic.SemanticIndex = index;
+ input.Declaration.Semantic = 1;
+ input.Declaration.File = TGSI_FILE_INPUT;
+
+ return input;
+}
+
+struct tgsi_full_declaration vl_decl_interpolated_input
+(
+ unsigned int name,
+ unsigned int index,
+ unsigned int first,
+ unsigned int last,
+ int interpolation
+)
+{
+ struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+ /* Like vl_decl_input(), but also sets the interpolation mode, which must be one of the three TGSI_INTERPOLATE_* values asserted below. */
+ assert
+ (
+ interpolation == TGSI_INTERPOLATE_CONSTANT ||
+ interpolation == TGSI_INTERPOLATE_LINEAR ||
+ interpolation == TGSI_INTERPOLATE_PERSPECTIVE
+ );
+
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Semantic = 1;
+ decl.Semantic.SemanticName = name;
+ decl.Semantic.SemanticIndex = index;
+ decl.Declaration.Interpolate = interpolation;
+ decl.DeclarationRange.First = first;
+ decl.DeclarationRange.Last = last;
+
+ return decl;
+}
+
+struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
+{
+ struct tgsi_full_declaration constants = tgsi_default_full_declaration();
+ /* Declares constant registers first..last, tagged with the given semantic name and index. */
+ constants.DeclarationRange.First = first;
+ constants.DeclarationRange.Last = last;
+ constants.Semantic.SemanticName = name;
+ constants.Semantic.SemanticIndex = index;
+ constants.Declaration.Semantic = 1;
+ constants.Declaration.File = TGSI_FILE_CONSTANT;
+
+ return constants;
+}
+
+struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
+{
+ struct tgsi_full_declaration output = tgsi_default_full_declaration();
+ /* Declares output registers first..last, tagged with the given semantic name and index. */
+ output.DeclarationRange.First = first;
+ output.DeclarationRange.Last = last;
+ output.Semantic.SemanticName = name;
+ output.Semantic.SemanticIndex = index;
+ output.Declaration.Semantic = 1;
+ output.Declaration.File = TGSI_FILE_OUTPUT;
+
+ return output;
+}
+
+struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last)
+{
+ struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+ /* Declares temporaries first..last; only the file and register range are set, no semantic. */
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.DeclarationRange.First = first;
+ decl.DeclarationRange.Last = last;
+
+ return decl;
+}
+
+struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last)
+{
+ struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+ /* Declares samplers first..last; only the file and register range are set, no semantic. */
+ decl.Declaration.File = TGSI_FILE_SAMPLER;
+ decl.DeclarationRange.First = first;
+ decl.DeclarationRange.Last = last;
+
+ return decl;
+}
+
+struct tgsi_full_instruction vl_inst2
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src_file,
+ unsigned int src_index
+)
+{
+ struct tgsi_full_instruction op = tgsi_default_full_instruction();
+ /* One destination and one source register ("opcode dst, src") on top of the TGSI defaults. */
+ op.Instruction.Opcode = opcode;
+ op.Instruction.NumDstRegs = 1;
+ op.Instruction.NumSrcRegs = 1;
+ op.FullDstRegisters[0].DstRegister.Index = dst_index;
+ op.FullDstRegisters[0].DstRegister.File = dst_file;
+ op.FullSrcRegisters[0].SrcRegister.Index = src_index;
+ op.FullSrcRegisters[0].SrcRegister.File = src_file;
+
+ return op;
+}
+
+struct tgsi_full_instruction vl_inst3
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index
+)
+{
+ struct tgsi_full_instruction op = tgsi_default_full_instruction();
+ /* One destination and two source registers ("opcode dst, src1, src2") on top of the TGSI defaults. */
+ op.Instruction.Opcode = opcode;
+ op.Instruction.NumDstRegs = 1;
+ op.Instruction.NumSrcRegs = 2;
+ op.FullDstRegisters[0].DstRegister.Index = dst_index;
+ op.FullDstRegisters[0].DstRegister.File = dst_file;
+ op.FullSrcRegisters[0].SrcRegister.Index = src1_index;
+ op.FullSrcRegisters[0].SrcRegister.File = src1_file;
+ op.FullSrcRegisters[1].SrcRegister.Index = src2_index;
+ op.FullSrcRegisters[1].SrcRegister.File = src2_file;
+
+ return op;
+}
+
+struct tgsi_full_instruction vl_tex
+(
+ int tex,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index
+)
+{
+ struct tgsi_full_instruction fetch = tgsi_default_full_instruction();
+ /* TGSI_OPCODE_TEX with one destination and two sources; `tex` goes into the texture extension field. */
+ fetch.Instruction.Opcode = TGSI_OPCODE_TEX;
+ fetch.Instruction.NumDstRegs = 1;
+ fetch.Instruction.NumSrcRegs = 2;
+ fetch.InstructionExtTexture.Texture = tex;
+ fetch.FullDstRegisters[0].DstRegister.Index = dst_index;
+ fetch.FullDstRegisters[0].DstRegister.File = dst_file;
+ fetch.FullSrcRegisters[0].SrcRegister.Index = src1_index;
+ fetch.FullSrcRegisters[0].SrcRegister.File = src1_file;
+ fetch.FullSrcRegisters[1].SrcRegister.Index = src2_index;
+ fetch.FullSrcRegisters[1].SrcRegister.File = src2_file;
+
+ return fetch;
+}
+
+struct tgsi_full_instruction vl_inst4
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index,
+ enum tgsi_file_type src3_file,
+ unsigned int src3_index
+)
+{
+ struct tgsi_full_instruction op = tgsi_default_full_instruction();
+ /* One destination and three source registers ("opcode dst, src1, src2, src3") on top of the TGSI defaults. */
+ op.Instruction.Opcode = opcode;
+ op.Instruction.NumDstRegs = 1;
+ op.Instruction.NumSrcRegs = 3;
+ op.FullDstRegisters[0].DstRegister.Index = dst_index;
+ op.FullDstRegisters[0].DstRegister.File = dst_file;
+ op.FullSrcRegisters[0].SrcRegister.Index = src1_index;
+ op.FullSrcRegisters[0].SrcRegister.File = src1_file;
+ op.FullSrcRegisters[1].SrcRegister.Index = src2_index;
+ op.FullSrcRegisters[1].SrcRegister.File = src2_file;
+ op.FullSrcRegisters[2].SrcRegister.Index = src3_index;
+ op.FullSrcRegisters[2].SrcRegister.File = src3_file;
+
+ return op;
+}
+
+struct tgsi_full_instruction vl_end(void)
+{
+ struct tgsi_full_instruction terminator = tgsi_default_full_instruction();
+ /* TGSI_OPCODE_END takes no registers, so both register counts are cleared. */
+ terminator.Instruction.NumSrcRegs = 0;
+ terminator.Instruction.NumDstRegs = 0;
+ terminator.Instruction.Opcode = TGSI_OPCODE_END;
+
+ return terminator;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.h b/src/gallium/state_trackers/g3dvl/vl_shader_build.h
new file mode 100644
index 0000000000..dc615cb156
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.h
@@ -0,0 +1,61 @@
+#ifndef vl_shader_build_h
+#define vl_shader_build_h
+
+#include <pipe/p_shader_tokens.h>
+
+struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last);
+struct tgsi_full_declaration vl_decl_interpolated_input
+(
+ unsigned int name,
+ unsigned int index,
+ unsigned int first,
+ unsigned int last,
+ int interpolation
+);
+struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last);
+struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last);
+struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last);
+struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last);
+struct tgsi_full_instruction vl_inst2
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src_file,
+ unsigned int src_index
+);
+struct tgsi_full_instruction vl_inst3
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index
+);
+struct tgsi_full_instruction vl_tex
+(
+ int tex,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index
+);
+struct tgsi_full_instruction vl_inst4
+(
+ int opcode,
+ enum tgsi_file_type dst_file,
+ unsigned int dst_index,
+ enum tgsi_file_type src1_file,
+ unsigned int src1_index,
+ enum tgsi_file_type src2_file,
+ unsigned int src2_index,
+ enum tgsi_file_type src3_file,
+ unsigned int src3_index
+);
+struct tgsi_full_instruction vl_end(void);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c
new file mode 100644
index 0000000000..92388f7978
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_surface.c
@@ -0,0 +1,243 @@
+#define VL_INTERNAL
+#include "vl_surface.h"
+#include <assert.h>
+#include <string.h>
+#include <pipe/p_screen.h>
+#include <pipe/p_state.h>
+#include <pipe/p_inlines.h>
+#include <util/u_memory.h>
+#include <vl_winsys.h>
+#include "vl_screen.h"
+#include "vl_context.h"
+#include "vl_render.h"
+#include "vl_csc.h"
+#include "vl_util.h"
+
+int vlCreateSurface
+(
+ struct vlScreen *screen,
+ unsigned int width,
+ unsigned int height,
+ enum vlFormat format,
+ struct vlSurface **surface
+)
+{
+ struct pipe_texture tex_desc;
+ struct vlSurface *created;
+ /* Creates a surface backed by a 2D A8R8G8B8 texture; returns 0 on success, 1 if the struct or the texture cannot be created. */
+ assert(screen);
+ assert(surface);
+
+ created = CALLOC_STRUCT(vlSurface);
+ if (!created)
+ return 1;
+
+ created->format = format;
+ created->height = height;
+ created->width = width;
+ created->screen = screen;
+
+ /* Texture dimensions go through vlRoundUpPOT(); every field not set below stays zeroed by the memset. */
+ memset(&tex_desc, 0, sizeof(tex_desc));
+ tex_desc.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+ tex_desc.compressed = 0;
+ tex_desc.depth[0] = 1;
+ tex_desc.height[0] = vlRoundUpPOT(created->height);
+ tex_desc.width[0] = vlRoundUpPOT(created->width);
+ tex_desc.last_level = 0;
+ tex_desc.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+ tex_desc.target = PIPE_TEXTURE_2D;
+ pf_get_block(tex_desc.format, &tex_desc.block);
+
+ created->texture = vlGetPipeScreen(screen)->texture_create(vlGetPipeScreen(screen), &tex_desc);
+ if (created->texture)
+ {
+ *surface = created;
+ return 0;
+ }
+
+ /* Texture creation failed: release the half-built surface. */
+ FREE(created);
+
+ return 1;
+}
+
+int vlDestroySurface
+(
+ struct vlSurface *surface
+)
+{
+ assert(surface);
+ /* Releases the texture through pipe_texture_reference(..., NULL), then frees the surface struct. Always returns 0. */
+ pipe_texture_reference(&surface->texture, NULL);
+ FREE(surface);
+
+ return 0;
+}
+
+int vlRenderMacroBlocksMpeg2
+(
+ struct vlMpeg2MacroBlockBatch *batch,
+ struct vlSurface *surface
+)
+{
+ assert(batch);
+ assert(surface);
+ assert(surface->context);
+ /* Forwards the batch to the bound context's renderer inside a vlBegin/vlEnd pair; the hooks' return values are ignored and 0 is always returned. */
+ surface->context->render->vlBegin(surface->context->render);
+
+ surface->context->render->vlRenderMacroBlocksMpeg2
+ (
+ surface->context->render,
+ batch,
+ surface
+ );
+
+ surface->context->render->vlEnd(surface->context->render);
+
+ return 0;
+}
+
+int vlPutPicture
+(
+ struct vlSurface *surface,
+ vlNativeDrawable drawable,
+ int srcx,
+ int srcy,
+ int srcw,
+ int srch,
+ int destx,
+ int desty,
+ int destw,
+ int desth,
+ int drawable_w,
+ int drawable_h,
+ enum vlPictureType picture_type
+)
+{
+ struct vlCSC *csc;
+ struct pipe_context *pipe;
+ /* Presents `surface` on `drawable`: flushes the renderer, runs the context's csc stage into its frame buffer, then pushes that buffer to the front buffer. Always returns 0. */
+ assert(surface);
+ assert(surface->context);
+
+ surface->context->render->vlFlush(surface->context->render);
+
+ csc = surface->context->csc;
+ pipe = surface->context->pipe;
+ /* The csc frame buffer is resized to the drawable's dimensions on every call. */
+ csc->vlResizeFrameBuffer(csc, drawable_w, drawable_h);
+
+ csc->vlBegin(csc);
+
+ csc->vlPutPicture
+ (
+ csc,
+ surface,
+ srcx,
+ srcy,
+ srcw,
+ srch,
+ destx,
+ desty,
+ destw,
+ desth,
+ picture_type
+ );
+
+ csc->vlEnd(csc);
+ /* The fence written here is the disp_fence that vlSurfaceGetStatus() polls. */
+ pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &surface->disp_fence);
+
+ bind_pipe_drawable(pipe, drawable);
+
+ pipe->screen->flush_frontbuffer
+ (
+ pipe->screen,
+ csc->vlGetFrameBuffer(csc),
+ pipe->priv
+ );
+
+ return 0;
+}
+
+int vlSurfaceGetStatus
+(
+ struct vlSurface *surface,
+ enum vlResourceStatus *status
+)
+{
+ assert(surface);
+ assert(surface->context);
+ assert(status);
+ /* Reports Rendering while render_fence is set and unsignalled, else Displaying while disp_fence is set and unsignalled, else Free. Always returns 0. */
+ if (surface->render_fence && !surface->context->pipe->screen->fence_signalled(surface->context->pipe->screen, surface->render_fence, 0))
+ {
+ *status = vlResourceStatusRendering;
+ return 0;
+ }
+
+ if (surface->disp_fence && !surface->context->pipe->screen->fence_signalled(surface->context->pipe->screen, surface->disp_fence, 0))
+ {
+ *status = vlResourceStatusDisplaying;
+ return 0;
+ }
+
+ *status = vlResourceStatusFree;
+
+ return 0;
+}
+
+int vlSurfaceFlush
+(
+ struct vlSurface *surface
+)
+{
+ assert(surface);
+ assert(surface->context);
+ /* Delegates to the bound context's renderer vlFlush hook; its return value is ignored and 0 is always returned. */
+ surface->context->render->vlFlush(surface->context->render);
+
+ return 0;
+}
+
+int vlSurfaceSync
+(
+ struct vlSurface *surface
+)
+{
+ assert(surface);
+ assert(surface->context);
+ assert(surface->render_fence);
+ /* Calls fence_finish on render_fence. NOTE(review): render_fence is never assigned in vl_surface.c - presumably the renderer sets it; confirm, since the assert above fires otherwise. */
+ surface->context->pipe->screen->fence_finish(surface->context->pipe->screen, surface->render_fence, 0);
+
+ return 0;
+}
+
+struct vlScreen* vlSurfaceGetScreen
+(
+ struct vlSurface *surface
+)
+{
+ assert(surface);
+ /* Accessor: returns the screen pointer stored by vlCreateSurface(). */
+ return surface->screen;
+}
+
+struct vlContext* vlBindToContext
+(
+ struct vlSurface *surface,
+ struct vlContext *context
+)
+{
+ struct vlContext *previous;
+ /* Replaces the surface's bound context and returns whatever was bound before. */
+ assert(surface);
+
+ previous = surface->context;
+ surface->context = context;
+
+ return previous;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.h b/src/gallium/state_trackers/g3dvl/vl_surface.h
new file mode 100644
index 0000000000..133e1515ef
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_surface.h
@@ -0,0 +1,86 @@
+#ifndef vl_surface_h
+#define vl_surface_h
+
+#include "vl_types.h"
+
+#ifdef VL_INTERNAL
+struct pipe_texture;
+
+/*
+ * Surface state.  The definition is only exposed to translation units
+ * that define VL_INTERNAL; everyone else sees an opaque struct.
+ */
+struct vlSurface
+{
+ /* Returned by vlSurfaceGetScreen(). */
+ struct vlScreen *screen;
+ /* Context the surface is currently bound to; exchanged by vlBindToContext(). */
+ struct vlContext *context;
+ /* NOTE(review): width/height/format/texture are presumably filled in by
+  * vlCreateSurface() from its arguments -- confirm in vl_surface.c. */
+ unsigned int width;
+ unsigned int height;
+ enum vlFormat format;
+ struct pipe_texture *texture;
+ /* Polled by vlSurfaceGetStatus() and waited on by vlSurfaceSync(). */
+ struct pipe_fence_handle *render_fence;
+ /* Polled by vlSurfaceGetStatus() to report the "displaying" state. */
+ struct pipe_fence_handle *disp_fence;
+};
+#endif
+
+/*
+ * Public surface API.  The functions whose definitions are visible in
+ * vl_surface.c return 0 unconditionally and check their pointer
+ * arguments with assert().
+ */
+
+/* NOTE(review): presumably allocates a surface and returns it through
+ * *surface -- definition not shown here, confirm in vl_surface.c. */
+int vlCreateSurface
+(
+ struct vlScreen *screen,
+ unsigned int width,
+ unsigned int height,
+ enum vlFormat format,
+ struct vlSurface **surface
+);
+
+int vlDestroySurface
+(
+ struct vlSurface *surface
+);
+
+int vlRenderMacroBlocksMpeg2
+(
+ struct vlMpeg2MacroBlockBatch *batch,
+ struct vlSurface *surface
+);
+
+/* Takes a source rectangle (srcx, srcy, srcw, srch), a destination
+ * rectangle (destx, desty, destw, desth) and the drawable size. */
+int vlPutPicture
+(
+ struct vlSurface *surface,
+ vlNativeDrawable drawable,
+ int srcx,
+ int srcy,
+ int srcw,
+ int srch,
+ int destx,
+ int desty,
+ int destw,
+ int desth,
+ int drawable_w,
+ int drawable_h,
+ enum vlPictureType picture_type
+);
+
+/* Writes Rendering / Displaying / Free to *status depending on which of the
+ * surface's fences is still unsignalled. */
+int vlSurfaceGetStatus
+(
+ struct vlSurface *surface,
+ enum vlResourceStatus *status
+);
+
+/* Calls the vlFlush hook of the bound context's renderer. */
+int vlSurfaceFlush
+(
+ struct vlSurface *surface
+);
+
+/* Waits on the surface's render fence. */
+int vlSurfaceSync
+(
+ struct vlSurface *surface
+);
+
+/* Returns the screen stored in the surface. */
+struct vlScreen* vlSurfaceGetScreen
+(
+ struct vlSurface *surface
+);
+
+/* Stores 'context' in the surface and returns the previously stored one. */
+struct vlContext* vlBindToContext
+(
+ struct vlSurface *surface,
+ struct vlContext *context
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h
new file mode 100644
index 0000000000..274e1f7437
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_types.h
@@ -0,0 +1,115 @@
+#ifndef vl_types_h
+#define vl_types_h
+
+/*
+ * Native window-system handle types.  The X11 mapping is currently
+ * compiled in unconditionally ("#if 1"); the trailing comment records the
+ * intended "#ifdef X11" guard.
+ */
+#if 1 /*#ifdef X11*/
+#include <X11/Xlib.h>
+
+typedef Display* vlNativeDisplay;
+typedef Drawable vlNativeDrawable;
+#endif
+
+struct vlDisplay;
+struct vlScreen;
+struct vlContext;
+struct vlSurface;
+
+/* Busy state of a surface, as reported by vlSurfaceGetStatus(). */
+enum vlResourceStatus
+{
+ /* Neither the render fence nor the display fence is pending. */
+ vlResourceStatusFree,
+ /* The render fence exists and has not been signalled. */
+ vlResourceStatusRendering,
+ /* The display fence exists and has not been signalled. */
+ vlResourceStatusDisplaying
+};
+
+/* Decoding profiles.  vlProfileCount is last, so it equals the number of
+ * real profiles listed before it. */
+enum vlProfile
+{
+ vlProfileMpeg2Simple,
+ vlProfileMpeg2Main,
+
+ vlProfileCount
+};
+
+/*
+ * Decoding entry points.  vlEntryPointCount is last, so it equals the
+ * number of real entry points.
+ * NOTE(review): IDCT / MC / CSC presumably stand for inverse DCT, motion
+ * compensation and colour-space conversion -- confirm.
+ */
+enum vlEntryPoint
+{
+ vlEntryPointIDCT,
+ vlEntryPointMC,
+ vlEntryPointCSC,
+
+ vlEntryPointCount
+};
+
+/* Surface formats (see struct vlSurface and vlCreateSurface()). */
+enum vlFormat
+{
+ vlFormatYCbCr420,
+ vlFormatYCbCr422,
+ vlFormatYCbCr444
+};
+
+/* Picture structure passed to vlPutPicture() and carried per batch in
+ * struct vlMpeg2MacroBlockBatch. */
+enum vlPictureType
+{
+ vlPictureTypeTopField,
+ vlPictureTypeBottomField,
+ vlPictureTypeFrame
+};
+
+/* Motion type of a macroblock (struct vlMpeg2MacroBlock::mo_type). */
+enum vlMotionType
+{
+ vlMotionTypeField,
+ vlMotionTypeFrame,
+ vlMotionTypeDualPrime,
+ vlMotionType16x8
+};
+
+/* Field order of a batch (struct vlMpeg2MacroBlockBatch::field_order). */
+enum vlFieldOrder
+{
+ vlFieldOrderFirst,
+ vlFieldOrderSecond
+};
+
+/* DCT coding type of a macroblock (struct vlMpeg2MacroBlock::dct_type). */
+enum vlDCTType
+{
+ vlDCTTypeFrameCoded,
+ vlDCTTypeFieldCoded
+};
+
+/* Two-component float vector. */
+struct vlVertex2f
+{
+ float x, y;
+};
+
+/* Four-component float vector. */
+struct vlVertex4f
+{
+ float x, y, z, w;
+};
+
+/* Prediction type of a macroblock.  vlNumMacroBlockTypes is last, so it
+ * equals the number of real types listed before it. */
+enum vlMacroBlockType
+{
+ vlMacroBlockTypeIntra,
+ vlMacroBlockTypeFwdPredicted,
+ vlMacroBlockTypeBkwdPredicted,
+ vlMacroBlockTypeBiPredicted,
+
+ vlNumMacroBlockTypes
+};
+
+/*
+ * Description of a single MPEG-2 macroblock.
+ * NOTE(review): the field meanings below are inferred from the names and
+ * from MPEG-2 terminology, not from code visible here -- confirm:
+ *   mbx, mby  presumably the macroblock position
+ *   PMV       presumably the motion vectors
+ *   cbp       presumably the coded block pattern
+ *   blocks    presumably the coefficient data for the coded blocks
+ */
+struct vlMpeg2MacroBlock
+{
+ unsigned int mbx, mby;
+ enum vlMacroBlockType mb_type;
+ enum vlMotionType mo_type;
+ enum vlDCTType dct_type;
+ int PMV[2][2][2];
+ unsigned int cbp;
+ short *blocks;
+};
+
+/*
+ * A batch of macroblocks handed to vlRenderMacroBlocksMpeg2().
+ * 'macroblocks' points at an array; 'num_macroblocks' is presumably its
+ * element count (TODO confirm against the renderer).
+ * NOTE(review): past_surface / future_surface are presumably the reference
+ * pictures for forward / backward prediction -- confirm.
+ */
+struct vlMpeg2MacroBlockBatch
+{
+ struct vlSurface *past_surface;
+ struct vlSurface *future_surface;
+ enum vlPictureType picture_type;
+ enum vlFieldOrder field_order;
+ unsigned int num_macroblocks;
+ struct vlMpeg2MacroBlock *macroblocks;
+};
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_util.c b/src/gallium/state_trackers/g3dvl/vl_util.c
new file mode 100644
index 0000000000..50aa9af66f
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_util.c
@@ -0,0 +1,16 @@
+#include "vl_util.h"
+#include <assert.h>
+
+/*
+ * Round x up to the next power of two; a value that already is a power of
+ * two is returned unchanged.
+ *
+ * Works by decrementing x, smearing its highest set bit into every lower
+ * bit position (shift distances 1, 2, 4, ...), and adding one.
+ *
+ * x must be greater than zero (asserted).  The arithmetic is unsigned, so
+ * when no power of two >= x fits in an unsigned int the result wraps to 0.
+ */
+unsigned int vlRoundUpPOT(unsigned int x)
+{
+    const unsigned int num_bits = sizeof(unsigned int) * 8;
+    unsigned int shift = 1;
+
+    assert(x > 0);
+
+    x -= 1;
+
+    while (shift < num_bits)
+    {
+        x |= x >> shift;
+        shift <<= 1;
+    }
+
+    return x + 1;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_util.h b/src/gallium/state_trackers/g3dvl/vl_util.h
new file mode 100644
index 0000000000..bc98e79df4
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_util.h
@@ -0,0 +1,6 @@
+#ifndef vl_util_h
+#define vl_util_h
+
+/* Round x (which must be > 0) up to the next power of two; powers of two
+ * are returned unchanged. */
+unsigned int vlRoundUpPOT(unsigned int x);
+
+#endif
diff --git a/src/gallium/state_trackers/glx/Makefile b/src/gallium/state_trackers/glx/Makefile
new file mode 100644
index 0000000000..f779035763
--- /dev/null
+++ b/src/gallium/state_trackers/glx/Makefile
@@ -0,0 +1,25 @@
+# Makefile for the GLX state trackers: recurses into each directory
+# listed in SUBDIRS.
+TOP = ../../../..
+include $(TOP)/configs/current
+
+
+SUBDIRS = xlib
+
+
+default: subdirs
+
+
+# Run $(MAKE) in every SUBDIRS entry that exists as a directory; the first
+# failing sub-make aborts the loop with exit status 1.
+subdirs:
+ @for dir in $(SUBDIRS) ; do \
+ if [ -d $$dir ] ; then \
+ (cd $$dir && $(MAKE)) || exit 1 ; \
+ fi \
+ done
+
+
+# Remove object files, archives and "depend" files found anywhere below
+# this directory.
+clean:
+ rm -f `find . -name \*.[oa]`
+ rm -f `find . -name depend`
+
+
+# Dummy install target
+install:
diff --git a/src/gallium/state_trackers/glx/dri/dri_context.c b/src/gallium/state_trackers/glx/dri/dri_context.c
new file mode 100644
index 0000000000..9424e18bee
--- /dev/null
+++ b/src/gallium/state_trackers/glx/dri/dri_context.c
@@ -0,0 +1,168 @@
+/**************************************************************************
+ *
+ * Copyright 2009, VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "dri_screen.h"
+#include "dri_context.h"
+#include "dri_winsys.h"
+
+#include "state_tracker/st_public.h"
+#include "state_tracker/st_context.h"
+#include "pipe/p_context.h"
+
+#include "util/u_memory.h"
+
+
+/**
+ * Create the driver-private state for a new DRI context.
+ *
+ * Allocates a struct dri_context, parses the per-context driver options,
+ * creates a pipe context on the screen and wraps it in a state-tracker
+ * context (sharing with \p sharedContextPrivate's st_context if given).
+ *
+ * \param visual                GL visual the context is created for
+ * \param cPriv                 DRI context; its driverPrivate is set to the
+ *                              new dri_context on success
+ * \param sharedContextPrivate  optional dri_context to share with, or NULL
+ * \return GL_TRUE on success.  On failure returns GL_FALSE, everything
+ *         created so far is released and cPriv->driverPrivate is reset to
+ *         NULL so no pointer to the freed dri_context is left behind.
+ */
+GLboolean
+dri_create_context(const __GLcontextModes *visual,
+                   __DRIcontextPrivate *cPriv,
+                   void *sharedContextPrivate)
+{
+   __DRIscreenPrivate *sPriv = cPriv->driScreenPriv;
+   struct dri_screen *screen = dri_screen(sPriv);
+   struct dri_context *ctx = NULL;
+   struct st_context *st_share = NULL;
+
+   if (sharedContextPrivate) {
+      st_share = ((struct dri_context *) sharedContextPrivate)->st;
+   }
+
+   ctx = CALLOC_STRUCT(dri_context);
+   if (ctx == NULL)
+      goto fail;
+
+   /* struct dri_context has no sPriv member; the screen private is always
+    * reachable through ctx->cPriv->driScreenPriv.
+    */
+   cPriv->driverPrivate = ctx;
+   ctx->cPriv = cPriv;
+
+   driParseConfigFiles(&ctx->optionCache,
+                       &screen->optionCache,
+                       sPriv->myNum,
+                       "dri");
+
+   /* NOTE(review): hw_winsys is not declared in this function; it must
+    * come from one of the included headers -- verify.
+    */
+   ctx->pipe = screen->pipe_screen->create_context(screen->pipe_screen,
+                                                   screen->pipe_winsys,
+                                                   hw_winsys );
+   if (ctx->pipe == NULL)
+      goto fail;
+
+   ctx->pipe->priv = ctx;       /* I guess */
+
+   ctx->st = st_create_context(ctx->pipe, visual, st_share);
+   if (ctx->st == NULL)
+      goto fail;
+
+   dri_init_extensions( ctx );
+
+   return GL_TRUE;
+
+fail:
+   if (ctx && ctx->st)
+      st_destroy_context( ctx->st );
+
+   if (ctx && ctx->pipe)
+      ctx->pipe->destroy( ctx->pipe );
+
+   /* Do not leave the DRI context pointing at freed memory. */
+   cPriv->driverPrivate = NULL;
+
+   FREE(ctx);
+   return GL_FALSE;
+}
+
+
+/**
+ * Destroy the driver-private state of a DRI context.
+ *
+ * Flushes the state tracker, drops the screen's dummy-context reference
+ * if it points at this context, destroys the state-tracker context and
+ * frees the dri_context.  cPriv->driverPrivate is cleared afterwards.
+ */
+void
+dri_destroy_context(__DRIcontextPrivate *cPriv)
+{
+   struct dri_context *ctx = dri_context(cPriv);
+   struct dri_screen *screen = dri_screen(cPriv->driScreenPriv);
+
+   /* No particular reason to wait for command completion before
+    * destroying a context, but it is probably worthwhile flushing it
+    * to avoid having to add code elsewhere to cope with flushing a
+    * partially destroyed context.
+    *
+    * Same argument list as the st_flush() call in dri_unbind_context().
+    */
+   st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+   if (screen->dummyContext == ctx)
+      screen->dummyContext = NULL;
+
+   /* Also frees ctx->pipe?
+    */
+   st_destroy_context(ctx->st);
+
+   cPriv->driverPrivate = NULL;
+   FREE(ctx);
+}
+
+
+/**
+ * Called when the context is unbound: flushes the state tracker's render
+ * cache without requesting a fence.  Always returns GL_TRUE.
+ */
+GLboolean
+dri_unbind_context(__DRIcontextPrivate *cPriv)
+{
+ struct dri_context *ctx = dri_context(cPriv);
+ st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL);
+ /* XXX make_current(NULL)? */
+ return GL_TRUE;
+}
+
+
+/**
+ * Make a context current with the given draw and read drawables, or
+ * release the current context when \p cPriv is NULL.
+ *
+ * When binding, the context is also recorded as the screen's dummy
+ * context and any drawable whose stamp is out of date is resized.
+ * The drawables are dereferenced unconditionally when \p cPriv is
+ * non-NULL, so they must be valid in that case.
+ *
+ * \return always GL_TRUE
+ */
+GLboolean
+dri_make_current(__DRIcontextPrivate *cPriv,
+                 __DRIdrawablePrivate *driDrawPriv,
+                 __DRIdrawablePrivate *driReadPriv)
+{
+   if (cPriv) {
+      struct dri_context *ctx = dri_context(cPriv);
+      struct dri_screen *screen = dri_screen(cPriv->driScreenPriv);
+      struct dri_drawable *draw = dri_drawable(driDrawPriv);
+      struct dri_drawable *read = dri_drawable(driReadPriv);
+
+      /* This is for situations in which we need a rendering context but
+       * there may not be any currently bound.
+       */
+      screen->dummyContext = ctx;
+
+      st_make_current( ctx->st,
+                       draw->stfb,
+                       read->stfb );
+
+      ctx->dPriv = driDrawPriv;
+
+      /* Update window sizes if necessary.  dri_update_window_size()
+       * takes the DRI drawable, not the driver-private dri_drawable.
+       */
+      if (draw->stamp != driDrawPriv->lastStamp) {
+         dri_update_window_size( driDrawPriv );
+      }
+
+      if (read->stamp != driReadPriv->lastStamp) {
+         dri_update_window_size( driReadPriv );
+      }
+
+   }
+   else {
+      st_make_current(NULL, NULL, NULL);
+   }
+
+   return GL_TRUE;
+}
diff --git a/src/gallium/state_trackers/glx/dri/dri_context.h b/src/gallium/state_trackers/glx/dri/dri_context.h
new file mode 100644
index 0000000000..4e6a305abb
--- /dev/null
+++ b/src/gallium/state_trackers/glx/dri/dri_context.h
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DRI_CONTEXT_H
+#define DRI_CONTEXT_H
+
+#include "pipe/p_compiler.h"
+#include "drm.h"
+#include "dri_util.h"
+
+
+struct pipe_context;
+struct pipe_fence;
+struct st_context;
+
+
+/**
+ * Driver-private state attached to a __DRIcontextPrivate through its
+ * driverPrivate pointer (see the dri_context() accessor).
+ */
+struct dri_context
+{
+ /* Owning DRI context; set in dri_create_context(). */
+ __DRIcontextPrivate *cPriv;
+ /* Draw drawable of the last dri_make_current() call. */
+ __DRIdrawablePrivate *dPriv;
+
+ /* State-tracker context and the pipe context it was created on. */
+ struct st_context *st;
+ struct pipe_context *pipe;
+
+ /* Set and cleared by dri_lock_hardware() / dri_unlock_hardware(). */
+ boolean locked;
+
+ /**
+ * Configuration cache
+ */
+ driOptionCache optionCache;
+};
+
+
+/** Return the dri_context stored in a DRI context's driverPrivate field. */
+static INLINE struct dri_context *
+dri_context(__DRIcontextPrivate *driContextPriv)
+{
+ return (struct dri_context *) driContextPriv->driverPrivate;
+}
+
+/***********************************************************************
+ * dri_context.c
+ */
+/* Flushes and destroys the state-tracker context, then frees the
+ * dri_context attached to driContextPriv. */
+void
+dri_destroy_context(__DRIcontextPrivate * driContextPriv);
+
+/* Flushes the context's render cache; returns GL_TRUE. */
+boolean
+dri_unbind_context(__DRIcontextPrivate * driContextPriv);
+
+/* Binds the context with the given draw/read drawables, or unbinds the
+ * current context when driContextPriv is NULL; returns GL_TRUE. */
+boolean
+dri_make_current(__DRIcontextPrivate * driContextPriv,
+ __DRIdrawablePrivate * driDrawPriv,
+ __DRIdrawablePrivate * driReadPriv);
+
+/* Allocates the dri_context for driContextPriv, optionally sharing state
+ * with sharedContextPrivate; returns GL_TRUE on success. */
+boolean
+dri_create_context(const __GLcontextModes * visual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate);
+
+
+
+/***********************************************************************
+ * dri_lock.c
+ */
+/* NOTE(review): the definition in dri_lock.c takes only the context,
+ * while this prototype (and the caller in dri_drawable.c) also passes a
+ * drawable -- the two need to be reconciled.  struct dri_drawable is also
+ * not forward-declared before this point. */
+void dri_lock_hardware( struct dri_context *context,
+ struct dri_drawable *drawable );
+
+void dri_unlock_hardware( struct dri_context *dri );
+boolean dri_is_locked( struct dri_context *dri );
+
+
+
+#endif
diff --git a/src/gallium/state_trackers/glx/dri/dri_drawable.c b/src/gallium/state_trackers/glx/dri/dri_drawable.c
new file mode 100644
index 0000000000..b712acda88
--- /dev/null
+++ b/src/gallium/state_trackers/glx/dri/dri_drawable.c
@@ -0,0 +1,363 @@
+/**************************************************************************
+ *
+ * Copyright 2009, VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "dri_screen.h"
+#include "dri_context.h"
+#include "dri_swapbuffers.h"
+
+#include "pipe/p_context.h"
+#include "state_tracker/st_public.h"
+#include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_fbo.h"
+
+
+/**
+ * Copy \p src to the drawable's front surface, one blit per cliprect.
+ *
+ * \param dPriv  drawable providing the cliprect list and window origin
+ * \param cPriv  context whose pipe performs the copies
+ * \param src    surface to copy from
+ * \param rect   optional sub-rectangle (may be NULL); x1/y1 are its
+ *               window-relative origin and x2/y2 its width/height.  The y
+ *               origin is flipped against the drawable height.
+ *
+ * Cliprects that are inverted or larger than the destination surface are
+ * skipped, and each blit is clamped to the size of the framebuffer.
+ */
+static void
+blit_swapbuffers(__DRIdrawablePrivate *dPriv,
+                 __DRIcontextPrivate *cPriv,
+                 struct pipe_surface *src,
+                 const drm_clip_rect_t *rect)
+{
+   struct dri_drawable *fb = dri_drawable(dPriv);
+   struct dri_context *context = dri_context(cPriv);
+   struct pipe_context *pipe = context->st->pipe;
+
+   const int nbox = dPriv->numClipRects;
+   const drm_clip_rect_t *pbox = dPriv->pClipRects;
+
+   struct pipe_surface *dest = fb->front_surface;
+   const int backWidth = fb->stfb->Base.Width;
+   const int backHeight = fb->stfb->Base.Height;
+   int i;
+
+   for (i = 0; i < nbox; i++, pbox++) {
+      drm_clip_rect_t box;
+      drm_clip_rect_t sbox;
+
+      if (pbox->x1 > pbox->x2 ||
+          pbox->y1 > pbox->y2 ||
+          (pbox->x2 - pbox->x1) > dest->width ||
+          (pbox->y2 - pbox->y1) > dest->height)
+         continue;
+
+      box = *pbox;
+
+      if (rect) {
+         drm_clip_rect_t rrect;
+
+         /* Translate the sub-rectangle to screen space and intersect
+          * it with the cliprect.
+          */
+         rrect.x1 = dPriv->x + rect->x1;
+         rrect.y1 = (dPriv->h - rect->y1 - rect->y2) + dPriv->y;
+         rrect.x2 = rect->x2 + rrect.x1;
+         rrect.y2 = rect->y2 + rrect.y1;
+         if (rrect.x1 > box.x1)
+            box.x1 = rrect.x1;
+         if (rrect.y1 > box.y1)
+            box.y1 = rrect.y1;
+         if (rrect.x2 < box.x2)
+            box.x2 = rrect.x2;
+         if (rrect.y2 < box.y2)
+            box.y2 = rrect.y2;
+
+         if (box.x1 > box.x2 || box.y1 > box.y2)
+            continue;
+      }
+
+      /* restrict blit to size of actually rendered area */
+      if (box.x2 - box.x1 > backWidth)
+         box.x2 = backWidth + box.x1;
+      if (box.y2 - box.y1 > backHeight)
+         box.y2 = backHeight + box.y1;
+
+      debug_printf("%s: box %d,%d-%d,%d\n", __FUNCTION__,
+                   box.x1, box.y1, box.x2, box.y2);
+
+      /* Source coordinates are relative to the window origin. */
+      sbox.x1 = box.x1 - dPriv->x;
+      sbox.y1 = box.y1 - dPriv->y;
+
+      pipe->surface_copy( pipe,
+                          FALSE,
+                          dest,
+                          box.x1, box.y1,
+                          src,
+                          sbox.x1, sbox.y1,
+                          box.x2 - box.x1,
+                          box.y2 - box.y1 );
+   }
+}
+
+/**
+ * Display a colorbuffer surface in an X window.
+ * Used for SwapBuffers and flushing front buffer rendering.
+ *
+ * Does nothing when the screen has no dummy context to render with.
+ * Otherwise waits for (and releases) the previous swap fence, takes the
+ * hardware lock to refresh the cliprects, blits the surface, and finally
+ * resizes the framebuffer if the drawable's stamp changed.
+ *
+ * \param dPriv  the window/drawable to display into
+ * \param source the surface to display
+ * \param rect   optional subrect of surface to display (may be NULL).
+ */
+static void
+dri_display_surface(__DRIdrawablePrivate *dPriv,
+                    struct pipe_surface *source,
+                    const drm_clip_rect_t *rect)
+{
+   struct dri_drawable *drawable = dri_drawable(dPriv);
+   struct dri_screen *screen = dri_screen(dPriv->driScreenPriv);
+   /* Same field name as used by dri_make_current() and
+    * dri_destroy_context().
+    */
+   struct dri_context *context = screen->dummyContext;
+   struct pipe_winsys *winsys = screen->winsys;
+
+   if (!context)
+      return;
+
+   if (drawable->last_swap_fence) {
+      winsys->fence_finish( winsys,
+                            drawable->last_swap_fence,
+                            0 );
+
+      winsys->fence_reference( winsys,
+                               &drawable->last_swap_fence,
+                               NULL );
+   }
+
+   drawable->last_swap_fence = drawable->first_swap_fence;
+   drawable->first_swap_fence = NULL;
+
+   /* Call lock_hardware to update dPriv cliprects.
+    */
+   dri_lock_hardware(context, drawable);
+   {
+      if (dPriv->numClipRects) {
+         /* Argument order follows the blit_swapbuffers() definition:
+          * drawable first, then the DRI context.
+          */
+         blit_swapbuffers( dPriv, context->cPriv, source, rect );
+      }
+   }
+   dri_unlock_hardware(context);
+
+   /* Compare against the dPriv we were given; drawable->dPriv is not
+    * guaranteed to have been set.
+    */
+   if (drawable->stamp != dPriv->lastStamp) {
+      dri_update_window_size( dPriv );
+   }
+}
+
+
+
+/**
+ * Called when a drawable is known to have moved or been resized:
+ * resizes the state-tracker framebuffer to the drawable's current
+ * width/height and records the drawable's lastStamp so the change is not
+ * processed again.
+ */
+void
+dri_update_window_size(__DRIdrawablePrivate *dPriv)
+{
+ struct dri_drawable *drawable = dri_drawable(dPriv);
+ st_resize_framebuffer(drawable->stfb, dPriv->w, dPriv->h);
+ drawable->stamp = dPriv->lastStamp;
+}
+
+
+
+/**
+ * SwapBuffers entry point: shows the back-left colour buffer of the
+ * drawable's framebuffer.  The state tracker is notified before the
+ * surface is displayed and again once it has been.  Nothing happens
+ * when the framebuffer has no back-left surface.
+ */
+void
+dri_swap_buffers(__DRIdrawablePrivate * dPriv)
+{
+   struct dri_drawable *drawable = dri_drawable(dPriv);
+   struct pipe_surface *back;
+
+   assert(drawable);
+   assert(drawable->stfb);
+
+   back = st_get_framebuffer_surface(drawable->stfb,
+                                     ST_SURFACE_BACK_LEFT);
+   if (!back)
+      return;
+
+   st_notify_swapbuffers(drawable->stfb);
+   dri_display_surface(dPriv, back, NULL);
+   st_notify_swapbuffers_complete(drawable->stfb);
+}
+
+
+/**
+ * Called via glXCopySubBufferMESA() to copy a subrect of the back
+ * buffer to the front buffer/screen.
+ *
+ * The rectangle is handed on with x1/y1 holding the origin and x2/y2
+ * holding the width and height (not the far corner); blit_swapbuffers()
+ * interprets it that way.  Unlike dri_swap_buffers(), this path does not
+ * call st_notify_swapbuffers_complete().
+ */
+void
+dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h)
+{
+ struct dri_drawable *drawable = dri_drawable(dPriv);
+ struct pipe_surface *back_surf;
+
+ assert(drawable);
+ assert(drawable->stfb);
+
+ back_surf = st_get_framebuffer_surface(drawable->stfb,
+ ST_SURFACE_BACK_LEFT);
+ if (back_surf) {
+ drm_clip_rect_t rect;
+ rect.x1 = x;
+ rect.y1 = y;
+ rect.x2 = w;
+ rect.y2 = h;
+
+ st_notify_swapbuffers(drawable->stfb);
+ dri_display_surface(dPriv, back_surf, &rect);
+ }
+}
+
+
+
+/*
+ * The state tracker keeps track of whether the fake frontbuffer has
+ * been touched by any rendering since the last time we copied its
+ * contents to the real frontbuffer.  Our task is easy: display the
+ * surface on the drawable that the context was last made current with.
+ *
+ * context_private is the dri_context that dri_create_context() stored in
+ * pipe->priv.  Nothing is done if no drawable has been bound yet.
+ */
+static void
+dri_flush_frontbuffer( struct pipe_winsys *winsys,
+                       struct pipe_surface *surf,
+                       void *context_private)
+{
+   struct dri_context *dri = (struct dri_context *) context_private;
+   /* struct dri_context names its drawable field dPriv. */
+   __DRIdrawablePrivate *dPriv = dri->dPriv;
+
+   if (!dPriv)
+      return;
+
+   dri_display_surface(dPriv, surf, NULL);
+}
+
+
+
+/* Need to create a surface which wraps the front surface to support
+ * client-side swapbuffers.
+ *
+ * The buffer lookup from 'handle' is currently disabled, so 'buffer'
+ * stays NULL and the function returns before touching screen->front.
+ * It is initialised explicitly so that the check below does not read an
+ * indeterminate pointer.
+ */
+static void
+dri_create_front_surface(struct dri_screen *screen,
+                         struct pipe_winsys *winsys,
+                         unsigned handle)
+{
+   struct pipe_screen *pipe_screen = screen->pipe_screen;
+   struct pipe_texture *texture;
+   struct pipe_texture templat;
+   struct pipe_surface *surface;
+   struct pipe_buffer *buffer = NULL;
+   unsigned pitch;
+
+   assert(screen->front.cpp == 4);
+
+   /* XXX disabled:
+    * buffer = dri_buffer_from_handle(screen->winsys,
+    *                                 "front", handle);
+    */
+
+   if (!buffer)
+      return;
+
+   screen->front.buffer = dri_bo(buffer);
+
+   /* Describe the front buffer as a single-level 2D display target. */
+   memset(&templat, 0, sizeof(templat));
+   templat.tex_usage |= PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
+   templat.target = PIPE_TEXTURE_2D;
+   templat.last_level = 0;
+   templat.depth[0] = 1;
+   templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+   templat.width[0] = screen->front.width;
+   templat.height[0] = screen->front.height;
+   pf_get_block(templat.format, &templat.block);
+   pitch = screen->front.pitch;
+
+   texture = pipe_screen->texture_blanket(pipe_screen,
+                                          &templat,
+                                          &pitch,
+                                          buffer);
+
+   /* Unref the buffer we don't need it anyways.
+    * NOTE(review): 'screen' is a struct dri_screen here -- check that
+    * this is the object pipe_buffer_reference() expects.
+    */
+   pipe_buffer_reference(screen, &buffer, NULL);
+
+   /* Without a texture there is nothing to take a surface from. */
+   if (!texture)
+      return;
+
+   surface = pipe_screen->get_tex_surface(pipe_screen,
+                                          texture,
+                                          0,
+                                          0,
+                                          0,
+                                          PIPE_BUFFER_USAGE_GPU_WRITE);
+
+   screen->front.texture = texture;
+   screen->front.surface = surface;
+}
+
+/**
+ * This is called when we need to set up GL rendering to a new X window.
+ *
+ * Allocates the driver-private dri_drawable, picks colour, depth and
+ * stencil formats from the visual and creates the state-tracker
+ * framebuffer at the drawable's current size.
+ *
+ * \return GL_TRUE on success; GL_FALSE for pixmaps (not implemented) or
+ *         when an allocation fails, in which case nothing is leaked.
+ */
+static boolean
+dri_create_buffer(__DRIscreenPrivate *sPriv,
+                  __DRIdrawablePrivate *dPriv,
+                  const __GLcontextModes *visual,
+                  boolean isPixmap)
+{
+   enum pipe_format colorFormat, depthFormat, stencilFormat;
+   /* NULL so that every "goto fail" frees a well-defined pointer. */
+   struct dri_drawable *drawable = NULL;
+
+   if (isPixmap)
+      goto fail;          /* not implemented */
+
+   drawable = CALLOC_STRUCT(dri_drawable);
+   if (drawable == NULL)
+      goto fail;
+
+   /* Back-pointer declared in struct dri_drawable. */
+   drawable->dPriv = dPriv;
+
+   /* XXX: todo: use the pipe_screen queries to figure out which
+    * render targets are supportable.
+    */
+   if (visual->redBits == 5)
+      colorFormat = PIPE_FORMAT_R5G6B5_UNORM;
+   else
+      colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM;
+
+   if (visual->depthBits == 16)
+      depthFormat = PIPE_FORMAT_Z16_UNORM;
+   else if (visual->depthBits == 24) {
+      if (visual->stencilBits == 8)
+         depthFormat = PIPE_FORMAT_S8Z24_UNORM;
+      else
+         depthFormat = PIPE_FORMAT_X8Z24_UNORM;
+   }
+   else
+      depthFormat = PIPE_FORMAT_NONE;   /* no (supported) depth buffer */
+
+   if (visual->stencilBits == 8)
+      stencilFormat = PIPE_FORMAT_S8Z24_UNORM;
+   else
+      stencilFormat = PIPE_FORMAT_NONE;
+
+   drawable->stfb = st_create_framebuffer(visual,
+                                          colorFormat,
+                                          depthFormat,
+                                          stencilFormat,
+                                          dPriv->w,
+                                          dPriv->h,
+                                          (void*) drawable);
+   if (drawable->stfb == NULL)
+      goto fail;
+
+   dPriv->driverPrivate = (void *) drawable;
+   return GL_TRUE;
+
+fail:
+   FREE(drawable);
+   return GL_FALSE;
+}
+
+/**
+ * Release the driver-private state of a drawable: drop both swap fences,
+ * unreference the state-tracker framebuffer and free the dri_drawable.
+ * dPriv->driverPrivate is cleared afterwards.
+ */
+static void
+dri_destroy_buffer(__DRIdrawablePrivate *dPriv)
+{
+   struct dri_drawable *drawable = dri_drawable(dPriv);
+   struct dri_screen *screen = dri_screen(dPriv->driScreenPriv);
+   struct pipe_winsys *winsys = screen->winsys;
+
+   /* No particular need to wait on fences before dereferencing them.
+    * The swap fences are members of the drawable.
+    */
+   winsys->fence_reference( winsys, &drawable->last_swap_fence, NULL );
+   winsys->fence_reference( winsys, &drawable->first_swap_fence, NULL );
+
+   st_unreference_framebuffer(drawable->stfb);
+
+   dPriv->driverPrivate = NULL;
+   FREE(drawable);
+}
+
diff --git a/src/gallium/winsys/drm/intel/egl/intel_reg.h b/src/gallium/state_trackers/glx/dri/dri_drawable.h
index 4f33bee438..1001bb8c57 100644
--- a/src/gallium/winsys/drm/intel/egl/intel_reg.h
+++ b/src/gallium/state_trackers/glx/dri/dri_drawable.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2009, VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,29 +25,49 @@
*
**************************************************************************/
+#ifndef DRI_DRAWABLE_H
+#define DRI_DRAWABLE_H
-#ifndef _INTEL_REG_H_
-#define _INTEL_REG_H_
+#include "pipe/p_compiler.h"
+struct pipe_surface;
+struct pipe_fence;
+struct st_framebuffer;
-#define BR00_BITBLT_CLIENT 0x40000000
-#define BR00_OP_COLOR_BLT 0x10000000
-#define BR00_OP_SRC_COPY_BLT 0x10C00000
-#define BR13_SOLID_PATTERN 0x80000000
-#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4)
-#define XY_COLOR_BLT_WRITE_ALPHA (1<<21)
-#define XY_COLOR_BLT_WRITE_RGB (1<<20)
+struct dri_drawable
+{
+ __DRIdrawablePrivate *dPriv;
+ unsigned stamp;
-#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
-#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21)
-#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20)
+ struct pipe_fence *last_swap_fence;
+ struct pipe_fence *first_swap_fence;
-#define MI_WAIT_FOR_EVENT ((0x3<<23))
-#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
-#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
+ struct st_framebuffer *stfb;
+};
-#define MI_BATCH_BUFFER_END (0xA<<23)
+
+static INLINE struct dri_drawable *
+dri_drawable(__DRIdrawablePrivate * driDrawPriv)
+{
+ return (struct dri_drawable *) driDrawPriv->driverPrivate;
+}
+
+
+/***********************************************************************
+ * dri_drawable.c
+ */
+
+void
+dri_swap_buffers(__DRIdrawablePrivate * dPriv);
+
+void
+dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv,
+ int x, int y,
+ int w, int h);
+
+void
+dri_update_window_size(__DRIdrawablePrivate *dPriv);
#endif
diff --git a/src/gallium/state_trackers/glx/dri/dri_extensions.c b/src/gallium/state_trackers/glx/dri/dri_extensions.c
new file mode 100644
index 0000000000..126faf7601
--- /dev/null
+++ b/src/gallium/state_trackers/glx/dri/dri_extensions.c
@@ -0,0 +1,108 @@
+/**************************************************************************
+ *
+ * Copyright 2009, VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+
+#define need_GL_ARB_multisample
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
+#define need_GL_ARB_vertex_program
+#define need_GL_ARB_window_pos
+#define need_GL_EXT_blend_color
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_cull_vertex
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_framebuffer_object
+#define need_GL_EXT_multi_draw_arrays
+#define need_GL_EXT_secondary_color
+#define need_GL_NV_vertex_program
+#include "extension_helper.h"
+
+#include "utils.h"
+#include "dri_context.h"
+#include "state_tracker/st_context.h"
+
+
+/**
+ * Extension strings exported by the driver.
+ *
+ * Each entry pairs an extension name with its function table; extensions
+ * that add no entry points use NULL.  The function tables are the
+ * GL_*_functions symbols selected by the need_GL_* defines that precede
+ * the extension_helper.h include.  The list is terminated by a
+ * {NULL, NULL} entry.
+ */
+const struct dri_extension card_extensions[] = {
+ {"GL_ARB_multisample", GL_ARB_multisample_functions},
+ {"GL_ARB_multitexture", NULL},
+ {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
+ {"GL_ARB_texture_border_clamp", NULL},
+ {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions},
+ {"GL_ARB_texture_cube_map", NULL},
+ {"GL_ARB_texture_env_add", NULL},
+ {"GL_ARB_texture_env_combine", NULL},
+ {"GL_ARB_texture_env_dot3", NULL},
+ {"GL_ARB_texture_mirrored_repeat", NULL},
+ {"GL_ARB_texture_rectangle", NULL},
+ {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
+ {"GL_ARB_pixel_buffer_object", NULL},
+ {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
+ {"GL_ARB_window_pos", GL_ARB_window_pos_functions},
+ {"GL_EXT_blend_color", GL_EXT_blend_color_functions},
+ {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
+ {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
+ {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
+ {"GL_EXT_blend_subtract", NULL},
+ {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions},
+ {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions},
+ {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions},
+ {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions},
+ {"GL_EXT_packed_depth_stencil", NULL},
+ {"GL_EXT_pixel_buffer_object", NULL},
+ {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
+ {"GL_EXT_stencil_wrap", NULL},
+ {"GL_EXT_texture_edge_clamp", NULL},
+ {"GL_EXT_texture_env_combine", NULL},
+ {"GL_EXT_texture_env_dot3", NULL},
+ {"GL_EXT_texture_filter_anisotropic", NULL},
+ {"GL_EXT_texture_lod_bias", NULL},
+ {"GL_3DFX_texture_compression_FXT1", NULL},
+ {"GL_APPLE_client_storage", NULL},
+ {"GL_MESA_pack_invert", NULL},
+ {"GL_MESA_ycbcr_texture", NULL},
+ {"GL_NV_blend_square", NULL},
+ {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
+ {"GL_NV_vertex_program1_1", NULL},
+ {"GL_SGIS_generate_mipmap", NULL },
+ {NULL, NULL}
+};
+
+
+
+/**
+ * Enable the extensions listed in card_extensions on the GL context that
+ * belongs to \p ctx.
+ *
+ * The context is taken as a parameter: dri_create_context() already
+ * calls this function with its new dri_context, and there is no
+ * file-scope context to fall back on.
+ *
+ * \param ctx  driver context whose state-tracker GL context (ctx->st->ctx)
+ *             receives the extensions
+ */
+void
+dri_init_extensions( struct dri_context *ctx )
+{
+   /* The card_extensions list should be pruned according to the
+    * capabilities of the pipe_screen.  This is actually something
+    * that can/should be done inside st_create_context().
+    */
+   driInitExtensions( ctx->st->ctx, card_extensions, GL_TRUE );
+}
diff --git a/src/gallium/winsys/drm/intel/dri/intel_lock.c b/src/gallium/state_trackers/glx/dri/dri_lock.c
index ad1c202429..b272ab55f3 100644
--- a/src/gallium/winsys/drm/intel/dri/intel_lock.c
+++ b/src/gallium/state_trackers/glx/dri/dri_lock.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -26,77 +26,65 @@
**************************************************************************/
-#include "main/glheader.h"
#include "pipe/p_thread.h"
-#include <GL/internal/glcore.h>
-#include "state_tracker/st_public.h"
-#include "intel_context.h"
-#include "i830_dri.h"
-
-
+#include "dri_context.h"
+#include "xf86drm.h"
pipe_static_mutex( lockMutex );
-
static void
-intelContendedLock(struct intel_context *intel, uint flags)
+dri_contended_lock(struct dri_context *ctx)
{
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
- __DRIscreenPrivate *sPriv = intel->driScreen;
- struct intel_screen *intelScreen = intel_screen(sPriv);
- drmI830Sarea *sarea = intel->sarea;
+ __DRIdrawablePrivate *dPriv = ctx->dPriv;
+ __DRIcontextPrivate *cPriv = ctx->cPriv;
+ __DRIscreenPrivate *sPriv = cPriv->driScreenPriv;
- drmGetLock(intel->driFd, intel->hHWContext, flags);
+ drmGetLock(sPriv->fd, cPriv->hHWContext, 0);
- DBG(LOCK, "%s - got contended lock\n", __progname);
-
- /* If the window moved, may need to set a new cliprect now.
- *
- * NOTE: This releases and regains the hw lock, so all state
- * checking must be done *after* this call:
+ /* Perform round trip communication with server (including dropping
+ * and retaking the above lock) to update window dimensions:
*/
if (dPriv)
DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv);
-
- if (sarea->width != intelScreen->front.width ||
- sarea->height != intelScreen->front.height) {
-
- intelUpdateScreenRotation(sPriv, sarea);
- }
}
/* Lock the hardware and validate our state.
*/
-void LOCK_HARDWARE( struct intel_context *intel )
+void dri_lock_hardware( struct dri_context *ctx )
{
- char __ret = 0;
-
- pipe_mutex_lock(lockMutex);
- assert(!intel->locked);
+ __DRIcontextPrivate *cPriv = ctx->cPriv;
+ __DRIscreenPrivate *sPriv = cPriv->driScreenPriv;
+ char __ret = 0;
- DRM_CAS(intel->driHwLock, intel->hHWContext,
- (DRM_LOCK_HELD|intel->hHWContext), __ret);
+ pipe_mutex_lock(lockMutex);
+ assert(!ctx->locked);
- if (__ret)
- intelContendedLock( intel, 0 );
+ DRM_CAS((drmLock *) &sPriv->pSAREA->lock,
+ cPriv->hHWContext,
+ (DRM_LOCK_HELD | cPriv->hHWContext),
+ __ret);
- DBG(LOCK, "%s - locked\n", __progname);
+ if (__ret)
+ dri_contended_lock( ctx );
- intel->locked = 1;
+ ctx->locked = TRUE;
}
/* Unlock the hardware using the global current context
*/
-void UNLOCK_HARDWARE( struct intel_context *intel )
+void dri_unlock_hardware( struct dri_context *ctx )
{
- assert(intel->locked);
- intel->locked = 0;
+ __DRIcontextPrivate *cPriv = ctx->cPriv;
+ __DRIscreenPrivate *sPriv = cPriv->driScreenPriv;
- DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
+ assert(ctx->locked);
+ ctx->locked = FALSE;
- pipe_mutex_unlock(lockMutex);
+ DRM_UNLOCK(sPriv->fd,
+ (drmLock *) &sPriv->pSAREA->lock,
+ cPriv->hHWContext);
- DBG(LOCK, "%s - unlocked\n", __progname);
+ pipe_mutex_unlock(lockMutex);
}
diff --git a/src/gallium/state_trackers/glx/dri/dri_screen.c b/src/gallium/state_trackers/glx/dri/dri_screen.c
new file mode 100644
index 0000000000..f7119b949a
--- /dev/null
+++ b/src/gallium/state_trackers/glx/dri/dri_screen.c
@@ -0,0 +1,255 @@
+/**************************************************************************
+ *
+ * Copyright 2009, VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "utils.h"
+#include "vblank.h"
+#include "xmlpool.h"
+
+#include "dri_context.h"
+#include "dri_screen.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_inlines.h"
+#include "state_tracker/st_public.h"
+#include "state_tracker/st_cb_fbo.h"
+
+
+PUBLIC const char __driConfigOptions[] =
+ DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE
+ DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
+ DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+ DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY
+// DRI_CONF_FORCE_S3TC_ENABLE(false)
+ DRI_CONF_ALLOW_LARGE_TEXTURES(1)
+ DRI_CONF_SECTION_END DRI_CONF_END;
+
+const uint __driNConfigOptions = 3;
+
+static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
+
+extern const struct dri_extension card_extensions[];
+
+
+
+static const __DRIextension *driScreenExtensions[] = {
+ &driReadDrawableExtension,
+ &driCopySubBufferExtension.base,
+ &driSwapControlExtension.base,
+ &driFrameTrackingExtension.base,
+ &driMediaStreamCounterExtension.base,
+ NULL
+};
+
+
+
+
+static const char *
+dri_get_name( struct pipe_winsys *winsys )
+{
+ return "dri";
+}
+
+
+
+static void
+dri_destroy_screen(__DRIscreenPrivate * sPriv)
+{
+ struct dri_screen *screen = dri_screen(sPriv);
+
+ screen->pipe_screen->destroy( screen->pipe_screen );
+ screen->pipe_winsys->destroy( screen->pipe_winsys );
+ FREE(screen);
+ sPriv->private = NULL;
+}
+
+
+/**
+ * Get information about previous buffer swaps.  Stub: only validates its arguments.
+ */
+static int
+dri_get_swap_info(__DRIdrawablePrivate * dPriv,
+ __DRIswapInfo * sInfo)
+{
+ if (dPriv == NULL ||
+ dPriv->driverPrivate == NULL ||
+ sInfo == NULL)
+ return -1;
+ else
+ return 0;
+}
+
+static const __DRIconfig **
+dri_fill_in_modes(__DRIscreenPrivate *psp,
+ unsigned pixel_bits )
+{
+ __DRIconfig **configs;
+ __GLcontextModes *m;
+ unsigned num_modes;
+ uint8_t depth_bits_array[3];
+ uint8_t stencil_bits_array[3];
+ uint8_t msaa_samples_array[1];
+ unsigned depth_buffer_factor;
+ unsigned back_buffer_factor;
+ GLenum fb_format;
+ GLenum fb_type;
+ int i;
+
+ static const GLenum back_buffer_modes[] = {
+ GLX_NONE, GLX_SWAP_UNDEFINED_OML
+ };
+
+ depth_bits_array[0] = 0;
+ depth_bits_array[1] = depth_bits;
+ depth_bits_array[2] = depth_bits;
+
+ stencil_bits_array[0] = 0; /* no depth or stencil */
+ stencil_bits_array[1] = 0; /* z24x8 */
+ stencil_bits_array[2] = 8; /* z24s8 */
+
+ msaa_samples_array[0] = 0;
+
+ depth_buffer_factor = 3;
+ back_buffer_factor = 1;
+
+ num_modes = depth_buffer_factor * back_buffer_factor * 4;
+
+ if (pixel_bits == 16) {
+ fb_format = GL_RGB;
+ fb_type = GL_UNSIGNED_SHORT_5_6_5;
+ }
+ else {
+ fb_format = GL_BGRA;
+ fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+ }
+
+ configs = driCreateConfigs(fb_format, fb_type,
+ depth_bits_array,
+ stencil_bits_array, depth_buffer_factor,
+ back_buffer_modes, back_buffer_factor,
+ msaa_samples_array, 1);
+ if (configs == NULL) {
+ debug_printf("%s: driCreateConfigs failed\n", __FUNCTION__);
+ return NULL;
+ }
+
+ return configs;
+}
+
+
+
+/* This is the driver specific part of the createNewScreen entry point.
+ *
+ * Returns the __DRIconfig list supported by this driver, or NULL on failure.
+ */
+static const __DRIconfig **dri_init_screen(__DRIscreenPrivate *sPriv)
+{
+ static const __DRIversion ddx_expected = { 1, 6, 0 }; /* hw query */
+ static const __DRIversion dri_expected = { 4, 0, 0 };
+ static const __DRIversion drm_expected = { 1, 5, 0 }; /* hw query */
+ struct dri_screen *screen;
+
+ if (!driCheckDriDdxDrmVersions2("dri",
+ &sPriv->dri_version, &dri_expected,
+ &sPriv->ddx_version, &ddx_expected,
+ &sPriv->drm_version, &drm_expected)) {
+ return NULL;
+ }
+
+ /* Set up dispatch table to cope with all known extensions:
+ */
+ driInitExtensions( NULL, card_extensions, GL_FALSE );
+
+
+ screen = CALLOC_STRUCT(dri_screen);
+ if (!screen)
+ goto fail;
+
+ screen->sPriv = sPriv;
+ sPriv->private = (void *) screen;
+
+
+ /* Search the registered winsys' for one that likes this sPriv.
+ * This is required in situations where multiple devices speak to
+ * the same DDX and are built into the same binary.
+ *
+ * Note that cases like Intel i915 vs i965 don't fall into this
+ * category because they are built into separate binaries.
+ *
+ * Nonetheless, it's healthy to keep that level of detail out of
+ * this state_tracker.
+ */
+ for (i = 0;
+ i < dri1_winsys_count &&
+ screen->st_winsys == NULL;
+ i++)
+ {
+ screen->dri_winsys =
+ dri_winsys[i]->check_dri_privates( sPriv->pDevPriv,
+ sPriv->pSAREA
+ /* versions, etc?? */));
+ }
+
+
+ driParseOptionInfo(&screen->optionCache,
+ __driConfigOptions,
+ __driNConfigOptions);
+
+
+ /* Plug our info back into the __DRIscreenPrivate:
+ */
+ sPriv->private = (void *) screen;
+ sPriv->extensions = driScreenExtensions;
+
+ return dri_fill_in_modes(sPriv,
+ dri_priv->cpp * 8,
+ 24,
+ 8,
+ 1);
+fail:
+ return NULL;
+}
+
+
+
+const struct __DriverAPIRec driDriverAPI = {
+ .InitScreen = dri_init_screen,
+ .DestroyScreen = dri_destroy_screen,
+ .CreateContext = dri_create_context,
+ .DestroyContext = dri_destroy_context,
+ .CreateBuffer = dri_create_buffer,
+ .DestroyBuffer = dri_destroy_buffer,
+ .SwapBuffers = dri_swap_buffers,
+ .MakeCurrent = dri_make_current,
+ .UnbindContext = dri_unbind_context,
+ .GetSwapInfo = dri_get_swap_info,
+ .GetDrawableMSC = driDrawableGetMSC32,
+ .WaitForMSC = driWaitForMSC32,
+ .CopySubBuffer = dri_copy_sub_buffer,
+
+ //.InitScreen2 = dri_init_screen2,
+};
diff --git a/src/gallium/winsys/drm/intel/dri/intel_screen.h b/src/gallium/state_trackers/glx/dri/dri_screen.h
index 0bb43a915c..12ed86d22a 100644
--- a/src/gallium/winsys/drm/intel/dri/intel_screen.h
+++ b/src/gallium/state_trackers/glx/dri/dri_screen.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2009, VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,33 +25,27 @@
*
**************************************************************************/
-#ifndef _INTEL_SCREEN_H_
-#define _INTEL_SCREEN_H_
+#ifndef DRI_SCREEN_H
+#define DRI_SCREEN_H
#include "dri_util.h"
-#include "i830_common.h"
#include "xmlconfig.h"
-#include "ws_dri_bufpool.h"
#include "pipe/p_compiler.h"
-#include "intel_be_device.h"
-
-struct intel_screen
+struct dri_screen
{
- struct intel_be_device base;
+ __DRIScreenPrivate *sPriv;
+ struct pipe_winsys *pipe_winsys;
+ struct pipe_screen *pipe_screen;
struct {
- drm_handle_t handle;
-
- /* We create a static dri buffer for the frontbuffer.
+ /* Need a pipe_surface pointer to do client-side swapbuffers:
*/
- struct _DriBufferObject *buffer;
+ unsigned long buffer_handle;
struct pipe_surface *surface;
struct pipe_texture *texture;
- char *map; /* memory map */
- int offset; /* from start of video mem, in bytes */
int pitch; /* row stride, in bytes */
int width;
int height;
@@ -62,61 +56,28 @@ struct intel_screen
int deviceID;
int drmMinor;
- drmI830Sarea *sarea;
/**
- * Configuration cache with default values for all contexts
- */
+ * Configuration cache with default values for all contexts
+ */
driOptionCache optionCache;
- boolean havePools;
-
/**
* Temporary(?) context to use for SwapBuffers or other situations in
* which we need a rendering context, but none is currently bound.
*/
- struct intel_context *dummyContext;
-
- /*
- * New stuff form the i915tex integration
- */
- unsigned batch_id;
-
-
- struct pipe_winsys *winsys;
+ struct dri_context *dummyContext;
};
/** cast wrapper */
-static INLINE struct intel_screen *
-intel_screen(__DRIscreenPrivate *sPriv)
+static INLINE struct dri_screen *
+dri_screen(__DRIscreenPrivate *sPriv)
{
- return (struct intel_screen *) sPriv->private;
+ return (struct dri_screen *) sPriv->private;
}
-extern void
-intelUpdateScreenRotation(__DRIscreenPrivate * sPriv, drmI830Sarea * sarea);
-
-
-extern void intelDestroyContext(__DRIcontextPrivate * driContextPriv);
-
-extern boolean intelUnbindContext(__DRIcontextPrivate * driContextPriv);
-
-extern boolean
-intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
- __DRIdrawablePrivate * driDrawPriv,
- __DRIdrawablePrivate * driReadPriv);
-
-
-extern boolean
-intelCreatePools(__DRIscreenPrivate *sPriv);
-
-extern boolean
-intelCreateContext(const __GLcontextModes * visual,
- __DRIcontextPrivate * driContextPriv,
- void *sharedContextPrivate);
-
#endif
diff --git a/src/gallium/state_trackers/glx/xlib/Makefile b/src/gallium/state_trackers/glx/xlib/Makefile
new file mode 100644
index 0000000000..6d10b090aa
--- /dev/null
+++ b/src/gallium/state_trackers/glx/xlib/Makefile
@@ -0,0 +1,17 @@
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = xlib
+
+LIBRARY_INCLUDES = \
+ -I$(TOP)/include \
+ -I$(TOP)/src/mesa \
+ -I$(TOP)/src/mesa/main
+
+C_SOURCES = \
+ glxapi.c \
+ fakeglx.c \
+ fakeglx_fonts.c \
+ xm_api.c
+
+include ../../../Makefile.template
diff --git a/src/gallium/state_trackers/glx/xlib/SConscript b/src/gallium/state_trackers/glx/xlib/SConscript
new file mode 100644
index 0000000000..01641e90e4
--- /dev/null
+++ b/src/gallium/state_trackers/glx/xlib/SConscript
@@ -0,0 +1,25 @@
+#######################################################################
+# SConscript for xlib state_tracker
+
+Import('*')
+
+if env['platform'] == 'linux' \
+ and 'mesa' in env['statetrackers'] \
+ and ('softpipe' or 'i915simple' or 'trace') in env['drivers']:
+
+ env = env.Clone()
+
+ env.Append(CPPPATH = [
+ '#/src/mesa',
+ '#/src/mesa/main',
+ ])
+
+ st_xlib = env.ConvenienceLibrary(
+ target = 'st_xlib',
+ source = [ 'glxapi.c',
+ 'fakeglx.c',
+ 'fakeglx_fonts.c',
+ 'xm_api.c',
+ ]
+ )
+ Export('st_xlib')
diff --git a/src/gallium/winsys/xlib/fakeglx.c b/src/gallium/state_trackers/glx/xlib/fakeglx.c
index fd2d222c85..65e7048188 100644
--- a/src/gallium/winsys/xlib/fakeglx.c
+++ b/src/gallium/state_trackers/glx/xlib/fakeglx.c
@@ -40,25 +40,18 @@
-#include "glxheader.h"
#include "glxapi.h"
-#include "GL/xmesa.h"
+#include "xm_api.h"
#include "context.h"
#include "config.h"
#include "macros.h"
#include "imports.h"
-#include "mtypes.h"
#include "version.h"
-#include "xfonts.h"
-#include "xmesaP.h"
+#include "fakeglx.h"
#include "state_tracker/st_context.h"
#include "state_tracker/st_public.h"
-#ifdef __VMS
-#define _mesa_sprintf sprintf
-#endif
-
/* This indicates the client-side GLX API and GLX encoder version. */
#define CLIENT_MAJOR_VERSION 1
#define CLIENT_MINOR_VERSION 4 /* but don't have 1.3's pbuffers, etc yet */
@@ -76,7 +69,6 @@
#define VENDOR "Brian Paul"
#define EXTENSIONS \
- "GLX_MESA_set_3dfx_mode " \
"GLX_MESA_copy_sub_buffer " \
"GLX_MESA_pixmap_colormap " \
"GLX_MESA_release_buffers " \
@@ -116,22 +108,6 @@ static XMesaVisual *VisualTable = NULL;
static int NumVisuals = 0;
-/*
- * This struct and some code fragments borrowed
- * from Mark Kilgard's GLUT library.
- */
-typedef struct _OverlayInfo {
- /* Avoid 64-bit portability problems by being careful to use
- longs due to the way XGetWindowProperty is specified. Note
- that these parameters are passed as CARD32s over X
- protocol. */
- unsigned long overlay_visual;
- long transparent_type;
- long value;
- long layer;
-} OverlayInfo;
-
-
/* Macro to handle c_class vs class field name in XVisualInfo struct */
#if defined(__cplusplus) || defined(c_plusplus)
@@ -173,98 +149,6 @@ is_usable_visual( XVisualInfo *vinfo )
}
-
-/**
- * Get an array OverlayInfo records for specified screen.
- * \param dpy the display
- * \param screen screen number
- * \param numOverlays returns numver of OverlayInfo records
- * \return pointer to OverlayInfo array, free with XFree()
- */
-static OverlayInfo *
-GetOverlayInfo(Display *dpy, int screen, int *numOverlays)
-{
- Atom overlayVisualsAtom;
- Atom actualType;
- Status status;
- unsigned char *ovInfo;
- unsigned long sizeData, bytesLeft;
- int actualFormat;
-
- /*
- * The SERVER_OVERLAY_VISUALS property on the root window contains
- * a list of overlay visuals. Get that list now.
- */
- overlayVisualsAtom = XInternAtom(dpy,"SERVER_OVERLAY_VISUALS", True);
- if (overlayVisualsAtom == None) {
- return 0;
- }
-
- status = XGetWindowProperty(dpy, RootWindow(dpy, screen),
- overlayVisualsAtom, 0L, (long) 10000, False,
- overlayVisualsAtom, &actualType, &actualFormat,
- &sizeData, &bytesLeft,
- &ovInfo);
-
- if (status != Success || actualType != overlayVisualsAtom ||
- actualFormat != 32 || sizeData < 4) {
- /* something went wrong */
- XFree((void *) ovInfo);
- *numOverlays = 0;
- return NULL;
- }
-
- *numOverlays = sizeData / 4;
- return (OverlayInfo *) ovInfo;
-}
-
-
-
-/**
- * Return the level (overlay, normal, underlay) of a given XVisualInfo.
- * Input: dpy - the X display
- * vinfo - the XVisualInfo to test
- * Return: level of the visual:
- * 0 = normal planes
- * >0 = overlay planes
- * <0 = underlay planes
- */
-static int
-level_of_visual( Display *dpy, XVisualInfo *vinfo )
-{
- OverlayInfo *overlay_info;
- int numOverlaysPerScreen, i;
-
- overlay_info = GetOverlayInfo(dpy, vinfo->screen, &numOverlaysPerScreen);
- if (!overlay_info) {
- return 0;
- }
-
- /* search the overlay visual list for the visual ID of interest */
- for (i = 0; i < numOverlaysPerScreen; i++) {
- const OverlayInfo *ov = overlay_info + i;
- if (ov->overlay_visual == vinfo->visualid) {
- /* found the visual */
- if (/*ov->transparent_type==1 &&*/ ov->layer!=0) {
- int level = ov->layer;
- XFree((void *) overlay_info);
- return level;
- }
- else {
- XFree((void *) overlay_info);
- return 0;
- }
- }
- }
-
- /* The visual ID was not found in the overlay list. */
- XFree((void *) overlay_info);
- return 0;
-}
-
-
-
-
/*
* Given an XVisualInfo and RGB, Double, and Depth buffer flags, save the
* configuration in our list of GLX visuals.
@@ -421,60 +305,28 @@ default_accum_bits(void)
static XMesaVisual
create_glx_visual( Display *dpy, XVisualInfo *visinfo )
{
- int vislevel;
GLint zBits = default_depth_bits();
GLint accBits = default_accum_bits();
GLboolean alphaFlag = default_alpha_bits() > 0;
- vislevel = level_of_visual( dpy, visinfo );
- if (vislevel) {
- /* Configure this visual as a CI, single-buffered overlay */
+ if (is_usable_visual( visinfo )) {
+ /* Configure this visual as RGB, double-buffered, depth-buffered. */
+ /* This is surely wrong for some people's needs but what else */
+ /* can be done? They should use glXChooseVisual(). */
return save_glx_visual( dpy, visinfo,
- GL_FALSE, /* rgb */
- GL_FALSE, /* alpha */
- GL_FALSE, /* double */
+ GL_TRUE, /* rgb */
+ alphaFlag, /* alpha */
+ GL_TRUE, /* double */
GL_FALSE, /* stereo */
- 0, /* depth bits */
- 0, /* stencil bits */
- 0,0,0,0, /* accum bits */
- vislevel, /* level */
+ zBits,
+ STENCIL_BITS,
+ accBits, /* r */
+ accBits, /* g */
+ accBits, /* b */
+ accBits, /* a */
+ 0, /* level */
0 /* numAux */
- );
- }
- else if (is_usable_visual( visinfo )) {
- if (_mesa_getenv("MESA_GLX_FORCE_CI")) {
- /* Configure this visual as a COLOR INDEX visual. */
- return save_glx_visual( dpy, visinfo,
- GL_FALSE, /* rgb */
- GL_FALSE, /* alpha */
- GL_TRUE, /* double */
- GL_FALSE, /* stereo */
- zBits,
- STENCIL_BITS,
- 0, 0, 0, 0, /* accum bits */
- 0, /* level */
- 0 /* numAux */
- );
- }
- else {
- /* Configure this visual as RGB, double-buffered, depth-buffered. */
- /* This is surely wrong for some people's needs but what else */
- /* can be done? They should use glXChooseVisual(). */
- return save_glx_visual( dpy, visinfo,
- GL_TRUE, /* rgb */
- alphaFlag, /* alpha */
- GL_TRUE, /* double */
- GL_FALSE, /* stereo */
- zBits,
- STENCIL_BITS,
- accBits, /* r */
- accBits, /* g */
- accBits, /* b */
- accBits, /* a */
- 0, /* level */
- 0 /* numAux */
- );
- }
+ );
}
else {
_mesa_warning(NULL, "Mesa: error in glXCreateContext: bad visual\n");
@@ -512,45 +364,6 @@ find_glx_visual( Display *dpy, XVisualInfo *vinfo )
-/**
- * Return the transparent pixel value for a GLX visual.
- * Input: glxvis - the glx_visual
- * Return: a pixel value or -1 if no transparent pixel
- */
-static int
-transparent_pixel( XMesaVisual glxvis )
-{
- Display *dpy = glxvis->display;
- XVisualInfo *vinfo = glxvis->visinfo;
- OverlayInfo *overlay_info;
- int numOverlaysPerScreen, i;
-
- overlay_info = GetOverlayInfo(dpy, vinfo->screen, &numOverlaysPerScreen);
- if (!overlay_info) {
- return -1;
- }
-
- for (i = 0; i < numOverlaysPerScreen; i++) {
- const OverlayInfo *ov = overlay_info + i;
- if (ov->overlay_visual == vinfo->visualid) {
- /* found it! */
- if (ov->transparent_type == 0) {
- /* type 0 indicates no transparency */
- XFree((void *) overlay_info);
- return -1;
- }
- else {
- /* ov->value is the transparent pixel */
- XFree((void *) overlay_info);
- return ov->value;
- }
- }
- }
-
- /* The visual ID was not found in the overlay list. */
- XFree((void *) overlay_info);
- return -1;
-}
@@ -597,7 +410,7 @@ get_visual( Display *dpy, int scr, unsigned int depth, int xclass )
return NULL;
}
}
-
+
return vis;
}
@@ -650,89 +463,48 @@ get_env_visual(Display *dpy, int scr, const char *varname)
/*
- * Select an X visual which satisfies the RGBA/CI flag and minimum depth.
- * Input: dpy, screen - X display and screen number
- * rgba - GL_TRUE = RGBA mode, GL_FALSE = CI mode
+ * Select an X visual for RGBA rendering which satisfies the minimum depth.
+ * Input: dpy - X display
+ * screen - X screen number
* min_depth - minimum visual depth
* preferred_class - preferred GLX visual class or DONT_CARE
* Return: pointer to an XVisualInfo or NULL.
*/
static XVisualInfo *
-choose_x_visual( Display *dpy, int screen, GLboolean rgba, int min_depth,
+choose_x_visual( Display *dpy, int screen, int min_depth,
int preferred_class )
{
XVisualInfo *vis;
int xclass, visclass = 0;
int depth;
- if (rgba) {
- Atom hp_cr_maps = XInternAtom(dpy, "_HP_RGB_SMOOTH_MAP_LIST", True);
- /* First see if the MESA_RGB_VISUAL env var is defined */
- vis = get_env_visual( dpy, screen, "MESA_RGB_VISUAL" );
- if (vis) {
- return vis;
- }
- /* Otherwise, search for a suitable visual */
- if (preferred_class==DONT_CARE) {
- for (xclass=0;xclass<6;xclass++) {
- switch (xclass) {
- case 0: visclass = TrueColor; break;
- case 1: visclass = DirectColor; break;
- case 2: visclass = PseudoColor; break;
- case 3: visclass = StaticColor; break;
- case 4: visclass = GrayScale; break;
- case 5: visclass = StaticGray; break;
- }
- if (min_depth==0) {
- /* start with shallowest */
- for (depth=0;depth<=32;depth++) {
- if (visclass==TrueColor && depth==8 && !hp_cr_maps) {
- /* Special case: try to get 8-bit PseudoColor before */
- /* 8-bit TrueColor */
- vis = get_visual( dpy, screen, 8, PseudoColor );
- if (vis) {
- return vis;
- }
- }
- vis = get_visual( dpy, screen, depth, visclass );
- if (vis) {
- return vis;
- }
- }
- }
- else {
- /* start with deepest */
- for (depth=32;depth>=min_depth;depth--) {
- if (visclass==TrueColor && depth==8 && !hp_cr_maps) {
- /* Special case: try to get 8-bit PseudoColor before */
- /* 8-bit TrueColor */
- vis = get_visual( dpy, screen, 8, PseudoColor );
- if (vis) {
- return vis;
- }
- }
- vis = get_visual( dpy, screen, depth, visclass );
- if (vis) {
- return vis;
- }
- }
- }
- }
- }
- else {
- /* search for a specific visual class */
- switch (preferred_class) {
- case GLX_TRUE_COLOR_EXT: visclass = TrueColor; break;
- case GLX_DIRECT_COLOR_EXT: visclass = DirectColor; break;
- case GLX_PSEUDO_COLOR_EXT: visclass = PseudoColor; break;
- case GLX_STATIC_COLOR_EXT: visclass = StaticColor; break;
- case GLX_GRAY_SCALE_EXT: visclass = GrayScale; break;
- case GLX_STATIC_GRAY_EXT: visclass = StaticGray; break;
- default: return NULL;
+ /* First see if the MESA_RGB_VISUAL env var is defined */
+ vis = get_env_visual( dpy, screen, "MESA_RGB_VISUAL" );
+ if (vis) {
+ return vis;
+ }
+ /* Otherwise, search for a suitable visual */
+ if (preferred_class==DONT_CARE) {
+ for (xclass=0;xclass<6;xclass++) {
+ switch (xclass) {
+ case 0: visclass = TrueColor; break;
+ case 1: visclass = DirectColor; break;
+ case 2: visclass = PseudoColor; break;
+ case 3: visclass = StaticColor; break;
+ case 4: visclass = GrayScale; break;
+ case 5: visclass = StaticGray; break;
}
if (min_depth==0) {
/* start with shallowest */
for (depth=0;depth<=32;depth++) {
+ if (visclass==TrueColor && depth==8) {
+ /* Special case: try to get 8-bit PseudoColor before */
+ /* 8-bit TrueColor */
+ vis = get_visual( dpy, screen, 8, PseudoColor );
+ if (vis) {
+ return vis;
+ }
+ }
vis = get_visual( dpy, screen, depth, visclass );
if (vis) {
return vis;
@@ -742,6 +514,14 @@ choose_x_visual( Display *dpy, int screen, GLboolean rgba, int min_depth,
else {
/* start with deepest */
for (depth=32;depth>=min_depth;depth--) {
+ if (visclass==TrueColor && depth==8) {
+ /* Special case: try to get 8-bit PseudoColor before */
+ /* 8-bit TrueColor */
+ vis = get_visual( dpy, screen, 8, PseudoColor );
+ if (vis) {
+ return vis;
+ }
+ }
vis = get_visual( dpy, screen, depth, visclass );
if (vis) {
return vis;
@@ -751,56 +531,28 @@ choose_x_visual( Display *dpy, int screen, GLboolean rgba, int min_depth,
}
}
else {
- /* First see if the MESA_CI_VISUAL env var is defined */
- vis = get_env_visual( dpy, screen, "MESA_CI_VISUAL" );
- if (vis) {
- return vis;
- }
- /* Otherwise, search for a suitable visual, starting with shallowest */
- if (preferred_class==DONT_CARE) {
- for (xclass=0;xclass<4;xclass++) {
- switch (xclass) {
- case 0: visclass = PseudoColor; break;
- case 1: visclass = StaticColor; break;
- case 2: visclass = GrayScale; break;
- case 3: visclass = StaticGray; break;
- }
- /* try 8-bit up through 16-bit */
- for (depth=8;depth<=16;depth++) {
- vis = get_visual( dpy, screen, depth, visclass );
- if (vis) {
- return vis;
- }
- }
- /* try min_depth up to 8-bit */
- for (depth=min_depth;depth<8;depth++) {
- vis = get_visual( dpy, screen, depth, visclass );
- if (vis) {
- return vis;
- }
- }
- }
+ /* search for a specific visual class */
+ switch (preferred_class) {
+ case GLX_TRUE_COLOR_EXT: visclass = TrueColor; break;
+ case GLX_DIRECT_COLOR_EXT: visclass = DirectColor; break;
+ case GLX_PSEUDO_COLOR_EXT: visclass = PseudoColor; break;
+ case GLX_STATIC_COLOR_EXT: visclass = StaticColor; break;
+ case GLX_GRAY_SCALE_EXT: visclass = GrayScale; break;
+ case GLX_STATIC_GRAY_EXT: visclass = StaticGray; break;
+ default: return NULL;
}
- else {
- /* search for a specific visual class */
- switch (preferred_class) {
- case GLX_TRUE_COLOR_EXT: visclass = TrueColor; break;
- case GLX_DIRECT_COLOR_EXT: visclass = DirectColor; break;
- case GLX_PSEUDO_COLOR_EXT: visclass = PseudoColor; break;
- case GLX_STATIC_COLOR_EXT: visclass = StaticColor; break;
- case GLX_GRAY_SCALE_EXT: visclass = GrayScale; break;
- case GLX_STATIC_GRAY_EXT: visclass = StaticGray; break;
- default: return NULL;
- }
- /* try 8-bit up through 16-bit */
- for (depth=8;depth<=16;depth++) {
+ if (min_depth==0) {
+ /* start with shallowest */
+ for (depth=0;depth<=32;depth++) {
vis = get_visual( dpy, screen, depth, visclass );
if (vis) {
return vis;
}
}
- /* try min_depth up to 8-bit */
- for (depth=min_depth;depth<8;depth++) {
+ }
+ else {
+ /* start with deepest */
+ for (depth=32;depth>=min_depth;depth--) {
vis = get_visual( dpy, screen, depth, visclass );
if (vis) {
return vis;
@@ -815,117 +567,6 @@ choose_x_visual( Display *dpy, int screen, GLboolean rgba, int min_depth,
-/*
- * Find the deepest X over/underlay visual of at least min_depth.
- * Input: dpy, screen - X display and screen number
- * level - the over/underlay level
- * trans_type - transparent pixel type: GLX_NONE_EXT,
- * GLX_TRANSPARENT_RGB_EXT, GLX_TRANSPARENT_INDEX_EXT,
- * or DONT_CARE
- * trans_value - transparent pixel value or DONT_CARE
- * min_depth - minimum visual depth
- * preferred_class - preferred GLX visual class or DONT_CARE
- * Return: pointer to an XVisualInfo or NULL.
- */
-static XVisualInfo *
-choose_x_overlay_visual( Display *dpy, int scr, GLboolean rgbFlag,
- int level, int trans_type, int trans_value,
- int min_depth, int preferred_class )
-{
- OverlayInfo *overlay_info;
- int numOverlaysPerScreen;
- int i;
- XVisualInfo *deepvis;
- int deepest;
-
- /*DEBUG int tt, tv; */
-
- switch (preferred_class) {
- case GLX_TRUE_COLOR_EXT: preferred_class = TrueColor; break;
- case GLX_DIRECT_COLOR_EXT: preferred_class = DirectColor; break;
- case GLX_PSEUDO_COLOR_EXT: preferred_class = PseudoColor; break;
- case GLX_STATIC_COLOR_EXT: preferred_class = StaticColor; break;
- case GLX_GRAY_SCALE_EXT: preferred_class = GrayScale; break;
- case GLX_STATIC_GRAY_EXT: preferred_class = StaticGray; break;
- default: preferred_class = DONT_CARE;
- }
-
- overlay_info = GetOverlayInfo(dpy, scr, &numOverlaysPerScreen);
- if (!overlay_info) {
- return NULL;
- }
-
- /* Search for the deepest overlay which satisifies all criteria. */
- deepest = min_depth;
- deepvis = NULL;
-
- for (i = 0; i < numOverlaysPerScreen; i++) {
- const OverlayInfo *ov = overlay_info + i;
- XVisualInfo *vislist, vistemplate;
- int count;
-
- if (ov->layer!=level) {
- /* failed overlay level criteria */
- continue;
- }
- if (!(trans_type==DONT_CARE
- || (trans_type==GLX_TRANSPARENT_INDEX_EXT
- && ov->transparent_type>0)
- || (trans_type==GLX_NONE_EXT && ov->transparent_type==0))) {
- /* failed transparent pixel type criteria */
- continue;
- }
- if (trans_value!=DONT_CARE && trans_value!=ov->value) {
- /* failed transparent pixel value criteria */
- continue;
- }
-
- /* get XVisualInfo and check the depth */
- vistemplate.visualid = ov->overlay_visual;
- vistemplate.screen = scr;
- vislist = XGetVisualInfo( dpy, VisualIDMask | VisualScreenMask,
- &vistemplate, &count );
-
- if (count!=1) {
- /* something went wrong */
- continue;
- }
- if (preferred_class!=DONT_CARE && preferred_class!=vislist->CLASS) {
- /* wrong visual class */
- continue;
- }
-
- /* if RGB was requested, make sure we have True/DirectColor */
- if (rgbFlag && vislist->CLASS != TrueColor
- && vislist->CLASS != DirectColor)
- continue;
-
- /* if CI was requested, make sure we have a color indexed visual */
- if (!rgbFlag
- && (vislist->CLASS == TrueColor || vislist->CLASS == DirectColor))
- continue;
-
- if (deepvis==NULL || vislist->depth > deepest) {
- /* YES! found a satisfactory visual */
- if (deepvis) {
- XFree( deepvis );
- }
- deepest = vislist->depth;
- deepvis = vislist;
- /* DEBUG tt = ov->transparent_type;*/
- /* DEBUG tv = ov->value; */
- }
- }
-
-/*DEBUG
- if (deepvis) {
- printf("chose 0x%x: layer=%d depth=%d trans_type=%d trans_value=%d\n",
- deepvis->visualid, level, deepvis->depth, tt, tv );
- }
-*/
- return deepvis;
-}
-
/**********************************************************************/
/*** Display-related functions ***/
@@ -1273,6 +914,7 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig )
(void) caveat;
+
/*
* Since we're only simulating the GLX extension this function will never
* find any real GL visuals. Instead, all we can do is try to find an RGB
@@ -1290,8 +932,7 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig )
if (vis) {
/* give the visual some useful GLX attributes */
double_flag = GL_TRUE;
- if (vis->depth > 8)
- rgb_flag = GL_TRUE;
+ rgb_flag = GL_TRUE;
depth_size = default_depth_bits();
stencil_size = STENCIL_BITS;
/* XXX accum??? */
@@ -1299,38 +940,17 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig )
}
else if (level==0) {
/* normal color planes */
- if (rgb_flag) {
- /* Get an RGB visual */
- int min_rgb = min_red + min_green + min_blue;
- if (min_rgb>1 && min_rgb<8) {
- /* a special case to be sure we can get a monochrome visual */
- min_rgb = 1;
- }
- vis = choose_x_visual( dpy, screen, rgb_flag, min_rgb, visual_type );
- }
- else {
- /* Get a color index visual */
- vis = choose_x_visual( dpy, screen, rgb_flag, min_ci, visual_type );
- accumRedSize = accumGreenSize = accumBlueSize = accumAlphaSize = 0;
+ /* Get an RGB visual */
+ int min_rgb = min_red + min_green + min_blue;
+ if (min_rgb>1 && min_rgb<8) {
+ /* a special case to be sure we can get a monochrome visual */
+ min_rgb = 1;
}
+ vis = choose_x_visual( dpy, screen, min_rgb, visual_type );
}
else {
- /* over/underlay planes */
- if (rgb_flag) {
- /* rgba overlay */
- int min_rgb = min_red + min_green + min_blue;
- if (min_rgb>1 && min_rgb<8) {
- /* a special case to be sure we can get a monochrome visual */
- min_rgb = 1;
- }
- vis = choose_x_overlay_visual( dpy, screen, rgb_flag, level,
- trans_type, trans_value, min_rgb, visual_type );
- }
- else {
- /* color index overlay */
- vis = choose_x_overlay_visual( dpy, screen, rgb_flag, level,
- trans_type, trans_value, min_ci, visual_type );
- }
+ _mesa_warning(NULL, "overlay not supported");
+ return NULL;
}
if (vis) {
@@ -1356,11 +976,16 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig )
/* we only support one size of stencil and accum buffers. */
if (stencil_size > 0)
stencil_size = STENCIL_BITS;
- if (accumRedSize > 0 || accumGreenSize > 0 || accumBlueSize > 0 ||
+
+ if (accumRedSize > 0 ||
+ accumGreenSize > 0 ||
+ accumBlueSize > 0 ||
accumAlphaSize > 0) {
+
accumRedSize =
- accumGreenSize =
- accumBlueSize = default_accum_bits();
+ accumGreenSize =
+ accumBlueSize = default_accum_bits();
+
accumAlphaSize = alpha_flag ? accumRedSize : 0;
}
@@ -1384,16 +1009,12 @@ Fake_glXChooseVisual( Display *dpy, int screen, int *list )
xmvis = choose_visual(dpy, screen, list, GL_FALSE);
if (xmvis) {
-#if 0
- return xmvis->vishandle;
-#else
/* create a new vishandle - the cached one may be stale */
xmvis->vishandle = (XVisualInfo *) _mesa_malloc(sizeof(XVisualInfo));
if (xmvis->vishandle) {
_mesa_memcpy(xmvis->vishandle, xmvis->visinfo, sizeof(XVisualInfo));
}
return xmvis->vishandle;
-#endif
}
else
return NULL;
@@ -1489,9 +1110,6 @@ Fake_glXMakeContextCurrent( Display *dpy, GLXDrawable draw,
/* Out of memory, or context/drawable depth mismatch */
return False;
}
-#ifdef FX
- FXcreateContext( xmctx->xm_visual, draw, xmctx, drawBuffer );
-#endif
}
/* Find the XMesaBuffer which corresponds to the GLXDrawable 'read' */
@@ -1509,9 +1127,6 @@ Fake_glXMakeContextCurrent( Display *dpy, GLXDrawable draw,
/* Out of memory, or context/drawable depth mismatch */
return False;
}
-#ifdef FX
- FXcreateContext( xmctx->xm_visual, read, xmctx, readBuffer );
-#endif
}
if (no_rast &&
@@ -1541,7 +1156,7 @@ Fake_glXMakeContextCurrent( Display *dpy, GLXDrawable draw,
}
else if (!ctx && !draw && !read) {
/* release current context w/out assigning new one. */
- XMesaMakeCurrent( NULL, NULL );
+ XMesaMakeCurrent2( NULL, NULL, NULL );
MakeCurrent_PrevContext = 0;
MakeCurrent_PrevDrawable = 0;
MakeCurrent_PrevReadable = 0;
@@ -1824,32 +1439,11 @@ get_config( XMesaVisual xmvis, int attrib, int *value, GLboolean fbconfig )
}
return 0;
case GLX_TRANSPARENT_TYPE_EXT:
- if (xmvis->mesa_visual.level==0) {
- /* normal planes */
- *value = GLX_NONE_EXT;
- }
- else if (xmvis->mesa_visual.level>0) {
- /* overlay */
- if (xmvis->mesa_visual.rgbMode) {
- *value = GLX_TRANSPARENT_RGB_EXT;
- }
- else {
- *value = GLX_TRANSPARENT_INDEX_EXT;
- }
- }
- else if (xmvis->mesa_visual.level<0) {
- /* underlay */
- *value = GLX_NONE_EXT;
- }
+ /* normal planes */
+ *value = GLX_NONE_EXT;
return 0;
case GLX_TRANSPARENT_INDEX_VALUE_EXT:
- {
- int pixel = transparent_pixel( xmvis );
- if (pixel>=0) {
- *value = pixel;
- }
- /* else undefined */
- }
+ /* undefined */
return 0;
case GLX_TRANSPARENT_RED_VALUE_EXT:
/* undefined */
@@ -2017,13 +1611,7 @@ Fake_glXWaitX( void )
static const char *
get_extensions( void )
{
-#ifdef FX
- const char *fx = _mesa_getenv("MESA_GLX_FX");
- if (fx && fx[0] != 'd') {
- return EXTENSIONS;
- }
-#endif
- return EXTENSIONS + 23; /* skip "GLX_MESA_set_3dfx_mode" */
+ return EXTENSIONS;
}
@@ -2198,11 +1786,6 @@ Fake_glXCreateWindow( Display *dpy, GLXFBConfig config, Window win,
if (!xmbuf)
return 0;
-#ifdef FX
- /* XXX this will segfault if actually called */
- FXcreateContext(xmvis, win, NULL, xmbuf);
-#endif
-
(void) dpy;
(void) attribList; /* Ignored in GLX 1.3 */
@@ -2213,7 +1796,7 @@ Fake_glXCreateWindow( Display *dpy, GLXFBConfig config, Window win,
static void
Fake_glXDestroyWindow( Display *dpy, GLXWindow window )
{
- XMesaBuffer b = XMesaFindBuffer(dpy, (XMesaDrawable) window);
+ XMesaBuffer b = XMesaFindBuffer(dpy, (Drawable) window);
if (b)
XMesaDestroyBuffer(b);
/* don't destroy X window */
@@ -2334,7 +1917,7 @@ Fake_glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap,
static void
Fake_glXDestroyPixmap( Display *dpy, GLXPixmap pixmap )
{
- XMesaBuffer b = XMesaFindBuffer(dpy, (XMesaDrawable)pixmap);
+ XMesaBuffer b = XMesaFindBuffer(dpy, (Drawable)pixmap);
if (b)
XMesaDestroyBuffer(b);
/* don't destroy X pixmap */
@@ -2988,50 +2571,6 @@ Fake_glXReleaseBuffersMESA( Display *dpy, GLXDrawable d )
return False;
}
-
-
-/*** GLX_MESA_set_3dfx_mode ***/
-
-static Bool
-Fake_glXSet3DfxModeMESA( int mode )
-{
- return XMesaSetFXmode( mode );
-}
-
-
-
-/*** GLX_NV_vertex_array range ***/
-static void *
-Fake_glXAllocateMemoryNV( GLsizei size,
- GLfloat readFrequency,
- GLfloat writeFrequency,
- GLfloat priority )
-{
- (void) size;
- (void) readFrequency;
- (void) writeFrequency;
- (void) priority;
- return NULL;
-}
-
-
-static void
-Fake_glXFreeMemoryNV( GLvoid *pointer )
-{
- (void) pointer;
-}
-
-
-/*** GLX_MESA_agp_offset ***/
-
-static GLuint
-Fake_glXGetAGPOffsetMESA( const GLvoid *pointer )
-{
- (void) pointer;
- return ~0;
-}
-
-
/*** GLX_EXT_texture_from_pixmap ***/
static void
@@ -3052,16 +2591,15 @@ Fake_glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer)
}
-/* silence warning */
-extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void);
-
/**
* Create a new GLX API dispatch table with its function pointers
* initialized to point to Mesa's "fake" GLX API functions.
- * Note: there's a similar function (_real_GetGLXDispatchTable) that
- * returns a new dispatch table with all pointers initalized to point
- * to "real" GLX functions (which understand GLX wire protocol, etc).
+ *
+ * Note: there used to be a similar function
+ * (_real_GetGLXDispatchTable) that returned a new dispatch table with
+ * all pointers initialized to point to "real" GLX functions (which
+ * understand GLX wire protocol, etc).
*/
struct _glxapi_table *
_mesa_GetGLXDispatchTable(void)
@@ -3197,16 +2735,6 @@ _mesa_GetGLXDispatchTable(void)
/*** GLX_MESA_pixmap_colormap ***/
glx.CreateGLXPixmapMESA = Fake_glXCreateGLXPixmapMESA;
- /*** GLX_MESA_set_3dfx_mode ***/
- glx.Set3DfxModeMESA = Fake_glXSet3DfxModeMESA;
-
- /*** GLX_NV_vertex_array_range ***/
- glx.AllocateMemoryNV = Fake_glXAllocateMemoryNV;
- glx.FreeMemoryNV = Fake_glXFreeMemoryNV;
-
- /*** GLX_MESA_agp_offset ***/
- glx.GetAGPOffsetMESA = Fake_glXGetAGPOffsetMESA;
-
/*** GLX_EXT_texture_from_pixmap ***/
glx.BindTexImageEXT = Fake_glXBindTexImageEXT;
glx.ReleaseTexImageEXT = Fake_glXReleaseTexImageEXT;
diff --git a/src/gallium/winsys/xlib/xfonts.h b/src/gallium/state_trackers/glx/xlib/fakeglx.h
index e36f42f817..e5fd960072 100644
--- a/src/gallium/winsys/xlib/xfonts.h
+++ b/src/gallium/state_trackers/glx/xlib/fakeglx.h
@@ -24,15 +24,15 @@
*/
-#ifndef XFONTS_H
-#define XFONTS_H
+#ifndef FAKEGLX_H
+#define FAKEGLX_H
-#ifdef __VMS
-#include <GL/vms_x_fix.h>
-#endif
#include <X11/Xlib.h>
+struct _glxapi_table;
+
+extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void);
extern void Fake_glXUseXFont( Font font, int first, int count, int listbase );
diff --git a/src/gallium/winsys/xlib/xfonts.c b/src/gallium/state_trackers/glx/xlib/fakeglx_fonts.c
index d72c600bd1..e359046756 100644
--- a/src/gallium/winsys/xlib/xfonts.c
+++ b/src/gallium/state_trackers/glx/xlib/fakeglx_fonts.c
@@ -28,14 +28,10 @@
* Copyright (C) 1995 Thorsten.Ohl @ Physik.TH-Darmstadt.de
*/
-#ifdef __VMS
-#include <GL/vms_x_fix.h>
-#endif
-
-#include "glxheader.h"
#include "context.h"
#include "imports.h"
-#include "xfonts.h"
+#include "fakeglx.h"
+#include <GL/glx.h>
/* Some debugging info. */
diff --git a/src/gallium/winsys/xlib/glxapi.c b/src/gallium/state_trackers/glx/xlib/glxapi.c
index c059fc3edb..c2cb34d7cf 100644
--- a/src/gallium/winsys/xlib/glxapi.c
+++ b/src/gallium/state_trackers/glx/xlib/glxapi.c
@@ -34,14 +34,19 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
-#include "main/glheader.h"
#include "glapi/glapi.h"
#include "glxapi.h"
+#include "fakeglx.h"
#include "pipe/p_thread.h"
-extern struct _glxapi_table *_real_GetGLXDispatchTable(void);
-extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void);
+#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303
+# define PUBLIC __attribute__((visibility("default")))
+# define USED __attribute__((used))
+#else
+# define PUBLIC
+# define USED
+#endif
struct display_dispatch {
@@ -999,93 +1004,6 @@ glXCreateGLXPixmapMESA(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colorm
return (t->CreateGLXPixmapMESA)(dpy, visinfo, pixmap, cmap);
}
-
-
-/*** GLX_MESA_set_3dfx_mode ***/
-
-Bool PUBLIC
-glXSet3DfxModeMESA(int mode)
-{
- struct _glxapi_table *t;
- Display *dpy = glXGetCurrentDisplay();
- GET_DISPATCH(dpy, t);
- if (!t)
- return False;
- return (t->Set3DfxModeMESA)(mode);
-}
-
-
-
-/*** GLX_NV_vertex_array_range ***/
-
-void PUBLIC *
-glXAllocateMemoryNV( GLsizei size,
- GLfloat readFrequency,
- GLfloat writeFrequency,
- GLfloat priority )
-{
- struct _glxapi_table *t;
- Display *dpy = glXGetCurrentDisplay();
- GET_DISPATCH(dpy, t);
- if (!t)
- return NULL;
- return (t->AllocateMemoryNV)(size, readFrequency, writeFrequency, priority);
-}
-
-
-void PUBLIC
-glXFreeMemoryNV( GLvoid *pointer )
-{
- struct _glxapi_table *t;
- Display *dpy = glXGetCurrentDisplay();
- GET_DISPATCH(dpy, t);
- if (!t)
- return;
- (t->FreeMemoryNV)(pointer);
-}
-
-
-
-
-/*** GLX_MESA_agp_offset */
-
-GLuint PUBLIC
-glXGetAGPOffsetMESA( const GLvoid *pointer )
-{
- struct _glxapi_table *t;
- Display *dpy = glXGetCurrentDisplay();
- GET_DISPATCH(dpy, t);
- if (!t)
- return ~0;
- return (t->GetAGPOffsetMESA)(pointer);
-}
-
-
-/*** GLX_MESA_allocate_memory */
-
-void *
-glXAllocateMemoryMESA(Display *dpy, int scrn, size_t size,
- float readfreq, float writefreq, float priority)
-{
- /* dummy */
- return NULL;
-}
-
-void
-glXFreeMemoryMESA(Display *dpy, int scrn, void *pointer)
-{
- /* dummy */
-}
-
-
-GLuint
-glXGetMemoryOffsetMESA(Display *dpy, int scrn, const void *pointer)
-{
- /* dummy */
- return 0;
-}
-
-
/*** GLX_EXT_texture_from_pixmap */
void
@@ -1120,45 +1038,6 @@ _glxapi_get_version(void)
}
-/*
- * Return array of extension strings.
- */
-const char **
-_glxapi_get_extensions(void)
-{
- static const char *extensions[] = {
-#ifdef GLX_EXT_import_context
- "GLX_EXT_import_context",
-#endif
-#ifdef GLX_SGI_video_sync
- "GLX_SGI_video_sync",
-#endif
-#ifdef GLX_MESA_copy_sub_buffer
- "GLX_MESA_copy_sub_buffer",
-#endif
-#ifdef GLX_MESA_release_buffers
- "GLX_MESA_release_buffers",
-#endif
-#ifdef GLX_MESA_pixmap_colormap
- "GLX_MESA_pixmap_colormap",
-#endif
-#ifdef GLX_MESA_set_3dfx_mode
- "GLX_MESA_set_3dfx_mode",
-#endif
-#ifdef GLX_SGIX_fbconfig
- "GLX_SGIX_fbconfig",
-#endif
-#ifdef GLX_SGIX_pbuffer
- "GLX_SGIX_pbuffer",
-#endif
-#ifdef GLX_EXT_texture_from_pixmap
- "GLX_EXT_texture_from_pixmap",
-#endif
- NULL
- };
- return extensions;
-}
-
/*
* Return size of the GLX dispatch table, in entries, not bytes.
@@ -1321,24 +1200,9 @@ static struct name_address_pair GLX_functions[] = {
/*** GLX_MESA_release_buffers ***/
{ "glXReleaseBuffersMESA", (__GLXextFuncPtr) glXReleaseBuffersMESA },
- /*** GLX_MESA_set_3dfx_mode ***/
- { "glXSet3DfxModeMESA", (__GLXextFuncPtr) glXSet3DfxModeMESA },
-
/*** GLX_ARB_get_proc_address ***/
{ "glXGetProcAddressARB", (__GLXextFuncPtr) glXGetProcAddressARB },
- /*** GLX_NV_vertex_array_range ***/
- { "glXAllocateMemoryNV", (__GLXextFuncPtr) glXAllocateMemoryNV },
- { "glXFreeMemoryNV", (__GLXextFuncPtr) glXFreeMemoryNV },
-
- /*** GLX_MESA_agp_offset ***/
- { "glXGetAGPOffsetMESA", (__GLXextFuncPtr) glXGetAGPOffsetMESA },
-
- /*** GLX_MESA_allocate_memory ***/
- { "glXAllocateMemoryMESA", (__GLXextFuncPtr) glXAllocateMemoryMESA },
- { "glXFreeMemoryMESA", (__GLXextFuncPtr) glXFreeMemoryMESA },
- { "glXGetMemoryOffsetMESA", (__GLXextFuncPtr) glXGetMemoryOffsetMESA },
-
/*** GLX_EXT_texture_from_pixmap ***/
{ "glXBindTexImageEXT", (__GLXextFuncPtr) glXBindTexImageEXT },
{ "glXReleaseTexImageEXT", (__GLXextFuncPtr) glXReleaseTexImageEXT },
diff --git a/src/gallium/winsys/xlib/glxapi.h b/src/gallium/state_trackers/glx/xlib/glxapi.h
index 37de81e55a..b4e12b4162 100644
--- a/src/gallium/winsys/xlib/glxapi.h
+++ b/src/gallium/state_trackers/glx/xlib/glxapi.h
@@ -184,19 +184,6 @@ struct _glxapi_table {
/*** GLX_MESA_pixmap_colormap ***/
GLXPixmap (*CreateGLXPixmapMESA)(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colormap cmap);
- /*** GLX_MESA_set_3dfx_mode ***/
- Bool (*Set3DfxModeMESA)(int mode);
-
- /*** GLX_NV_vertex_array_range ***/
- void * (*AllocateMemoryNV)( GLsizei size,
- GLfloat readFrequency,
- GLfloat writeFrequency,
- GLfloat priority );
- void (*FreeMemoryNV)( GLvoid *pointer );
-
- /*** GLX_MESA_agp_offset ***/
- GLuint (*GetAGPOffsetMESA)( const GLvoid *pointer );
-
/*** GLX_EXT_texture_from_pixmap ***/
void (*BindTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer,
const int *attrib_list);
@@ -209,8 +196,6 @@ extern const char *
_glxapi_get_version(void);
-extern const char **
-_glxapi_get_extensions(void);
extern GLuint
diff --git a/src/gallium/winsys/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c
index d28a6423b9..33dc044ad5 100644
--- a/src/gallium/winsys/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -57,9 +57,7 @@
#undef __WIN32__
#endif
-#include "glxheader.h"
-#include "GL/xmesa.h"
-#include "xmesaP.h"
+#include "xm_api.h"
#include "main/context.h"
#include "main/framebuffer.h"
@@ -69,7 +67,20 @@
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
-#include "xm_winsys_aub.h"
+#include "xm_winsys.h"
+#include <GL/glx.h>
+
+
+/* Driver interface routines, set up by xlib backend on library
+ * _init(). These are global in the same way that function names are
+ * global.
+ */
+static struct xm_driver driver;
+
+void xmesa_set_driver( const struct xm_driver *templ )
+{
+ driver = *templ;
+}
/**
* Global X driver lock
@@ -77,8 +88,6 @@
pipe_mutex _xmesa_lock;
-int xmesa_mode;
-
/**********************************************************************/
/***** X Utility Functions *****/
@@ -88,14 +97,12 @@ int xmesa_mode;
/**
* Return the host's byte order as LSBFirst or MSBFirst ala X.
*/
-#ifndef XFree86Server
static int host_byte_order( void )
{
int i = 1;
char *cptr = (char *) &i;
return (*cptr==1) ? LSBFirst : MSBFirst;
}
-#endif
/**
@@ -104,9 +111,9 @@ static int host_byte_order( void )
* 1 = shared XImage support available
* 2 = shared Pixmap support available also
*/
-int xmesa_check_for_xshm( XMesaDisplay *display )
+int xmesa_check_for_xshm( Display *display )
{
-#if defined(USE_XSHM) && !defined(XFree86Server)
+#if defined(USE_XSHM)
int major, minor, ignore;
Bool pixmaps;
@@ -146,19 +153,9 @@ int xmesa_check_for_xshm( XMesaDisplay *display )
static int
bits_per_pixel( XMesaVisual xmv )
{
-#ifdef XFree86Server
- const int depth = xmv->nplanes;
- int i;
- assert(depth > 0);
- for (i = 0; i < screenInfo.numPixmapFormats; i++) {
- if (screenInfo.formats[i].depth == depth)
- return screenInfo.formats[i].bitsPerPixel;
- }
- return depth; /* should never get here, but this should be safe */
-#else
- XMesaDisplay *dpy = xmv->display;
- XMesaVisualInfo visinfo = xmv->visinfo;
- XMesaImage *img;
+ Display *dpy = xmv->display;
+ XVisualInfo * visinfo = xmv->visinfo;
+ XImage *img;
int bitsPerPixel;
/* Create a temporary XImage */
img = XCreateImage( dpy, visinfo->visual, visinfo->depth,
@@ -174,9 +171,8 @@ bits_per_pixel( XMesaVisual xmv )
/* free the XImage */
_mesa_free( img->data );
img->data = NULL;
- XMesaDestroyImage( img );
+ XDestroyImage( img );
return bitsPerPixel;
-#endif
}
@@ -190,10 +186,9 @@ bits_per_pixel( XMesaVisual xmv )
* Return: GL_TRUE - window exists
* GL_FALSE - window doesn't exist
*/
-#ifndef XFree86Server
static GLboolean WindowExistsFlag;
-static int window_exists_err_handler( XMesaDisplay* dpy, XErrorEvent* xerr )
+static int window_exists_err_handler( Display* dpy, XErrorEvent* xerr )
{
(void) dpy;
if (xerr->error_code == BadWindow) {
@@ -202,10 +197,10 @@ static int window_exists_err_handler( XMesaDisplay* dpy, XErrorEvent* xerr )
return 0;
}
-static GLboolean window_exists( XMesaDisplay *dpy, Window win )
+static GLboolean window_exists( Display *dpy, Window win )
{
XWindowAttributes wa;
- int (*old_handler)( XMesaDisplay*, XErrorEvent* );
+ int (*old_handler)( Display*, XErrorEvent* );
WindowExistsFlag = GL_TRUE;
old_handler = XSetErrorHandler(window_exists_err_handler);
XGetWindowAttributes( dpy, win, &wa ); /* dummy request */
@@ -214,7 +209,7 @@ static GLboolean window_exists( XMesaDisplay *dpy, Window win )
}
static Status
-get_drawable_size( XMesaDisplay *dpy, Drawable d, uint *width, uint *height )
+get_drawable_size( Display *dpy, Drawable d, uint *width, uint *height )
{
Window root;
Status stat;
@@ -225,7 +220,6 @@ get_drawable_size( XMesaDisplay *dpy, Drawable d, uint *width, uint *height )
*height = h;
return stat;
}
-#endif
/**
@@ -235,13 +229,9 @@ get_drawable_size( XMesaDisplay *dpy, Drawable d, uint *width, uint *height )
* \param height returns height in pixels
*/
static void
-xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b,
+xmesa_get_window_size(Display *dpy, XMesaBuffer b,
GLuint *width, GLuint *height)
{
-#ifdef XFree86Server
- *width = MIN2(b->drawable->width, MAX_WIDTH);
- *height = MIN2(b->drawable->height, MAX_HEIGHT);
-#else
Status stat;
pipe_mutex_lock(_xmesa_lock);
@@ -254,9 +244,12 @@ xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b,
_mesa_warning(NULL, "XGetGeometry failed!\n");
*width = *height = 1;
}
-#endif
}
+#define GET_REDMASK(__v) __v->mesa_visual.redMask
+#define GET_GREENMASK(__v) __v->mesa_visual.greenMask
+#define GET_BLUEMASK(__v) __v->mesa_visual.blueMask
+
/**
* Choose the pixel format for the given visual.
@@ -266,11 +259,14 @@ xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b,
static GLuint
choose_pixel_format(XMesaVisual v)
{
+ boolean native_byte_order = (host_byte_order() ==
+ ImageByteOrder(v->display));
+
if ( GET_REDMASK(v) == 0x0000ff
&& GET_GREENMASK(v) == 0x00ff00
&& GET_BLUEMASK(v) == 0xff0000
&& v->BitsPerPixel == 32) {
- if (CHECK_BYTE_ORDER(v)) {
+ if (native_byte_order) {
/* no byteswapping needed */
return 0 /* PIXEL_FORMAT_U_A8_B8_G8_R8 */;
}
@@ -282,7 +278,7 @@ choose_pixel_format(XMesaVisual v)
&& GET_GREENMASK(v) == 0x00ff00
&& GET_BLUEMASK(v) == 0x0000ff
&& v->BitsPerPixel == 32) {
- if (CHECK_BYTE_ORDER(v)) {
+ if (native_byte_order) {
/* no byteswapping needed */
return PIPE_FORMAT_A8R8G8B8_UNORM;
}
@@ -293,7 +289,7 @@ choose_pixel_format(XMesaVisual v)
else if ( GET_REDMASK(v) == 0xf800
&& GET_GREENMASK(v) == 0x07e0
&& GET_BLUEMASK(v) == 0x001f
- && CHECK_BYTE_ORDER(v)
+ && native_byte_order
&& v->BitsPerPixel == 16) {
/* 5-6-5 RGB */
return PIPE_FORMAT_R5G6B5_UNORM;
@@ -324,8 +320,8 @@ XMesaBuffer XMesaBufferList = NULL;
* \return new XMesaBuffer or NULL if any problem
*/
static XMesaBuffer
-create_xmesa_buffer(XMesaDrawable d, BufferType type,
- XMesaVisual vis, XMesaColormap cmap)
+create_xmesa_buffer(Drawable d, BufferType type,
+ XMesaVisual vis, Colormap cmap)
{
XMesaBuffer b;
GLframebuffer *fb;
@@ -418,7 +414,7 @@ create_xmesa_buffer(XMesaDrawable d, BufferType type,
* the notThis buffer.
*/
XMesaBuffer
-xmesa_find_buffer(XMesaDisplay *dpy, XMesaColormap cmap, XMesaBuffer notThis)
+xmesa_find_buffer(Display *dpy, Colormap cmap, XMesaBuffer notThis)
{
XMesaBuffer b;
for (b = XMesaBufferList; b; b = b->Next) {
@@ -496,13 +492,9 @@ xmesa_free_buffer(XMesaBuffer buffer)
*/
static GLboolean
initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b,
- GLboolean rgb_flag, XMesaDrawable window,
- XMesaColormap cmap)
+ GLboolean rgb_flag, Drawable window,
+ Colormap cmap)
{
-#ifdef XFree86Server
- int client = (window) ? CLIENT_ID(window->id) : 0;
-#endif
-
ASSERT(!b || b->xm_visual == v);
/* Save true bits/pixel */
@@ -542,7 +534,7 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b,
if (_mesa_getenv("MESA_INFO")) {
_mesa_printf("X/Mesa visual = %p\n", (void *) v);
_mesa_printf("X/Mesa level = %d\n", v->mesa_visual.level);
- _mesa_printf("X/Mesa depth = %d\n", GET_VISUAL_DEPTH(v));
+ _mesa_printf("X/Mesa depth = %d\n", v->visinfo->depth);
_mesa_printf("X/Mesa bits per pixel = %d\n", v->BitsPerPixel);
}
@@ -557,12 +549,8 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b,
}
/* X11 graphics context */
-#ifdef XFree86Server
- b->gc = CreateScratchGC(v->display, window->depth);
-#else
b->gc = XCreateGC( v->display, window, 0, NULL );
-#endif
- XMesaSetFunction( v->display, b->gc, GXcopy );
+ XSetFunction( v->display, b->gc, GXcopy );
}
return GL_TRUE;
@@ -627,8 +615,8 @@ xmesa_convert_from_x_visual_type( int visualType )
* Return; a new XMesaVisual or 0 if error.
*/
PUBLIC
-XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
- XMesaVisualInfo visinfo,
+XMesaVisual XMesaCreateVisual( Display *display,
+ XVisualInfo * visinfo,
GLboolean rgb_flag,
GLboolean alpha_flag,
GLboolean db_flag,
@@ -647,7 +635,6 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
XMesaVisual v;
GLint red_bits, green_bits, blue_bits, alpha_bits;
-#ifndef XFree86Server
/* For debugging only */
if (_mesa_getenv("MESA_XSYNC")) {
/* This makes debugging X easier.
@@ -656,7 +643,6 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
*/
XSynchronize( display, 1 );
}
-#endif
v = (XMesaVisual) CALLOC_STRUCT(xmesa_visual);
if (!v) {
@@ -669,41 +655,22 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
* the struct but we may need some of the information contained in it
* at a later time.
*/
-#ifndef XFree86Server
v->visinfo = (XVisualInfo *) MALLOC(sizeof(*visinfo));
if(!v->visinfo) {
_mesa_free(v);
return NULL;
}
MEMCPY(v->visinfo, visinfo, sizeof(*visinfo));
-#endif
v->ximage_flag = ximage_flag;
-#ifdef XFree86Server
- /* We could calculate these values by ourselves. nplanes is either the sum
- * of the red, green, and blue bits or the number index bits.
- * ColormapEntries is either (1U << index_bits) or
- * (1U << max(redBits, greenBits, blueBits)).
- */
- assert(visinfo->nplanes > 0);
- v->nplanes = visinfo->nplanes;
- v->ColormapEntries = visinfo->ColormapEntries;
-
- v->mesa_visual.redMask = visinfo->redMask;
- v->mesa_visual.greenMask = visinfo->greenMask;
- v->mesa_visual.blueMask = visinfo->blueMask;
- v->mesa_visual.visualID = visinfo->vid;
- v->mesa_visual.screen = 0; /* FIXME: What should be done here? */
-#else
v->mesa_visual.redMask = visinfo->red_mask;
v->mesa_visual.greenMask = visinfo->green_mask;
v->mesa_visual.blueMask = visinfo->blue_mask;
v->mesa_visual.visualID = visinfo->visualid;
v->mesa_visual.screen = visinfo->screen;
-#endif
-#if defined(XFree86Server) || !(defined(__cplusplus) || defined(c_plusplus))
+#if !(defined(__cplusplus) || defined(c_plusplus))
v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->class);
#else
v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->c_class);
@@ -726,14 +693,14 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
else {
/* this is an approximation */
int depth;
- depth = GET_VISUAL_DEPTH(v);
+ depth = v->visinfo->depth;
red_bits = depth / 3;
depth -= red_bits;
green_bits = depth / 2;
depth -= green_bits;
blue_bits = depth;
alpha_bits = 0;
- assert( red_bits + green_bits + blue_bits == GET_VISUAL_DEPTH(v) );
+ assert( red_bits + green_bits + blue_bits == v->visinfo->depth );
}
alpha_bits = v->mesa_visual.alphaBits;
}
@@ -758,9 +725,7 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
PUBLIC
void XMesaDestroyVisual( XMesaVisual v )
{
-#ifndef XFree86Server
_mesa_free(v->visinfo);
-#endif
_mesa_free(v);
}
@@ -777,6 +742,7 @@ PUBLIC
XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
{
static GLboolean firstTime = GL_TRUE;
+ struct pipe_screen *screen;
struct pipe_context *pipe;
XMesaContext c;
GLcontext *mesaCtx;
@@ -797,24 +763,24 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
c->xm_visual = v;
c->xm_buffer = NULL; /* set later by XMesaMakeCurrent */
+
+ /* XXX: create once per Xlib Display.
+ */
+ screen = driver.create_pipe_screen();
+ if (screen == NULL)
+ goto fail;
- if (!getenv("XM_AUB")) {
- xmesa_mode = XMESA_SOFTPIPE;
- pipe = xmesa_create_pipe_context( c, pf );
- }
- else {
- xmesa_mode = XMESA_AUB;
- pipe = xmesa_create_i965simple(xmesa_get_pipe_winsys_aub(v));
- }
-
+ pipe = driver.create_pipe_context( screen,
+ (void *)c );
if (pipe == NULL)
goto fail;
- c->st = st_create_context(pipe, &v->mesa_visual,
+ c->st = st_create_context(pipe,
+ &v->mesa_visual,
share_list ? share_list->st : NULL);
if (c->st == NULL)
goto fail;
-
+
mesaCtx = c->st->ctx;
c->st->ctx->DriverCtx = c;
@@ -826,13 +792,6 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
_mesa_enable_2_0_extensions(mesaCtx);
#endif
-#ifdef XFree86Server
- /* If we're running in the X server, do bounds checking to prevent
- * segfaults and server crashes!
- */
- mesaCtx->Const.CheckArrayBounds = GL_TRUE;
-#endif
-
return c;
fail:
@@ -840,6 +799,10 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
st_destroy_context(c->st);
else if (pipe)
pipe->destroy(pipe);
+
+ if (screen)
+ screen->destroy( screen );
+
FREE(c);
return NULL;
}
@@ -849,12 +812,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
PUBLIC
void XMesaDestroyContext( XMesaContext c )
{
- struct pipe_screen *screen = c->st->pipe->screen;
st_destroy_context(c->st);
+
/* FIXME: We should destroy the screen here, but if we do so, surfaces may
* outlive it, causing segfaults
+ struct pipe_screen *screen = c->st->pipe->screen;
screen->destroy(screen);
*/
+
_mesa_free(c);
}
@@ -868,35 +833,26 @@ void XMesaDestroyContext( XMesaContext c )
* \return new XMesaBuffer or NULL if error
*/
PUBLIC XMesaBuffer
-XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w)
+XMesaCreateWindowBuffer(XMesaVisual v, Window w)
{
-#ifndef XFree86Server
XWindowAttributes attr;
-#endif
XMesaBuffer b;
- XMesaColormap cmap;
+ Colormap cmap;
int depth;
assert(v);
assert(w);
/* Check that window depth matches visual depth */
-#ifdef XFree86Server
- depth = ((XMesaDrawable)w)->depth;
-#else
XGetWindowAttributes( v->display, w, &attr );
depth = attr.depth;
-#endif
- if (GET_VISUAL_DEPTH(v) != depth) {
+ if (v->visinfo->depth != depth) {
_mesa_warning(NULL, "XMesaCreateWindowBuffer: depth mismatch between visual (%d) and window (%d)!\n",
- GET_VISUAL_DEPTH(v), depth);
+ v->visinfo->depth, depth);
return NULL;
}
/* Find colormap */
-#ifdef XFree86Server
- cmap = (ColormapPtr)LookupIDByType(wColormap(w), RT_COLORMAP);
-#else
if (attr.colormap) {
cmap = attr.colormap;
}
@@ -906,14 +862,13 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w)
/* OK, let's just allocate a new one and hope for the best */
cmap = XCreateColormap(v->display, w, attr.visual, AllocNone);
}
-#endif
- b = create_xmesa_buffer((XMesaDrawable) w, WINDOW, v, cmap);
+ b = create_xmesa_buffer((Drawable) w, WINDOW, v, cmap);
if (!b)
return NULL;
if (!initialize_visual_and_buffer( v, b, v->mesa_visual.rgbMode,
- (XMesaDrawable) w, cmap )) {
+ (Drawable) w, cmap )) {
xmesa_free_buffer(b);
return NULL;
}
@@ -933,18 +888,18 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w)
* \returns new XMesaBuffer or NULL if error
*/
PUBLIC XMesaBuffer
-XMesaCreatePixmapBuffer(XMesaVisual v, XMesaPixmap p, XMesaColormap cmap)
+XMesaCreatePixmapBuffer(XMesaVisual v, Pixmap p, Colormap cmap)
{
XMesaBuffer b;
assert(v);
- b = create_xmesa_buffer((XMesaDrawable) p, PIXMAP, v, cmap);
+ b = create_xmesa_buffer((Drawable) p, PIXMAP, v, cmap);
if (!b)
return NULL;
if (!initialize_visual_and_buffer(v, b, v->mesa_visual.rgbMode,
- (XMesaDrawable) p, cmap)) {
+ (Drawable) p, cmap)) {
xmesa_free_buffer(b);
return NULL;
}
@@ -957,8 +912,8 @@ XMesaCreatePixmapBuffer(XMesaVisual v, XMesaPixmap p, XMesaColormap cmap)
* For GLX_EXT_texture_from_pixmap
*/
XMesaBuffer
-XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p,
- XMesaColormap cmap,
+XMesaCreatePixmapTextureBuffer(XMesaVisual v, Pixmap p,
+ Colormap cmap,
int format, int target, int mipmap)
{
GET_CURRENT_CONTEXT(ctx);
@@ -967,7 +922,7 @@ XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p,
assert(v);
- b = create_xmesa_buffer((XMesaDrawable) p, PIXMAP, v, cmap);
+ b = create_xmesa_buffer((Drawable) p, PIXMAP, v, cmap);
if (!b)
return NULL;
@@ -1005,7 +960,7 @@ XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p,
b->TextureMipmap = mipmap;
if (!initialize_visual_and_buffer(v, b, v->mesa_visual.rgbMode,
- (XMesaDrawable) p, cmap)) {
+ (Drawable) p, cmap)) {
xmesa_free_buffer(b);
return NULL;
}
@@ -1016,12 +971,11 @@ XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p,
XMesaBuffer
-XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap,
+XMesaCreatePBuffer(XMesaVisual v, Colormap cmap,
unsigned int width, unsigned int height)
{
-#ifndef XFree86Server
- XMesaWindow root;
- XMesaDrawable drawable; /* X Pixmap Drawable */
+ Window root;
+ Drawable drawable; /* X Pixmap Drawable */
XMesaBuffer b;
/* allocate pixmap for front buffer */
@@ -1042,9 +996,6 @@ XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap,
}
return b;
-#else
- return 0;
-#endif
}
@@ -1064,8 +1015,7 @@ XMesaDestroyBuffer(XMesaBuffer b)
* and all attached renderbuffers.
* Called when:
* 1. the first time a buffer is bound to a context.
- * 2. from the XMesaResizeBuffers() API function.
- * 3. SwapBuffers. XXX probabaly from xm_flush_frontbuffer() too...
+ * 2. SwapBuffers. XXX probably from xm_flush_frontbuffer() too...
* Note: it's possible (and legal) for xmctx to be NULL. That can happen
* when resizing a buffer when no rendering context is bound.
*/
@@ -1078,13 +1028,6 @@ xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer)
}
-/*
- * Bind buffer b to context c and make c the current rendering context.
- */
-GLboolean XMesaMakeCurrent( XMesaContext c, XMesaBuffer b )
-{
- return XMesaMakeCurrent2( c, b, b );
-}
/*
@@ -1156,76 +1099,8 @@ XMesaContext XMesaGetCurrentContext( void )
}
-XMesaBuffer XMesaGetCurrentBuffer( void )
-{
- GET_CURRENT_CONTEXT(ctx);
- if (ctx) {
- XMesaBuffer xmbuf = xmesa_buffer(ctx->DrawBuffer);
- return xmbuf;
- }
- else {
- return 0;
- }
-}
-
-
-/* New in Mesa 3.1 */
-XMesaBuffer XMesaGetCurrentReadBuffer( void )
-{
- GET_CURRENT_CONTEXT(ctx);
- if (ctx) {
- return xmesa_buffer(ctx->ReadBuffer);
- }
- else {
- return 0;
- }
-}
-
-
-#ifdef XFree86Server
-PUBLIC
-GLboolean XMesaForceCurrent(XMesaContext c)
-{
- if (c) {
- _glapi_set_dispatch(c->mesa.CurrentDispatch);
-
- if (&(c->mesa) != _mesa_get_current_context()) {
- _mesa_make_current(&c->mesa, c->mesa.DrawBuffer, c->mesa.ReadBuffer);
- }
- }
- else {
- _mesa_make_current(NULL, NULL, NULL);
- }
- return GL_TRUE;
-}
-
-
-PUBLIC
-GLboolean XMesaLoseCurrent(XMesaContext c)
-{
- (void) c;
- _mesa_make_current(NULL, NULL, NULL);
- return GL_TRUE;
-}
-PUBLIC
-GLboolean XMesaCopyContext( XMesaContext xm_src, XMesaContext xm_dst, GLuint mask )
-{
- _mesa_copy_context(&xm_src->mesa, &xm_dst->mesa, mask);
- return GL_TRUE;
-}
-#endif /* XFree86Server */
-
-
-#ifndef FX
-GLboolean XMesaSetFXmode( GLint mode )
-{
- (void) mode;
- return GL_FALSE;
-}
-#endif
-
/*
@@ -1242,12 +1117,9 @@ void XMesaSwapBuffers( XMesaBuffer b )
*/
st_notify_swapbuffers(b->stfb);
- surf = st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT);
+ st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT, &surf);
if (surf) {
- if (xmesa_mode == XMESA_AUB)
- xmesa_display_aub( surf );
- else
- xmesa_display_surface(b, surf);
+ driver.display_surface(b, surf);
}
xmesa_check_and_update_buffer_size(NULL, b);
@@ -1260,12 +1132,13 @@ void XMesaSwapBuffers( XMesaBuffer b )
*/
void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height )
{
- struct pipe_surface *surf_front
- = st_get_framebuffer_surface(b->stfb, ST_SURFACE_FRONT_LEFT);
- struct pipe_surface *surf_back
- = st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT);
+ struct pipe_surface *surf_front;
+ struct pipe_surface *surf_back;
struct pipe_context *pipe = NULL; /* XXX fix */
+ st_get_framebuffer_surface(b->stfb, ST_SURFACE_FRONT_LEFT, &surf_front);
+ st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT, &surf_back);
+
if (!surf_front || !surf_back)
return;
@@ -1278,56 +1151,19 @@ void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height )
-/*
- * Return the depth buffer associated with an XMesaBuffer.
- * Input: b - the XMesa buffer handle
- * Output: width, height - size of buffer in pixels
- * bytesPerValue - bytes per depth value (2 or 4)
- * buffer - pointer to depth buffer values
- * Return: GL_TRUE or GL_FALSE to indicate success or failure.
- */
-GLboolean XMesaGetDepthBuffer( XMesaBuffer b, GLint *width, GLint *height,
- GLint *bytesPerValue, void **buffer )
-{
- *width = 0;
- *height = 0;
- *bytesPerValue = 0;
- *buffer = 0;
- return GL_FALSE;
-}
-
-
void XMesaFlush( XMesaContext c )
{
if (c && c->xm_visual->display) {
-#ifdef XFree86Server
- /* NOT_NEEDED */
-#else
st_finish(c->st);
XSync( c->xm_visual->display, False );
-#endif
}
}
-const char *XMesaGetString( XMesaContext c, int name )
-{
- (void) c;
- if (name==XMESA_VERSION) {
- return "5.0";
- }
- else if (name==XMESA_EXTENSIONS) {
- return "";
- }
- else {
- return NULL;
- }
-}
-
-XMesaBuffer XMesaFindBuffer( XMesaDisplay *dpy, XMesaDrawable d )
+XMesaBuffer XMesaFindBuffer( Display *dpy, Drawable d )
{
XMesaBuffer b;
for (b=XMesaBufferList; b; b=b->Next) {
@@ -1342,7 +1178,7 @@ XMesaBuffer XMesaFindBuffer( XMesaDisplay *dpy, XMesaDrawable d )
/**
* Free/destroy all XMesaBuffers associated with given display.
*/
-void xmesa_destroy_buffers_on_display(XMesaDisplay *dpy)
+void xmesa_destroy_buffers_on_display(Display *dpy)
{
XMesaBuffer b, next;
for (b = XMesaBufferList; b; b = next) {
@@ -1367,48 +1203,20 @@ void XMesaGarbageCollect( void )
b->xm_visual->display &&
b->drawable &&
b->type == WINDOW) {
-#ifdef XFree86Server
- /* NOT_NEEDED */
-#else
XSync(b->xm_visual->display, False);
if (!window_exists( b->xm_visual->display, b->drawable )) {
/* found a dead window, free the ancillary info */
XMesaDestroyBuffer( b );
}
-#endif
}
}
}
-unsigned long XMesaDitherColor( XMesaContext xmesa, GLint x, GLint y,
- GLfloat red, GLfloat green,
- GLfloat blue, GLfloat alpha )
-{
- /* no longer supported */
- return 0;
-}
-
-
-/*
- * This is typically called when the window size changes and we need
- * to reallocate the buffer's back/depth/stencil/accum buffers.
- */
-PUBLIC void
-XMesaResizeBuffers( XMesaBuffer b )
-{
- GET_CURRENT_CONTEXT(ctx);
- XMesaContext xmctx = xmesa_context(ctx);
- if (!xmctx)
- return;
- xmesa_check_and_update_buffer_size(xmctx, b);
-}
-
-
PUBLIC void
-XMesaBindTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer,
+XMesaBindTexImage(Display *dpy, XMesaBuffer drawable, int buffer,
const int *attrib_list)
{
}
@@ -1416,7 +1224,7 @@ XMesaBindTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer,
PUBLIC void
-XMesaReleaseTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer)
+XMesaReleaseTexImage(Display *dpy, XMesaBuffer drawable, int buffer)
{
}
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.h b/src/gallium/state_trackers/glx/xlib/xm_api.h
new file mode 100644
index 0000000000..2b8302d174
--- /dev/null
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.h
@@ -0,0 +1,393 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.1
+ *
+ * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+/* Sample Usage:
+
+In addition to the usual X calls to select a visual, create a colormap
+and create a window, you must do the following to use the X/Mesa interface:
+
+1. Call XMesaCreateVisual() to make an XMesaVisual from an XVisualInfo.
+
+2. Call XMesaCreateContext() to create an X/Mesa rendering context, given
+ the XMesaVisual.
+
+3. Call XMesaCreateWindowBuffer() to create an XMesaBuffer from an X window
+ and XMesaVisual.
+
+4. Call XMesaMakeCurrent2() to bind the draw and read XMesaBuffers to an
+ XMesaContext and to make the context the current one.
+
+5. Make gl* calls to render your graphics.
+
+6. Use XMesaSwapBuffers() when double buffering to swap front/back buffers.
+
+7. Before the X window is destroyed, call XMesaDestroyBuffer().
+
+8. Before exiting, call XMesaDestroyVisual and XMesaDestroyContext.
+
+*/
+
+
+
+
+#ifndef XMESA_H
+#define XMESA_H
+
+
+#include "mtypes.h"
+#include "state_tracker/st_context.h"
+#include "state_tracker/st_public.h"
+#include "pipe/p_thread.h"
+
+
+# include <X11/Xlib.h>
+# include <X11/Xlibint.h>
+# include <X11/Xutil.h>
+# ifdef USE_XSHM /* was SHM */
+# include <sys/ipc.h>
+# include <sys/shm.h>
+# include <X11/extensions/XShm.h>
+# endif
+
+typedef struct xmesa_buffer *XMesaBuffer;
+typedef struct xmesa_context *XMesaContext;
+typedef struct xmesa_visual *XMesaVisual;
+
+
+
+/*
+ * Create a new X/Mesa visual.
+ * Input: display - X11 display
+ * visinfo - an XVisualInfo pointer
+ * rgb_flag - GL_TRUE = RGB mode,
+ * GL_FALSE = color index mode
+ * alpha_flag - alpha buffer requested?
+ * db_flag - GL_TRUE = double-buffered,
+ * GL_FALSE = single buffered
+ * stereo_flag - stereo visual?
+ * ximage_flag - GL_TRUE = use an XImage for back buffer,
+ * GL_FALSE = use an off-screen pixmap for back buffer
+ * depth_size - requested bits/depth values, or zero
+ * stencil_size - requested bits/stencil values, or zero
+ * accum_red_size - requested bits/red accum values, or zero
+ * accum_green_size - requested bits/green accum values, or zero
+ * accum_blue_size - requested bits/blue accum values, or zero
+ * accum_alpha_size - requested bits/alpha accum values, or zero
+ * num_samples - number of samples/pixel if multisampling, or zero
+ * level - visual level, usually 0
+ * visualCaveat - ala the GLX extension, usually GLX_NONE_EXT
+ * Return: a new XMesaVisual or 0 if error.
+ */
+extern XMesaVisual XMesaCreateVisual( Display *display,
+ XVisualInfo * visinfo,
+ GLboolean rgb_flag,
+ GLboolean alpha_flag,
+ GLboolean db_flag,
+ GLboolean stereo_flag,
+ GLboolean ximage_flag,
+ GLint depth_size,
+ GLint stencil_size,
+ GLint accum_red_size,
+ GLint accum_green_size,
+ GLint accum_blue_size,
+ GLint accum_alpha_size,
+ GLint num_samples,
+ GLint level,
+ GLint visualCaveat );
+
+/*
+ * Destroy an XMesaVisual, but not the associated XVisualInfo.
+ */
+extern void XMesaDestroyVisual( XMesaVisual v );
+
+
+
+/*
+ * Create a new XMesaContext for rendering into an X11 window.
+ *
+ * Input: visual - an XMesaVisual
+ * share_list - another XMesaContext with which to share display
+ * lists or NULL if no sharing is wanted.
+ * Return: an XMesaContext or NULL if error.
+ */
+extern XMesaContext XMesaCreateContext( XMesaVisual v,
+ XMesaContext share_list );
+
+
+/*
+ * Destroy a rendering context as returned by XMesaCreateContext()
+ */
+extern void XMesaDestroyContext( XMesaContext c );
+
+
+
+/*
+ * Create an XMesaBuffer from an X window.
+ */
+extern XMesaBuffer XMesaCreateWindowBuffer( XMesaVisual v, Window w );
+
+
+/*
+ * Create an XMesaBuffer from an X pixmap.
+ */
+extern XMesaBuffer XMesaCreatePixmapBuffer( XMesaVisual v,
+ Pixmap p,
+ Colormap cmap );
+
+
+/*
+ * Destroy an XMesaBuffer, but not the corresponding window or pixmap.
+ */
+extern void XMesaDestroyBuffer( XMesaBuffer b );
+
+
+/*
+ * Return the XMesaBuffer handle which corresponds to an X drawable, if any.
+ *
+ * New in Mesa 2.3.
+ */
+extern XMesaBuffer XMesaFindBuffer( Display *dpy,
+ Drawable d );
+
+
+
+/*
+ * Bind two buffers (read and draw) to a context and make the
+ * context the current one.
+ * New in Mesa 3.3
+ */
+extern GLboolean XMesaMakeCurrent2( XMesaContext c,
+ XMesaBuffer drawBuffer,
+ XMesaBuffer readBuffer );
+
+
+/*
+ * Unbind the current context from its buffer.
+ */
+extern GLboolean XMesaUnbindContext( XMesaContext c );
+
+
+/*
+ * Return a handle to the current context.
+ */
+extern XMesaContext XMesaGetCurrentContext( void );
+
+
+/*
+ * Swap the front and back buffers for the given buffer. No action is
+ * taken if the buffer is not double buffered.
+ */
+extern void XMesaSwapBuffers( XMesaBuffer b );
+
+
+/*
+ * Copy a sub-region of the back buffer to the front buffer.
+ *
+ * New in Mesa 2.6
+ */
+extern void XMesaCopySubBuffer( XMesaBuffer b,
+ int x,
+ int y,
+ int width,
+ int height );
+
+
+
+
+
+/*
+ * Flush/sync a context
+ */
+extern void XMesaFlush( XMesaContext c );
+
+
+
+/*
+ * Scan for XMesaBuffers whose window/pixmap has been destroyed, then free
+ * any memory used by that buffer.
+ *
+ * New in Mesa 2.3.
+ */
+extern void XMesaGarbageCollect( void );
+
+
+
+/*
+ * Create a pbuffer.
+ * New in Mesa 4.1
+ */
+extern XMesaBuffer XMesaCreatePBuffer(XMesaVisual v, Colormap cmap,
+ unsigned int width, unsigned int height);
+
+
+
+/*
+ * Texture from Pixmap
+ * New in Mesa 7.1
+ */
+extern void
+XMesaBindTexImage(Display *dpy, XMesaBuffer drawable, int buffer,
+ const int *attrib_list);
+
+extern void
+XMesaReleaseTexImage(Display *dpy, XMesaBuffer drawable, int buffer);
+
+
+extern XMesaBuffer
+XMesaCreatePixmapTextureBuffer(XMesaVisual v, Pixmap p,
+ Colormap cmap,
+ int format, int target, int mipmap);
+
+
+
+
+/***********************************************************************
+ */
+
+extern pipe_mutex _xmesa_lock;
+
+extern struct xmesa_buffer *XMesaBufferList;
+
+
+/**
+ * Visual information, derived from GLvisual.
+ * Basically corresponds to an XVisualInfo.
+ */
+struct xmesa_visual {
+ GLvisual mesa_visual; /* Device independent visual parameters */
+ Display *display; /* The X11 display */
+ XVisualInfo * visinfo; /* X's visual info (pointer to private copy) */
+ XVisualInfo *vishandle; /* Only used in fakeglx.c */
+ GLint BitsPerPixel; /* True bits per pixel for XImages */
+
+ GLboolean ximage_flag; /* Use XImage for back buffer (not pixmap)? */
+};
+
+
+/**
+ * Context info, derived from st_context.
+ * Basically corresponds to a GLXContext.
+ */
+struct xmesa_context {
+ struct st_context *st;
+ XMesaVisual xm_visual; /** pixel format info */
+ XMesaBuffer xm_buffer; /** current drawbuffer */
+};
+
+
+/**
+ * Types of X/GLX drawables we might render into.
+ */
+typedef enum {
+ WINDOW, /* An X window */
+ GLXWINDOW, /* GLX window */
+ PIXMAP, /* GLX pixmap */
+ PBUFFER /* GLX Pbuffer */
+} BufferType;
+
+
+/**
+ * Framebuffer information; holds a pointer to an st_framebuffer (stfb).
+ * Basically corresponds to a GLXDrawable.
+ */
+struct xmesa_buffer {
+ struct st_framebuffer *stfb;
+
+ GLboolean wasCurrent; /* was ever the current buffer? */
+ XMesaVisual xm_visual; /* the X/Mesa visual */
+ Drawable drawable; /* Usually the X window ID */
+ Colormap cmap; /* the X colormap */
+ BufferType type; /* window, pixmap, pbuffer or glxwindow */
+
+ XImage *tempImage;
+ unsigned long selectedEvents;/* for pbuffers only */
+
+ GLuint shm; /* X Shared Memory extension status: */
+ /* 0 = not available */
+ /* 1 = XImage support available */
+ /* 2 = Pixmap support available too */
+#if defined(USE_XSHM)
+ XShmSegmentInfo shminfo;
+#endif
+
+ GC gc; /* scratch GC for span, line, tri drawing */
+
+ /* GLX_EXT_texture_from_pixmap */
+ GLint TextureTarget; /** GLX_TEXTURE_1D_EXT, for example */
+ GLint TextureFormat; /** GLX_TEXTURE_FORMAT_RGB_EXT, for example */
+ GLint TextureMipmap; /** 0 or 1 */
+
+ struct xmesa_buffer *Next; /* Linked list pointer: */
+};
+
+
+
+/** cast wrapper */
+static INLINE XMesaContext
+xmesa_context(GLcontext *ctx)
+{
+ return (XMesaContext) ctx->DriverCtx;
+}
+
+
+/** cast wrapper */
+static INLINE XMesaBuffer
+xmesa_buffer(GLframebuffer *fb)
+{
+ struct st_framebuffer *stfb = (struct st_framebuffer *) fb;
+ return (XMesaBuffer) st_framebuffer_private(stfb);
+}
+
+
+extern void
+xmesa_delete_framebuffer(struct gl_framebuffer *fb);
+
+extern XMesaBuffer
+xmesa_find_buffer(Display *dpy, Colormap cmap, XMesaBuffer notThis);
+
+extern void
+xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer);
+
+extern void
+xmesa_destroy_buffers_on_display(Display *dpy);
+
+static INLINE GLuint
+xmesa_buffer_width(XMesaBuffer b)
+{
+ return b->stfb->Base.Width;
+}
+
+static INLINE GLuint
+xmesa_buffer_height(XMesaBuffer b)
+{
+ return b->stfb->Base.Height;
+}
+
+extern int
+xmesa_check_for_xshm(Display *display);
+
+
+#endif
diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.h b/src/gallium/state_trackers/glx/xlib/xm_winsys.h
index cc2a755277..0e57605c34 100644
--- a/src/gallium/winsys/xlib/xm_winsys_aub.h
+++ b/src/gallium/state_trackers/glx/xlib/xm_winsys.h
@@ -1,3 +1,4 @@
+
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
@@ -25,44 +26,34 @@
*
**************************************************************************/
-#ifndef AUB_WINSYS_H
-#define AUB_WINSYS_H
+#ifndef XM_WINSYS_H
+#define XM_WINSYS_H
struct pipe_context;
-struct pipe_winsys;
-struct pipe_buffer;
+struct pipe_screen;
struct pipe_surface;
-
-struct pipe_winsys *
-xmesa_create_pipe_winsys_aub( void );
-
-void
-xmesa_destroy_pipe_winsys_aub( struct pipe_winsys *winsys );
-
+struct xmesa_buffer;
-struct pipe_context *
-xmesa_create_i965simple( struct pipe_winsys *winsys );
+struct xm_driver {
+ struct pipe_screen *(*create_pipe_screen)( void );
+ /* The context_private argument needs to go away. Is currently used
+ * in a round-about way to associate a display-target surface with its
+ * Xlib window.
+ */
+ struct pipe_context *(*create_pipe_context)( struct pipe_screen *,
+ void *context_private );
-void xmesa_buffer_subdata_aub(struct pipe_winsys *winsys,
- struct pipe_buffer *buf,
- unsigned long offset,
- unsigned long size,
- const void *data,
- unsigned aub_type,
- unsigned aub_sub_type);
+ void (*display_surface)( struct xmesa_buffer *,
+ struct pipe_surface * );
-void xmesa_commands_aub(struct pipe_winsys *winsys,
- unsigned *cmds,
- unsigned nr_dwords);
+};
-void xmesa_display_aub( /* struct pipe_winsys *winsys, */
- struct pipe_surface *surface );
+extern void
+xmesa_set_driver( const struct xm_driver *driver );
-extern struct pipe_winsys *
-xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis);
#endif
diff --git a/src/gallium/state_trackers/python/p_format.i b/src/gallium/state_trackers/python/p_format.i
index 51ad4bebcd..26fb12b387 100644
--- a/src/gallium/state_trackers/python/p_format.i
+++ b/src/gallium/state_trackers/python/p_format.i
@@ -1,6 +1,7 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (c) 2008 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -128,10 +129,14 @@ enum pipe_format {
PIPE_FORMAT_R32G32B32A32_FIXED,
PIPE_FORMAT_L8_SRGB,
- PIPE_FORMAT_A8_L8_SRGB,
+ PIPE_FORMAT_A8L8_SRGB,
PIPE_FORMAT_R8G8B8_SRGB,
PIPE_FORMAT_R8G8B8A8_SRGB,
PIPE_FORMAT_R8G8B8X8_SRGB,
+ PIPE_FORMAT_A8R8G8B8_SRGB,
+ PIPE_FORMAT_X8R8G8B8_SRGB,
+ PIPE_FORMAT_B8G8R8A8_SRGB,
+ PIPE_FORMAT_B8G8R8X8_SRGB,
PIPE_FORMAT_X8UB8UG8SR8S_NORM,
PIPE_FORMAT_B6UG5SR5S_NORM,
@@ -140,6 +145,11 @@ enum pipe_format {
PIPE_FORMAT_DXT1_RGBA,
PIPE_FORMAT_DXT3_RGBA,
PIPE_FORMAT_DXT5_RGBA,
+
+ PIPE_FORMAT_DXT1_SRGB,
+ PIPE_FORMAT_DXT1_SRGBA,
+ PIPE_FORMAT_DXT3_SRGBA,
+ PIPE_FORMAT_DXT5_SRGBA,
};
diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c
index 95c1378a03..20dd8d269d 100644
--- a/src/gallium/state_trackers/python/st_device.c
+++ b/src/gallium/state_trackers/python/st_device.c
@@ -26,7 +26,7 @@
**************************************************************************/
-#include "pipe/p_winsys.h"
+#include "pipe/p_screen.h"
#include "pipe/p_context.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_inlines.h"
diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c
index f62113a469..4d798df99b 100644
--- a/src/gallium/state_trackers/python/st_softpipe_winsys.c
+++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c
@@ -36,7 +36,7 @@
*/
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"/* port to just p_screen */
#include "pipe/p_format.h"
#include "pipe/p_context.h"
#include "pipe/p_inlines.h"
@@ -168,63 +168,25 @@ round_up(unsigned n, unsigned multiple)
}
-static int
-st_softpipe_surface_alloc_storage(struct pipe_winsys *winsys,
- struct pipe_surface *surf,
+static struct pipe_buffer *
+st_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
unsigned width, unsigned height,
- enum pipe_format format,
- unsigned flags,
- unsigned tex_usage)
+ enum pipe_format format,
+ unsigned usage,
+ unsigned *stride)
{
const unsigned alignment = 64;
+ struct pipe_format_block block;
+ unsigned nblocksx, nblocksy;
- surf->width = width;
- surf->height = height;
- surf->format = format;
- pf_get_block(format, &surf->block);
- surf->nblocksx = pf_get_nblocksx(&surf->block, width);
- surf->nblocksy = pf_get_nblocksy(&surf->block, height);
- surf->stride = round_up(surf->nblocksx * surf->block.size, alignment);
- surf->usage = flags;
-
- assert(!surf->buffer);
- surf->buffer = winsys->buffer_create(winsys, alignment,
- PIPE_BUFFER_USAGE_PIXEL,
- surf->stride * surf->nblocksy);
- if(!surf->buffer)
- return -1;
-
- return 0;
-}
-
-
-static struct pipe_surface *
-st_softpipe_surface_alloc(struct pipe_winsys *winsys)
-{
- struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface);
-
- assert(winsys);
-
- surface->refcount = 1;
- surface->winsys = winsys;
-
- return surface;
-}
-
+ pf_get_block(format, &block);
+ nblocksx = pf_get_nblocksx(&block, width);
+ nblocksy = pf_get_nblocksy(&block, height);
+ *stride = round_up(nblocksx * block.size, alignment);
-static void
-st_softpipe_surface_release(struct pipe_winsys *winsys,
- struct pipe_surface **s)
-{
- struct pipe_surface *surf = *s;
- assert(!surf->texture);
- surf->refcount--;
- if (surf->refcount == 0) {
- if (surf->buffer)
- winsys_buffer_reference(winsys, &surf->buffer, NULL);
- free(surf);
- }
- *s = NULL;
+ return winsys->buffer_create(winsys, alignment,
+ usage,
+ *stride * nblocksy);
}
@@ -279,9 +241,7 @@ st_softpipe_screen_create(void)
winsys->buffer_unmap = st_softpipe_buffer_unmap;
winsys->buffer_destroy = st_softpipe_buffer_destroy;
- winsys->surface_alloc = st_softpipe_surface_alloc;
- winsys->surface_alloc_storage = st_softpipe_surface_alloc_storage;
- winsys->surface_release = st_softpipe_surface_release;
+ winsys->surface_buffer_create = st_softpipe_surface_buffer_create;
winsys->fence_reference = st_softpipe_fence_reference;
winsys->fence_signalled = st_softpipe_fence_signalled;
diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript
new file mode 100644
index 0000000000..2141b02d68
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/SConscript
@@ -0,0 +1,40 @@
+import os
+
+Import('*')
+
+if env['platform'] in ['windows']:
+
+ env = env.Clone()
+
+ env.Append(CPPPATH = [
+ '#src/mesa',
+ '.',
+ ])
+
+ env.Append(CPPDEFINES = [
+ '_GDI32_', # prevent wgl* being declared __declspec(dllimport)
+ 'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers
+ 'WIN32_THREADS', # use Win32 thread API
+ ])
+
+ sources = [
+ 'icd/stw_icd.c',
+
+ 'wgl/stw_wgl.c',
+
+ 'shared/stw_context.c',
+ 'shared/stw_device.c',
+ 'shared/stw_framebuffer.c',
+ 'shared/stw_pixelformat.c',
+ 'shared/stw_quirks.c',
+ 'shared/stw_arbextensionsstring.c',
+ 'shared/stw_getprocaddress.c',
+ 'shared/stw_arbpixelformat.c',
+ ]
+
+ wgl = env.ConvenienceLibrary(
+ target ='wgl',
+ source = sources,
+ )
+
+ Export('wgl')
diff --git a/src/gallium/state_trackers/wgl/icd/stw_icd.c b/src/gallium/state_trackers/wgl/icd/stw_icd.c
new file mode 100644
index 0000000000..8ae6aa1f3e
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/icd/stw_icd.c
@@ -0,0 +1,594 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <windows.h>
+#include <stdio.h>
+
+#include "GL/gl.h"
+
+#include "util/u_debug.h"
+#include "pipe/p_thread.h"
+
+#include "shared/stw_public.h"
+#include "icd/stw_icd.h"
+
+
+static GLCLTPROCTABLE cpt;
+static boolean cpt_initialized = FALSE;
+
+
+BOOL APIENTRY
+DrvCopyContext(
+ DHGLRC dhrcSource,
+ DHGLRC dhrcDest,
+ UINT fuMask )
+{
+ return stw_copy_context(dhrcSource, dhrcDest, fuMask);
+}
+
+
+DHGLRC APIENTRY
+DrvCreateLayerContext(
+ HDC hdc,
+ INT iLayerPlane )
+{
+ return stw_create_layer_context( hdc, iLayerPlane );
+}
+
+DHGLRC APIENTRY
+DrvCreateContext(
+ HDC hdc )
+{
+ return DrvCreateLayerContext( hdc, 0 );
+}
+
+BOOL APIENTRY
+DrvDeleteContext(
+ DHGLRC dhglrc )
+{
+ return stw_delete_context( dhglrc );
+}
+
+BOOL APIENTRY
+DrvDescribeLayerPlane(
+ HDC hdc,
+ INT iPixelFormat,
+ INT iLayerPlane,
+ UINT nBytes,
+ LPLAYERPLANEDESCRIPTOR plpd )
+{
+ debug_printf( "%s\n", __FUNCTION__ );
+
+ return FALSE;
+}
+
+LONG APIENTRY
+DrvDescribePixelFormat(
+ HDC hdc,
+ INT iPixelFormat,
+ ULONG cjpfd,
+ PIXELFORMATDESCRIPTOR *ppfd )
+{
+ LONG r;
+
+ r = stw_pixelformat_describe( hdc, iPixelFormat, cjpfd, ppfd );
+ /* cjpfd is a ULONG and r a LONG, so they are traced with %lu / %ld. */
+ debug_printf( "%s( %p, %d, %lu, %p ) = %ld\n",
+ __FUNCTION__, hdc, iPixelFormat, cjpfd, ppfd, r );
+
+ return r;
+}
+
+int APIENTRY
+DrvGetLayerPaletteEntries(
+ HDC hdc,
+ INT iLayerPlane,
+ INT iStart,
+ INT cEntries,
+ COLORREF *pcr )
+{
+ debug_printf( "%s\n", __FUNCTION__ );
+
+ return 0;
+}
+
+PROC APIENTRY
+DrvGetProcAddress(
+ LPCSTR lpszProc )
+{
+ PROC r;
+
+ /* Forward the lookup to stw_get_proc_address() and trace the result. */
+ r = stw_get_proc_address( lpszProc );
+
+ debug_printf( "%s( \"%s\" ) = %p\n", __FUNCTION__, lpszProc, r );
+ return r;
+}
+
+BOOL APIENTRY
+DrvRealizeLayerPalette(
+ HDC hdc,
+ INT iLayerPlane,
+ BOOL bRealize )
+{
+ debug_printf( "%s\n", __FUNCTION__ );
+
+ return FALSE;
+}
+
+BOOL APIENTRY
+DrvReleaseContext(
+ DHGLRC dhglrc )
+{
+ return stw_release_context(dhglrc);
+}
+
+void APIENTRY
+DrvSetCallbackProcs(
+ INT nProcs,
+ PROC *pProcs )
+{
+ debug_printf( "%s( %d, %p )\n", __FUNCTION__, nProcs, pProcs );
+
+ return;
+}
+
+
+static void init_proc_table( GLCLTPROCTABLE *cpt )
+{
+ GLDISPATCHTABLE *disp = &cpt->glDispatchTable;
+
+ memset( cpt, 0, sizeof *cpt );
+ cpt->cEntries = OPENGL_VERSION_110_ENTRIES;
+
+#define GPA_GL( NAME ) disp->NAME = gl##NAME
+ GPA_GL( NewList );
+ GPA_GL( EndList );
+ GPA_GL( CallList );
+ GPA_GL( CallLists );
+ GPA_GL( DeleteLists );
+ GPA_GL( GenLists );
+ GPA_GL( ListBase );
+ GPA_GL( Begin );
+ GPA_GL( Bitmap );
+ GPA_GL( Color3b );
+ GPA_GL( Color3bv );
+ GPA_GL( Color3d );
+ GPA_GL( Color3dv );
+ GPA_GL( Color3f );
+ GPA_GL( Color3fv );
+ GPA_GL( Color3i );
+ GPA_GL( Color3iv );
+ GPA_GL( Color3s );
+ GPA_GL( Color3sv );
+ GPA_GL( Color3ub );
+ GPA_GL( Color3ubv );
+ GPA_GL( Color3ui );
+ GPA_GL( Color3uiv );
+ GPA_GL( Color3us );
+ GPA_GL( Color3usv );
+ GPA_GL( Color4b );
+ GPA_GL( Color4bv );
+ GPA_GL( Color4d );
+ GPA_GL( Color4dv );
+ GPA_GL( Color4f );
+ GPA_GL( Color4fv );
+ GPA_GL( Color4i );
+ GPA_GL( Color4iv );
+ GPA_GL( Color4s );
+ GPA_GL( Color4sv );
+ GPA_GL( Color4ub );
+ GPA_GL( Color4ubv );
+ GPA_GL( Color4ui );
+ GPA_GL( Color4uiv );
+ GPA_GL( Color4us );
+ GPA_GL( Color4usv );
+ GPA_GL( EdgeFlag );
+ GPA_GL( EdgeFlagv );
+ GPA_GL( End );
+ GPA_GL( Indexd );
+ GPA_GL( Indexdv );
+ GPA_GL( Indexf );
+ GPA_GL( Indexfv );
+ GPA_GL( Indexi );
+ GPA_GL( Indexiv );
+ GPA_GL( Indexs );
+ GPA_GL( Indexsv );
+ GPA_GL( Normal3b );
+ GPA_GL( Normal3bv );
+ GPA_GL( Normal3d );
+ GPA_GL( Normal3dv );
+ GPA_GL( Normal3f );
+ GPA_GL( Normal3fv );
+ GPA_GL( Normal3i );
+ GPA_GL( Normal3iv );
+ GPA_GL( Normal3s );
+ GPA_GL( Normal3sv );
+ GPA_GL( RasterPos2d );
+ GPA_GL( RasterPos2dv );
+ GPA_GL( RasterPos2f );
+ GPA_GL( RasterPos2fv );
+ GPA_GL( RasterPos2i );
+ GPA_GL( RasterPos2iv );
+ GPA_GL( RasterPos2s );
+ GPA_GL( RasterPos2sv );
+ GPA_GL( RasterPos3d );
+ GPA_GL( RasterPos3dv );
+ GPA_GL( RasterPos3f );
+ GPA_GL( RasterPos3fv );
+ GPA_GL( RasterPos3i );
+ GPA_GL( RasterPos3iv );
+ GPA_GL( RasterPos3s );
+ GPA_GL( RasterPos3sv );
+ GPA_GL( RasterPos4d );
+ GPA_GL( RasterPos4dv );
+ GPA_GL( RasterPos4f );
+ GPA_GL( RasterPos4fv );
+ GPA_GL( RasterPos4i );
+ GPA_GL( RasterPos4iv );
+ GPA_GL( RasterPos4s );
+ GPA_GL( RasterPos4sv );
+ GPA_GL( Rectd );
+ GPA_GL( Rectdv );
+ GPA_GL( Rectf );
+ GPA_GL( Rectfv );
+ GPA_GL( Recti );
+ GPA_GL( Rectiv );
+ GPA_GL( Rects );
+ GPA_GL( Rectsv );
+ GPA_GL( TexCoord1d );
+ GPA_GL( TexCoord1dv );
+ GPA_GL( TexCoord1f );
+ GPA_GL( TexCoord1fv );
+ GPA_GL( TexCoord1i );
+ GPA_GL( TexCoord1iv );
+ GPA_GL( TexCoord1s );
+ GPA_GL( TexCoord1sv );
+ GPA_GL( TexCoord2d );
+ GPA_GL( TexCoord2dv );
+ GPA_GL( TexCoord2f );
+ GPA_GL( TexCoord2fv );
+ GPA_GL( TexCoord2i );
+ GPA_GL( TexCoord2iv );
+ GPA_GL( TexCoord2s );
+ GPA_GL( TexCoord2sv );
+ GPA_GL( TexCoord3d );
+ GPA_GL( TexCoord3dv );
+ GPA_GL( TexCoord3f );
+ GPA_GL( TexCoord3fv );
+ GPA_GL( TexCoord3i );
+ GPA_GL( TexCoord3iv );
+ GPA_GL( TexCoord3s );
+ GPA_GL( TexCoord3sv );
+ GPA_GL( TexCoord4d );
+ GPA_GL( TexCoord4dv );
+ GPA_GL( TexCoord4f );
+ GPA_GL( TexCoord4fv );
+ GPA_GL( TexCoord4i );
+ GPA_GL( TexCoord4iv );
+ GPA_GL( TexCoord4s );
+ GPA_GL( TexCoord4sv );
+ GPA_GL( Vertex2d );
+ GPA_GL( Vertex2dv );
+ GPA_GL( Vertex2f );
+ GPA_GL( Vertex2fv );
+ GPA_GL( Vertex2i );
+ GPA_GL( Vertex2iv );
+ GPA_GL( Vertex2s );
+ GPA_GL( Vertex2sv );
+ GPA_GL( Vertex3d );
+ GPA_GL( Vertex3dv );
+ GPA_GL( Vertex3f );
+ GPA_GL( Vertex3fv );
+ GPA_GL( Vertex3i );
+ GPA_GL( Vertex3iv );
+ GPA_GL( Vertex3s );
+ GPA_GL( Vertex3sv );
+ GPA_GL( Vertex4d );
+ GPA_GL( Vertex4dv );
+ GPA_GL( Vertex4f );
+ GPA_GL( Vertex4fv );
+ GPA_GL( Vertex4i );
+ GPA_GL( Vertex4iv );
+ GPA_GL( Vertex4s );
+ GPA_GL( Vertex4sv );
+ GPA_GL( ClipPlane );
+ GPA_GL( ColorMaterial );
+ GPA_GL( CullFace );
+ GPA_GL( Fogf );
+ GPA_GL( Fogfv );
+ GPA_GL( Fogi );
+ GPA_GL( Fogiv );
+ GPA_GL( FrontFace );
+ GPA_GL( Hint );
+ GPA_GL( Lightf );
+ GPA_GL( Lightfv );
+ GPA_GL( Lighti );
+ GPA_GL( Lightiv );
+ GPA_GL( LightModelf );
+ GPA_GL( LightModelfv );
+ GPA_GL( LightModeli );
+ GPA_GL( LightModeliv );
+ GPA_GL( LineStipple );
+ GPA_GL( LineWidth );
+ GPA_GL( Materialf );
+ GPA_GL( Materialfv );
+ GPA_GL( Materiali );
+ GPA_GL( Materialiv );
+ GPA_GL( PointSize );
+ GPA_GL( PolygonMode );
+ GPA_GL( PolygonStipple );
+ GPA_GL( Scissor );
+ GPA_GL( ShadeModel );
+ GPA_GL( TexParameterf );
+ GPA_GL( TexParameterfv );
+ GPA_GL( TexParameteri );
+ GPA_GL( TexParameteriv );
+ GPA_GL( TexImage1D );
+ GPA_GL( TexImage2D );
+ GPA_GL( TexEnvf );
+ GPA_GL( TexEnvfv );
+ GPA_GL( TexEnvi );
+ GPA_GL( TexEnviv );
+ GPA_GL( TexGend );
+ GPA_GL( TexGendv );
+ GPA_GL( TexGenf );
+ GPA_GL( TexGenfv );
+ GPA_GL( TexGeni );
+ GPA_GL( TexGeniv );
+ GPA_GL( FeedbackBuffer );
+ GPA_GL( SelectBuffer );
+ GPA_GL( RenderMode );
+ GPA_GL( InitNames );
+ GPA_GL( LoadName );
+ GPA_GL( PassThrough );
+ GPA_GL( PopName );
+ GPA_GL( PushName );
+ GPA_GL( DrawBuffer );
+ GPA_GL( Clear );
+ GPA_GL( ClearAccum );
+ GPA_GL( ClearIndex );
+ GPA_GL( ClearColor );
+ GPA_GL( ClearStencil );
+ GPA_GL( ClearDepth );
+ GPA_GL( StencilMask );
+ GPA_GL( ColorMask );
+ GPA_GL( DepthMask );
+ GPA_GL( IndexMask );
+ GPA_GL( Accum );
+ GPA_GL( Disable );
+ GPA_GL( Enable );
+ GPA_GL( Finish );
+ GPA_GL( Flush );
+ GPA_GL( PopAttrib );
+ GPA_GL( PushAttrib );
+ GPA_GL( Map1d );
+ GPA_GL( Map1f );
+ GPA_GL( Map2d );
+ GPA_GL( Map2f );
+ GPA_GL( MapGrid1d );
+ GPA_GL( MapGrid1f );
+ GPA_GL( MapGrid2d );
+ GPA_GL( MapGrid2f );
+ GPA_GL( EvalCoord1d );
+ GPA_GL( EvalCoord1dv );
+ GPA_GL( EvalCoord1f );
+ GPA_GL( EvalCoord1fv );
+ GPA_GL( EvalCoord2d );
+ GPA_GL( EvalCoord2dv );
+ GPA_GL( EvalCoord2f );
+ GPA_GL( EvalCoord2fv );
+ GPA_GL( EvalMesh1 );
+ GPA_GL( EvalPoint1 );
+ GPA_GL( EvalMesh2 );
+ GPA_GL( EvalPoint2 );
+ GPA_GL( AlphaFunc );
+ GPA_GL( BlendFunc );
+ GPA_GL( LogicOp );
+ GPA_GL( StencilFunc );
+ GPA_GL( StencilOp );
+ GPA_GL( DepthFunc );
+ GPA_GL( PixelZoom );
+ GPA_GL( PixelTransferf );
+ GPA_GL( PixelTransferi );
+ GPA_GL( PixelStoref );
+ GPA_GL( PixelStorei );
+ GPA_GL( PixelMapfv );
+ GPA_GL( PixelMapuiv );
+ GPA_GL( PixelMapusv );
+ GPA_GL( ReadBuffer );
+ GPA_GL( CopyPixels );
+ GPA_GL( ReadPixels );
+ GPA_GL( DrawPixels );
+ GPA_GL( GetBooleanv );
+ GPA_GL( GetClipPlane );
+ GPA_GL( GetDoublev );
+ GPA_GL( GetError );
+ GPA_GL( GetFloatv );
+ GPA_GL( GetIntegerv );
+ GPA_GL( GetLightfv );
+ GPA_GL( GetLightiv );
+ GPA_GL( GetMapdv );
+ GPA_GL( GetMapfv );
+ GPA_GL( GetMapiv );
+ GPA_GL( GetMaterialfv );
+ GPA_GL( GetMaterialiv );
+ GPA_GL( GetPixelMapfv );
+ GPA_GL( GetPixelMapuiv );
+ GPA_GL( GetPixelMapusv );
+ GPA_GL( GetPolygonStipple );
+ GPA_GL( GetString );
+ GPA_GL( GetTexEnvfv );
+ GPA_GL( GetTexEnviv );
+ GPA_GL( GetTexGendv );
+ GPA_GL( GetTexGenfv );
+ GPA_GL( GetTexGeniv );
+ GPA_GL( GetTexImage );
+ GPA_GL( GetTexParameterfv );
+ GPA_GL( GetTexParameteriv );
+ GPA_GL( GetTexLevelParameterfv );
+ GPA_GL( GetTexLevelParameteriv );
+ GPA_GL( IsEnabled );
+ GPA_GL( IsList );
+ GPA_GL( DepthRange );
+ GPA_GL( Frustum );
+ GPA_GL( LoadIdentity );
+ GPA_GL( LoadMatrixf );
+ GPA_GL( LoadMatrixd );
+ GPA_GL( MatrixMode );
+ GPA_GL( MultMatrixf );
+ GPA_GL( MultMatrixd );
+ GPA_GL( Ortho );
+ GPA_GL( PopMatrix );
+ GPA_GL( PushMatrix );
+ GPA_GL( Rotated );
+ GPA_GL( Rotatef );
+ GPA_GL( Scaled );
+ GPA_GL( Scalef );
+ GPA_GL( Translated );
+ GPA_GL( Translatef );
+ GPA_GL( Viewport );
+ GPA_GL( ArrayElement );
+ GPA_GL( BindTexture );
+ GPA_GL( ColorPointer );
+ GPA_GL( DisableClientState );
+ GPA_GL( DrawArrays );
+ GPA_GL( DrawElements );
+ GPA_GL( EdgeFlagPointer );
+ GPA_GL( EnableClientState );
+ GPA_GL( IndexPointer );
+ GPA_GL( Indexub );
+ GPA_GL( Indexubv );
+ GPA_GL( InterleavedArrays );
+ GPA_GL( NormalPointer );
+ GPA_GL( PolygonOffset );
+ GPA_GL( TexCoordPointer );
+ GPA_GL( VertexPointer );
+ GPA_GL( AreTexturesResident );
+ GPA_GL( CopyTexImage1D );
+ GPA_GL( CopyTexImage2D );
+ GPA_GL( CopyTexSubImage1D );
+ GPA_GL( CopyTexSubImage2D );
+ GPA_GL( DeleteTextures );
+ GPA_GL( GenTextures );
+ GPA_GL( GetPointerv );
+ GPA_GL( IsTexture );
+ GPA_GL( PrioritizeTextures );
+ GPA_GL( TexSubImage1D );
+ GPA_GL( TexSubImage2D );
+ GPA_GL( PopClientAttrib );
+ GPA_GL( PushClientAttrib );
+}
+
+PGLCLTPROCTABLE APIENTRY
+DrvSetContext(
+ HDC hdc,
+ DHGLRC dhglrc,
+ PFN_SETPROCTABLE pfnSetProcTable )
+{
+ debug_printf( "%s( 0x%p, %lu, 0x%p )\n",
+ __FUNCTION__, hdc, dhglrc, pfnSetProcTable );
+
+ /* Although WGL allows different dispatch entrypoints per context, one
+ * static table is filled in on first use and returned for every context. */
+ if (!cpt_initialized) {
+ init_proc_table( &cpt );
+ cpt_initialized = TRUE;
+ }
+ /* No table is returned when stw_make_current() fails. */
+ if (!stw_make_current( hdc, dhglrc ))
+ return NULL;
+
+ return &cpt;
+}
+
+int APIENTRY
+DrvSetLayerPaletteEntries(
+ HDC hdc,
+ INT iLayerPlane,
+ INT iStart,
+ INT cEntries,
+ CONST COLORREF *pcr )
+{
+ debug_printf( "%s\n", __FUNCTION__ );
+
+ return 0;
+}
+
+BOOL APIENTRY
+DrvSetPixelFormat(
+ HDC hdc,
+ LONG iPixelFormat )
+{
+ BOOL r;
+
+ r = stw_pixelformat_set( hdc, iPixelFormat );
+
+ /* iPixelFormat is a LONG, so it is traced with %ld. */
+ debug_printf( "%s( %p, %ld ) = %s\n", __FUNCTION__, hdc, iPixelFormat, r ? "TRUE" : "FALSE" );
+ return r;
+}
+
+BOOL APIENTRY
+DrvShareLists(
+ DHGLRC dhglrc1,
+ DHGLRC dhglrc2 )
+{
+ debug_printf( "%s\n", __FUNCTION__ );
+
+ return FALSE;
+}
+
+BOOL APIENTRY
+DrvSwapBuffers(
+ HDC hdc )
+{
+ debug_printf( "%s( %p )\n", __FUNCTION__, hdc );
+
+ return stw_swap_buffers( hdc );
+}
+
+BOOL APIENTRY
+DrvSwapLayerBuffers(
+ HDC hdc,
+ UINT fuPlanes )
+{
+ debug_printf( "%s\n", __FUNCTION__ );
+
+ return FALSE;
+}
+
+BOOL APIENTRY
+DrvValidateVersion(
+ ULONG ulVersion )
+{
+ debug_printf( "%s( %lu )\n", __FUNCTION__, ulVersion );
+
+ /* TODO: get the expected version from the winsys */
+ /* Until then, only version 1 is accepted. */
+ return ulVersion == 1;
+}
diff --git a/src/gallium/state_trackers/wgl/icd/stw_icd.h b/src/gallium/state_trackers/wgl/icd/stw_icd.h
new file mode 100644
index 0000000000..8e676fb5b7
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/icd/stw_icd.h
@@ -0,0 +1,489 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DRV_H
+#define DRV_H
+
+
+#include <windows.h>
+
+#include "GL/gl.h"
+
+/* Handle type taken and returned by the Drv* context functions declared in this header (a plain ULONG). */
+typedef ULONG DHGLRC;
+/* Equals the number of function pointers declared in struct __GLdispatchTableRec. */
+#define OPENGL_VERSION_110_ENTRIES 336
+/* Dispatch table of GL entry points: OPENGL_VERSION_110_ENTRIES (336) function pointers, one per member. */
+struct __GLdispatchTableRec /* NOTE(review): member order presumably matters to the table's consumer -- confirm before reordering */
+{
+ void (GLAPIENTRY * NewList)(GLuint, GLenum);
+ void (GLAPIENTRY * EndList)(void);
+ void (GLAPIENTRY * CallList)(GLuint);
+ void (GLAPIENTRY * CallLists)(GLsizei, GLenum, const GLvoid *);
+ void (GLAPIENTRY * DeleteLists)(GLuint, GLsizei);
+ GLuint (GLAPIENTRY * GenLists)(GLsizei);
+ void (GLAPIENTRY * ListBase)(GLuint);
+ void (GLAPIENTRY * Begin)(GLenum);
+ void (GLAPIENTRY * Bitmap)(GLsizei, GLsizei, GLfloat, GLfloat, GLfloat, GLfloat, const GLubyte *);
+ void (GLAPIENTRY * Color3b)(GLbyte, GLbyte, GLbyte);
+ void (GLAPIENTRY * Color3bv)(const GLbyte *);
+ void (GLAPIENTRY * Color3d)(GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * Color3dv)(const GLdouble *);
+ void (GLAPIENTRY * Color3f)(GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * Color3fv)(const GLfloat *);
+ void (GLAPIENTRY * Color3i)(GLint, GLint, GLint);
+ void (GLAPIENTRY * Color3iv)(const GLint *);
+ void (GLAPIENTRY * Color3s)(GLshort, GLshort, GLshort);
+ void (GLAPIENTRY * Color3sv)(const GLshort *);
+ void (GLAPIENTRY * Color3ub)(GLubyte, GLubyte, GLubyte);
+ void (GLAPIENTRY * Color3ubv)(const GLubyte *);
+ void (GLAPIENTRY * Color3ui)(GLuint, GLuint, GLuint);
+ void (GLAPIENTRY * Color3uiv)(const GLuint *);
+ void (GLAPIENTRY * Color3us)(GLushort, GLushort, GLushort);
+ void (GLAPIENTRY * Color3usv)(const GLushort *);
+ void (GLAPIENTRY * Color4b)(GLbyte, GLbyte, GLbyte, GLbyte);
+ void (GLAPIENTRY * Color4bv)(const GLbyte *);
+ void (GLAPIENTRY * Color4d)(GLdouble, GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * Color4dv)(const GLdouble *);
+ void (GLAPIENTRY * Color4f)(GLfloat, GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * Color4fv)(const GLfloat *);
+ void (GLAPIENTRY * Color4i)(GLint, GLint, GLint, GLint);
+ void (GLAPIENTRY * Color4iv)(const GLint *);
+ void (GLAPIENTRY * Color4s)(GLshort, GLshort, GLshort, GLshort);
+ void (GLAPIENTRY * Color4sv)(const GLshort *);
+ void (GLAPIENTRY * Color4ub)(GLubyte, GLubyte, GLubyte, GLubyte);
+ void (GLAPIENTRY * Color4ubv)(const GLubyte *);
+ void (GLAPIENTRY * Color4ui)(GLuint, GLuint, GLuint, GLuint);
+ void (GLAPIENTRY * Color4uiv)(const GLuint *);
+ void (GLAPIENTRY * Color4us)(GLushort, GLushort, GLushort, GLushort);
+ void (GLAPIENTRY * Color4usv)(const GLushort *);
+ void (GLAPIENTRY * EdgeFlag)(GLboolean);
+ void (GLAPIENTRY * EdgeFlagv)(const GLboolean *);
+ void (GLAPIENTRY * End)(void);
+ void (GLAPIENTRY * Indexd)(GLdouble);
+ void (GLAPIENTRY * Indexdv)(const GLdouble *);
+ void (GLAPIENTRY * Indexf)(GLfloat);
+ void (GLAPIENTRY * Indexfv)(const GLfloat *);
+ void (GLAPIENTRY * Indexi)(GLint);
+ void (GLAPIENTRY * Indexiv)(const GLint *);
+ void (GLAPIENTRY * Indexs)(GLshort);
+ void (GLAPIENTRY * Indexsv)(const GLshort *);
+ void (GLAPIENTRY * Normal3b)(GLbyte, GLbyte, GLbyte);
+ void (GLAPIENTRY * Normal3bv)(const GLbyte *);
+ void (GLAPIENTRY * Normal3d)(GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * Normal3dv)(const GLdouble *);
+ void (GLAPIENTRY * Normal3f)(GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * Normal3fv)(const GLfloat *);
+ void (GLAPIENTRY * Normal3i)(GLint, GLint, GLint);
+ void (GLAPIENTRY * Normal3iv)(const GLint *);
+ void (GLAPIENTRY * Normal3s)(GLshort, GLshort, GLshort);
+ void (GLAPIENTRY * Normal3sv)(const GLshort *);
+ void (GLAPIENTRY * RasterPos2d)(GLdouble, GLdouble);
+ void (GLAPIENTRY * RasterPos2dv)(const GLdouble *);
+ void (GLAPIENTRY * RasterPos2f)(GLfloat, GLfloat);
+ void (GLAPIENTRY * RasterPos2fv)(const GLfloat *);
+ void (GLAPIENTRY * RasterPos2i)(GLint, GLint);
+ void (GLAPIENTRY * RasterPos2iv)(const GLint *);
+ void (GLAPIENTRY * RasterPos2s)(GLshort, GLshort);
+ void (GLAPIENTRY * RasterPos2sv)(const GLshort *);
+ void (GLAPIENTRY * RasterPos3d)(GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * RasterPos3dv)(const GLdouble *);
+ void (GLAPIENTRY * RasterPos3f)(GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * RasterPos3fv)(const GLfloat *);
+ void (GLAPIENTRY * RasterPos3i)(GLint, GLint, GLint);
+ void (GLAPIENTRY * RasterPos3iv)(const GLint *);
+ void (GLAPIENTRY * RasterPos3s)(GLshort, GLshort, GLshort);
+ void (GLAPIENTRY * RasterPos3sv)(const GLshort *);
+ void (GLAPIENTRY * RasterPos4d)(GLdouble, GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * RasterPos4dv)(const GLdouble *);
+ void (GLAPIENTRY * RasterPos4f)(GLfloat, GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * RasterPos4fv)(const GLfloat *);
+ void (GLAPIENTRY * RasterPos4i)(GLint, GLint, GLint, GLint);
+ void (GLAPIENTRY * RasterPos4iv)(const GLint *);
+ void (GLAPIENTRY * RasterPos4s)(GLshort, GLshort, GLshort, GLshort);
+ void (GLAPIENTRY * RasterPos4sv)(const GLshort *);
+ void (GLAPIENTRY * Rectd)(GLdouble, GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * Rectdv)(const GLdouble *, const GLdouble *);
+ void (GLAPIENTRY * Rectf)(GLfloat, GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * Rectfv)(const GLfloat *, const GLfloat *);
+ void (GLAPIENTRY * Recti)(GLint, GLint, GLint, GLint);
+ void (GLAPIENTRY * Rectiv)(const GLint *, const GLint *);
+ void (GLAPIENTRY * Rects)(GLshort, GLshort, GLshort, GLshort);
+ void (GLAPIENTRY * Rectsv)(const GLshort *, const GLshort *);
+ void (GLAPIENTRY * TexCoord1d)(GLdouble);
+ void (GLAPIENTRY * TexCoord1dv)(const GLdouble *);
+ void (GLAPIENTRY * TexCoord1f)(GLfloat);
+ void (GLAPIENTRY * TexCoord1fv)(const GLfloat *);
+ void (GLAPIENTRY * TexCoord1i)(GLint);
+ void (GLAPIENTRY * TexCoord1iv)(const GLint *);
+ void (GLAPIENTRY * TexCoord1s)(GLshort);
+ void (GLAPIENTRY * TexCoord1sv)(const GLshort *);
+ void (GLAPIENTRY * TexCoord2d)(GLdouble, GLdouble);
+ void (GLAPIENTRY * TexCoord2dv)(const GLdouble *);
+ void (GLAPIENTRY * TexCoord2f)(GLfloat, GLfloat);
+ void (GLAPIENTRY * TexCoord2fv)(const GLfloat *);
+ void (GLAPIENTRY * TexCoord2i)(GLint, GLint);
+ void (GLAPIENTRY * TexCoord2iv)(const GLint *);
+ void (GLAPIENTRY * TexCoord2s)(GLshort, GLshort);
+ void (GLAPIENTRY * TexCoord2sv)(const GLshort *);
+ void (GLAPIENTRY * TexCoord3d)(GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * TexCoord3dv)(const GLdouble *);
+ void (GLAPIENTRY * TexCoord3f)(GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * TexCoord3fv)(const GLfloat *);
+ void (GLAPIENTRY * TexCoord3i)(GLint, GLint, GLint);
+ void (GLAPIENTRY * TexCoord3iv)(const GLint *);
+ void (GLAPIENTRY * TexCoord3s)(GLshort, GLshort, GLshort);
+ void (GLAPIENTRY * TexCoord3sv)(const GLshort *);
+ void (GLAPIENTRY * TexCoord4d)(GLdouble, GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * TexCoord4dv)(const GLdouble *);
+ void (GLAPIENTRY * TexCoord4f)(GLfloat, GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * TexCoord4fv)(const GLfloat *);
+ void (GLAPIENTRY * TexCoord4i)(GLint, GLint, GLint, GLint);
+ void (GLAPIENTRY * TexCoord4iv)(const GLint *);
+ void (GLAPIENTRY * TexCoord4s)(GLshort, GLshort, GLshort, GLshort);
+ void (GLAPIENTRY * TexCoord4sv)(const GLshort *);
+ void (GLAPIENTRY * Vertex2d)(GLdouble, GLdouble);
+ void (GLAPIENTRY * Vertex2dv)(const GLdouble *);
+ void (GLAPIENTRY * Vertex2f)(GLfloat, GLfloat);
+ void (GLAPIENTRY * Vertex2fv)(const GLfloat *);
+ void (GLAPIENTRY * Vertex2i)(GLint, GLint);
+ void (GLAPIENTRY * Vertex2iv)(const GLint *);
+ void (GLAPIENTRY * Vertex2s)(GLshort, GLshort);
+ void (GLAPIENTRY * Vertex2sv)(const GLshort *);
+ void (GLAPIENTRY * Vertex3d)(GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * Vertex3dv)(const GLdouble *);
+ void (GLAPIENTRY * Vertex3f)(GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * Vertex3fv)(const GLfloat *);
+ void (GLAPIENTRY * Vertex3i)(GLint, GLint, GLint);
+ void (GLAPIENTRY * Vertex3iv)(const GLint *);
+ void (GLAPIENTRY * Vertex3s)(GLshort, GLshort, GLshort);
+ void (GLAPIENTRY * Vertex3sv)(const GLshort *);
+ void (GLAPIENTRY * Vertex4d)(GLdouble, GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * Vertex4dv)(const GLdouble *);
+ void (GLAPIENTRY * Vertex4f)(GLfloat, GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * Vertex4fv)(const GLfloat *);
+ void (GLAPIENTRY * Vertex4i)(GLint, GLint, GLint, GLint);
+ void (GLAPIENTRY * Vertex4iv)(const GLint *);
+ void (GLAPIENTRY * Vertex4s)(GLshort, GLshort, GLshort, GLshort);
+ void (GLAPIENTRY * Vertex4sv)(const GLshort *);
+ void (GLAPIENTRY * ClipPlane)(GLenum, const GLdouble *);
+ void (GLAPIENTRY * ColorMaterial)(GLenum, GLenum);
+ void (GLAPIENTRY * CullFace)(GLenum);
+ void (GLAPIENTRY * Fogf)(GLenum, GLfloat);
+ void (GLAPIENTRY * Fogfv)(GLenum, const GLfloat *);
+ void (GLAPIENTRY * Fogi)(GLenum, GLint);
+ void (GLAPIENTRY * Fogiv)(GLenum, const GLint *);
+ void (GLAPIENTRY * FrontFace)(GLenum);
+ void (GLAPIENTRY * Hint)(GLenum, GLenum);
+ void (GLAPIENTRY * Lightf)(GLenum, GLenum, GLfloat);
+ void (GLAPIENTRY * Lightfv)(GLenum, GLenum, const GLfloat *);
+ void (GLAPIENTRY * Lighti)(GLenum, GLenum, GLint);
+ void (GLAPIENTRY * Lightiv)(GLenum, GLenum, const GLint *);
+ void (GLAPIENTRY * LightModelf)(GLenum, GLfloat);
+ void (GLAPIENTRY * LightModelfv)(GLenum, const GLfloat *);
+ void (GLAPIENTRY * LightModeli)(GLenum, GLint);
+ void (GLAPIENTRY * LightModeliv)(GLenum, const GLint *);
+ void (GLAPIENTRY * LineStipple)(GLint, GLushort);
+ void (GLAPIENTRY * LineWidth)(GLfloat);
+ void (GLAPIENTRY * Materialf)(GLenum, GLenum, GLfloat);
+ void (GLAPIENTRY * Materialfv)(GLenum, GLenum, const GLfloat *);
+ void (GLAPIENTRY * Materiali)(GLenum, GLenum, GLint);
+ void (GLAPIENTRY * Materialiv)(GLenum, GLenum, const GLint *);
+ void (GLAPIENTRY * PointSize)(GLfloat);
+ void (GLAPIENTRY * PolygonMode)(GLenum, GLenum);
+ void (GLAPIENTRY * PolygonStipple)(const GLubyte *);
+ void (GLAPIENTRY * Scissor)(GLint, GLint, GLsizei, GLsizei);
+ void (GLAPIENTRY * ShadeModel)(GLenum);
+ void (GLAPIENTRY * TexParameterf)(GLenum, GLenum, GLfloat);
+ void (GLAPIENTRY * TexParameterfv)(GLenum, GLenum, const GLfloat *);
+ void (GLAPIENTRY * TexParameteri)(GLenum, GLenum, GLint);
+ void (GLAPIENTRY * TexParameteriv)(GLenum, GLenum, const GLint *);
+ void (GLAPIENTRY * TexImage1D)(GLenum, GLint, GLint, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+ void (GLAPIENTRY * TexImage2D)(GLenum, GLint, GLint, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *);
+ void (GLAPIENTRY * TexEnvf)(GLenum, GLenum, GLfloat);
+ void (GLAPIENTRY * TexEnvfv)(GLenum, GLenum, const GLfloat *);
+ void (GLAPIENTRY * TexEnvi)(GLenum, GLenum, GLint);
+ void (GLAPIENTRY * TexEnviv)(GLenum, GLenum, const GLint *);
+ void (GLAPIENTRY * TexGend)(GLenum, GLenum, GLdouble);
+ void (GLAPIENTRY * TexGendv)(GLenum, GLenum, const GLdouble *);
+ void (GLAPIENTRY * TexGenf)(GLenum, GLenum, GLfloat);
+ void (GLAPIENTRY * TexGenfv)(GLenum, GLenum, const GLfloat *);
+ void (GLAPIENTRY * TexGeni)(GLenum, GLenum, GLint);
+ void (GLAPIENTRY * TexGeniv)(GLenum, GLenum, const GLint *);
+ void (GLAPIENTRY * FeedbackBuffer)(GLsizei, GLenum, GLfloat *);
+ void (GLAPIENTRY * SelectBuffer)(GLsizei, GLuint *);
+ GLint (GLAPIENTRY * RenderMode)(GLenum);
+ void (GLAPIENTRY * InitNames)(void);
+ void (GLAPIENTRY * LoadName)(GLuint);
+ void (GLAPIENTRY * PassThrough)(GLfloat);
+ void (GLAPIENTRY * PopName)(void);
+ void (GLAPIENTRY * PushName)(GLuint);
+ void (GLAPIENTRY * DrawBuffer)(GLenum);
+ void (GLAPIENTRY * Clear)(GLbitfield);
+ void (GLAPIENTRY * ClearAccum)(GLfloat, GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * ClearIndex)(GLfloat);
+ void (GLAPIENTRY * ClearColor)(GLclampf, GLclampf, GLclampf, GLclampf);
+ void (GLAPIENTRY * ClearStencil)(GLint);
+ void (GLAPIENTRY * ClearDepth)(GLclampd);
+ void (GLAPIENTRY * StencilMask)(GLuint);
+ void (GLAPIENTRY * ColorMask)(GLboolean, GLboolean, GLboolean, GLboolean);
+ void (GLAPIENTRY * DepthMask)(GLboolean);
+ void (GLAPIENTRY * IndexMask)(GLuint);
+ void (GLAPIENTRY * Accum)(GLenum, GLfloat);
+ void (GLAPIENTRY * Disable)(GLenum);
+ void (GLAPIENTRY * Enable)(GLenum);
+ void (GLAPIENTRY * Finish)(void);
+ void (GLAPIENTRY * Flush)(void);
+ void (GLAPIENTRY * PopAttrib)(void);
+ void (GLAPIENTRY * PushAttrib)(GLbitfield);
+ void (GLAPIENTRY * Map1d)(GLenum, GLdouble, GLdouble, GLint, GLint, const GLdouble *);
+ void (GLAPIENTRY * Map1f)(GLenum, GLfloat, GLfloat, GLint, GLint, const GLfloat *);
+ void (GLAPIENTRY * Map2d)(GLenum, GLdouble, GLdouble, GLint, GLint, GLdouble, GLdouble, GLint, GLint, const GLdouble *);
+ void (GLAPIENTRY * Map2f)(GLenum, GLfloat, GLfloat, GLint, GLint, GLfloat, GLfloat, GLint, GLint, const GLfloat *);
+ void (GLAPIENTRY * MapGrid1d)(GLint, GLdouble, GLdouble);
+ void (GLAPIENTRY * MapGrid1f)(GLint, GLfloat, GLfloat);
+ void (GLAPIENTRY * MapGrid2d)(GLint, GLdouble, GLdouble, GLint, GLdouble, GLdouble);
+ void (GLAPIENTRY * MapGrid2f)(GLint, GLfloat, GLfloat, GLint, GLfloat, GLfloat);
+ void (GLAPIENTRY * EvalCoord1d)(GLdouble);
+ void (GLAPIENTRY * EvalCoord1dv)(const GLdouble *);
+ void (GLAPIENTRY * EvalCoord1f)(GLfloat);
+ void (GLAPIENTRY * EvalCoord1fv)(const GLfloat *);
+ void (GLAPIENTRY * EvalCoord2d)(GLdouble, GLdouble);
+ void (GLAPIENTRY * EvalCoord2dv)(const GLdouble *);
+ void (GLAPIENTRY * EvalCoord2f)(GLfloat, GLfloat);
+ void (GLAPIENTRY * EvalCoord2fv)(const GLfloat *);
+ void (GLAPIENTRY * EvalMesh1)(GLenum, GLint, GLint);
+ void (GLAPIENTRY * EvalPoint1)(GLint);
+ void (GLAPIENTRY * EvalMesh2)(GLenum, GLint, GLint, GLint, GLint);
+ void (GLAPIENTRY * EvalPoint2)(GLint, GLint);
+ void (GLAPIENTRY * AlphaFunc)(GLenum, GLclampf);
+ void (GLAPIENTRY * BlendFunc)(GLenum, GLenum);
+ void (GLAPIENTRY * LogicOp)(GLenum);
+ void (GLAPIENTRY * StencilFunc)(GLenum, GLint, GLuint);
+ void (GLAPIENTRY * StencilOp)(GLenum, GLenum, GLenum);
+ void (GLAPIENTRY * DepthFunc)(GLenum);
+ void (GLAPIENTRY * PixelZoom)(GLfloat, GLfloat);
+ void (GLAPIENTRY * PixelTransferf)(GLenum, GLfloat);
+ void (GLAPIENTRY * PixelTransferi)(GLenum, GLint);
+ void (GLAPIENTRY * PixelStoref)(GLenum, GLfloat);
+ void (GLAPIENTRY * PixelStorei)(GLenum, GLint);
+ void (GLAPIENTRY * PixelMapfv)(GLenum, GLint, const GLfloat *);
+ void (GLAPIENTRY * PixelMapuiv)(GLenum, GLint, const GLuint *);
+ void (GLAPIENTRY * PixelMapusv)(GLenum, GLint, const GLushort *);
+ void (GLAPIENTRY * ReadBuffer)(GLenum);
+ void (GLAPIENTRY * CopyPixels)(GLint, GLint, GLsizei, GLsizei, GLenum);
+ void (GLAPIENTRY * ReadPixels)(GLint, GLint, GLsizei, GLsizei, GLenum, GLenum, GLvoid *);
+ void (GLAPIENTRY * DrawPixels)(GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+ void (GLAPIENTRY * GetBooleanv)(GLenum, GLboolean *);
+ void (GLAPIENTRY * GetClipPlane)(GLenum, GLdouble *);
+ void (GLAPIENTRY * GetDoublev)(GLenum, GLdouble *);
+ GLenum (GLAPIENTRY * GetError)(void);
+ void (GLAPIENTRY * GetFloatv)(GLenum, GLfloat *);
+ void (GLAPIENTRY * GetIntegerv)(GLenum, GLint *);
+ void (GLAPIENTRY * GetLightfv)(GLenum, GLenum, GLfloat *);
+ void (GLAPIENTRY * GetLightiv)(GLenum, GLenum, GLint *);
+ void (GLAPIENTRY * GetMapdv)(GLenum, GLenum, GLdouble *);
+ void (GLAPIENTRY * GetMapfv)(GLenum, GLenum, GLfloat *);
+ void (GLAPIENTRY * GetMapiv)(GLenum, GLenum, GLint *);
+ void (GLAPIENTRY * GetMaterialfv)(GLenum, GLenum, GLfloat *);
+ void (GLAPIENTRY * GetMaterialiv)(GLenum, GLenum, GLint *);
+ void (GLAPIENTRY * GetPixelMapfv)(GLenum, GLfloat *);
+ void (GLAPIENTRY * GetPixelMapuiv)(GLenum, GLuint *);
+ void (GLAPIENTRY * GetPixelMapusv)(GLenum, GLushort *);
+ void (GLAPIENTRY * GetPolygonStipple)(GLubyte *);
+ const GLubyte * (GLAPIENTRY * GetString)(GLenum);
+ void (GLAPIENTRY * GetTexEnvfv)(GLenum, GLenum, GLfloat *);
+ void (GLAPIENTRY * GetTexEnviv)(GLenum, GLenum, GLint *);
+ void (GLAPIENTRY * GetTexGendv)(GLenum, GLenum, GLdouble *);
+ void (GLAPIENTRY * GetTexGenfv)(GLenum, GLenum, GLfloat *);
+ void (GLAPIENTRY * GetTexGeniv)(GLenum, GLenum, GLint *);
+ void (GLAPIENTRY * GetTexImage)(GLenum, GLint, GLenum, GLenum, GLvoid *);
+ void (GLAPIENTRY * GetTexParameterfv)(GLenum, GLenum, GLfloat *);
+ void (GLAPIENTRY * GetTexParameteriv)(GLenum, GLenum, GLint *);
+ void (GLAPIENTRY * GetTexLevelParameterfv)(GLenum, GLint, GLenum, GLfloat *);
+ void (GLAPIENTRY * GetTexLevelParameteriv)(GLenum, GLint, GLenum, GLint *);
+ GLboolean (GLAPIENTRY * IsEnabled)(GLenum);
+ GLboolean (GLAPIENTRY * IsList)(GLuint);
+ void (GLAPIENTRY * DepthRange)(GLclampd, GLclampd);
+ void (GLAPIENTRY * Frustum)(GLdouble, GLdouble, GLdouble, GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * LoadIdentity)(void);
+ void (GLAPIENTRY * LoadMatrixf)(const GLfloat *);
+ void (GLAPIENTRY * LoadMatrixd)(const GLdouble *);
+ void (GLAPIENTRY * MatrixMode)(GLenum);
+ void (GLAPIENTRY * MultMatrixf)(const GLfloat *);
+ void (GLAPIENTRY * MultMatrixd)(const GLdouble *);
+ void (GLAPIENTRY * Ortho)(GLdouble, GLdouble, GLdouble, GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * PopMatrix)(void);
+ void (GLAPIENTRY * PushMatrix)(void);
+ void (GLAPIENTRY * Rotated)(GLdouble, GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * Rotatef)(GLfloat, GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * Scaled)(GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * Scalef)(GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * Translated)(GLdouble, GLdouble, GLdouble);
+ void (GLAPIENTRY * Translatef)(GLfloat, GLfloat, GLfloat);
+ void (GLAPIENTRY * Viewport)(GLint, GLint, GLsizei, GLsizei);
+ void (GLAPIENTRY * ArrayElement)(GLint);
+ void (GLAPIENTRY * BindTexture)(GLenum, GLuint);
+ void (GLAPIENTRY * ColorPointer)(GLint, GLenum, GLsizei, const GLvoid *);
+ void (GLAPIENTRY * DisableClientState)(GLenum);
+ void (GLAPIENTRY * DrawArrays)(GLenum, GLint, GLsizei);
+ void (GLAPIENTRY * DrawElements)(GLenum, GLsizei, GLenum, const GLvoid *);
+ void (GLAPIENTRY * EdgeFlagPointer)(GLsizei, const GLvoid *);
+ void (GLAPIENTRY * EnableClientState)(GLenum);
+ void (GLAPIENTRY * IndexPointer)(GLenum, GLsizei, const GLvoid *);
+ void (GLAPIENTRY * Indexub)(GLubyte);
+ void (GLAPIENTRY * Indexubv)(const GLubyte *);
+ void (GLAPIENTRY * InterleavedArrays)(GLenum, GLsizei, const GLvoid *);
+ void (GLAPIENTRY * NormalPointer)(GLenum, GLsizei, const GLvoid *);
+ void (GLAPIENTRY * PolygonOffset)(GLfloat, GLfloat);
+ void (GLAPIENTRY * TexCoordPointer)(GLint, GLenum, GLsizei, const GLvoid *);
+ void (GLAPIENTRY * VertexPointer)(GLint, GLenum, GLsizei, const GLvoid *);
+ GLboolean (GLAPIENTRY * AreTexturesResident)(GLsizei, const GLuint *, GLboolean *);
+ void (GLAPIENTRY * CopyTexImage1D)(GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLint);
+ void (GLAPIENTRY * CopyTexImage2D)(GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLsizei, GLint);
+ void (GLAPIENTRY * CopyTexSubImage1D)(GLenum, GLint, GLint, GLint, GLint, GLsizei);
+ void (GLAPIENTRY * CopyTexSubImage2D)(GLenum, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei);
+ void (GLAPIENTRY * DeleteTextures)(GLsizei, const GLuint *);
+ void (GLAPIENTRY * GenTextures)(GLsizei, GLuint *);
+ void (GLAPIENTRY * GetPointerv)(GLenum, GLvoid **);
+ GLboolean (GLAPIENTRY * IsTexture)(GLuint);
+ void (GLAPIENTRY * PrioritizeTextures)(GLsizei, const GLuint *, const GLclampf *);
+ void (GLAPIENTRY * TexSubImage1D)(GLenum, GLint, GLint, GLsizei, GLenum, GLenum, const GLvoid *);
+ void (GLAPIENTRY * TexSubImage2D)(GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *);
+ void (GLAPIENTRY * PopClientAttrib)(void);
+ void (GLAPIENTRY * PushClientAttrib)(GLbitfield);
+};
+/* Shorthand name for struct __GLdispatchTableRec. */
+typedef struct __GLdispatchTableRec GLDISPATCHTABLE;
+/* Proc table handed back by DrvSetContext(): an int count followed by the embedded dispatch table. */
+typedef struct _GLCLTPROCTABLE
+{
+ int cEntries; /* presumably the number of filled-in entries in glDispatchTable -- confirm where the table is initialized */
+ GLDISPATCHTABLE glDispatchTable;
+} GLCLTPROCTABLE, * PGLCLTPROCTABLE;
+/* Pointer to an APIENTRY callback that takes a proc table pointer; DrvSetContext() receives one as pfnSetProcTable. */
+typedef VOID (APIENTRY * PFN_SETPROCTABLE)(PGLCLTPROCTABLE);
+/* Driver entry points; each name below is also listed in the EXPORTS section of opengl32.def. */
+BOOL APIENTRY
+DrvCopyContext(
+ DHGLRC dhrcSource,
+ DHGLRC dhrcDest,
+ UINT fuMask );
+
+DHGLRC APIENTRY
+DrvCreateLayerContext(
+ HDC hdc,
+ INT iLayerPlane );
+
+DHGLRC APIENTRY
+DrvCreateContext(
+ HDC hdc );
+
+BOOL APIENTRY
+DrvDeleteContext(
+ DHGLRC dhglrc );
+
+BOOL APIENTRY
+DrvDescribeLayerPlane(
+ HDC hdc,
+ INT iPixelFormat,
+ INT iLayerPlane,
+ UINT nBytes,
+ LPLAYERPLANEDESCRIPTOR plpd );
+
+LONG APIENTRY
+DrvDescribePixelFormat(
+ HDC hdc,
+ INT iPixelFormat,
+ ULONG cjpfd,
+ PIXELFORMATDESCRIPTOR *ppfd );
+
+int APIENTRY
+DrvGetLayerPaletteEntries(
+ HDC hdc,
+ INT iLayerPlane,
+ INT iStart,
+ INT cEntries,
+ COLORREF *pcr );
+
+PROC APIENTRY
+DrvGetProcAddress(
+ LPCSTR lpszProc );
+
+BOOL APIENTRY
+DrvRealizeLayerPalette(
+ HDC hdc,
+ INT iLayerPlane,
+ BOOL bRealize );
+
+BOOL APIENTRY
+DrvReleaseContext(
+ DHGLRC dhglrc );
+
+void APIENTRY
+DrvSetCallbackProcs(
+ INT nProcs,
+ PROC *pProcs );
+/* Returns a pointer to the GL proc table, or NULL when stw_make_current() fails for ( hdc, dhglrc ). */
+PGLCLTPROCTABLE APIENTRY
+DrvSetContext(
+ HDC hdc,
+ DHGLRC dhglrc,
+ PFN_SETPROCTABLE pfnSetProcTable );
+/* Currently a stub that returns 0. */
+int APIENTRY
+DrvSetLayerPaletteEntries(
+ HDC hdc,
+ INT iLayerPlane,
+ INT iStart,
+ INT cEntries,
+ CONST COLORREF *pcr );
+/* Returns the result of stw_pixelformat_set( hdc, iPixelFormat ). */
+BOOL APIENTRY
+DrvSetPixelFormat(
+ HDC hdc,
+ LONG iPixelFormat );
+/* Currently a stub that returns FALSE. */
+BOOL APIENTRY
+DrvShareLists(
+ DHGLRC dhglrc1,
+ DHGLRC dhglrc2 );
+/* Returns the result of stw_swap_buffers( hdc ). */
+BOOL APIENTRY
+DrvSwapBuffers(
+ HDC hdc );
+/* Currently a stub that returns FALSE. */
+BOOL APIENTRY
+DrvSwapLayerBuffers(
+ HDC hdc,
+ UINT fuPlanes );
+/* Returns nonzero only when ulVersion == 1. */
+BOOL APIENTRY
+DrvValidateVersion(
+ ULONG ulVersion );
+
+#endif /* DRV_H */
diff --git a/src/gallium/state_trackers/wgl/opengl32.def b/src/gallium/state_trackers/wgl/opengl32.def
new file mode 100644
index 0000000000..596417ed84
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/opengl32.def
@@ -0,0 +1,388 @@
+EXPORTS
+; GlmfBeginGlsBlock
+; GlmfCloseMetaFile
+; GlmfEndGlsBlock
+; GlmfEndPlayback
+; GlmfInitPlayback
+; GlmfPlayGlsRecord
+ glAccum
+ glAlphaFunc
+ glAreTexturesResident
+ glArrayElement
+ glBegin
+ glBindTexture
+ glBitmap
+ glBlendFunc
+ glCallList
+ glCallLists
+ glClear
+ glClearAccum
+ glClearColor
+ glClearDepth
+ glClearIndex
+ glClearStencil
+ glClipPlane
+ glColor3b
+ glColor3bv
+ glColor3d
+ glColor3dv
+ glColor3f
+ glColor3fv
+ glColor3i
+ glColor3iv
+ glColor3s
+ glColor3sv
+ glColor3ub
+ glColor3ubv
+ glColor3ui
+ glColor3uiv
+ glColor3us
+ glColor3usv
+ glColor4b
+ glColor4bv
+ glColor4d
+ glColor4dv
+ glColor4f
+ glColor4fv
+ glColor4i
+ glColor4iv
+ glColor4s
+ glColor4sv
+ glColor4ub
+ glColor4ubv
+ glColor4ui
+ glColor4uiv
+ glColor4us
+ glColor4usv
+ glColorMask
+ glColorMaterial
+ glColorPointer
+ glCopyPixels
+ glCopyTexImage1D
+ glCopyTexImage2D
+ glCopyTexSubImage1D
+ glCopyTexSubImage2D
+ glCullFace
+; glDebugEntry
+ glDeleteLists
+ glDeleteTextures
+ glDepthFunc
+ glDepthMask
+ glDepthRange
+ glDisable
+ glDisableClientState
+ glDrawArrays
+ glDrawBuffer
+ glDrawElements
+ glDrawPixels
+ glEdgeFlag
+ glEdgeFlagPointer
+ glEdgeFlagv
+ glEnable
+ glEnableClientState
+ glEnd
+ glEndList
+ glEvalCoord1d
+ glEvalCoord1dv
+ glEvalCoord1f
+ glEvalCoord1fv
+ glEvalCoord2d
+ glEvalCoord2dv
+ glEvalCoord2f
+ glEvalCoord2fv
+ glEvalMesh1
+ glEvalMesh2
+ glEvalPoint1
+ glEvalPoint2
+ glFeedbackBuffer
+ glFinish
+ glFlush
+ glFogf
+ glFogfv
+ glFogi
+ glFogiv
+ glFrontFace
+ glFrustum
+ glGenLists
+ glGenTextures
+ glGetBooleanv
+ glGetClipPlane
+ glGetDoublev
+ glGetError
+ glGetFloatv
+ glGetIntegerv
+ glGetLightfv
+ glGetLightiv
+ glGetMapdv
+ glGetMapfv
+ glGetMapiv
+ glGetMaterialfv
+ glGetMaterialiv
+ glGetPixelMapfv
+ glGetPixelMapuiv
+ glGetPixelMapusv
+ glGetPointerv
+ glGetPolygonStipple
+ glGetString
+ glGetTexEnvfv
+ glGetTexEnviv
+ glGetTexGendv
+ glGetTexGenfv
+ glGetTexGeniv
+ glGetTexImage
+ glGetTexLevelParameterfv
+ glGetTexLevelParameteriv
+ glGetTexParameterfv
+ glGetTexParameteriv
+ glHint
+ glIndexMask
+ glIndexPointer
+ glIndexd
+ glIndexdv
+ glIndexf
+ glIndexfv
+ glIndexi
+ glIndexiv
+ glIndexs
+ glIndexsv
+ glIndexub
+ glIndexubv
+ glInitNames
+ glInterleavedArrays
+ glIsEnabled
+ glIsList
+ glIsTexture
+ glLightModelf
+ glLightModelfv
+ glLightModeli
+ glLightModeliv
+ glLightf
+ glLightfv
+ glLighti
+ glLightiv
+ glLineStipple
+ glLineWidth
+ glListBase
+ glLoadIdentity
+ glLoadMatrixd
+ glLoadMatrixf
+ glLoadName
+ glLogicOp
+ glMap1d
+ glMap1f
+ glMap2d
+ glMap2f
+ glMapGrid1d
+ glMapGrid1f
+ glMapGrid2d
+ glMapGrid2f
+ glMaterialf
+ glMaterialfv
+ glMateriali
+ glMaterialiv
+ glMatrixMode
+ glMultMatrixd
+ glMultMatrixf
+ glNewList
+ glNormal3b
+ glNormal3bv
+ glNormal3d
+ glNormal3dv
+ glNormal3f
+ glNormal3fv
+ glNormal3i
+ glNormal3iv
+ glNormal3s
+ glNormal3sv
+ glNormalPointer
+ glOrtho
+ glPassThrough
+ glPixelMapfv
+ glPixelMapuiv
+ glPixelMapusv
+ glPixelStoref
+ glPixelStorei
+ glPixelTransferf
+ glPixelTransferi
+ glPixelZoom
+ glPointSize
+ glPolygonMode
+ glPolygonOffset
+ glPolygonStipple
+ glPopAttrib
+ glPopClientAttrib
+ glPopMatrix
+ glPopName
+ glPrioritizeTextures
+ glPushAttrib
+ glPushClientAttrib
+ glPushMatrix
+ glPushName
+ glRasterPos2d
+ glRasterPos2dv
+ glRasterPos2f
+ glRasterPos2fv
+ glRasterPos2i
+ glRasterPos2iv
+ glRasterPos2s
+ glRasterPos2sv
+ glRasterPos3d
+ glRasterPos3dv
+ glRasterPos3f
+ glRasterPos3fv
+ glRasterPos3i
+ glRasterPos3iv
+ glRasterPos3s
+ glRasterPos3sv
+ glRasterPos4d
+ glRasterPos4dv
+ glRasterPos4f
+ glRasterPos4fv
+ glRasterPos4i
+ glRasterPos4iv
+ glRasterPos4s
+ glRasterPos4sv
+ glReadBuffer
+ glReadPixels
+ glRectd
+ glRectdv
+ glRectf
+ glRectfv
+ glRecti
+ glRectiv
+ glRects
+ glRectsv
+ glRenderMode
+ glRotated
+ glRotatef
+ glScaled
+ glScalef
+ glScissor
+ glSelectBuffer
+ glShadeModel
+ glStencilFunc
+ glStencilMask
+ glStencilOp
+ glTexCoord1d
+ glTexCoord1dv
+ glTexCoord1f
+ glTexCoord1fv
+ glTexCoord1i
+ glTexCoord1iv
+ glTexCoord1s
+ glTexCoord1sv
+ glTexCoord2d
+ glTexCoord2dv
+ glTexCoord2f
+ glTexCoord2fv
+ glTexCoord2i
+ glTexCoord2iv
+ glTexCoord2s
+ glTexCoord2sv
+ glTexCoord3d
+ glTexCoord3dv
+ glTexCoord3f
+ glTexCoord3fv
+ glTexCoord3i
+ glTexCoord3iv
+ glTexCoord3s
+ glTexCoord3sv
+ glTexCoord4d
+ glTexCoord4dv
+ glTexCoord4f
+ glTexCoord4fv
+ glTexCoord4i
+ glTexCoord4iv
+ glTexCoord4s
+ glTexCoord4sv
+ glTexCoordPointer
+ glTexEnvf
+ glTexEnvfv
+ glTexEnvi
+ glTexEnviv
+ glTexGend
+ glTexGendv
+ glTexGenf
+ glTexGenfv
+ glTexGeni
+ glTexGeniv
+ glTexImage1D
+ glTexImage2D
+ glTexParameterf
+ glTexParameterfv
+ glTexParameteri
+ glTexParameteriv
+ glTexSubImage1D
+ glTexSubImage2D
+ glTranslated
+ glTranslatef
+ glVertex2d
+ glVertex2dv
+ glVertex2f
+ glVertex2fv
+ glVertex2i
+ glVertex2iv
+ glVertex2s
+ glVertex2sv
+ glVertex3d
+ glVertex3dv
+ glVertex3f
+ glVertex3fv
+ glVertex3i
+ glVertex3iv
+ glVertex3s
+ glVertex3sv
+ glVertex4d
+ glVertex4dv
+ glVertex4f
+ glVertex4fv
+ glVertex4i
+ glVertex4iv
+ glVertex4s
+ glVertex4sv
+ glVertexPointer
+ glViewport
+ wglChoosePixelFormat
+ wglCopyContext
+ wglCreateContext
+ wglCreateLayerContext
+ wglDeleteContext
+ wglDescribeLayerPlane
+ wglDescribePixelFormat
+ wglGetCurrentContext
+ wglGetCurrentDC
+; wglGetDefaultProcAddress
+ wglGetLayerPaletteEntries
+ wglGetPixelFormat
+ wglGetProcAddress
+ wglMakeCurrent
+ wglRealizeLayerPalette
+ wglSetLayerPaletteEntries
+ wglSetPixelFormat
+ wglShareLists
+ wglSwapBuffers
+ wglSwapLayerBuffers
+; wglSwapMultipleBuffers
+ wglUseFontBitmapsA
+ wglUseFontBitmapsW
+ wglUseFontOutlinesA
+ wglUseFontOutlinesW
+ wglGetExtensionsStringARB
+ DrvCopyContext
+ DrvCreateContext
+ DrvCreateLayerContext
+ DrvDeleteContext
+ DrvDescribeLayerPlane
+ DrvDescribePixelFormat
+ DrvGetLayerPaletteEntries
+ DrvGetProcAddress
+ DrvRealizeLayerPalette
+ DrvReleaseContext
+ DrvSetCallbackProcs
+ DrvSetContext
+ DrvSetLayerPaletteEntries
+ DrvSetPixelFormat
+ DrvShareLists
+ DrvSwapBuffers
+ DrvSwapLayerBuffers
+ DrvValidateVersion
diff --git a/src/gallium/state_trackers/wgl/opengl32.mingw.def b/src/gallium/state_trackers/wgl/opengl32.mingw.def
new file mode 100644
index 0000000000..1f03ea3b37
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/opengl32.mingw.def
@@ -0,0 +1,387 @@
+EXPORTS
+; GlmfBeginGlsBlock = GlmfBeginGlsBlock@4
+; GlmfCloseMetaFile = GlmfCloseMetaFile@4
+; GlmfEndGlsBlock = GlmfEndGlsBlock@4
+; GlmfEndPlayback = GlmfEndPlayback@4
+; GlmfInitPlayback = GlmfInitPlayback@12
+; GlmfPlayGlsRecord = GlmfPlayGlsRecord@16
+ glAccum = glAccum@8
+ glAlphaFunc = glAlphaFunc@8
+ glAreTexturesResident = glAreTexturesResident@12
+ glArrayElement = glArrayElement@4
+ glBegin = glBegin@4
+ glBindTexture = glBindTexture@8
+ glBitmap = glBitmap@28
+ glBlendFunc = glBlendFunc@8
+ glCallList = glCallList@4
+ glCallLists = glCallLists@12
+ glClear = glClear@4
+ glClearAccum = glClearAccum@16
+ glClearColor = glClearColor@16
+ glClearDepth = glClearDepth@8
+ glClearIndex = glClearIndex@4
+ glClearStencil = glClearStencil@4
+ glClipPlane = glClipPlane@8
+ glColor3b = glColor3b@12
+ glColor3bv = glColor3bv@4
+ glColor3d = glColor3d@24
+ glColor3dv = glColor3dv@4
+ glColor3f = glColor3f@12
+ glColor3fv = glColor3fv@4
+ glColor3i = glColor3i@12
+ glColor3iv = glColor3iv@4
+ glColor3s = glColor3s@12
+ glColor3sv = glColor3sv@4
+ glColor3ub = glColor3ub@12
+ glColor3ubv = glColor3ubv@4
+ glColor3ui = glColor3ui@12
+ glColor3uiv = glColor3uiv@4
+ glColor3us = glColor3us@12
+ glColor3usv = glColor3usv@4
+ glColor4b = glColor4b@16
+ glColor4bv = glColor4bv@4
+ glColor4d = glColor4d@32
+ glColor4dv = glColor4dv@4
+ glColor4f = glColor4f@16
+ glColor4fv = glColor4fv@4
+ glColor4i = glColor4i@16
+ glColor4iv = glColor4iv@4
+ glColor4s = glColor4s@16
+ glColor4sv = glColor4sv@4
+ glColor4ub = glColor4ub@16
+ glColor4ubv = glColor4ubv@4
+ glColor4ui = glColor4ui@16
+ glColor4uiv = glColor4uiv@4
+ glColor4us = glColor4us@16
+ glColor4usv = glColor4usv@4
+ glColorMask = glColorMask@16
+ glColorMaterial = glColorMaterial@8
+ glColorPointer = glColorPointer@16
+ glCopyPixels = glCopyPixels@20
+ glCopyTexImage1D = glCopyTexImage1D@28
+ glCopyTexImage2D = glCopyTexImage2D@32
+ glCopyTexSubImage1D = glCopyTexSubImage1D@24
+ glCopyTexSubImage2D = glCopyTexSubImage2D@32
+ glCullFace = glCullFace@4
+; glDebugEntry = glDebugEntry@8
+ glDeleteLists = glDeleteLists@8
+ glDeleteTextures = glDeleteTextures@8
+ glDepthFunc = glDepthFunc@4
+ glDepthMask = glDepthMask@4
+ glDepthRange = glDepthRange@16
+ glDisable = glDisable@4
+ glDisableClientState = glDisableClientState@4
+ glDrawArrays = glDrawArrays@12
+ glDrawBuffer = glDrawBuffer@4
+ glDrawElements = glDrawElements@16
+ glDrawPixels = glDrawPixels@20
+ glEdgeFlag = glEdgeFlag@4
+ glEdgeFlagPointer = glEdgeFlagPointer@8
+ glEdgeFlagv = glEdgeFlagv@4
+ glEnable = glEnable@4
+ glEnableClientState = glEnableClientState@4
+ glEnd = glEnd@0
+ glEndList = glEndList@0
+ glEvalCoord1d = glEvalCoord1d@8
+ glEvalCoord1dv = glEvalCoord1dv@4
+ glEvalCoord1f = glEvalCoord1f@4
+ glEvalCoord1fv = glEvalCoord1fv@4
+ glEvalCoord2d = glEvalCoord2d@16
+ glEvalCoord2dv = glEvalCoord2dv@4
+ glEvalCoord2f = glEvalCoord2f@8
+ glEvalCoord2fv = glEvalCoord2fv@4
+ glEvalMesh1 = glEvalMesh1@12
+ glEvalMesh2 = glEvalMesh2@20
+ glEvalPoint1 = glEvalPoint1@4
+ glEvalPoint2 = glEvalPoint2@8
+ glFeedbackBuffer = glFeedbackBuffer@12
+ glFinish = glFinish@0
+ glFlush = glFlush@0
+ glFogf = glFogf@8
+ glFogfv = glFogfv@8
+ glFogi = glFogi@8
+ glFogiv = glFogiv@8
+ glFrontFace = glFrontFace@4
+ glFrustum = glFrustum@48
+ glGenLists = glGenLists@4
+ glGenTextures = glGenTextures@8
+ glGetBooleanv = glGetBooleanv@8
+ glGetClipPlane = glGetClipPlane@8
+ glGetDoublev = glGetDoublev@8
+ glGetError = glGetError@0
+ glGetFloatv = glGetFloatv@8
+ glGetIntegerv = glGetIntegerv@8
+ glGetLightfv = glGetLightfv@12
+ glGetLightiv = glGetLightiv@12
+ glGetMapdv = glGetMapdv@12
+ glGetMapfv = glGetMapfv@12
+ glGetMapiv = glGetMapiv@12
+ glGetMaterialfv = glGetMaterialfv@12
+ glGetMaterialiv = glGetMaterialiv@12
+ glGetPixelMapfv = glGetPixelMapfv@8
+ glGetPixelMapuiv = glGetPixelMapuiv@8
+ glGetPixelMapusv = glGetPixelMapusv@8
+ glGetPointerv = glGetPointerv@8
+ glGetPolygonStipple = glGetPolygonStipple@4
+ glGetString = glGetString@4
+ glGetTexEnvfv = glGetTexEnvfv@12
+ glGetTexEnviv = glGetTexEnviv@12
+ glGetTexGendv = glGetTexGendv@12
+ glGetTexGenfv = glGetTexGenfv@12
+ glGetTexGeniv = glGetTexGeniv@12
+ glGetTexImage = glGetTexImage@20
+ glGetTexLevelParameterfv = glGetTexLevelParameterfv@16
+ glGetTexLevelParameteriv = glGetTexLevelParameteriv@16
+ glGetTexParameterfv = glGetTexParameterfv@12
+ glGetTexParameteriv = glGetTexParameteriv@12
+ glHint = glHint@8
+ glIndexMask = glIndexMask@4
+ glIndexPointer = glIndexPointer@12
+ glIndexd = glIndexd@8
+ glIndexdv = glIndexdv@4
+ glIndexf = glIndexf@4
+ glIndexfv = glIndexfv@4
+ glIndexi = glIndexi@4
+ glIndexiv = glIndexiv@4
+ glIndexs = glIndexs@4
+ glIndexsv = glIndexsv@4
+ glIndexub = glIndexub@4
+ glIndexubv = glIndexubv@4
+ glInitNames = glInitNames@0
+ glInterleavedArrays = glInterleavedArrays@12
+ glIsEnabled = glIsEnabled@4
+ glIsList = glIsList@4
+ glIsTexture = glIsTexture@4
+ glLightModelf = glLightModelf@8
+ glLightModelfv = glLightModelfv@8
+ glLightModeli = glLightModeli@8
+ glLightModeliv = glLightModeliv@8
+ glLightf = glLightf@12
+ glLightfv = glLightfv@12
+ glLighti = glLighti@12
+ glLightiv = glLightiv@12
+ glLineStipple = glLineStipple@8
+ glLineWidth = glLineWidth@4
+ glListBase = glListBase@4
+ glLoadIdentity = glLoadIdentity@0
+ glLoadMatrixd = glLoadMatrixd@4
+ glLoadMatrixf = glLoadMatrixf@4
+ glLoadName = glLoadName@4
+ glLogicOp = glLogicOp@4
+ glMap1d = glMap1d@32
+ glMap1f = glMap1f@24
+ glMap2d = glMap2d@56
+ glMap2f = glMap2f@40
+ glMapGrid1d = glMapGrid1d@20
+ glMapGrid1f = glMapGrid1f@12
+ glMapGrid2d = glMapGrid2d@40
+ glMapGrid2f = glMapGrid2f@24
+ glMaterialf = glMaterialf@12
+ glMaterialfv = glMaterialfv@12
+ glMateriali = glMateriali@12
+ glMaterialiv = glMaterialiv@12
+ glMatrixMode = glMatrixMode@4
+ glMultMatrixd = glMultMatrixd@4
+ glMultMatrixf = glMultMatrixf@4
+ glNewList = glNewList@8
+ glNormal3b = glNormal3b@12
+ glNormal3bv = glNormal3bv@4
+ glNormal3d = glNormal3d@24
+ glNormal3dv = glNormal3dv@4
+ glNormal3f = glNormal3f@12
+ glNormal3fv = glNormal3fv@4
+ glNormal3i = glNormal3i@12
+ glNormal3iv = glNormal3iv@4
+ glNormal3s = glNormal3s@12
+ glNormal3sv = glNormal3sv@4
+ glNormalPointer = glNormalPointer@12
+ glOrtho = glOrtho@48
+ glPassThrough = glPassThrough@4
+ glPixelMapfv = glPixelMapfv@12
+ glPixelMapuiv = glPixelMapuiv@12
+ glPixelMapusv = glPixelMapusv@12
+ glPixelStoref = glPixelStoref@8
+ glPixelStorei = glPixelStorei@8
+ glPixelTransferf = glPixelTransferf@8
+ glPixelTransferi = glPixelTransferi@8
+ glPixelZoom = glPixelZoom@8
+ glPointSize = glPointSize@4
+ glPolygonMode = glPolygonMode@8
+ glPolygonOffset = glPolygonOffset@8
+ glPolygonStipple = glPolygonStipple@4
+ glPopAttrib = glPopAttrib@0
+ glPopClientAttrib = glPopClientAttrib@0
+ glPopMatrix = glPopMatrix@0
+ glPopName = glPopName@0
+ glPrioritizeTextures = glPrioritizeTextures@12
+ glPushAttrib = glPushAttrib@4
+ glPushClientAttrib = glPushClientAttrib@4
+ glPushMatrix = glPushMatrix@0
+ glPushName = glPushName@4
+ glRasterPos2d = glRasterPos2d@16
+ glRasterPos2dv = glRasterPos2dv@4
+ glRasterPos2f = glRasterPos2f@8
+ glRasterPos2fv = glRasterPos2fv@4
+ glRasterPos2i = glRasterPos2i@8
+ glRasterPos2iv = glRasterPos2iv@4
+ glRasterPos2s = glRasterPos2s@8
+ glRasterPos2sv = glRasterPos2sv@4
+ glRasterPos3d = glRasterPos3d@24
+ glRasterPos3dv = glRasterPos3dv@4
+ glRasterPos3f = glRasterPos3f@12
+ glRasterPos3fv = glRasterPos3fv@4
+ glRasterPos3i = glRasterPos3i@12
+ glRasterPos3iv = glRasterPos3iv@4
+ glRasterPos3s = glRasterPos3s@12
+ glRasterPos3sv = glRasterPos3sv@4
+ glRasterPos4d = glRasterPos4d@32
+ glRasterPos4dv = glRasterPos4dv@4
+ glRasterPos4f = glRasterPos4f@16
+ glRasterPos4fv = glRasterPos4fv@4
+ glRasterPos4i = glRasterPos4i@16
+ glRasterPos4iv = glRasterPos4iv@4
+ glRasterPos4s = glRasterPos4s@16
+ glRasterPos4sv = glRasterPos4sv@4
+ glReadBuffer = glReadBuffer@4
+ glReadPixels = glReadPixels@28
+ glRectd = glRectd@32
+ glRectdv = glRectdv@8
+ glRectf = glRectf@16
+ glRectfv = glRectfv@8
+ glRecti = glRecti@16
+ glRectiv = glRectiv@8
+ glRects = glRects@16
+ glRectsv = glRectsv@8
+ glRenderMode = glRenderMode@4
+ glRotated = glRotated@32
+ glRotatef = glRotatef@16
+ glScaled = glScaled@24
+ glScalef = glScalef@12
+ glScissor = glScissor@16
+ glSelectBuffer = glSelectBuffer@8
+ glShadeModel = glShadeModel@4
+ glStencilFunc = glStencilFunc@12
+ glStencilMask = glStencilMask@4
+ glStencilOp = glStencilOp@12
+ glTexCoord1d = glTexCoord1d@8
+ glTexCoord1dv = glTexCoord1dv@4
+ glTexCoord1f = glTexCoord1f@4
+ glTexCoord1fv = glTexCoord1fv@4
+ glTexCoord1i = glTexCoord1i@4
+ glTexCoord1iv = glTexCoord1iv@4
+ glTexCoord1s = glTexCoord1s@4
+ glTexCoord1sv = glTexCoord1sv@4
+ glTexCoord2d = glTexCoord2d@16
+ glTexCoord2dv = glTexCoord2dv@4
+ glTexCoord2f = glTexCoord2f@8
+ glTexCoord2fv = glTexCoord2fv@4
+ glTexCoord2i = glTexCoord2i@8
+ glTexCoord2iv = glTexCoord2iv@4
+ glTexCoord2s = glTexCoord2s@8
+ glTexCoord2sv = glTexCoord2sv@4
+ glTexCoord3d = glTexCoord3d@24
+ glTexCoord3dv = glTexCoord3dv@4
+ glTexCoord3f = glTexCoord3f@12
+ glTexCoord3fv = glTexCoord3fv@4
+ glTexCoord3i = glTexCoord3i@12
+ glTexCoord3iv = glTexCoord3iv@4
+ glTexCoord3s = glTexCoord3s@12
+ glTexCoord3sv = glTexCoord3sv@4
+ glTexCoord4d = glTexCoord4d@32
+ glTexCoord4dv = glTexCoord4dv@4
+ glTexCoord4f = glTexCoord4f@16
+ glTexCoord4fv = glTexCoord4fv@4
+ glTexCoord4i = glTexCoord4i@16
+ glTexCoord4iv = glTexCoord4iv@4
+ glTexCoord4s = glTexCoord4s@16
+ glTexCoord4sv = glTexCoord4sv@4
+ glTexCoordPointer = glTexCoordPointer@16
+ glTexEnvf = glTexEnvf@12
+ glTexEnvfv = glTexEnvfv@12
+ glTexEnvi = glTexEnvi@12
+ glTexEnviv = glTexEnviv@12
+ glTexGend = glTexGend@16
+ glTexGendv = glTexGendv@12
+ glTexGenf = glTexGenf@12
+ glTexGenfv = glTexGenfv@12
+ glTexGeni = glTexGeni@12
+ glTexGeniv = glTexGeniv@12
+ glTexImage1D = glTexImage1D@32
+ glTexImage2D = glTexImage2D@36
+ glTexParameterf = glTexParameterf@12
+ glTexParameterfv = glTexParameterfv@12
+ glTexParameteri = glTexParameteri@12
+ glTexParameteriv = glTexParameteriv@12
+ glTexSubImage1D = glTexSubImage1D@28
+ glTexSubImage2D = glTexSubImage2D@36
+ glTranslated = glTranslated@24
+ glTranslatef = glTranslatef@12
+ glVertex2d = glVertex2d@16
+ glVertex2dv = glVertex2dv@4
+ glVertex2f = glVertex2f@8
+ glVertex2fv = glVertex2fv@4
+ glVertex2i = glVertex2i@8
+ glVertex2iv = glVertex2iv@4
+ glVertex2s = glVertex2s@8
+ glVertex2sv = glVertex2sv@4
+ glVertex3d = glVertex3d@24
+ glVertex3dv = glVertex3dv@4
+ glVertex3f = glVertex3f@12
+ glVertex3fv = glVertex3fv@4
+ glVertex3i = glVertex3i@12
+ glVertex3iv = glVertex3iv@4
+ glVertex3s = glVertex3s@12
+ glVertex3sv = glVertex3sv@4
+ glVertex4d = glVertex4d@32
+ glVertex4dv = glVertex4dv@4
+ glVertex4f = glVertex4f@16
+ glVertex4fv = glVertex4fv@4
+ glVertex4i = glVertex4i@16
+ glVertex4iv = glVertex4iv@4
+ glVertex4s = glVertex4s@16
+ glVertex4sv = glVertex4sv@4
+ glVertexPointer = glVertexPointer@16
+ glViewport = glViewport@16
+ wglChoosePixelFormat = wglChoosePixelFormat@8
+ wglCopyContext = wglCopyContext@12
+ wglCreateContext = wglCreateContext@4
+ wglCreateLayerContext = wglCreateLayerContext@8
+ wglDeleteContext = wglDeleteContext@4
+ wglDescribeLayerPlane = wglDescribeLayerPlane@20
+ wglDescribePixelFormat = wglDescribePixelFormat@16
+ wglGetCurrentContext = wglGetCurrentContext@0
+ wglGetCurrentDC = wglGetCurrentDC@0
+; wglGetDefaultProcAddress = wglGetDefaultProcAddress@4
+ wglGetLayerPaletteEntries = wglGetLayerPaletteEntries@20
+ wglGetPixelFormat = wglGetPixelFormat@4
+ wglGetProcAddress = wglGetProcAddress@4
+ wglMakeCurrent = wglMakeCurrent@8
+ wglRealizeLayerPalette = wglRealizeLayerPalette@12
+ wglSetLayerPaletteEntries = wglSetLayerPaletteEntries@20
+ wglSetPixelFormat = wglSetPixelFormat@12
+ wglShareLists = wglShareLists@8
+ wglSwapBuffers = wglSwapBuffers@4
+ wglSwapLayerBuffers = wglSwapLayerBuffers@8
+; wglSwapMultipleBuffers = wglSwapMultipleBuffers@8
+ wglUseFontBitmapsA = wglUseFontBitmapsA@16
+ wglUseFontBitmapsW = wglUseFontBitmapsW@16
+ wglUseFontOutlinesA = wglUseFontOutlinesA@32
+ wglUseFontOutlinesW = wglUseFontOutlinesW@32
+ DrvCopyContext = DrvCopyContext@12
+ DrvCreateContext = DrvCreateContext@4
+ DrvCreateLayerContext = DrvCreateLayerContext@8
+ DrvDeleteContext = DrvDeleteContext@4
+ DrvDescribeLayerPlane = DrvDescribeLayerPlane@20
+ DrvDescribePixelFormat = DrvDescribePixelFormat@16
+ DrvGetLayerPaletteEntries = DrvGetLayerPaletteEntries@20
+ DrvGetProcAddress = DrvGetProcAddress@4
+ DrvRealizeLayerPalette = DrvRealizeLayerPalette@12
+ DrvReleaseContext = DrvReleaseContext@4
+ DrvSetCallbackProcs = DrvSetCallbackProcs@8
+ DrvSetContext = DrvSetContext@12
+ DrvSetLayerPaletteEntries = DrvSetLayerPaletteEntries@20
+ DrvSetPixelFormat = DrvSetPixelFormat@8
+ DrvShareLists = DrvShareLists@8
+ DrvSwapBuffers = DrvSwapBuffers@4
+ DrvSwapLayerBuffers = DrvSwapLayerBuffers@8
+ DrvValidateVersion = DrvValidateVersion@4
diff --git a/src/gallium/winsys/drm/intel/egl/intel_device.h b/src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.c
index 323a7c2aef..b3934cb464 100644
--- a/src/gallium/winsys/drm/intel/egl/intel_device.h
+++ b/src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.c
@@ -1,8 +1,8 @@
/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,29 +22,21 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
-#ifndef _INTEL_SCREEN_H_
-#define _INTEL_SCREEN_H_
+#include <windows.h>
-#include "intel_be_device.h"
+#include "stw_arbextensionsstring.h"
-#include "pipe/p_compiler.h"
-
-struct pipe_screen;
-struct egl_drm_device;
-struct intel_context;
-
-struct intel_device
+WINGDIAPI const char * APIENTRY /* Returns a constant, space-separated list of WGL extension names. */
+wglGetExtensionsStringARB(
+ HDC hdc )
{
- struct intel_be_device base;
- struct pipe_screen *pipe;
-
- int deviceID;
- struct egl_drm_device *device;
-
- struct intel_context *dummy;
-};
+ (void) hdc; /* unused: the same literal is returned regardless of the DC */
-#endif
+ return
+ "WGL_ARB_extensions_string "
+ "WGL_ARB_multisample "
+ "WGL_ARB_pixel_format"; /* adjacent literals concatenate; note the trailing spaces acting as separators */
+}
diff --git a/src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.h b/src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.h
new file mode 100644
index 0000000000..a0e4c5d98e
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.h
@@ -0,0 +1,35 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef WGL_ARBEXTENSIONSSTRING_H
+#define WGL_ARBEXTENSIONSSTRING_H
+
+WINGDIAPI const char * APIENTRY
+wglGetExtensionsStringARB(
+ HDC hdc );
+
+#endif /* WGL_ARBEXTENSIONSSTRING_H */
diff --git a/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.c b/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.c
new file mode 100644
index 0000000000..f563635420
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.c
@@ -0,0 +1,519 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <windows.h>
+
+#include "pipe/p_compiler.h"
+#include "util/u_memory.h"
+#include "stw_public.h"
+#include "stw_pixelformat.h"
+#include "stw_arbpixelformat.h"
+
+#define WGL_NUMBER_PIXEL_FORMATS_ARB 0x2000
+#define WGL_DRAW_TO_WINDOW_ARB 0x2001
+#define WGL_DRAW_TO_BITMAP_ARB 0x2002
+#define WGL_ACCELERATION_ARB 0x2003
+#define WGL_NEED_PALETTE_ARB 0x2004
+#define WGL_NEED_SYSTEM_PALETTE_ARB 0x2005
+#define WGL_SWAP_LAYER_BUFFERS_ARB 0x2006
+#define WGL_SWAP_METHOD_ARB 0x2007
+#define WGL_NUMBER_OVERLAYS_ARB 0x2008
+#define WGL_NUMBER_UNDERLAYS_ARB 0x2009
+#define WGL_TRANSPARENT_ARB 0x200A
+#define WGL_TRANSPARENT_RED_VALUE_ARB 0x2037
+#define WGL_TRANSPARENT_GREEN_VALUE_ARB 0x2038
+#define WGL_TRANSPARENT_BLUE_VALUE_ARB 0x2039
+#define WGL_TRANSPARENT_ALPHA_VALUE_ARB 0x203A
+#define WGL_TRANSPARENT_INDEX_VALUE_ARB 0x203B
+#define WGL_SHARE_DEPTH_ARB 0x200C
+#define WGL_SHARE_STENCIL_ARB 0x200D
+#define WGL_SHARE_ACCUM_ARB 0x200E
+#define WGL_SUPPORT_GDI_ARB 0x200F
+#define WGL_SUPPORT_OPENGL_ARB 0x2010
+#define WGL_DOUBLE_BUFFER_ARB 0x2011
+#define WGL_STEREO_ARB 0x2012
+#define WGL_PIXEL_TYPE_ARB 0x2013
+#define WGL_COLOR_BITS_ARB 0x2014
+#define WGL_RED_BITS_ARB 0x2015
+#define WGL_RED_SHIFT_ARB 0x2016
+#define WGL_GREEN_BITS_ARB 0x2017
+#define WGL_GREEN_SHIFT_ARB 0x2018
+#define WGL_BLUE_BITS_ARB 0x2019
+#define WGL_BLUE_SHIFT_ARB 0x201A
+#define WGL_ALPHA_BITS_ARB 0x201B
+#define WGL_ALPHA_SHIFT_ARB 0x201C
+#define WGL_ACCUM_BITS_ARB 0x201D
+#define WGL_ACCUM_RED_BITS_ARB 0x201E
+#define WGL_ACCUM_GREEN_BITS_ARB 0x201F
+#define WGL_ACCUM_BLUE_BITS_ARB 0x2020
+#define WGL_ACCUM_ALPHA_BITS_ARB 0x2021
+#define WGL_DEPTH_BITS_ARB 0x2022
+#define WGL_STENCIL_BITS_ARB 0x2023
+#define WGL_AUX_BUFFERS_ARB 0x2024
+
+#define WGL_NO_ACCELERATION_ARB 0x2025
+#define WGL_GENERIC_ACCELERATION_ARB 0x2026
+#define WGL_FULL_ACCELERATION_ARB 0x2027
+
+#define WGL_SWAP_EXCHANGE_ARB 0x2028
+#define WGL_SWAP_COPY_ARB 0x2029
+#define WGL_SWAP_UNDEFINED_ARB 0x202A
+
+#define WGL_TYPE_RGBA_ARB 0x202B
+#define WGL_TYPE_COLORINDEX_ARB 0x202C
+
+/* From arb_multisample:
+ */
+#define WGL_SAMPLE_BUFFERS_ARB 0x2041
+#define WGL_SAMPLES_ARB 0x2042
+
+
+/**
+ * Look up the value of one WGL_ARB_pixel_format / WGL_ARB_multisample
+ * attribute for a pixelformat.
+ *
+ * iPixelFormat is 1-based; values outside [1, extended count] are rejected.
+ * WGL_NUMBER_PIXEL_FORMATS_ARB is answered before the index is validated,
+ * so it succeeds for any iPixelFormat.
+ *
+ * A first group of attributes is answered for any iLayerPlane; all
+ * remaining attributes are only answered for layer plane 0.
+ *
+ * Returns TRUE and writes *pvalue when the attribute is recognised,
+ * FALSE (leaving *pvalue untouched) otherwise.
+ */
+static boolean
+query_attrib(
+   int iPixelFormat,
+   int iLayerPlane,
+   int attrib,
+   int *pvalue )
+{
+   uint count;
+   uint index;
+   const struct pixelformat_info *pf;
+
+   count = pixelformat_get_extended_count();
+
+   if (attrib == WGL_NUMBER_PIXEL_FORMATS_ARB) {
+      *pvalue = (int) count;
+      return TRUE;
+   }
+
+   /* Convert to a 0-based index. A zero or negative iPixelFormat wraps to
+    * a huge unsigned value and is rejected by the range check.
+    */
+   index = (uint) iPixelFormat - 1;
+   if (index >= count)
+      return FALSE;
+
+   pf = pixelformat_get_info( index );
+
+   /* Attributes answered regardless of the requested layer plane.
+    */
+   switch (attrib) {
+   case WGL_DRAW_TO_WINDOW_ARB:
+      *pvalue = TRUE;
+      return TRUE;
+
+   case WGL_DRAW_TO_BITMAP_ARB:
+      *pvalue = FALSE;
+      return TRUE;
+
+   case WGL_NEED_PALETTE_ARB:
+      *pvalue = FALSE;
+      return TRUE;
+
+   case WGL_NEED_SYSTEM_PALETTE_ARB:
+      *pvalue = FALSE;
+      return TRUE;
+
+   case WGL_SWAP_METHOD_ARB:
+      if (pf->flags & PF_FLAG_DOUBLEBUFFER)
+         *pvalue = WGL_SWAP_COPY_ARB;
+      else
+         *pvalue = WGL_SWAP_UNDEFINED_ARB;
+      return TRUE;
+
+   case WGL_SWAP_LAYER_BUFFERS_ARB:
+      *pvalue = FALSE;
+      return TRUE;
+
+   case WGL_NUMBER_OVERLAYS_ARB:
+      *pvalue = 0;
+      return TRUE;
+
+   case WGL_NUMBER_UNDERLAYS_ARB:
+      *pvalue = 0;
+      return TRUE;
+   }
+
+   /* Everything below is only answered for the main plane.
+    */
+   if (iLayerPlane != 0)
+      return FALSE;
+
+   switch (attrib) {
+   case WGL_ACCELERATION_ARB:
+      *pvalue = WGL_FULL_ACCELERATION_ARB;
+      break;
+
+   case WGL_TRANSPARENT_ARB:
+      *pvalue = FALSE;
+      break;
+
+   case WGL_TRANSPARENT_RED_VALUE_ARB:
+   case WGL_TRANSPARENT_GREEN_VALUE_ARB:
+   case WGL_TRANSPARENT_BLUE_VALUE_ARB:
+   case WGL_TRANSPARENT_ALPHA_VALUE_ARB:
+   case WGL_TRANSPARENT_INDEX_VALUE_ARB:
+      /* WGL_TRANSPARENT_ARB is always reported as FALSE, so there is no
+       * meaningful transparent value. Still write a defined value: the
+       * function returns TRUE and callers read *pvalue afterwards.
+       */
+      *pvalue = 0;
+      break;
+
+   case WGL_SHARE_DEPTH_ARB:
+   case WGL_SHARE_STENCIL_ARB:
+   case WGL_SHARE_ACCUM_ARB:
+      *pvalue = TRUE;
+      break;
+
+   case WGL_SUPPORT_GDI_ARB:
+      *pvalue = FALSE;
+      break;
+
+   case WGL_SUPPORT_OPENGL_ARB:
+      *pvalue = TRUE;
+      break;
+
+   case WGL_DOUBLE_BUFFER_ARB:
+      if (pf->flags & PF_FLAG_DOUBLEBUFFER)
+         *pvalue = TRUE;
+      else
+         *pvalue = FALSE;
+      break;
+
+   case WGL_STEREO_ARB:
+      *pvalue = FALSE;
+      break;
+
+   case WGL_PIXEL_TYPE_ARB:
+      *pvalue = WGL_TYPE_RGBA_ARB;
+      break;
+
+   case WGL_COLOR_BITS_ARB:
+      /* Sum of the red, green and blue bits; alpha bits are not counted. */
+      *pvalue = (int) (pf->color.redbits + pf->color.greenbits + pf->color.bluebits);
+      break;
+
+   case WGL_RED_BITS_ARB:
+      *pvalue = (int) pf->color.redbits;
+      break;
+
+   case WGL_RED_SHIFT_ARB:
+      *pvalue = (int) pf->color.redshift;
+      break;
+
+   case WGL_GREEN_BITS_ARB:
+      *pvalue = (int) pf->color.greenbits;
+      break;
+
+   case WGL_GREEN_SHIFT_ARB:
+      *pvalue = (int) pf->color.greenshift;
+      break;
+
+   case WGL_BLUE_BITS_ARB:
+      *pvalue = (int) pf->color.bluebits;
+      break;
+
+   case WGL_BLUE_SHIFT_ARB:
+      *pvalue = (int) pf->color.blueshift;
+      break;
+
+   case WGL_ALPHA_BITS_ARB:
+      *pvalue = (int) pf->alpha.alphabits;
+      break;
+
+   case WGL_ALPHA_SHIFT_ARB:
+      *pvalue = (int) pf->alpha.alphashift;
+      break;
+
+   case WGL_ACCUM_BITS_ARB:
+   case WGL_ACCUM_RED_BITS_ARB:
+   case WGL_ACCUM_GREEN_BITS_ARB:
+   case WGL_ACCUM_BLUE_BITS_ARB:
+   case WGL_ACCUM_ALPHA_BITS_ARB:
+      *pvalue = 0;
+      break;
+
+   case WGL_DEPTH_BITS_ARB:
+      *pvalue = (int) pf->depth.depthbits;
+      break;
+
+   case WGL_STENCIL_BITS_ARB:
+      *pvalue = (int) pf->depth.stencilbits;
+      break;
+
+   case WGL_AUX_BUFFERS_ARB:
+      *pvalue = 0;
+      break;
+
+   case WGL_SAMPLE_BUFFERS_ARB:
+      if (pf->flags & PF_FLAG_MULTISAMPLED)
+         *pvalue = stw_query_sample_buffers();
+      else
+         *pvalue = 0;
+      break;
+
+   case WGL_SAMPLES_ARB:
+      if (pf->flags & PF_FLAG_MULTISAMPLED)
+         *pvalue = stw_query_samples();
+      else
+         *pvalue = 0;
+      break;
+
+   default:
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+struct attrib_match_info /* one scoring rule: how a requested attribute value is compared against a pixelformat */
+{
+ int attribute; /* WGL_*_ARB attribute token the rule applies to */
+ int weight; /* penalty points per unit by which the actual value exceeds the requested one (non-exact rules only) */
+ BOOL exact; /* TRUE: actual must equal the request; FALSE: actual must be at least the request */
+};
+
+static struct attrib_match_info attrib_match[] = { /* columns: attribute, weight, exact; attributes not listed here are ignored when scoring */
+
+ /* WGL_ARB_pixel_format */
+ { WGL_DRAW_TO_WINDOW_ARB, 0, TRUE },
+ { WGL_DRAW_TO_BITMAP_ARB, 0, TRUE },
+ { WGL_ACCELERATION_ARB, 0, TRUE },
+ { WGL_NEED_PALETTE_ARB, 0, TRUE },
+ { WGL_NEED_SYSTEM_PALETTE_ARB, 0, TRUE },
+ { WGL_SWAP_LAYER_BUFFERS_ARB, 0, TRUE },
+ { WGL_SWAP_METHOD_ARB, 0, TRUE },
+ { WGL_NUMBER_OVERLAYS_ARB, 4, FALSE },
+ { WGL_NUMBER_UNDERLAYS_ARB, 4, FALSE },
+ /*{ WGL_SHARE_DEPTH_ARB, 0, TRUE },*/ /* no overlays -- ignore */
+ /*{ WGL_SHARE_STENCIL_ARB, 0, TRUE },*/ /* no overlays -- ignore */
+ /*{ WGL_SHARE_ACCUM_ARB, 0, TRUE },*/ /* no overlays -- ignore */
+ { WGL_SUPPORT_GDI_ARB, 0, TRUE },
+ { WGL_SUPPORT_OPENGL_ARB, 0, TRUE },
+ { WGL_DOUBLE_BUFFER_ARB, 0, TRUE },
+ { WGL_STEREO_ARB, 0, TRUE },
+ { WGL_PIXEL_TYPE_ARB, 0, TRUE },
+ { WGL_COLOR_BITS_ARB, 1, FALSE },
+ { WGL_RED_BITS_ARB, 1, FALSE },
+ { WGL_GREEN_BITS_ARB, 1, FALSE },
+ { WGL_BLUE_BITS_ARB, 1, FALSE },
+ { WGL_ALPHA_BITS_ARB, 1, FALSE },
+ { WGL_ACCUM_BITS_ARB, 1, FALSE },
+ { WGL_ACCUM_RED_BITS_ARB, 1, FALSE },
+ { WGL_ACCUM_GREEN_BITS_ARB, 1, FALSE },
+ { WGL_ACCUM_BLUE_BITS_ARB, 1, FALSE },
+ { WGL_ACCUM_ALPHA_BITS_ARB, 1, FALSE },
+ { WGL_DEPTH_BITS_ARB, 1, FALSE },
+ { WGL_STENCIL_BITS_ARB, 1, FALSE },
+ { WGL_AUX_BUFFERS_ARB, 2, FALSE },
+
+ /* WGL_ARB_multisample */
+ { WGL_SAMPLE_BUFFERS_ARB, 2, FALSE },
+ { WGL_SAMPLES_ARB, 2, FALSE }
+};
+
+struct pixelformat_score /* running match score for one pixelformat during wglChoosePixelFormatARB */
+{
+ int points; /* higher is a better match; values <= 0 mean the pixelformat is rejected */
+ uint index; /* 0-based pixelformat index (reported to the caller as index + 1) */
+};
+
+/**
+ * Apply one (attribute, expected_value) request to the score table.
+ *
+ * Attributes without an entry in attrib_match are ignored and TRUE is
+ * returned. Otherwise every pixelformat is queried for the attribute:
+ *  - exact rules zero the score when the values differ;
+ *  - minimum rules zero the score when the actual value is too small and
+ *    subtract a weighted penalty when it is larger than requested.
+ *
+ * Returns FALSE as soon as query_attrib fails for a pixelformat; scores
+ * updated before that point keep their new values.
+ */
+static BOOL
+score_pixelformats(
+   struct pixelformat_score *scores,
+   uint count,
+   int attribute,
+   int expected_value )
+{
+   const uint num_rules = sizeof( attrib_match ) / sizeof( attrib_match[0] );
+   struct attrib_match_info *rule = NULL;
+   uint r;
+   uint fmt;
+
+   /* Locate the scoring rule for this attribute, if there is one.
+    */
+   for (r = 0; r < num_rules && rule == NULL; r++) {
+      if (attrib_match[r].attribute == attribute)
+         rule = &attrib_match[r];
+   }
+   if (rule == NULL)
+      return TRUE;
+
+   for (fmt = 0; fmt < count; fmt++) {
+      int actual_value;
+
+      /* Pixelformat numbers passed to query_attrib are 1-based. */
+      if (!query_attrib( fmt + 1, 0, attribute, &actual_value ))
+         return FALSE;
+
+      /* A perfect match never changes the score, whatever the rule. */
+      if (actual_value == expected_value)
+         continue;
+
+      if (rule->exact || actual_value < expected_value) {
+         /* Exact mismatch, or below the requested minimum: reject. */
+         scores[fmt].points = 0;
+      }
+      else {
+         /* More than requested: keep it, but prefer closer matches. */
+         scores[fmt].points -= (actual_value - expected_value) * rule->weight;
+      }
+   }
+
+   return TRUE;
+}
+
+/**
+ * Select the pixelformats that best match the requested attributes.
+ *
+ * piAttribIList and pfAttribFList are optional, zero-terminated lists of
+ * (attribute, value) pairs; float pairs are truncated to int before use.
+ * Up to nMaxFormats 1-based pixelformat numbers are written to piFormats,
+ * best match first, and *nNumFormats receives how many were written
+ * (never more than nMaxFormats).
+ *
+ * Returns FALSE if the score table cannot be allocated or an attribute
+ * query fails, TRUE otherwise (including when nothing matches).
+ */
+WINGDIAPI BOOL APIENTRY
+wglChoosePixelFormatARB(
+   HDC hdc,
+   const int *piAttribIList,
+   const FLOAT *pfAttribFList,
+   UINT nMaxFormats,
+   int *piFormats,
+   UINT *nNumFormats )
+{
+   uint count;
+   struct pixelformat_score *scores;
+   uint i;
+
+   (void) hdc;
+
+   *nNumFormats = 0;
+
+   /* With no pixelformats there is nothing to match; do not depend on
+    * what a zero-sized allocation returns.
+    */
+   count = pixelformat_get_extended_count();
+   if (count == 0)
+      return TRUE;
+
+   /* Allocate and initialize pixelformat score table -- better matches
+    * have higher scores. Start with a high score and take out penalty
+    * points for a mismatch when the match does not have to be exact.
+    * Set a score to 0 if there is a mismatch for an exact match criteria.
+    */
+   scores = (struct pixelformat_score *) MALLOC( count * sizeof( struct pixelformat_score ) );
+   if (scores == NULL)
+      return FALSE;
+   for (i = 0; i < count; i++) {
+      scores[i].points = 0x7fffffff;
+      scores[i].index = i;
+   }
+
+   /* Given the attribute list calculate a score for each pixelformat.
+    */
+   if (piAttribIList != NULL) {
+      while (*piAttribIList != 0) {
+         if (!score_pixelformats( scores, count, piAttribIList[0], piAttribIList[1] )) {
+            FREE( scores );
+            return FALSE;
+         }
+         piAttribIList += 2;
+      }
+   }
+   if (pfAttribFList != NULL) {
+      while (*pfAttribFList != 0) {
+         if (!score_pixelformats( scores, count, (int) pfAttribFList[0], (int) pfAttribFList[1] )) {
+            FREE( scores );
+            return FALSE;
+         }
+         pfAttribFList += 2;
+      }
+   }
+
+   /* Bubble-sort the resulting scores. Pixelformats with higher scores go first.
+    * Only strictly lower scores are swapped, so equally scored pixelformats
+    * keep their original (ascending index) order.
+    * TODO: Find out if there are any patent issues with it.
+    */
+   if (count > 1) {
+      uint n = count;
+      boolean swapped;
+
+      do {
+         swapped = FALSE;
+         for (i = 1; i < n; i++) {
+            if (scores[i - 1].points < scores[i].points) {
+               struct pixelformat_score score = scores[i - 1];
+
+               scores[i - 1] = scores[i];
+               scores[i] = score;
+               swapped = TRUE;
+            }
+         }
+         n--;
+      }
+      while (swapped);
+   }
+
+   /* Return a list of pixelformats that are the best match.
+    * Reject pixelformats with non-positive scores, and stop once the
+    * caller's array is full so that *nNumFormats never exceeds nMaxFormats.
+    */
+   for (i = 0; i < count && *nNumFormats < nMaxFormats; i++) {
+      if (scores[i].points > 0) {
+         piFormats[*nNumFormats] = scores[i].index + 1;
+         (*nNumFormats)++;
+      }
+   }
+
+   FREE( scores );
+   return TRUE;
+}
+
+/**
+ * Float flavour of the pixelformat attribute query: each requested
+ * attribute is looked up with query_attrib and stored in pfValues as a
+ * FLOAT. Stops and returns FALSE at the first attribute that cannot be
+ * queried; returns TRUE when all nAttributes values were stored.
+ */
+WINGDIAPI BOOL APIENTRY
+wglGetPixelFormatAttribfvARB(
+   HDC hdc,
+   int iPixelFormat,
+   int iLayerPlane,
+   UINT nAttributes,
+   const int *piAttributes,
+   FLOAT *pfValues )
+{
+   UINT n;
+
+   (void) hdc;
+
+   for (n = 0; n != nAttributes; ++n) {
+      int queried;
+
+      if (query_attrib( iPixelFormat, iLayerPlane, piAttributes[n], &queried )) {
+         pfValues[n] = (FLOAT) queried;
+         continue;
+      }
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+/**
+ * Integer flavour of the pixelformat attribute query: query_attrib writes
+ * each requested attribute straight into the matching piValues slot.
+ * Stops and returns FALSE at the first attribute that cannot be queried;
+ * returns TRUE when all nAttributes were processed.
+ */
+WINGDIAPI BOOL APIENTRY
+wglGetPixelFormatAttribivARB(
+   HDC hdc,
+   int iPixelFormat,
+   int iLayerPlane,
+   UINT nAttributes,
+   const int *piAttributes,
+   int *piValues )
+{
+   const int *attr = piAttributes;
+   int *out = piValues;
+   UINT remaining;
+
+   (void) hdc;
+
+   for (remaining = nAttributes; remaining > 0; remaining--) {
+      if (!query_attrib( iPixelFormat, iLayerPlane, *attr, out ))
+         return FALSE;
+      attr++;
+      out++;
+   }
+
+   return TRUE;
+}
diff --git a/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.h b/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.h
new file mode 100644
index 0000000000..a6c4259942
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.h
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef WGL_ARBPIXELFORMAT_H
+#define WGL_ARBPIXELFORMAT_H
+
+
+/* Extension functions for get_proc_address:
+ */
+
+/* Choose pixelformats matching the given attribute lists.
+ * Matching formats are written to piFormats best score first, as 1-based
+ * pixelformat numbers, up to nMaxFormats entries; *nNumFormats is advanced
+ * once per matching format even when piFormats is already full.
+ * NOTE(review): whether *nNumFormats is reset to 0 on entry is decided in
+ * the implementation - confirm before relying on it.
+ */
+WINGDIAPI BOOL APIENTRY
+wglChoosePixelFormatARB(
+ HDC hdc,
+ const int *piAttribIList,
+ const FLOAT *pfAttribFList,
+ UINT nMaxFormats,
+ int *piFormats,
+ UINT *nNumFormats );
+
+/* Query nAttributes attributes of a pixelformat; results are returned as
+ * floats in pfValues. Returns FALSE if any attribute query fails.
+ */
+WINGDIAPI BOOL APIENTRY
+wglGetPixelFormatAttribfvARB(
+ HDC hdc,
+ int iPixelFormat,
+ int iLayerPlane,
+ UINT nAttributes,
+ const int *piAttributes,
+ FLOAT *pfValues );
+
+/* Query nAttributes attributes of a pixelformat; results are returned as
+ * integers in piValues. Returns FALSE if any attribute query fails.
+ */
+WINGDIAPI BOOL APIENTRY
+wglGetPixelFormatAttribivARB(
+ HDC hdc,
+ int iPixelFormat,
+ int iLayerPlane,
+ UINT nAttributes,
+ const int *piAttributes,
+ int *piValues );
+
+#endif /* WGL_ARBPIXELFORMAT_H */
diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.c b/src/gallium/state_trackers/wgl/shared/stw_context.c
new file mode 100644
index 0000000000..1377fb1ec8
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_context.c
@@ -0,0 +1,349 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <windows.h>
+
+#include "main/mtypes.h"
+#include "main/context.h"
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+#include "state_tracker/st_context.h"
+#include "state_tracker/st_public.h"
+#include "shared/stw_device.h"
+#include "shared/stw_winsys.h"
+#include "shared/stw_framebuffer.h"
+#include "shared/stw_pixelformat.h"
+#include "stw_public.h"
+#include "stw_context.h"
+
+static HDC current_hdc = NULL;
+static UINT_PTR current_hglrc = 0;
+
+/**
+ * Copy the state groups selected by mask from one rendering context to
+ * another.
+ *
+ * Not implemented yet: both handles are looked up under the device mutex,
+ * but no state is copied and the function always returns FALSE.
+ */
+BOOL
+stw_copy_context(
+   UINT_PTR hglrcSrc,
+   UINT_PTR hglrcDst,
+   UINT mask )
+{
+   struct stw_context *src;
+   struct stw_context *dst;
+   BOOL ret = FALSE;
+
+   /* Like the other context entry points, bail out before touching
+    * stw_dev->mutex when the device has not been initialised.
+    */
+   if (!stw_dev)
+      return FALSE;
+
+   pipe_mutex_lock( stw_dev->mutex );
+
+   src = stw_lookup_context( hglrcSrc );
+   dst = stw_lookup_context( hglrcDst );
+
+   if (src && dst) {
+      /* FIXME */
+      (void) src;
+      (void) dst;
+      (void) mask;
+   }
+
+   pipe_mutex_unlock( stw_dev->mutex );
+
+   return ret;
+}
+
+/**
+ * Create a rendering context for the pixelformat currently set on hdc.
+ *
+ * Only the main plane (iLayerPlane == 0) is supported.  The new context is
+ * stored in the first free slot of stw_dev->ctx_array and the returned
+ * handle is that slot index plus one.
+ *
+ * Returns 0 if the device is not initialised, no pixelformat has been set,
+ * any allocation fails, or every context slot is already in use.
+ */
+UINT_PTR
+stw_create_layer_context(
+   HDC hdc,
+   int iLayerPlane )
+{
+   uint pfi;
+   const struct pixelformat_info *pf = NULL;
+   struct stw_context *ctx = NULL;
+   GLvisual *visual = NULL;
+   struct pipe_context *pipe = NULL;
+   UINT_PTR hglrc = 0;
+   UINT_PTR i;
+
+   if(!stw_dev)
+      return 0;
+
+   if (iLayerPlane != 0)
+      return 0;
+
+   pfi = stw_pixelformat_get( hdc );
+   if (pfi == 0)
+      return 0;
+
+   /* Pixelformat numbers are 1-based, the info table is 0-based. */
+   pf = pixelformat_get_info( pfi - 1 );
+
+   ctx = CALLOC_STRUCT( stw_context );
+   if (ctx == NULL)
+      return 0;
+
+   ctx->hdc = hdc;
+   ctx->color_bits = GetDeviceCaps( ctx->hdc, BITSPIXEL );
+
+   /* Create visual based on flags
+    */
+   visual = _mesa_create_visual(
+      GL_TRUE,
+      (pf->flags & PF_FLAG_DOUBLEBUFFER) ? GL_TRUE : GL_FALSE,
+      GL_FALSE,
+      pf->color.redbits,
+      pf->color.greenbits,
+      pf->color.bluebits,
+      pf->alpha.alphabits,
+      0,
+      pf->depth.depthbits,
+      pf->depth.stencilbits,
+      0,
+      0,
+      0,
+      0,
+      (pf->flags & PF_FLAG_MULTISAMPLED) ? stw_query_samples() : 0 );
+   if (visual == NULL)
+      goto fail;
+
+   pipe = stw_dev->stw_winsys->create_context( stw_dev->screen );
+   if (pipe == NULL)
+      goto fail;
+
+   /* The HDC travels with the pipe context as its private pointer. */
+   assert(!pipe->priv);
+   pipe->priv = hdc;
+
+   ctx->st = st_create_context( pipe, visual, NULL );
+   if (ctx->st == NULL)
+      goto fail;
+
+   ctx->st->ctx->DriverCtx = ctx;
+
+   /* Claim the first free slot; hglrc stays 0 when the table is full. */
+   pipe_mutex_lock( stw_dev->mutex );
+   for (i = 0; i < STW_CONTEXT_MAX; i++) {
+      if (stw_dev->ctx_array[i].ctx == NULL) {
+         stw_dev->ctx_array[i].ctx = ctx;
+         hglrc = i + 1;
+         break;
+      }
+   }
+   pipe_mutex_unlock( stw_dev->mutex );
+
+   if (hglrc == 0) {
+      /* No slot available: tear the context down the same way
+       * stw_delete_context() does instead of leaking it and returning an
+       * indeterminate handle.
+       */
+      st_destroy_context( ctx->st );
+      FREE( ctx );
+      return 0;
+   }
+
+   return hglrc;
+
+fail:
+   if (visual)
+      _mesa_destroy_visual( visual );
+
+   if (pipe)
+      pipe->destroy( pipe );
+
+   FREE( ctx );
+   return 0;
+}
+
+/* Destroy the rendering context named by hglrc and free its table slot.
+ * Also destroys the framebuffer registered for the context's HDC, if any.
+ * Returns FALSE if the device is not initialised or hglrc does not name a
+ * context. The whole operation runs under the device mutex.
+ */
+BOOL
+stw_delete_context(
+ UINT_PTR hglrc )
+{
+ struct stw_context *ctx ;
+ BOOL ret = FALSE;
+
+ if (!stw_dev)
+ return FALSE;
+
+ pipe_mutex_lock( stw_dev->mutex );
+
+ ctx = stw_lookup_context(hglrc);
+ if (ctx) {
+ GLcontext *glctx = ctx->st->ctx;
+ GET_CURRENT_CONTEXT( glcurctx );
+ struct stw_framebuffer *fb;
+
+ /* Unbind current if deleting current context.
+ */
+ if (glcurctx == glctx)
+ st_make_current( NULL, NULL, NULL );
+
+ /* Framebuffers are keyed by HDC, not by context. */
+ fb = framebuffer_from_hdc( ctx->hdc );
+ if (fb)
+ framebuffer_destroy( fb );
+
+ if (WindowFromDC( ctx->hdc ) != NULL)
+ ReleaseDC( WindowFromDC( ctx->hdc ), ctx->hdc );
+
+ st_destroy_context( ctx->st );
+
+ FREE( ctx );
+
+ /* Handles are slot index + 1; mark the slot free for reuse. */
+ stw_dev->ctx_array[hglrc - 1].ctx = NULL;
+
+ ret = TRUE;
+ }
+
+ pipe_mutex_unlock( stw_dev->mutex );
+
+ return ret;
+}
+
+/* Release the rendering context named by hglrc by unbinding whatever is
+ * current via stw_make_current( NULL, 0 ).
+ * Returns TRUE only if hglrc names a context and the unbind succeeds.
+ */
+BOOL
+stw_release_context(
+ UINT_PTR hglrc )
+{
+ BOOL ret = FALSE;
+
+ if (!stw_dev)
+ return ret;
+
+ pipe_mutex_lock( stw_dev->mutex );
+ {
+ struct stw_context *ctx;
+
+ /* XXX: The expectation is that ctx is the same context which is
+ * current for this thread. We should check that and return False
+ * if not the case.
+ */
+ ctx = stw_lookup_context( hglrc );
+ if (ctx == NULL)
+ goto done;
+
+ /* The unbind call is made while the device mutex is still held. */
+ if (stw_make_current( NULL, 0 ) == FALSE)
+ goto done;
+
+ ret = TRUE;
+ }
+done:
+ pipe_mutex_unlock( stw_dev->mutex );
+
+ return ret;
+}
+
+/* Report the drawable size for hdc: the client rectangle of its window
+ * when it has one, otherwise the device's horizontal/vertical resolution.
+ */
+static void
+get_window_size( HDC hdc, GLuint *width, GLuint *height )
+{
+   HWND window = WindowFromDC( hdc );
+   RECT client;
+
+   if (!window) {
+      /* No window behind this DC; fall back to the device extents. */
+      *width = GetDeviceCaps( hdc, HORZRES );
+      *height = GetDeviceCaps( hdc, VERTRES );
+      return;
+   }
+
+   GetClientRect( window, &client );
+   *width = client.right - client.left;
+   *height = client.bottom - client.top;
+}
+
+/* Return the context handle recorded by the last stw_make_current() call.
+ * The value lives in a file-scope static, so it is shared by all threads.
+ */
+UINT_PTR
+stw_get_current_context( void )
+{
+ return current_hglrc;
+}
+
+/* Return the device context recorded by the last stw_make_current() call.
+ * The value lives in a file-scope static, so it is shared by all threads.
+ */
+HDC
+stw_get_current_dc( void )
+{
+ return current_hdc;
+}
+
+/**
+ * Bind the rendering context hglrc to the device context hdc.
+ *
+ * A NULL hdc or a zero hglrc releases the current binding and clears the
+ * recorded current DC/context.  Otherwise a framebuffer for hdc is created
+ * on first use and resized to the current window (or device) size on every
+ * bind.
+ *
+ * Returns FALSE if the device is not initialised, hglrc does not name a
+ * context, or the framebuffer cannot be created; in those cases the
+ * recorded current DC/context are left untouched.
+ */
+BOOL
+stw_make_current(
+   HDC hdc,
+   UINT_PTR hglrc )
+{
+   struct stw_context *ctx;
+   GET_CURRENT_CONTEXT( glcurctx );
+   struct stw_framebuffer *fb;
+   GLuint width = 0;
+   GLuint height = 0;
+
+   if (!stw_dev)
+      return FALSE;
+
+   /* Handle the release request before the lookup: handle 0 never resolves
+    * to a context, so looking it up first would make every release fail.
+    */
+   if (hdc == NULL || hglrc == 0) {
+      current_hdc = NULL;
+      current_hglrc = 0;
+      st_make_current( NULL, NULL, NULL );
+      return TRUE;
+   }
+
+   pipe_mutex_lock( stw_dev->mutex );
+   ctx = stw_lookup_context( hglrc );
+   pipe_mutex_unlock( stw_dev->mutex );
+
+   if (ctx == NULL)
+      return FALSE;
+
+   /* Return if already current.
+    */
+   if (glcurctx != NULL && glcurctx->DriverCtx == (void *) ctx && ctx->hdc == hdc) {
+      current_hdc = hdc;
+      current_hglrc = hglrc;
+      return TRUE;
+   }
+
+   get_window_size( hdc, &width, &height );
+
+   /* Lazy creation of framebuffers.
+    */
+   fb = framebuffer_from_hdc( hdc );
+   if (fb == NULL) {
+      fb = framebuffer_create( hdc, &ctx->st->ctx->Visual, width, height );
+      if (fb == NULL)
+         return FALSE;
+
+      fb->dib_hDC = CreateCompatibleDC( hdc );
+      fb->hbmDIB = NULL;
+      fb->pbPixels = NULL;
+   }
+
+   st_make_current( ctx->st, fb->stfb, fb->stfb );
+   framebuffer_resize( fb, width, height );
+   ctx->hdc = hdc;
+   ctx->st->pipe->priv = hdc;
+
+   /* Record the binding only once it has actually been made. */
+   current_hdc = hdc;
+   current_hglrc = hglrc;
+
+   return TRUE;
+}
diff --git a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.h b/src/gallium/state_trackers/wgl/shared/stw_context.h
index 5fa14cb749..b289615272 100644
--- a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.h
+++ b/src/gallium/state_trackers/wgl/shared/stw_context.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,15 +25,18 @@
*
**************************************************************************/
-#ifndef INTEL_SOFTPIPE_H
-#define INTEL_SOFTPIPE_H
+#ifndef STW_CONTEXT_H
+#define STW_CONTEXT_H
-struct pipe_winsys;
-struct pipe_context;
-struct intel_context;
+#include <windows.h>
-struct pipe_context *
-intel_create_softpipe( struct intel_context *intel,
- struct pipe_winsys *winsys );
+struct st_context;
-#endif
+struct stw_context
+{
+ struct st_context *st;
+ HDC hdc;
+ DWORD color_bits;
+};
+
+#endif /* STW_CONTEXT_H */
diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.c b/src/gallium/state_trackers/wgl/shared/stw_device.c
new file mode 100644
index 0000000000..0dca856d73
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_device.c
@@ -0,0 +1,152 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <windows.h>
+
+#include "glapi/glthread.h"
+#include "util/u_debug.h"
+#include "pipe/p_screen.h"
+
+#include "shared/stw_device.h"
+#include "shared/stw_winsys.h"
+#include "shared/stw_pixelformat.h"
+#include "shared/stw_public.h"
+
+#ifdef WIN32_THREADS
+extern _glthread_Mutex OneTimeLock;
+extern void FreeAllTSD(void);
+#endif
+
+
+struct stw_device *stw_dev = NULL;
+
+
+/**
+ * XXX: pipe_screen::flush_frontbuffer hook that forwards to
+ * stw_winsys::flush_frontbuffer, passing the context-private pointer
+ * along as the HDC.
+ */
+static void
+st_flush_frontbuffer(struct pipe_screen *screen,
+                     struct pipe_surface *surf,
+                     void *context_private )
+{
+   stw_dev->stw_winsys->flush_frontbuffer(screen, surf, (HDC) context_private);
+}
+
+
+/* Initialise the single global device object (stw_dev).
+ * Zeroes the static device storage, creates the pipe screen through the
+ * supplied winsys, installs the front-buffer flush hook, then sets up the
+ * device mutex and the pixelformat table.
+ * Returns FALSE, leaving stw_dev NULL, if the screen cannot be created.
+ */
+boolean
+st_init(const struct stw_winsys *stw_winsys)
+{
+ static struct stw_device stw_dev_storage;
+
+ debug_printf("%s\n", __FUNCTION__);
+
+ assert(!stw_dev);
+
+ stw_dev = &stw_dev_storage;
+ memset(stw_dev, 0, sizeof(*stw_dev));
+
+#ifdef DEBUG
+ stw_dev->memdbg_no = debug_memory_begin();
+#endif
+
+ stw_dev->stw_winsys = stw_winsys;
+
+#ifdef WIN32_THREADS
+ _glthread_INIT_MUTEX(OneTimeLock);
+#endif
+
+ stw_dev->screen = stw_winsys->create_screen();
+ if(!stw_dev->screen)
+ goto error1;
+
+ /* Route screen-level front-buffer flushes through the winsys. */
+ stw_dev->screen->flush_frontbuffer = st_flush_frontbuffer;
+
+ pipe_mutex_init( stw_dev->mutex );
+
+ pixelformat_init();
+
+ return TRUE;
+
+error1:
+ /* Resetting the pointer lets a later st_init() pass its assert. */
+ stw_dev = NULL;
+ return FALSE;
+}
+
+
+/* Tear down the global device: delete every remaining context, destroy
+ * the device mutex and the pipe screen, then reset stw_dev to NULL.
+ * Does nothing if the device was never initialised.
+ */
+void
+st_cleanup(void)
+{
+ UINT_PTR i;
+
+ debug_printf("%s\n", __FUNCTION__);
+
+ if (!stw_dev)
+ return;
+
+ /* NOTE(review): stw_delete_context() takes stw_dev->mutex itself, so this
+ * nested call only works if pipe_mutex is recursive - confirm.
+ */
+ pipe_mutex_lock( stw_dev->mutex );
+ {
+ /* Ensure all contexts are destroyed */
+ for (i = 0; i < STW_CONTEXT_MAX; i++)
+ if (stw_dev->ctx_array[i].ctx)
+ stw_delete_context( i + 1 );
+ }
+ pipe_mutex_unlock( stw_dev->mutex );
+
+ pipe_mutex_destroy( stw_dev->mutex );
+
+ stw_dev->screen->destroy(stw_dev->screen);
+
+#ifdef WIN32_THREADS
+ _glthread_DESTROY_MUTEX(OneTimeLock);
+ FreeAllTSD();
+#endif
+
+#ifdef DEBUG
+ debug_memory_end(stw_dev->memdbg_no);
+#endif
+
+ stw_dev = NULL;
+}
+
+
+/* Map a context handle to its stw_context, or NULL if the handle is out of
+ * range, its slot is empty, or the device is not initialised.
+ * Does no locking of its own.
+ */
+struct stw_context *
+stw_lookup_context( UINT_PTR dhglrc )
+{
+   /* Handles are slot index + 1, so the valid range is
+    * 1..STW_CONTEXT_MAX inclusive; 0 is the null handle.
+    */
+   if (dhglrc == 0 ||
+       dhglrc > STW_CONTEXT_MAX)
+      return NULL;
+
+   if (stw_dev == NULL)
+      return NULL;
+
+   return stw_dev->ctx_array[dhglrc - 1].ctx;
+}
+
diff --git a/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.h b/src/gallium/state_trackers/wgl/shared/stw_device.h
index 46c9bab3af..80da14b84f 100644
--- a/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.h
+++ b/src/gallium/state_trackers/wgl/shared/stw_device.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,23 +25,39 @@
*
**************************************************************************/
-#ifndef INTEL_SWAPBUFFERS_H
-#define INTEL_SWAPBUFFERS_H
+#ifndef STW_DEVICE_H_
+#define STW_DEVICE_H_
-struct pipe_surface;
+#include "pipe/p_compiler.h"
+#include "pipe/p_thread.h"
-extern void intelDisplaySurface(__DRIdrawablePrivate * dPriv,
- struct pipe_surface *surf,
- const drm_clip_rect_t * rect);
+#define STW_CONTEXT_MAX 32
-extern void intelSwapBuffers(__DRIdrawablePrivate * dPriv);
-extern void intelCopySubBuffer(__DRIdrawablePrivate * dPriv,
- int x, int y, int w, int h);
+struct pipe_screen;
-extern void intelUpdateWindowSize(__DRIdrawablePrivate *dPriv);
+struct stw_device
+{
+ const struct stw_winsys *stw_winsys;
+ struct pipe_screen *screen;
+
+ pipe_mutex mutex;
+ struct {
+ struct stw_context *ctx;
+ } ctx_array[STW_CONTEXT_MAX];
+
+#ifdef DEBUG
+ unsigned long memdbg_no;
+#endif
+};
-#endif /* INTEL_SWAPBUFFERS_H */
+struct stw_context *
+stw_lookup_context( UINT_PTR hglrc );
+
+extern struct stw_device *stw_dev;
+
+
+#endif /* STW_DEVICE_H_ */
diff --git a/src/gallium/state_trackers/wgl/shared/stw_framebuffer.c b/src/gallium/state_trackers/wgl/shared/stw_framebuffer.c
new file mode 100644
index 0000000000..c70b31a488
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_framebuffer.c
@@ -0,0 +1,212 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <windows.h>
+
+#include "main/context.h"
+#include "pipe/p_format.h"
+#include "pipe/p_screen.h"
+#include "state_tracker/st_context.h"
+#include "state_tracker/st_public.h"
+#include "stw_framebuffer.h"
+#include "stw_device.h"
+#include "stw_public.h"
+#include "stw_winsys.h"
+
+
+/* Resize fb to width x height.
+ * The compatible bitmap is (re)created when none exists yet or when the
+ * requested size differs from the st framebuffer's current size; the st
+ * framebuffer itself is resized unconditionally afterwards.
+ */
+void
+framebuffer_resize(
+ struct stw_framebuffer *fb,
+ GLuint width,
+ GLuint height )
+{
+ if (fb->hbmDIB == NULL || fb->stfb->Base.Width != width || fb->stfb->Base.Height != height) {
+ if (fb->hbmDIB)
+ DeleteObject( fb->hbmDIB );
+
+ fb->hbmDIB = CreateCompatibleBitmap(
+ fb->hDC,
+ width,
+ height );
+ }
+
+ st_resize_framebuffer( fb->stfb, width, height );
+}
+
+static struct stw_framebuffer *fb_head = NULL;
+
+/* Window procedure installed on every window that backs a framebuffer.
+ * Resizes the matching framebuffer on WM_SIZE (except when the window is
+ * being minimised) and then forwards the message to the window procedure
+ * that was in place before subclassing.
+ */
+static LRESULT CALLBACK
+window_proc(
+   HWND hWnd,
+   UINT uMsg,
+   WPARAM wParam,
+   LPARAM lParam )
+{
+   struct stw_framebuffer *fb;
+
+   for (fb = fb_head; fb != NULL; fb = fb->next)
+      if (fb->hWnd == hWnd)
+         break;
+   assert( fb != NULL );
+
+   /* The assert disappears in release builds; without a framebuffer there
+    * is no saved procedure to chain to, so use the default one rather than
+    * dereferencing NULL.
+    */
+   if (fb == NULL)
+      return DefWindowProc( hWnd, uMsg, wParam, lParam );
+
+   if (uMsg == WM_SIZE && wParam != SIZE_MINIMIZED)
+      framebuffer_resize( fb, LOWORD( lParam ), HIWORD( lParam ) );
+
+   return CallWindowProc( fb->WndProc, hWnd, uMsg, wParam, lParam );
+}
+
+/* Create a new framebuffer object which will correspond to the given HDC.
+ *
+ * The colour format is fixed to A8R8G8B8; depth and stencil formats are
+ * derived from the visual's depthBits/stencilBits.  If hdc belongs to a
+ * window, that window is subclassed with window_proc so the framebuffer
+ * follows resizes.  The new object is linked at the head of the
+ * framebuffer list.
+ *
+ * Returns NULL if either the wrapper or the st framebuffer cannot be
+ * allocated.
+ */
+struct stw_framebuffer *
+framebuffer_create(
+   HDC hdc,
+   GLvisual *visual,
+   GLuint width,
+   GLuint height )
+{
+   struct stw_framebuffer *fb;
+   enum pipe_format colorFormat, depthFormat, stencilFormat;
+
+   fb = CALLOC_STRUCT( stw_framebuffer );
+   if (fb == NULL)
+      return NULL;
+
+   /* Determine PIPE_FORMATs for buffers.
+    */
+   colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM;
+
+   if (visual->depthBits == 0)
+      depthFormat = PIPE_FORMAT_NONE;
+   else if (visual->depthBits <= 16)
+      depthFormat = PIPE_FORMAT_Z16_UNORM;
+   else if (visual->depthBits <= 24)
+      depthFormat = PIPE_FORMAT_S8Z24_UNORM;
+   else
+      depthFormat = PIPE_FORMAT_Z32_UNORM;
+
+   if (visual->stencilBits == 8) {
+      /* Share the packed depth/stencil buffer when there is one. */
+      if (depthFormat == PIPE_FORMAT_S8Z24_UNORM)
+         stencilFormat = depthFormat;
+      else
+         stencilFormat = PIPE_FORMAT_S8_UNORM;
+   }
+   else {
+      stencilFormat = PIPE_FORMAT_NONE;
+   }
+
+   fb->stfb = st_create_framebuffer(
+      visual,
+      colorFormat,
+      depthFormat,
+      stencilFormat,
+      width,
+      height,
+      (void *) fb );
+   if (fb->stfb == NULL) {
+      /* Callers dereference fb->stfb unconditionally; never hand out a
+       * half-built framebuffer.
+       */
+      FREE( fb );
+      return NULL;
+   }
+
+   fb->cColorBits = GetDeviceCaps( hdc, BITSPIXEL );
+   fb->hDC = hdc;
+
+   /* Subclass a window associated with the device context.
+    * SetWindowLongPtr keeps the full pointer width on 64-bit Windows,
+    * where a LONG cannot hold a window procedure address.
+    */
+   fb->hWnd = WindowFromDC( hdc );
+   if (fb->hWnd != NULL) {
+      fb->WndProc = (WNDPROC) SetWindowLongPtr(
+         fb->hWnd,
+         GWLP_WNDPROC,
+         (LONG_PTR) window_proc );
+   }
+
+   fb->next = fb_head;
+   fb_head = fb;
+   return fb;
+}
+
+/* Unlink fb from the framebuffer list and free it.
+ *
+ * Restores the window procedure that was saved when the window was
+ * subclassed and releases the GDI memory DC and bitmap owned by the
+ * framebuffer.  Does nothing if fb is not in the list.
+ * NOTE(review): the st framebuffer (fb->stfb) is not released here -
+ * confirm who owns it.
+ */
+void
+framebuffer_destroy(
+   struct stw_framebuffer *fb )
+{
+   struct stw_framebuffer **link;
+
+   for (link = &fb_head; *link != NULL; link = &(*link)->next) {
+      if (*link != fb)
+         continue;
+
+      /* SetWindowLongPtr keeps the full pointer width on 64-bit Windows. */
+      if (fb->hWnd != NULL) {
+         SetWindowLongPtr(
+            fb->hWnd,
+            GWLP_WNDPROC,
+            (LONG_PTR) fb->WndProc );
+      }
+
+      /* Both handles start out NULL (the struct is calloc'ed) and are
+       * only ever created on behalf of this framebuffer.
+       */
+      if (fb->dib_hDC != NULL)
+         DeleteDC( fb->dib_hDC );
+      if (fb->hbmDIB != NULL)
+         DeleteObject( fb->hbmDIB );
+
+      *link = fb->next;
+      FREE( fb );
+      return;
+   }
+}
+
+/* Find the stw_framebuffer registered for hdc.
+ * Returns NULL when no framebuffer in the list uses that device context.
+ */
+struct stw_framebuffer *
+framebuffer_from_hdc(
+   HDC hdc )
+{
+   struct stw_framebuffer *cur = fb_head;
+
+   /* Stop on the first match or when the list runs out. */
+   while (cur != NULL && cur->hDC != hdc)
+      cur = cur->next;
+
+   return cur;
+}
+
+
+/* Present the back-left surface of the framebuffer registered for hdc by
+ * handing it to the winsys flush_frontbuffer hook.
+ * Returns FALSE if no framebuffer exists for hdc.
+ */
+BOOL
+stw_swap_buffers(
+ HDC hdc )
+{
+ struct stw_framebuffer *fb;
+ struct pipe_surface *surf;
+
+ fb = framebuffer_from_hdc( hdc );
+ if (fb == NULL)
+ return FALSE;
+
+ /* If we're swapping the buffer associated with the current context
+ * we have to flush any pending rendering commands first.
+ */
+ st_notify_swapbuffers( fb->stfb );
+
+ /* NOTE(review): surf is used without checking whether the lookup
+ * succeeded - confirm st_get_framebuffer_surface always sets it.
+ */
+ st_get_framebuffer_surface( fb->stfb, ST_SURFACE_BACK_LEFT, &surf );
+
+ stw_dev->stw_winsys->flush_frontbuffer(stw_dev->screen,
+ surf,
+ hdc );
+
+ return TRUE;
+}
diff --git a/src/gallium/winsys/drm/intel/dri/intel_reg.h b/src/gallium/state_trackers/wgl/shared/stw_framebuffer.h
index 4f33bee438..2e16e421f2 100644
--- a/src/gallium/winsys/drm/intel/dri/intel_reg.h
+++ b/src/gallium/state_trackers/wgl/shared/stw_framebuffer.h
@@ -1,8 +1,8 @@
/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,32 +22,50 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
+#ifndef STW_FRAMEBUFFER_H
+#define STW_FRAMEBUFFER_H
-#ifndef _INTEL_REG_H_
-#define _INTEL_REG_H_
+#include "main/mtypes.h"
+/* Windows framebuffer, derived from gl_framebuffer.
+ */
+struct stw_framebuffer
+{
+ struct st_framebuffer *stfb;
+ HDC hDC;
+ int pixelformat;
+ BYTE cColorBits;
+ HDC dib_hDC;
+ HBITMAP hbmDIB;
+ HBITMAP hOldBitmap;
+ PBYTE pbPixels;
+ HWND hWnd;
+ WNDPROC WndProc;
+ struct stw_framebuffer *next;
+};
-#define BR00_BITBLT_CLIENT 0x40000000
-#define BR00_OP_COLOR_BLT 0x10000000
-#define BR00_OP_SRC_COPY_BLT 0x10C00000
-#define BR13_SOLID_PATTERN 0x80000000
+struct stw_framebuffer *
+framebuffer_create(
+ HDC hdc,
+ GLvisual *visual,
+ GLuint width,
+ GLuint height );
-#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4)
-#define XY_COLOR_BLT_WRITE_ALPHA (1<<21)
-#define XY_COLOR_BLT_WRITE_RGB (1<<20)
+void
+framebuffer_destroy(
+ struct stw_framebuffer *fb );
-#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
-#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21)
-#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20)
+void
+framebuffer_resize(
+ struct stw_framebuffer *fb,
+ GLuint width,
+ GLuint height );
-#define MI_WAIT_FOR_EVENT ((0x3<<23))
-#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
-#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
+struct stw_framebuffer *
+framebuffer_from_hdc(
+ HDC hdc );
-#define MI_BATCH_BUFFER_END (0xA<<23)
-
-
-#endif
+#endif /* STW_FRAMEBUFFER_H */
diff --git a/src/gallium/state_trackers/wgl/shared/stw_getprocaddress.c b/src/gallium/state_trackers/wgl/shared/stw_getprocaddress.c
new file mode 100644
index 0000000000..ac2d6fc260
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_getprocaddress.c
@@ -0,0 +1,71 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <windows.h>
+
+#include "glapi/glapi.h"
+#include "stw_arbextensionsstring.h"
+#include "stw_arbpixelformat.h"
+#include "stw_public.h"
+
+/* One WGL extension entry point: its exported name and its address. */
+struct extension_entry
+{
+ const char *name;
+ PROC proc;
+};
+
+/* Build a table entry whose name is the function's own identifier. */
+#define EXTENTRY(P) { #P, (PROC) P }
+
+/* WGL extension functions served by stw_get_proc_address().
+ * The table is terminated by an entry with a NULL name.
+ */
+static struct extension_entry extension_entries[] = {
+
+ /* WGL_ARB_extensions_string */
+ EXTENTRY( wglGetExtensionsStringARB ),
+
+ /* WGL_ARB_pixel_format */
+ EXTENTRY( wglChoosePixelFormatARB ),
+ EXTENTRY( wglGetPixelFormatAttribfvARB ),
+ EXTENTRY( wglGetPixelFormatAttribivARB ),
+
+ { NULL, NULL }
+};
+
+/* Resolve an entry point by name.
+ * The GL dispatch table is consulted first; only names it does not know
+ * are looked up in the WGL extension table.  Returns NULL when the name
+ * is found in neither.
+ */
+PROC
+stw_get_proc_address(
+   LPCSTR lpszProc )
+{
+   struct extension_entry *ext = extension_entries;
+   PROC gl_proc = (PROC) _glapi_get_proc_address( (const char *) lpszProc );
+
+   if (gl_proc)
+      return gl_proc;
+
+   /* Walk the table up to its NULL-name terminator. */
+   while (ext->name) {
+      if (strcmp( lpszProc, ext->name ) == 0)
+         return ext->proc;
+      ext++;
+   }
+
+   return NULL;
+}
diff --git a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c
new file mode 100644
index 0000000000..5cfdd41597
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c
@@ -0,0 +1,286 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "stw_pixelformat.h"
+#include "stw_public.h"
+
+#define MAX_PIXELFORMATS 16
+
+static struct pixelformat_info pixelformats[MAX_PIXELFORMATS];
+static uint pixelformat_count = 0;
+static uint pixelformat_extended_count = 0;
+
+static uint currentpixelformat = 0;
+
+
+/* Append the six standard pixelformats to the table at *ppf and advance
+ * *ppf past them.  All six use 8:8:8 colour; they differ in buffering,
+ * alpha and depth/stencil layout.  flags is OR-ed into every entry.
+ */
+static void
+add_standard_pixelformats(
+   struct pixelformat_info **ppf,
+   uint flags )
+{
+   static const struct pixelformat_color_info rgb888 = { 8, 0, 8, 8, 8, 16 };
+   /* [0] = no alpha, [1] = 8-bit alpha at bit 24 */
+   static const struct pixelformat_alpha_info alpha[2] = { { 0, 0 }, { 8, 24 } };
+   /* [0] = 16-bit depth, [1] = 24-bit depth + 8-bit stencil */
+   static const struct pixelformat_depth_info depth[2] = { { 16, 0 }, { 24, 8 } };
+   /* { buffering flag, alpha[] index, depth[] index }, in table order */
+   static const uint variants[6][3] = {
+      { PF_FLAG_DOUBLEBUFFER, 1, 0 },
+      { PF_FLAG_DOUBLEBUFFER, 1, 1 },
+      { PF_FLAG_DOUBLEBUFFER, 0, 0 },
+      { PF_FLAG_DOUBLEBUFFER, 0, 1 },
+      { 0, 0, 0 },
+      { 0, 0, 1 }
+   };
+   struct pixelformat_info *out = *ppf;
+   uint v;
+
+   for (v = 0; v < 6; v++, out++) {
+      out->flags = variants[v][0] | flags;
+      out->color = rgb888;
+      out->alpha = alpha[variants[v][1]];
+      out->depth = depth[variants[v][2]];
+   }
+
+   *ppf = out;
+}
+
+/* Fill the pixelformat table.
+ * The standard formats come first and make up pixelformat_count; the same
+ * set with PF_FLAG_MULTISAMPLED follows, and pixelformat_extended_count
+ * covers both sets.
+ */
+void
+pixelformat_init( void )
+{
+ struct pixelformat_info *pf = pixelformats;
+
+ add_standard_pixelformats( &pf, 0 );
+ pixelformat_count = pf - pixelformats;
+
+ add_standard_pixelformats( &pf, PF_FLAG_MULTISAMPLED );
+ pixelformat_extended_count = pf - pixelformats;
+
+ /* Checked only after the entries have already been written. */
+ assert( pixelformat_extended_count <= MAX_PIXELFORMATS );
+}
+
+/* Number of standard (non-multisampled) pixelformats. */
+uint
+pixelformat_get_count( void )
+{
+ return pixelformat_count;
+}
+
+/* Number of pixelformats including the multisampled ones. */
+uint
+pixelformat_get_extended_count( void )
+{
+ return pixelformat_extended_count;
+}
+
+/* Return the table entry for a 0-based pixelformat index.
+ * The range is enforced by an assert only.
+ */
+const struct pixelformat_info *
+pixelformat_get_info( uint index )
+{
+ assert( index < pixelformat_extended_count );
+
+ return &pixelformats[index];
+}
+
+
+/* Fill *ppfd with the description of the 1-based pixelformat iPixelFormat.
+ * Returns the total number of pixelformats (extended ones included).
+ * With ppfd == NULL only that count is returned. Returns 0 if the format
+ * number is out of range or nBytes is not sizeof(PIXELFORMATDESCRIPTOR).
+ */
+int
+stw_pixelformat_describe(
+ HDC hdc,
+ int iPixelFormat,
+ UINT nBytes,
+ LPPIXELFORMATDESCRIPTOR ppfd )
+{
+ uint count;
+ uint index;
+ const struct pixelformat_info *pf;
+
+ (void) hdc;
+
+ count = pixelformat_get_extended_count();
+ /* Unsigned conversion makes format numbers <= 0 fail the range check. */
+ index = (uint) iPixelFormat - 1;
+
+ if (ppfd == NULL)
+ return count;
+ if (index >= count || nBytes != sizeof( PIXELFORMATDESCRIPTOR ))
+ return 0;
+
+ pf = pixelformat_get_info( index );
+
+ ppfd->nSize = sizeof( PIXELFORMATDESCRIPTOR );
+ ppfd->nVersion = 1;
+ ppfd->dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL;
+ if (pf->flags & PF_FLAG_DOUBLEBUFFER)
+ ppfd->dwFlags |= PFD_DOUBLEBUFFER | PFD_SWAP_COPY;
+ ppfd->iPixelType = PFD_TYPE_RGBA;
+ /* Colour depth is reported without the alpha bits. */
+ ppfd->cColorBits = pf->color.redbits + pf->color.greenbits + pf->color.bluebits;
+ ppfd->cRedBits = pf->color.redbits;
+ ppfd->cRedShift = pf->color.redshift;
+ ppfd->cGreenBits = pf->color.greenbits;
+ ppfd->cGreenShift = pf->color.greenshift;
+ ppfd->cBlueBits = pf->color.bluebits;
+ ppfd->cBlueShift = pf->color.blueshift;
+ ppfd->cAlphaBits = pf->alpha.alphabits;
+ ppfd->cAlphaShift = pf->alpha.alphashift;
+ ppfd->cAccumBits = 0;
+ ppfd->cAccumRedBits = 0;
+ ppfd->cAccumGreenBits = 0;
+ ppfd->cAccumBlueBits = 0;
+ ppfd->cAccumAlphaBits = 0;
+ ppfd->cDepthBits = pf->depth.depthbits;
+ ppfd->cStencilBits = pf->depth.stencilbits;
+ ppfd->cAuxBuffers = 0;
+ ppfd->iLayerType = 0;
+ ppfd->bReserved = 0;
+ ppfd->dwLayerMask = 0;
+ ppfd->dwVisibleMask = 0;
+ ppfd->dwDamageMask = 0;
+
+ return count;
+}
+
+/* Only used by the wgl code, but have it here to avoid exporting the
+ * pixelformat.h functionality.
+ *
+ * Pick the standard (non-multisampled) pixelformat closest to *ppfd.
+ * Formats whose double-buffering does not match the request are skipped
+ * unless PFD_DOUBLEBUFFER_DONTCARE is set. Each remaining format gets a
+ * penalty: 8 for a colour-bits mismatch, 4 for depth, 2 for stencil and
+ * 1 for alpha; the lowest penalty wins and ties go to the earlier format.
+ * Returns the 1-based pixelformat number, or 0 if no format qualifies.
+ */
+int stw_pixelformat_choose( HDC hdc,
+ CONST PIXELFORMATDESCRIPTOR *ppfd )
+{
+ uint count;
+ uint index;
+ uint bestindex;
+ uint bestdelta;
+
+ (void) hdc;
+
+ count = pixelformat_get_count();
+ /* bestindex == count means "nothing found yet". */
+ bestindex = count;
+ bestdelta = 0xffffffff;
+
+ for (index = 0; index < count; index++) {
+ uint delta = 0;
+ const struct pixelformat_info *pf = pixelformat_get_info( index );
+
+ if (!(ppfd->dwFlags & PFD_DOUBLEBUFFER_DONTCARE) &&
+ !!(ppfd->dwFlags & PFD_DOUBLEBUFFER) !=
+ !!(pf->flags & PF_FLAG_DOUBLEBUFFER))
+ continue;
+
+ if (ppfd->cColorBits != pf->color.redbits + pf->color.greenbits + pf->color.bluebits)
+ delta += 8;
+
+ if (ppfd->cDepthBits != pf->depth.depthbits)
+ delta += 4;
+
+ if (ppfd->cStencilBits != pf->depth.stencilbits)
+ delta += 2;
+
+ if (ppfd->cAlphaBits != pf->alpha.alphabits)
+ delta++;
+
+ if (delta < bestdelta) {
+ bestindex = index;
+ bestdelta = delta;
+ /* An exact match cannot be beaten. */
+ if (bestdelta == 0)
+ break;
+ }
+ }
+
+ if (bestindex == count)
+ return 0;
+
+ return bestindex + 1;
+}
+
+
+/* Return the pixelformat number last accepted by stw_pixelformat_set(),
+ * or 0 if none has been set. The value is a single file-scope variable;
+ * hdc is not consulted.
+ */
+int
+stw_pixelformat_get(
+ HDC hdc )
+{
+ return currentpixelformat;
+}
+
+
+/* Record iPixelFormat (1-based) as the current pixelformat.
+ * Returns FALSE if the number is outside the extended pixelformat range.
+ * The GDI SetPixelFormat result is not checked.
+ */
+BOOL
+stw_pixelformat_set(
+ HDC hdc,
+ int iPixelFormat )
+{
+ uint count;
+ uint index;
+
+ (void) hdc;
+
+ /* Unsigned conversion makes format numbers <= 0 fail the range check. */
+ index = (uint) iPixelFormat - 1;
+ count = pixelformat_get_extended_count();
+ if (index >= count)
+ return FALSE;
+
+ currentpixelformat = iPixelFormat;
+
+ /* Some applications mistakenly use the undocumented wglSetPixelFormat
+ * function instead of SetPixelFormat, so we call SetPixelFormat here to
+ * avoid opengl32.dll's wglCreateContext to fail */
+ if (GetPixelFormat(hdc) == 0) {
+ SetPixelFormat(hdc, iPixelFormat, NULL);
+ }
+
+ return TRUE;
+}
+
+
+
+/* XXX: this needs to be turned into queries on pipe_screen or
+ * stw_winsys.
+ */
+
+/* Hard-coded number of sample buffers. */
+int
+stw_query_sample_buffers( void )
+{
+ return 1;
+}
+
+/* Hard-coded sample count. */
+int
+stw_query_samples( void )
+{
+ return 4;
+}
+
diff --git a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.h b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.h
new file mode 100644
index 0000000000..7ca4194a2a
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.h
@@ -0,0 +1,83 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PIXELFORMAT_H
+#define PIXELFORMAT_H
+
+#include <windows.h>
+#include "pipe/p_compiler.h"
+
+#define PF_FLAG_DOUBLEBUFFER 0x00000001
+#define PF_FLAG_MULTISAMPLED 0x00000002
+
+/**
+ * RGB channel description of a pixel format.
+ *
+ * redbits + greenbits + bluebits is what stw_pixelformat_choose() compares
+ * against PIXELFORMATDESCRIPTOR::cColorBits.
+ * NOTE(review): the redshift/greenshift/blueshift fields are presumably the
+ * bit offset of each channel within a pixel -- confirm against the format
+ * table.
+ */
+struct pixelformat_color_info
+{
+ uint redbits;
+ uint redshift;
+ uint greenbits;
+ uint greenshift;
+ uint bluebits;
+ uint blueshift;
+};
+
+/**
+ * Alpha channel description of a pixel format.
+ *
+ * alphabits is compared against PIXELFORMATDESCRIPTOR::cAlphaBits by
+ * stw_pixelformat_choose().
+ * NOTE(review): alphashift is presumably the bit offset of the alpha
+ * channel within a pixel -- confirm.
+ */
+struct pixelformat_alpha_info
+{
+ uint alphabits;
+ uint alphashift;
+};
+
+/**
+ * Depth/stencil description of a pixel format.
+ *
+ * depthbits and stencilbits are compared against
+ * PIXELFORMATDESCRIPTOR::cDepthBits and ::cStencilBits by
+ * stw_pixelformat_choose().
+ */
+struct pixelformat_depth_info
+{
+ uint depthbits;
+ uint stencilbits;
+};
+
+/**
+ * One pixel format entry, as returned by pixelformat_get_info().
+ */
+struct pixelformat_info
+{
+ uint flags; /* bitmask of PF_FLAG_* values */
+ struct pixelformat_color_info color;
+ struct pixelformat_alpha_info alpha;
+ struct pixelformat_depth_info depth;
+};
+
+void
+pixelformat_init( void );
+
+uint
+pixelformat_get_count( void );
+
+uint
+pixelformat_get_extended_count( void );
+
+const struct pixelformat_info *
+pixelformat_get_info( uint index );
+
+int stw_query_sample_buffers( void );
+int stw_query_samples( void );
+
+
+#endif /* PIXELFORMAT_H */
diff --git a/src/gallium/state_trackers/wgl/shared/stw_public.h b/src/gallium/state_trackers/wgl/shared/stw_public.h
new file mode 100644
index 0000000000..39d377c16b
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_public.h
@@ -0,0 +1,68 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef STW_PUBLIC_H
+#define STW_PUBLIC_H
+
+#include <windows.h>
+
+BOOL stw_copy_context( UINT_PTR hglrcSrc,
+ UINT_PTR hglrcDst,
+ UINT mask );
+
+UINT_PTR stw_create_layer_context( HDC hdc,
+ int iLayerPlane );
+
+BOOL stw_delete_context( UINT_PTR hglrc );
+
+BOOL
+stw_release_context( UINT_PTR dhglrc );
+
+UINT_PTR stw_get_current_context( void );
+
+HDC stw_get_current_dc( void );
+
+BOOL stw_make_current( HDC hdc, UINT_PTR hglrc );
+
+BOOL stw_swap_buffers( HDC hdc );
+
+PROC stw_get_proc_address( LPCSTR lpszProc );
+
+int stw_pixelformat_describe( HDC hdc,
+ int iPixelFormat,
+ UINT nBytes,
+ LPPIXELFORMATDESCRIPTOR ppfd );
+
+int stw_pixelformat_get( HDC hdc );
+
+BOOL stw_pixelformat_set( HDC hdc,
+ int iPixelFormat );
+
+int stw_pixelformat_choose( HDC hdc,
+ CONST PIXELFORMATDESCRIPTOR *ppfd );
+
+#endif
diff --git a/src/gallium/state_trackers/wgl/shared/stw_quirks.c b/src/gallium/state_trackers/wgl/shared/stw_quirks.c
new file mode 100644
index 0000000000..0961ce3bb0
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_quirks.c
@@ -0,0 +1,113 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ *
+ * This is hopefully a temporary hack to define some needed dispatch
+ * table entries. Hopefully, I'll find a better solution. The
+ * dispatch table generation scripts ought to be making these dummy
+ * stubs as well.
+ */
+
+void gl_dispatch_stub_543(void){}
+void gl_dispatch_stub_544(void){}
+void gl_dispatch_stub_545(void){}
+void gl_dispatch_stub_546(void){}
+void gl_dispatch_stub_547(void){}
+void gl_dispatch_stub_548(void){}
+void gl_dispatch_stub_549(void){}
+void gl_dispatch_stub_550(void){}
+void gl_dispatch_stub_551(void){}
+void gl_dispatch_stub_552(void){}
+void gl_dispatch_stub_553(void){}
+void gl_dispatch_stub_554(void){}
+void gl_dispatch_stub_555(void){}
+void gl_dispatch_stub_556(void){}
+void gl_dispatch_stub_557(void){}
+void gl_dispatch_stub_558(void){}
+void gl_dispatch_stub_559(void){}
+void gl_dispatch_stub_560(void){}
+void gl_dispatch_stub_561(void){}
+void gl_dispatch_stub_565(void){}
+void gl_dispatch_stub_566(void){}
+void gl_dispatch_stub_570(void){}
+void gl_dispatch_stub_577(void){}
+void gl_dispatch_stub_578(void){}
+void gl_dispatch_stub_582(void){}
+void gl_dispatch_stub_603(void){}
+void gl_dispatch_stub_607(void){}
+void gl_dispatch_stub_645(void){}
+void gl_dispatch_stub_646(void){}
+void gl_dispatch_stub_647(void){}
+void gl_dispatch_stub_648(void){}
+void gl_dispatch_stub_649(void){}
+void gl_dispatch_stub_650(void){}
+void gl_dispatch_stub_651(void){}
+void gl_dispatch_stub_652(void){}
+void gl_dispatch_stub_653(void){}
+void gl_dispatch_stub_657(void){}
+void gl_dispatch_stub_733(void){}
+void gl_dispatch_stub_734(void){}
+void gl_dispatch_stub_735(void){}
+void gl_dispatch_stub_736(void){}
+void gl_dispatch_stub_737(void){}
+void gl_dispatch_stub_738(void){}
+void gl_dispatch_stub_744(void){}
+void gl_dispatch_stub_745(void){}
+void gl_dispatch_stub_746(void){}
+void gl_dispatch_stub_760(void){}
+void gl_dispatch_stub_761(void){}
+void gl_dispatch_stub_763(void){}
+void gl_dispatch_stub_764(void){}
+void gl_dispatch_stub_765(void){}
+void gl_dispatch_stub_766(void){}
+void gl_dispatch_stub_767(void){}
+void gl_dispatch_stub_768(void){}
+
+void gl_dispatch_stub_562(void){}
+void gl_dispatch_stub_563(void){}
+void gl_dispatch_stub_564(void){}
+void gl_dispatch_stub_567(void){}
+void gl_dispatch_stub_568(void){}
+void gl_dispatch_stub_569(void){}
+void gl_dispatch_stub_580(void){}
+void gl_dispatch_stub_581(void){}
+void gl_dispatch_stub_606(void){}
+void gl_dispatch_stub_654(void){}
+void gl_dispatch_stub_655(void){}
+void gl_dispatch_stub_656(void){}
+void gl_dispatch_stub_739(void){}
+void gl_dispatch_stub_740(void){}
+void gl_dispatch_stub_741(void){}
+void gl_dispatch_stub_748(void){}
+void gl_dispatch_stub_749(void){}
+void gl_dispatch_stub_769(void){}
+void gl_dispatch_stub_770(void){}
+void gl_dispatch_stub_771(void){}
+void gl_dispatch_stub_772(void){}
+void gl_dispatch_stub_773(void){}
diff --git a/src/gallium/state_trackers/wgl/shared/stw_winsys.h b/src/gallium/state_trackers/wgl/shared/stw_winsys.h
new file mode 100644
index 0000000000..a85a9a2257
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/shared/stw_winsys.h
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef STW_WINSYS_H
+#define STW_WINSYS_H
+
+#include <windows.h> /* for HDC */
+
+#include "pipe/p_compiler.h"
+
+struct pipe_screen;
+struct pipe_context;
+struct pipe_surface;
+
+/**
+ * Table of callbacks passed to st_init().
+ * NOTE(review): presumably supplied by the window-system specific backend;
+ * confirm who owns the returned objects.
+ */
+struct stw_winsys
+{
+ /* Returns a pipe_screen; takes no arguments. */
+ struct pipe_screen *
+ (*create_screen)( void );
+
+ /* Returns a pipe_context for the given screen. */
+ struct pipe_context *
+ (*create_context)( struct pipe_screen *screen );
+
+ /* Called with a screen, a surface and a device context.
+ * NOTE(review): presumably presents surf to hDC -- confirm. */
+ void
+ (*flush_frontbuffer)( struct pipe_screen *screen,
+ struct pipe_surface *surf,
+ HDC hDC );
+};
+
+boolean
+st_init(const struct stw_winsys *stw_winsys);
+
+void
+st_cleanup(void);
+
+#endif /* STW_WINSYS_H */
diff --git a/src/gallium/state_trackers/wgl/wgl/stw_wgl.c b/src/gallium/state_trackers/wgl/wgl/stw_wgl.c
new file mode 100644
index 0000000000..e06d2640b4
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/wgl/stw_wgl.c
@@ -0,0 +1,337 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <windows.h>
+
+#include "util/u_debug.h"
+#include "shared/stw_public.h"
+#include "stw_wgl.h"
+
+
+/** Forwards to stw_copy_context(), passing both handles as UINT_PTR. */
+WINGDIAPI BOOL APIENTRY
+wglCopyContext(
+ HGLRC hglrcSrc,
+ HGLRC hglrcDst,
+ UINT mask )
+{
+ return stw_copy_context( (UINT_PTR)hglrcSrc,
+ (UINT_PTR)hglrcDst,
+ mask );
+}
+
+/** Equivalent to wglCreateLayerContext( hdc, 0 ), i.e. layer plane 0. */
+WINGDIAPI HGLRC APIENTRY
+wglCreateContext(
+ HDC hdc )
+{
+ return wglCreateLayerContext(hdc, 0);
+}
+
+/**
+ * Forwards to stw_create_layer_context() and returns its UINT_PTR result
+ * cast to an HGLRC.
+ */
+WINGDIAPI HGLRC APIENTRY
+wglCreateLayerContext(
+ HDC hdc,
+ int iLayerPlane )
+{
+ return (HGLRC) stw_create_layer_context( hdc, iLayerPlane );
+}
+
+/** Forwards to stw_delete_context(), passing the handle as UINT_PTR. */
+WINGDIAPI BOOL APIENTRY
+wglDeleteContext(
+ HGLRC hglrc )
+{
+ return stw_delete_context( (UINT_PTR)hglrc );
+}
+
+
+/** Returns stw_get_current_context() cast to an HGLRC. */
+WINGDIAPI HGLRC APIENTRY
+wglGetCurrentContext( VOID )
+{
+ return (HGLRC)stw_get_current_context();
+}
+
+/** Forwards to stw_get_current_dc(). */
+WINGDIAPI HDC APIENTRY
+wglGetCurrentDC( VOID )
+{
+ return stw_get_current_dc();
+}
+
+/** Forwards to stw_make_current(), passing the context handle as UINT_PTR. */
+WINGDIAPI BOOL APIENTRY
+wglMakeCurrent(
+ HDC hdc,
+ HGLRC hglrc )
+{
+ return stw_make_current( hdc, (UINT_PTR)hglrc );
+}
+
+
+/** Forwards to stw_swap_buffers(). */
+WINGDIAPI BOOL APIENTRY
+wglSwapBuffers(
+ HDC hdc )
+{
+ return stw_swap_buffers( hdc );
+}
+
+
+/** Not implemented: ignores its arguments and always returns FALSE. */
+WINGDIAPI BOOL APIENTRY
+wglSwapLayerBuffers(
+ HDC hdc,
+ UINT fuPlanes )
+{
+ (void) hdc;
+ (void) fuPlanes;
+
+ return FALSE;
+}
+
+/** Forwards to stw_get_proc_address(). */
+WINGDIAPI PROC APIENTRY
+wglGetProcAddress(
+ LPCSTR lpszProc )
+{
+ return stw_get_proc_address( lpszProc );
+}
+
+
+/**
+ * Validate the request, then pick a format via stw_pixelformat_choose().
+ *
+ * Returns 0 (no match) unless ppfd is non-NULL, has the expected nSize and
+ * nVersion 1, asks for an RGBA, window-drawable, OpenGL-capable format, and
+ * does not ask for bitmap drawing, GDI support or (mandatory) stereo.
+ */
+WINGDIAPI int APIENTRY
+wglChoosePixelFormat(
+ HDC hdc,
+ CONST PIXELFORMATDESCRIPTOR *ppfd )
+{
+ /* A NULL descriptor cannot match anything; fail instead of dereferencing it. */
+ if (ppfd == NULL)
+ return 0;
+ if (ppfd->nSize != sizeof( PIXELFORMATDESCRIPTOR ) || ppfd->nVersion != 1)
+ return 0;
+ if (ppfd->iPixelType != PFD_TYPE_RGBA)
+ return 0;
+ if (!(ppfd->dwFlags & PFD_DRAW_TO_WINDOW))
+ return 0;
+ if (!(ppfd->dwFlags & PFD_SUPPORT_OPENGL))
+ return 0;
+ if (ppfd->dwFlags & PFD_DRAW_TO_BITMAP)
+ return 0;
+ if (ppfd->dwFlags & PFD_SUPPORT_GDI)
+ return 0;
+ /* Stereo is only rejected when the caller actually insists on it. */
+ if (!(ppfd->dwFlags & PFD_STEREO_DONTCARE) && (ppfd->dwFlags & PFD_STEREO))
+ return 0;
+
+ return stw_pixelformat_choose( hdc, ppfd );
+}
+
+/** Forwards to stw_pixelformat_describe() with the arguments unchanged. */
+WINGDIAPI int APIENTRY
+wglDescribePixelFormat(
+ HDC hdc,
+ int iPixelFormat,
+ UINT nBytes,
+ LPPIXELFORMATDESCRIPTOR ppfd )
+{
+ return stw_pixelformat_describe( hdc, iPixelFormat, nBytes, ppfd );
+}
+
+/** Forwards to stw_pixelformat_get(). */
+WINGDIAPI int APIENTRY
+wglGetPixelFormat(
+ HDC hdc )
+{
+ return stw_pixelformat_get( hdc );
+}
+
+/**
+ * Set the pixel format via stw_pixelformat_set().
+ *
+ * ppfd may be NULL: stw_pixelformat_set() itself calls
+ * SetPixelFormat( hdc, iPixelFormat, NULL ), so a NULL descriptor can reach
+ * this entry point. The nSize sanity check is therefore applied only when a
+ * descriptor is actually supplied.
+ */
+WINGDIAPI BOOL APIENTRY
+wglSetPixelFormat(
+ HDC hdc,
+ int iPixelFormat,
+ const PIXELFORMATDESCRIPTOR *ppfd )
+{
+ if (ppfd != NULL && ppfd->nSize != sizeof( PIXELFORMATDESCRIPTOR ))
+ return FALSE;
+
+ return stw_pixelformat_set( hdc, iPixelFormat );
+}
+
+
+/** Not implemented: hits assert( 0 ) and then returns FALSE. */
+WINGDIAPI BOOL APIENTRY
+wglUseFontBitmapsA(
+ HDC hdc,
+ DWORD first,
+ DWORD count,
+ DWORD listBase )
+{
+ (void) hdc;
+ (void) first;
+ (void) count;
+ (void) listBase;
+
+ assert( 0 );
+
+ return FALSE;
+}
+
+/** Not implemented: hits assert( 0 ) and then returns FALSE. */
+WINGDIAPI BOOL APIENTRY
+wglShareLists(
+ HGLRC hglrc1,
+ HGLRC hglrc2 )
+{
+ (void) hglrc1;
+ (void) hglrc2;
+
+ assert( 0 );
+
+ return FALSE;
+}
+
+/** Not implemented: hits assert( 0 ) and then returns FALSE. */
+WINGDIAPI BOOL APIENTRY
+wglUseFontBitmapsW(
+ HDC hdc,
+ DWORD first,
+ DWORD count,
+ DWORD listBase )
+{
+ (void) hdc;
+ (void) first;
+ (void) count;
+ (void) listBase;
+
+ assert( 0 );
+
+ return FALSE;
+}
+
+/** Not implemented: hits assert( 0 ) and then returns FALSE. */
+WINGDIAPI BOOL APIENTRY
+wglUseFontOutlinesA(
+ HDC hdc,
+ DWORD first,
+ DWORD count,
+ DWORD listBase,
+ FLOAT deviation,
+ FLOAT extrusion,
+ int format,
+ LPGLYPHMETRICSFLOAT lpgmf )
+{
+ (void) hdc;
+ (void) first;
+ (void) count;
+ (void) listBase;
+ (void) deviation;
+ (void) extrusion;
+ (void) format;
+ (void) lpgmf;
+
+ assert( 0 );
+
+ return FALSE;
+}
+
+/** Not implemented: hits assert( 0 ) and then returns FALSE. */
+WINGDIAPI BOOL APIENTRY
+wglUseFontOutlinesW(
+ HDC hdc,
+ DWORD first,
+ DWORD count,
+ DWORD listBase,
+ FLOAT deviation,
+ FLOAT extrusion,
+ int format,
+ LPGLYPHMETRICSFLOAT lpgmf )
+{
+ (void) hdc;
+ (void) first;
+ (void) count;
+ (void) listBase;
+ (void) deviation;
+ (void) extrusion;
+ (void) format;
+ (void) lpgmf;
+
+ assert( 0 );
+
+ return FALSE;
+}
+
+/** Not implemented: hits assert( 0 ) and then returns FALSE. */
+WINGDIAPI BOOL APIENTRY
+wglDescribeLayerPlane(
+ HDC hdc,
+ int iPixelFormat,
+ int iLayerPlane,
+ UINT nBytes,
+ LPLAYERPLANEDESCRIPTOR plpd )
+{
+ (void) hdc;
+ (void) iPixelFormat;
+ (void) iLayerPlane;
+ (void) nBytes;
+ (void) plpd;
+
+ assert( 0 );
+
+ return FALSE;
+}
+
+/** Not implemented: hits assert( 0 ) and then returns 0. */
+WINGDIAPI int APIENTRY
+wglSetLayerPaletteEntries(
+ HDC hdc,
+ int iLayerPlane,
+ int iStart,
+ int cEntries,
+ CONST COLORREF *pcr )
+{
+ (void) hdc;
+ (void) iLayerPlane;
+ (void) iStart;
+ (void) cEntries;
+ (void) pcr;
+
+ assert( 0 );
+
+ return 0;
+}
+
+/** Not implemented: hits assert( 0 ) and then returns 0. */
+WINGDIAPI int APIENTRY
+wglGetLayerPaletteEntries(
+ HDC hdc,
+ int iLayerPlane,
+ int iStart,
+ int cEntries,
+ COLORREF *pcr )
+{
+ (void) hdc;
+ (void) iLayerPlane;
+ (void) iStart;
+ (void) cEntries;
+ (void) pcr;
+
+ assert( 0 );
+
+ return 0;
+}
+
+/** Not implemented: hits assert( 0 ) and then returns FALSE. */
+WINGDIAPI BOOL APIENTRY
+wglRealizeLayerPalette(
+ HDC hdc,
+ int iLayerPlane,
+ BOOL bRealize )
+{
+ (void) hdc;
+ (void) iLayerPlane;
+ (void) bRealize;
+
+ assert( 0 );
+
+ return FALSE;
+}
diff --git a/src/gallium/state_trackers/wgl/wgl/stw_wgl.h b/src/gallium/state_trackers/wgl/wgl/stw_wgl.h
new file mode 100644
index 0000000000..a98179944a
--- /dev/null
+++ b/src/gallium/state_trackers/wgl/wgl/stw_wgl.h
@@ -0,0 +1,63 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef STW_WGL_H_
+#define STW_WGL_H_
+
+
+#include <windows.h>
+
+#include <GL/gl.h>
+
+
+/*
+ * Undeclared APIs exported by opengl32.dll
+ */
+
+WINGDIAPI BOOL WINAPI
+wglSwapBuffers(HDC hdc);
+
+WINGDIAPI int WINAPI
+wglChoosePixelFormat(HDC hdc,
+ CONST PIXELFORMATDESCRIPTOR *ppfd);
+
+WINGDIAPI int WINAPI
+wglDescribePixelFormat(HDC hdc,
+ int iPixelFormat,
+ UINT nBytes,
+ LPPIXELFORMATDESCRIPTOR ppfd);
+
+WINGDIAPI int WINAPI
+wglGetPixelFormat(HDC hdc);
+
+WINGDIAPI BOOL WINAPI
+wglSetPixelFormat(HDC hdc,
+ int iPixelFormat,
+ CONST PIXELFORMATDESCRIPTOR *ppfd);
+
+
+#endif /* STW_WGL_H_ */
diff --git a/src/gallium/winsys/Makefile b/src/gallium/winsys/Makefile
index 2360a6a94a..bce5b3f9e0 100644
--- a/src/gallium/winsys/Makefile
+++ b/src/gallium/winsys/Makefile
@@ -1,24 +1,12 @@
+# src/gallium/winsys/Makefile
TOP = ../../..
include $(TOP)/configs/current
-
SUBDIRS = $(GALLIUM_WINSYS_DIRS)
-
-default: subdirs
-
-
-subdirs:
+default install clean:
@for dir in $(SUBDIRS) ; do \
if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE)) || exit 1 ; \
+ (cd $$dir && $(MAKE) $@) || exit 1; \
fi \
done
-
-
-clean:
- rm -f `find . -name \*.[oa]`
-
-
-# Dummy install target
-install:
diff --git a/src/gallium/winsys/drm/Makefile b/src/gallium/winsys/drm/Makefile
index f466ce6c3c..fee0191643 100644
--- a/src/gallium/winsys/drm/Makefile
+++ b/src/gallium/winsys/drm/Makefile
@@ -1,38 +1,12 @@
-# src/mesa/drivers/dri/Makefile
-
+# src/gallium/winsys/drm/Makefile
TOP = ../../../..
-
include $(TOP)/configs/current
+SUBDIRS = $(GALLIUM_WINSYS_DRM_DIRS)
-
-default: $(TOP)/$(LIB_DIR) subdirs
-
-
-$(TOP)/$(LIB_DIR):
- -mkdir $(TOP)/$(LIB_DIR)
-
-
-subdirs:
- @for dir in $(DRI_DIRS) ; do \
- if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE)) || exit 1 ; \
- fi \
- done
-
-
-install:
- @for dir in $(DRI_DIRS) ; do \
- if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE) install) || exit 1 ; \
- fi \
- done
-
-
-clean:
- @for dir in $(DRI_DIRS) ; do \
+default install clean:
+ @for dir in $(SUBDIRS) ; do \
if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE) clean) ; \
+ (cd $$dir && $(MAKE) $@) || exit 1; \
fi \
done
- -rm -f common/*.o
diff --git a/src/gallium/winsys/drm/Makefile.template b/src/gallium/winsys/drm/Makefile.template
index 80e817b808..9f92cb4207 100644
--- a/src/gallium/winsys/drm/Makefile.template
+++ b/src/gallium/winsys/drm/Makefile.template
@@ -1,9 +1,9 @@
# -*-makefile-*-
MESA_MODULES = \
- $(TOP)/src/mesa/libmesa.a \
+ $(TOP)/src/mesa/libmesagallium.a \
$(GALLIUM_AUXILIARIES)
-
+
COMMON_GALLIUM_SOURCES = \
$(TOP)/src/mesa/drivers/dri/common/utils.c \
$(TOP)/src/mesa/drivers/dri/common/vblank.c \
@@ -79,25 +79,24 @@ SHARED_INCLUDES = \
##### TARGETS #####
-default: depend symlinks $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) $(LIBNAME_EGL) $(TOP)/$(LIB_DIR)/$(LIBNAME_EGL)
-
+default: depend symlinks $(TOP)/$(LIB_DIR)/gallium/$(LIBNAME)
$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template
- $(TOP)/bin/mklib -noprefix -o $@ \
- $(OBJECTS) $(PIPE_DRIVERS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS)
+ $(MKLIB) -noprefix -o $@ \
+ $(OBJECTS) $(PIPE_DRIVERS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) $(DRIVER_EXTRAS)
$(LIBNAME_EGL): $(WINSYS_OBJECTS) $(LIBS)
- $(TOP)/bin/mklib -o $(LIBNAME_EGL) \
+ $(MKLIB) -o $(LIBNAME_EGL) \
-linker "$(CC)" \
-noprefix \
$(OBJECTS) $(MKLIB_OPTIONS) $(WINSYS_OBJECTS) $(PIPE_DRIVERS) $(WINOBJ) $(DRI_LIB_DEPS) \
- --whole-archive $(LIBS) $(GALLIUM_AUXILIARIES) --no-whole-archive
+ --whole-archive $(LIBS) $(GALLIUM_AUXILIARIES) --no-whole-archive $(DRIVER_EXTRAS)
-$(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME)
- $(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR)
+$(TOP)/$(LIB_DIR)/gallium:
+ mkdir -p $@
-$(TOP)/$(LIB_DIR)/$(LIBNAME_EGL): $(LIBNAME_EGL)
- $(INSTALL) $(LIBNAME_EGL) $(TOP)/$(LIB_DIR)
+$(TOP)/$(LIB_DIR)/gallium/$(LIBNAME): $(LIBNAME) $(TOP)/$(LIB_DIR)/gallium
+ $(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR)/gallium
depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
rm -f depend
@@ -118,8 +117,8 @@ clean:
install: $(LIBNAME)
- $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR)
- $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR)
+ $(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+ $(INSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
include depend
diff --git a/src/gallium/winsys/drm/intel/Makefile b/src/gallium/winsys/drm/intel/Makefile
index a670ac044d..d8feef6824 100644
--- a/src/gallium/winsys/drm/intel/Makefile
+++ b/src/gallium/winsys/drm/intel/Makefile
@@ -1,25 +1,12 @@
+# src/gallium/winsys/drm/intel/Makefile
TOP = ../../../../..
include $(TOP)/configs/current
+SUBDIRS = gem $(GALLIUM_STATE_TRACKERS_DIRS)
-SUBDIRS = common dri egl
-
-
-default: subdirs
-
-
-subdirs:
+default install clean:
@for dir in $(SUBDIRS) ; do \
if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE)) || exit 1 ; \
+ (cd $$dir && $(MAKE) $@) || exit 1; \
fi \
done
-
-
-clean:
- rm -f `find . -name \*.[oa]`
- rm -f `find . -name depend`
-
-
-# Dummy install target
-install:
diff --git a/src/gallium/winsys/drm/intel/common/Makefile b/src/gallium/winsys/drm/intel/common/Makefile
deleted file mode 100644
index bf1a7d691f..0000000000
--- a/src/gallium/winsys/drm/intel/common/Makefile
+++ /dev/null
@@ -1,23 +0,0 @@
-TOP = ../../../../../..
-include $(TOP)/configs/current
-
-LIBNAME = inteldrm
-
-C_SOURCES = \
- intel_be_batchbuffer.c \
- intel_be_context.c \
- intel_be_device.c \
- ws_dri_bufmgr.c \
- ws_dri_drmpool.c \
- ws_dri_fencemgr.c \
- ws_dri_mallocpool.c \
- ws_dri_slabpool.c
-
-
-include ./Makefile.template
-
-DRIVER_DEFINES = $(shell pkg-config libdrm --cflags \
- && pkg-config libdrm --atleast-version=2.3.1 \
- && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
-symlinks:
-
diff --git a/src/gallium/winsys/drm/intel/common/Makefile.template b/src/gallium/winsys/drm/intel/common/Makefile.template
deleted file mode 100644
index 02ed363a43..0000000000
--- a/src/gallium/winsys/drm/intel/common/Makefile.template
+++ /dev/null
@@ -1,64 +0,0 @@
-# -*-makefile-*-
-
-
-# We still have a dependency on the "dri" buffer manager. Most likely
-# the interface can be reused in non-dri environments, and also as a
-# frontend to simpler memory managers.
-#
-COMMON_SOURCES =
-
-OBJECTS = $(C_SOURCES:.c=.o) \
- $(CPP_SOURCES:.cpp=.o) \
- $(ASM_SOURCES:.S=.o)
-
-
-### Include directories
-INCLUDES = \
- -I. \
- -I$(TOP)/src/gallium/include \
- -I$(TOP)/src/gallium/auxiliary \
- -I$(TOP)/src/gallium/drivers \
- -I$(TOP)/include \
- $(DRIVER_INCLUDES)
-
-
-##### RULES #####
-
-.c.o:
- $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-.cpp.o:
- $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-.S.o:
- $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-
-##### TARGETS #####
-
-default: depend symlinks $(LIBNAME)
-
-
-$(LIBNAME): $(OBJECTS) Makefile Makefile.template
- $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS)
-
-
-depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
- rm -f depend
- touch depend
- $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \
- $(ASM_SOURCES) 2> /dev/null
-
-
-# Emacs tags
-tags:
- etags `find . -name \*.[ch]` `find ../include`
-
-
-# Remove .o and backup files
-clean::
- -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS)
- -rm -f depend depend.bak
-
-
-include depend
diff --git a/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.c b/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.c
deleted file mode 100644
index bc13a5761e..0000000000
--- a/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.c
+++ /dev/null
@@ -1,429 +0,0 @@
-
-#include "intel_be_batchbuffer.h"
-#include "intel_be_context.h"
-#include "intel_be_device.h"
-#include <errno.h>
-
-#include "xf86drm.h"
-
-static void
-intel_realloc_relocs(struct intel_be_batchbuffer *batch, int num_relocs)
-{
- unsigned long size = num_relocs * I915_RELOC0_STRIDE + I915_RELOC_HEADER;
-
- size *= sizeof(uint32_t);
- batch->reloc = realloc(batch->reloc, size);
- batch->reloc_size = num_relocs;
-}
-
-
-void
-intel_be_batchbuffer_reset(struct intel_be_batchbuffer *batch)
-{
- /*
- * Get a new, free batchbuffer.
- */
- drmBO *bo;
- struct drm_bo_info_req *req;
-
- driBOUnrefUserList(batch->list);
- driBOResetList(batch->list);
-
- /* base.size is the size available to the i915simple driver */
- batch->base.size = batch->device->max_batch_size - BATCH_RESERVED;
- batch->base.actual_size = batch->device->max_batch_size;
- driBOData(batch->buffer, batch->base.actual_size, NULL, NULL, 0);
-
- /*
- * Add the batchbuffer to the validate list.
- */
-
- driBOAddListItem(batch->list, batch->buffer,
- DRM_BO_FLAG_EXE | DRM_BO_FLAG_MEM_TT,
- DRM_BO_FLAG_EXE | DRM_BO_MASK_MEM,
- &batch->dest_location, &batch->node);
-
- req = &batch->node->bo_arg.d.req.bo_req;
-
- /*
- * Set up information needed for us to make relocations
- * relative to the underlying drm buffer objects.
- */
-
- driReadLockKernelBO();
- bo = driBOKernel(batch->buffer);
- req->presumed_offset = (uint64_t) bo->offset;
- req->hint = DRM_BO_HINT_PRESUMED_OFFSET;
- batch->drmBOVirtual = (uint8_t *) bo->virtual;
- driReadUnlockKernelBO();
-
- /*
- * Adjust the relocation buffer size.
- */
-
- if (batch->reloc_size > INTEL_MAX_RELOCS ||
- batch->reloc == NULL)
- intel_realloc_relocs(batch, INTEL_DEFAULT_RELOCS);
-
- assert(batch->reloc != NULL);
- batch->reloc[0] = 0; /* No relocs yet. */
- batch->reloc[1] = 1; /* Reloc type 1 */
- batch->reloc[2] = 0; /* Only a single relocation list. */
- batch->reloc[3] = 0; /* Only a single relocation list. */
-
- batch->base.map = driBOMap(batch->buffer, DRM_BO_FLAG_WRITE, 0);
- batch->poolOffset = driBOPoolOffset(batch->buffer);
- batch->base.ptr = batch->base.map;
- batch->dirty_state = ~0;
- batch->nr_relocs = 0;
- batch->flags = 0;
- batch->id = 0;//batch->intel->intelScreen->batch_id++;
-}
-
-/*======================================================================
- * Public functions
- */
-struct intel_be_batchbuffer *
-intel_be_batchbuffer_alloc(struct intel_be_context *intel)
-{
- struct intel_be_batchbuffer *batch = calloc(sizeof(*batch), 1);
-
- batch->intel = intel;
- batch->device = intel->device;
-
- driGenBuffers(intel->device->batchPool, "batchbuffer", 1,
- &batch->buffer, 4096,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE, 0);
- batch->last_fence = NULL;
- batch->list = driBOCreateList(20);
- batch->reloc = NULL;
- intel_be_batchbuffer_reset(batch);
- return batch;
-}
-
-void
-intel_be_batchbuffer_free(struct intel_be_batchbuffer *batch)
-{
- if (batch->last_fence) {
- driFenceFinish(batch->last_fence,
- DRM_FENCE_TYPE_EXE, FALSE);
- driFenceUnReference(&batch->last_fence);
- }
- if (batch->base.map) {
- driBOUnmap(batch->buffer);
- batch->base.map = NULL;
- }
- driBOUnReference(batch->buffer);
- driBOFreeList(batch->list);
- if (batch->reloc)
- free(batch->reloc);
- batch->buffer = NULL;
- free(batch);
-}
-
-void
-intel_be_offset_relocation(struct intel_be_batchbuffer *batch,
- unsigned pre_add,
- struct _DriBufferObject *driBO,
- uint64_t val_flags,
- uint64_t val_mask)
-{
- int itemLoc;
- struct _drmBONode *node;
- uint32_t *reloc;
- struct drm_bo_info_req *req;
-
- driBOAddListItem(batch->list, driBO, val_flags, val_mask,
- &itemLoc, &node);
- req = &node->bo_arg.d.req.bo_req;
-
- if (!(req->hint & DRM_BO_HINT_PRESUMED_OFFSET)) {
-
- /*
- * Stop other threads from tampering with the underlying
- * drmBO while we're reading its offset.
- */
-
- driReadLockKernelBO();
- req->presumed_offset = (uint64_t) driBOKernel(driBO)->offset;
- driReadUnlockKernelBO();
- req->hint = DRM_BO_HINT_PRESUMED_OFFSET;
- }
-
- pre_add += driBOPoolOffset(driBO);
-
- if (batch->nr_relocs == batch->reloc_size)
- intel_realloc_relocs(batch, batch->reloc_size * 2);
-
- reloc = batch->reloc +
- (I915_RELOC_HEADER + batch->nr_relocs * I915_RELOC0_STRIDE);
-
- reloc[0] = ((uint8_t *)batch->base.ptr - batch->drmBOVirtual);
- i915_batchbuffer_dword(&batch->base, req->presumed_offset + pre_add);
- reloc[1] = pre_add;
- reloc[2] = itemLoc;
- reloc[3] = batch->dest_location;
- batch->nr_relocs++;
-}
-
-static void
-i915_drm_copy_reply(const struct drm_bo_info_rep * rep, drmBO * buf)
-{
- buf->handle = rep->handle;
- buf->flags = rep->flags;
- buf->size = rep->size;
- buf->offset = rep->offset;
- buf->mapHandle = rep->arg_handle;
- buf->proposedFlags = rep->proposed_flags;
- buf->start = rep->buffer_start;
- buf->fenceFlags = rep->fence_flags;
- buf->replyFlags = rep->rep_flags;
- buf->pageAlignment = rep->page_alignment;
-}
-
-static int
-i915_execbuf(struct intel_be_batchbuffer *batch,
- unsigned int used,
- boolean ignore_cliprects,
- drmBOList *list,
- struct drm_i915_execbuffer *ea)
-{
-// struct intel_be_context *intel = batch->intel;
- drmBONode *node;
- drmMMListHead *l;
- struct drm_i915_op_arg *arg, *first;
- struct drm_bo_op_req *req;
- struct drm_bo_info_rep *rep;
- uint64_t *prevNext = NULL;
- drmBO *buf;
- int ret = 0;
- uint32_t count = 0;
-
- first = NULL;
- for (l = list->list.next; l != &list->list; l = l->next) {
- node = DRMLISTENTRY(drmBONode, l, head);
-
- arg = &node->bo_arg;
- req = &arg->d.req;
-
- if (!first)
- first = arg;
-
- if (prevNext)
- *prevNext = (unsigned long)arg;
-
- prevNext = &arg->next;
- req->bo_req.handle = node->buf->handle;
- req->op = drm_bo_validate;
- req->bo_req.flags = node->arg0;
- req->bo_req.mask = node->arg1;
- req->bo_req.hint |= 0;
- count++;
- }
-
- memset(ea, 0, sizeof(*ea));
- ea->num_buffers = count;
- ea->batch.start = batch->poolOffset;
- ea->batch.used = used;
-#if 0 /* ZZZ JB: no cliprects used */
- ea->batch.cliprects = intel->pClipRects;
- ea->batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
- ea->batch.DR1 = 0;
- ea->batch.DR4 = 0;((((GLuint) intel->drawX) & 0xffff) |
- (((GLuint) intel->drawY) << 16));
-#else
- ea->batch.cliprects = NULL;
- ea->batch.num_cliprects = 0;
- ea->batch.DR1 = 0;
- ea->batch.DR4 = 0;
-#endif
- ea->fence_arg.flags = DRM_I915_FENCE_FLAG_FLUSHED;
- ea->ops_list = (unsigned long) first;
- first->reloc_ptr = (unsigned long) batch->reloc;
- batch->reloc[0] = batch->nr_relocs;
-
- //return -EFAULT;
- do {
- ret = drmCommandWriteRead(batch->device->fd, DRM_I915_EXECBUFFER, ea,
- sizeof(*ea));
- } while (ret == -EAGAIN);
-
- if (ret != 0)
- return ret;
-
- for (l = list->list.next; l != &list->list; l = l->next) {
- node = DRMLISTENTRY(drmBONode, l, head);
- arg = &node->bo_arg;
- rep = &arg->d.rep.bo_info;
-
- if (!arg->handled) {
- return -EFAULT;
- }
- if (arg->d.rep.ret)
- return arg->d.rep.ret;
-
- buf = node->buf;
- i915_drm_copy_reply(rep, buf);
- }
- return 0;
-}
-
-/* TODO: Push this whole function into bufmgr.
- */
-static struct _DriFenceObject *
-do_flush_locked(struct intel_be_batchbuffer *batch,
- unsigned int used,
- boolean ignore_cliprects, boolean allow_unlock)
-{
- struct intel_be_context *intel = batch->intel;
- struct _DriFenceObject *fo;
- drmFence fence;
- drmBOList *boList;
- struct drm_i915_execbuffer ea;
- int ret = 0;
-
- driBOValidateUserList(batch->list);
- boList = driGetdrmBOList(batch->list);
-
-#if 0 /* ZZZ JB Allways run */
- if (!(intel->numClipRects == 0 && !ignore_cliprects)) {
-#else
- if (1) {
-#endif
- ret = i915_execbuf(batch, used, ignore_cliprects, boList, &ea);
- } else {
- driPutdrmBOList(batch->list);
- fo = NULL;
- goto out;
- }
- driPutdrmBOList(batch->list);
- if (ret)
- abort();
-
- if (ea.fence_arg.error != 0) {
-
- /*
- * The hardware has been idled by the kernel.
- * Don't fence the driBOs.
- */
-
- if (batch->last_fence)
- driFenceUnReference(&batch->last_fence);
-#if 0 /* ZZZ JB: no _mesa_* funcs in gallium */
- _mesa_printf("fence error\n");
-#endif
- batch->last_fence = NULL;
- fo = NULL;
- goto out;
- }
-
- fence.handle = ea.fence_arg.handle;
- fence.fence_class = ea.fence_arg.fence_class;
- fence.type = ea.fence_arg.type;
- fence.flags = ea.fence_arg.flags;
- fence.signaled = ea.fence_arg.signaled;
-
- fo = driBOFenceUserList(batch->device->fenceMgr, batch->list,
- "SuperFence", &fence);
-
- if (driFenceType(fo) & DRM_I915_FENCE_TYPE_RW) {
- if (batch->last_fence)
- driFenceUnReference(&batch->last_fence);
- /*
- * FIXME: Context last fence??
- */
- batch->last_fence = fo;
- driFenceReference(fo);
- }
- out:
-#if 0 /* ZZZ JB: fix this */
- intel->vtbl.lost_hardware(intel);
-#else
- (void)intel;
-#endif
- return fo;
-}
-
-
-struct _DriFenceObject *
-intel_be_batchbuffer_flush(struct intel_be_batchbuffer *batch)
-{
- struct intel_be_context *intel = batch->intel;
- unsigned int used = batch->base.ptr - batch->base.map;
- boolean was_locked = batch->intel->hardware_locked(intel);
- struct _DriFenceObject *fence;
-
- if (used == 0) {
- driFenceReference(batch->last_fence);
- return batch->last_fence;
- }
-
- /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a
- * performance drain that we would like to avoid.
- */
-#if 0 /* ZZZ JB: what should we do here? */
- if (used & 4) {
- ((int *) batch->base.ptr)[0] = intel->vtbl.flush_cmd();
- ((int *) batch->base.ptr)[1] = 0;
- ((int *) batch->base.ptr)[2] = MI_BATCH_BUFFER_END;
- used += 12;
- }
- else {
- ((int *) batch->base.ptr)[0] = intel->vtbl.flush_cmd();
- ((int *) batch->base.ptr)[1] = MI_BATCH_BUFFER_END;
- used += 8;
- }
-#else
- if (used & 4) {
- ((int *) batch->base.ptr)[0] = ((0<<29)|(4<<23)); // MI_FLUSH;
- ((int *) batch->base.ptr)[1] = 0;
- ((int *) batch->base.ptr)[2] = (0xA<<23); // MI_BATCH_BUFFER_END;
- used += 12;
- }
- else {
- ((int *) batch->base.ptr)[0] = ((0<<29)|(4<<23)); // MI_FLUSH;
- ((int *) batch->base.ptr)[1] = (0xA<<23); // MI_BATCH_BUFFER_END;
- used += 8;
- }
-#endif
- driBOUnmap(batch->buffer);
- batch->base.ptr = NULL;
- batch->base.map = NULL;
-
- /* TODO: Just pass the relocation list and dma buffer up to the
- * kernel.
- */
- if (!was_locked)
- intel->hardware_lock(intel);
-
- fence = do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS),
- FALSE);
-
- if (!was_locked)
- intel->hardware_unlock(intel);
-
- /* Reset the buffer:
- */
- intel_be_batchbuffer_reset(batch);
- return fence;
-}
-
-void
-intel_be_batchbuffer_finish(struct intel_be_batchbuffer *batch)
-{
- struct _DriFenceObject *fence = intel_be_batchbuffer_flush(batch);
- driFenceFinish(fence, driFenceType(fence), FALSE);
- driFenceUnReference(&fence);
-}
-
-#if 0
-void
-intel_be_batchbuffer_data(struct intel_be_batchbuffer *batch,
- const void *data, unsigned int bytes, unsigned int flags)
-{
- assert((bytes & 3) == 0);
- intel_batchbuffer_require_space(batch, bytes, flags);
- memcpy(batch->base.ptr, data, bytes);
- batch->base.ptr += bytes;
-}
-#endif
diff --git a/src/gallium/winsys/drm/intel/common/intel_be_context.c b/src/gallium/winsys/drm/intel/common/intel_be_context.c
deleted file mode 100644
index 1af39674f4..0000000000
--- a/src/gallium/winsys/drm/intel/common/intel_be_context.c
+++ /dev/null
@@ -1,107 +0,0 @@
-
-/*
- * Authors: Jakob Bornecrantz <jakob-at-tungstengraphics.com>
- */
-
-#include "ws_dri_fencemgr.h"
-#include "intel_be_device.h"
-#include "intel_be_context.h"
-#include "intel_be_batchbuffer.h"
-
-static INLINE struct intel_be_context *
-intel_be_context(struct i915_winsys *sws)
-{
- return (struct intel_be_context *)sws;
-}
-
-/* Simple batchbuffer interface:
- */
-
-static struct i915_batchbuffer*
-intel_i915_batch_get(struct i915_winsys *sws)
-{
- struct intel_be_context *intel = intel_be_context(sws);
- return &intel->batch->base;
-}
-
-static void intel_i915_batch_reloc(struct i915_winsys *sws,
- struct pipe_buffer *buf,
- unsigned access_flags,
- unsigned delta)
-{
- struct intel_be_context *intel = intel_be_context(sws);
-
- unsigned flags = DRM_BO_FLAG_MEM_TT;
- unsigned mask = DRM_BO_MASK_MEM;
-
- if (access_flags & I915_BUFFER_ACCESS_WRITE) {
- flags |= DRM_BO_FLAG_WRITE;
- mask |= DRM_BO_FLAG_WRITE;
- }
-
- if (access_flags & I915_BUFFER_ACCESS_READ) {
- flags |= DRM_BO_FLAG_READ;
- mask |= DRM_BO_FLAG_READ;
- }
-
- intel_be_offset_relocation(intel->batch,
- delta,
- dri_bo(buf),
- flags,
- mask);
-}
-
-static void intel_i915_batch_flush(struct i915_winsys *sws,
- struct pipe_fence_handle **fence)
-{
- struct intel_be_context *intel = intel_be_context(sws);
-
- union {
- struct _DriFenceObject *dri;
- struct pipe_fence_handle *pipe;
- } fu;
-
- if (fence)
- assert(!*fence);
-
- fu.dri = intel_be_batchbuffer_flush(intel->batch);
-
- if (!fu.dri) {
- assert(0);
- *fence = NULL;
- return;
- }
-
- if (fu.dri) {
- if (fence)
- *fence = fu.pipe;
- else
- driFenceUnReference(&fu.dri);
- }
-
-}
-
-boolean
-intel_be_init_context(struct intel_be_context *intel, struct intel_be_device *device)
-{
- assert(intel);
- assert(device);
-
- intel->device = device;
-
- /* TODO move framebuffer createion to the driver */
-
- intel->base.batch_get = intel_i915_batch_get;
- intel->base.batch_reloc = intel_i915_batch_reloc;
- intel->base.batch_flush = intel_i915_batch_flush;
-
- intel->batch = intel_be_batchbuffer_alloc(intel);
-
- return true;
-}
-
-void
-intel_be_destroy_context(struct intel_be_context *intel)
-{
- intel_be_batchbuffer_free(intel->batch);
-}
diff --git a/src/gallium/winsys/drm/intel/common/intel_be_context.h b/src/gallium/winsys/drm/intel/common/intel_be_context.h
deleted file mode 100644
index d5cbc93594..0000000000
--- a/src/gallium/winsys/drm/intel/common/intel_be_context.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* These need to be diffrent from the intel winsys */
-#ifndef INTEL_BE_CONTEXT_H
-#define INTEL_BE_CONTEXT_H
-
-#include "i915simple/i915_winsys.h"
-
-struct intel_be_context
-{
- /** Interface to i915simple driver */
- struct i915_winsys base;
-
- struct intel_be_device *device;
- struct intel_be_batchbuffer *batch;
-
- /*
- * Hardware lock functions.
- *
- * Needs to be filled in by the winsys.
- */
- void (*hardware_lock)(struct intel_be_context *context);
- void (*hardware_unlock)(struct intel_be_context *context);
- boolean (*hardware_locked)(struct intel_be_context *context);
-};
-
-/**
- * Intialize a allocated intel_be_context struct.
- *
- * Remember to set the hardware_* functions.
- */
-boolean
-intel_be_init_context(struct intel_be_context *intel,
- struct intel_be_device *device);
-
-/**
- * Destroy a intel_be_context.
- * Does not free the struct that is up to the winsys.
- */
-void
-intel_be_destroy_context(struct intel_be_context *intel);
-#endif
diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.c b/src/gallium/winsys/drm/intel/common/intel_be_device.c
deleted file mode 100644
index 019ee5cbd2..0000000000
--- a/src/gallium/winsys/drm/intel/common/intel_be_device.c
+++ /dev/null
@@ -1,308 +0,0 @@
-
-
-/*
- * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com>
- * Jakob Bornecrantz <jakob-at-tungstengraphics-dot-com>
- */
-
-#include "intel_be_device.h"
-#include "ws_dri_bufmgr.h"
-#include "ws_dri_bufpool.h"
-#include "ws_dri_fencemgr.h"
-
-#include "pipe/p_winsys.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-#include "pipe/p_inlines.h"
-#include "util/u_memory.h"
-
-#include "i915simple/i915_screen.h"
-
-/* Turn a pipe winsys into an intel/pipe winsys:
- */
-static INLINE struct intel_be_device *
-intel_be_device( struct pipe_winsys *winsys )
-{
- return (struct intel_be_device *)winsys;
-}
-
-
-/*
- * Buffer functions.
- *
- * Most callbacks map direcly onto dri_bufmgr operations:
- */
-
-static void *intel_be_buffer_map(struct pipe_winsys *winsys,
- struct pipe_buffer *buf,
- unsigned flags )
-{
- unsigned drm_flags = 0;
-
- if (flags & PIPE_BUFFER_USAGE_CPU_WRITE)
- drm_flags |= DRM_BO_FLAG_WRITE;
-
- if (flags & PIPE_BUFFER_USAGE_CPU_READ)
- drm_flags |= DRM_BO_FLAG_READ;
-
- return driBOMap( dri_bo(buf), drm_flags, 0 );
-}
-
-static void intel_be_buffer_unmap(struct pipe_winsys *winsys,
- struct pipe_buffer *buf)
-{
- driBOUnmap( dri_bo(buf) );
-}
-
-static void
-intel_be_buffer_destroy(struct pipe_winsys *winsys,
- struct pipe_buffer *buf)
-{
- driBOUnReference( dri_bo(buf) );
- FREE(buf);
-}
-
-static struct pipe_buffer *
-intel_be_buffer_create(struct pipe_winsys *winsys,
- unsigned alignment,
- unsigned usage,
- unsigned size )
-{
- struct intel_be_buffer *buffer = CALLOC_STRUCT( intel_be_buffer );
- struct intel_be_device *iws = intel_be_device(winsys);
- unsigned flags = 0;
- struct _DriBufferPool *pool;
-
- buffer->base.refcount = 1;
- buffer->base.alignment = alignment;
- buffer->base.usage = usage;
- buffer->base.size = size;
-
- if (usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_CONSTANT)) {
- flags |= DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED;
- pool = iws->mallocPool;
- } else if (usage & PIPE_BUFFER_USAGE_CUSTOM) {
- /* For vertex buffers */
- flags |= DRM_BO_FLAG_MEM_VRAM | DRM_BO_FLAG_MEM_TT;
- pool = iws->vertexPool;
- } else {
- flags |= DRM_BO_FLAG_MEM_VRAM | DRM_BO_FLAG_MEM_TT;
- pool = iws->regionPool;
- }
-
- if (usage & PIPE_BUFFER_USAGE_GPU_READ)
- flags |= DRM_BO_FLAG_READ;
-
- if (usage & PIPE_BUFFER_USAGE_GPU_WRITE)
- flags |= DRM_BO_FLAG_WRITE;
-
- /* drm complains if we don't set any read/write flags.
- */
- if ((flags & (DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE)) == 0)
- flags |= DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE;
-
- buffer->pool = pool;
- driGenBuffers( buffer->pool,
- "pipe buffer", 1, &buffer->driBO, alignment, flags, 0 );
-
- driBOData( buffer->driBO, size, NULL, buffer->pool, 0 );
-
- return &buffer->base;
-}
-
-
-static struct pipe_buffer *
-intel_be_user_buffer_create(struct pipe_winsys *winsys, void *ptr, unsigned bytes)
-{
- struct intel_be_buffer *buffer = CALLOC_STRUCT( intel_be_buffer );
- struct intel_be_device *iws = intel_be_device(winsys);
-
- driGenUserBuffer( iws->regionPool,
- "pipe user buffer", &buffer->driBO, ptr, bytes );
-
- buffer->base.refcount = 1;
-
- return &buffer->base;
-}
-
-struct pipe_buffer *
-intel_be_buffer_from_handle(struct intel_be_device *device,
- const char* name, unsigned handle)
-{
- struct intel_be_buffer *be_buf = malloc(sizeof(*be_buf));
- struct pipe_buffer *buffer;
-
- if (!be_buf)
- goto err;
-
- memset(be_buf, 0, sizeof(*be_buf));
-
- driGenBuffers(device->staticPool, name, 1, &be_buf->driBO, 0, 0, 0);
- driBOSetReferenced(be_buf->driBO, handle);
-
- if (0) /** XXX TODO check error */
- goto err_bo;
-
- buffer = &be_buf->base;
- buffer->refcount = 1;
- buffer->alignment = 0;
- buffer->usage = 0;
- buffer->size = driBOSize(be_buf->driBO);
-
- return buffer;
-err_bo:
- free(be_buf);
-err:
- return NULL;
-}
-
-
-/*
- * Surface functions.
- *
- * Deprecated!
- */
-
-static struct pipe_surface *
-intel_i915_surface_alloc(struct pipe_winsys *winsys)
-{
- assert((size_t)"intel_i915_surface_alloc is deprecated" & 0);
- return NULL;
-}
-
-static int
-intel_i915_surface_alloc_storage(struct pipe_winsys *winsys,
- struct pipe_surface *surf,
- unsigned width, unsigned height,
- enum pipe_format format,
- unsigned flags,
- unsigned tex_usage)
-{
- assert((size_t)"intel_i915_surface_alloc_storage is deprecated" & 0);
- return -1;
-}
-
-static void
-intel_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s)
-{
- assert((size_t)"intel_i915_surface_release is deprecated" & 0);
-}
-
-
-/*
- * Fence functions
- */
-
-static void
-intel_be_fence_reference( struct pipe_winsys *sws,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *fence )
-{
- if (*ptr)
- driFenceUnReference((struct _DriFenceObject **)ptr);
-
- if (fence)
- *ptr = (struct pipe_fence_handle *)driFenceReference((struct _DriFenceObject *)fence);
-}
-
-static int
-intel_be_fence_signalled( struct pipe_winsys *sws,
- struct pipe_fence_handle *fence,
- unsigned flag )
-{
- return driFenceSignaled((struct _DriFenceObject *)fence, flag);
-}
-
-static int
-intel_be_fence_finish( struct pipe_winsys *sws,
- struct pipe_fence_handle *fence,
- unsigned flag )
-{
- return driFenceFinish((struct _DriFenceObject *)fence, flag, 0);
-}
-
-
-/*
- * Misc functions
- */
-
-boolean
-intel_be_init_device(struct intel_be_device *dev, int fd, unsigned id)
-{
- dev->fd = fd;
- dev->max_batch_size = 16 * 4096;
- dev->max_vertex_size = 128 * 4096;
-
- dev->base.buffer_create = intel_be_buffer_create;
- dev->base.user_buffer_create = intel_be_user_buffer_create;
- dev->base.buffer_map = intel_be_buffer_map;
- dev->base.buffer_unmap = intel_be_buffer_unmap;
- dev->base.buffer_destroy = intel_be_buffer_destroy;
- dev->base.surface_alloc = intel_i915_surface_alloc;
- dev->base.surface_alloc_storage = intel_i915_surface_alloc_storage;
- dev->base.surface_release = intel_i915_surface_release;
- dev->base.fence_reference = intel_be_fence_reference;
- dev->base.fence_signalled = intel_be_fence_signalled;
- dev->base.fence_finish = intel_be_fence_finish;
-
-#if 0 /* Set by the winsys */
- dev->base.flush_frontbuffer = intel_flush_frontbuffer;
- dev->base.get_name = intel_get_name;
-#endif
-
- dev->fMan = driInitFreeSlabManager(10, 10);
- dev->fenceMgr = driFenceMgrTTMInit(dev->fd);
-
- dev->mallocPool = driMallocPoolInit();
- dev->staticPool = driDRMPoolInit(dev->fd);
- /* Sizes: 64 128 256 512 1024 2048 4096 8192 16384 32768 */
- dev->regionPool = driSlabPoolInit(dev->fd,
- DRM_BO_FLAG_READ |
- DRM_BO_FLAG_WRITE |
- DRM_BO_FLAG_MEM_TT,
- DRM_BO_FLAG_READ |
- DRM_BO_FLAG_WRITE |
- DRM_BO_FLAG_MEM_TT,
- 64,
- 10, 120, 4096 * 64, 0,
- dev->fMan);
-
- dev->vertexPool = driSlabPoolInit(dev->fd,
- DRM_BO_FLAG_READ |
- DRM_BO_FLAG_WRITE |
- DRM_BO_FLAG_MEM_TT,
- DRM_BO_FLAG_READ |
- DRM_BO_FLAG_WRITE |
- DRM_BO_FLAG_MEM_TT,
- dev->max_vertex_size,
- 1, 120, dev->max_vertex_size * 4, 0,
- dev->fMan);
-
- dev->batchPool = driSlabPoolInit(dev->fd,
- DRM_BO_FLAG_EXE |
- DRM_BO_FLAG_MEM_TT,
- DRM_BO_FLAG_EXE |
- DRM_BO_FLAG_MEM_TT,
- dev->max_batch_size,
- 1, 40, dev->max_batch_size * 16, 0,
- dev->fMan);
-
- /* Fill in this struct with callbacks that i915simple will need to
- * communicate with the window system, buffer manager, etc.
- */
- dev->screen = i915_create_screen(&dev->base, id);
-
- return true;
-}
-
-void
-intel_be_destroy_device(struct intel_be_device *dev)
-{
- driPoolTakeDown(dev->mallocPool);
- driPoolTakeDown(dev->staticPool);
- driPoolTakeDown(dev->regionPool);
- driPoolTakeDown(dev->vertexPool);
- driPoolTakeDown(dev->batchPool);
-
- /** TODO takedown fenceMgr and fMan */
-}
diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.h b/src/gallium/winsys/drm/intel/common/intel_be_device.h
deleted file mode 100644
index 3f8b3f585c..0000000000
--- a/src/gallium/winsys/drm/intel/common/intel_be_device.h
+++ /dev/null
@@ -1,72 +0,0 @@
-#ifndef INTEL_DRM_DEVICE_H
-#define INTEL_DRM_DEVICE_H
-
-#include "pipe/p_winsys.h"
-#include "pipe/p_context.h"
-
-/*
- * Device
- */
-
-struct intel_be_device
-{
- struct pipe_winsys base;
-
- /**
- * Hw level screen
- */
- struct pipe_screen *screen;
-
- int fd; /**< Drm file discriptor */
-
- size_t max_batch_size;
- size_t max_vertex_size;
-
- struct _DriFenceMgr *fenceMgr;
-
- struct _DriBufferPool *batchPool;
- struct _DriBufferPool *regionPool;
- struct _DriBufferPool *mallocPool;
- struct _DriBufferPool *vertexPool;
- struct _DriBufferPool *staticPool;
- struct _DriFreeSlabManager *fMan;
-};
-
-boolean
-intel_be_init_device(struct intel_be_device *device, int fd, unsigned id);
-
-void
-intel_be_destroy_device(struct intel_be_device *dev);
-
-/*
- * Buffer
- */
-
-struct intel_be_buffer {
- struct pipe_buffer base;
- struct _DriBufferPool *pool;
- struct _DriBufferObject *driBO;
-};
-
-/**
- * Create a be buffer from a drm bo handle
- *
- * Takes a reference
- */
-struct pipe_buffer *
-intel_be_buffer_from_handle(struct intel_be_device *device,
- const char* name, unsigned handle);
-
-static INLINE struct intel_be_buffer *
-intel_be_buffer(struct pipe_buffer *buf)
-{
- return (struct intel_be_buffer *)buf;
-}
-
-static INLINE struct _DriBufferObject *
-dri_bo(struct pipe_buffer *buf)
-{
- return intel_be_buffer(buf)->driBO;
-}
-
-#endif
diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c b/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c
deleted file mode 100644
index 517a97b3ee..0000000000
--- a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-/*
- * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
- */
-
-#include <xf86drm.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "pipe/p_thread.h"
-#include "errno.h"
-#include "ws_dri_bufmgr.h"
-#include "string.h"
-#include "pipe/p_debug.h"
-#include "ws_dri_bufpool.h"
-#include "ws_dri_fencemgr.h"
-
-
-/*
- * This lock is here to protect drmBO structs changing underneath us during a
- * validate list call, since validatelist cannot take individiual locks for
- * each drmBO. Validatelist takes this lock in write mode. Any access to an
- * individual drmBO should take this lock in read mode, since in that case, the
- * driBufferObject mutex will protect the access. Locking order is
- * driBufferObject mutex - > this rw lock.
- */
-
-pipe_static_mutex(bmMutex);
-pipe_static_condvar(bmCond);
-
-static int kernelReaders = 0;
-static int num_buffers = 0;
-static int num_user_buffers = 0;
-
-static drmBO *drmBOListBuf(void *iterator)
-{
- drmBONode *node;
- drmMMListHead *l = (drmMMListHead *) iterator;
- node = DRMLISTENTRY(drmBONode, l, head);
- return node->buf;
-}
-
-static void *drmBOListIterator(drmBOList *list)
-{
- void *ret = list->list.next;
-
- if (ret == &list->list)
- return NULL;
- return ret;
-}
-
-static void *drmBOListNext(drmBOList *list, void *iterator)
-{
- void *ret;
-
- drmMMListHead *l = (drmMMListHead *) iterator;
- ret = l->next;
- if (ret == &list->list)
- return NULL;
- return ret;
-}
-
-static drmBONode *drmAddListItem(drmBOList *list, drmBO *item,
- uint64_t arg0,
- uint64_t arg1)
-{
- drmBONode *node;
- drmMMListHead *l;
-
- l = list->free.next;
- if (l == &list->free) {
- node = (drmBONode *) malloc(sizeof(*node));
- if (!node) {
- return NULL;
- }
- list->numCurrent++;
- }
- else {
- DRMLISTDEL(l);
- node = DRMLISTENTRY(drmBONode, l, head);
- }
- node->buf = item;
- node->arg0 = arg0;
- node->arg1 = arg1;
- DRMLISTADD(&node->head, &list->list);
- list->numOnList++;
- return node;
-}
-
-static int drmAddValidateItem(drmBOList *list, drmBO *buf, uint64_t flags,
- uint64_t mask, int *newItem)
-{
- drmBONode *node, *cur;
- drmMMListHead *l;
-
- *newItem = 0;
- cur = NULL;
-
- for (l = list->list.next; l != &list->list; l = l->next) {
- node = DRMLISTENTRY(drmBONode, l, head);
- if (node->buf == buf) {
- cur = node;
- break;
- }
- }
- if (!cur) {
- cur = drmAddListItem(list, buf, flags, mask);
- if (!cur) {
- return -ENOMEM;
- }
- *newItem = 1;
- cur->arg0 = flags;
- cur->arg1 = mask;
- }
- else {
- uint64_t memFlags = cur->arg0 & flags & DRM_BO_MASK_MEM;
- uint64_t accFlags = (cur->arg0 | flags) & ~DRM_BO_MASK_MEM;
-
- if (mask & cur->arg1 & ~DRM_BO_MASK_MEM & (cur->arg0 ^ flags)) {
- return -EINVAL;
- }
-
- cur->arg1 |= mask;
- cur->arg0 = (cur->arg0 & ~mask) | ((memFlags | accFlags) & mask);
-
- if (((cur->arg1 & DRM_BO_MASK_MEM) != 0) &&
- (cur->arg0 & DRM_BO_MASK_MEM) == 0) {
- return -EINVAL;
- }
- }
- return 0;
-}
-
-static void drmBOFreeList(drmBOList *list)
-{
- drmBONode *node;
- drmMMListHead *l;
-
- l = list->list.next;
- while(l != &list->list) {
- DRMLISTDEL(l);
- node = DRMLISTENTRY(drmBONode, l, head);
- free(node);
- l = list->list.next;
- list->numCurrent--;
- list->numOnList--;
- }
-
- l = list->free.next;
- while(l != &list->free) {
- DRMLISTDEL(l);
- node = DRMLISTENTRY(drmBONode, l, head);
- free(node);
- l = list->free.next;
- list->numCurrent--;
- }
-}
-
-static int drmAdjustListNodes(drmBOList *list)
-{
- drmBONode *node;
- drmMMListHead *l;
- int ret = 0;
-
- while(list->numCurrent < list->numTarget) {
- node = (drmBONode *) malloc(sizeof(*node));
- if (!node) {
- ret = -ENOMEM;
- break;
- }
- list->numCurrent++;
- DRMLISTADD(&node->head, &list->free);
- }
-
- while(list->numCurrent > list->numTarget) {
- l = list->free.next;
- if (l == &list->free)
- break;
- DRMLISTDEL(l);
- node = DRMLISTENTRY(drmBONode, l, head);
- free(node);
- list->numCurrent--;
- }
- return ret;
-}
-
-static int drmBOCreateList(int numTarget, drmBOList *list)
-{
- DRMINITLISTHEAD(&list->list);
- DRMINITLISTHEAD(&list->free);
- list->numTarget = numTarget;
- list->numCurrent = 0;
- list->numOnList = 0;
- return drmAdjustListNodes(list);
-}
-
-static int drmBOResetList(drmBOList *list)
-{
- drmMMListHead *l;
- int ret;
-
- ret = drmAdjustListNodes(list);
- if (ret)
- return ret;
-
- l = list->list.next;
- while (l != &list->list) {
- DRMLISTDEL(l);
- DRMLISTADD(l, &list->free);
- list->numOnList--;
- l = list->list.next;
- }
- return drmAdjustListNodes(list);
-}
-
-void driWriteLockKernelBO(void)
-{
- pipe_mutex_lock(bmMutex);
- while(kernelReaders != 0)
- pipe_condvar_wait(bmCond, bmMutex);
-}
-
-void driWriteUnlockKernelBO(void)
-{
- pipe_mutex_unlock(bmMutex);
-}
-
-void driReadLockKernelBO(void)
-{
- pipe_mutex_lock(bmMutex);
- kernelReaders++;
- pipe_mutex_unlock(bmMutex);
-}
-
-void driReadUnlockKernelBO(void)
-{
- pipe_mutex_lock(bmMutex);
- if (--kernelReaders == 0)
- pipe_condvar_broadcast(bmCond);
- pipe_mutex_unlock(bmMutex);
-}
-
-
-
-
-/*
- * TODO: Introduce fence pools in the same way as
- * buffer object pools.
- */
-
-typedef struct _DriBufferObject
-{
- DriBufferPool *pool;
- pipe_mutex mutex;
- int refCount;
- const char *name;
- uint64_t flags;
- unsigned hint;
- unsigned alignment;
- unsigned createdByReference;
- void *private;
- /* user-space buffer: */
- unsigned userBuffer;
- void *userData;
- unsigned userSize;
-} DriBufferObject;
-
-typedef struct _DriBufferList {
- drmBOList drmBuffers; /* List of kernel buffers needing validation */
- drmBOList driBuffers; /* List of user-space buffers needing validation */
-} DriBufferList;
-
-
-void
-bmError(int val, const char *file, const char *function, int line)
-{
- printf("Fatal video memory manager error \"%s\".\n"
- "Check kernel logs or set the LIBGL_DEBUG\n"
- "environment variable to \"verbose\" for more info.\n"
- "Detected in file %s, line %d, function %s.\n",
- strerror(-val), file, line, function);
-#ifndef NDEBUG
- abort();
-#else
- abort();
-#endif
-}
-
-extern drmBO *
-driBOKernel(struct _DriBufferObject *buf)
-{
- drmBO *ret;
-
- driReadLockKernelBO();
- pipe_mutex_lock(buf->mutex);
- assert(buf->private != NULL);
- ret = buf->pool->kernel(buf->pool, buf->private);
- if (!ret)
- BM_CKFATAL(-EINVAL);
- pipe_mutex_unlock(buf->mutex);
- driReadUnlockKernelBO();
-
- return ret;
-}
-
-void
-driBOWaitIdle(struct _DriBufferObject *buf, int lazy)
-{
-
- /*
- * This function may block. Is it sane to keep the mutex held during
- * that time??
- */
-
- pipe_mutex_lock(buf->mutex);
- BM_CKFATAL(buf->pool->waitIdle(buf->pool, buf->private, &buf->mutex, lazy));
- pipe_mutex_unlock(buf->mutex);
-}
-
-void *
-driBOMap(struct _DriBufferObject *buf, unsigned flags, unsigned hint)
-{
- void *virtual;
- int retval;
-
- if (buf->userBuffer) {
- return buf->userData;
- }
-
- pipe_mutex_lock(buf->mutex);
- assert(buf->private != NULL);
- retval = buf->pool->map(buf->pool, buf->private, flags, hint,
- &buf->mutex, &virtual);
- pipe_mutex_unlock(buf->mutex);
-
- return retval == 0 ? virtual : NULL;
-}
-
-void
-driBOUnmap(struct _DriBufferObject *buf)
-{
- if (buf->userBuffer)
- return;
-
- assert(buf->private != NULL);
- pipe_mutex_lock(buf->mutex);
- BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private));
- pipe_mutex_unlock(buf->mutex);
-}
-
-unsigned long
-driBOOffset(struct _DriBufferObject *buf)
-{
- unsigned long ret;
-
- assert(buf->private != NULL);
-
- pipe_mutex_lock(buf->mutex);
- ret = buf->pool->offset(buf->pool, buf->private);
- pipe_mutex_unlock(buf->mutex);
- return ret;
-}
-
-unsigned long
-driBOPoolOffset(struct _DriBufferObject *buf)
-{
- unsigned long ret;
-
- assert(buf->private != NULL);
-
- pipe_mutex_lock(buf->mutex);
- ret = buf->pool->poolOffset(buf->pool, buf->private);
- pipe_mutex_unlock(buf->mutex);
- return ret;
-}
-
-uint64_t
-driBOFlags(struct _DriBufferObject *buf)
-{
- uint64_t ret;
-
- assert(buf->private != NULL);
-
- driReadLockKernelBO();
- pipe_mutex_lock(buf->mutex);
- ret = buf->pool->flags(buf->pool, buf->private);
- pipe_mutex_unlock(buf->mutex);
- driReadUnlockKernelBO();
- return ret;
-}
-
-struct _DriBufferObject *
-driBOReference(struct _DriBufferObject *buf)
-{
- pipe_mutex_lock(buf->mutex);
- if (++buf->refCount == 1) {
- pipe_mutex_unlock(buf->mutex);
- BM_CKFATAL(-EINVAL);
- }
- pipe_mutex_unlock(buf->mutex);
- return buf;
-}
-
-void
-driBOUnReference(struct _DriBufferObject *buf)
-{
- int tmp;
-
- if (!buf)
- return;
-
- pipe_mutex_lock(buf->mutex);
- tmp = --buf->refCount;
- if (!tmp) {
- pipe_mutex_unlock(buf->mutex);
- if (buf->private) {
- if (buf->createdByReference)
- buf->pool->unreference(buf->pool, buf->private);
- else
- buf->pool->destroy(buf->pool, buf->private);
- }
- if (buf->userBuffer)
- num_user_buffers--;
- else
- num_buffers--;
- free(buf);
- } else
- pipe_mutex_unlock(buf->mutex);
-
-}
-
-
-int
-driBOData(struct _DriBufferObject *buf,
- unsigned size, const void *data,
- DriBufferPool *newPool,
- uint64_t flags)
-{
- void *virtual = NULL;
- int newBuffer;
- int retval = 0;
- struct _DriBufferPool *pool;
-
- assert(!buf->userBuffer); /* XXX just do a memcpy? */
-
- pipe_mutex_lock(buf->mutex);
- pool = buf->pool;
-
- if (pool == NULL && newPool != NULL) {
- buf->pool = newPool;
- pool = newPool;
- }
- if (newPool == NULL)
- newPool = pool;
-
- if (!pool->create) {
- assert((size_t)"driBOData called on invalid buffer\n" & 0);
- BM_CKFATAL(-EINVAL);
- }
-
- newBuffer = (!buf->private || pool != newPool ||
- pool->size(pool, buf->private) < size);
-
- if (!flags)
- flags = buf->flags;
-
- if (newBuffer) {
-
- if (buf->createdByReference) {
- assert((size_t)"driBOData requiring resizing called on shared buffer.\n" & 0);
- BM_CKFATAL(-EINVAL);
- }
-
- if (buf->private)
- buf->pool->destroy(buf->pool, buf->private);
-
- pool = newPool;
- buf->pool = newPool;
- buf->private = pool->create(pool, size, flags, DRM_BO_HINT_DONT_FENCE,
- buf->alignment);
- if (!buf->private)
- retval = -ENOMEM;
-
- if (retval == 0)
- retval = pool->map(pool, buf->private,
- DRM_BO_FLAG_WRITE,
- DRM_BO_HINT_DONT_BLOCK, &buf->mutex, &virtual);
- } else if (pool->map(pool, buf->private, DRM_BO_FLAG_WRITE,
- DRM_BO_HINT_DONT_BLOCK, &buf->mutex, &virtual)) {
- /*
- * Buffer is busy. need to create a new one.
- */
-
- void *newBuf;
-
- newBuf = pool->create(pool, size, flags, DRM_BO_HINT_DONT_FENCE,
- buf->alignment);
- if (newBuf) {
- buf->pool->destroy(buf->pool, buf->private);
- buf->private = newBuf;
- }
-
- retval = pool->map(pool, buf->private,
- DRM_BO_FLAG_WRITE, 0, &buf->mutex, &virtual);
- } else {
- uint64_t flag_diff = flags ^ buf->flags;
-
- /*
- * We might need to change buffer flags.
- */
-
- if (flag_diff){
- assert(pool->setStatus != NULL);
- BM_CKFATAL(pool->unmap(pool, buf->private));
- BM_CKFATAL(pool->setStatus(pool, buf->private, flag_diff,
- buf->flags));
- if (!data)
- goto out;
-
- retval = pool->map(pool, buf->private,
- DRM_BO_FLAG_WRITE, 0, &buf->mutex, &virtual);
- }
- }
-
- if (retval == 0) {
- if (data)
- memcpy(virtual, data, size);
-
- BM_CKFATAL(pool->unmap(pool, buf->private));
- }
-
- out:
- pipe_mutex_unlock(buf->mutex);
-
- return retval;
-}
-
-void
-driBOSubData(struct _DriBufferObject *buf,
- unsigned long offset, unsigned long size, const void *data)
-{
- void *virtual;
-
- assert(!buf->userBuffer); /* XXX just do a memcpy? */
-
- pipe_mutex_lock(buf->mutex);
- if (size && data) {
- BM_CKFATAL(buf->pool->map(buf->pool, buf->private,
- DRM_BO_FLAG_WRITE, 0, &buf->mutex,
- &virtual));
- memcpy((unsigned char *) virtual + offset, data, size);
- BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private));
- }
- pipe_mutex_unlock(buf->mutex);
-}
-
-void
-driBOGetSubData(struct _DriBufferObject *buf,
- unsigned long offset, unsigned long size, void *data)
-{
- void *virtual;
-
- assert(!buf->userBuffer); /* XXX just do a memcpy? */
-
- pipe_mutex_lock(buf->mutex);
- if (size && data) {
- BM_CKFATAL(buf->pool->map(buf->pool, buf->private,
- DRM_BO_FLAG_READ, 0, &buf->mutex, &virtual));
- memcpy(data, (unsigned char *) virtual + offset, size);
- BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private));
- }
- pipe_mutex_unlock(buf->mutex);
-}
-
-void
-driBOSetReferenced(struct _DriBufferObject *buf,
- unsigned long handle)
-{
- pipe_mutex_lock(buf->mutex);
- if (buf->private != NULL) {
- assert((size_t)"Invalid buffer for setReferenced\n" & 0);
- BM_CKFATAL(-EINVAL);
-
- }
- if (buf->pool->reference == NULL) {
- assert((size_t)"Invalid buffer pool for setReferenced\n" & 0);
- BM_CKFATAL(-EINVAL);
- }
- buf->private = buf->pool->reference(buf->pool, handle);
- if (!buf->private) {
- assert((size_t)"Invalid buffer pool for setStatic\n" & 0);
- BM_CKFATAL(-ENOMEM);
- }
- buf->createdByReference = TRUE;
- buf->flags = buf->pool->kernel(buf->pool, buf->private)->flags;
- pipe_mutex_unlock(buf->mutex);
-}
-
-int
-driGenBuffers(struct _DriBufferPool *pool,
- const char *name,
- unsigned n,
- struct _DriBufferObject *buffers[],
- unsigned alignment, uint64_t flags, unsigned hint)
-{
- struct _DriBufferObject *buf;
- int i;
-
- flags = (flags) ? flags : DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_MEM_VRAM |
- DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE;
-
- ++num_buffers;
-
- assert(pool);
-
- for (i = 0; i < n; ++i) {
- buf = (struct _DriBufferObject *) calloc(1, sizeof(*buf));
- if (!buf)
- return -ENOMEM;
-
- pipe_mutex_init(buf->mutex);
- pipe_mutex_lock(buf->mutex);
- buf->refCount = 1;
- buf->flags = flags;
- buf->hint = hint;
- buf->name = name;
- buf->alignment = alignment;
- buf->pool = pool;
- buf->createdByReference = 0;
- pipe_mutex_unlock(buf->mutex);
- buffers[i] = buf;
- }
- return 0;
-}
-
-void
-driGenUserBuffer(struct _DriBufferPool *pool,
- const char *name,
- struct _DriBufferObject **buffers,
- void *ptr, unsigned bytes)
-{
- const unsigned alignment = 1, flags = 0, hint = 0;
-
- --num_buffers; /* JB: is inced in GenBuffes */
- driGenBuffers(pool, name, 1, buffers, alignment, flags, hint);
- ++num_user_buffers;
-
- (*buffers)->userBuffer = 1;
- (*buffers)->userData = ptr;
- (*buffers)->userSize = bytes;
-}
-
-void
-driDeleteBuffers(unsigned n, struct _DriBufferObject *buffers[])
-{
- int i;
-
- for (i = 0; i < n; ++i) {
- driBOUnReference(buffers[i]);
- }
-}
-
-
-void
-driInitBufMgr(int fd)
-{
- ;
-}
-
-/*
- * Note that lists are per-context and don't need mutex protection.
- */
-
-struct _DriBufferList *
-driBOCreateList(int target)
-{
- struct _DriBufferList *list = calloc(sizeof(*list), 1);
-
- BM_CKFATAL(drmBOCreateList(target, &list->drmBuffers));
- BM_CKFATAL(drmBOCreateList(target, &list->driBuffers));
- return list;
-}
-
-int
-driBOResetList(struct _DriBufferList * list)
-{
- int ret;
- ret = drmBOResetList(&list->drmBuffers);
- if (ret)
- return ret;
- ret = drmBOResetList(&list->driBuffers);
- return ret;
-}
-
-void
-driBOFreeList(struct _DriBufferList * list)
-{
- drmBOFreeList(&list->drmBuffers);
- drmBOFreeList(&list->driBuffers);
- free(list);
-}
-
-
-/*
- * Copied from libdrm, because it is needed by driAddValidateItem.
- */
-
-static drmBONode *
-driAddListItem(drmBOList * list, drmBO * item,
- uint64_t arg0, uint64_t arg1)
-{
- drmBONode *node;
- drmMMListHead *l;
-
- l = list->free.next;
- if (l == &list->free) {
- node = (drmBONode *) malloc(sizeof(*node));
- if (!node) {
- return NULL;
- }
- list->numCurrent++;
- } else {
- DRMLISTDEL(l);
- node = DRMLISTENTRY(drmBONode, l, head);
- }
- memset(&node->bo_arg, 0, sizeof(node->bo_arg));
- node->buf = item;
- node->arg0 = arg0;
- node->arg1 = arg1;
- DRMLISTADDTAIL(&node->head, &list->list);
- list->numOnList++;
- return node;
-}
-
-/*
- * Slightly modified version compared to the libdrm version.
- * This one returns the list index of the buffer put on the list.
- */
-
-static int
-driAddValidateItem(drmBOList * list, drmBO * buf, uint64_t flags,
- uint64_t mask, int *itemLoc,
- struct _drmBONode **pnode)
-{
- drmBONode *node, *cur;
- drmMMListHead *l;
- int count = 0;
-
- cur = NULL;
-
- for (l = list->list.next; l != &list->list; l = l->next) {
- node = DRMLISTENTRY(drmBONode, l, head);
- if (node->buf == buf) {
- cur = node;
- break;
- }
- count++;
- }
- if (!cur) {
- cur = driAddListItem(list, buf, flags, mask);
- if (!cur)
- return -ENOMEM;
-
- cur->arg0 = flags;
- cur->arg1 = mask;
- } else {
- uint64_t memFlags = cur->arg0 & flags & DRM_BO_MASK_MEM;
- uint64_t accFlags = (cur->arg0 | flags) & ~DRM_BO_MASK_MEM;
-
- if (mask & cur->arg1 & ~DRM_BO_MASK_MEM & (cur->arg0 ^ flags)) {
- return -EINVAL;
- }
-
- cur->arg1 |= mask;
- cur->arg0 = (cur->arg0 & ~mask) | ((memFlags | accFlags) & mask);
-
- if (((cur->arg1 & DRM_BO_MASK_MEM) != 0) &&
- (cur->arg0 & DRM_BO_MASK_MEM) == 0) {
- return -EINVAL;
- }
- }
- *itemLoc = count;
- *pnode = cur;
- return 0;
-}
-
-
-void
-driBOAddListItem(struct _DriBufferList * list, struct _DriBufferObject *buf,
- uint64_t flags, uint64_t mask, int *itemLoc,
- struct _drmBONode **node)
-{
- int newItem;
-
- pipe_mutex_lock(buf->mutex);
- BM_CKFATAL(driAddValidateItem(&list->drmBuffers,
- buf->pool->kernel(buf->pool, buf->private),
- flags, mask, itemLoc, node));
- BM_CKFATAL(drmAddValidateItem(&list->driBuffers, (drmBO *) buf,
- flags, mask, &newItem));
- if (newItem)
- buf->refCount++;
-
- pipe_mutex_unlock(buf->mutex);
-}
-
-drmBOList *driGetdrmBOList(struct _DriBufferList *list)
-{
- driWriteLockKernelBO();
- return &list->drmBuffers;
-}
-
-void driPutdrmBOList(struct _DriBufferList *list)
-{
- driWriteUnlockKernelBO();
-}
-
-
-void
-driBOFence(struct _DriBufferObject *buf, struct _DriFenceObject *fence)
-{
- pipe_mutex_lock(buf->mutex);
- if (buf->pool->fence)
- BM_CKFATAL(buf->pool->fence(buf->pool, buf->private, fence));
- pipe_mutex_unlock(buf->mutex);
-
-}
-
-void
-driBOUnrefUserList(struct _DriBufferList *list)
-{
- struct _DriBufferObject *buf;
- void *curBuf;
-
- curBuf = drmBOListIterator(&list->driBuffers);
- while (curBuf) {
- buf = (struct _DriBufferObject *)drmBOListBuf(curBuf);
- driBOUnReference(buf);
- curBuf = drmBOListNext(&list->driBuffers, curBuf);
- }
-}
-
-struct _DriFenceObject *
-driBOFenceUserList(struct _DriFenceMgr *mgr,
- struct _DriBufferList *list, const char *name,
- drmFence *kFence)
-{
- struct _DriFenceObject *fence;
- struct _DriBufferObject *buf;
- void *curBuf;
-
- fence = driFenceCreate(mgr, kFence->fence_class, kFence->type,
- kFence, sizeof(*kFence));
- curBuf = drmBOListIterator(&list->driBuffers);
-
- /*
- * User-space fencing callbacks.
- */
-
- while (curBuf) {
- buf = (struct _DriBufferObject *) drmBOListBuf(curBuf);
- driBOFence(buf, fence);
- driBOUnReference(buf);
- curBuf = drmBOListNext(&list->driBuffers, curBuf);
- }
-
- driBOResetList(list);
- return fence;
-}
-
-void
-driBOValidateUserList(struct _DriBufferList * list)
-{
- void *curBuf;
- struct _DriBufferObject *buf;
-
- curBuf = drmBOListIterator(&list->driBuffers);
-
- /*
- * User-space validation callbacks.
- */
-
- while (curBuf) {
- buf = (struct _DriBufferObject *) drmBOListBuf(curBuf);
- pipe_mutex_lock(buf->mutex);
- if (buf->pool->validate)
- BM_CKFATAL(buf->pool->validate(buf->pool, buf->private, &buf->mutex));
- pipe_mutex_unlock(buf->mutex);
- curBuf = drmBOListNext(&list->driBuffers, curBuf);
- }
-}
-
-
-void
-driPoolTakeDown(struct _DriBufferPool *pool)
-{
- pool->takeDown(pool);
-
-}
-
-unsigned long
-driBOSize(struct _DriBufferObject *buf)
-{
- unsigned long size;
-
- pipe_mutex_lock(buf->mutex);
- size = buf->pool->size(buf->pool, buf->private);
- pipe_mutex_unlock(buf->mutex);
-
- return size;
-
-}
-
-drmBOList *driBOGetDRMBuffers(struct _DriBufferList *list)
-{
- return &list->drmBuffers;
-}
-
-drmBOList *driBOGetDRIBuffers(struct _DriBufferList *list)
-{
- return &list->driBuffers;
-}
-
diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.h b/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.h
deleted file mode 100644
index e6c0cff0a0..0000000000
--- a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-/*
- * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
- */
-
-#ifndef _PSB_BUFMGR_H_
-#define _PSB_BUFMGR_H_
-#include <xf86mm.h>
-#include "i915_drm.h"
-#include "ws_dri_fencemgr.h"
-
-typedef struct _drmBONode
-{
- drmMMListHead head;
- drmBO *buf;
- struct drm_i915_op_arg bo_arg;
- uint64_t arg0;
- uint64_t arg1;
-} drmBONode;
-
-typedef struct _drmBOList {
- unsigned numTarget;
- unsigned numCurrent;
- unsigned numOnList;
- drmMMListHead list;
- drmMMListHead free;
-} drmBOList;
-
-
-struct _DriFenceObject;
-struct _DriBufferObject;
-struct _DriBufferPool;
-struct _DriBufferList;
-
-/*
- * Return a pointer to the libdrm buffer object this DriBufferObject
- * uses.
- */
-
-extern drmBO *driBOKernel(struct _DriBufferObject *buf);
-extern void *driBOMap(struct _DriBufferObject *buf, unsigned flags,
- unsigned hint);
-extern void driBOUnmap(struct _DriBufferObject *buf);
-extern unsigned long driBOOffset(struct _DriBufferObject *buf);
-extern unsigned long driBOPoolOffset(struct _DriBufferObject *buf);
-
-extern uint64_t driBOFlags(struct _DriBufferObject *buf);
-extern struct _DriBufferObject *driBOReference(struct _DriBufferObject *buf);
-extern void driBOUnReference(struct _DriBufferObject *buf);
-
-extern int driBOData(struct _DriBufferObject *r_buf,
- unsigned size, const void *data,
- struct _DriBufferPool *pool, uint64_t flags);
-
-extern void driBOSubData(struct _DriBufferObject *buf,
- unsigned long offset, unsigned long size,
- const void *data);
-extern void driBOGetSubData(struct _DriBufferObject *buf,
- unsigned long offset, unsigned long size,
- void *data);
-extern int driGenBuffers(struct _DriBufferPool *pool,
- const char *name,
- unsigned n,
- struct _DriBufferObject *buffers[],
- unsigned alignment, uint64_t flags, unsigned hint);
-extern void driGenUserBuffer(struct _DriBufferPool *pool,
- const char *name,
- struct _DriBufferObject *buffers[],
- void *ptr, unsigned bytes);
-extern void driDeleteBuffers(unsigned n, struct _DriBufferObject *buffers[]);
-extern void driInitBufMgr(int fd);
-extern struct _DriBufferList *driBOCreateList(int target);
-extern int driBOResetList(struct _DriBufferList * list);
-extern void driBOAddListItem(struct _DriBufferList * list,
- struct _DriBufferObject *buf,
- uint64_t flags, uint64_t mask, int *itemLoc,
- struct _drmBONode **node);
-
-extern void driBOValidateList(int fd, struct _DriBufferList * list);
-extern void driBOFreeList(struct _DriBufferList * list);
-extern struct _DriFenceObject *driBOFenceUserList(struct _DriFenceMgr *mgr,
- struct _DriBufferList *list,
- const char *name,
- drmFence *kFence);
-extern void driBOUnrefUserList(struct _DriBufferList *list);
-extern void driBOValidateUserList(struct _DriBufferList * list);
-extern drmBOList *driGetdrmBOList(struct _DriBufferList *list);
-extern void driPutdrmBOList(struct _DriBufferList *list);
-
-extern void driBOFence(struct _DriBufferObject *buf,
- struct _DriFenceObject *fence);
-
-extern void driPoolTakeDown(struct _DriBufferPool *pool);
-extern void driBOSetReferenced(struct _DriBufferObject *buf,
- unsigned long handle);
-unsigned long driBOSize(struct _DriBufferObject *buf);
-extern void driBOWaitIdle(struct _DriBufferObject *buf, int lazy);
-extern void driPoolTakeDown(struct _DriBufferPool *pool);
-
-extern void driReadLockKernelBO(void);
-extern void driReadUnlockKernelBO(void);
-extern void driWriteLockKernelBO(void);
-extern void driWriteUnlockKernelBO(void);
-
-/*
- * For debugging purposes.
- */
-
-extern drmBOList *driBOGetDRMBuffers(struct _DriBufferList *list);
-extern drmBOList *driBOGetDRIBuffers(struct _DriBufferList *list);
-#endif
diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h b/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h
deleted file mode 100644
index ad3b6f3931..0000000000
--- a/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-/*
- * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- */
-
-#ifndef _PSB_BUFPOOL_H_
-#define _PSB_BUFPOOL_H_
-
-#include <xf86drm.h>
-#include "pipe/p_thread.h"
-struct _DriFenceObject;
-
-typedef struct _DriBufferPool
-{
- int fd;
- int (*map) (struct _DriBufferPool * pool, void *private,
- unsigned flags, int hint, pipe_mutex *mutex,
- void **virtual);
- int (*unmap) (struct _DriBufferPool * pool, void *private);
- int (*destroy) (struct _DriBufferPool * pool, void *private);
- unsigned long (*offset) (struct _DriBufferPool * pool, void *private);
- unsigned long (*poolOffset) (struct _DriBufferPool * pool, void *private);
- uint64_t (*flags) (struct _DriBufferPool * pool, void *private);
- unsigned long (*size) (struct _DriBufferPool * pool, void *private);
- void *(*create) (struct _DriBufferPool * pool, unsigned long size,
- uint64_t flags, unsigned hint, unsigned alignment);
- void *(*reference) (struct _DriBufferPool * pool, unsigned handle);
- int (*unreference) (struct _DriBufferPool * pool, void *private);
- int (*fence) (struct _DriBufferPool * pool, void *private,
- struct _DriFenceObject * fence);
- drmBO *(*kernel) (struct _DriBufferPool * pool, void *private);
- int (*validate) (struct _DriBufferPool * pool, void *private, pipe_mutex *mutex);
- int (*waitIdle) (struct _DriBufferPool *pool, void *private, pipe_mutex *mutex,
- int lazy);
- int (*setStatus) (struct _DriBufferPool *pool, void *private,
- uint64_t flag_diff, uint64_t old_flags);
- void (*takeDown) (struct _DriBufferPool * pool);
- void *data;
-} DriBufferPool;
-
-extern void bmError(int val, const char *file, const char *function,
- int line);
-#define BM_CKFATAL(val) \
- do{ \
- int tstVal = (val); \
- if (tstVal) \
- bmError(tstVal, __FILE__, __FUNCTION__, __LINE__); \
- } while(0);
-
-
-/*
- * Builtin pools.
- */
-
-/*
- * Kernel buffer objects. Size in multiples of page size. Page size aligned.
- */
-
-extern struct _DriBufferPool *driDRMPoolInit(int fd);
-extern struct _DriBufferPool *driMallocPoolInit(void);
-
-struct _DriFreeSlabManager;
-extern struct _DriBufferPool * driSlabPoolInit(int fd, uint64_t flags,
- uint64_t validMask,
- uint32_t smallestSize,
- uint32_t numSizes,
- uint32_t desiredNumBuffers,
- uint32_t maxSlabSize,
- uint32_t pageAlignment,
- struct _DriFreeSlabManager *fMan);
-extern void driFinishFreeSlabManager(struct _DriFreeSlabManager *fMan);
-extern struct _DriFreeSlabManager *
-driInitFreeSlabManager(uint32_t checkIntervalMsec, uint32_t slabTimeoutMsec);
-
-
-#endif
diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c
deleted file mode 100644
index 54618b1c82..0000000000
--- a/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-/*
- * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- */
-
-#include <xf86drm.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include "ws_dri_bufpool.h"
-#include "ws_dri_bufmgr.h"
-#include "assert.h"
-
-/*
- * Buffer pool implementation using DRM buffer objects as DRI buffer objects.
- */
-
-static void *
-pool_create(struct _DriBufferPool *pool,
- unsigned long size, uint64_t flags, unsigned hint,
- unsigned alignment)
-{
- drmBO *buf = (drmBO *) malloc(sizeof(*buf));
- int ret;
- unsigned pageSize = getpagesize();
-
- if (!buf)
- return NULL;
-
- if ((alignment > pageSize) && (alignment % pageSize)) {
- free(buf);
- return NULL;
- }
-
- ret = drmBOCreate(pool->fd, size, alignment / pageSize,
- NULL,
- flags, hint, buf);
- if (ret) {
- free(buf);
- return NULL;
- }
-
- return (void *) buf;
-}
-
-static void *
-pool_reference(struct _DriBufferPool *pool, unsigned handle)
-{
- drmBO *buf = (drmBO *) malloc(sizeof(*buf));
- int ret;
-
- if (!buf)
- return NULL;
-
- ret = drmBOReference(pool->fd, handle, buf);
-
- if (ret) {
- free(buf);
- return NULL;
- }
-
- return (void *) buf;
-}
-
-static int
-pool_destroy(struct _DriBufferPool *pool, void *private)
-{
- int ret;
- drmBO *buf = (drmBO *) private;
- driReadLockKernelBO();
- ret = drmBOUnreference(pool->fd, buf);
- free(buf);
- driReadUnlockKernelBO();
- return ret;
-}
-
-static int
-pool_unreference(struct _DriBufferPool *pool, void *private)
-{
- int ret;
- drmBO *buf = (drmBO *) private;
- driReadLockKernelBO();
- ret = drmBOUnreference(pool->fd, buf);
- free(buf);
- driReadUnlockKernelBO();
- return ret;
-}
-
-static int
-pool_map(struct _DriBufferPool *pool, void *private, unsigned flags,
- int hint, pipe_mutex *mutex, void **virtual)
-{
- drmBO *buf = (drmBO *) private;
- int ret;
-
- driReadLockKernelBO();
- ret = drmBOMap(pool->fd, buf, flags, hint, virtual);
- driReadUnlockKernelBO();
- return ret;
-}
-
-static int
-pool_unmap(struct _DriBufferPool *pool, void *private)
-{
- drmBO *buf = (drmBO *) private;
- int ret;
-
- driReadLockKernelBO();
- ret = drmBOUnmap(pool->fd, buf);
- driReadUnlockKernelBO();
-
- return ret;
-}
-
-static unsigned long
-pool_offset(struct _DriBufferPool *pool, void *private)
-{
- drmBO *buf = (drmBO *) private;
- unsigned long offset;
-
- driReadLockKernelBO();
- assert(buf->flags & DRM_BO_FLAG_NO_MOVE);
- offset = buf->offset;
- driReadUnlockKernelBO();
-
- return buf->offset;
-}
-
-static unsigned long
-pool_poolOffset(struct _DriBufferPool *pool, void *private)
-{
- return 0;
-}
-
-static uint64_t
-pool_flags(struct _DriBufferPool *pool, void *private)
-{
- drmBO *buf = (drmBO *) private;
- uint64_t flags;
-
- driReadLockKernelBO();
- flags = buf->flags;
- driReadUnlockKernelBO();
-
- return flags;
-}
-
-
-static unsigned long
-pool_size(struct _DriBufferPool *pool, void *private)
-{
- drmBO *buf = (drmBO *) private;
- unsigned long size;
-
- driReadLockKernelBO();
- size = buf->size;
- driReadUnlockKernelBO();
-
- return buf->size;
-}
-
-static int
-pool_fence(struct _DriBufferPool *pool, void *private,
- struct _DriFenceObject *fence)
-{
- /*
- * Noop. The kernel handles all fencing.
- */
-
- return 0;
-}
-
-static drmBO *
-pool_kernel(struct _DriBufferPool *pool, void *private)
-{
- return (drmBO *) private;
-}
-
-static int
-pool_waitIdle(struct _DriBufferPool *pool, void *private, pipe_mutex *mutex,
- int lazy)
-{
- drmBO *buf = (drmBO *) private;
- int ret;
-
- driReadLockKernelBO();
- ret = drmBOWaitIdle(pool->fd, buf, (lazy) ? DRM_BO_HINT_WAIT_LAZY:0);
- driReadUnlockKernelBO();
-
- return ret;
-}
-
-
-static void
-pool_takedown(struct _DriBufferPool *pool)
-{
- free(pool);
-}
-
-/*static int
-pool_setStatus(struct _DriBufferPool *pool, void *private,
- uint64_t flag_diff, uint64_t old_flags)
-{
- drmBO *buf = (drmBO *) private;
- uint64_t new_flags = old_flags ^ flag_diff;
- int ret;
-
- driReadLockKernelBO();
- ret = drmBOSetStatus(pool->fd, buf, new_flags, flag_diff,
- 0, 0, 0);
- driReadUnlockKernelBO();
- return ret;
-}*/
-
-struct _DriBufferPool *
-driDRMPoolInit(int fd)
-{
- struct _DriBufferPool *pool;
-
- pool = (struct _DriBufferPool *) malloc(sizeof(*pool));
-
- if (!pool)
- return NULL;
-
- pool->fd = fd;
- pool->map = &pool_map;
- pool->unmap = &pool_unmap;
- pool->destroy = &pool_destroy;
- pool->offset = &pool_offset;
- pool->poolOffset = &pool_poolOffset;
- pool->flags = &pool_flags;
- pool->size = &pool_size;
- pool->create = &pool_create;
- pool->fence = &pool_fence;
- pool->kernel = &pool_kernel;
- pool->validate = NULL;
- pool->waitIdle = &pool_waitIdle;
- pool->takeDown = &pool_takedown;
- pool->reference = &pool_reference;
- pool->unreference = &pool_unreference;
- pool->data = NULL;
- return pool;
-}
diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c b/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c
deleted file mode 100644
index 831c75d30c..0000000000
--- a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c
+++ /dev/null
@@ -1,377 +0,0 @@
-#include "ws_dri_fencemgr.h"
-#include "pipe/p_thread.h"
-#include <xf86mm.h>
-#include <string.h>
-#include <unistd.h>
-
-/*
- * Note: Locking order is
- * _DriFenceObject::mutex
- * _DriFenceMgr::mutex
- */
-
-struct _DriFenceMgr {
- /*
- * Constant members. Need no mutex protection.
- */
- struct _DriFenceMgrCreateInfo info;
- void *private;
-
- /*
- * These members are protected by this->mutex
- */
- pipe_mutex mutex;
- int refCount;
- drmMMListHead *heads;
- int num_fences;
-};
-
-struct _DriFenceObject {
-
- /*
- * These members are constant and need no mutex protection.
- */
- struct _DriFenceMgr *mgr;
- uint32_t fence_class;
- uint32_t fence_type;
-
- /*
- * These members are protected by mgr->mutex.
- */
- drmMMListHead head;
- int refCount;
-
- /*
- * These members are protected by this->mutex.
- */
- pipe_mutex mutex;
- uint32_t signaled_type;
- void *private;
-};
-
-uint32_t
-driFenceType(struct _DriFenceObject *fence)
-{
- return fence->fence_type;
-}
-
-struct _DriFenceMgr *
-driFenceMgrCreate(const struct _DriFenceMgrCreateInfo *info)
-{
- struct _DriFenceMgr *tmp;
- uint32_t i;
-
- tmp = calloc(1, sizeof(*tmp));
- if (!tmp)
- return NULL;
-
- pipe_mutex_init(tmp->mutex);
- pipe_mutex_lock(tmp->mutex);
- tmp->refCount = 1;
- tmp->info = *info;
- tmp->num_fences = 0;
- tmp->heads = calloc(tmp->info.num_classes, sizeof(*tmp->heads));
- if (!tmp->heads)
- goto out_err;
-
- for (i=0; i<tmp->info.num_classes; ++i) {
- DRMINITLISTHEAD(&tmp->heads[i]);
- }
- pipe_mutex_unlock(tmp->mutex);
- return tmp;
-
- out_err:
- if (tmp)
- free(tmp);
- return NULL;
-}
-
-static void
-driFenceMgrUnrefUnlock(struct _DriFenceMgr **pMgr)
-{
- struct _DriFenceMgr *mgr = *pMgr;
-
- *pMgr = NULL;
- if (--mgr->refCount == 0)
- free(mgr);
- else
- pipe_mutex_unlock(mgr->mutex);
-}
-
-void
-driFenceMgrUnReference(struct _DriFenceMgr **pMgr)
-{
- pipe_mutex_lock((*pMgr)->mutex);
- driFenceMgrUnrefUnlock(pMgr);
-}
-
-static void
-driFenceUnReferenceLocked(struct _DriFenceObject **pFence)
-{
- struct _DriFenceObject *fence = *pFence;
- struct _DriFenceMgr *mgr = fence->mgr;
-
- *pFence = NULL;
- if (--fence->refCount == 0) {
- DRMLISTDELINIT(&fence->head);
- if (fence->private)
- mgr->info.unreference(mgr, &fence->private);
- --mgr->num_fences;
- fence->mgr = NULL;
- --mgr->refCount;
- free(fence);
-
- }
-}
-
-
-static void
-driSignalPreviousFencesLocked(struct _DriFenceMgr *mgr,
- drmMMListHead *list,
- uint32_t fence_class,
- uint32_t fence_type)
-{
- struct _DriFenceObject *entry;
- drmMMListHead *prev;
-
- while(list != &mgr->heads[fence_class]) {
- entry = DRMLISTENTRY(struct _DriFenceObject, list, head);
-
- /*
- * Up refcount so that entry doesn't disappear from under us
- * when we unlock-relock mgr to get the correct locking order.
- */
-
- ++entry->refCount;
- pipe_mutex_unlock(mgr->mutex);
- pipe_mutex_lock(entry->mutex);
- pipe_mutex_lock(mgr->mutex);
-
- prev = list->prev;
-
-
-
- if (list->prev == list) {
-
- /*
- * Somebody else removed the entry from the list.
- */
-
- pipe_mutex_unlock(entry->mutex);
- driFenceUnReferenceLocked(&entry);
- return;
- }
-
- entry->signaled_type |= (fence_type & entry->fence_type);
- if (entry->signaled_type == entry->fence_type) {
- DRMLISTDELINIT(list);
- mgr->info.unreference(mgr, &entry->private);
- }
- pipe_mutex_unlock(entry->mutex);
- driFenceUnReferenceLocked(&entry);
- list = prev;
- }
-}
-
-
-int
-driFenceFinish(struct _DriFenceObject *fence, uint32_t fence_type,
- int lazy_hint)
-{
- struct _DriFenceMgr *mgr = fence->mgr;
- int ret = 0;
-
- pipe_mutex_lock(fence->mutex);
-
- if ((fence->signaled_type & fence_type) == fence_type)
- goto out0;
-
- ret = mgr->info.finish(mgr, fence->private, fence_type, lazy_hint);
- if (ret)
- goto out0;
-
- pipe_mutex_lock(mgr->mutex);
- pipe_mutex_unlock(fence->mutex);
-
- driSignalPreviousFencesLocked(mgr, &fence->head, fence->fence_class,
- fence_type);
- pipe_mutex_unlock(mgr->mutex);
- return 0;
-
- out0:
- pipe_mutex_unlock(fence->mutex);
- return ret;
-}
-
-uint32_t driFenceSignaledTypeCached(struct _DriFenceObject *fence)
-{
- uint32_t ret;
-
- pipe_mutex_lock(fence->mutex);
- ret = fence->signaled_type;
- pipe_mutex_unlock(fence->mutex);
-
- return ret;
-}
-
-int
-driFenceSignaledType(struct _DriFenceObject *fence, uint32_t flush_type,
- uint32_t *signaled)
-{
- int ret = 0;
- struct _DriFenceMgr *mgr;
-
- pipe_mutex_lock(fence->mutex);
- mgr = fence->mgr;
- *signaled = fence->signaled_type;
- if ((fence->signaled_type & flush_type) == flush_type)
- goto out0;
-
- ret = mgr->info.signaled(mgr, fence->private, flush_type, signaled);
- if (ret) {
- *signaled = fence->signaled_type;
- goto out0;
- }
-
- if ((fence->signaled_type | *signaled) == fence->signaled_type)
- goto out0;
-
- pipe_mutex_lock(mgr->mutex);
- pipe_mutex_unlock(fence->mutex);
-
- driSignalPreviousFencesLocked(mgr, &fence->head, fence->fence_class,
- *signaled);
-
- pipe_mutex_unlock(mgr->mutex);
- return 0;
- out0:
- pipe_mutex_unlock(fence->mutex);
- return ret;
-}
-
-struct _DriFenceObject *
-driFenceReference(struct _DriFenceObject *fence)
-{
- pipe_mutex_lock(fence->mgr->mutex);
- ++fence->refCount;
- pipe_mutex_unlock(fence->mgr->mutex);
- return fence;
-}
-
-void
-driFenceUnReference(struct _DriFenceObject **pFence)
-{
- struct _DriFenceMgr *mgr;
-
- if (*pFence == NULL)
- return;
-
- mgr = (*pFence)->mgr;
- pipe_mutex_lock(mgr->mutex);
- ++mgr->refCount;
- driFenceUnReferenceLocked(pFence);
- driFenceMgrUnrefUnlock(&mgr);
-}
-
-struct _DriFenceObject
-*driFenceCreate(struct _DriFenceMgr *mgr, uint32_t fence_class,
- uint32_t fence_type, void *private, size_t private_size)
-{
- struct _DriFenceObject *fence;
- size_t fence_size = sizeof(*fence);
-
- if (private_size)
- fence_size = ((fence_size + 15) & ~15);
-
- fence = calloc(1, fence_size + private_size);
-
- if (!fence) {
- int ret = mgr->info.finish(mgr, private, fence_type, 0);
-
- if (ret)
- usleep(10000000);
-
- return NULL;
- }
-
- pipe_mutex_init(fence->mutex);
- pipe_mutex_lock(fence->mutex);
- pipe_mutex_lock(mgr->mutex);
- fence->refCount = 1;
- DRMLISTADDTAIL(&fence->head, &mgr->heads[fence_class]);
- fence->mgr = mgr;
- ++mgr->refCount;
- ++mgr->num_fences;
- pipe_mutex_unlock(mgr->mutex);
- fence->fence_class = fence_class;
- fence->fence_type = fence_type;
- fence->signaled_type = 0;
- fence->private = private;
- if (private_size) {
- fence->private = (void *)(((uint8_t *) fence) + fence_size);
- memcpy(fence->private, private, private_size);
- }
-
- pipe_mutex_unlock(fence->mutex);
- return fence;
-}
-
-
-static int
-tSignaled(struct _DriFenceMgr *mgr, void *private, uint32_t flush_type,
- uint32_t *signaled_type)
-{
- long fd = (long) mgr->private;
- int dummy;
- drmFence *fence = (drmFence *) private;
- int ret;
-
- *signaled_type = 0;
- ret = drmFenceSignaled((int) fd, fence, flush_type, &dummy);
- if (ret)
- return ret;
-
- *signaled_type = fence->signaled;
-
- return 0;
-}
-
-static int
-tFinish(struct _DriFenceMgr *mgr, void *private, uint32_t fence_type,
- int lazy_hint)
-{
- long fd = (long) mgr->private;
- unsigned flags = lazy_hint ? DRM_FENCE_FLAG_WAIT_LAZY : 0;
-
- return drmFenceWait((int)fd, flags, (drmFence *) private, fence_type);
-}
-
-static int
-tUnref(struct _DriFenceMgr *mgr, void **private)
-{
- long fd = (long) mgr->private;
- drmFence *fence = (drmFence *) *private;
- *private = NULL;
-
- return drmFenceUnreference(fd, fence);
-}
-
-struct _DriFenceMgr *driFenceMgrTTMInit(int fd)
-{
- struct _DriFenceMgrCreateInfo info;
- struct _DriFenceMgr *mgr;
-
- info.flags = DRI_FENCE_CLASS_ORDERED;
- info.num_classes = 4;
- info.signaled = tSignaled;
- info.finish = tFinish;
- info.unreference = tUnref;
-
- mgr = driFenceMgrCreate(&info);
- if (mgr == NULL)
- return NULL;
-
- mgr->private = (void *) (long) fd;
- return mgr;
-}
-
diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.h b/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.h
deleted file mode 100644
index 4ea58dfe18..0000000000
--- a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.h
+++ /dev/null
@@ -1,115 +0,0 @@
-#ifndef DRI_FENCEMGR_H
-#define DRI_FENCEMGR_H
-
-#include <stdint.h>
-#include <stdlib.h>
-
-struct _DriFenceObject;
-struct _DriFenceMgr;
-
-/*
- * Do a quick check to see if the fence manager has registered the fence
- * object as signaled. Note that this function may return a false negative
- * answer.
- */
-extern uint32_t driFenceSignaledTypeCached(struct _DriFenceObject *fence);
-
-/*
- * Check if the fence object is signaled. This function can be substantially
- * more expensive to call than the above function, but will not return a false
- * negative answer. The argument "flush_type" sets the types that the
- * underlying mechanism must make sure will eventually signal.
- */
-extern int driFenceSignaledType(struct _DriFenceObject *fence,
- uint32_t flush_type, uint32_t *signaled);
-
-/*
- * Convenience functions.
- */
-
-static inline int driFenceSignaled(struct _DriFenceObject *fence,
- uint32_t flush_type)
-{
- uint32_t signaled_types;
- int ret = driFenceSignaledType(fence, flush_type, &signaled_types);
- if (ret)
- return 0;
- return ((signaled_types & flush_type) == flush_type);
-}
-
-static inline int driFenceSignaledCached(struct _DriFenceObject *fence,
- uint32_t flush_type)
-{
- uint32_t signaled_types =
- driFenceSignaledTypeCached(fence);
-
- return ((signaled_types & flush_type) == flush_type);
-}
-
-/*
- * Reference a fence object.
- */
-extern struct _DriFenceObject *driFenceReference(struct _DriFenceObject *fence);
-
-/*
- * Unreference a fence object. The fence object pointer will be reset to NULL.
- */
-
-extern void driFenceUnReference(struct _DriFenceObject **pFence);
-
-
-/*
- * Wait for a fence to signal the indicated fence_type.
- * If "lazy_hint" is true, it indicates that the wait may sleep to avoid
- * busy-wait polling.
- */
-extern int driFenceFinish(struct _DriFenceObject *fence, uint32_t fence_type,
- int lazy_hint);
-
-/*
- * Create a DriFenceObject for manager "mgr".
- *
- * "private" is a pointer that should be used for the callbacks in
- * struct _DriFenceMgrCreateInfo.
- *
- * if private_size is nonzero, then the info stored at *private, with size
- * private size will be copied and the fence manager will instead use a
- * pointer to the copied data for the callbacks in
- * struct _DriFenceMgrCreateInfo. In that case, the object pointed to by
- * "private" may be destroyed after the call to driFenceCreate.
- */
-extern struct _DriFenceObject *driFenceCreate(struct _DriFenceMgr *mgr,
- uint32_t fence_class,
- uint32_t fence_type,
- void *private,
- size_t private_size);
-
-extern uint32_t driFenceType(struct _DriFenceObject *fence);
-
-/*
- * Fence creations are ordered. If a fence signals a fence_type,
- * it is safe to assume that all fences of the same class that was
- * created before that fence has signaled the same type.
- */
-
-#define DRI_FENCE_CLASS_ORDERED (1 << 0)
-
-struct _DriFenceMgrCreateInfo {
- uint32_t flags;
- uint32_t num_classes;
- int (*signaled) (struct _DriFenceMgr *mgr, void *private, uint32_t flush_type,
- uint32_t *signaled_type);
- int (*finish) (struct _DriFenceMgr *mgr, void *private, uint32_t fence_type, int lazy_hint);
- int (*unreference) (struct _DriFenceMgr *mgr, void **private);
-};
-
-extern struct _DriFenceMgr *
-driFenceMgrCreate(const struct _DriFenceMgrCreateInfo *info);
-
-void
-driFenceMgrUnReference(struct _DriFenceMgr **pMgr);
-
-extern struct _DriFenceMgr *
-driFenceMgrTTMInit(int fd);
-
-#endif
diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c
deleted file mode 100644
index 60924eac9e..0000000000
--- a/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, TX., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-/*
- * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- */
-
-#include <xf86drm.h>
-#include <stdlib.h>
-#include <errno.h>
-#include "pipe/p_debug.h"
-#include "pipe/p_thread.h"
-#include "ws_dri_bufpool.h"
-#include "ws_dri_bufmgr.h"
-
-static void *
-pool_create(struct _DriBufferPool *pool,
- unsigned long size, uint64_t flags, unsigned hint,
- unsigned alignment)
-{
- unsigned long *private = malloc(size + 2*sizeof(unsigned long));
- if ((flags & DRM_BO_MASK_MEM) != DRM_BO_FLAG_MEM_LOCAL)
- abort();
-
- *private = size;
- return (void *)private;
-}
-
-
-static int
-pool_destroy(struct _DriBufferPool *pool, void *private)
-{
- free(private);
- return 0;
-}
-
-static int
-pool_waitIdle(struct _DriBufferPool *pool, void *private,
- pipe_mutex *mutex, int lazy)
-{
- return 0;
-}
-
-static int
-pool_map(struct _DriBufferPool *pool, void *private, unsigned flags,
- int hint, pipe_mutex *mutex, void **virtual)
-{
- *virtual = (void *)((unsigned long *)private + 2);
- return 0;
-}
-
-static int
-pool_unmap(struct _DriBufferPool *pool, void *private)
-{
- return 0;
-}
-
-static unsigned long
-pool_offset(struct _DriBufferPool *pool, void *private)
-{
- /*
- * BUG
- */
- abort();
- return 0UL;
-}
-
-static unsigned long
-pool_poolOffset(struct _DriBufferPool *pool, void *private)
-{
- /*
- * BUG
- */
- abort();
-}
-
-static uint64_t
-pool_flags(struct _DriBufferPool *pool, void *private)
-{
- return DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED;
-}
-
-static unsigned long
-pool_size(struct _DriBufferPool *pool, void *private)
-{
- return *(unsigned long *) private;
-}
-
-
-static int
-pool_fence(struct _DriBufferPool *pool, void *private,
- struct _DriFenceObject *fence)
-{
- abort();
- return 0UL;
-}
-
-static drmBO *
-pool_kernel(struct _DriBufferPool *pool, void *private)
-{
- abort();
- return NULL;
-}
-
-static void
-pool_takedown(struct _DriBufferPool *pool)
-{
- free(pool);
-}
-
-
-struct _DriBufferPool *
-driMallocPoolInit(void)
-{
- struct _DriBufferPool *pool;
-
- pool = (struct _DriBufferPool *) malloc(sizeof(*pool));
- if (!pool)
- return NULL;
-
- pool->data = NULL;
- pool->fd = -1;
- pool->map = &pool_map;
- pool->unmap = &pool_unmap;
- pool->destroy = &pool_destroy;
- pool->offset = &pool_offset;
- pool->poolOffset = &pool_poolOffset;
- pool->flags = &pool_flags;
- pool->size = &pool_size;
- pool->create = &pool_create;
- pool->fence = &pool_fence;
- pool->kernel = &pool_kernel;
- pool->validate = NULL;
- pool->waitIdle = &pool_waitIdle;
- pool->takeDown = &pool_takedown;
- return pool;
-}
diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c
deleted file mode 100644
index 391cea50a7..0000000000
--- a/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c
+++ /dev/null
@@ -1,968 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-/*
- * Authors: Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
- */
-
-#include <stdint.h>
-#include <sys/time.h>
-#include <errno.h>
-#include <unistd.h>
-#include <assert.h>
-#include "ws_dri_bufpool.h"
-#include "ws_dri_fencemgr.h"
-#include "ws_dri_bufmgr.h"
-#include "pipe/p_thread.h"
-
-#define DRI_SLABPOOL_ALLOC_RETRIES 100
-
-struct _DriSlab;
-
-struct _DriSlabBuffer {
- int isSlabBuffer;
- drmBO *bo;
- struct _DriFenceObject *fence;
- struct _DriSlab *parent;
- drmMMListHead head;
- uint32_t mapCount;
- uint32_t start;
- uint32_t fenceType;
- int unFenced;
- pipe_condvar event;
-};
-
-struct _DriKernelBO {
- int fd;
- drmBO bo;
- drmMMListHead timeoutHead;
- drmMMListHead head;
- struct timeval timeFreed;
- uint32_t pageAlignment;
- void *virtual;
-};
-
-struct _DriSlab{
- drmMMListHead head;
- drmMMListHead freeBuffers;
- uint32_t numBuffers;
- uint32_t numFree;
- struct _DriSlabBuffer *buffers;
- struct _DriSlabSizeHeader *header;
- struct _DriKernelBO *kbo;
-};
-
-
-struct _DriSlabSizeHeader {
- drmMMListHead slabs;
- drmMMListHead freeSlabs;
- drmMMListHead delayedBuffers;
- uint32_t numDelayed;
- struct _DriSlabPool *slabPool;
- uint32_t bufSize;
- pipe_mutex mutex;
-};
-
-struct _DriFreeSlabManager {
- struct timeval slabTimeout;
- struct timeval checkInterval;
- struct timeval nextCheck;
- drmMMListHead timeoutList;
- drmMMListHead unCached;
- drmMMListHead cached;
- pipe_mutex mutex;
-};
-
-
-struct _DriSlabPool {
-
- /*
- * The data of this structure remains constant after
- * initialization and thus needs no mutex protection.
- */
-
- struct _DriFreeSlabManager *fMan;
- uint64_t proposedFlags;
- uint64_t validMask;
- uint32_t *bucketSizes;
- uint32_t numBuckets;
- uint32_t pageSize;
- int fd;
- int pageAlignment;
- int maxSlabSize;
- int desiredNumBuffers;
- struct _DriSlabSizeHeader *headers;
-};
-
-/*
- * FIXME: Perhaps arrange timeout slabs in size buckets for fast
- * retreival??
- */
-
-
-static inline int
-driTimeAfterEq(struct timeval *arg1, struct timeval *arg2)
-{
- return ((arg1->tv_sec > arg2->tv_sec) ||
- ((arg1->tv_sec == arg2->tv_sec) &&
- (arg1->tv_usec > arg2->tv_usec)));
-}
-
-static inline void
-driTimeAdd(struct timeval *arg, struct timeval *add)
-{
- unsigned int sec;
-
- arg->tv_sec += add->tv_sec;
- arg->tv_usec += add->tv_usec;
- sec = arg->tv_usec / 1000000;
- arg->tv_sec += sec;
- arg->tv_usec -= sec*1000000;
-}
-
-static void
-driFreeKernelBO(struct _DriKernelBO *kbo)
-{
- if (!kbo)
- return;
-
- (void) drmBOUnreference(kbo->fd, &kbo->bo);
- free(kbo);
-}
-
-
-static void
-driFreeTimeoutKBOsLocked(struct _DriFreeSlabManager *fMan,
- struct timeval *time)
-{
- drmMMListHead *list, *next;
- struct _DriKernelBO *kbo;
-
- if (!driTimeAfterEq(time, &fMan->nextCheck))
- return;
-
- for (list = fMan->timeoutList.next, next = list->next;
- list != &fMan->timeoutList;
- list = next, next = list->next) {
-
- kbo = DRMLISTENTRY(struct _DriKernelBO, list, timeoutHead);
-
- if (!driTimeAfterEq(time, &kbo->timeFreed))
- break;
-
- DRMLISTDELINIT(&kbo->timeoutHead);
- DRMLISTDELINIT(&kbo->head);
- driFreeKernelBO(kbo);
- }
-
- fMan->nextCheck = *time;
- driTimeAdd(&fMan->nextCheck, &fMan->checkInterval);
-}
-
-
-/*
- * Add a _DriKernelBO to the free slab manager.
- * This means that it is available for reuse, but if it's not
- * reused in a while, it will be freed.
- */
-
-static void
-driSetKernelBOFree(struct _DriFreeSlabManager *fMan,
- struct _DriKernelBO *kbo)
-{
- struct timeval time;
-
- pipe_mutex_lock(fMan->mutex);
- gettimeofday(&time, NULL);
- driTimeAdd(&time, &fMan->slabTimeout);
-
- kbo->timeFreed = time;
-
- if (kbo->bo.flags & DRM_BO_FLAG_CACHED)
- DRMLISTADD(&kbo->head, &fMan->cached);
- else
- DRMLISTADD(&kbo->head, &fMan->unCached);
-
- DRMLISTADDTAIL(&kbo->timeoutHead, &fMan->timeoutList);
- driFreeTimeoutKBOsLocked(fMan, &time);
-
- pipe_mutex_unlock(fMan->mutex);
-}
-
-/*
- * Get a _DriKernelBO for us to use as storage for a slab.
- *
- */
-
-static struct _DriKernelBO *
-driAllocKernelBO(struct _DriSlabSizeHeader *header)
-
-{
- struct _DriSlabPool *slabPool = header->slabPool;
- struct _DriFreeSlabManager *fMan = slabPool->fMan;
- drmMMListHead *list, *next, *head;
- uint32_t size = header->bufSize * slabPool->desiredNumBuffers;
- struct _DriKernelBO *kbo;
- struct _DriKernelBO *kboTmp;
- int ret;
-
- /*
- * FIXME: We should perhaps allow some variation in slabsize in order
- * to efficiently reuse slabs.
- */
-
- size = (size <= slabPool->maxSlabSize) ? size : slabPool->maxSlabSize;
- size = (size + slabPool->pageSize - 1) & ~(slabPool->pageSize - 1);
- pipe_mutex_lock(fMan->mutex);
-
- kbo = NULL;
-
- retry:
- head = (slabPool->proposedFlags & DRM_BO_FLAG_CACHED) ?
- &fMan->cached : &fMan->unCached;
-
- for (list = head->next, next = list->next;
- list != head;
- list = next, next = list->next) {
-
- kboTmp = DRMLISTENTRY(struct _DriKernelBO, list, head);
-
- if ((kboTmp->bo.size == size) &&
- (slabPool->pageAlignment == 0 ||
- (kboTmp->pageAlignment % slabPool->pageAlignment) == 0)) {
-
- if (!kbo)
- kbo = kboTmp;
-
- if ((kbo->bo.proposedFlags ^ slabPool->proposedFlags) == 0)
- break;
-
- }
- }
-
- if (kbo) {
- DRMLISTDELINIT(&kbo->head);
- DRMLISTDELINIT(&kbo->timeoutHead);
- }
-
- pipe_mutex_unlock(fMan->mutex);
-
- if (kbo) {
- uint64_t new_mask = kbo->bo.proposedFlags ^ slabPool->proposedFlags;
-
- ret = 0;
- if (new_mask) {
- ret = drmBOSetStatus(kbo->fd, &kbo->bo, slabPool->proposedFlags,
- new_mask, DRM_BO_HINT_DONT_FENCE, 0, 0);
- }
- if (ret == 0)
- return kbo;
-
- driFreeKernelBO(kbo);
- kbo = NULL;
- goto retry;
- }
-
- kbo = calloc(1, sizeof(struct _DriKernelBO));
- if (!kbo)
- return NULL;
-
- kbo->fd = slabPool->fd;
- DRMINITLISTHEAD(&kbo->head);
- DRMINITLISTHEAD(&kbo->timeoutHead);
- ret = drmBOCreate(kbo->fd, size, slabPool->pageAlignment, NULL,
- slabPool->proposedFlags,
- DRM_BO_HINT_DONT_FENCE, &kbo->bo);
- if (ret)
- goto out_err0;
-
- ret = drmBOMap(kbo->fd, &kbo->bo,
- DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
- 0, &kbo->virtual);
-
- if (ret)
- goto out_err1;
-
- ret = drmBOUnmap(kbo->fd, &kbo->bo);
- if (ret)
- goto out_err1;
-
- return kbo;
-
- out_err1:
- drmBOUnreference(kbo->fd, &kbo->bo);
- out_err0:
- free(kbo);
- return NULL;
-}
-
-
-static int
-driAllocSlab(struct _DriSlabSizeHeader *header)
-{
- struct _DriSlab *slab;
- struct _DriSlabBuffer *buf;
- uint32_t numBuffers;
- int ret;
- int i;
-
- slab = calloc(1, sizeof(*slab));
- if (!slab)
- return -ENOMEM;
-
- slab->kbo = driAllocKernelBO(header);
- if (!slab->kbo) {
- ret = -ENOMEM;
- goto out_err0;
- }
-
- numBuffers = slab->kbo->bo.size / header->bufSize;
-
- slab->buffers = calloc(numBuffers, sizeof(*slab->buffers));
- if (!slab->buffers) {
- ret = -ENOMEM;
- goto out_err1;
- }
-
- DRMINITLISTHEAD(&slab->head);
- DRMINITLISTHEAD(&slab->freeBuffers);
- slab->numBuffers = numBuffers;
- slab->numFree = 0;
- slab->header = header;
-
- buf = slab->buffers;
- for (i=0; i < numBuffers; ++i) {
- buf->parent = slab;
- buf->start = i* header->bufSize;
- buf->mapCount = 0;
- buf->isSlabBuffer = 1;
- pipe_condvar_init(buf->event);
- DRMLISTADDTAIL(&buf->head, &slab->freeBuffers);
- slab->numFree++;
- buf++;
- }
-
- DRMLISTADDTAIL(&slab->head, &header->slabs);
-
- return 0;
-
- out_err1:
- driSetKernelBOFree(header->slabPool->fMan, slab->kbo);
- free(slab->buffers);
- out_err0:
- free(slab);
- return ret;
-}
-
-/*
- * Delete a buffer from the slab header delayed list and put
- * it on the slab free list.
- */
-
-static void
-driSlabFreeBufferLocked(struct _DriSlabBuffer *buf)
-{
- struct _DriSlab *slab = buf->parent;
- struct _DriSlabSizeHeader *header = slab->header;
- drmMMListHead *list = &buf->head;
-
- DRMLISTDEL(list);
- DRMLISTADDTAIL(list, &slab->freeBuffers);
- slab->numFree++;
-
- if (slab->head.next == &slab->head)
- DRMLISTADDTAIL(&slab->head, &header->slabs);
-
- if (slab->numFree == slab->numBuffers) {
- list = &slab->head;
- DRMLISTDEL(list);
- DRMLISTADDTAIL(list, &header->freeSlabs);
- }
-
- if (header->slabs.next == &header->slabs ||
- slab->numFree != slab->numBuffers) {
-
- drmMMListHead *next;
- struct _DriFreeSlabManager *fMan = header->slabPool->fMan;
-
- for (list = header->freeSlabs.next, next = list->next;
- list != &header->freeSlabs;
- list = next, next = list->next) {
-
- slab = DRMLISTENTRY(struct _DriSlab, list, head);
-
- DRMLISTDELINIT(list);
- driSetKernelBOFree(fMan, slab->kbo);
- free(slab->buffers);
- free(slab);
- }
- }
-}
-
-static void
-driSlabCheckFreeLocked(struct _DriSlabSizeHeader *header, int wait)
-{
- drmMMListHead *list, *prev, *first;
- struct _DriSlabBuffer *buf;
- struct _DriSlab *slab;
- int firstWasSignaled = 1;
- int signaled;
- int i;
- int ret;
-
- /*
- * Rerun the freeing test if the youngest tested buffer
- * was signaled, since there might be more idle buffers
- * in the delay list.
- */
-
- while (firstWasSignaled) {
- firstWasSignaled = 0;
- signaled = 0;
- first = header->delayedBuffers.next;
-
- /* Only examine the oldest 1/3 of delayed buffers:
- */
- if (header->numDelayed > 3) {
- for (i = 0; i < header->numDelayed; i += 3) {
- first = first->next;
- }
- }
-
- for (list = first, prev = list->prev;
- list != &header->delayedBuffers;
- list = prev, prev = list->prev) {
- buf = DRMLISTENTRY(struct _DriSlabBuffer, list, head);
- slab = buf->parent;
-
- if (!signaled) {
- if (wait) {
- ret = driFenceFinish(buf->fence, buf->fenceType, 0);
- if (ret)
- break;
- signaled = 1;
- wait = 0;
- } else {
- signaled = driFenceSignaled(buf->fence, buf->fenceType);
- }
- if (signaled) {
- if (list == first)
- firstWasSignaled = 1;
- driFenceUnReference(&buf->fence);
- header->numDelayed--;
- driSlabFreeBufferLocked(buf);
- }
- } else if (driFenceSignaledCached(buf->fence, buf->fenceType)) {
- driFenceUnReference(&buf->fence);
- header->numDelayed--;
- driSlabFreeBufferLocked(buf);
- }
- }
- }
-}
-
-
-static struct _DriSlabBuffer *
-driSlabAllocBuffer(struct _DriSlabSizeHeader *header)
-{
- static struct _DriSlabBuffer *buf;
- struct _DriSlab *slab;
- drmMMListHead *list;
- int count = DRI_SLABPOOL_ALLOC_RETRIES;
-
- pipe_mutex_lock(header->mutex);
- while(header->slabs.next == &header->slabs && count > 0) {
- driSlabCheckFreeLocked(header, 0);
- if (header->slabs.next != &header->slabs)
- break;
-
- pipe_mutex_unlock(header->mutex);
- if (count != DRI_SLABPOOL_ALLOC_RETRIES)
- usleep(1);
- pipe_mutex_lock(header->mutex);
- (void) driAllocSlab(header);
- count--;
- }
-
- list = header->slabs.next;
- if (list == &header->slabs) {
- pipe_mutex_unlock(header->mutex);
- return NULL;
- }
- slab = DRMLISTENTRY(struct _DriSlab, list, head);
- if (--slab->numFree == 0)
- DRMLISTDELINIT(list);
-
- list = slab->freeBuffers.next;
- DRMLISTDELINIT(list);
-
- pipe_mutex_unlock(header->mutex);
- buf = DRMLISTENTRY(struct _DriSlabBuffer, list, head);
- return buf;
-}
-
-static void *
-pool_create(struct _DriBufferPool *driPool, unsigned long size,
- uint64_t flags, unsigned hint, unsigned alignment)
-{
- struct _DriSlabPool *pool = (struct _DriSlabPool *) driPool->data;
- struct _DriSlabSizeHeader *header;
- struct _DriSlabBuffer *buf;
- void *dummy;
- int i;
- int ret;
-
- /*
- * FIXME: Check for compatibility.
- */
-
- header = pool->headers;
- for (i=0; i<pool->numBuckets; ++i) {
- if (header->bufSize >= size)
- break;
- header++;
- }
-
- if (i < pool->numBuckets)
- return driSlabAllocBuffer(header);
-
-
- /*
- * Fall back to allocate a buffer object directly from DRM.
- * and wrap it in a driBO structure.
- */
-
-
- buf = calloc(1, sizeof(*buf));
-
- if (!buf)
- return NULL;
-
- buf->bo = calloc(1, sizeof(*buf->bo));
- if (!buf->bo)
- goto out_err0;
-
- if (alignment) {
- if ((alignment < pool->pageSize) && (pool->pageSize % alignment))
- goto out_err1;
- if ((alignment > pool->pageSize) && (alignment % pool->pageSize))
- goto out_err1;
- }
-
- ret = drmBOCreate(pool->fd, size, alignment / pool->pageSize, NULL,
- flags, hint, buf->bo);
- if (ret)
- goto out_err1;
-
- ret = drmBOMap(pool->fd, buf->bo, DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
- 0, &dummy);
- if (ret)
- goto out_err2;
-
- ret = drmBOUnmap(pool->fd, buf->bo);
- if (ret)
- goto out_err2;
-
- return buf;
- out_err2:
- drmBOUnreference(pool->fd, buf->bo);
- out_err1:
- free(buf->bo);
- out_err0:
- free(buf);
- return NULL;
-}
-
-static int
-pool_destroy(struct _DriBufferPool *driPool, void *private)
-{
- struct _DriSlabBuffer *buf =
- (struct _DriSlabBuffer *) private;
- struct _DriSlab *slab;
- struct _DriSlabSizeHeader *header;
-
- if (!buf->isSlabBuffer) {
- struct _DriSlabPool *pool = (struct _DriSlabPool *) driPool->data;
- int ret;
-
- ret = drmBOUnreference(pool->fd, buf->bo);
- free(buf->bo);
- free(buf);
- return ret;
- }
-
- slab = buf->parent;
- header = slab->header;
-
- pipe_mutex_lock(header->mutex);
- buf->unFenced = 0;
- buf->mapCount = 0;
-
- if (buf->fence && !driFenceSignaledCached(buf->fence, buf->fenceType)) {
- DRMLISTADDTAIL(&buf->head, &header->delayedBuffers);
- header->numDelayed++;
- } else {
- if (buf->fence)
- driFenceUnReference(&buf->fence);
- driSlabFreeBufferLocked(buf);
- }
-
- pipe_mutex_unlock(header->mutex);
- return 0;
-}
-
-static int
-pool_waitIdle(struct _DriBufferPool *driPool, void *private,
- pipe_mutex *mutex, int lazy)
-{
- struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private;
-
- while(buf->unFenced)
- pipe_condvar_wait(buf->event, *mutex);
-
- if (!buf->fence)
- return 0;
-
- driFenceFinish(buf->fence, buf->fenceType, lazy);
- driFenceUnReference(&buf->fence);
-
- return 0;
-}
-
-static int
-pool_map(struct _DriBufferPool *pool, void *private, unsigned flags,
- int hint, pipe_mutex *mutex, void **virtual)
-{
- struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private;
- int busy;
-
- if (buf->isSlabBuffer)
- busy = buf->unFenced || (buf->fence && !driFenceSignaledCached(buf->fence, buf->fenceType));
- else
- busy = buf->fence && !driFenceSignaled(buf->fence, buf->fenceType);
-
-
- if (busy) {
- if (hint & DRM_BO_HINT_DONT_BLOCK)
- return -EBUSY;
- else {
- (void) pool_waitIdle(pool, private, mutex, 0);
- }
- }
-
- ++buf->mapCount;
- *virtual = (buf->isSlabBuffer) ?
- (void *) ((uint8_t *) buf->parent->kbo->virtual + buf->start) :
- (void *) buf->bo->virtual;
-
- return 0;
-}
-
-static int
-pool_unmap(struct _DriBufferPool *pool, void *private)
-{
- struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private;
-
- --buf->mapCount;
- if (buf->mapCount == 0 && buf->isSlabBuffer)
- pipe_condvar_broadcast(buf->event);
-
- return 0;
-}
-
-static unsigned long
-pool_offset(struct _DriBufferPool *pool, void *private)
-{
- struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private;
- struct _DriSlab *slab;
- struct _DriSlabSizeHeader *header;
-
- if (!buf->isSlabBuffer) {
- assert(buf->bo->proposedFlags & DRM_BO_FLAG_NO_MOVE);
- return buf->bo->offset;
- }
-
- slab = buf->parent;
- header = slab->header;
-
- (void) header;
- assert(header->slabPool->proposedFlags & DRM_BO_FLAG_NO_MOVE);
- return slab->kbo->bo.offset + buf->start;
-}
-
-static unsigned long
-pool_poolOffset(struct _DriBufferPool *pool, void *private)
-{
- struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private;
-
- return buf->start;
-}
-
-static uint64_t
-pool_flags(struct _DriBufferPool *pool, void *private)
-{
- struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private;
-
- if (!buf->isSlabBuffer)
- return buf->bo->flags;
-
- return buf->parent->kbo->bo.flags;
-}
-
-static unsigned long
-pool_size(struct _DriBufferPool *pool, void *private)
-{
- struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private;
- if (!buf->isSlabBuffer)
- return buf->bo->size;
-
- return buf->parent->header->bufSize;
-}
-
-static int
-pool_fence(struct _DriBufferPool *pool, void *private,
- struct _DriFenceObject *fence)
-{
- struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private;
- drmBO *bo;
-
- if (buf->fence)
- driFenceUnReference(&buf->fence);
-
- buf->fence = driFenceReference(fence);
- bo = (buf->isSlabBuffer) ?
- &buf->parent->kbo->bo:
- buf->bo;
- buf->fenceType = bo->fenceFlags;
-
- buf->unFenced = 0;
- pipe_condvar_broadcast(buf->event);
-
- return 0;
-}
-
-static drmBO *
-pool_kernel(struct _DriBufferPool *pool, void *private)
-{
- struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private;
-
- return (buf->isSlabBuffer) ? &buf->parent->kbo->bo : buf->bo;
-}
-
-static int
-pool_validate(struct _DriBufferPool *pool, void *private,
- pipe_mutex *mutex)
-{
- struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private;
-
- if (!buf->isSlabBuffer)
- return 0;
-
- while(buf->mapCount != 0)
- pipe_condvar_wait(buf->event, *mutex);
-
- buf->unFenced = 1;
- return 0;
-}
-
-
-struct _DriFreeSlabManager *
-driInitFreeSlabManager(uint32_t checkIntervalMsec, uint32_t slabTimeoutMsec)
-{
- struct _DriFreeSlabManager *tmp;
-
- tmp = calloc(1, sizeof(*tmp));
- if (!tmp)
- return NULL;
-
- pipe_mutex_init(tmp->mutex);
- pipe_mutex_lock(tmp->mutex);
- tmp->slabTimeout.tv_usec = slabTimeoutMsec*1000;
- tmp->slabTimeout.tv_sec = tmp->slabTimeout.tv_usec / 1000000;
- tmp->slabTimeout.tv_usec -= tmp->slabTimeout.tv_sec*1000000;
-
- tmp->checkInterval.tv_usec = checkIntervalMsec*1000;
- tmp->checkInterval.tv_sec = tmp->checkInterval.tv_usec / 1000000;
- tmp->checkInterval.tv_usec -= tmp->checkInterval.tv_sec*1000000;
-
- gettimeofday(&tmp->nextCheck, NULL);
- driTimeAdd(&tmp->nextCheck, &tmp->checkInterval);
- DRMINITLISTHEAD(&tmp->timeoutList);
- DRMINITLISTHEAD(&tmp->unCached);
- DRMINITLISTHEAD(&tmp->cached);
- pipe_mutex_unlock(tmp->mutex);
-
- return tmp;
-}
-
-void
-driFinishFreeSlabManager(struct _DriFreeSlabManager *fMan)
-{
- struct timeval time;
-
- time = fMan->nextCheck;
- driTimeAdd(&time, &fMan->checkInterval);
-
- pipe_mutex_lock(fMan->mutex);
- driFreeTimeoutKBOsLocked(fMan, &time);
- pipe_mutex_unlock(fMan->mutex);
-
- assert(fMan->timeoutList.next == &fMan->timeoutList);
- assert(fMan->unCached.next == &fMan->unCached);
- assert(fMan->cached.next == &fMan->cached);
-
- free(fMan);
-}
-
-static void
-driInitSizeHeader(struct _DriSlabPool *pool, uint32_t size,
- struct _DriSlabSizeHeader *header)
-{
- pipe_mutex_init(header->mutex);
- pipe_mutex_lock(header->mutex);
-
- DRMINITLISTHEAD(&header->slabs);
- DRMINITLISTHEAD(&header->freeSlabs);
- DRMINITLISTHEAD(&header->delayedBuffers);
-
- header->numDelayed = 0;
- header->slabPool = pool;
- header->bufSize = size;
-
- pipe_mutex_unlock(header->mutex);
-}
-
-static void
-driFinishSizeHeader(struct _DriSlabSizeHeader *header)
-{
- drmMMListHead *list, *next;
- struct _DriSlabBuffer *buf;
-
- pipe_mutex_lock(header->mutex);
- for (list = header->delayedBuffers.next, next = list->next;
- list != &header->delayedBuffers;
- list = next, next = list->next) {
-
- buf = DRMLISTENTRY(struct _DriSlabBuffer, list , head);
- if (buf->fence) {
- (void) driFenceFinish(buf->fence, buf->fenceType, 0);
- driFenceUnReference(&buf->fence);
- }
- header->numDelayed--;
- driSlabFreeBufferLocked(buf);
- }
- pipe_mutex_unlock(header->mutex);
-}
-
-static void
-pool_takedown(struct _DriBufferPool *driPool)
-{
- struct _DriSlabPool *pool = driPool->data;
- int i;
-
- for (i=0; i<pool->numBuckets; ++i) {
- driFinishSizeHeader(&pool->headers[i]);
- }
-
- free(pool->headers);
- free(pool->bucketSizes);
- free(pool);
- free(driPool);
-}
-
-struct _DriBufferPool *
-driSlabPoolInit(int fd, uint64_t flags,
- uint64_t validMask,
- uint32_t smallestSize,
- uint32_t numSizes,
- uint32_t desiredNumBuffers,
- uint32_t maxSlabSize,
- uint32_t pageAlignment,
- struct _DriFreeSlabManager *fMan)
-{
- struct _DriBufferPool *driPool;
- struct _DriSlabPool *pool;
- uint32_t i;
-
- driPool = calloc(1, sizeof(*driPool));
- if (!driPool)
- return NULL;
-
- pool = calloc(1, sizeof(*pool));
- if (!pool)
- goto out_err0;
-
- pool->bucketSizes = calloc(numSizes, sizeof(*pool->bucketSizes));
- if (!pool->bucketSizes)
- goto out_err1;
-
- pool->headers = calloc(numSizes, sizeof(*pool->headers));
- if (!pool->headers)
- goto out_err2;
-
- pool->fMan = fMan;
- pool->proposedFlags = flags;
- pool->validMask = validMask;
- pool->numBuckets = numSizes;
- pool->pageSize = getpagesize();
- pool->fd = fd;
- pool->pageAlignment = pageAlignment;
- pool->maxSlabSize = maxSlabSize;
- pool->desiredNumBuffers = desiredNumBuffers;
-
- for (i=0; i<pool->numBuckets; ++i) {
- pool->bucketSizes[i] = (smallestSize << i);
- driInitSizeHeader(pool, pool->bucketSizes[i],
- &pool->headers[i]);
- }
-
- driPool->data = (void *) pool;
- driPool->map = &pool_map;
- driPool->unmap = &pool_unmap;
- driPool->destroy = &pool_destroy;
- driPool->offset = &pool_offset;
- driPool->poolOffset = &pool_poolOffset;
- driPool->flags = &pool_flags;
- driPool->size = &pool_size;
- driPool->create = &pool_create;
- driPool->fence = &pool_fence;
- driPool->kernel = &pool_kernel;
- driPool->validate = &pool_validate;
- driPool->waitIdle = &pool_waitIdle;
- driPool->takeDown = &pool_takedown;
-
- return driPool;
-
- out_err2:
- free(pool->bucketSizes);
- out_err1:
- free(pool);
- out_err0:
- free(driPool);
-
- return NULL;
-}
diff --git a/src/gallium/winsys/drm/intel/dri/Makefile b/src/gallium/winsys/drm/intel/dri/Makefile
deleted file mode 100644
index 2046441a22..0000000000
--- a/src/gallium/winsys/drm/intel/dri/Makefile
+++ /dev/null
@@ -1,33 +0,0 @@
-TOP = ../../../../../..
-include $(TOP)/configs/current
-
-LIBNAME = i915_dri.so
-LIBNAME_EGL = egl_i915_dri.so
-
-PIPE_DRIVERS = \
- $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
- ../common/libinteldrm.a \
- $(TOP)/src/gallium/drivers/i915simple/libi915simple.a
-
-
-DRIVER_SOURCES = \
- intel_winsys_softpipe.c \
- intel_swapbuffers.c \
- intel_context.c \
- intel_lock.c \
- intel_screen.c
-
-C_SOURCES = \
- $(COMMON_GALLIUM_SOURCES) \
- $(DRIVER_SOURCES)
-
-ASM_SOURCES =
-
-DRIVER_DEFINES = -I../common $(shell pkg-config libdrm --atleast-version=2.3.1 \
- && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
-
-include ../../Makefile.template
-
-#intel_tex_layout.o: $(TOP)/src/mesa/drivers/dri/intel/intel_tex_layout.c
-
-symlinks:
diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript
deleted file mode 100644
index 6a4f50afcc..0000000000
--- a/src/gallium/winsys/drm/intel/dri/SConscript
+++ /dev/null
@@ -1,41 +0,0 @@
-Import('*')
-
-if 'mesa' in env['statetrackers']:
-
- env = drienv.Clone()
-
- env.Append(CPPPATH = [
- '../intel',
- 'server'
- ])
-
- #MINIGLX_SOURCES = server/intel_dri.c
-
- DRIVER_SOURCES = [
- 'intel_winsys_pipe.c',
- 'intel_winsys_softpipe.c',
- 'intel_winsys_i915.c',
- 'intel_batchbuffer.c',
- 'intel_swapbuffers.c',
- 'intel_context.c',
- 'intel_lock.c',
- 'intel_screen.c',
- 'intel_batchpool.c',
- ]
-
- sources = \
- COMMON_GALLIUM_SOURCES + \
- COMMON_BM_SOURCES + \
- DRIVER_SOURCES
-
- drivers = [
- softpipe,
- i915simple
- ]
-
- # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
- env.SharedLibrary(
- target ='i915tex_dri.so',
- source = sources,
- LIBS = drivers + mesa + auxiliaries + env['LIBS'],
- )
diff --git a/src/gallium/winsys/drm/intel/dri/intel_batchbuffer.h b/src/gallium/winsys/drm/intel/dri/intel_batchbuffer.h
deleted file mode 100644
index 3e95326168..0000000000
--- a/src/gallium/winsys/drm/intel/dri/intel_batchbuffer.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef INTEL_BATCHBUFFER_H
-#define INTEL_BATCHBUFFER_H
-
-#include "intel_be_batchbuffer.h"
-
-/*
- * Need to redefine the BATCH defines
- */
-
-#undef BEGIN_BATCH
-#define BEGIN_BATCH(dwords, relocs) \
- (i915_batchbuffer_check(&intel->base.batch->base, dwords, relocs))
-
-#undef OUT_BATCH
-#define OUT_BATCH(d) \
- i915_batchbuffer_dword(&intel->base.batch->base, d)
-
-#undef OUT_RELOC
-#define OUT_RELOC(buf,flags,mask,delta) do { \
- assert((delta) >= 0); \
- intel_be_offset_relocation(intel->base.batch, delta, buf, flags, mask); \
-} while (0)
-
-#endif
diff --git a/src/gallium/winsys/drm/intel/dri/intel_context.c b/src/gallium/winsys/drm/intel/dri/intel_context.c
deleted file mode 100644
index 97ef731aaa..0000000000
--- a/src/gallium/winsys/drm/intel/dri/intel_context.c
+++ /dev/null
@@ -1,337 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "i830_dri.h"
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_swapbuffers.h"
-#include "intel_batchbuffer.h"
-#include "intel_winsys_softpipe.h"
-
-#include "i915simple/i915_screen.h"
-
-#include "state_tracker/st_public.h"
-#include "state_tracker/st_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_context.h"
-
-#include "utils.h"
-
-
-#ifdef DEBUG
-int __intel_debug = 0;
-#endif
-
-
-#define need_GL_ARB_multisample
-#define need_GL_ARB_point_parameters
-#define need_GL_ARB_texture_compression
-#define need_GL_ARB_vertex_buffer_object
-#define need_GL_ARB_vertex_program
-#define need_GL_ARB_window_pos
-#define need_GL_EXT_blend_color
-#define need_GL_EXT_blend_equation_separate
-#define need_GL_EXT_blend_func_separate
-#define need_GL_EXT_blend_minmax
-#define need_GL_EXT_cull_vertex
-#define need_GL_EXT_fog_coord
-#define need_GL_EXT_framebuffer_object
-#define need_GL_EXT_multi_draw_arrays
-#define need_GL_EXT_secondary_color
-#define need_GL_NV_vertex_program
-#include "extension_helper.h"
-
-
-/**
- * Extension strings exported by the intel driver.
- *
- * \note
- * It appears that ARB_texture_env_crossbar has "disappeared" compared to the
- * old i830-specific driver.
- */
-const struct dri_extension card_extensions[] = {
- {"GL_ARB_multisample", GL_ARB_multisample_functions},
- {"GL_ARB_multitexture", NULL},
- {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
- {"GL_ARB_texture_border_clamp", NULL},
- {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions},
- {"GL_ARB_texture_cube_map", NULL},
- {"GL_ARB_texture_env_add", NULL},
- {"GL_ARB_texture_env_combine", NULL},
- {"GL_ARB_texture_env_dot3", NULL},
- {"GL_ARB_texture_mirrored_repeat", NULL},
- {"GL_ARB_texture_rectangle", NULL},
- {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
- {"GL_ARB_pixel_buffer_object", NULL},
- {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
- {"GL_ARB_window_pos", GL_ARB_window_pos_functions},
- {"GL_EXT_blend_color", GL_EXT_blend_color_functions},
- {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
- {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
- {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
- {"GL_EXT_blend_subtract", NULL},
- {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions},
- {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions},
- {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions},
- {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions},
- {"GL_EXT_packed_depth_stencil", NULL},
- {"GL_EXT_pixel_buffer_object", NULL},
- {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
- {"GL_EXT_stencil_wrap", NULL},
- {"GL_EXT_texture_edge_clamp", NULL},
- {"GL_EXT_texture_env_combine", NULL},
- {"GL_EXT_texture_env_dot3", NULL},
- {"GL_EXT_texture_filter_anisotropic", NULL},
- {"GL_EXT_texture_lod_bias", NULL},
- {"GL_3DFX_texture_compression_FXT1", NULL},
- {"GL_APPLE_client_storage", NULL},
- {"GL_MESA_pack_invert", NULL},
- {"GL_MESA_ycbcr_texture", NULL},
- {"GL_NV_blend_square", NULL},
- {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
- {"GL_NV_vertex_program1_1", NULL},
- {"GL_SGIS_generate_mipmap", NULL },
- {NULL, NULL}
-};
-
-
-
-#ifdef DEBUG
-static const struct dri_debug_control debug_control[] = {
- {"ioctl", DEBUG_IOCTL},
- {"bat", DEBUG_BATCH},
- {"lock", DEBUG_LOCK},
- {"swap", DEBUG_SWAP},
- {NULL, 0}
-};
-#endif
-
-
-
-static void
-intel_lock_hardware(struct intel_be_context *context)
-{
- struct intel_context *intel = (struct intel_context *)context;
- LOCK_HARDWARE(intel);
-}
-
-static void
-intel_unlock_hardware(struct intel_be_context *context)
-{
- struct intel_context *intel = (struct intel_context *)context;
- UNLOCK_HARDWARE(intel);
-}
-
-static boolean
-intel_locked_hardware(struct intel_be_context *context)
-{
- struct intel_context *intel = (struct intel_context *)context;
- return intel->locked ? TRUE : FALSE;
-}
-
-GLboolean
-intelCreateContext(const __GLcontextModes * visual,
- __DRIcontextPrivate * driContextPriv,
- void *sharedContextPrivate)
-{
- struct intel_context *intel = CALLOC_STRUCT(intel_context);
- __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
- struct intel_screen *intelScreen = intel_screen(sPriv);
- drmI830Sarea *saPriv = intelScreen->sarea;
- int fthrottle_mode;
- GLboolean havePools;
- struct pipe_context *pipe;
- struct st_context *st_share = NULL;
-
- if (sharedContextPrivate) {
- st_share = ((struct intel_context *) sharedContextPrivate)->st;
- }
-
- driContextPriv->driverPrivate = intel;
- intel->intelScreen = intelScreen;
- intel->driScreen = sPriv;
- intel->sarea = saPriv;
-
- driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
- intel->driScreen->myNum, "i915");
-
-
- /*
- * memory pools
- */
- DRM_LIGHT_LOCK(sPriv->fd, &sPriv->pSAREA->lock, driContextPriv->hHWContext);
- // ZZZ JB should be per screen and not be done per context
- havePools = intelCreatePools(sPriv);
- DRM_UNLOCK(sPriv->fd, &sPriv->pSAREA->lock, driContextPriv->hHWContext);
- if (!havePools)
- return GL_FALSE;
-
-
- /* Dri stuff */
- intel->hHWContext = driContextPriv->hHWContext;
- intel->driFd = sPriv->fd;
- intel->driHwLock = (drmLock *) & sPriv->pSAREA->lock;
-
- fthrottle_mode = driQueryOptioni(&intel->optionCache, "fthrottle_mode");
- intel->iw.irq_seq = -1;
- intel->irqsEmitted = 0;
-
- intel->last_swap_fence = NULL;
- intel->first_swap_fence = NULL;
-
-#ifdef DEBUG
- __intel_debug = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
-#endif
- intel->base.hardware_lock = intel_lock_hardware;
- intel->base.hardware_unlock = intel_unlock_hardware;
- intel->base.hardware_locked = intel_locked_hardware;
-
- intel_be_init_context(&intel->base, &intelScreen->base);
-
- /*
- * Pipe-related setup
- */
- if (getenv("INTEL_SP")) {
- /* use softpipe driver instead of hw */
- pipe = intel_create_softpipe( intel, &intelScreen->base.base );
- }
- else {
- switch (intel->intelScreen->deviceID) {
- case PCI_CHIP_I945_G:
- case PCI_CHIP_I945_GM:
- case PCI_CHIP_I945_GME:
- case PCI_CHIP_G33_G:
- case PCI_CHIP_Q33_G:
- case PCI_CHIP_Q35_G:
- case PCI_CHIP_I915_G:
- case PCI_CHIP_I915_GM:
- pipe = i915_create_context(intelScreen->base.screen,
- &intelScreen->base.base,
- &intel->base.base);
- break;
- default:
- fprintf(stderr, "Unknown PCIID %x in %s, using software driver\n",
- intel->intelScreen->deviceID, __FUNCTION__);
-
- pipe = intel_create_softpipe( intel, &intelScreen->base.base );
- break;
- }
- }
-
- pipe->priv = intel;
-
- intel->st = st_create_context(pipe, visual, st_share);
-
- driInitExtensions( intel->st->ctx, card_extensions, GL_TRUE );
-
- return GL_TRUE;
-}
-
-
-void
-intelDestroyContext(__DRIcontextPrivate * driContextPriv)
-{
- struct intel_context *intel = intel_context(driContextPriv);
-
- assert(intel); /* should never be null */
- if (intel) {
- st_finish(intel->st);
-
- if (intel->last_swap_fence) {
- driFenceFinish(intel->last_swap_fence, DRM_FENCE_TYPE_EXE, GL_TRUE);
- driFenceUnReference(&intel->last_swap_fence);
- intel->last_swap_fence = NULL;
- }
- if (intel->first_swap_fence) {
- driFenceFinish(intel->first_swap_fence, DRM_FENCE_TYPE_EXE, GL_TRUE);
- driFenceUnReference(&intel->first_swap_fence);
- intel->first_swap_fence = NULL;
- }
-
- if (intel->intelScreen->dummyContext == intel)
- intel->intelScreen->dummyContext = NULL;
-
- st_destroy_context(intel->st);
- intel_be_destroy_context(&intel->base);
- free(intel);
- }
-}
-
-
-GLboolean
-intelUnbindContext(__DRIcontextPrivate * driContextPriv)
-{
- struct intel_context *intel = intel_context(driContextPriv);
- st_flush(intel->st, PIPE_FLUSH_RENDER_CACHE, NULL);
- /* XXX make_current(NULL)? */
- return GL_TRUE;
-}
-
-
-GLboolean
-intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
- __DRIdrawablePrivate * driDrawPriv,
- __DRIdrawablePrivate * driReadPriv)
-{
- if (driContextPriv) {
- struct intel_context *intel = intel_context(driContextPriv);
- struct intel_framebuffer *draw_fb = intel_framebuffer(driDrawPriv);
- struct intel_framebuffer *read_fb = intel_framebuffer(driReadPriv);
-
- assert(draw_fb->stfb);
- assert(read_fb->stfb);
-
- /* This is for situations in which we need a rendering context but
- * there may not be any currently bound.
- */
- intel->intelScreen->dummyContext = intel;
-
- st_make_current(intel->st, draw_fb->stfb, read_fb->stfb);
-
- if ((intel->driDrawable != driDrawPriv) ||
- (intel->lastStamp != driDrawPriv->lastStamp)) {
- intel->driDrawable = driDrawPriv;
- intelUpdateWindowSize(driDrawPriv);
- intel->lastStamp = driDrawPriv->lastStamp;
- }
-
- /* The size of the draw buffer will have been updated above.
- * If the readbuffer is a different window, check/update its size now.
- */
- if (driReadPriv != driDrawPriv) {
- intelUpdateWindowSize(driReadPriv);
- }
-
- }
- else {
- st_make_current(NULL, NULL, NULL);
- }
-
- return GL_TRUE;
-}
diff --git a/src/gallium/winsys/drm/intel/dri/intel_context.h b/src/gallium/winsys/drm/intel/dri/intel_context.h
deleted file mode 100644
index 5d22a422af..0000000000
--- a/src/gallium/winsys/drm/intel/dri/intel_context.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef INTEL_CONTEXT_H
-#define INTEL_CONTEXT_H
-
-#include <stdint.h>
-#include "drm.h"
-
-#include "pipe/p_debug.h"
-
-#include "intel_screen.h"
-#include "i915_drm.h"
-
-#include "intel_be_context.h"
-
-
-struct pipe_context;
-struct intel_context;
-struct _DriBufferObject;
-struct st_context;
-
-
-#define INTEL_MAX_FIXUP 64
-
-/**
- * Intel rendering context, contains a state tracker and intel-specific info.
- */
-struct intel_context
-{
- struct intel_be_context base;
- struct st_context *st;
-
- struct _DriFenceObject *last_swap_fence;
- struct _DriFenceObject *first_swap_fence;
-
-// struct intel_batchbuffer *batch;
-
- boolean locked;
- char *prevLockFile;
- int prevLockLine;
-
- uint irqsEmitted;
- drm_i915_irq_wait_t iw;
-
- drm_context_t hHWContext;
- drmLock *driHwLock;
- int driFd;
-
- __DRIdrawablePrivate *driDrawable;
- __DRIscreenPrivate *driScreen;
- struct intel_screen *intelScreen;
- drmI830Sarea *sarea;
-
- uint lastStamp;
-
- /**
- * Configuration cache
- */
- driOptionCache optionCache;
-};
-
-
-
-/**
- * Intel framebuffer.
- */
-struct intel_framebuffer
-{
- struct st_framebuffer *stfb;
-
- /* other fields TBD */
- int other;
-};
-
-
-
-
-/* These are functions now:
- */
-void LOCK_HARDWARE( struct intel_context *intel );
-void UNLOCK_HARDWARE( struct intel_context *intel );
-
-extern char *__progname;
-
-
-
-/* ================================================================
- * Debugging:
- */
-#ifdef DEBUG
-extern int __intel_debug;
-
-#define DEBUG_SWAP 0x1
-#define DEBUG_LOCK 0x2
-#define DEBUG_IOCTL 0x4
-#define DEBUG_BATCH 0x8
-
-#define DBG(flag, ...) do { \
- if (__intel_debug & (DEBUG_##flag)) \
- printf(__VA_ARGS__); \
-} while(0)
-
-#else
-#define DBG(flag, ...)
-#endif
-
-
-
-#define PCI_CHIP_845_G 0x2562
-#define PCI_CHIP_I830_M 0x3577
-#define PCI_CHIP_I855_GM 0x3582
-#define PCI_CHIP_I865_G 0x2572
-#define PCI_CHIP_I915_G 0x2582
-#define PCI_CHIP_I915_GM 0x2592
-#define PCI_CHIP_I945_G 0x2772
-#define PCI_CHIP_I945_GM 0x27A2
-#define PCI_CHIP_I945_GME 0x27AE
-#define PCI_CHIP_G33_G 0x29C2
-#define PCI_CHIP_Q35_G 0x29B2
-#define PCI_CHIP_Q33_G 0x29D2
-
-
-/** Cast wrapper */
-static INLINE struct intel_context *
-intel_context(__DRIcontextPrivate *driContextPriv)
-{
- return (struct intel_context *) driContextPriv->driverPrivate;
-}
-
-
-/** Cast wrapper */
-static INLINE struct intel_framebuffer *
-intel_framebuffer(__DRIdrawablePrivate * driDrawPriv)
-{
- return (struct intel_framebuffer *) driDrawPriv->driverPrivate;
-}
-
-
-#endif
diff --git a/src/gallium/winsys/drm/intel/dri/intel_screen.c b/src/gallium/winsys/drm/intel/dri/intel_screen.c
deleted file mode 100644
index 3a486481f5..0000000000
--- a/src/gallium/winsys/drm/intel/dri/intel_screen.c
+++ /dev/null
@@ -1,607 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "utils.h"
-#include "vblank.h"
-#include "xmlpool.h"
-
-#include "intel_context.h"
-#include "intel_screen.h"
-#include "intel_batchbuffer.h"
-#include "intel_swapbuffers.h"
-
-#include "i830_dri.h"
-#include "ws_dri_bufpool.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_screen.h"
-#include "pipe/p_inlines.h"
-#include "state_tracker/st_public.h"
-#include "state_tracker/st_cb_fbo.h"
-
-static void
-intelCreateSurface(struct intel_screen *intelScreen, struct pipe_winsys *winsys, unsigned handle);
-
-static void
-intelCreateSurface(struct intel_screen *intelScreen, struct pipe_winsys *winsys, unsigned handle)
-{
- struct pipe_screen *screen = intelScreen->base.screen;
- struct pipe_texture *texture;
- struct pipe_texture templat;
- struct pipe_surface *surface;
- struct pipe_buffer *buffer;
- unsigned pitch;
-
- assert(intelScreen->front.cpp == 4);
-
- buffer = intel_be_buffer_from_handle(&intelScreen->base,
- "front", handle);
-
- if (!buffer)
- return;
-
- intelScreen->front.buffer = dri_bo(buffer);
-
- memset(&templat, 0, sizeof(templat));
- templat.tex_usage |= PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
- templat.target = PIPE_TEXTURE_2D;
- templat.last_level = 0;
- templat.depth[0] = 1;
- templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
- templat.width[0] = intelScreen->front.width;
- templat.height[0] = intelScreen->front.height;
- pf_get_block(templat.format, &templat.block);
- pitch = intelScreen->front.pitch;
-
- texture = screen->texture_blanket(screen,
- &templat,
- &pitch,
- buffer);
-
- /* Unref the buffer we don't need it anyways */
- pipe_buffer_reference(screen, &buffer, NULL);
-
- surface = screen->get_tex_surface(screen,
- texture,
- 0,
- 0,
- 0,
- PIPE_BUFFER_USAGE_GPU_WRITE);
-
- intelScreen->front.texture = texture;
- intelScreen->front.surface = surface;
-}
-
-PUBLIC const char __driConfigOptions[] =
- DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE
- DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
- DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
- DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY
-// DRI_CONF_FORCE_S3TC_ENABLE(false)
- DRI_CONF_ALLOW_LARGE_TEXTURES(1)
- DRI_CONF_SECTION_END DRI_CONF_END;
-
-const uint __driNConfigOptions = 3;
-
-#ifdef USE_NEW_INTERFACE
-static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
-#endif /*USE_NEW_INTERFACE */
-
-extern const struct dri_extension card_extensions[];
-
-
-
-
-static void
-intelPrintDRIInfo(struct intel_screen * intelScreen,
- __DRIscreenPrivate * sPriv, I830DRIPtr gDRIPriv)
-{
- fprintf(stderr, "*** Front size: 0x%x offset: 0x%x pitch: %d\n",
- intelScreen->front.size, intelScreen->front.offset,
- intelScreen->front.pitch);
- fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem);
-}
-
-
-#if 0
-static void
-intelPrintSAREA(const drmI830Sarea * sarea)
-{
- fprintf(stderr, "SAREA: sarea width %d height %d\n", sarea->width,
- sarea->height);
- fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch);
- fprintf(stderr,
- "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n",
- sarea->front_offset, sarea->front_size,
- (unsigned) sarea->front_handle);
- fprintf(stderr,
- "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n",
- sarea->back_offset, sarea->back_size,
- (unsigned) sarea->back_handle);
- fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n",
- sarea->depth_offset, sarea->depth_size,
- (unsigned) sarea->depth_handle);
- fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n",
- sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle);
- fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation);
- fprintf(stderr,
- "SAREA: rotated offset: 0x%08x size: 0x%x\n",
- sarea->rotated_offset, sarea->rotated_size);
- fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch);
-}
-#endif
-
-
-/**
- * Use the information in the sarea to update the screen parameters
- * related to screen rotation. Needs to be called locked.
- */
-void
-intelUpdateScreenRotation(__DRIscreenPrivate * sPriv, drmI830Sarea * sarea)
-{
- struct intel_screen *intelScreen = intel_screen(sPriv);
-
- if (intelScreen->front.map) {
- drmUnmap(intelScreen->front.map, intelScreen->front.size);
- intelScreen->front.map = NULL;
- }
-
- if (intelScreen->front.buffer)
- driDeleteBuffers(1, &intelScreen->front.buffer);
-
- intelScreen->front.width = sarea->width;
- intelScreen->front.height = sarea->height;
- intelScreen->front.offset = sarea->front_offset;
- intelScreen->front.pitch = sarea->pitch * intelScreen->front.cpp;
- intelScreen->front.size = sarea->front_size;
- intelScreen->front.handle = sarea->front_handle;
-
- assert( sarea->front_size >=
- intelScreen->front.pitch * intelScreen->front.height );
-
-#if 0 /* JB not important */
- if (!sarea->front_handle)
- return;
-
- if (drmMap(sPriv->fd,
- sarea->front_handle,
- intelScreen->front.size,
- (drmAddress *) & intelScreen->front.map) != 0) {
- fprintf(stderr, "drmMap(frontbuffer) failed!\n");
- return;
- }
-#endif
-
-#if 0 /* JB */
- if (intelScreen->staticPool) {
- driGenBuffers(intelScreen->staticPool, "static region", 1,
- &intelScreen->front.buffer, 64,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_NO_MOVE |
- DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0);
-
- driBOSetStatic(intelScreen->front.buffer,
- intelScreen->front.offset,
- intelScreen->front.pitch * intelScreen->front.height,
- intelScreen->front.map, 0);
- }
-#else
- if (intelScreen->base.staticPool) {
- if (intelScreen->front.buffer) {
- driBOUnReference(intelScreen->front.buffer);
- pipe_surface_reference(&intelScreen->front.surface, NULL);
- pipe_texture_reference(&intelScreen->front.texture, NULL);
- }
- intelCreateSurface(intelScreen, &intelScreen->base.base, sarea->front_bo_handle);
- }
-#endif
-}
-
-
-boolean
-intelCreatePools(__DRIscreenPrivate * sPriv)
-{
- //unsigned batchPoolSize = 1024*1024;
- struct intel_screen *intelScreen = intel_screen(sPriv);
-
- if (intelScreen->havePools)
- return GL_TRUE;
-
- intelScreen->havePools = GL_TRUE;
-
- intelUpdateScreenRotation(sPriv, intelScreen->sarea);
-
- return GL_TRUE;
-}
-
-static const char *
-intel_get_name( struct pipe_winsys *winsys )
-{
- return "Intel/DRI/ttm";
-}
-
-/*
- * The state tracker (should!) keep track of whether the fake
- * frontbuffer has been touched by any rendering since the last time
- * we copied its contents to the real frontbuffer. Our task is easy:
- */
-static void
-intel_flush_frontbuffer( struct pipe_winsys *winsys,
- struct pipe_surface *surf,
- void *context_private)
-{
- struct intel_context *intel = (struct intel_context *) context_private;
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
- intelDisplaySurface(dPriv, surf, NULL);
-}
-
-static boolean
-intelInitDriver(__DRIscreenPrivate * sPriv)
-{
- struct intel_screen *intelScreen;
- I830DRIPtr gDRIPriv = (I830DRIPtr) sPriv->pDevPriv;
-
- PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension =
- (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->
- getProcAddress("glxEnableExtension"));
- void *const psc = sPriv->psc->screenConfigs;
-
- if (sPriv->devPrivSize != sizeof(I830DRIRec)) {
- fprintf(stderr,
- "\nERROR! sizeof(I830DRIRec) does not match passed size from device driver\n");
- return GL_FALSE;
- }
-
- /* Allocate the private area */
- intelScreen = CALLOC_STRUCT(intel_screen);
- if (!intelScreen)
- return GL_FALSE;
-
- /* parse information in __driConfigOptions */
- driParseOptionInfo(&intelScreen->optionCache,
- __driConfigOptions, __driNConfigOptions);
-
- sPriv->private = (void *) intelScreen;
-
- intelScreen->sarea = (drmI830Sarea *) (((GLubyte *) sPriv->pSAREA) +
- gDRIPriv->sarea_priv_offset);
- intelScreen->deviceID = gDRIPriv->deviceID;
- intelScreen->front.cpp = gDRIPriv->cpp;
- intelScreen->drmMinor = sPriv->drmMinor;
-
- assert(gDRIPriv->bitsPerPixel == 16 ||
- gDRIPriv->bitsPerPixel == 32);
-
- intelUpdateScreenRotation(sPriv, intelScreen->sarea);
-
- if (0)
- intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);
-
- if (glx_enable_extension != NULL) {
- (*glx_enable_extension) (psc, "GLX_SGI_swap_control");
- (*glx_enable_extension) (psc, "GLX_SGI_video_sync");
- (*glx_enable_extension) (psc, "GLX_MESA_swap_control");
- (*glx_enable_extension) (psc, "GLX_MESA_swap_frame_usage");
- (*glx_enable_extension) (psc, "GLX_SGI_make_current_read");
- }
-
- intelScreen->base.base.flush_frontbuffer = intel_flush_frontbuffer;
- intelScreen->base.base.get_name = intel_get_name;
- intel_be_init_device(&intelScreen->base, sPriv->fd, intelScreen->deviceID);
-
- return GL_TRUE;
-}
-
-
-static void
-intelDestroyScreen(__DRIscreenPrivate * sPriv)
-{
- struct intel_screen *intelScreen = intel_screen(sPriv);
-
- intel_be_destroy_device(&intelScreen->base);
- /* intelUnmapScreenRegions(intelScreen); */
-
- FREE(intelScreen);
- sPriv->private = NULL;
-}
-
-
-/**
- * This is called when we need to set up GL rendering to a new X window.
- */
-static boolean
-intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
- __DRIdrawablePrivate * driDrawPriv,
- const __GLcontextModes * visual, boolean isPixmap)
-{
- if (isPixmap) {
- return GL_FALSE; /* not implemented */
- }
- else {
- enum pipe_format colorFormat, depthFormat, stencilFormat;
- struct intel_framebuffer *intelfb = CALLOC_STRUCT(intel_framebuffer);
-
- if (!intelfb)
- return GL_FALSE;
-
- if (visual->redBits == 5)
- colorFormat = PIPE_FORMAT_R5G6B5_UNORM;
- else
- colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM;
-
- if (visual->depthBits == 16)
- depthFormat = PIPE_FORMAT_Z16_UNORM;
- else if (visual->depthBits == 24)
- depthFormat = PIPE_FORMAT_S8Z24_UNORM;
- else
- depthFormat = PIPE_FORMAT_NONE;
-
- if (visual->stencilBits == 8)
- stencilFormat = PIPE_FORMAT_S8Z24_UNORM;
- else
- stencilFormat = PIPE_FORMAT_NONE;
-
- intelfb->stfb = st_create_framebuffer(visual,
- colorFormat,
- depthFormat,
- stencilFormat,
- driDrawPriv->w,
- driDrawPriv->h,
- (void*) intelfb);
- if (!intelfb->stfb) {
- free(intelfb);
- return GL_FALSE;
- }
-
- driDrawPriv->driverPrivate = (void *) intelfb;
- return GL_TRUE;
- }
-}
-
-static void
-intelDestroyBuffer(__DRIdrawablePrivate * driDrawPriv)
-{
- struct intel_framebuffer *intelfb = intel_framebuffer(driDrawPriv);
- assert(intelfb->stfb);
- st_unreference_framebuffer(&intelfb->stfb);
- free(intelfb);
-}
-
-
-/**
- * Get information about previous buffer swaps.
- */
-static int
-intelGetSwapInfo(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo)
-{
- if ((dPriv == NULL) || (dPriv->driverPrivate == NULL)
- || (sInfo == NULL)) {
- return -1;
- }
-
- return 0;
-}
-
-
-static void
-intelSetTexOffset(__DRIcontext *pDRICtx, int texname,
- unsigned long long offset, int depth, uint pitch)
-{
- abort();
-#if 0
- struct intel_context *intel = (struct intel_context*)
- ((__DRIcontextPrivate*)pDRICtx->private)->driverPrivate;
- struct gl_texture_object *tObj = _mesa_lookup_texture(&intel->ctx, texname);
- struct st_texture_object *stObj = st_texture_object(tObj);
-
- if (!stObj)
- return;
-
- if (stObj->pt)
- st->pipe->texture_release(intel->st->pipe, &stObj->pt);
-
- stObj->imageOverride = GL_TRUE;
- stObj->depthOverride = depth;
- stObj->pitchOverride = pitch;
-
- if (offset)
- stObj->textureOffset = offset;
-#endif
-}
-
-
-static const struct __DriverAPIRec intelAPI = {
- .InitDriver = intelInitDriver,
- .DestroyScreen = intelDestroyScreen,
- .CreateContext = intelCreateContext,
- .DestroyContext = intelDestroyContext,
- .CreateBuffer = intelCreateBuffer,
- .DestroyBuffer = intelDestroyBuffer,
- .SwapBuffers = intelSwapBuffers,
- .MakeCurrent = intelMakeCurrent,
- .UnbindContext = intelUnbindContext,
- .GetSwapInfo = intelGetSwapInfo,
- .GetMSC = driGetMSC32,
- .WaitForMSC = driWaitForMSC32,
- .WaitForSBC = NULL,
- .SwapBuffersMSC = NULL,
- .CopySubBuffer = intelCopySubBuffer,
- .setTexOffset = intelSetTexOffset,
-};
-
-
-static __GLcontextModes *
-intelFillInModes(unsigned pixel_bits, unsigned depth_bits,
- unsigned stencil_bits, boolean have_back_buffer)
-{
- __GLcontextModes *modes;
- __GLcontextModes *m;
- unsigned num_modes;
- unsigned depth_buffer_factor;
- unsigned back_buffer_factor;
- GLenum fb_format;
- GLenum fb_type;
-
- /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
- * support pageflipping at all.
- */
- static const GLenum back_buffer_modes[] = {
- GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
- };
-
- uint8_t depth_bits_array[3];
- uint8_t stencil_bits_array[3];
- uint8_t msaa_samples_array[1];
-
-
- depth_bits_array[0] = 0;
- depth_bits_array[1] = depth_bits;
- depth_bits_array[2] = depth_bits;
- msaa_samples_array[0] = 0;
-
- /* Just like with the accumulation buffer, always provide some modes
- * with a stencil buffer. It will be a sw fallback, but some apps won't
- * care about that.
- */
- stencil_bits_array[0] = 0;
- stencil_bits_array[1] = 0;
- if (depth_bits == 24)
- stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
-
- stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
-
- depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
- back_buffer_factor = (have_back_buffer) ? 3 : 1;
-
- num_modes = depth_buffer_factor * back_buffer_factor * 4;
-
- if (pixel_bits == 16) {
- fb_format = GL_RGB;
- fb_type = GL_UNSIGNED_SHORT_5_6_5;
- }
- else {
- fb_format = GL_BGRA;
- fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
- }
-
- modes =
- (*dri_interface->createContextModes) (num_modes,
- sizeof(__GLcontextModes));
- m = modes;
- if (!driFillInModes(&m, fb_format, fb_type,
- depth_bits_array, stencil_bits_array,
- depth_buffer_factor, back_buffer_modes,
- back_buffer_factor, msaa_samples_array, 1, GLX_TRUE_COLOR)) {
- fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
- __LINE__);
- return NULL;
- }
- if (!driFillInModes(&m, fb_format, fb_type,
- depth_bits_array, stencil_bits_array,
- depth_buffer_factor, back_buffer_modes,
- back_buffer_factor, msaa_samples_array, 1, GLX_DIRECT_COLOR)) {
- fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
- __LINE__);
- return NULL;
- }
-
- /* Mark the visual as slow if there are "fake" stencil bits.
- */
- for (m = modes; m != NULL; m = m->next) {
- if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) {
- m->visualRating = GLX_SLOW_CONFIG;
- }
- }
-
- return modes;
-}
-
-
-/**
- * This is the bootstrap function for the driver. libGL supplies all of the
- * requisite information about the system, and the driver initializes itself.
- * This routine also fills in the linked list pointed to by \c driver_modes
- * with the \c __GLcontextModes that the driver can support for windows or
- * pbuffers.
- *
- * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on
- * failure.
- */
-PUBLIC void *
-__driCreateNewScreen_20050727(__DRInativeDisplay * dpy, int scrn,
- __DRIscreen * psc,
- const __GLcontextModes * modes,
- const __DRIversion * ddx_version,
- const __DRIversion * dri_version,
- const __DRIversion * drm_version,
- const __DRIframebuffer * frame_buffer,
- drmAddress pSAREA, int fd,
- int internal_api_version,
- const __DRIinterfaceMethods * interface,
- __GLcontextModes ** driver_modes)
-{
- __DRIscreenPrivate *psp;
- static const __DRIversion ddx_expected = { 1, 7, 0 };
- static const __DRIversion dri_expected = { 4, 0, 0 };
- static const __DRIversion drm_expected = { 1, 7, 0 };
-
- dri_interface = interface;
-
- if (!driCheckDriDdxDrmVersions2("i915",
- dri_version, &dri_expected,
- ddx_version, &ddx_expected,
- drm_version, &drm_expected)) {
- return NULL;
- }
-
- psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL,
- ddx_version, dri_version, drm_version,
- frame_buffer, pSAREA, fd,
- internal_api_version, &intelAPI);
-
- if (psp != NULL) {
- I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv;
- *driver_modes = intelFillInModes(dri_priv->cpp * 8,
- (dri_priv->cpp == 2) ? 16 : 24,
- (dri_priv->cpp == 2) ? 0 : 8, 1);
-
- /* Calling driInitExtensions here, with a NULL context pointer,
- * does not actually enable the extensions. It just makes sure
- * that all the dispatch offsets for all the extensions that
- * *might* be enables are known. This is needed because the
- * dispatch offsets need to be known when _mesa_context_create
- * is called, but we can't enable the extensions until we have a
- * context pointer.
- *
- * Hello chicken. Hello egg. How are you two today?
- */
- driInitExtensions(NULL, card_extensions, GL_FALSE);
- }
-
- return (void *) psp;
-}
-
diff --git a/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.c b/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.c
deleted file mode 100644
index 34ad7eebe1..0000000000
--- a/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_swapbuffers.h"
-
-#include "intel_reg.h"
-
-#include "pipe/p_context.h"
-#include "state_tracker/st_public.h"
-#include "state_tracker/st_context.h"
-#include "state_tracker/st_cb_fbo.h"
-
-#include "ws_dri_bufmgr.h"
-#include "intel_batchbuffer.h"
-
-/**
- * Display a colorbuffer surface in an X window.
- * Used for SwapBuffers and flushing front buffer rendering.
- *
- * \param dPriv the window/drawable to display into
- * \param surf the surface to display
- * \param rect optional subrect of surface to display (may be NULL).
- */
-void
-intelDisplaySurface(__DRIdrawablePrivate *dPriv,
- struct pipe_surface *surf,
- const drm_clip_rect_t *rect)
-{
- struct intel_screen *intelScreen = intel_screen(dPriv->driScreenPriv);
- struct intel_context *intel = intelScreen->dummyContext;
-
- DBG(SWAP, "%s\n", __FUNCTION__);
-
- if (!intel) {
- /* XXX this is where some kind of extra/meta context could be useful */
- return;
- }
-
- if (intel->last_swap_fence) {
- driFenceFinish(intel->last_swap_fence, DRM_FENCE_TYPE_EXE, TRUE);
- driFenceUnReference(&intel->last_swap_fence);
- intel->last_swap_fence = NULL;
- }
- intel->last_swap_fence = intel->first_swap_fence;
- intel->first_swap_fence = NULL;
-
- /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets
- * should work regardless.
- */
- LOCK_HARDWARE(intel);
- /* if this drawable isn't currently bound the LOCK_HARDWARE done on the
- * current context (which is what intelScreenContext should return) might
- * not get a contended lock and thus cliprects not updated (tests/manywin)
- */
- if (intel_context(dPriv->driContextPriv) != intel)
- DRI_VALIDATE_DRAWABLE_INFO(intel->driScreen, dPriv);
-
-
- if (dPriv && dPriv->numClipRects) {
- const int srcWidth = surf->width;
- const int srcHeight = surf->height;
- const int nbox = dPriv->numClipRects;
- const drm_clip_rect_t *pbox = dPriv->pClipRects;
- const int pitch = intelScreen->front.pitch / intelScreen->front.cpp;
- const int cpp = intelScreen->front.cpp;
- const int srcpitch = surf->stride / cpp;
- int BR13, CMD;
- int i;
-
- ASSERT(surf->buffer);
-
- DBG(SWAP, "screen pitch %d src surface pitch %d\n",
- pitch, surf->stride);
-
- if (cpp == 2) {
- BR13 = (pitch * cpp) | (0xCC << 16) | (1 << 24);
- CMD = XY_SRC_COPY_BLT_CMD;
- }
- else {
- BR13 = (pitch * cpp) | (0xCC << 16) | (1 << 24) | (1 << 25);
- CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
- XY_SRC_COPY_BLT_WRITE_RGB);
- }
-
- for (i = 0; i < nbox; i++, pbox++) {
- drm_clip_rect_t box;
- drm_clip_rect_t sbox;
-
- if (pbox->x1 > pbox->x2 ||
- pbox->y1 > pbox->y2 ||
- pbox->x2 > intelScreen->front.width ||
- pbox->y2 > intelScreen->front.height) {
- /* invalid cliprect, skip it */
- continue;
- }
-
- box = *pbox;
-
- if (rect) {
- /* intersect cliprect with user-provided src rect */
- drm_clip_rect_t rrect;
-
- rrect.x1 = dPriv->x + rect->x1;
- rrect.y1 = (dPriv->h - rect->y1 - rect->y2) + dPriv->y;
- rrect.x2 = rect->x2 + rrect.x1;
- rrect.y2 = rect->y2 + rrect.y1;
- if (rrect.x1 > box.x1)
- box.x1 = rrect.x1;
- if (rrect.y1 > box.y1)
- box.y1 = rrect.y1;
- if (rrect.x2 < box.x2)
- box.x2 = rrect.x2;
- if (rrect.y2 < box.y2)
- box.y2 = rrect.y2;
-
- if (box.x1 > box.x2 || box.y1 > box.y2)
- continue;
- }
-
- /* restrict blit to size of actually rendered area */
- if (box.x2 - box.x1 > srcWidth)
- box.x2 = srcWidth + box.x1;
- if (box.y2 - box.y1 > srcHeight)
- box.y2 = srcHeight + box.y1;
-
- DBG(SWAP, "box x1 x2 y1 y2 %d %d %d %d\n",
- box.x1, box.x2, box.y1, box.y2);
-
- sbox.x1 = box.x1 - dPriv->x;
- sbox.y1 = box.y1 - dPriv->y;
-
- assert(box.x1 < box.x2);
- assert(box.y1 < box.y2);
-
- /* XXX this could be done with pipe->surface_copy() */
- /* XXX should have its own batch buffer */
- if (!BEGIN_BATCH(8, 2)) {
- /*
- * Since we share this batch buffer with a context
- * we can't flush it since that risks a GPU lockup
- */
- assert(0);
- continue;
- }
-
- OUT_BATCH(CMD);
- OUT_BATCH(BR13);
- OUT_BATCH((box.y1 << 16) | box.x1);
- OUT_BATCH((box.y2 << 16) | box.x2);
-
- OUT_RELOC(intelScreen->front.buffer,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
- DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE, 0);
- OUT_BATCH((sbox.y1 << 16) | sbox.x1);
- OUT_BATCH((srcpitch * cpp) & 0xffff);
- OUT_RELOC(dri_bo(surf->buffer),
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- DRM_BO_MASK_MEM | DRM_BO_FLAG_READ, 0);
-
- }
-
- if (intel->first_swap_fence)
- driFenceUnReference(&intel->first_swap_fence);
- intel->first_swap_fence = intel_be_batchbuffer_flush(intel->base.batch);
- }
-
- UNLOCK_HARDWARE(intel);
-
- if (intel->lastStamp != dPriv->lastStamp) {
- intelUpdateWindowSize(dPriv);
- intel->lastStamp = dPriv->lastStamp;
- }
-}
-
-
-
-/**
- * This will be called whenever the currently bound window is moved/resized.
- */
-void
-intelUpdateWindowSize(__DRIdrawablePrivate *dPriv)
-{
- struct intel_framebuffer *intelfb = intel_framebuffer(dPriv);
- assert(intelfb->stfb);
- st_resize_framebuffer(intelfb->stfb, dPriv->w, dPriv->h);
-}
-
-
-
-void
-intelSwapBuffers(__DRIdrawablePrivate * dPriv)
-{
- struct intel_framebuffer *intel_fb = intel_framebuffer(dPriv);
- struct pipe_surface *back_surf;
-
- assert(intel_fb);
- assert(intel_fb->stfb);
-
- back_surf = st_get_framebuffer_surface(intel_fb->stfb,
- ST_SURFACE_BACK_LEFT);
- if (back_surf) {
- st_notify_swapbuffers(intel_fb->stfb);
- intelDisplaySurface(dPriv, back_surf, NULL);
- st_notify_swapbuffers_complete(intel_fb->stfb);
- }
-}
-
-
-/**
- * Called via glXCopySubBufferMESA() to copy a subrect of the back
- * buffer to the front buffer/screen.
- */
-void
-intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h)
-{
- struct intel_framebuffer *intel_fb = intel_framebuffer(dPriv);
- struct pipe_surface *back_surf;
-
- assert(intel_fb);
- assert(intel_fb->stfb);
-
- back_surf = st_get_framebuffer_surface(intel_fb->stfb,
- ST_SURFACE_BACK_LEFT);
- if (back_surf) {
- drm_clip_rect_t rect;
- rect.x1 = x;
- rect.y1 = y;
- rect.x2 = w;
- rect.y2 = h;
-
- st_notify_swapbuffers(intel_fb->stfb);
- intelDisplaySurface(dPriv, back_surf, &rect);
- }
-}
diff --git a/src/gallium/winsys/drm/intel/dri/server/i830_common.h b/src/gallium/winsys/drm/intel/dri/server/i830_common.h
deleted file mode 100644
index 3452ddb3c9..0000000000
--- a/src/gallium/winsys/drm/intel/dri/server/i830_common.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/**************************************************************************
-
-Copyright 2001 VA Linux Systems Inc., Fremont, California.
-Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-on the rights to use, copy, modify, merge, publish, distribute, sub
-license, and/or sell copies of the Software, and to permit persons to whom
-the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice (including the next
-paragraph) shall be included in all copies or substantial portions of the
-Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-
-#ifndef _I830_COMMON_H_
-#define _I830_COMMON_H_
-
-
-#define I830_NR_TEX_REGIONS 255 /* maximum due to use of chars for next/prev */
-#define I830_LOG_MIN_TEX_REGION_SIZE 14
-
-
-/* Driver specific DRM command indices
- * NOTE: these are not OS specific, but they are driver specific
- */
-#define DRM_I830_INIT 0x00
-#define DRM_I830_FLUSH 0x01
-#define DRM_I830_FLIP 0x02
-#define DRM_I830_BATCHBUFFER 0x03
-#define DRM_I830_IRQ_EMIT 0x04
-#define DRM_I830_IRQ_WAIT 0x05
-#define DRM_I830_GETPARAM 0x06
-#define DRM_I830_SETPARAM 0x07
-#define DRM_I830_ALLOC 0x08
-#define DRM_I830_FREE 0x09
-#define DRM_I830_INIT_HEAP 0x0a
-#define DRM_I830_CMDBUFFER 0x0b
-#define DRM_I830_DESTROY_HEAP 0x0c
-#define DRM_I830_SET_VBLANK_PIPE 0x0d
-#define DRM_I830_GET_VBLANK_PIPE 0x0e
-#define DRM_I830_MMIO 0x10
-
-typedef struct {
- enum {
- I830_INIT_DMA = 0x01,
- I830_CLEANUP_DMA = 0x02,
- I830_RESUME_DMA = 0x03
- } func;
- unsigned int mmio_offset;
- int sarea_priv_offset;
- unsigned int ring_start;
- unsigned int ring_end;
- unsigned int ring_size;
- unsigned int front_offset;
- unsigned int back_offset;
- unsigned int depth_offset;
- unsigned int w;
- unsigned int h;
- unsigned int pitch;
- unsigned int pitch_bits;
- unsigned int back_pitch;
- unsigned int depth_pitch;
- unsigned int cpp;
- unsigned int chipset;
-} drmI830Init;
-
-typedef struct {
- drmTextureRegion texList[I830_NR_TEX_REGIONS+1];
- int last_upload; /* last time texture was uploaded */
- int last_enqueue; /* last time a buffer was enqueued */
- int last_dispatch; /* age of the most recently dispatched buffer */
- int ctxOwner; /* last context to upload state */
- /** Last context that used the buffer manager. */
- int texAge;
- int pf_enabled; /* is pageflipping allowed? */
- int pf_active;
- int pf_current_page; /* which buffer is being displayed? */
- int perf_boxes; /* performance boxes to be displayed */
- int width, height; /* screen size in pixels */
-
- drm_handle_t front_handle;
- int front_offset;
- int front_size;
-
- drm_handle_t back_handle;
- int back_offset;
- int back_size;
-
- drm_handle_t depth_handle;
- int depth_offset;
- int depth_size;
-
- drm_handle_t tex_handle;
- int tex_offset;
- int tex_size;
- int log_tex_granularity;
- int pitch;
- int rotation; /* 0, 90, 180 or 270 */
- int rotated_offset;
- int rotated_size;
- int rotated_pitch;
- int virtualX, virtualY;
-
- unsigned int front_tiled;
- unsigned int back_tiled;
- unsigned int depth_tiled;
- unsigned int rotated_tiled;
- unsigned int rotated2_tiled;
-
- int planeA_x;
- int planeA_y;
- int planeA_w;
- int planeA_h;
- int planeB_x;
- int planeB_y;
- int planeB_w;
- int planeB_h;
-
- /* Triple buffering */
- drm_handle_t third_handle;
- int third_offset;
- int third_size;
- unsigned int third_tiled;
-
- /* buffer object handles for the static buffers. May change
- * over the lifetime of the client, though it doesn't in our current
- * implementation.
- */
- unsigned int front_bo_handle;
- unsigned int back_bo_handle;
- unsigned int third_bo_handle;
- unsigned int depth_bo_handle;
-} drmI830Sarea;
-
-/* Flags for perf_boxes
- */
-#define I830_BOX_RING_EMPTY 0x1 /* populated by kernel */
-#define I830_BOX_FLIP 0x2 /* populated by kernel */
-#define I830_BOX_WAIT 0x4 /* populated by kernel & client */
-#define I830_BOX_TEXTURE_LOAD 0x8 /* populated by kernel */
-#define I830_BOX_LOST_CONTEXT 0x10 /* populated by client */
-
-
-typedef struct {
- int start; /* agp offset */
- int used; /* nr bytes in use */
- int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */
- int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/
- int num_cliprects; /* mulitpass with multiple cliprects? */
- drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */
-} drmI830BatchBuffer;
-
-typedef struct {
- char *buf; /* agp offset */
- int sz; /* nr bytes in use */
- int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */
- int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/
- int num_cliprects; /* mulitpass with multiple cliprects? */
- drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */
-} drmI830CmdBuffer;
-
-typedef struct {
- int *irq_seq;
-} drmI830IrqEmit;
-
-typedef struct {
- int irq_seq;
-} drmI830IrqWait;
-
-typedef struct {
- int param;
- int *value;
-} drmI830GetParam;
-
-#define I830_PARAM_IRQ_ACTIVE 1
-#define I830_PARAM_ALLOW_BATCHBUFFER 2
-
-typedef struct {
- int param;
- int value;
-} drmI830SetParam;
-
-#define I830_SETPARAM_USE_MI_BATCHBUFFER_START 1
-#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY 2
-#define I830_SETPARAM_ALLOW_BATCHBUFFER 3
-
-
-/* A memory manager for regions of shared memory:
- */
-#define I830_MEM_REGION_AGP 1
-
-typedef struct {
- int region;
- int alignment;
- int size;
- int *region_offset; /* offset from start of fb or agp */
-} drmI830MemAlloc;
-
-typedef struct {
- int region;
- int region_offset;
-} drmI830MemFree;
-
-typedef struct {
- int region;
- int size;
- int start;
-} drmI830MemInitHeap;
-
-typedef struct {
- int region;
-} drmI830MemDestroyHeap;
-
-#define DRM_I830_VBLANK_PIPE_A 1
-#define DRM_I830_VBLANK_PIPE_B 2
-
-typedef struct {
- int pipe;
-} drmI830VBlankPipe;
-
-#define MMIO_READ 0
-#define MMIO_WRITE 1
-
-#define MMIO_REGS_IA_PRIMATIVES_COUNT 0
-#define MMIO_REGS_IA_VERTICES_COUNT 1
-#define MMIO_REGS_VS_INVOCATION_COUNT 2
-#define MMIO_REGS_GS_PRIMITIVES_COUNT 3
-#define MMIO_REGS_GS_INVOCATION_COUNT 4
-#define MMIO_REGS_CL_PRIMITIVES_COUNT 5
-#define MMIO_REGS_CL_INVOCATION_COUNT 6
-#define MMIO_REGS_PS_INVOCATION_COUNT 7
-#define MMIO_REGS_PS_DEPTH_COUNT 8
-
-typedef struct {
- unsigned int read_write:1;
- unsigned int reg:31;
- void __user *data;
-} drmI830MMIO;
-
-#endif /* _I830_DRM_H_ */
diff --git a/src/gallium/winsys/drm/intel/dri/server/i830_dri.h b/src/gallium/winsys/drm/intel/dri/server/i830_dri.h
deleted file mode 100644
index 0d514b6c38..0000000000
--- a/src/gallium/winsys/drm/intel/dri/server/i830_dri.h
+++ /dev/null
@@ -1,62 +0,0 @@
-
-#ifndef _I830_DRI_H
-#define _I830_DRI_H
-
-#include "xf86drm.h"
-#include "i830_common.h"
-
-#define I830_MAX_DRAWABLES 256
-
-#define I830_MAJOR_VERSION 1
-#define I830_MINOR_VERSION 7
-#define I830_PATCHLEVEL 2
-
-#define I830_REG_SIZE 0x80000
-
-typedef struct _I830DRIRec {
- drm_handle_t regs;
- drmSize regsSize;
-
- drmSize unused1; /* backbufferSize */
- drm_handle_t unused2; /* backbuffer */
-
- drmSize unused3; /* depthbufferSize */
- drm_handle_t unused4; /* depthbuffer */
-
- drmSize unused5; /* rotatedSize */
- drm_handle_t unused6; /* rotatedbuffer */
-
- drm_handle_t unused7; /* textures */
- int unused8; /* textureSize */
-
- drm_handle_t unused9; /* agp_buffers */
- drmSize unused10; /* agp_buf_size */
-
- int deviceID;
- int width;
- int height;
- int mem;
- int cpp;
- int bitsPerPixel;
-
- int unused11[8]; /* was front/back/depth/rotated offset/pitch */
-
- int unused12; /* logTextureGranularity */
- int unused13; /* textureOffset */
-
- int irq;
- int sarea_priv_offset;
-} I830DRIRec, *I830DRIPtr;
-
-typedef struct {
- /* Nothing here yet */
- int dummy;
-} I830ConfigPrivRec, *I830ConfigPrivPtr;
-
-typedef struct {
- /* Nothing here yet */
- int dummy;
-} I830DRIContextRec, *I830DRIContextPtr;
-
-
-#endif
diff --git a/src/gallium/winsys/drm/intel/egl/Makefile b/src/gallium/winsys/drm/intel/egl/Makefile
index f0b5a44389..c5217ad2d6 100644
--- a/src/gallium/winsys/drm/intel/egl/Makefile
+++ b/src/gallium/winsys/drm/intel/egl/Makefile
@@ -1,26 +1,26 @@
TOP = ../../../../../..
+GALLIUMDIR = ../../../..
include $(TOP)/configs/current
LIBNAME = EGL_i915.so
PIPE_DRIVERS = \
+ $(TOP)/src/gallium/state_trackers/egl/libegldrm.a \
+ $(GALLIUMDIR)/winsys/drm/intel/gem/libinteldrm.a \
$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
- $(TOP)/src/gallium/drivers/i915simple/libi915simple.a \
- ../common/libinteldrm.a
+ $(TOP)/src/gallium/drivers/i915simple/libi915simple.a
-DRIVER_SOURCES = \
- intel_swapbuffers.c \
- intel_context.c \
- intel_device.c \
- intel_egl.c
+DRIVER_SOURCES =
C_SOURCES = \
$(COMMON_GALLIUM_SOURCES) \
$(DRIVER_SOURCES)
+DRIVER_EXTRAS = -ldrm_intel
+
ASM_SOURCES =
-DRIVER_DEFINES = -I../common $(shell pkg-config libdrm --atleast-version=2.3.1 \
+DRIVER_DEFINES = -I../gem $(shell pkg-config libdrm --atleast-version=2.3.1 \
&& echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
include ../../Makefile.template
diff --git a/src/gallium/winsys/drm/intel/egl/SConscript b/src/gallium/winsys/drm/intel/egl/SConscript
deleted file mode 100644
index 0ad19d42a8..0000000000
--- a/src/gallium/winsys/drm/intel/egl/SConscript
+++ /dev/null
@@ -1,39 +0,0 @@
-Import('*')
-
-env = drienv.Clone()
-
-env.Append(CPPPATH = [
- '../intel',
- 'server'
-])
-
-#MINIGLX_SOURCES = server/intel_dri.c
-
-DRIVER_SOURCES = [
- 'intel_winsys_pipe.c',
- 'intel_winsys_softpipe.c',
- 'intel_winsys_i915.c',
- 'intel_batchbuffer.c',
- 'intel_swapbuffers.c',
- 'intel_context.c',
- 'intel_lock.c',
- 'intel_screen.c',
- 'intel_batchpool.c',
-]
-
-sources = \
- COMMON_GALLIUM_SOURCES + \
- COMMON_BM_SOURCES + \
- DRIVER_SOURCES
-
-drivers = [
- softpipe,
- i915simple
-]
-
-# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
-env.SharedLibrary(
- target ='i915tex_dri.so',
- source = sources,
- LIBS = drivers + mesa + auxiliaries + env['LIBS'],
-) \ No newline at end of file
diff --git a/src/gallium/winsys/drm/intel/egl/intel_batchbuffer.h b/src/gallium/winsys/drm/intel/egl/intel_batchbuffer.h
deleted file mode 100644
index 3e95326168..0000000000
--- a/src/gallium/winsys/drm/intel/egl/intel_batchbuffer.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef INTEL_BATCHBUFFER_H
-#define INTEL_BATCHBUFFER_H
-
-#include "intel_be_batchbuffer.h"
-
-/*
- * Need to redefine the BATCH defines
- */
-
-#undef BEGIN_BATCH
-#define BEGIN_BATCH(dwords, relocs) \
- (i915_batchbuffer_check(&intel->base.batch->base, dwords, relocs))
-
-#undef OUT_BATCH
-#define OUT_BATCH(d) \
- i915_batchbuffer_dword(&intel->base.batch->base, d)
-
-#undef OUT_RELOC
-#define OUT_RELOC(buf,flags,mask,delta) do { \
- assert((delta) >= 0); \
- intel_be_offset_relocation(intel->base.batch, delta, buf, flags, mask); \
-} while (0)
-
-#endif
diff --git a/src/gallium/winsys/drm/intel/egl/intel_context.c b/src/gallium/winsys/drm/intel/egl/intel_context.c
deleted file mode 100644
index 927addb834..0000000000
--- a/src/gallium/winsys/drm/intel/egl/intel_context.c
+++ /dev/null
@@ -1,242 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "i915simple/i915_screen.h"
-
-#include "intel_device.h"
-#include "intel_context.h"
-#include "intel_batchbuffer.h"
-
-#include "state_tracker/st_public.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_context.h"
-#include "intel_egl.h"
-#include "utils.h"
-
-#ifdef DEBUG
-int __intel_debug = 0;
-#endif
-
-
-#define need_GL_ARB_multisample
-#define need_GL_ARB_point_parameters
-#define need_GL_ARB_texture_compression
-#define need_GL_ARB_vertex_buffer_object
-#define need_GL_ARB_vertex_program
-#define need_GL_ARB_window_pos
-#define need_GL_EXT_blend_color
-#define need_GL_EXT_blend_equation_separate
-#define need_GL_EXT_blend_func_separate
-#define need_GL_EXT_blend_minmax
-#define need_GL_EXT_cull_vertex
-#define need_GL_EXT_fog_coord
-#define need_GL_EXT_framebuffer_object
-#define need_GL_EXT_multi_draw_arrays
-#define need_GL_EXT_secondary_color
-#define need_GL_NV_vertex_program
-#include "extension_helper.h"
-
-
-/**
- * Extension strings exported by the intel driver.
- *
- * \note
- * It appears that ARB_texture_env_crossbar has "disappeared" compared to the
- * old i830-specific driver.
- */
-const struct dri_extension card_extensions[] = {
- {"GL_ARB_multisample", GL_ARB_multisample_functions},
- {"GL_ARB_multitexture", NULL},
- {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
- {"GL_ARB_texture_border_clamp", NULL},
- {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions},
- {"GL_ARB_texture_cube_map", NULL},
- {"GL_ARB_texture_env_add", NULL},
- {"GL_ARB_texture_env_combine", NULL},
- {"GL_ARB_texture_env_dot3", NULL},
- {"GL_ARB_texture_mirrored_repeat", NULL},
- {"GL_ARB_texture_rectangle", NULL},
- {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
- {"GL_ARB_pixel_buffer_object", NULL},
- {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
- {"GL_ARB_window_pos", GL_ARB_window_pos_functions},
- {"GL_EXT_blend_color", GL_EXT_blend_color_functions},
- {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
- {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
- {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
- {"GL_EXT_blend_subtract", NULL},
- {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions},
- {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions},
- {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions},
- {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions},
- {"GL_EXT_packed_depth_stencil", NULL},
- {"GL_EXT_pixel_buffer_object", NULL},
- {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
- {"GL_EXT_stencil_wrap", NULL},
- {"GL_EXT_texture_edge_clamp", NULL},
- {"GL_EXT_texture_env_combine", NULL},
- {"GL_EXT_texture_env_dot3", NULL},
- {"GL_EXT_texture_filter_anisotropic", NULL},
- {"GL_EXT_texture_lod_bias", NULL},
- {"GL_3DFX_texture_compression_FXT1", NULL},
- {"GL_APPLE_client_storage", NULL},
- {"GL_MESA_pack_invert", NULL},
- {"GL_MESA_ycbcr_texture", NULL},
- {"GL_NV_blend_square", NULL},
- {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
- {"GL_NV_vertex_program1_1", NULL},
- {"GL_SGIS_generate_mipmap", NULL },
- {NULL, NULL}
-};
-
-
-/*
- * Hardware lock functions.
- * Doesn't do anything in EGL
- */
-
-static void
-intel_lock_hardware(struct intel_be_context *context)
-{
- (void)context;
-}
-
-static void
-intel_unlock_hardware(struct intel_be_context *context)
-{
- (void)context;
-}
-
-static boolean
-intel_locked_hardware(struct intel_be_context *context)
-{
- (void)context;
- return FALSE;
-}
-
-
-/*
- * Misc functions.
- */
-
-int
-intel_create_context(struct egl_drm_context *egl_context, const __GLcontextModes *visual, void *sharedContextPrivate)
-{
- struct intel_context *intel = CALLOC_STRUCT(intel_context);
- struct intel_device *device = (struct intel_device *)egl_context->device->priv;
- struct pipe_context *pipe;
- struct st_context *st_share = NULL;
-
- egl_context->priv = intel;
-
- intel->intel_device = device;
- intel->egl_context = egl_context;
- intel->egl_device = egl_context->device;
-
- intel->base.hardware_lock = intel_lock_hardware;
- intel->base.hardware_unlock = intel_unlock_hardware;
- intel->base.hardware_locked = intel_locked_hardware;
-
- intel_be_init_context(&intel->base, &device->base);
-
-#if 0
- pipe = intel_create_softpipe(intel, screen->winsys);
-#else
- pipe = i915_create_context(device->pipe, &device->base.base, &intel->base.base);
-#endif
-
- pipe->priv = intel;
-
- intel->st = st_create_context(pipe, visual, st_share);
-
- device->dummy = intel;
-
- return TRUE;
-}
-
-int
-intel_destroy_context(struct egl_drm_context *egl_context)
-{
- struct intel_context *intel = egl_context->priv;
-
- if (intel->intel_device->dummy == intel)
- intel->intel_device->dummy = NULL;
-
- st_destroy_context(intel->st);
- intel_be_destroy_context(&intel->base);
- free(intel);
- return TRUE;
-}
-
-void
-intel_make_current(struct egl_drm_context *context, struct egl_drm_drawable *draw, struct egl_drm_drawable *read)
-{
- if (context) {
- struct intel_context *intel = (struct intel_context *)context->priv;
- struct intel_framebuffer *draw_fb = (struct intel_framebuffer *)draw->priv;
- struct intel_framebuffer *read_fb = (struct intel_framebuffer *)read->priv;
-
- assert(draw_fb->stfb);
- assert(read_fb->stfb);
-
- st_make_current(intel->st, draw_fb->stfb, read_fb->stfb);
-
- intel->egl_drawable = draw;
-
- st_resize_framebuffer(draw_fb->stfb, draw->w, draw->h);
-
- if (draw != read)
- st_resize_framebuffer(read_fb->stfb, read->w, read->h);
-
- } else {
- st_make_current(NULL, NULL, NULL);
- }
-}
-
-void
-intel_bind_frontbuffer(struct egl_drm_drawable *draw, struct egl_drm_frontbuffer *front)
-{
- struct intel_device *device = (struct intel_device *)draw->device->priv;
- struct intel_framebuffer *draw_fb = (struct intel_framebuffer *)draw->priv;
-
- if (draw_fb->front_buffer)
- driBOUnReference(draw_fb->front_buffer);
-
- draw_fb->front_buffer = NULL;
- draw_fb->front = NULL;
-
- /* to unbind just call this function with front == NULL */
- if (!front)
- return;
-
- draw_fb->front = front;
-
- driGenBuffers(device->base.staticPool, "front", 1, &draw_fb->front_buffer, 0, 0, 0);
- driBOSetReferenced(draw_fb->front_buffer, front->handle);
-
- st_resize_framebuffer(draw_fb->stfb, draw->w, draw->h);
-}
diff --git a/src/gallium/winsys/drm/intel/egl/intel_context.h b/src/gallium/winsys/drm/intel/egl/intel_context.h
deleted file mode 100644
index 477fdec7f7..0000000000
--- a/src/gallium/winsys/drm/intel/egl/intel_context.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef INTEL_CONTEXT_H
-#define INTEL_CONTEXT_H
-
-#include "pipe/p_debug.h"
-#include "intel_be_context.h"
-
-
-struct st_context;
-struct egl_drm_device;
-struct egl_drm_context;
-struct egl_drm_frontbuffer;
-
-
-/**
- * Intel rendering context, contains a state tracker and intel-specific info.
- */
-struct intel_context
-{
- struct intel_be_context base;
-
- struct st_context *st;
-
- struct intel_device *intel_device;
-
- /* new egl stuff */
- struct egl_drm_device *egl_device;
- struct egl_drm_context *egl_context;
- struct egl_drm_drawable *egl_drawable;
-};
-
-
-
-/**
- * Intel framebuffer.
- */
-struct intel_framebuffer
-{
- struct st_framebuffer *stfb;
-
- struct intel_device *device;
- struct _DriBufferObject *front_buffer;
- struct egl_drm_frontbuffer *front;
-};
-
-
-
-
-/* These are functions now:
- */
-void LOCK_HARDWARE( struct intel_context *intel );
-void UNLOCK_HARDWARE( struct intel_context *intel );
-
-extern char *__progname;
-
-
-
-/* ================================================================
- * Debugging:
- */
-#ifdef DEBUG
-extern int __intel_debug;
-
-#define DEBUG_SWAP 0x1
-#define DEBUG_LOCK 0x2
-#define DEBUG_IOCTL 0x4
-#define DEBUG_BATCH 0x8
-
-#define DBG(flag, ...) do { \
- if (__intel_debug & (DEBUG_##flag)) \
- printf(__VA_ARGS__); \
-} while(0)
-
-#else
-#define DBG(flag, ...)
-#endif
-
-
-#define PCI_CHIP_845_G 0x2562
-#define PCI_CHIP_I830_M 0x3577
-#define PCI_CHIP_I855_GM 0x3582
-#define PCI_CHIP_I865_G 0x2572
-#define PCI_CHIP_I915_G 0x2582
-#define PCI_CHIP_I915_GM 0x2592
-#define PCI_CHIP_I945_G 0x2772
-#define PCI_CHIP_I945_GM 0x27A2
-#define PCI_CHIP_I945_GME 0x27AE
-#define PCI_CHIP_G33_G 0x29C2
-#define PCI_CHIP_Q35_G 0x29B2
-#define PCI_CHIP_Q33_G 0x29D2
-
-#endif
diff --git a/src/gallium/winsys/drm/intel/egl/intel_device.c b/src/gallium/winsys/drm/intel/egl/intel_device.c
deleted file mode 100644
index b9649cbec7..0000000000
--- a/src/gallium/winsys/drm/intel/egl/intel_device.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "utils.h"
-
-#include "state_tracker/st_public.h"
-#include "i915simple/i915_screen.h"
-
-#include "intel_context.h"
-#include "intel_device.h"
-#include "intel_batchbuffer.h"
-#include "intel_egl.h"
-
-
-extern const struct dri_extension card_extensions[];
-
-
-int
-intel_create_device(struct egl_drm_device *device)
-{
- struct intel_device *intel_device;
-
- /* Allocate the private area */
- intel_device = CALLOC_STRUCT(intel_device);
- if (!intel_device)
- return FALSE;
-
- device->priv = (void *)intel_device;
- intel_device->device = device;
-
- intel_device->deviceID = device->deviceID;
-
- intel_be_init_device(&intel_device->base, device->drmFD, intel_device->deviceID);
-
- intel_device->pipe = i915_create_screen(&intel_device->base.base, intel_device->deviceID);
-
- /* hack */
- driInitExtensions(NULL, card_extensions, GL_FALSE);
-
- return TRUE;
-}
-
-int
-intel_destroy_device(struct egl_drm_device *device)
-{
- struct intel_device *intel_device = (struct intel_device *)device->priv;
-
- intel_be_destroy_device(&intel_device->base);
-
- free(intel_device);
- device->priv = NULL;
-
- return TRUE;
-}
-
-int
-intel_create_drawable(struct egl_drm_drawable *drawable,
- const __GLcontextModes * visual)
-{
- enum pipe_format colorFormat, depthFormat, stencilFormat;
- struct intel_framebuffer *intelfb = CALLOC_STRUCT(intel_framebuffer);
-
- if (!intelfb)
- return GL_FALSE;
-
- intelfb->device = drawable->device->priv;
-
- if (visual->redBits == 5)
- colorFormat = PIPE_FORMAT_R5G6B5_UNORM;
- else
- colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM;
-
- if (visual->depthBits == 16)
- depthFormat = PIPE_FORMAT_Z16_UNORM;
- else if (visual->depthBits == 24)
- depthFormat = PIPE_FORMAT_S8Z24_UNORM;
- else
- depthFormat = PIPE_FORMAT_NONE;
-
- if (visual->stencilBits == 8)
- stencilFormat = PIPE_FORMAT_S8Z24_UNORM;
- else
- stencilFormat = PIPE_FORMAT_NONE;
-
- intelfb->stfb = st_create_framebuffer(visual,
- colorFormat,
- depthFormat,
- stencilFormat,
- drawable->w,
- drawable->h,
- (void*) intelfb);
-
- if (!intelfb->stfb) {
- free(intelfb);
- return GL_FALSE;
- }
-
- drawable->priv = (void *) intelfb;
- return GL_TRUE;
-}
-
-int
-intel_destroy_drawable(struct egl_drm_drawable *drawable)
-{
- struct intel_framebuffer *intelfb = (struct intel_framebuffer *)drawable->priv;
- drawable->priv = NULL;
-
- assert(intelfb->stfb);
- st_unreference_framebuffer(&intelfb->stfb);
- free(intelfb);
- return TRUE;
-}
diff --git a/src/gallium/winsys/drm/intel/egl/intel_egl.c b/src/gallium/winsys/drm/intel/egl/intel_egl.c
deleted file mode 100644
index 3204ed3131..0000000000
--- a/src/gallium/winsys/drm/intel/egl/intel_egl.c
+++ /dev/null
@@ -1,796 +0,0 @@
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdint.h>
-
-#include "eglconfig.h"
-#include "eglcontext.h"
-#include "egldisplay.h"
-#include "egldriver.h"
-#include "eglglobals.h"
-#include "eglmode.h"
-#include "eglscreen.h"
-#include "eglsurface.h"
-#include "egllog.h"
-
-#include "intel_egl.h"
-
-#include "xf86drm.h"
-#include "xf86drmMode.h"
-
-#include "intel_context.h"
-
-#include "state_tracker/st_public.h"
-
-#define MAX_SCREENS 16
-
-static void
-drm_get_device_id(struct egl_drm_device *device)
-{
- char path[512];
- FILE *file;
-
- /* TODO get the real minor */
- int minor = 0;
-
- snprintf(path, sizeof(path), "/sys/class/drm/card%d/device/device", minor);
- file = fopen(path, "r");
- if (!file) {
- _eglLog(_EGL_WARNING, "Could not retrive device ID\n");
- return;
- }
-
- fgets(path, sizeof( path ), file);
- sscanf(path, "%x", &device->deviceID);
- fclose(file);
-}
-
-static struct egl_drm_device*
-egl_drm_create_device(int drmFD)
-{
- struct egl_drm_device *device = malloc(sizeof(*device));
- memset(device, 0, sizeof(*device));
- device->drmFD = drmFD;
-
- device->version = drmGetVersion(device->drmFD);
-
- drm_get_device_id(device);
-
- if (!intel_create_device(device)) {
- free(device);
- return NULL;
- }
-
- return device;
-}
-
-static void
-_egl_context_modes_destroy(__GLcontextModes *modes)
-{
- _eglLog(_EGL_DEBUG, "%s", __FUNCTION__);
-
- while (modes) {
- __GLcontextModes * const next = modes->next;
- free(modes);
- modes = next;
- }
-}
-/**
- * Create a linked list of 'count' GLcontextModes.
- * These are used during the client/server visual negotiation phase,
- * then discarded.
- */
-static __GLcontextModes *
-_egl_context_modes_create(unsigned count, size_t minimum_size)
-{
- /* This code copied from libGLX, and modified */
- const size_t size = (minimum_size > sizeof(__GLcontextModes))
- ? minimum_size : sizeof(__GLcontextModes);
- __GLcontextModes * head = NULL;
- __GLcontextModes ** next;
- unsigned i;
-
- _eglLog(_EGL_DEBUG, "%s %d %d", __FUNCTION__, count, minimum_size);
-
- next = & head;
- for (i = 0 ; i < count ; i++) {
- *next = (__GLcontextModes *) calloc(1, size);
- if (*next == NULL) {
- _egl_context_modes_destroy(head);
- head = NULL;
- break;
- }
-
- (*next)->doubleBufferMode = 1;
- (*next)->visualID = GLX_DONT_CARE;
- (*next)->visualType = GLX_DONT_CARE;
- (*next)->visualRating = GLX_NONE;
- (*next)->transparentPixel = GLX_NONE;
- (*next)->transparentRed = GLX_DONT_CARE;
- (*next)->transparentGreen = GLX_DONT_CARE;
- (*next)->transparentBlue = GLX_DONT_CARE;
- (*next)->transparentAlpha = GLX_DONT_CARE;
- (*next)->transparentIndex = GLX_DONT_CARE;
- (*next)->xRenderable = GLX_DONT_CARE;
- (*next)->fbconfigID = GLX_DONT_CARE;
- (*next)->swapMethod = GLX_SWAP_UNDEFINED_OML;
- (*next)->bindToTextureRgb = GLX_DONT_CARE;
- (*next)->bindToTextureRgba = GLX_DONT_CARE;
- (*next)->bindToMipmapTexture = GLX_DONT_CARE;
- (*next)->bindToTextureTargets = 0;
- (*next)->yInverted = GLX_DONT_CARE;
-
- next = & ((*next)->next);
- }
-
- return head;
-}
-
-struct drm_screen;
-
-struct drm_driver
-{
- _EGLDriver base; /* base class/object */
-
- drmModeResPtr res;
-
- struct drm_screen *screens[MAX_SCREENS];
- size_t count_screens;
-
- struct egl_drm_device *device;
-};
-
-struct drm_surface
-{
- _EGLSurface base; /* base class/object */
-
- struct egl_drm_drawable *drawable;
-};
-
-struct drm_context
-{
- _EGLContext base; /* base class/object */
-
- struct egl_drm_context *context;
-};
-
-struct drm_screen
-{
- _EGLScreen base;
-
- /* currently only support one connector */
- drmModeConnectorPtr connector;
-
- /* Has this screen been shown */
- int shown;
-
- /* Surface that is currently attached to this screen */
- struct drm_surface *surf;
-
- /* backing buffer */
- drmBO buffer;
-
- /* framebuffer */
- drmModeFBPtr fb;
- uint32_t fbID;
-
- /* crtc and mode used */
- drmModeCrtcPtr crtc;
- uint32_t crtcID;
-
- struct drm_mode_modeinfo *mode;
-
- /* geometry of the screen */
- struct egl_drm_frontbuffer front;
-};
-
-static void
-drm_update_res(struct drm_driver *drm_drv)
-{
- drmModeFreeResources(drm_drv->res);
- drm_drv->res = drmModeGetResources(drm_drv->device->drmFD);
-}
-
-static void
-drm_add_modes_from_connector(_EGLScreen *screen, drmModeConnectorPtr connector)
-{
- struct drm_mode_modeinfo *m;
- int i;
-
- for (i = 0; i < connector->count_modes; i++) {
- m = &connector->modes[i];
- _eglAddNewMode(screen, m->hdisplay, m->vdisplay, m->vrefresh, m->name);
- }
-}
-
-
-static EGLBoolean
-drm_initialize(_EGLDriver *drv, EGLDisplay dpy, EGLint *major, EGLint *minor)
-{
- _EGLDisplay *disp = _eglLookupDisplay(dpy);
- struct drm_driver *drm_drv = (struct drm_driver *)drv;
- struct drm_screen *screen = NULL;
- drmModeConnectorPtr connector = NULL;
- drmModeResPtr res = NULL;
- unsigned count_connectors = 0;
- int num_screens = 0;
-
- EGLint i;
- int fd;
-
- fd = drmOpen("i915", NULL);
- if (fd < 0) {
- return EGL_FALSE;
- }
-
- drm_drv->device = egl_drm_create_device(fd);
- if (!drm_drv->device) {
- drmClose(fd);
- return EGL_FALSE;
- }
-
- drm_update_res(drm_drv);
- res = drm_drv->res;
- if (res)
- count_connectors = res->count_connectors;
-
- for(i = 0; i < count_connectors && i < MAX_SCREENS; i++) {
- connector = drmModeGetConnector(fd, res->connectors[i]);
-
- if (!connector)
- continue;
-
- if (connector->connection != DRM_MODE_CONNECTED) {
- drmModeFreeConnector(connector);
- continue;
- }
-
- screen = malloc(sizeof(struct drm_screen));
- memset(screen, 0, sizeof(*screen));
- screen->connector = connector;
- _eglInitScreen(&screen->base);
- _eglAddScreen(disp, &screen->base);
- drm_add_modes_from_connector(&screen->base, connector);
- drm_drv->screens[num_screens++] = screen;
- }
- drm_drv->count_screens = num_screens;
-
- /* for now we only have one config */
- _EGLConfig *config = calloc(1, sizeof(*config));
- memset(config, 1, sizeof(*config));
- _eglInitConfig(config, 1);
- _eglSetConfigAttrib(config, EGL_RED_SIZE, 8);
- _eglSetConfigAttrib(config, EGL_GREEN_SIZE, 8);
- _eglSetConfigAttrib(config, EGL_BLUE_SIZE, 8);
- _eglSetConfigAttrib(config, EGL_ALPHA_SIZE, 8);
- _eglSetConfigAttrib(config, EGL_BUFFER_SIZE, 32);
- _eglSetConfigAttrib(config, EGL_DEPTH_SIZE, 24);
- _eglSetConfigAttrib(config, EGL_STENCIL_SIZE, 8);
- _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, EGL_PBUFFER_BIT);
- _eglAddConfig(disp, config);
-
- drv->Initialized = EGL_TRUE;
-
- *major = 1;
- *minor = 4;
-
- return EGL_TRUE;
-}
-
-static void
-drm_takedown_shown_screen(_EGLDriver *drv, struct drm_screen *screen)
-{
- struct drm_driver *drm_drv = (struct drm_driver *)drv;
- unsigned int i;
-
- intel_bind_frontbuffer(screen->surf->drawable, NULL);
- screen->surf = NULL;
-
- for (i = 0; i < drm_drv->res->count_crtcs; i++) {
- drmModeSetCrtc(
- drm_drv->device->drmFD,
- drm_drv->res->crtcs[i],
- 0, // FD
- 0, 0,
- NULL, 0, // List of output ids
- NULL);
- }
-
- drmModeRmFB(drm_drv->device->drmFD, screen->fbID);
- drmModeFreeFB(screen->fb);
- screen->fb = NULL;
-
- drmBOUnreference(drm_drv->device->drmFD, &screen->buffer);
-
- screen->shown = 0;
-}
-
-static EGLBoolean
-drm_terminate(_EGLDriver *drv, EGLDisplay dpy)
-{
- struct drm_driver *drm_drv = (struct drm_driver *)drv;
- struct drm_screen *screen;
- int i = 0;
-
- intel_destroy_device(drm_drv->device);
- drmFreeVersion(drm_drv->device->version);
-
- for (i = 0; i < drm_drv->count_screens; i++) {
- screen = drm_drv->screens[i];
-
- if (screen->shown)
- drm_takedown_shown_screen(drv, screen);
-
- drmModeFreeConnector(screen->connector);
- _eglDestroyScreen(&screen->base);
- drm_drv->screens[i] = NULL;
- }
-
- drmClose(drm_drv->device->drmFD);
-
- free(drm_drv->device);
-
- _eglCleanupDisplay(_eglLookupDisplay(dpy));
- free(drm_drv);
-
- return EGL_TRUE;
-}
-
-
-static struct drm_context *
-lookup_drm_context(EGLContext context)
-{
- _EGLContext *c = _eglLookupContext(context);
- return (struct drm_context *) c;
-}
-
-
-static struct drm_surface *
-lookup_drm_surface(EGLSurface surface)
-{
- _EGLSurface *s = _eglLookupSurface(surface);
- return (struct drm_surface *) s;
-}
-
-static struct drm_screen *
-lookup_drm_screen(EGLDisplay dpy, EGLScreenMESA screen)
-{
- _EGLScreen *s = _eglLookupScreen(dpy, screen);
- return (struct drm_screen *) s;
-}
-
-static __GLcontextModes*
-visual_from_config(_EGLConfig *conf)
-{
- __GLcontextModes *visual;
- (void)conf;
-
- visual = _egl_context_modes_create(1, sizeof(*visual));
- visual->redBits = 8;
- visual->greenBits = 8;
- visual->blueBits = 8;
- visual->alphaBits = 8;
-
- visual->rgbBits = 32;
- visual->doubleBufferMode = 1;
-
- visual->depthBits = 24;
- visual->haveDepthBuffer = visual->depthBits > 0;
- visual->stencilBits = 8;
- visual->haveStencilBuffer = visual->stencilBits > 0;
-
- return visual;
-}
-
-
-
-static EGLContext
-drm_create_context(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, EGLContext share_list, const EGLint *attrib_list)
-{
- struct drm_driver *drm_drv = (struct drm_driver *)drv;
- struct drm_context *c;
- struct drm_egl_context *share = NULL;
- _EGLConfig *conf;
- int i;
- int ret;
- __GLcontextModes *visual;
- struct egl_drm_context *context;
-
- conf = _eglLookupConfig(drv, dpy, config);
- if (!conf) {
- _eglError(EGL_BAD_CONFIG, "eglCreateContext");
- return EGL_NO_CONTEXT;
- }
-
- for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) {
- switch (attrib_list[i]) {
- /* no attribs defined for now */
- default:
- _eglError(EGL_BAD_ATTRIBUTE, "eglCreateContext");
- return EGL_NO_CONTEXT;
- }
- }
-
- c = (struct drm_context *) calloc(1, sizeof(struct drm_context));
- if (!c)
- return EGL_NO_CONTEXT;
-
- _eglInitContext(drv, dpy, &c->base, config, attrib_list);
-
- context = malloc(sizeof(*context));
- memset(context, 0, sizeof(*context));
-
- if (!context)
- goto err_c;
-
- context->device = drm_drv->device;
- visual = visual_from_config(conf);
-
- ret = intel_create_context(context, visual, share);
- free(visual);
-
- if (!ret)
- goto err_gl;
-
- c->context = context;
-
- /* generate handle and insert into hash table */
- _eglSaveContext(&c->base);
- assert(_eglGetContextHandle(&c->base));
-
- return _eglGetContextHandle(&c->base);
-err_gl:
- free(context);
-err_c:
- free(c);
- return EGL_NO_CONTEXT;
-}
-
-static EGLBoolean
-drm_destroy_context(_EGLDriver *drv, EGLDisplay dpy, EGLContext context)
-{
- struct drm_context *fc = lookup_drm_context(context);
- _eglRemoveContext(&fc->base);
- if (fc->base.IsBound) {
- fc->base.DeletePending = EGL_TRUE;
- } else {
- intel_destroy_context(fc->context);
- free(fc->context);
- free(fc);
- }
- return EGL_TRUE;
-}
-
-
-static EGLSurface
-drm_create_window_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativeWindowType window, const EGLint *attrib_list)
-{
- return EGL_NO_SURFACE;
-}
-
-
-static EGLSurface
-drm_create_pixmap_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativePixmapType pixmap, const EGLint *attrib_list)
-{
- return EGL_NO_SURFACE;
-}
-
-
-static EGLSurface
-drm_create_pbuffer_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config,
- const EGLint *attrib_list)
-{
- struct drm_driver *drm_drv = (struct drm_driver *)drv;
- int i;
- int ret;
- int width = -1;
- int height = -1;
- struct drm_surface *surf = NULL;
- struct egl_drm_drawable *drawable = NULL;
- __GLcontextModes *visual;
- _EGLConfig *conf;
-
- conf = _eglLookupConfig(drv, dpy, config);
- if (!conf) {
- _eglError(EGL_BAD_CONFIG, "eglCreatePbufferSurface");
- return EGL_NO_CONTEXT;
- }
-
- for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) {
- switch (attrib_list[i]) {
- case EGL_WIDTH:
- width = attrib_list[++i];
- break;
- case EGL_HEIGHT:
- height = attrib_list[++i];
- break;
- default:
- _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface");
- return EGL_NO_SURFACE;
- }
- }
-
- if (width < 1 || height < 1) {
- _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface");
- return EGL_NO_SURFACE;
- }
-
- surf = (struct drm_surface *) calloc(1, sizeof(struct drm_surface));
- if (!surf)
- goto err;
-
- if (!_eglInitSurface(drv, dpy, &surf->base, EGL_PBUFFER_BIT, config, attrib_list))
- goto err_surf;
-
- drawable = malloc(sizeof(*drawable));
- memset(drawable, 0, sizeof(*drawable));
-
- drawable->w = width;
- drawable->h = height;
-
- visual = visual_from_config(conf);
-
- drawable->device = drm_drv->device;
- ret = intel_create_drawable(drawable, visual);
- free(visual);
-
- if (!ret)
- goto err_draw;
-
- surf->drawable = drawable;
-
- _eglSaveSurface(&surf->base);
- return surf->base.Handle;
-
-err_draw:
- free(drawable);
-err_surf:
- free(surf);
-err:
- return EGL_NO_SURFACE;
-}
-
-static EGLSurface
-drm_create_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, EGLConfig cfg,
- const EGLint *attrib_list)
-{
- EGLSurface surf = drm_create_pbuffer_surface(drv, dpy, cfg, attrib_list);
-
- return surf;
-}
-
-static struct drm_mode_modeinfo *
-drm_find_mode(drmModeConnectorPtr connector, _EGLMode *mode)
-{
- int i;
- struct drm_mode_modeinfo *m;
-
- for (i = 0; i < connector->count_modes; i++) {
- m = &connector->modes[i];
- if (m->hdisplay == mode->Width && m->vdisplay == mode->Height && m->vrefresh == mode->RefreshRate)
- break;
- m = &connector->modes[0]; /* if we can't find one, return first */
- }
-
- return m;
-}
-static void
-draw(size_t x, size_t y, size_t w, size_t h, size_t pitch, size_t v, unsigned int *ptr)
-{
- int i, j;
-
- for (i = x; i < x + w; i++)
- for(j = y; j < y + h; j++)
- ptr[(i * pitch / 4) + j] = v;
-
-}
-
-static void
-prettyColors(int fd, unsigned int handle, size_t pitch)
-{
- drmBO bo;
- unsigned int *ptr;
- void *p;
- int i;
-
- drmBOReference(fd, handle, &bo);
- drmBOMap(fd, &bo, DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0, &p);
- ptr = (unsigned int*)p;
-
- for (i = 0; i < (bo.size / 4); i++)
- ptr[i] = 0xFFFFFFFF;
-
- for (i = 0; i < 4; i++)
- draw(i * 40, i * 40, 40, 40, pitch, 0, ptr);
-
-
- draw(200, 100, 40, 40, pitch, 0xff00ff, ptr);
- draw(100, 200, 40, 40, pitch, 0xff00ff, ptr);
-
- drmBOUnmap(fd, &bo);
-}
-
-static EGLBoolean
-drm_show_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy,
- EGLScreenMESA screen,
- EGLSurface surface, EGLModeMESA m)
-{
- struct drm_driver *drm_drv = (struct drm_driver *)drv;
- struct drm_surface *surf = lookup_drm_surface(surface);
- struct drm_screen *scrn = lookup_drm_screen(dpy, screen);
- _EGLMode *mode = _eglLookupMode(dpy, m);
- size_t pitch = mode->Width * 4;
- size_t size = mode->Height * pitch;
- int ret;
- unsigned int i,j,k;
-
- if (scrn->shown)
- drm_takedown_shown_screen(drv, scrn);
-
- ret = drmBOCreate(drm_drv->device->drmFD, size, 0, 0,
- DRM_BO_FLAG_READ |
- DRM_BO_FLAG_WRITE |
- DRM_BO_FLAG_MEM_TT |
- DRM_BO_FLAG_MEM_VRAM |
- DRM_BO_FLAG_NO_EVICT,
- DRM_BO_HINT_DONT_FENCE, &scrn->buffer);
-
- if (ret)
- return EGL_FALSE;
-
- prettyColors(drm_drv->device->drmFD, scrn->buffer.handle, pitch);
-
- ret = drmModeAddFB(drm_drv->device->drmFD, mode->Width, mode->Height,
- 32, 32, pitch,
- scrn->buffer.handle,
- &scrn->fbID);
-
- if (ret)
- goto err_bo;
-
- scrn->fb = drmModeGetFB(drm_drv->device->drmFD, scrn->fbID);
- if (!scrn->fb)
- goto err_bo;
-
- for (j = 0; j < drm_drv->res->count_connectors; j++) {
- drmModeConnector *con = drmModeGetConnector(drm_drv->device->drmFD, drm_drv->res->connectors[j]);
- scrn->mode = drm_find_mode(con, mode);
- if (!scrn->mode)
- goto err_fb;
-
- for (k = 0; k < con->count_encoders; k++) {
- drmModeEncoder *enc = drmModeGetEncoder(drm_drv->device->drmFD, con->encoders[k]);
- for (i = 0; i < drm_drv->res->count_crtcs; i++) {
- if (enc->possible_crtcs & (1<<i)) {
- ret = drmModeSetCrtc(
- drm_drv->device->drmFD,
- drm_drv->res->crtcs[i],
- scrn->fbID,
- 0, 0,
- &drm_drv->res->connectors[j], 1,
- scrn->mode);
- /* skip the other crtcs now */
- i = drm_drv->res->count_crtcs;
- }
- }
- }
- }
-
- scrn->front.handle = scrn->buffer.handle;
- scrn->front.pitch = pitch;
- scrn->front.width = mode->Width;
- scrn->front.height = mode->Height;
-
- scrn->surf = surf;
- intel_bind_frontbuffer(surf->drawable, &scrn->front);
-
- scrn->shown = 1;
-
- return EGL_TRUE;
-
-err_fb:
- /* TODO remove fb */
-
-err_bo:
- drmBOUnreference(drm_drv->device->drmFD, &scrn->buffer);
- return EGL_FALSE;
-}
-
-static EGLBoolean
-drm_destroy_surface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface)
-{
- struct drm_surface *fs = lookup_drm_surface(surface);
- _eglRemoveSurface(&fs->base);
- if (fs->base.IsBound) {
- fs->base.DeletePending = EGL_TRUE;
- } else {
- intel_bind_frontbuffer(fs->drawable, NULL);
- intel_destroy_drawable(fs->drawable);
- free(fs->drawable);
- free(fs);
- }
- return EGL_TRUE;
-}
-
-
-static EGLBoolean
-drm_make_current(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext context)
-{
- struct drm_surface *readSurf = lookup_drm_surface(read);
- struct drm_surface *drawSurf = lookup_drm_surface(draw);
- struct drm_context *ctx = lookup_drm_context(context);
- EGLBoolean b;
-
- b = _eglMakeCurrent(drv, dpy, draw, read, context);
- if (!b)
- return EGL_FALSE;
-
- if (ctx) {
- if (!drawSurf || !readSurf)
- return EGL_FALSE;
-
- intel_make_current(ctx->context, drawSurf->drawable, readSurf->drawable);
- } else {
- intel_make_current(NULL, NULL, NULL);
- }
-
- return EGL_TRUE;
-}
-
-static EGLBoolean
-drm_swap_buffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw)
-{
- struct drm_surface *surf = lookup_drm_surface(draw);
- if (!surf)
- return EGL_FALSE;
-
- /* error checking */
- if (!_eglSwapBuffers(drv, dpy, draw))
- return EGL_FALSE;
-
- intel_swap_buffers(surf->drawable);
- return EGL_TRUE;
-}
-
-
-/**
- * The bootstrap function. Return a new drm_driver object and
- * plug in API functions.
- */
-_EGLDriver *
-_eglMain(_EGLDisplay *dpy, const char *args)
-{
- struct drm_driver *drm;
-
- drm = (struct drm_driver *) calloc(1, sizeof(struct drm_driver));
- if (!drm) {
- return NULL;
- }
-
- /* First fill in the dispatch table with defaults */
- _eglInitDriverFallbacks(&drm->base);
- /* then plug in our Drm-specific functions */
- drm->base.API.Initialize = drm_initialize;
- drm->base.API.Terminate = drm_terminate;
- drm->base.API.CreateContext = drm_create_context;
- drm->base.API.MakeCurrent = drm_make_current;
- drm->base.API.CreateWindowSurface = drm_create_window_surface;
- drm->base.API.CreatePixmapSurface = drm_create_pixmap_surface;
- drm->base.API.CreatePbufferSurface = drm_create_pbuffer_surface;
- drm->base.API.DestroySurface = drm_destroy_surface;
- drm->base.API.DestroyContext = drm_destroy_context;
- drm->base.API.CreateScreenSurfaceMESA = drm_create_screen_surface_mesa;
- drm->base.API.ShowScreenSurfaceMESA = drm_show_screen_surface_mesa;
- drm->base.API.SwapBuffers = drm_swap_buffers;
-
- drm->base.ClientAPIsMask = EGL_OPENGL_BIT /*| EGL_OPENGL_ES_BIT*/;
- drm->base.Name = "DRM/Gallium";
-
- /* enable supported extensions */
- drm->base.Extensions.MESA_screen_surface = EGL_TRUE;
- drm->base.Extensions.MESA_copy_context = EGL_TRUE;
-
- return &drm->base;
-}
diff --git a/src/gallium/winsys/drm/intel/egl/intel_egl.h b/src/gallium/winsys/drm/intel/egl/intel_egl.h
deleted file mode 100644
index 1ee27e0847..0000000000
--- a/src/gallium/winsys/drm/intel/egl/intel_egl.h
+++ /dev/null
@@ -1,53 +0,0 @@
-
-#ifndef _INTEL_EGL_H_
-#define _INTEL_EGL_H_
-
-#include <xf86drm.h>
-
-struct egl_drm_device
-{
- void *priv;
- int drmFD;
-
- drmVersionPtr version;
- int deviceID;
-};
-
-struct egl_drm_context
-{
- void *priv;
- struct egl_drm_device *device;
-};
-
-struct egl_drm_drawable
-{
- void *priv;
- struct egl_drm_device *device;
- size_t h;
- size_t w;
-};
-
-struct egl_drm_frontbuffer
-{
- uint32_t handle;
- uint32_t pitch;
- uint32_t width;
- uint32_t height;
-};
-
-#include "GL/internal/glcore.h"
-
-int intel_create_device(struct egl_drm_device *device);
-int intel_destroy_device(struct egl_drm_device *device);
-
-int intel_create_context(struct egl_drm_context *context, const __GLcontextModes *visual, void *sharedContextPrivate);
-int intel_destroy_context(struct egl_drm_context *context);
-
-int intel_create_drawable(struct egl_drm_drawable *drawable, const __GLcontextModes * visual);
-int intel_destroy_drawable(struct egl_drm_drawable *drawable);
-
-void intel_make_current(struct egl_drm_context *context, struct egl_drm_drawable *draw, struct egl_drm_drawable *read);
-void intel_swap_buffers(struct egl_drm_drawable *draw);
-void intel_bind_frontbuffer(struct egl_drm_drawable *draw, struct egl_drm_frontbuffer *front);
-
-#endif
diff --git a/src/gallium/winsys/drm/intel/egl/intel_swapbuffers.c b/src/gallium/winsys/drm/intel/egl/intel_swapbuffers.c
deleted file mode 100644
index 2edcbc79ff..0000000000
--- a/src/gallium/winsys/drm/intel/egl/intel_swapbuffers.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "intel_device.h"
-#include "intel_context.h"
-#include "intel_batchbuffer.h"
-#include "intel_reg.h"
-
-#include "pipe/p_context.h"
-#include "state_tracker/st_public.h"
-#include "state_tracker/st_context.h"
-#include "state_tracker/st_cb_fbo.h"
-#include "intel_egl.h"
-
-
-static void
-intel_display_surface(struct egl_drm_drawable *draw,
- struct pipe_surface *surf);
-
-void intel_swap_buffers(struct egl_drm_drawable *draw)
-{
- struct intel_framebuffer *intel_fb = (struct intel_framebuffer *)draw->priv;
- struct pipe_surface *back_surf;
-
- assert(intel_fb);
- assert(intel_fb->stfb);
-
- back_surf = st_get_framebuffer_surface(intel_fb->stfb, ST_SURFACE_BACK_LEFT);
- if (back_surf) {
- st_notify_swapbuffers(intel_fb->stfb);
- if (intel_fb->front)
- intel_display_surface(draw, back_surf);
- st_notify_swapbuffers_complete(intel_fb->stfb);
- }
-}
-
-static void
-intel_display_surface(struct egl_drm_drawable *draw,
- struct pipe_surface *surf)
-{
- struct intel_context *intel = NULL;
- struct intel_framebuffer *intel_fb = (struct intel_framebuffer *)draw->priv;
- struct _DriFenceObject *fence;
-
- //const int srcWidth = surf->width;
- //const int srcHeight = surf->height;
-
- intel = intel_fb->device->dummy;
- if (!intel) {
- printf("No dummy context\n");
- return;
- }
-
- const int dstWidth = intel_fb->front->width;
- const int dstHeight = intel_fb->front->height;
- const int dstPitch = intel_fb->front->pitch / 4;//draw->front.cpp;
-
- const int cpp = 4;//intel_fb->front->cpp;
- const int srcPitch = surf->stride / cpp;
-
- int BR13, CMD;
- //int i;
-
- BR13 = (dstPitch * cpp) | (0xCC << 16) | (1 << 24) | (1 << 25);
- CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
- XY_SRC_COPY_BLT_WRITE_RGB);
-
- BEGIN_BATCH(8, 2);
- OUT_BATCH(CMD);
- OUT_BATCH(BR13);
- OUT_BATCH((0 << 16) | 0);
- OUT_BATCH((dstHeight << 16) | dstWidth);
-
- OUT_RELOC(intel_fb->front_buffer,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
- DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE, 0);
-
- OUT_BATCH((0 << 16) | 0);
- OUT_BATCH((srcPitch * cpp) & 0xffff);
- OUT_RELOC(dri_bo(surf->buffer),
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- DRM_BO_MASK_MEM | DRM_BO_FLAG_READ, 0);
-
- fence = intel_be_batchbuffer_flush(intel->base.batch);
- driFenceUnReference(&fence);
- intel_be_batchbuffer_finish(intel->base.batch);
-}
diff --git a/src/gallium/winsys/drm/intel/gem/Makefile b/src/gallium/winsys/drm/intel/gem/Makefile
new file mode 100644
index 0000000000..7ab1a2a771
--- /dev/null
+++ b/src/gallium/winsys/drm/intel/gem/Makefile
@@ -0,0 +1,16 @@
+# Build description for the "inteldrm" library (GEM winsys backend sources).
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = inteldrm
+
+# Backend sources that make up the library.
+C_SOURCES = \
+ intel_be_batchbuffer.c \
+ intel_be_context.c \
+ intel_be_device.c \
+ intel_be_api.c
+
+# libdrm include paths (-I flags only), queried from pkg-config.
+LIBRARY_INCLUDES = $(shell pkg-config libdrm --cflags-only-I)
+
+# Remaining libdrm compiler flags (everything except -I).
+LIBRARY_DEFINES = $(shell pkg-config libdrm --cflags-only-other)
+
+# Common gallium build rules.
+include ../../../../Makefile.template
diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_api.c b/src/gallium/winsys/drm/intel/gem/intel_be_api.c
new file mode 100644
index 0000000000..6cffed5134
--- /dev/null
+++ b/src/gallium/winsys/drm/intel/gem/intel_be_api.c
@@ -0,0 +1,12 @@
+
+#include "intel_be_api.h"
+
+/* Dispatch table exporting this backend's entry points. */
+struct drm_api drm_api_hocks =
+{
+ /* intel_be_context.c */
+ .create_context = intel_be_create_context,
+ /* intel_be_device.c */
+ .create_screen = intel_be_create_screen,
+ .buffer_from_handle = intel_be_buffer_from_handle,
+ .handle_from_buffer = intel_be_handle_from_buffer,
+};
diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_api.h b/src/gallium/winsys/drm/intel/gem/intel_be_api.h
new file mode 100644
index 0000000000..73e458d4ba
--- /dev/null
+++ b/src/gallium/winsys/drm/intel/gem/intel_be_api.h
@@ -0,0 +1,14 @@
+
+#ifndef _INTEL_BE_API_H_
+#define _INTEL_BE_API_H_
+
+#include "pipe/p_compiler.h"
+
+#include "state_tracker/drm_api.h"
+
+#include "intel_be_device.h"
+
+/*
+ * Allocates an intel_be_device for drmFD and returns the i915 screen
+ * created on top of it (defined in intel_be_device.c, where the second
+ * parameter is named deviceID).
+ */
+struct pipe_screen *intel_be_create_screen(int drmFD, int pciID);
+/*
+ * Allocates an intel_be_context for the screen's device and returns the
+ * i915 pipe context created on top of it (defined in intel_be_context.c).
+ */
+struct pipe_context *intel_be_create_context(struct pipe_screen *screen);
+
+#endif
diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.c b/src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.c
new file mode 100644
index 0000000000..d9556e1f38
--- /dev/null
+++ b/src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.c
@@ -0,0 +1,140 @@
+
+#include "i915simple/i915_debug.h"
+#include "intel_be_batchbuffer.h"
+#include "intel_be_context.h"
+#include "intel_be_device.h"
+#include "intel_be_fence.h"
+#include <errno.h>
+
+#include "util/u_memory.h"
+
+/**
+ * Allocate a batchbuffer for the given context.
+ *
+ * The CPU-side staging area is intel->device->max_batch_size bytes; the
+ * backing GEM bo is allocated by intel_be_batchbuffer_reset().
+ *
+ * \return the new batchbuffer, or NULL if any allocation failed.
+ */
+struct intel_be_batchbuffer *
+intel_be_batchbuffer_alloc(struct intel_be_context *intel)
+{
+   struct intel_be_batchbuffer *batch = CALLOC_STRUCT(intel_be_batchbuffer);
+
+   if (!batch)
+      return NULL;
+
+   batch->base.buffer = NULL;
+   batch->base.winsys = &intel->base;
+   batch->base.size = 0;
+   batch->base.actual_size = intel->device->max_batch_size;
+   batch->base.relocs = 0;
+   batch->base.max_relocs = INTEL_DEFAULT_RELOCS;
+
+   /* No memset here: intel_be_batchbuffer_reset() clears the area below. */
+   batch->base.map = malloc(batch->base.actual_size);
+   if (!batch->base.map) {
+      free(batch);
+      return NULL;
+   }
+   batch->base.ptr = batch->base.map;
+
+   /* reset() allocates the GEM bo; a NULL bo means that allocation failed. */
+   intel_be_batchbuffer_reset(batch);
+   if (!batch->bo) {
+      free(batch->base.map);
+      free(batch);
+      return NULL;
+   }
+
+   return batch;
+}
+
+/**
+ * Drop the current GEM bo (if any), clear the CPU-side staging area and
+ * allocate a fresh bo so the batch can be filled again.
+ */
+void
+intel_be_batchbuffer_reset(struct intel_be_batchbuffer *batch)
+{
+   struct intel_be_device *device = intel_be_context(batch->base.winsys)->device;
+   struct i915_batchbuffer *base = &batch->base;
+
+   if (batch->bo != NULL)
+      drm_intel_bo_unreference(batch->bo);
+
+   /* Rewind the write pointer over a zeroed staging area. */
+   memset(base->map, 0, base->actual_size);
+   base->ptr = base->map;
+   /* BATCH_RESERVED bytes are held back -- presumably for the commands
+    * appended by intel_be_batchbuffer_flush(). */
+   base->size = base->actual_size - BATCH_RESERVED;
+
+   base->relocs = 0;
+   base->max_relocs = INTEL_DEFAULT_RELOCS;
+
+   batch->bo = drm_intel_bo_alloc(device->pools.gem,
+                                  "gallium3d_batch_buffer",
+                                  base->actual_size, 0);
+}
+
+/**
+ * Emit one relocated dword at the current batch position.
+ *
+ * Registers a relocation for bo at the current byte offset, writes
+ * bo->offset + pre_add into the batch and advances the write pointer by
+ * four bytes.
+ *
+ * \return the result of drm_intel_bo_emit_reloc(); the relocation counter
+ *         is only bumped when that result is 0.
+ */
+int
+intel_be_offset_relocation(struct intel_be_batchbuffer *batch,
+                           unsigned pre_add,
+                           drm_intel_bo *bo,
+                           uint32_t read_domains,
+                           uint32_t write_domain)
+{
+   struct i915_batchbuffer *base = &batch->base;
+   uint32_t *slot = (uint32_t *)base->ptr;
+   unsigned batch_offset;
+   int err;
+
+   assert(base->relocs < base->max_relocs);
+
+   batch_offset = (unsigned)(base->ptr - base->map);
+   err = drm_intel_bo_emit_reloc(batch->bo, batch_offset,
+                                 bo, pre_add,
+                                 read_domains,
+                                 write_domain);
+
+   /* The dword is written and consumed even if registration failed. */
+   slot[0] = bo->offset + pre_add;
+   base->ptr += 4;
+
+   if (err == 0)
+      base->relocs++;
+
+   return err;
+}
+
+/**
+ * Terminate the batch, upload it into the GEM bo, execute it and reset the
+ * batchbuffer for reuse.
+ *
+ * \param fence  optional; when non-NULL any fence already stored in *fence
+ *               is unreferenced and replaced by a new fence with refcount 1,
+ *               or by NULL if that allocation fails.
+ *
+ * NOTE(review): the new fence is created with bo == NULL, which
+ * intel_be_fence_finish() treats as "already expired" -- confirm that this
+ * is intended.
+ */
+void
+intel_be_batchbuffer_flush(struct intel_be_batchbuffer *batch,
+                           struct intel_be_fence **fence)
+{
+   struct i915_batchbuffer *i915 = &batch->base;
+   unsigned used = 0;
+   int ret = 0;
+
+   assert(i915_batchbuffer_space(i915) >= 0);
+
+   used = batch->base.ptr - batch->base.map;
+   assert((used & 3) == 0);
+
+   /* An odd dword count gets an extra MI_NOOP so that the finished batch
+    * always holds an even number of dwords. */
+   i915_batchbuffer_dword(i915, (0x0<<29)|(0x4<<23)|(1<<0)); // MI_FLUSH | FLUSH_MAP_CACHE
+   if (used & 4) {
+      i915_batchbuffer_dword(i915, (0x0<<29)|(0x0<<23)); // MI_NOOP
+   }
+   i915_batchbuffer_dword(i915, (0x0<<29)|(0xA<<23)); // MI_BATCH_BUFFER_END
+
+   used = batch->base.ptr - batch->base.map;
+
+   /* Do not execute a bo whose upload failed. */
+   ret = drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map);
+   if (ret == 0)
+      ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0);
+
+   assert(ret == 0);
+   (void)ret; /* only read by the assert above */
+
+   intel_be_batchbuffer_reset(batch);
+
+   if (fence) {
+      struct intel_be_fence *f;
+
+      if (*fence)
+         intel_be_fence_unreference(*fence);
+
+      f = CALLOC_STRUCT(intel_be_fence);
+      if (f) {
+         f->refcount = 1;
+         f->bo = NULL;
+      }
+      *fence = f;
+   }
+}
+
+/**
+ * Currently a no-op: the body is empty and batch is not used.
+ */
+void
+intel_be_batchbuffer_finish(struct intel_be_batchbuffer *batch)
+{
+
+}
+
+/**
+ * Release the batch's GEM bo (if any), its CPU-side staging area and the
+ * batchbuffer structure itself.
+ */
+void
+intel_be_batchbuffer_free(struct intel_be_batchbuffer *batch)
+{
+   drm_intel_bo *bo = batch->bo;
+
+   if (bo != NULL)
+      drm_intel_bo_unreference(bo);
+
+   free(batch->base.map);
+   free(batch);
+}
diff --git a/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.h b/src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.h
index f150e3a674..195bf8dee7 100644
--- a/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.h
+++ b/src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.h
@@ -4,7 +4,8 @@
#include "i915simple/i915_batch.h"
-#include "ws_dri_bufmgr.h"
+#include "drm.h"
+#include "intel_bufmgr.h"
#define BATCH_RESERVED 16
@@ -16,6 +17,7 @@
struct intel_be_context;
struct intel_be_device;
+struct intel_be_fence;
struct intel_be_batchbuffer
{
@@ -24,24 +26,7 @@ struct intel_be_batchbuffer
struct intel_be_context *intel;
struct intel_be_device *device;
- struct _DriBufferObject *buffer;
- struct _DriFenceObject *last_fence;
- uint32_t flags;
-
- struct _DriBufferList *list;
- size_t list_count;
-
- uint32_t *reloc;
- size_t reloc_size;
- size_t nr_relocs;
-
- uint32_t dirty_state;
- uint32_t id;
-
- uint32_t poolOffset;
- uint8_t *drmBOVirtual;
- struct _drmBONode *node; /* Validation list node for this buffer */
- int dest_location; /* Validation list sequence for this buffer */
+ drm_intel_bo *bo;
};
struct intel_be_batchbuffer *
@@ -53,17 +38,18 @@ intel_be_batchbuffer_free(struct intel_be_batchbuffer *batch);
void
intel_be_batchbuffer_finish(struct intel_be_batchbuffer *batch);
-struct _DriFenceObject *
-intel_be_batchbuffer_flush(struct intel_be_batchbuffer *batch);
+void
+intel_be_batchbuffer_flush(struct intel_be_batchbuffer *batch,
+ struct intel_be_fence **fence);
void
intel_be_batchbuffer_reset(struct intel_be_batchbuffer *batch);
-void
+int
intel_be_offset_relocation(struct intel_be_batchbuffer *batch,
- unsigned pre_add,
- struct _DriBufferObject *driBO,
- uint64_t val_flags,
- uint64_t val_mask);
+ unsigned pre_add,
+ drm_intel_bo *bo,
+ uint32_t read_domains,
+ uint32_t write_doman);
#endif
diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_context.c b/src/gallium/winsys/drm/intel/gem/intel_be_context.c
new file mode 100644
index 0000000000..bb6f1b916c
--- /dev/null
+++ b/src/gallium/winsys/drm/intel/gem/intel_be_context.c
@@ -0,0 +1,118 @@
+
+#include "pipe/p_screen.h"
+
+#include "intel_be_device.h"
+#include "intel_be_context.h"
+#include "intel_be_batchbuffer.h"
+
+#include "i915_drm.h"
+
+#include "intel_be_api.h"
+
+/** i915_winsys hook: return the i915 view of the context's batchbuffer. */
+static struct i915_batchbuffer *
+intel_be_batch_get(struct i915_winsys *sws)
+{
+   return &intel_be_context(sws)->batch->base;
+}
+
+/**
+ * i915_winsys hook: emit a relocation for buf into the current batch.
+ *
+ * Write access selects the render domain for both reading and writing;
+ * read access additionally adds the vertex domain to the read domains.
+ */
+static void
+intel_be_batch_reloc(struct i915_winsys *sws,
+                     struct pipe_buffer *buf,
+                     unsigned access_flags,
+                     unsigned delta)
+{
+   const int writes = (access_flags & I915_BUFFER_ACCESS_WRITE) != 0;
+   const int reads = (access_flags & I915_BUFFER_ACCESS_READ) != 0;
+   uint32_t write_domain = writes ? I915_GEM_DOMAIN_RENDER : 0;
+   uint32_t read_domains = write_domain;
+   int ret;
+
+   if (reads)
+      read_domains |= I915_GEM_DOMAIN_VERTEX;
+
+   ret = intel_be_offset_relocation(intel_be_context(sws)->batch,
+                                    delta,
+                                    intel_bo(buf),
+                                    read_domains,
+                                    write_domain);
+   assert(ret == 0);
+
+   /* TODO change return type */
+   /* return ret; */
+}
+
+/**
+ * i915_winsys hook: flush the context's batchbuffer.
+ *
+ * When a fence slot is supplied it is expected to be empty on entry.
+ */
+static void
+intel_be_batch_flush(struct i915_winsys *sws,
+                     struct pipe_fence_handle **fence)
+{
+   assert(fence == NULL || *fence == NULL);
+
+   intel_be_batchbuffer_flush(intel_be_context(sws)->batch,
+                              (struct intel_be_fence **)fence);
+}
+
+
+/*
+ * Misc functions.
+ */
+
+/** i915_winsys hook: free the context's batchbuffer, then the context. */
+static void
+intel_be_destroy_context(struct i915_winsys *winsys)
+{
+   struct intel_be_context *ctx = intel_be_context(winsys);
+   struct intel_be_batchbuffer *batch = ctx->batch;
+
+   intel_be_batchbuffer_free(batch);
+   free(ctx);
+}
+
+/**
+ * Initialise an already allocated intel_be_context: bind it to device,
+ * install the i915_winsys hooks and allocate its batchbuffer.
+ *
+ * \return non-zero when the context holds a batchbuffer afterwards,
+ *         zero otherwise.
+ */
+boolean
+intel_be_init_context(struct intel_be_context *intel, struct intel_be_device *device)
+{
+   assert(intel);
+   assert(device);
+   intel->device = device;
+
+   intel->base.batch_get = intel_be_batch_get;
+   intel->base.batch_reloc = intel_be_batch_reloc;
+   intel->base.batch_flush = intel_be_batch_flush;
+
+   intel->base.destroy = intel_be_destroy_context;
+
+   intel->batch = intel_be_batchbuffer_alloc(intel);
+
+   /* Report failure instead of handing back a context without a batch. */
+   return intel->batch != NULL;
+}
+
+/**
+ * Create a winsys context for screen's device and the i915 pipe context
+ * that sits on top of it.
+ *
+ * \return the pipe context with pipe->priv pointing at the new
+ *         intel_be_context, or NULL on failure.
+ */
+struct pipe_context *
+intel_be_create_context(struct pipe_screen *screen)
+{
+   struct intel_be_context *intel;
+   struct pipe_context *pipe;
+   struct intel_be_device *device = intel_be_device(screen->winsys);
+
+   intel = (struct intel_be_context *)calloc(1, sizeof(*intel));
+   if (!intel)
+      return NULL;
+
+   if (!intel_be_init_context(intel, device)) {
+      free(intel);
+      return NULL;
+   }
+
+#if 0
+   pipe = intel_create_softpipe(intel, screen->winsys);
+#else
+   pipe = i915_create_context(screen, &device->base, &intel->base);
+#endif
+
+   if (!pipe) {
+      /* NOTE(review): assumes i915_create_context() does not destroy the
+       * winsys on failure -- confirm against the i915simple driver. */
+      intel_be_destroy_context(&intel->base);
+      return NULL;
+   }
+
+   pipe->priv = intel;
+
+   return pipe;
+}
diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_context.h b/src/gallium/winsys/drm/intel/gem/intel_be_context.h
new file mode 100644
index 0000000000..5a369669c0
--- /dev/null
+++ b/src/gallium/winsys/drm/intel/gem/intel_be_context.h
@@ -0,0 +1,31 @@
+
+#ifndef INTEL_BE_CONTEXT_H
+#define INTEL_BE_CONTEXT_H
+
+#include "i915simple/i915_winsys.h"
+
+/**
+ * Winsys context handed to the i915simple driver.
+ *
+ * base is the first member because intel_be_context() casts a
+ * struct i915_winsys pointer straight back to this type.
+ */
+struct intel_be_context
+{
+ /** Interface to i915simple driver */
+ struct i915_winsys base;
+
+ struct intel_be_device *device;
+ struct intel_be_batchbuffer *batch;
+};
+
+/** Cast an i915_winsys pointer back to its enclosing intel_be_context. */
+static INLINE struct intel_be_context *
+intel_be_context(struct i915_winsys *sws)
+{
+ return (struct intel_be_context *)sws;
+}
+
+/**
+ * Initialize an allocated intel_be_context struct.
+ *
+ * Remember to set the hardware_* functions.
+ * NOTE(review): no hardware_* members are visible in struct
+ * intel_be_context here -- this remark may be stale.
+ */
+boolean
+intel_be_init_context(struct intel_be_context *intel,
+ struct intel_be_device *device);
+
+#endif
diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_device.c b/src/gallium/winsys/drm/intel/gem/intel_be_device.c
new file mode 100644
index 0000000000..a2163a1e6d
--- /dev/null
+++ b/src/gallium/winsys/drm/intel/gem/intel_be_device.c
@@ -0,0 +1,283 @@
+
+#include "intel_be_device.h"
+
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+
+#include "intel_be_fence.h"
+
+#include "i915simple/i915_screen.h"
+
+#include "intel_be_api.h"
+
+/*
+ * Buffer
+ */
+
+/**
+ * pipe_winsys hook: map buf's bo for CPU access.
+ *
+ * The bo is mapped for writing when flags contains
+ * PIPE_BUFFER_USAGE_CPU_WRITE.
+ *
+ * \return the bo's virtual address, or NULL if mapping failed.
+ */
+static void *
+intel_be_buffer_map(struct pipe_winsys *winsys,
+                    struct pipe_buffer *buf,
+                    unsigned flags)
+{
+   drm_intel_bo *bo = intel_bo(buf);
+   const int for_write = (flags & PIPE_BUFFER_USAGE_CPU_WRITE) ? 1 : 0;
+
+   if (drm_intel_bo_map(bo, for_write) != 0)
+      return NULL;
+
+   return bo->virtual;
+}
+
+/** pipe_winsys hook: unmap the bo behind buf. */
+static void
+intel_be_buffer_unmap(struct pipe_winsys *winsys,
+ struct pipe_buffer *buf)
+{
+ drm_intel_bo_unmap(intel_bo(buf));
+}
+
+/** pipe_winsys hook: drop the buffer's bo reference and free the wrapper. */
+static void
+intel_be_buffer_destroy(struct pipe_winsys *winsys,
+ struct pipe_buffer *buf)
+{
+ drm_intel_bo_unreference(intel_bo(buf));
+ free(buf);
+}
+
+/**
+ * pipe_winsys hook: create a buffer of the given size backed by a GEM bo.
+ *
+ * \return the new buffer with refcount 1, or NULL on allocation failure.
+ */
+static struct pipe_buffer *
+intel_be_buffer_create(struct pipe_winsys *winsys,
+                       unsigned alignment,
+                       unsigned usage,
+                       unsigned size)
+{
+   struct intel_be_buffer *buffer = CALLOC_STRUCT(intel_be_buffer);
+   struct intel_be_device *dev = intel_be_device(winsys);
+   drm_intel_bufmgr *pool;
+   char *name;
+
+   if (buffer == NULL)
+      return NULL;
+
+   buffer->base.refcount = 1;
+   buffer->base.alignment = alignment;
+   buffer->base.usage = usage;
+   buffer->base.size = size;
+
+   /* Every usage class draws from the single GEM pool; only the bo name
+    * differs between them. */
+   pool = dev->pools.gem;
+   if (usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_CONSTANT))
+      name = "gallium3d_local";
+   else if (usage & PIPE_BUFFER_USAGE_CUSTOM)
+      name = "gallium3d_internal_vertex";
+   else
+      name = "gallium3d_regular";
+
+   buffer->bo = drm_intel_bo_alloc(pool, name, size, alignment);
+   if (buffer->bo == NULL) {
+      free(buffer);
+      return NULL;
+   }
+
+   return &buffer->base;
+}
+
+/**
+ * pipe_winsys hook: wrap user memory in a buffer.
+ *
+ * Allocates a GEM bo of the requested size and copies bytes bytes from ptr
+ * into it.
+ *
+ * \return the new buffer with refcount 1, or NULL if the allocation or the
+ *         upload failed.
+ */
+static struct pipe_buffer *
+intel_be_user_buffer_create(struct pipe_winsys *winsys, void *ptr, unsigned bytes)
+{
+   struct intel_be_buffer *buffer = CALLOC_STRUCT(intel_be_buffer);
+   struct intel_be_device *dev = intel_be_device(winsys);
+   int ret;
+
+   if (!buffer)
+      return NULL;
+
+   buffer->base.refcount = 1;
+   buffer->base.alignment = 0;
+   buffer->base.usage = 0;
+   buffer->base.size = bytes;
+
+   buffer->bo = drm_intel_bo_alloc(dev->pools.gem,
+                                   "gallium3d_user_buffer",
+                                   bytes, 0);
+
+   if (!buffer->bo)
+      goto err;
+
+   ret = drm_intel_bo_subdata(buffer->bo,
+                              0, bytes, ptr);
+
+   if (ret)
+      goto err_bo;
+
+   return &buffer->base;
+
+err_bo:
+   /* The bo exists by now, so release it before freeing the wrapper. */
+   drm_intel_bo_unreference(buffer->bo);
+err:
+   free(buffer);
+   return NULL;
+}
+
+/**
+ * Wrap an existing bo, looked up through
+ * drm_intel_bo_gem_create_from_name(), in a pipe_buffer.
+ *
+ * Alignment and size are taken from the bo; usage is set to GPU and CPU
+ * read/write.
+ *
+ * \return the new buffer with refcount 1, or NULL on failure.
+ */
+struct pipe_buffer *
+intel_be_buffer_from_handle(struct pipe_winsys *winsys,
+                            const char* name, unsigned handle)
+{
+   struct intel_be_device *dev = intel_be_device(winsys);
+   struct intel_be_buffer *wrapper = CALLOC_STRUCT(intel_be_buffer);
+   drm_intel_bo *bo;
+
+   if (wrapper == NULL)
+      return NULL;
+
+   bo = drm_intel_bo_gem_create_from_name(dev->pools.gem, name, handle);
+   wrapper->bo = bo;
+   if (bo == NULL) {
+      free(wrapper);
+      return NULL;
+   }
+
+   wrapper->base.refcount = 1;
+   wrapper->base.alignment = bo->align;
+   wrapper->base.usage = PIPE_BUFFER_USAGE_GPU_READ |
+                         PIPE_BUFFER_USAGE_GPU_WRITE |
+                         PIPE_BUFFER_USAGE_CPU_READ |
+                         PIPE_BUFFER_USAGE_CPU_WRITE;
+   wrapper->base.size = bo->size;
+
+   return &wrapper->base;
+}
+
+/**
+ * Return the handle field of the bo behind buf.
+ *
+ * NOTE(review): intel_be_buffer_from_handle() resolves its argument with
+ * drm_intel_bo_gem_create_from_name(); confirm that bo->handle is the same
+ * kind of identifier that function expects.
+ */
+unsigned
+intel_be_handle_from_buffer(struct pipe_winsys *winsys,
+ struct pipe_buffer *buf)
+{
+ drm_intel_bo *bo = intel_bo(buf);
+ return bo->handle;
+}
+
+/*
+ * Fence
+ */
+
+/**
+ * pipe_winsys hook: make *ptr refer to fence.
+ *
+ * Takes a reference on fence (if non-NULL), drops the reference held by
+ * the old *ptr (if non-NULL) and stores fence in *ptr.
+ */
+static void
+intel_be_fence_refunref(struct pipe_winsys *sws,
+                        struct pipe_fence_handle **ptr,
+                        struct pipe_fence_handle *fence)
+{
+   struct intel_be_fence **slot = (struct intel_be_fence **)ptr;
+   struct intel_be_fence *incoming = (struct intel_be_fence *)fence;
+   struct intel_be_fence *outgoing;
+
+   assert(slot);
+
+   /* Reference first, so that storing a fence over itself cannot free it. */
+   if (incoming != NULL)
+      intel_be_fence_reference(incoming);
+
+   outgoing = *slot;
+   if (outgoing != NULL)
+      intel_be_fence_unreference(outgoing);
+
+   *slot = incoming;
+}
+
+/**
+ * pipe_winsys hook: not implemented. Trips assert(0) when assertions are
+ * enabled and otherwise returns 0.
+ */
+static int
+intel_be_fence_signalled(struct pipe_winsys *sws,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ assert(0);
+
+ return 0;
+}
+
+/**
+ * pipe_winsys hook: wait for fence to complete.
+ *
+ * A fence without a bo has already expired. Otherwise wait for rendering
+ * on the bo, then release it so the fence counts as expired from now on.
+ *
+ * \return always 0.
+ */
+static int
+intel_be_fence_finish(struct pipe_winsys *sws,
+                      struct pipe_fence_handle *fence,
+                      unsigned flag)
+{
+   struct intel_be_fence *f = (struct intel_be_fence *)fence;
+
+   if (f->bo != NULL) {
+      drm_intel_bo_wait_rendering(f->bo);
+      drm_intel_bo_unreference(f->bo);
+      f->bo = NULL;
+   }
+
+   return 0;
+}
+
+/*
+ * Misc functions
+ */
+
+/** pipe_winsys hook: destroy the GEM buffer manager and free the device. */
+static void
+intel_be_destroy_winsys(struct pipe_winsys *winsys)
+{
+ struct intel_be_device *dev = intel_be_device(winsys);
+
+ drm_intel_bufmgr_destroy(dev->pools.gem);
+
+ free(dev);
+}
+
+/**
+ * Initialise an already allocated intel_be_device: record fd and id, set
+ * the size limits, install the pipe_winsys hooks and create the GEM buffer
+ * manager.
+ *
+ * \return non-zero when the buffer manager was created, zero otherwise.
+ */
+boolean
+intel_be_init_device(struct intel_be_device *dev, int fd, unsigned id)
+{
+   dev->fd = fd;
+   dev->id = id;
+   dev->max_batch_size = 16 * 4096;
+   dev->max_vertex_size = 128 * 4096;
+
+   dev->base.buffer_create = intel_be_buffer_create;
+   dev->base.user_buffer_create = intel_be_user_buffer_create;
+   dev->base.buffer_map = intel_be_buffer_map;
+   dev->base.buffer_unmap = intel_be_buffer_unmap;
+   dev->base.buffer_destroy = intel_be_buffer_destroy;
+
+   /* Not used anymore */
+   dev->base.surface_buffer_create = NULL;
+
+   dev->base.fence_reference = intel_be_fence_refunref;
+   dev->base.fence_signalled = intel_be_fence_signalled;
+   dev->base.fence_finish = intel_be_fence_finish;
+
+   dev->base.destroy = intel_be_destroy_winsys;
+
+   dev->pools.gem = drm_intel_bufmgr_gem_init(dev->fd, dev->max_batch_size);
+
+   /* Every buffer allocation goes through pools.gem, so a missing buffer
+    * manager is reported as an initialisation failure. */
+   return dev->pools.gem != NULL;
+}
+
+/**
+ * Allocate and initialise an intel_be_device for drmFD and create the i915
+ * screen on top of it.
+ *
+ * \return the new screen, or NULL on failure.
+ */
+struct pipe_screen *
+intel_be_create_screen(int drmFD, int deviceID)
+{
+   struct intel_be_device *dev;
+   struct pipe_screen *screen;
+
+   /* Allocate the private area */
+   dev = calloc(1, sizeof(*dev));
+   if (!dev)
+      return NULL;
+
+   if (!intel_be_init_device(dev, drmFD, deviceID)) {
+      free(dev);
+      return NULL;
+   }
+
+   screen = i915_create_screen(&dev->base, deviceID);
+   if (!screen) {
+      /* NOTE(review): assumes i915_create_screen() does not destroy the
+       * winsys on failure -- confirm against the i915simple driver. */
+      if (dev->pools.gem)
+         drm_intel_bufmgr_destroy(dev->pools.gem);
+      free(dev);
+      return NULL;
+   }
+
+   return screen;
+}
diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_device.h b/src/gallium/winsys/drm/intel/gem/intel_be_device.h
new file mode 100644
index 0000000000..c4837e65fa
--- /dev/null
+++ b/src/gallium/winsys/drm/intel/gem/intel_be_device.h
@@ -0,0 +1,79 @@
+
+#ifndef INTEL_DRM_DEVICE_H
+#define INTEL_DRM_DEVICE_H
+
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_context.h"
+
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+/*
+ * Device
+ */
+
+/**
+ * Per-device winsys state.
+ *
+ * base is the first member because intel_be_device() casts a
+ * struct pipe_winsys pointer straight back to this type.
+ */
+struct intel_be_device
+{
+ struct pipe_winsys base;
+
+ int fd; /**< Drm file descriptor */
+
+ unsigned id; /**< Device id as passed to intel_be_init_device() */
+
+ size_t max_batch_size; /**< Size in bytes of a batchbuffer's staging area */
+ size_t max_vertex_size;
+
+ struct {
+ drm_intel_bufmgr *gem; /**< Buffer manager used for every bo allocation */
+ } pools;
+};
+
+/** Cast a pipe_winsys pointer back to its enclosing intel_be_device. */
+static INLINE struct intel_be_device *
+intel_be_device(struct pipe_winsys *winsys)
+{
+ return (struct intel_be_device *)winsys;
+}
+
+/**
+ * Fill in an already allocated intel_be_device for the DRM file descriptor
+ * fd and the device id.
+ */
+boolean
+intel_be_init_device(struct intel_be_device *device, int fd, unsigned id);
+
+/*
+ * Buffer
+ */
+
+/**
+ * A pipe_buffer backed by a GEM bo. base is the first member because
+ * intel_be_buffer() casts a pipe_buffer pointer back to this type.
+ */
+struct intel_be_buffer {
+ struct pipe_buffer base;
+ drm_intel_bo *bo;
+};
+
+/**
+ * Create a be buffer from a drm bo handle.
+ *
+ * Takes a reference.
+ *
+ * Returns NULL on failure; name is forwarded to libdrm as the bo's name.
+ */
+struct pipe_buffer *
+intel_be_buffer_from_handle(struct pipe_winsys *winsys,
+ const char* name, unsigned handle);
+
+/**
+ * Gets a handle from a buffer.
+ *
+ * If buffer is destroyed handle may become invalid.
+ */
+unsigned
+intel_be_handle_from_buffer(struct pipe_winsys *winsys,
+ struct pipe_buffer *buffer);
+
+/** Cast a pipe_buffer pointer back to its enclosing intel_be_buffer. */
+static INLINE struct intel_be_buffer *
+intel_be_buffer(struct pipe_buffer *buf)
+{
+ return (struct intel_be_buffer *)buf;
+}
+
+/** Return the GEM bo behind a pipe_buffer created by this backend. */
+static INLINE drm_intel_bo *
+intel_bo(struct pipe_buffer *buf)
+{
+ return intel_be_buffer(buf)->bo;
+}
+
+#endif
diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_fence.h b/src/gallium/winsys/drm/intel/gem/intel_be_fence.h
new file mode 100644
index 0000000000..0fe18f66f8
--- /dev/null
+++ b/src/gallium/winsys/drm/intel/gem/intel_be_fence.h
@@ -0,0 +1,38 @@
+
+#ifndef INTEL_BE_FENCE_H
+#define INTEL_BE_FENCE_H
+
+#include "pipe/p_defines.h"
+
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+/**
+ * Because GEM does not have fences we have to create our own.
+ *
+ * They work by keeping the batchbuffer around and checking whether it has
+ * become idle. If bo is NULL the fence has expired.
+ */
+struct intel_be_fence
+{
+ uint32_t refcount; /* number of holders; the fence is freed at zero */
+ drm_intel_bo *bo; /* bo to wait on, or NULL once expired */
+};
+
+/** Take an additional reference on f (plain, non-atomic increment). */
+static INLINE void
+intel_be_fence_reference(struct intel_be_fence *f)
+{
+ f->refcount++;
+}
+
+/**
+ * Drop one reference on f. When the count reaches zero the fence's bo (if
+ * any) is unreferenced and the fence itself is freed.
+ */
+static INLINE void
+intel_be_fence_unreference(struct intel_be_fence *f)
+{
+   f->refcount--;
+   if (f->refcount != 0)
+      return;
+
+   if (f->bo != NULL)
+      drm_intel_bo_unreference(f->bo);
+   free(f);
+}
+
+#endif
diff --git a/src/gallium/winsys/drm/nouveau/Makefile b/src/gallium/winsys/drm/nouveau/Makefile
new file mode 100644
index 0000000000..652cf7146c
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/Makefile
@@ -0,0 +1,12 @@
+# src/gallium/winsys/drm/nouveau/Makefile
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+SUBDIRS = common dri
+
+# Run the requested target in every SUBDIRS entry that exists as a
+# directory; stop at the first sub-make that fails.
+default install clean:
+ @for dir in $(SUBDIRS) ; do \
+ if [ -d $$dir ] ; then \
+ (cd $$dir && $(MAKE) $@) || exit 1; \
+ fi \
+ done
diff --git a/src/gallium/winsys/drm/nouveau/common/Makefile b/src/gallium/winsys/drm/nouveau/common/Makefile
new file mode 100644
index 0000000000..f675f7caf1
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/Makefile
@@ -0,0 +1,22 @@
+# Build description for the "nouveaudrm" library (common nouveau winsys code).
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nouveaudrm
+
+C_SOURCES = \
+ nouveau_context.c \
+ nouveau_lock.c \
+ nouveau_screen.c \
+ nouveau_winsys.c \
+ nouveau_winsys_pipe.c \
+ nouveau_winsys_softpipe.c
+
+# Include paths for libdrm and libdrm_nouveau, queried from pkg-config.
+LIBRARY_INCLUDES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-I)
+
+# The commands are chained with &&, so the output stops at the first check
+# that fails: libdrm must be at least 2.3.1 and libdrm_nouveau exactly 0.5
+# before the libdrm_nouveau flags and -DDRM_VBLANK_FLIP are emitted.
+LIBRARY_DEFINES = $(shell pkg-config libdrm --cflags-only-other \
+ && pkg-config libdrm --atleast-version=2.3.1 \
+ && pkg-config libdrm_nouveau --exact-version=0.5 \
+ && pkg-config libdrm_nouveau --cflags-only-other \
+ && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
+
+include ../../../../Makefile.template
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_context.c b/src/gallium/winsys/drm/nouveau/common/nouveau_context.c
new file mode 100644
index 0000000000..d6ae0827cd
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_context.c
@@ -0,0 +1,206 @@
+#include <pipe/p_defines.h>
+#include <pipe/p_context.h>
+#include <pipe/p_screen.h>
+#include <util/u_memory.h>
+#include "nouveau_context.h"
+#include "nouveau_dri.h"
+#include "nouveau_local.h"
+#include "nouveau_screen.h"
+#include "nouveau_winsys_pipe.h"
+
+/* Free the GPU channel owned by nvc, then nvc itself. */
+static void
+nouveau_channel_context_destroy(struct nouveau_channel_context *nvc)
+{
+ nouveau_channel_free(&nvc->channel);
+
+ FREE(nvc);
+}
+
+/*
+ * Allocate a channel context and open a GPU channel on dev for it.
+ * Returns NULL if either the allocation or the channel setup fails.
+ */
+static struct nouveau_channel_context *
+nouveau_channel_context_create(struct nouveau_device *dev)
+{
+    struct nouveau_channel_context *nvc = CALLOC_STRUCT(nouveau_channel_context);
+    int ret;
+
+    if (nvc == NULL)
+        return NULL;
+
+    ret = nouveau_channel_alloc(dev, 0x8003d001, 0x8003d002,
+                                &nvc->channel);
+    if (ret != 0) {
+        NOUVEAU_ERR("Error creating GPU channel: %d\n", ret);
+        nouveau_channel_context_destroy(nvc);
+        return NULL;
+    }
+
+    nvc->next_handle = 0x77000000;
+    return nvc;
+}
+
+/*
+ * Initialise nv on top of nv_screen.
+ *
+ * Picks (or creates) the channel context, reserves a pipe-context slot in
+ * it, creates the hardware pipe -- or softpipe when NOUVEAU_FORCE_SOFTPIPE
+ * is set or the hardware pipe cannot be created -- and wraps the screen's
+ * front buffer in a surface.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+int
+nouveau_context_init(struct nouveau_screen *nv_screen,
+                     drm_context_t hHWContext, drmLock *sarea_lock,
+                     struct nouveau_context *nv_share,
+                     struct nouveau_context *nv)
+{
+    struct pipe_context *pipe = NULL;
+    struct nouveau_channel_context *nvc = NULL;
+    struct nouveau_device *dev = nv_screen->device;
+    unsigned i;
+
+    switch (dev->chipset & 0xf0) {
+    case 0x00:
+        /* NV04 */
+    case 0x10:
+    case 0x20:
+        /* NV10 */
+    case 0x30:
+        /* NV30 */
+    case 0x40:
+    case 0x60:
+        /* NV40 */
+    case 0x50:
+    case 0x80:
+    case 0x90:
+        /* G80 */
+        break;
+    default:
+        NOUVEAU_ERR("Unsupported chipset: NV%02x\n", dev->chipset);
+        return 1;
+    }
+
+    nv->nv_screen = nv_screen;
+
+    {
+        struct nouveau_device_priv *nvdev = nouveau_device(dev);
+
+        nvdev->ctx = hHWContext;
+        nvdev->lock = sarea_lock;
+    }
+
+    /* Attempt to share a single channel between multiple contexts from
+     * a single process.
+     */
+    nvc = nv_screen->nvc;
+    if (!nvc && nv_share)
+        nvc = nv_share->nvc;
+
+    /*XXX: temporary - disable multi-context/single-channel on pre-NV4x */
+    switch (dev->chipset & 0xf0) {
+    case 0x40:
+    case 0x60:
+        /* NV40 class */
+    case 0x50:
+    case 0x80:
+    case 0x90:
+        /* G80 class */
+        break;
+    default:
+        nvc = NULL;
+        break;
+    }
+
+    if (!nvc) {
+        nvc = nouveau_channel_context_create(dev);
+        if (!nvc) {
+            NOUVEAU_ERR("Failed initialising GPU context\n");
+            return 1;
+        }
+        nv_screen->nvc = nvc;
+    }
+
+    /* Find a free slot for a pipe context, allocate a new one if needed */
+    nv->pctx_id = -1;
+    for (i = 0; i < nvc->nr_pctx; i++) {
+        if (nvc->pctx[i] == NULL) {
+            nv->pctx_id = i;
+            break;
+        }
+    }
+
+    if (nv->pctx_id < 0) {
+        /* Grow through a temporary so the table is not lost if realloc
+         * fails, and clear the new slot so that later free-slot scans
+         * never read uninitialised memory. */
+        struct pipe_context **grown =
+            realloc(nvc->pctx,
+                    sizeof(struct pipe_context *) * (nvc->nr_pctx + 1));
+        if (!grown) {
+            NOUVEAU_ERR("Out of memory growing pipe context table\n");
+            return 1;
+        }
+        grown[nvc->nr_pctx] = NULL;
+        nvc->pctx = grown;
+        nv->pctx_id = nvc->nr_pctx++;
+    }
+
+    /* Only take the channel reference once a slot is secured, so the
+     * failure path above does not leave a stray reference behind. */
+    nvc->refcount++;
+    nv->nvc = nvc;
+
+    /* Create pipe */
+    if (!getenv("NOUVEAU_FORCE_SOFTPIPE")) {
+        pipe = nouveau_pipe_create(nv);
+        if (pipe) {
+            struct pipe_screen *pscreen = nvc->pscreen;
+
+            nv->cap.hw_vertex_buffer =
+                pscreen->get_param(pscreen, NOUVEAU_CAP_HW_VTXBUF);
+            nv->cap.hw_index_buffer =
+                pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF);
+        } else {
+            /* Do not query hardware caps without a hardware pipe; fall
+             * through to softpipe below. */
+            NOUVEAU_ERR("Couldn't create hw pipe\n");
+        }
+    }
+
+    if (!pipe) {
+        NOUVEAU_MSG("Using softpipe\n");
+        pipe = nouveau_create_softpipe(nv);
+        if (!pipe) {
+            NOUVEAU_ERR("Error creating pipe, bailing\n");
+            return 1;
+        }
+    }
+
+    {
+        struct pipe_texture *fb_tex;
+        struct pipe_surface *fb_surf;
+        struct nouveau_pipe_buffer *fb_buf;
+        enum pipe_format format;
+
+        fb_buf = calloc(1, sizeof(struct nouveau_pipe_buffer));
+        if (!fb_buf) {
+            NOUVEAU_ERR("Out of memory allocating front buffer\n");
+            return 1;
+        }
+        fb_buf->base.refcount = 1;
+        fb_buf->base.usage = PIPE_BUFFER_USAGE_PIXEL;
+
+        nouveau_bo_fake(dev, nv_screen->front_offset, NOUVEAU_BO_VRAM,
+                        nv_screen->front_pitch*nv_screen->front_height,
+                        NULL, &fb_buf->bo);
+
+        if (nv_screen->front_cpp == 4)
+            format = PIPE_FORMAT_A8R8G8B8_UNORM;
+        else
+            format = PIPE_FORMAT_R5G6B5_UNORM;
+
+        fb_surf = nouveau_surface_buffer_ref(nv, &fb_buf->base, format,
+                                             nv_screen->front_pitch /
+                                             nv_screen->front_cpp,
+                                             nv_screen->front_height,
+                                             nv_screen->front_pitch,
+                                             &fb_tex);
+
+        nv->frontbuffer = fb_surf;
+        nv->frontbuffer_texture = fb_tex;
+    }
+
+    pipe->priv = nv;
+    return 0;
+}
+
+/*
+ * Release nv's pipe-context slot and drop its channel-context reference;
+ * the channel context is destroyed when the last reference goes away.
+ */
+void
+nouveau_context_cleanup(struct nouveau_context *nv)
+{
+    struct nouveau_channel_context *nvc;
+
+    /* Check nv before it is dereferenced. */
+    assert(nv);
+    nvc = nv->nvc;
+
+    if (nv->pctx_id >= 0) {
+        nvc->pctx[nv->pctx_id] = NULL;
+        if (--nvc->refcount <= 0) {
+            nouveau_channel_context_destroy(nvc);
+            nv->nv_screen->nvc = NULL;
+        }
+    }
+
+    /* XXX: Who cleans up the pipe? */
+}
+
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_context.h b/src/gallium/winsys/drm/nouveau/common/nouveau_context.h
new file mode 100644
index 0000000000..02d2745680
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_context.h
@@ -0,0 +1,59 @@
+#ifndef __NOUVEAU_CONTEXT_H__
+#define __NOUVEAU_CONTEXT_H__
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau_drmif.h"
+#include "nouveau_device.h"
+#include "nouveau_channel.h"
+#include "nouveau_pushbuf.h"
+#include "nouveau_bo.h"
+#include "nouveau_grobj.h"
+#include "nouveau_notifier.h"
+#include "nouveau_class.h"
+#include "nouveau_local.h"
+
+/* GPU channel state that may be shared by several nouveau_contexts. */
+struct nouveau_channel_context {
+ struct pipe_screen *pscreen;
+ int refcount; /* number of nouveau_contexts using this channel context */
+
+ unsigned cur_pctx;
+ unsigned nr_pctx; /* number of entries in pctx */
+ struct pipe_context **pctx; /* slot table; a NULL entry is free */
+
+ struct nouveau_channel *channel;
+ unsigned next_handle;
+};
+
+/* Per-context winsys state, filled in by nouveau_context_init(). */
+struct nouveau_context {
+ int locked;
+ struct nouveau_screen *nv_screen;
+ struct pipe_surface *frontbuffer;
+ struct pipe_texture *frontbuffer_texture;
+
+ /* Values of the screen's NOUVEAU_CAP_HW_VTXBUF / _IDXBUF parameters */
+ struct {
+ int hw_vertex_buffer;
+ int hw_index_buffer;
+ } cap;
+
+ /* Hardware context */
+ struct nouveau_channel_context *nvc;
+ int pctx_id; /* index of this context's slot in nvc->pctx, or -1 */
+};
+
+/* Returns 0 on success and 1 on failure. */
+extern int nouveau_context_init(struct nouveau_screen *nv_screen,
+ drm_context_t hHWContext, drmLock *sarea_lock,
+ struct nouveau_context *nv_share,
+ struct nouveau_context *nv);
+extern void nouveau_context_cleanup(struct nouveau_context *nv);
+
+extern void LOCK_HARDWARE(struct nouveau_context *);
+extern void UNLOCK_HARDWARE(struct nouveau_context *);
+
+extern uint32_t *nouveau_pipe_dma_beginp(struct nouveau_grobj *, int, int);
+extern void nouveau_pipe_dma_kickoff(struct nouveau_channel *);
+
+/* Must be provided by clients of common code */
+extern void
+nouveau_contended_lock(struct nouveau_context *nv);
+
+#endif
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_dri.h b/src/gallium/winsys/drm/nouveau/common/nouveau_dri.h
new file mode 100644
index 0000000000..1207c2d609
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_dri.h
@@ -0,0 +1,28 @@
+#ifndef _NOUVEAU_DRI_
+#define _NOUVEAU_DRI_
+
+#include "xf86drm.h"
+#include "drm.h"
+#include "nouveau_drm.h"
+
+/* Display information handed over by the DDX.  The DRI screen code rejects
+ * the device when the private blob size differs from sizeof(struct
+ * nouveau_dri), so the layout must stay in sync with the DDX side. */
+struct nouveau_dri {
+ uint32_t device_id; /**< \brief PCI device ID */
+ uint32_t width; /**< \brief width in pixels of display */
+ uint32_t height; /**< \brief height in scanlines of display */
+ uint32_t depth; /**< \brief depth of display (8, 15, 16, 24) */
+ uint32_t bpp; /**< \brief bit depth of display (8, 16, 24, 32) */
+
+ uint32_t bus_type; /**< \brief the bus type */
+ uint32_t bus_mode; /**< \brief bus mode (used for AGP, maybe also for PCI-E ?) */
+
+ uint32_t front_offset; /**< \brief front buffer offset */
+ uint32_t front_pitch; /**< \brief front buffer pitch */
+ uint32_t back_offset; /**< \brief private back buffer offset */
+ uint32_t back_pitch; /**< \brief private back buffer pitch */
+ uint32_t depth_offset; /**< \brief private depth buffer offset */
+ uint32_t depth_pitch; /**< \brief private depth buffer pitch */
+
+};
+
+#endif
+
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_local.h b/src/gallium/winsys/drm/nouveau/common/nouveau_local.h
new file mode 100644
index 0000000000..11175bce7a
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_local.h
@@ -0,0 +1,19 @@
+#ifndef __NOUVEAU_LOCAL_H__
+#define __NOUVEAU_LOCAL_H__
+
+#include "pipe/p_compiler.h"
+#include "nouveau_winsys_pipe.h"
+#include <stdio.h>
+
+/* Debug output.
+ *
+ * NOUVEAU_MSG prints a "nouveau: "-prefixed message to stdout; NOUVEAU_ERR
+ * prints to stderr prefixed with the calling function and line number.
+ * Both flush the stream immediately.  fmt must be a string literal because
+ * it is pasted onto the prefix; the named variadic parameter (args...) and
+ * ##args are GNU C extensions.
+ */
+#define NOUVEAU_MSG(fmt, args...) do { \
+ fprintf(stdout, "nouveau: "fmt, ##args); \
+ fflush(stdout); \
+} while(0)
+
+#define NOUVEAU_ERR(fmt, args...) do { \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); \
+ fflush(stderr); \
+} while(0)
+
+#endif
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_lock.c b/src/gallium/winsys/drm/nouveau/common/nouveau_lock.c
new file mode 100644
index 0000000000..e8cf051ed9
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_lock.c
@@ -0,0 +1,72 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <pipe/p_thread.h>
+#include "nouveau_context.h"
+#include "nouveau_screen.h"
+
+/* File-scope mutex taken in LOCK_HARDWARE() and released in
+ * UNLOCK_HARDWARE(); it is held for the whole time the hardware lock is. */
+pipe_static_mutex(lockMutex);
+
+/* Take the DRM hardware lock on behalf of nv.
+ *
+ * Acquires lockMutex, then attempts the lock with DRM_CAS.  When the CAS
+ * reports failure (__ret != 0) the lock is obtained through drmGetLock()
+ * and the client-provided nouveau_contended_lock() hook is invoked.
+ * nv must not already hold the lock (asserted).
+ */
+void
+LOCK_HARDWARE(struct nouveau_context *nv)
+{
+ struct nouveau_screen *nv_screen = nv->nv_screen;
+ struct nouveau_device *dev = nv_screen->device;
+ struct nouveau_device_priv *nvdev = nouveau_device(dev);
+ char __ret=0;
+
+ assert(!nv->locked);
+ pipe_mutex_lock(lockMutex);
+
+ DRM_CAS(nvdev->lock, nvdev->ctx,
+ (DRM_LOCK_HELD | nvdev->ctx), __ret);
+
+ if (__ret) {
+ drmGetLock(nvdev->fd, nvdev->ctx, 0);
+ nouveau_contended_lock(nv);
+ }
+ nv->locked = 1;
+}
+
+/* Release the DRM hardware lock taken by LOCK_HARDWARE() for nv.
+ *
+ * Clears nv->locked, drops the DRM lock for the device's context and only
+ * then releases lockMutex, mirroring the acquisition order in reverse.
+ * nv must currently hold the lock (asserted).
+ */
+void
+UNLOCK_HARDWARE(struct nouveau_context *nv)
+{
+ struct nouveau_screen *nv_screen = nv->nv_screen;
+ struct nouveau_device *dev = nv_screen->device;
+ struct nouveau_device_priv *nvdev = nouveau_device(dev);
+
+ assert(nv->locked);
+ nv->locked = 0;
+
+ DRM_UNLOCK(nvdev->fd, nvdev->lock, nvdev->ctx);
+
+ pipe_mutex_unlock(lockMutex);
+}
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_screen.c b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.c
new file mode 100644
index 0000000000..422fbf0207
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.c
@@ -0,0 +1,31 @@
+#include <util/u_memory.h>
+#include "nouveau_dri.h"
+#include "nouveau_local.h"
+#include "nouveau_screen.h"
+
+/* Attach nv_screen to the DRM device behind dev_fd and record the front
+ * buffer layout reported by the DDX.
+ *
+ * The stored pitch is the DDX pitch multiplied by the bytes per pixel
+ * (bpp / 8).  Returns 0 on success and 1 when the device cannot be opened.
+ */
+int
+nouveau_screen_init(struct nouveau_dri *nv_dri, int dev_fd,
+		    struct nouveau_screen *nv_screen)
+{
+	uint32_t cpp;
+	int err;
+
+	err = nouveau_device_open_existing(&nv_screen->device, 0, dev_fd, 0);
+	if (err != 0) {
+		NOUVEAU_ERR("Failed opening nouveau device: %d\n", err);
+		return 1;
+	}
+
+	cpp = nv_dri->bpp / 8;
+
+	nv_screen->front_cpp    = cpp;
+	nv_screen->front_pitch  = nv_dri->front_pitch * cpp;
+	nv_screen->front_offset = nv_dri->front_offset;
+	nv_screen->front_height = nv_dri->height;
+
+	return 0;
+}
+
+/* Close the device opened by nouveau_screen_init().  The nouveau_screen
+ * structure itself is owned (and freed) by the caller. */
+void
+nouveau_screen_cleanup(struct nouveau_screen *nv_screen)
+{
+ nouveau_device_close(&nv_screen->device);
+}
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_screen.h b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.h
new file mode 100644
index 0000000000..3e68e219d8
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.h
@@ -0,0 +1,27 @@
+#ifndef __NOUVEAU_SCREEN_H__
+#define __NOUVEAU_SCREEN_H__
+
+#include <stdint.h>
+
+struct nouveau_device;
+struct nouveau_dri;
+
+/* Per-screen state shared by all contexts: the DRM device plus the front
+ * buffer geometry copied from struct nouveau_dri at init time
+ * (front_pitch = DDX pitch * front_cpp, front_cpp = bpp / 8). */
+struct nouveau_screen {
+ struct nouveau_device *device;
+
+ uint32_t front_offset;
+ uint32_t front_pitch;
+ uint32_t front_cpp;
+ uint32_t front_height;
+
+ /* NOTE(review): opaque here; presumably a nouveau_channel_context */
+ void *nvc;
+};
+
+/* Returns 0 on success, non-zero if the device could not be opened. */
+int
+nouveau_screen_init(struct nouveau_dri *nv_dri, int dev_fd,
+ struct nouveau_screen *nv_screen);
+
+/* Closes the device; does not free nv_screen. */
+void
+nouveau_screen_cleanup(struct nouveau_screen *nv_screen);
+
+#endif
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys.c
new file mode 100644
index 0000000000..b6199f8e6d
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys.c
@@ -0,0 +1,141 @@
+#include "util/u_memory.h"
+
+#include "nouveau_context.h"
+#include "nouveau_screen.h"
+#include "nouveau_winsys_pipe.h"
+
+#include "nouveau/nouveau_winsys.h"
+
+/* nouveau_winsys::notifier_alloc hook: allocate a notifier with `count`
+ * entries on the context's channel, consuming the channel's next free
+ * object handle.  Returns whatever nouveau_notifier_alloc() returns.
+ */
+static int
+nouveau_pipe_notifier_alloc(struct nouveau_winsys *nvws, int count,
+			    struct nouveau_notifier **notify)
+{
+	struct nouveau_channel_context *nvc = nvws->nv->nvc;
+	unsigned handle = nvc->next_handle++;
+
+	return nouveau_notifier_alloc(nvc->channel, handle, count, notify);
+}
+
+/* nouveau_winsys::grobj_alloc hook: allocate a graphics object of class
+ * grclass on the context's channel using the next free handle.
+ *
+ * On success the object's handle is written to method 0x0000 on the ring
+ * and the object is marked NOUVEAU_GROBJ_BOUND_EXPLICIT (presumably this
+ * binds the object to its subchannel -- confirm against libdrm_nouveau).
+ * Returns 0 on success or the error from nouveau_grobj_alloc().
+ */
+static int
+nouveau_pipe_grobj_alloc(struct nouveau_winsys *nvws, int grclass,
+ struct nouveau_grobj **grobj)
+{
+ struct nouveau_context *nv = nvws->nv;
+ struct nouveau_channel *chan = nv->nvc->channel;
+ int ret;
+
+ ret = nouveau_grobj_alloc(chan, nv->nvc->next_handle++,
+ grclass, grobj);
+ if (ret)
+ return ret;
+
+ BEGIN_RING(chan, *grobj, 0x0000, 1);
+ OUT_RING (chan, (*grobj)->handle);
+ (*grobj)->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT;
+ return 0;
+}
+
+/* nouveau_winsys::push_reloc hook: emit a relocation at ptr for the buffer
+ * object backing buf, forwarding all remaining arguments unchanged to
+ * nouveau_pushbuf_emit_reloc() on the winsys channel.
+ */
+static int
+nouveau_pipe_push_reloc(struct nouveau_winsys *nvws, void *ptr,
+			struct pipe_buffer *buf, uint32_t data,
+			uint32_t flags, uint32_t vor, uint32_t tor)
+{
+	struct nouveau_pipe_buffer *nvbuf = nouveau_pipe_buffer(buf);
+
+	return nouveau_pushbuf_emit_reloc(nvws->channel, ptr, nvbuf->bo,
+					  data, flags, vor, tor);
+}
+
+/* nouveau_winsys::push_flush hook: flush the channel's push buffer.
+ *
+ * No fence object is produced; when the caller asks for one, *fence is
+ * simply set to NULL.
+ */
+static int
+nouveau_pipe_push_flush(struct nouveau_winsys *nvws, unsigned size,
+			struct pipe_fence_handle **fence)
+{
+	if (fence != NULL)
+		*fence = NULL;
+
+	return nouveau_pushbuf_flush(nvws->channel, size);
+}
+
+/* nouveau_winsys::get_bo hook: return the nouveau_bo wrapped by a
+ * pipe_buffer created by this winsys.  No reference is taken. */
+static struct nouveau_bo *
+nouveau_pipe_get_bo(struct pipe_buffer *pb)
+{
+ return nouveau_pipe_buffer(pb)->bo;
+}
+
+/* Create the hardware pipe context for nv and store it in
+ * nv->nvc->pctx[nv->pctx_id].
+ *
+ * The driver family is selected from the high nibble of the chipset id.
+ * The channel's pipe_screen is created on first use; the nouveau_winsys
+ * and pipe_winsys objects are only needed for that step, so they are
+ * allocated only when a screen actually has to be created (previously they
+ * were allocated unconditionally and leaked whenever the screen already
+ * existed or the chipset was unknown).
+ *
+ * Returns the new pipe context, or NULL on unknown chipset, allocation
+ * failure or screen creation failure.
+ */
+struct pipe_context *
+nouveau_pipe_create(struct nouveau_context *nv)
+{
+	struct nouveau_channel_context *nvc = nv->nvc;
+	struct pipe_screen *(*hws_create)(struct pipe_winsys *,
+					  struct nouveau_winsys *);
+	struct pipe_context *(*hw_create)(struct pipe_screen *, unsigned);
+	unsigned chipset = nv->nv_screen->device->chipset;
+
+	switch (chipset & 0xf0) {
+	case 0x00:
+		hws_create = nv04_screen_create;
+		hw_create = nv04_create;
+		break;
+	case 0x10:
+		hws_create = nv10_screen_create;
+		hw_create = nv10_create;
+		break;
+	case 0x20:
+		hws_create = nv20_screen_create;
+		hw_create = nv20_create;
+		break;
+	case 0x30:
+		hws_create = nv30_screen_create;
+		hw_create = nv30_create;
+		break;
+	case 0x40:
+	case 0x60:
+		hws_create = nv40_screen_create;
+		hw_create = nv40_create;
+		break;
+	case 0x50:
+	case 0x80:
+	case 0x90:
+		hws_create = nv50_screen_create;
+		hw_create = nv50_create;
+		break;
+	default:
+		NOUVEAU_ERR("Unknown chipset NV%02x\n", chipset);
+		return NULL;
+	}
+
+	if (!nvc->pscreen) {
+		struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys);
+		struct pipe_winsys *ws;
+
+		if (!nvws)
+			return NULL;
+
+		nvws->nv = nv;
+		nvws->channel = nvc->channel;
+
+		nvws->res_init = nouveau_resource_init;
+		nvws->res_alloc = nouveau_resource_alloc;
+		nvws->res_free = nouveau_resource_free;
+
+		nvws->push_reloc = nouveau_pipe_push_reloc;
+		nvws->push_flush = nouveau_pipe_push_flush;
+
+		nvws->grobj_alloc = nouveau_pipe_grobj_alloc;
+		nvws->grobj_free = nouveau_grobj_free;
+
+		nvws->notifier_alloc = nouveau_pipe_notifier_alloc;
+		nvws->notifier_free = nouveau_notifier_free;
+		nvws->notifier_reset = nouveau_notifier_reset;
+		nvws->notifier_status = nouveau_notifier_status;
+		nvws->notifier_retval = nouveau_notifier_return_val;
+		nvws->notifier_wait = nouveau_notifier_wait_status;
+
+		nvws->get_bo = nouveau_pipe_get_bo;
+
+		ws = nouveau_create_pipe_winsys(nv);
+		if (!ws) {
+			FREE(nvws);
+			return NULL;
+		}
+
+		nvc->pscreen = hws_create(ws, nvws);
+		if (!nvc->pscreen) {
+			/* NOTE(review): ws/nvws are deliberately not freed
+			 * here -- whether the driver's screen constructor
+			 * releases them on failure is not visible from this
+			 * file, and a leak is safer than a double free. */
+			return NULL;
+		}
+	}
+
+	nvc->pctx[nv->pctx_id] = hw_create(nvc->pscreen, nv->pctx_id);
+	return nvc->pctx[nv->pctx_id];
+}
+
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c
new file mode 100644
index 0000000000..e3ee985afc
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c
@@ -0,0 +1,245 @@
+#include "pipe/internal/p_winsys_screen.h"
+#include <pipe/p_defines.h>
+#include <pipe/p_inlines.h>
+#include <util/u_memory.h>
+#include "nouveau_context.h"
+#include "nouveau_local.h"
+#include "nouveau_screen.h"
+#include "nouveau_winsys_pipe.h"
+
+/* pipe_winsys::get_name hook: returns a static identification string. */
+static const char *
+nouveau_get_name(struct pipe_winsys *pws)
+{
+ return "Nouveau/DRI";
+}
+
+/* Translate pipe/nouveau buffer usage bits into NOUVEAU_BO_* placement
+ * flags.
+ *
+ * NOUVEAU_BO_LOCAL is always set.  Transfer buffers and texture pixel
+ * buffers add GART; pixel buffers without CPU read/write usage add VRAM;
+ * on the 0x50/0x80/0x90 chipset families pixel buffers are additionally
+ * tiled (with Z tiling for zeta usage).  Vertex and index buffers add GART
+ * only when the context advertises the matching hardware capability.
+ */
+static uint32_t
+nouveau_flags_from_usage(struct nouveau_context *nv, unsigned usage)
+{
+	struct nouveau_device *dev = nv->nv_screen->device;
+	uint32_t bo_flags = NOUVEAU_BO_LOCAL;
+
+	if (usage & NOUVEAU_BUFFER_USAGE_TRANSFER)
+		bo_flags |= NOUVEAU_BO_GART;
+
+	if (usage & PIPE_BUFFER_USAGE_PIXEL) {
+		unsigned family = dev->chipset & 0xf0;
+
+		if (usage & NOUVEAU_BUFFER_USAGE_TEXTURE)
+			bo_flags |= NOUVEAU_BO_GART;
+		if (!(usage & PIPE_BUFFER_USAGE_CPU_READ_WRITE))
+			bo_flags |= NOUVEAU_BO_VRAM;
+
+		if (family == 0x50 || family == 0x80 || family == 0x90) {
+			bo_flags |= NOUVEAU_BO_TILED;
+			if (usage & NOUVEAU_BUFFER_USAGE_ZETA)
+				bo_flags |= NOUVEAU_BO_ZTILE;
+		}
+	}
+
+	if ((usage & PIPE_BUFFER_USAGE_VERTEX) && nv->cap.hw_vertex_buffer)
+		bo_flags |= NOUVEAU_BO_GART;
+
+	if ((usage & PIPE_BUFFER_USAGE_INDEX) && nv->cap.hw_index_buffer)
+		bo_flags |= NOUVEAU_BO_GART;
+
+	return bo_flags;
+}
+
+/* pipe_winsys::buffer_create hook.
+ *
+ * Allocates a nouveau_pipe_buffer wrapper (refcount 1) and a backing
+ * nouveau_bo whose placement is derived from `usage`.  Returns the
+ * embedded pipe_buffer, or NULL if either allocation fails.
+ */
+static struct pipe_buffer *
+nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment,
+		       unsigned usage, unsigned size)
+{
+	struct nouveau_context *nv = ((struct nouveau_pipe_winsys *)pws)->nv;
+	struct nouveau_device *dev = nv->nv_screen->device;
+	struct nouveau_pipe_buffer *wrapper;
+	uint32_t bo_flags;
+
+	wrapper = CALLOC_STRUCT(nouveau_pipe_buffer);
+	if (wrapper == NULL)
+		return NULL;
+
+	wrapper->base.refcount = 1;
+	wrapper->base.alignment = alignment;
+	wrapper->base.usage = usage;
+	wrapper->base.size = size;
+
+	bo_flags = nouveau_flags_from_usage(nv, usage);
+	if (nouveau_bo_new(dev, bo_flags, alignment, size, &wrapper->bo) != 0) {
+		FREE(wrapper);
+		return NULL;
+	}
+
+	return &wrapper->base;
+}
+
+/* pipe_winsys::user_buffer_create hook.
+ *
+ * Wraps `bytes` bytes of caller memory at ptr in a nouveau_bo via
+ * nouveau_bo_user().  Only refcount and size are filled in on the
+ * pipe_buffer; the remaining fields stay zero.  Returns NULL on failure.
+ */
+static struct pipe_buffer *
+nouveau_pipe_bo_user_create(struct pipe_winsys *pws, void *ptr, unsigned bytes)
+{
+	struct nouveau_context *nv = ((struct nouveau_pipe_winsys *)pws)->nv;
+	struct nouveau_device *dev = nv->nv_screen->device;
+	struct nouveau_pipe_buffer *wrapper;
+
+	wrapper = CALLOC_STRUCT(nouveau_pipe_buffer);
+	if (wrapper == NULL)
+		return NULL;
+
+	wrapper->base.refcount = 1;
+	wrapper->base.size = bytes;
+
+	if (nouveau_bo_user(dev, ptr, bytes, &wrapper->bo) != 0) {
+		FREE(wrapper);
+		return NULL;
+	}
+
+	return &wrapper->base;
+}
+
+/* pipe_winsys::buffer_destroy hook: drop this wrapper's reference on the
+ * backing nouveau_bo and free the wrapper itself. */
+static void
+nouveau_pipe_bo_del(struct pipe_winsys *ws, struct pipe_buffer *buf)
+{
+ struct nouveau_pipe_buffer *nvbuf = nouveau_pipe_buffer(buf);
+
+ nouveau_bo_ref(NULL, &nvbuf->bo);
+ FREE(nvbuf);
+}
+
+/* pipe_winsys::buffer_map hook.
+ *
+ * Translates the CPU read/write usage bits into NOUVEAU_BO_RD/WR, maps
+ * the backing bo and returns its CPU address, or NULL if mapping fails.
+ */
+static void *
+nouveau_pipe_bo_map(struct pipe_winsys *pws, struct pipe_buffer *buf,
+ unsigned flags)
+{
+ struct nouveau_pipe_buffer *nvbuf = nouveau_pipe_buffer(buf);
+ uint32_t map_flags = 0;
+
+ if (flags & PIPE_BUFFER_USAGE_CPU_READ)
+ map_flags |= NOUVEAU_BO_RD;
+ if (flags & PIPE_BUFFER_USAGE_CPU_WRITE)
+ map_flags |= NOUVEAU_BO_WR;
+
+ /* Disabled: on a write-only DISCARD map of a busy buffer, swap in a
+ * freshly allocated bo instead of waiting for the old one. */
+#if 0
+ if (flags & PIPE_BUFFER_USAGE_DISCARD &&
+ !(flags & PIPE_BUFFER_USAGE_CPU_READ) &&
+ nouveau_bo_busy(nvbuf->bo, map_flags)) {
+ struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws;
+ struct nouveau_context *nv = nvpws->nv;
+ struct nouveau_device *dev = nv->nv_screen->device;
+ struct nouveau_bo *rename;
+ uint32_t flags = nouveau_flags_from_usage(nv, buf->usage);
+
+ if (!nouveau_bo_new(dev, flags, buf->alignment, buf->size, &rename)) {
+ nouveau_bo_ref(NULL, &nvbuf->bo);
+ nvbuf->bo = rename;
+ }
+ }
+#endif
+
+ if (nouveau_bo_map(nvbuf->bo, map_flags))
+ return NULL;
+ return nvbuf->bo->map;
+}
+
+/* pipe_winsys::buffer_unmap hook: undo a previous nouveau_pipe_bo_map(). */
+static void
+nouveau_pipe_bo_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf)
+{
+ struct nouveau_pipe_buffer *nvbuf = nouveau_pipe_buffer(buf);
+
+ nouveau_bo_unmap(nvbuf->bo);
+}
+
+/* pipe_winsys::fence_reference hook.  Fences are not implemented by this
+ * winsys: the handle is copied without any reference counting. */
+static void
+nouveau_pipe_fence_reference(struct pipe_winsys *ws,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *pfence)
+{
+ *ptr = pfence;
+}
+
+/* pipe_winsys::fence_signalled hook.  Stub: ignores its arguments and
+ * always returns 0. */
+static int
+nouveau_pipe_fence_signalled(struct pipe_winsys *ws,
+ struct pipe_fence_handle *pfence, unsigned flag)
+{
+ return 0;
+}
+
+/* pipe_winsys::fence_finish hook.  Stub: performs no wait and always
+ * returns 0. */
+static int
+nouveau_pipe_fence_finish(struct pipe_winsys *ws,
+ struct pipe_fence_handle *pfence, unsigned flag)
+{
+ return 0;
+}
+
+/* Wrap an existing buffer in a linear 2D display-target texture of the
+ * given format and w x h size, and return a GPU-writable surface for its
+ * first level.
+ *
+ * On success *ppt receives the blanket texture.  If texture_blanket()
+ * fails, NULL is returned and *ppt is left untouched.  The result of
+ * get_tex_surface() is returned unchecked, so a NULL surface can be
+ * returned with *ppt already set; callers must handle both outcomes.
+ */
+struct pipe_surface *
+nouveau_surface_buffer_ref(struct nouveau_context *nv, struct pipe_buffer *pb,
+ enum pipe_format format, int w, int h,
+ unsigned pitch, struct pipe_texture **ppt)
+{
+ struct pipe_screen *pscreen = nv->nvc->pscreen;
+ struct pipe_texture tmpl, *pt;
+ struct pipe_surface *ps;
+
+ memset(&tmpl, 0, sizeof(tmpl));
+ tmpl.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+ NOUVEAU_TEXTURE_USAGE_LINEAR;
+ tmpl.target = PIPE_TEXTURE_2D;
+ tmpl.width[0] = w;
+ tmpl.height[0] = h;
+ tmpl.depth[0] = 1;
+ tmpl.format = format;
+ pf_get_block(tmpl.format, &tmpl.block);
+ tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w);
+ tmpl.nblocksy[0] = pf_get_nblocksy(&tmpl.block, h);
+
+ pt = pscreen->texture_blanket(pscreen, &tmpl, &pitch, pb);
+ if (!pt)
+ return NULL;
+
+ ps = pscreen->get_tex_surface(pscreen, pt, 0, 0, 0,
+ PIPE_BUFFER_USAGE_GPU_WRITE);
+
+ *ppt = pt;
+ return ps;
+}
+
+/* pipe_winsys::destroy hook.  pws is the first member of the
+ * nouveau_pipe_winsys allocated by nouveau_create_pipe_winsys(), so
+ * freeing it releases the whole object. */
+static void
+nouveau_destroy(struct pipe_winsys *pws)
+{
+ FREE(pws);
+}
+
+/* Allocate a pipe_winsys bound to nv and wire up the buffer, fence and
+ * frontbuffer hooks.  Returns NULL on allocation failure.  The object is
+ * released through its own destroy() hook.
+ */
+struct pipe_winsys *
+nouveau_create_pipe_winsys(struct nouveau_context *nv)
+{
+	struct nouveau_pipe_winsys *wrapper;
+	struct pipe_winsys *ws;
+
+	wrapper = CALLOC_STRUCT(nouveau_pipe_winsys);
+	if (wrapper == NULL)
+		return NULL;
+
+	wrapper->nv = nv;
+	ws = &wrapper->pws;
+
+	/* identification / lifetime */
+	ws->get_name = nouveau_get_name;
+	ws->destroy = nouveau_destroy;
+
+	/* presentation (supplied by the client of the common code) */
+	ws->flush_frontbuffer = nouveau_flush_frontbuffer;
+
+	/* buffer management */
+	ws->buffer_create = nouveau_pipe_bo_create;
+	ws->user_buffer_create = nouveau_pipe_bo_user_create;
+	ws->buffer_map = nouveau_pipe_bo_map;
+	ws->buffer_unmap = nouveau_pipe_bo_unmap;
+	ws->buffer_destroy = nouveau_pipe_bo_del;
+
+	/* fences (stubs) */
+	ws->fence_reference = nouveau_pipe_fence_reference;
+	ws->fence_signalled = nouveau_pipe_fence_signalled;
+	ws->fence_finish = nouveau_pipe_fence_finish;
+
+	return ws;
+}
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h
new file mode 100644
index 0000000000..1eb8043478
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h
@@ -0,0 +1,44 @@
+#ifndef NOUVEAU_PIPE_WINSYS_H
+#define NOUVEAU_PIPE_WINSYS_H
+
+#include "pipe/p_context.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "nouveau_context.h"
+
+/* pipe_buffer subclass carrying the backing nouveau_bo.  `base` must stay
+ * the first member: nouveau_pipe_buffer() casts between the two types. */
+struct nouveau_pipe_buffer {
+ struct pipe_buffer base;
+ struct nouveau_bo *bo;
+};
+
+/* Downcast a pipe_buffer created by this winsys to its wrapper. */
+static INLINE struct nouveau_pipe_buffer *
+nouveau_pipe_buffer(struct pipe_buffer *buf)
+{
+ return (struct nouveau_pipe_buffer *)buf;
+}
+
+/* pipe_winsys subclass remembering the owning context.  `pws` must stay
+ * the first member: the winsys hooks cast pipe_winsys * back to this. */
+struct nouveau_pipe_winsys {
+ struct pipe_winsys pws;
+
+ struct nouveau_context *nv;
+};
+
+/* Returns a new pipe_winsys bound to nv, or NULL on allocation failure. */
+extern struct pipe_winsys *
+nouveau_create_pipe_winsys(struct nouveau_context *nv);
+
+/* Software (softpipe) pipe context on top of the nouveau winsys. */
+struct pipe_context *
+nouveau_create_softpipe(struct nouveau_context *nv);
+
+/* Hardware pipe context selected by chipset; NULL on failure. */
+struct pipe_context *
+nouveau_pipe_create(struct nouveau_context *nv);
+
+/* Must be provided by clients of common code.  Installed as the
+ * pipe_winsys::flush_frontbuffer hook. */
+extern void
+nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf,
+ void *context_private);
+
+/* Wraps an existing buffer as a display-target texture and returns a
+ * surface for it; *ppt receives the texture. */
+struct pipe_surface *
+nouveau_surface_buffer_ref(struct nouveau_context *nv, struct pipe_buffer *pb,
+ enum pipe_format format, int w, int h,
+ unsigned pitch, struct pipe_texture **ppt);
+
+#endif
diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c
new file mode 100644
index 0000000000..396e4f2a2e
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c
@@ -0,0 +1,101 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+/*
+ * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com>
+ */
+
+#include "pipe/internal/p_winsys_screen.h"
+#include <pipe/p_screen.h>
+#include <pipe/p_defines.h>
+#include <pipe/p_format.h>
+#include <softpipe/sp_winsys.h>
+#include <util/u_memory.h>
+#include "nouveau_context.h"
+#include "nouveau_winsys_pipe.h"
+
+/* softpipe_winsys subclass that remembers the owning nouveau context. */
+struct nouveau_softpipe_winsys {
+ struct softpipe_winsys sws;
+ struct nouveau_context *nv;
+};
+
+/**
+ * softpipe_winsys::is_format_supported hook.
+ *
+ * Returns TRUE for the surface formats this winsys accepts
+ * (A8R8G8B8, R5G6B5 and Z24S8) and FALSE for everything else.
+ */
+static boolean
+nouveau_is_format_supported(struct softpipe_winsys *sws,
+			    enum pipe_format format)
+{
+	switch (format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case PIPE_FORMAT_R5G6B5_UNORM:
+	case PIPE_FORMAT_Z24S8_UNORM:
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+/* Create a softpipe context running on top of the nouveau winsys.
+ *
+ * Builds a pipe_winsys, a softpipe screen on it and a softpipe_winsys
+ * wrapper, then creates the context.  Returns NULL on any failure.
+ *
+ * NOTE(review): the error paths destroy `ws` before destroying the screen
+ * that was created from it.  If the softpipe screen's destroy() touches or
+ * itself destroys its winsys this is a use-after-free / double free --
+ * verify the ownership rules in softpipe before relying on these paths.
+ */
+struct pipe_context *
+nouveau_create_softpipe(struct nouveau_context *nv)
+{
+ struct nouveau_softpipe_winsys *nvsws;
+ struct pipe_screen *pscreen;
+ struct pipe_winsys *ws;
+ struct pipe_context *pipe;
+
+ ws = nouveau_create_pipe_winsys(nv);
+ if (!ws)
+ return NULL;
+ pscreen = softpipe_create_screen(ws);
+ if (!pscreen) {
+ ws->destroy(ws);
+ return NULL;
+ }
+ nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys);
+ if (!nvsws) {
+ ws->destroy(ws);
+ pscreen->destroy(pscreen);
+ return NULL;
+ }
+
+ nvsws->sws.is_format_supported = nouveau_is_format_supported;
+ nvsws->nv = nv;
+
+ pipe = softpipe_create(pscreen, ws, &nvsws->sws);
+ if (!pipe) {
+ ws->destroy(ws);
+ pscreen->destroy(pscreen);
+ FREE(nvsws);
+ return NULL;
+ }
+
+ return pipe;
+}
+
diff --git a/src/gallium/winsys/drm/nouveau/dri/Makefile b/src/gallium/winsys/drm/nouveau/dri/Makefile
new file mode 100644
index 0000000000..a73e8d5cb4
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/dri/Makefile
@@ -0,0 +1,34 @@
+# Builds nouveau_dri.so: the DRI front end linked against the common
+# nouveau winsys code and the per-generation pipe drivers.
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nouveau_dri.so
+
+MINIGLX_SOURCES =
+
+# Static pipe driver libraries pulled into the DRI module.
+PIPE_DRIVERS = \
+ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+ $(TOP)/src/gallium/drivers/nv04/libnv04.a \
+ $(TOP)/src/gallium/drivers/nv10/libnv10.a \
+ $(TOP)/src/gallium/drivers/nv20/libnv20.a \
+ $(TOP)/src/gallium/drivers/nv30/libnv30.a \
+ $(TOP)/src/gallium/drivers/nv40/libnv40.a \
+ $(TOP)/src/gallium/drivers/nv50/libnv50.a
+
+# NOTE(review): the common winsys archive is listed among the sources;
+# presumably the shared template passes it straight to the link step.
+DRIVER_SOURCES = \
+ nouveau_context_dri.c \
+ nouveau_screen_dri.c \
+ nouveau_swapbuffers.c \
+ ../common/libnouveaudrm.a
+
+C_SOURCES = \
+ $(COMMON_GALLIUM_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+# libdrm_nouveau compile and link flags come from pkg-config.
+DRIVER_DEFINES = $(shell pkg-config libdrm_nouveau --cflags)
+DRI_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs)
+
+include ../../Makefile.template
+
+# Intentionally empty target.
+symlinks:
diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.c
new file mode 100644
index 0000000000..aacfe984d1
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.c
@@ -0,0 +1,124 @@
+#include <main/glheader.h>
+#include <glapi/glthread.h>
+#include <GL/internal/glcore.h>
+#include <utils.h>
+
+#include <state_tracker/st_public.h>
+#include <state_tracker/st_context.h>
+#include <pipe/p_defines.h>
+#include <pipe/p_context.h>
+#include <pipe/p_screen.h>
+
+#include "../common/nouveau_winsys_pipe.h"
+#include "../common/nouveau_dri.h"
+#include "../common/nouveau_local.h"
+#include "nouveau_context_dri.h"
+#include "nouveau_screen_dri.h"
+
+#ifdef DEBUG
+/* Keywords accepted in the NOUVEAU_DEBUG environment variable and the
+ * DEBUG_* bit each one enables in __nouveau_debug. */
+static const struct dri_debug_control debug_control[] = {
+ { "bo", DEBUG_BO },
+ { NULL, 0 }
+};
+/* Bitmask of enabled DEBUG_* categories; set in nouveau_context_create(). */
+int __nouveau_debug = 0;
+#endif
+
+/* DRI CreateContext hook.
+ *
+ * Allocates the driver context, initialises the common nouveau context
+ * (optionally sharing with sharedContextPrivate), parses the driconf
+ * options and creates the state tracker context on top of the pipe
+ * context set up by nouveau_context_init().
+ *
+ * Returns GL_TRUE on success.  On failure everything allocated here is
+ * released again and GL_FALSE is returned.
+ */
+GLboolean
+nouveau_context_create(const __GLcontextModes *glVis,
+		       __DRIcontextPrivate *driContextPriv,
+		       void *sharedContextPrivate)
+{
+	__DRIscreenPrivate *driScrnPriv = driContextPriv->driScreenPriv;
+	struct nouveau_screen_dri *nv_screen = driScrnPriv->private;
+	struct nouveau_context_dri *nv;
+	struct st_context *st_share = NULL;
+	struct nouveau_context_dri *nv_share = NULL;
+	struct pipe_context *pipe;
+
+	nv = CALLOC_STRUCT(nouveau_context_dri);
+	if (!nv)
+		return GL_FALSE;
+
+	if (sharedContextPrivate) {
+		st_share = ((struct nouveau_context_dri *)sharedContextPrivate)->st;
+		nv_share = st_share->pipe->priv;
+	}
+
+	/* Pass an explicit NULL when there is no share context rather than
+	 * forming &nv_share->base from a NULL pointer. */
+	if (nouveau_context_init(&nv_screen->base, driContextPriv->hHWContext,
+				 (drmLock *)&driScrnPriv->pSAREA->lock,
+				 nv_share ? &nv_share->base : NULL,
+				 &nv->base)) {
+		FREE(nv);
+		return GL_FALSE;
+	}
+
+	pipe = nv->base.nvc->pctx[nv->base.pctx_id];
+	driContextPriv->driverPrivate = (void *)nv;
+	nv->dri_screen = driScrnPriv;
+
+	driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache,
+			    nv->dri_screen->myNum, "nouveau");
+#ifdef DEBUG
+	__nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"),
+					      debug_control);
+#endif
+
+	nv->st = st_create_context(pipe, glVis, st_share);
+	if (!nv->st) {
+		/* Undo the common context setup; the destroy hook is not
+		 * expected to run for a context that failed to create. */
+		driContextPriv->driverPrivate = NULL;
+		nouveau_context_cleanup(&nv->base);
+		FREE(nv);
+		return GL_FALSE;
+	}
+
+	return GL_TRUE;
+}
+
+/* DRI DestroyContext hook: finish outstanding state tracker work, destroy
+ * the state tracker context, tear down the common nouveau context and
+ * free the driver context structure. */
+void
+nouveau_context_destroy(__DRIcontextPrivate *driContextPriv)
+{
+ struct nouveau_context_dri *nv = driContextPriv->driverPrivate;
+
+ assert(nv);
+
+ st_finish(nv->st);
+ st_destroy_context(nv->st);
+
+ nouveau_context_cleanup(&nv->base);
+
+ FREE(nv);
+}
+
+/* DRI MakeCurrent hook.
+ *
+ * A NULL context unbinds the current one.  Otherwise the state tracker is
+ * made current on the draw/read framebuffers, and the framebuffers are
+ * resized when the bound drawable or its stamp changed (the read
+ * framebuffer is always resized when it differs from the draw one).
+ * Always returns GL_TRUE.
+ */
+GLboolean
+nouveau_context_bind(__DRIcontextPrivate *driContextPriv,
+		     __DRIdrawablePrivate *driDrawPriv,
+		     __DRIdrawablePrivate *driReadPriv)
+{
+	struct nouveau_context_dri *ctx;
+	struct nouveau_framebuffer *draw_fb, *read_fb;
+	int drawable_changed;
+
+	if (driContextPriv == NULL) {
+		st_make_current(NULL, NULL, NULL);
+		return GL_TRUE;
+	}
+
+	ctx = driContextPriv->driverPrivate;
+	draw_fb = driDrawPriv->driverPrivate;
+	read_fb = driReadPriv->driverPrivate;
+
+	st_make_current(ctx->st, draw_fb->stfb, read_fb->stfb);
+
+	drawable_changed = ctx->dri_drawable != driDrawPriv ||
+			   ctx->last_stamp != driDrawPriv->lastStamp;
+	if (drawable_changed) {
+		ctx->dri_drawable = driDrawPriv;
+		st_resize_framebuffer(draw_fb->stfb, driDrawPriv->w,
+				      driDrawPriv->h);
+		ctx->last_stamp = driDrawPriv->lastStamp;
+	}
+
+	if (driReadPriv != driDrawPriv)
+		st_resize_framebuffer(read_fb->stfb, driReadPriv->w,
+				      driReadPriv->h);
+
+	return GL_TRUE;
+}
+
+/* DRI UnbindContext hook: flush pending state tracker work for the
+ * context being unbound.  Always returns GL_TRUE. */
+GLboolean
+nouveau_context_unbind(__DRIcontextPrivate *driContextPriv)
+{
+	struct nouveau_context_dri *ctx = driContextPriv->driverPrivate;
+
+	st_flush(ctx->st, 0, NULL);
+
+	return GL_TRUE;
+}
+
diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.h
new file mode 100644
index 0000000000..64cf326411
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.h
@@ -0,0 +1,47 @@
+#ifndef __NOUVEAU_CONTEXT_DRI_H__
+#define __NOUVEAU_CONTEXT_DRI_H__
+
+#include <dri_util.h>
+#include <xmlconfig.h>
+#include <nouveau/nouveau_winsys.h>
+#include "../common/nouveau_context.h"
+
+/* Driver-private data hung off a DRI drawable. */
+struct nouveau_framebuffer {
+ struct st_framebuffer *stfb;
+};
+
+/* DRI flavour of the nouveau context.  `base` must stay the first member:
+ * the create path relies on a pointer to this struct and a pointer to its
+ * base being interchangeable. */
+struct nouveau_context_dri {
+ struct nouveau_context base;
+ struct st_context *st;
+
+ /* DRI stuff */
+ __DRIscreenPrivate *dri_screen;
+ /* Drawable last bound, and the drawable stamp seen at that time; used
+ * to detect when the state tracker framebuffer must be resized. */
+ __DRIdrawablePrivate *dri_drawable;
+ unsigned int last_stamp;
+ driOptionCache dri_option_cache;
+ drm_context_t drm_context;
+ drmLock drm_lock;
+};
+
+/* Context hooks installed in driDriverAPI (CreateContext, DestroyContext,
+ * MakeCurrent and UnbindContext respectively). */
+extern GLboolean nouveau_context_create(const __GLcontextModes *,
+ __DRIcontextPrivate *, void *);
+extern void nouveau_context_destroy(__DRIcontextPrivate *);
+extern GLboolean nouveau_context_bind(__DRIcontextPrivate *,
+ __DRIdrawablePrivate *draw,
+ __DRIdrawablePrivate *read);
+extern GLboolean nouveau_context_unbind(__DRIcontextPrivate *);
+
+/* Category-filtered debug output.  DBG(BO, "fmt", ...) prints through
+ * NOUVEAU_ERR only when the matching DEBUG_<flag> bit is set in
+ * __nouveau_debug; in non-DEBUG builds it expands to nothing, so its
+ * arguments are not evaluated. */
+#ifdef DEBUG
+extern int __nouveau_debug;
+
+#define DEBUG_BO (1 << 0)
+
+#define DBG(flag, ...) do { \
+ if (__nouveau_debug & (DEBUG_##flag)) \
+ NOUVEAU_ERR(__VA_ARGS__); \
+} while(0)
+#else
+#define DBG(flag, ...)
+#endif
+
+#endif
diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.c
new file mode 100644
index 0000000000..964a9028aa
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.c
@@ -0,0 +1,259 @@
+#include <utils.h>
+#include <vblank.h>
+#include <xmlpool.h>
+
+#include <pipe/p_context.h>
+#include <state_tracker/st_public.h>
+#include <state_tracker/st_cb_fbo.h>
+#include <nouveau_drm.h>
+#include "../common/nouveau_dri.h"
+#include "../common/nouveau_local.h"
+#include "nouveau_context_dri.h"
+#include "nouveau_screen_dri.h"
+#include "nouveau_swapbuffers.h"
+
+#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 12
+#error nouveau_drm.h version does not match expected version
+#endif
+
+/* Extension stuff, enabling of extensions handled by Gallium's GL state
+ * tracker. But, we still need to define the entry points we want.
+ */
+#define need_GL_ARB_fragment_program
+#define need_GL_ARB_multisample
+#define need_GL_ARB_occlusion_query
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_shader_objects
+#define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_program
+#define need_GL_ARB_vertex_shader
+#define need_GL_ARB_vertex_buffer_object
+#define need_GL_EXT_compiled_vertex_array
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_secondary_color
+#define need_GL_EXT_framebuffer_object
+#define need_GL_VERSION_2_0
+#define need_GL_VERSION_2_1
+#include "extension_helper.h"
+
+/* Extension name -> entry point table passed to driInitExtensions() at
+ * screen creation.  Terminated by a NULL name. */
+const struct dri_extension card_extensions[] =
+{
+ { "GL_ARB_multisample", GL_ARB_multisample_functions },
+ { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions },
+ { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions },
+ { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
+ { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
+ { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
+ { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions },
+ { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions },
+ { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions },
+ { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions },
+ { "GL_EXT_compiled_vertex_array", GL_EXT_compiled_vertex_array_functions },
+ { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+ { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+ { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions },
+ { NULL, 0 }
+};
+
+/* driconf option description exported by the driver; currently empty, so
+ * the option count below is 0. */
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+DRI_CONF_END;
+static const GLuint __driNConfigOptions = 0;
+
+extern const struct dri_extension common_extensions[];
+extern const struct dri_extension nv40_extensions[];
+
+/* DRI CreateBuffer hook.
+ *
+ * Creates the state tracker framebuffer for a window drawable, choosing
+ * colour, depth and stencil formats from the GL visual.  Pixmap drawables
+ * are not supported.  Returns GL_TRUE on success, GL_FALSE otherwise.
+ */
+static GLboolean
+nouveau_create_buffer(__DRIscreenPrivate * driScrnPriv,
+		      __DRIdrawablePrivate * driDrawPriv,
+		      const __GLcontextModes *glVis, GLboolean pixmapBuffer)
+{
+	struct nouveau_framebuffer *nvfb;
+	enum pipe_format colour, depth, stencil;
+
+	if (pixmapBuffer)
+		return GL_FALSE;
+
+	nvfb = CALLOC_STRUCT(nouveau_framebuffer);
+	if (!nvfb)
+		return GL_FALSE;
+
+	if (glVis->redBits == 5)
+		colour = PIPE_FORMAT_R5G6B5_UNORM;
+	else
+		colour = PIPE_FORMAT_A8R8G8B8_UNORM;
+
+	if (glVis->depthBits == 16)
+		depth = PIPE_FORMAT_Z16_UNORM;
+	else if (glVis->depthBits == 24)
+		depth = PIPE_FORMAT_Z24S8_UNORM;
+	else
+		depth = PIPE_FORMAT_NONE;
+
+	if (glVis->stencilBits == 8)
+		stencil = PIPE_FORMAT_Z24S8_UNORM;
+	else
+		stencil = PIPE_FORMAT_NONE;
+
+	nvfb->stfb = st_create_framebuffer(glVis, colour, depth, stencil,
+					   driDrawPriv->w, driDrawPriv->h,
+					   (void*)nvfb);
+	if (!nvfb->stfb) {
+		/* Release with FREE() to match the CALLOC_STRUCT() above. */
+		FREE(nvfb);
+		return GL_FALSE;
+	}
+
+	driDrawPriv->driverPrivate = (void *)nvfb;
+	return GL_TRUE;
+}
+
+/* DRI DestroyBuffer hook: drop the state tracker framebuffer reference
+ * and free the driver-private drawable data allocated by
+ * nouveau_create_buffer(). */
+static void
+nouveau_destroy_buffer(__DRIdrawablePrivate * driDrawPriv)
+{
+	struct nouveau_framebuffer *nvfb;
+
+	nvfb = (struct nouveau_framebuffer *)driDrawPriv->driverPrivate;
+	st_unreference_framebuffer(nvfb->stfb);
+	/* Allocated with CALLOC_STRUCT(), so release with FREE(). */
+	FREE(nvfb);
+}
+
+/* Build the list of framebuffer configs advertised for the screen.
+ *
+ * pixel_bits selects RGB565 (16) or BGRA8888 (anything else).  Up to three
+ * depth/stencil combinations are offered when a depth or stencil size is
+ * requested.  With have_back_buffer set, every entry of back_buffer_modes
+ * is offered; otherwise only the first one.
+ *
+ * Returns the config array from driCreateConfigs(), or NULL on failure.
+ */
+static __DRIconfig **
+nouveau_fill_in_modes(__DRIscreenPrivate *psp,
+		      unsigned pixel_bits, unsigned depth_bits,
+		      unsigned stencil_bits, GLboolean have_back_buffer)
+{
+	__DRIconfig **configs;
+	unsigned depth_buffer_factor;
+	unsigned back_buffer_factor;
+	GLenum fb_format;
+	GLenum fb_type;
+
+	static const GLenum back_buffer_modes[] = {
+		GLX_NONE, GLX_SWAP_UNDEFINED_OML,
+	};
+
+	uint8_t depth_bits_array[3];
+	uint8_t stencil_bits_array[3];
+	uint8_t msaa_samples_array[1];
+
+	depth_bits_array[0] = 0;
+	depth_bits_array[1] = depth_bits;
+	depth_bits_array[2] = depth_bits;
+
+	/* Just like with the accumulation buffer, always provide some modes
+	 * with a stencil buffer. It will be a sw fallback, but some apps won't
+	 * care about that.
+	 */
+	stencil_bits_array[0] = 0;
+	stencil_bits_array[1] = 0;
+	if (depth_bits == 24)
+		stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
+	stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
+
+	msaa_samples_array[0] = 0;
+
+	depth_buffer_factor =
+		((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
+
+	/* The count handed to driCreateConfigs() must not exceed the number
+	 * of entries in back_buffer_modes[]; derive it from the array instead
+	 * of hard-coding 3, which read one element past the end. */
+	back_buffer_factor = (have_back_buffer)
+		? (unsigned)(sizeof(back_buffer_modes) /
+			     sizeof(back_buffer_modes[0]))
+		: 1;
+
+	if (pixel_bits == 16) {
+		fb_format = GL_RGB;
+		fb_type = GL_UNSIGNED_SHORT_5_6_5;
+	}
+	else {
+		fb_format = GL_BGRA;
+		fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+	}
+
+	configs = driCreateConfigs(fb_format, fb_type,
+				   depth_bits_array, stencil_bits_array,
+				   depth_buffer_factor, back_buffer_modes,
+				   back_buffer_factor, msaa_samples_array, 1);
+	if (configs == NULL) {
+		fprintf(stderr, "[%s:%u] Error creating FBConfig!\n",
+			__func__, __LINE__);
+		return NULL;
+	}
+
+	return configs;
+}
+
+/* DRI InitScreen hook.
+ *
+ * Verifies the DRI/DDX/DRM versions and the size of the DDX private blob,
+ * registers the extension entry points, opens the device and returns the
+ * framebuffer configs for the screen.
+ *
+ * Returns NULL on any failure; in that case nothing allocated here is left
+ * behind and psp->private is not left pointing at freed memory.
+ */
+static const __DRIconfig **
+nouveau_screen_create(__DRIscreenPrivate *psp)
+{
+	struct nouveau_dri *nv_dri = psp->pDevPriv;
+	struct nouveau_screen_dri *nv_screen;
+	__DRIconfig **configs;
+	static const __DRIversion ddx_expected =
+		{ 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL };
+	static const __DRIversion dri_expected = { 4, 0, 0 };
+	static const __DRIversion drm_expected =
+		{ 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL };
+
+	if (!driCheckDriDdxDrmVersions2("nouveau",
+					&psp->dri_version, &dri_expected,
+					&psp->ddx_version, &ddx_expected,
+					&psp->drm_version, &drm_expected)) {
+		return NULL;
+	}
+
+	/* The DRM interface is only compatible at the exact patch level. */
+	if (drm_expected.patch != psp->drm_version.patch) {
+		fprintf(stderr, "Incompatible DRM patch level.\n"
+				"Expected: %d\n" "Current : %d\n",
+			drm_expected.patch, psp->drm_version.patch);
+		return NULL;
+	}
+
+	driInitExtensions(NULL, card_extensions, GL_FALSE);
+
+	if (psp->devPrivSize != sizeof(struct nouveau_dri)) {
+		NOUVEAU_ERR("DRI struct mismatch between DDX/DRI\n");
+		return NULL;
+	}
+
+	nv_screen = CALLOC_STRUCT(nouveau_screen_dri);
+	if (!nv_screen)
+		return NULL;
+
+	driParseOptionInfo(&nv_screen->option_cache,
+			   __driConfigOptions, __driNConfigOptions);
+
+	if (nouveau_screen_init(nv_dri, psp->fd, &nv_screen->base)) {
+		FREE(nv_screen);
+		return NULL;
+	}
+
+	nv_screen->driScrnPriv = psp;
+	psp->private = (void *)nv_screen;
+
+	configs = nouveau_fill_in_modes(psp, nv_dri->bpp,
+					(nv_dri->bpp == 16) ? 16 : 24,
+					(nv_dri->bpp == 16) ? 0 : 8, 1);
+	if (!configs) {
+		/* Screen creation failed: release the device and the screen
+		 * structure instead of leaking them (same teardown as
+		 * nouveau_screen_destroy()). */
+		psp->private = NULL;
+		nouveau_screen_cleanup(&nv_screen->base);
+		FREE(nv_screen);
+		return NULL;
+	}
+
+	return (const __DRIconfig **)configs;
+}
+
+/* DRI DestroyScreen hook: detach the driver-private screen from the DRI
+ * screen, close the device and free the screen structure. */
+static void
+nouveau_screen_destroy(__DRIscreenPrivate *driScrnPriv)
+{
+ struct nouveau_screen_dri *nv_screen = driScrnPriv->private;
+
+ driScrnPriv->private = NULL;
+ nouveau_screen_cleanup(&nv_screen->base);
+ FREE(nv_screen);
+}
+
+/* Driver entry point table looked up by the DRI common code.  Only the
+ * legacy InitScreen path is provided; InitScreen2 is left NULL. */
+const struct __DriverAPIRec
+driDriverAPI = {
+ .InitScreen = nouveau_screen_create,
+ .DestroyScreen = nouveau_screen_destroy,
+ .CreateContext = nouveau_context_create,
+ .DestroyContext = nouveau_context_destroy,
+ .CreateBuffer = nouveau_create_buffer,
+ .DestroyBuffer = nouveau_destroy_buffer,
+ .SwapBuffers = nouveau_swap_buffers,
+ .MakeCurrent = nouveau_context_bind,
+ .UnbindContext = nouveau_context_unbind,
+ .CopySubBuffer = nouveau_copy_sub_buffer,
+
+ .InitScreen2 = NULL, /* one day, I promise! */
+};
+
diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.h
new file mode 100644
index 0000000000..1498087819
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.h
@@ -0,0 +1,13 @@
+#ifndef __NOUVEAU_SCREEN_DRI_H__
+#define __NOUVEAU_SCREEN_DRI_H__
+
+#include "../common/nouveau_screen.h"
+#include "xmlconfig.h"
+
+/* DRI-side screen state: the common nouveau screen plus DRI bookkeeping. */
+struct nouveau_screen_dri {
+ /* Common nouveau screen; initialised by nouveau_screen_init(). */
+ struct nouveau_screen base;
+ /* DRI screen this driver screen was created for. */
+ __DRIscreenPrivate *driScrnPriv;
+ /* Option cache filled by driParseOptionInfo() at screen creation. */
+ driOptionCache option_cache;
+};
+
+#endif
diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c
new file mode 100644
index 0000000000..58cb6f7265
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c
@@ -0,0 +1,113 @@
+#include <main/glheader.h>
+#include <glapi/glthread.h>
+#include <GL/internal/glcore.h>
+
+#include <pipe/p_context.h>
+#include <state_tracker/st_public.h>
+#include <state_tracker/st_context.h>
+#include <state_tracker/st_cb_fbo.h>
+
+#include "../common/nouveau_local.h"
+#include "nouveau_context_dri.h"
+#include "nouveau_screen_dri.h"
+#include "nouveau_swapbuffers.h"
+
+/*
+ * Blit `surf` onto the context's front buffer, one surface_copy per cliprect
+ * of the drawable, then fire the ring. The cliprect list is read only after
+ * the hardware lock is taken.
+ *
+ * NOTE: `rect` is currently not read; the whole visible area is always
+ * copied.
+ */
+void
+nouveau_copy_buffer(__DRIdrawablePrivate *dPriv, struct pipe_surface *surf,
+                    const drm_clip_rect_t *rect)
+{
+    struct nouveau_context_dri *nv = dPriv->driContextPriv->driverPrivate;
+    struct pipe_context *pipe = nv->base.nvc->pctx[nv->base.pctx_id];
+    drm_clip_rect_t *boxes;
+    int num_boxes, b;
+
+    LOCK_HARDWARE(&nv->base);
+
+    /* Nothing visible: release the lock and leave without touching the
+     * ring or the framebuffer size. */
+    if (dPriv->numClipRects == 0) {
+        UNLOCK_HARDWARE(&nv->base);
+        return;
+    }
+
+    boxes = dPriv->pClipRects;
+    num_boxes = dPriv->numClipRects;
+
+    for (b = 0; b < num_boxes; b++) {
+        const drm_clip_rect_t *box = &boxes[b];
+        /* Cliprects are in screen space; the source surface is addressed
+         * relative to the drawable origin. */
+        int src_x = box->x1 - dPriv->x;
+        int src_y = box->y1 - dPriv->y;
+        int dst_x = box->x1;
+        int dst_y = box->y1;
+        int width = box->x2 - box->x1;
+        int height = box->y2 - box->y1;
+
+        pipe->surface_copy(pipe, FALSE, nv->base.frontbuffer,
+                           dst_x, dst_y, surf, src_x, src_y, width, height);
+    }
+
+    FIRE_RING(nv->base.nvc->channel);
+    UNLOCK_HARDWARE(&nv->base);
+
+    /* The drawable changed since this context last looked at it: resize
+     * the state tracker framebuffer and remember the new stamp. */
+    if (nv->last_stamp != dPriv->lastStamp) {
+        struct nouveau_framebuffer *nvfb = dPriv->driverPrivate;
+
+        st_resize_framebuffer(nvfb->stfb, dPriv->w, dPriv->h);
+        nv->last_stamp = dPriv->lastStamp;
+    }
+}
+
+/*
+ * CopySubBuffer hook: build a cliprect from (x, y, w, h) and hand the
+ * back-left surface to nouveau_copy_buffer(). Does nothing when the
+ * framebuffer has no back-left surface.
+ */
+void
+nouveau_copy_sub_buffer(__DRIdrawablePrivate *dPriv, int x, int y, int w, int h)
+{
+    struct nouveau_framebuffer *fb = dPriv->driverPrivate;
+    struct pipe_surface *back;
+    drm_clip_rect_t sub;
+
+    st_get_framebuffer_surface(fb->stfb, ST_SURFACE_BACK_LEFT, &back);
+    if (!back)
+        return;
+
+    sub.x1 = x;
+    sub.y1 = y;
+    sub.x2 = x + w;
+    sub.y2 = y + h;
+
+    st_notify_swapbuffers(fb->stfb);
+    nouveau_copy_buffer(dPriv, back, &sub);
+}
+
+/*
+ * SwapBuffers hook: notify the state tracker and copy the back-left surface
+ * to the front buffer. Does nothing when there is no back-left surface.
+ */
+void
+nouveau_swap_buffers(__DRIdrawablePrivate *dPriv)
+{
+    struct nouveau_framebuffer *fb = dPriv->driverPrivate;
+    struct pipe_surface *back;
+
+    st_get_framebuffer_surface(fb->stfb, ST_SURFACE_BACK_LEFT, &back);
+    if (!back)
+        return;
+
+    st_notify_swapbuffers(fb->stfb);
+    nouveau_copy_buffer(dPriv, back, NULL);
+}
+
+/*
+ * Winsys flush_frontbuffer callback: copy `surf` to the drawable recorded in
+ * the nouveau DRI context passed as `context_private`. `pws` is not used.
+ */
+void
+nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf,
+                          void *context_private)
+{
+    struct nouveau_context_dri *nv_dri = context_private;
+
+    nouveau_copy_buffer(nv_dri->dri_drawable, surf, NULL);
+}
+
+/*
+ * Contended-lock handler: re-validate the drawable information of the
+ * context's current drawable, if it has one.
+ */
+void
+nouveau_contended_lock(struct nouveau_context *nv)
+{
+    struct nouveau_context_dri *nv_dri = (struct nouveau_context_dri *)nv;
+    __DRIdrawablePrivate *drawable = nv_dri->dri_drawable;
+    __DRIscreenPrivate *screen = nv_dri->dri_screen;
+
+    if (!drawable)
+        return;
+
+    /* If the window moved, may need to set a new cliprect now.
+     *
+     * NOTE: This releases and regains the hw lock, so all state
+     * checking must be done *after* this call:
+     */
+    DRI_VALIDATE_DRAWABLE_INFO(screen, drawable);
+}
+
diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h
new file mode 100644
index 0000000000..825d3da6da
--- /dev/null
+++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h
@@ -0,0 +1,10 @@
+#ifndef __NOUVEAU_SWAPBUFFERS_H__
+#define __NOUVEAU_SWAPBUFFERS_H__
+
+/* Copy a surface to the front buffer, one blit per drawable cliprect.
+ * NOTE(review): the cliprect argument is accepted but not read by the
+ * implementation in nouveau_swapbuffers.c. */
+extern void nouveau_copy_buffer(__DRIdrawablePrivate *, struct pipe_surface *,
+ const drm_clip_rect_t *);
+/* CopySubBuffer hook: present the back-left surface for the given rectangle. */
+extern void nouveau_copy_sub_buffer(__DRIdrawablePrivate *,
+ int x, int y, int w, int h);
+/* SwapBuffers hook: present the back-left surface. */
+extern void nouveau_swap_buffers(__DRIdrawablePrivate *);
+
+#endif
diff --git a/src/gallium/winsys/drm/radeon/Makefile b/src/gallium/winsys/drm/radeon/Makefile
new file mode 100644
index 0000000000..dca1e3233a
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/Makefile
@@ -0,0 +1,32 @@
+
+# Builds radeon_dri.so from the radeon winsys sources listed below, linked
+# against the softpipe and r300 pipe driver archives.
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = radeon_dri.so
+
+MINIGLX_SOURCES =
+
+# Static pipe driver libraries linked into the DRI driver.
+PIPE_DRIVERS = \
+ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+ $(TOP)/src/gallium/drivers/r300/libr300.a
+
+# Winsys sources specific to this driver.
+DRIVER_SOURCES = \
+ radeon_buffer.c \
+ radeon_context.c \
+ radeon_r300.c \
+ radeon_screen.c \
+ radeon_winsys_softpipe.c
+
+C_SOURCES = \
+ $(COMMON_GALLIUM_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+# Include path for the r300 driver headers (r300_winsys.h).
+DRIVER_DEFINES = -I../../../drivers/r300
+
+include ../Makefile.template
+
+DRI_LIB_DEPS += -ldrm_radeon
+
+# Empty target: nothing to symlink for this driver.
+# NOTE(review): presumably expected by the shared template -- confirm.
+symlinks:
diff --git a/src/gallium/winsys/drm/radeon/SConscript b/src/gallium/winsys/drm/radeon/SConscript
new file mode 100644
index 0000000000..2435211a32
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/SConscript
@@ -0,0 +1,29 @@
+Import('*')
+
+if 'mesa' in env['statetrackers']:
+
+ env = drienv.Clone()
+
+ DRIVER_SOURCES = [
+ 'radeon_buffer.c',
+ 'radeon_context.c',
+ 'radeon_screen.c',
+ 'radeon_winsys_softpipe.c',
+ ]
+
+ sources = \
+ COMMON_GALLIUM_SOURCES + \
+ DRIVER_SOURCES
+
+ drivers = [
+ softpipe,
+ r300
+ ]
+
+ # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
+ env.SharedLibrary(
+ target ='radeon_dri.so',
+ source = sources,
+ LIBS = drivers + mesa + auxiliaries + env['LIBS'],
+ )
+
diff --git a/src/gallium/winsys/drm/radeon/radeon_buffer.c b/src/gallium/winsys/drm/radeon/radeon_buffer.c
new file mode 100644
index 0000000000..259a505c0a
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_buffer.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <stdio.h>
+#include "dri_util.h"
+#include "state_tracker/st_public.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "radeon_buffer.h"
+#include "radeon_screen.h"
+#include "radeon_context.h"
+#include "radeon_bo.h"
+#include "radeon_drm.h"
+
+/* Winsys get_name hook: returns the fixed name of this winsys. */
+static const char *radeon_get_name(struct pipe_winsys *ws)
+{
+    static const char winsys_name[] = "RADEON/DRI2";
+
+    return winsys_name;
+}
+
+/*
+ * Winsys buffer_create hook: allocate a pipe buffer backed by a new radeon
+ * buffer object.
+ *
+ * The placement domain is derived from `usage`: pixel buffers ask for VRAM,
+ * vertex and index buffers ask for GTT. A usage with none of these bits
+ * leaves the domain at 0.
+ *
+ * Returns the new buffer (refcount 1), or NULL if either the wrapper or the
+ * buffer object cannot be allocated.
+ */
+static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws,
+                                                unsigned alignment,
+                                                unsigned usage,
+                                                unsigned size)
+{
+    struct radeon_pipe_winsys *radeon_ws = (struct radeon_pipe_winsys *)ws;
+    struct radeon_pipe_buffer *radeon_buffer;
+    uint32_t domain = 0;
+
+    radeon_buffer = calloc(1, sizeof(*radeon_buffer));
+    if (radeon_buffer == NULL) {
+        return NULL;
+    }
+    radeon_buffer->base.refcount = 1;
+    radeon_buffer->base.alignment = alignment;
+    radeon_buffer->base.usage = usage;
+    radeon_buffer->base.size = size;
+
+    if (usage & PIPE_BUFFER_USAGE_PIXEL) {
+        domain |= RADEON_GEM_DOMAIN_VRAM;
+    }
+    if (usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX)) {
+        domain |= RADEON_GEM_DOMAIN_GTT;
+    }
+
+    radeon_buffer->bo = radeon_bo_open(radeon_ws->radeon_screen->bom, 0,
+                                       size, alignment, domain, 0);
+    if (radeon_buffer->bo == NULL) {
+        /* Do not hand back a pointer into the wrapper we just freed. */
+        free(radeon_buffer);
+        return NULL;
+    }
+    return &radeon_buffer->base;
+}
+
+/*
+ * Winsys user_buffer_create hook: create a buffer of `bytes` bytes and copy
+ * the caller's data at `ptr` into it.
+ *
+ * Returns the new buffer, or NULL if the buffer cannot be created or mapped
+ * for writing.
+ */
+static struct pipe_buffer *radeon_buffer_user_create(struct pipe_winsys *ws,
+                                                     void *ptr,
+                                                     unsigned bytes)
+{
+    struct radeon_pipe_buffer *radeon_buffer;
+
+    radeon_buffer = (struct radeon_pipe_buffer*)radeon_buffer_create(ws, 0, 0, bytes);
+    if (radeon_buffer == NULL) {
+        return NULL;
+    }
+    /* A non-zero result means the mapping failed and bo->ptr must not be
+     * written; release what was just created instead. */
+    if (radeon_bo_map(radeon_buffer->bo, 1)) {
+        radeon_bo_unref(radeon_buffer->bo);
+        free(radeon_buffer);
+        return NULL;
+    }
+    memcpy(radeon_buffer->bo->ptr, ptr, bytes);
+    radeon_bo_unmap(radeon_buffer->bo);
+    return &radeon_buffer->base;
+}
+
+/* Winsys buffer_destroy hook: drop the buffer object reference and free the
+ * pipe buffer wrapper. */
+static void radeon_buffer_del(struct pipe_winsys *ws, struct pipe_buffer *buffer)
+{
+    struct radeon_pipe_buffer *rbuf = (struct radeon_pipe_buffer *)buffer;
+
+    radeon_bo_unref(rbuf->bo);
+    free(rbuf);
+}
+
+/*
+ * Winsys buffer_map hook: map the backing buffer object, for writing when
+ * PIPE_BUFFER_USAGE_CPU_WRITE is set in `flags`.
+ *
+ * Returns the mapped address, or NULL if the mapping fails.
+ */
+static void *radeon_buffer_map(struct pipe_winsys *ws,
+                               struct pipe_buffer *buffer,
+                               unsigned flags)
+{
+    struct radeon_pipe_buffer *rbuf = (struct radeon_pipe_buffer *)buffer;
+    const int for_write = (flags & PIPE_BUFFER_USAGE_CPU_WRITE) ? 1 : 0;
+
+    if (radeon_bo_map(rbuf->bo, for_write) != 0) {
+        return NULL;
+    }
+    return rbuf->bo->ptr;
+}
+
+/* Winsys buffer_unmap hook: unmap the backing buffer object. */
+static void radeon_buffer_unmap(struct pipe_winsys *ws, struct pipe_buffer *buffer)
+{
+    struct radeon_pipe_buffer *rbuf = (struct radeon_pipe_buffer *)buffer;
+
+    radeon_bo_unmap(rbuf->bo);
+}
+
+/* Winsys fence_reference hook. Stub: does nothing; in particular *ptr is
+ * left untouched. */
+static void radeon_fence_reference(struct pipe_winsys *ws,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *pfence)
+{
+}
+
+/* Winsys fence_signalled hook. Stub: ignores its arguments and always
+ * returns 1.
+ * NOTE(review): confirm that 1 is the value the winsys interface expects
+ * for a fence that has already signalled. */
+static int radeon_fence_signalled(struct pipe_winsys *ws,
+ struct pipe_fence_handle *pfence,
+ unsigned flag)
+{
+ return 1;
+}
+
+/* Winsys fence_finish hook. Stub: does not wait on anything and always
+ * returns 0. */
+static int radeon_fence_finish(struct pipe_winsys *ws,
+ struct pipe_fence_handle *pfence,
+ unsigned flag)
+{
+ return 0;
+}
+
+/* Winsys flush_frontbuffer hook. Not implemented yet: the body is empty, so
+ * nothing is presented from here. */
+static void radeon_flush_frontbuffer(struct pipe_winsys *pipe_winsys,
+ struct pipe_surface *pipe_surface,
+ void *context_private)
+{
+ /* TODO: call dri2CopyRegion */
+}
+
+/*
+ * Allocate a radeon pipe_winsys bound to `radeon_screen` and fill in its
+ * callback table.
+ *
+ * Returns the embedded pipe_winsys, or NULL if the allocation fails.
+ */
+struct pipe_winsys *radeon_pipe_winsys(struct radeon_screen *radeon_screen)
+{
+    struct radeon_pipe_winsys *rws = calloc(1, sizeof(*rws));
+    struct pipe_winsys *ws;
+
+    if (rws == NULL) {
+        return NULL;
+    }
+    rws->radeon_screen = radeon_screen;
+    ws = &rws->winsys;
+
+    /* Presentation */
+    ws->flush_frontbuffer = radeon_flush_frontbuffer;
+
+    /* Buffer management */
+    ws->buffer_create = radeon_buffer_create;
+    ws->buffer_destroy = radeon_buffer_del;
+    ws->user_buffer_create = radeon_buffer_user_create;
+    ws->buffer_map = radeon_buffer_map;
+    ws->buffer_unmap = radeon_buffer_unmap;
+
+    /* Fences (stubs) */
+    ws->fence_reference = radeon_fence_reference;
+    ws->fence_signalled = radeon_fence_signalled;
+    ws->fence_finish = radeon_fence_finish;
+
+    ws->get_name = radeon_get_name;
+
+    return ws;
+}
+
+/*
+ * Wrap the existing buffer object identified by `handle` in a pipe buffer
+ * marked for pixel usage.
+ *
+ * Returns the new buffer (refcount 1), or NULL if the buffer object cannot
+ * be opened or the wrapper cannot be allocated.
+ */
+static struct pipe_buffer *radeon_buffer_from_handle(struct radeon_screen *radeon_screen,
+                                                     uint32_t handle)
+{
+    struct radeon_pipe_buffer *rbuf;
+    struct radeon_bo *opened;
+
+    opened = radeon_bo_open(radeon_screen->bom, handle, 0, 0, 0, 0);
+    if (!opened) {
+        return NULL;
+    }
+
+    rbuf = calloc(1, sizeof(*rbuf));
+    if (!rbuf) {
+        /* Give back the reference taken by radeon_bo_open(). */
+        radeon_bo_unref(opened);
+        return NULL;
+    }
+
+    rbuf->base.refcount = 1;
+    rbuf->base.usage = PIPE_BUFFER_USAGE_PIXEL;
+    rbuf->bo = opened;
+    return &rbuf->base;
+}
+
+/*
+ * Build a pipe surface on top of the buffer object identified by `handle`.
+ *
+ * The buffer is wrapped in a 2D display-target texture of size w x h with
+ * the given format and pitch, and level 0 of that texture is returned as a
+ * GPU-writable surface.
+ *
+ * Returns the surface, or NULL if the buffer cannot be opened or the screen
+ * refuses to blanket a texture over it.
+ */
+struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context,
+                                                uint32_t handle,
+                                                enum pipe_format format,
+                                                int w, int h, int pitch)
+{
+    struct pipe_screen *pipe_screen = radeon_context->pipe_screen;
+    struct pipe_texture tmpl;
+    struct pipe_texture *pt;
+    struct pipe_buffer *pb;
+
+    pb = radeon_buffer_from_handle(radeon_context->radeon_screen, handle);
+    if (pb == NULL) {
+        return NULL;
+    }
+    memset(&tmpl, 0, sizeof(tmpl));
+    tmpl.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
+    tmpl.target = PIPE_TEXTURE_2D;
+    tmpl.width[0] = w;
+    tmpl.height[0] = h;
+    tmpl.depth[0] = 1;
+    tmpl.format = format;
+    pf_get_block(tmpl.format, &tmpl.block);
+    tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w);
+    tmpl.nblocksy[0] = pf_get_nblocksy(&tmpl.block, h);
+
+    pt = pipe_screen->texture_blanket(pipe_screen, &tmpl, &pitch, pb);
+    if (pt == NULL) {
+        /* No texture to take a surface from: drop the buffer and report
+         * the failure instead of calling get_tex_surface() with NULL. */
+        pipe_buffer_reference(pipe_screen, &pb, NULL);
+        return NULL;
+    }
+    /* NOTE(review): the local reference on pb is kept on success; confirm
+     * whether texture_blanket() takes its own reference. */
+    return pipe_screen->get_tex_surface(pipe_screen, pt, 0, 0, 0,
+                                        PIPE_BUFFER_USAGE_GPU_WRITE);
+}
diff --git a/src/gallium/winsys/drm/radeon/radeon_buffer.h b/src/gallium/winsys/drm/radeon/radeon_buffer.h
new file mode 100644
index 0000000000..c626c20229
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_buffer.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_BUFFER_H
+#define RADEON_BUFFER_H
+
+#include "pipe/internal/p_winsys_screen.h"
+#include "radeon_screen.h"
+#include "radeon_context.h"
+#include "radeon_bo.h"
+
+/* A pipe_buffer backed by a radeon buffer object. `base` is the first
+ * member, and radeon_buffer.c casts pipe_buffer pointers to this type. */
+struct radeon_pipe_buffer {
+ struct pipe_buffer base;
+ /* Backing buffer object, obtained with radeon_bo_open(). */
+ struct radeon_bo *bo;
+};
+
+/* The radeon pipe_winsys: the generic callback table plus the screen whose
+ * buffer manager is used for allocations. `winsys` is the first member, and
+ * radeon_buffer.c casts pipe_winsys pointers to this type. */
+struct radeon_pipe_winsys {
+ struct pipe_winsys winsys;
+ struct radeon_screen *radeon_screen;
+};
+
+/* Allocate a pipe_winsys bound to the given screen; NULL on allocation
+ * failure. */
+struct pipe_winsys *radeon_pipe_winsys(struct radeon_screen *radeon_screen);
+/* Wrap the buffer object named by `handle` in a w x h surface of the given
+ * format and pitch. */
+struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context,
+ uint32_t handle,
+ enum pipe_format format,
+ int w, int h, int pitch);
+
+#endif
diff --git a/src/gallium/winsys/drm/radeon/radeon_context.c b/src/gallium/winsys/drm/radeon/radeon_context.c
new file mode 100644
index 0000000000..13a7035fec
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_context.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <stdio.h>
+#include "dri_util.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "state_tracker/st_public.h"
+#include "state_tracker/st_context.h"
+#include "radeon_screen.h"
+#include "radeon_context.h"
+#include "radeon_buffer.h"
+#include "radeon_winsys_softpipe.h"
+
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_vertex_buffer_object
+#define need_GL_EXT_cull_vertex
+#define need_GL_EXT_compiled_vertex_array
+#include "extension_helper.h"
+
+/**
+ * Extension strings exported by the radeon driver.
+ *
+ * The table is terminated by a {NULL, NULL} entry and is handed to
+ * driInitExtensions() when a context is created.
+ */
+const struct dri_extension radeon_card_extensions[] = {
+/* XXX these are technically not supported
+ {"GL_ARB_texture_rectangle", NULL},
+ {"GL_ARB_pixel_buffer_object", NULL}, */
+ {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
+ {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
+ {"GL_EXT_compiled_vertex_array", GL_EXT_compiled_vertex_array_functions},
+ {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions},
+ {NULL, NULL}
+};
+
+/*
+ * Map a DRI2 buffer attachment to the state tracker surface slot it feeds.
+ * Sets *is_depth for depth/stencil attachments. Returns 0 for attachments
+ * this driver does not handle.
+ */
+static int radeon_attachment_to_surface(unsigned attachment,
+                                        int *st_index,
+                                        int *is_depth)
+{
+    switch (attachment) {
+    case __DRI_BUFFER_FRONT_LEFT:
+        *st_index = ST_SURFACE_FRONT_LEFT;
+        *is_depth = 0;
+        return 1;
+    case __DRI_BUFFER_BACK_LEFT:
+        *st_index = ST_SURFACE_BACK_LEFT;
+        *is_depth = 0;
+        return 1;
+    case __DRI_BUFFER_STENCIL:
+    case __DRI_BUFFER_DEPTH:
+        *st_index = ST_SURFACE_DEPTH;
+        *is_depth = 1;
+        return 1;
+    case __DRI_BUFFER_ACCUM:
+    default:
+        return 0;
+    }
+}
+
+/*
+ * Pick the pipe format for a buffer with `cpp` bytes per pixel. Returns 0
+ * when the size is neither 4 nor 2.
+ */
+static int radeon_format_for_cpp(unsigned cpp, int is_depth,
+                                 enum pipe_format *format)
+{
+    switch (cpp) {
+    case 4:
+        *format = is_depth ? PIPE_FORMAT_Z24S8_UNORM
+                           : PIPE_FORMAT_A8R8G8B8_UNORM;
+        return 1;
+    case 2:
+        *format = is_depth ? PIPE_FORMAT_Z16_UNORM
+                           : PIPE_FORMAT_R5G6B5_UNORM;
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/*
+ * Ask the DRI2 loader for the buffers of `dri_drawable` and attach them as
+ * surfaces to the drawable's state tracker framebuffer, then resize the
+ * framebuffer to the size reported by the loader.
+ *
+ * Returns early, without the final resize, when the loader returns no
+ * buffers or when a buffer has an unhandled attachment or pixel size.
+ */
+static void radeon_update_renderbuffers(__DRIcontext *dri_context,
+                                        __DRIdrawable *dri_drawable)
+{
+    struct radeon_context *radeon_context = dri_context->driverPrivate;
+    __DRIscreen *screen = dri_drawable->driScreenPriv;
+    struct radeon_framebuffer *radeon_fb = dri_drawable->driverPrivate;
+    unsigned attachments[10];
+    __DRIbuffer *buffers;
+    int requested = 0;
+    /* `count` is first the attachment bit being tested and, after
+     * getBuffers(), the number of buffers the loader returned. */
+    int count, i;
+
+    /* Turn the attachment bitmask into the list the loader expects. */
+    for (count = 0; count < 6; count++) {
+        if (radeon_fb->attachments & (1 << count)) {
+            attachments[requested++] = count;
+        }
+    }
+
+    buffers = (*screen->dri2.loader->getBuffers)(dri_drawable,
+                                                 &dri_drawable->w,
+                                                 &dri_drawable->h,
+                                                 attachments,
+                                                 requested,
+                                                 &count,
+                                                 dri_drawable->loaderPrivate);
+    if (buffers == NULL) {
+        return;
+    }
+
+    /* set one cliprect to cover the whole dri_drawable */
+    dri_drawable->x = 0;
+    dri_drawable->y = 0;
+    dri_drawable->backX = 0;
+    dri_drawable->backY = 0;
+    dri_drawable->numClipRects = 1;
+    dri_drawable->pClipRects[0].x1 = 0;
+    dri_drawable->pClipRects[0].y1 = 0;
+    dri_drawable->pClipRects[0].x2 = dri_drawable->w;
+    dri_drawable->pClipRects[0].y2 = dri_drawable->h;
+    dri_drawable->numBackClipRects = 1;
+    dri_drawable->pBackClipRects[0].x1 = 0;
+    dri_drawable->pBackClipRects[0].y1 = 0;
+    dri_drawable->pBackClipRects[0].x2 = dri_drawable->w;
+    dri_drawable->pBackClipRects[0].y2 = dri_drawable->h;
+
+    for (i = 0; i < count; i++) {
+        struct pipe_surface *ps;
+        enum pipe_format format = 0;
+        int index = 0;
+        int is_depth = 0;
+
+        if (!radeon_attachment_to_surface(buffers[i].attachment,
+                                          &index, &is_depth)) {
+            fprintf(stderr,
+                    "unhandled buffer attach event, attacment type %d\n",
+                    buffers[i].attachment);
+            return;
+        }
+        if (!radeon_format_for_cpp(buffers[i].cpp, is_depth, &format)) {
+            /* FIXME: error */
+            return;
+        }
+
+        ps = radeon_surface_from_handle(radeon_context,
+                                        buffers[i].name,
+                                        format,
+                                        dri_drawable->w,
+                                        dri_drawable->h,
+                                        buffers[i].pitch);
+        assert(ps);
+        st_set_framebuffer_surface(radeon_fb->st_framebuffer, index, ps);
+    }
+    st_resize_framebuffer(radeon_fb->st_framebuffer,
+                          dri_drawable->w,
+                          dri_drawable->h);
+}
+
+/*
+ * CreateContext hook: allocate the radeon context, create its winsys and
+ * pipe context, and wrap the pipe context in a state tracker context.
+ *
+ * The r300 pipe driver is used unless the RADEON_SOFTPIPE environment
+ * variable is set, in which case softpipe is used instead.
+ *
+ * On success dri_context->driverPrivate points at the new context and
+ * GL_TRUE is returned. On any failure driverPrivate stays NULL and GL_FALSE
+ * is returned.
+ */
+GLboolean radeon_context_create(const __GLcontextModes *visual,
+                                __DRIcontextPrivate *dri_context,
+                                void *shared_context)
+{
+    __DRIscreenPrivate *dri_screen;
+    struct radeon_context *radeon_context;
+    struct radeon_screen *radeon_screen;
+    struct pipe_context *pipe;
+    struct st_context *shared_st_context = NULL;
+
+    dri_context->driverPrivate = NULL;
+    radeon_context = calloc(1, sizeof(*radeon_context));
+    if (radeon_context == NULL) {
+        return GL_FALSE;
+    }
+
+    if (shared_context) {
+        shared_st_context = ((struct radeon_context*)shared_context)->st_context;
+    }
+
+    dri_screen = dri_context->driScreenPriv;
+    radeon_screen = dri_screen->private;
+    radeon_context->dri_screen = dri_screen;
+    radeon_context->radeon_screen = radeon_screen;
+    radeon_context->drm_fd = dri_screen->fd;
+
+    radeon_context->pipe_winsys = radeon_pipe_winsys(radeon_screen);
+    if (radeon_context->pipe_winsys == NULL) {
+        free(radeon_context);
+        return GL_FALSE;
+    }
+
+    if (!getenv("RADEON_SOFTPIPE")) {
+        struct r300_winsys *r300_ws;
+
+        fprintf(stderr, "Creating r300 context...\n");
+        r300_ws = radeon_create_r300_winsys(radeon_context->drm_fd);
+        pipe = NULL;
+        if (r300_ws != NULL) {
+            pipe = r300_create_context(NULL,
+                                       radeon_context->pipe_winsys,
+                                       r300_ws);
+        }
+        if (pipe != NULL) {
+            radeon_context->pipe_screen = pipe->screen;
+        }
+    } else {
+        pipe = radeon_create_softpipe(radeon_context);
+    }
+    if (pipe == NULL) {
+        /* NOTE(review): the winsys objects created above are not released
+         * here because their ownership after a failed pipe creation is not
+         * visible from this file. */
+        free(radeon_context);
+        return GL_FALSE;
+    }
+
+    radeon_context->st_context = st_create_context(pipe, visual,
+                                                   shared_st_context);
+    if (radeon_context->st_context == NULL) {
+        /* Nothing to initialise extensions on; fail instead of
+         * dereferencing a NULL state tracker context. */
+        free(radeon_context);
+        return GL_FALSE;
+    }
+    driInitExtensions(radeon_context->st_context->ctx,
+                      radeon_card_extensions, GL_TRUE);
+    dri_context->driverPrivate = radeon_context;
+    return GL_TRUE;
+}
+
+/* DestroyContext hook: finish outstanding work, destroy the state tracker
+ * context and free the radeon context. */
+void radeon_context_destroy(__DRIcontextPrivate *dri_context)
+{
+    struct radeon_context *rctx = dri_context->driverPrivate;
+
+    st_finish(rctx->st_context);
+    st_destroy_context(rctx->st_context);
+    free(rctx);
+}
+
+/*
+ * MakeCurrent hook: make the context current with the given draw and read
+ * framebuffers, then refresh the renderbuffers of both drawables. A NULL
+ * context releases the current context. Always returns GL_TRUE.
+ */
+GLboolean radeon_context_bind(__DRIcontextPrivate *dri_context,
+                              __DRIdrawablePrivate *dri_drawable,
+                              __DRIdrawablePrivate *dri_readable)
+{
+    struct radeon_context *rctx;
+    struct radeon_framebuffer *draw_fb;
+    struct radeon_framebuffer *read_fb;
+
+    if (!dri_context) {
+        st_make_current(NULL, NULL, NULL);
+        return GL_TRUE;
+    }
+
+    rctx = dri_context->driverPrivate;
+    draw_fb = dri_drawable->driverPrivate;
+    read_fb = dri_readable->driverPrivate;
+    st_make_current(rctx->st_context,
+                    draw_fb->st_framebuffer,
+                    read_fb->st_framebuffer);
+
+    /* The read drawable only needs its own update when it is a different
+     * drawable from the one being drawn to. */
+    radeon_update_renderbuffers(dri_context, dri_drawable);
+    if (dri_readable != dri_drawable) {
+        radeon_update_renderbuffers(dri_context, dri_readable);
+    }
+    return GL_TRUE;
+}
+
+/* UnbindContext hook: flush the render cache of the context being unbound.
+ * Always returns GL_TRUE. */
+GLboolean radeon_context_unbind(__DRIcontextPrivate *dri_context)
+{
+    struct radeon_context *rctx = dri_context->driverPrivate;
+
+    st_flush(rctx->st_context, PIPE_FLUSH_RENDER_CACHE, NULL);
+    return GL_TRUE;
+}
diff --git a/src/gallium/winsys/drm/radeon/radeon_context.h b/src/gallium/winsys/drm/radeon/radeon_context.h
new file mode 100644
index 0000000000..d7222b4469
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_context.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_CONTEXT_H
+#define RADEON_CONTEXT_H
+
+#include "dri_util.h"
+#include "state_tracker/st_public.h"
+#include "state_tracker/st_context.h"
+#include "radeon_screen.h"
+
+#include "radeon_r300.h"
+
+/* Per-drawable driver state. */
+struct radeon_framebuffer {
+ /* State tracker framebuffer the DRI2 buffers are attached to. */
+ struct st_framebuffer *st_framebuffer;
+ /* Bitmask of attachments to request from the loader: bit n set means
+  * attachment n is requested (see radeon_update_renderbuffers()). */
+ unsigned attachments;
+};
+
+/* Per-context driver state, stored in the DRI context's driverPrivate. */
+struct radeon_context {
+ /* st */
+ struct st_context *st_context;
+ /* pipe */
+ struct pipe_screen *pipe_screen;
+ struct pipe_winsys *pipe_winsys;
+ /* DRI */
+ __DRIscreenPrivate *dri_screen;
+ __DRIdrawablePrivate *dri_drawable;
+ __DRIdrawablePrivate *dri_readable;
+ /* DRM */
+ /* File descriptor copied from the DRI screen at context creation. */
+ int drm_fd;
+ /* RADEON */
+ struct radeon_screen *radeon_screen;
+};
+
+/* Create the driver context for a DRI context; GL_FALSE on failure. */
+GLboolean radeon_context_create(const __GLcontextModes*,
+ __DRIcontextPrivate*,
+ void*);
+/* Destroy the state tracker context and free the driver context. */
+void radeon_context_destroy(__DRIcontextPrivate*);
+/* Make a context current with the given draw and read drawables; a NULL
+ * context releases the current one. */
+GLboolean radeon_context_bind(__DRIcontextPrivate*,
+ __DRIdrawablePrivate*,
+ __DRIdrawablePrivate*);
+/* Flush the render cache of the context being unbound. */
+GLboolean radeon_context_unbind(__DRIcontextPrivate*);
+
+#endif
diff --git a/src/gallium/winsys/drm/radeon/radeon_r300.c b/src/gallium/winsys/drm/radeon/radeon_r300.c
new file mode 100644
index 0000000000..8fe2375e34
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_r300.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_r300.h"
+
+/* r300_winsys check_cs hook. Stub: `size` is not examined and TRUE is always
+ * returned. */
+static boolean radeon_r300_check_cs(struct radeon_cs* cs, int size)
+{
+ /* XXX the requested size is not checked yet */
+ return TRUE;
+}
+
+/* r300_winsys write_cs_reloc hook: emit a relocation for the buffer object
+ * behind `pbuffer` with the given read domain, write domain and flags. */
+static void radeon_r300_write_cs_reloc(struct radeon_cs* cs,
+                                       struct pipe_buffer* pbuffer,
+                                       uint32_t rd,
+                                       uint32_t wd,
+                                       uint32_t flags)
+{
+    struct radeon_pipe_buffer *rbuf = (struct radeon_pipe_buffer *)pbuffer;
+
+    radeon_cs_write_reloc(cs, rbuf->bo, rd, wd, flags);
+}
+
+/* r300_winsys flush_cs hook: emit the command stream, then erase it. The
+ * results of both calls are ignored. */
+static void radeon_r300_flush_cs(struct radeon_cs* cs)
+{
+ radeon_cs_emit(cs);
+ radeon_cs_erase(cs);
+}
+
+/*
+ * Query one DRM_RADEON_GETPARAM value.
+ *
+ * On failure an error naming `caller` and `what` is printed to stderr and
+ * the process exits with status 1.
+ */
+static uint32_t radeon_getparam_or_die(int fd, int param,
+                                       const char *caller, const char *what)
+{
+    drm_radeon_getparam_t gp;
+    uint32_t target = 0;
+    int retval;
+
+    gp.param = param;
+    /* NOTE(review): the ioctl is expected to store an int-sized value
+     * through gp.value; target is a 32-bit integer -- confirm. */
+    gp.value = (int*)&target;
+
+    retval = drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+    if (retval) {
+        fprintf(stderr, "%s: Failed to get %s, error number %d\n",
+                caller, what, retval);
+        exit(1);
+    }
+    return target;
+}
+
+/* Helper function to do the ioctls needed for setup and init: fills in the
+ * PCI ID and the number of pixel pipes of `winsys`. Exits the process if
+ * either query fails. */
+static void do_ioctls(struct r300_winsys* winsys, int fd)
+{
+    /* First, get PCI ID */
+    winsys->pci_id = radeon_getparam_or_die(fd, RADEON_PARAM_DEVICE_ID,
+                                            __FUNCTION__, "PCI ID");
+
+    /* Then, get the number of pixel pipes */
+    winsys->gb_pipes = radeon_getparam_or_die(fd, RADEON_PARAM_NUM_GB_PIPES,
+                                              __FUNCTION__, "GB pipe count");
+}
+
+/*
+ * Create the r300 winsys for the DRM file descriptor `fd`: query the chip
+ * parameters, create a GEM command stream manager and a command stream, and
+ * fill in the command stream callbacks.
+ *
+ * Returns the new winsys, or NULL if an allocation, the command stream
+ * manager or the command stream cannot be created.
+ */
+struct r300_winsys* radeon_create_r300_winsys(int fd)
+{
+    struct r300_winsys* winsys;
+    struct radeon_cs_manager* csm;
+
+    winsys = calloc(1, sizeof(*winsys));
+    if (winsys == NULL) {
+        /* do_ioctls() writes through winsys, so stop here. */
+        return NULL;
+    }
+
+    do_ioctls(winsys, fd);
+
+    csm = radeon_cs_manager_gem_ctor(fd);
+    if (csm == NULL) {
+        free(winsys);
+        return NULL;
+    }
+
+    winsys->cs = radeon_cs_create(csm, 1024 * 64 / 4);
+    if (winsys->cs == NULL) {
+        /* NOTE(review): csm is not released here; no destructor for it is
+         * in scope in this file. */
+        free(winsys);
+        return NULL;
+    }
+
+    winsys->check_cs = radeon_r300_check_cs;
+    winsys->begin_cs = radeon_cs_begin;
+    winsys->write_cs_dword = radeon_cs_write_dword;
+    winsys->write_cs_reloc = radeon_r300_write_cs_reloc;
+    winsys->end_cs = radeon_cs_end;
+    winsys->flush_cs = radeon_r300_flush_cs;
+
+    return winsys;
+}
diff --git a/src/gallium/winsys/drm/radeon/radeon_r300.h b/src/gallium/winsys/drm/radeon/radeon_r300.h
new file mode 100644
index 0000000000..8ed95a3a9b
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_r300.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* XXX: the first three includes below should not be needed here; find out why they are and remove them. */
+#include <stdint.h>
+#include <stdlib.h>
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_cs.h"
+
+#include "r300_winsys.h"
+
+#include "radeon_buffer.h"
+
+/* Create the r300 winsys (chip parameters plus command stream callbacks) for
+ * the DRM file descriptor `fd`. */
+struct r300_winsys* radeon_create_r300_winsys(int fd);
diff --git a/src/gallium/winsys/drm/radeon/radeon_screen.c b/src/gallium/winsys/drm/radeon/radeon_screen.c
new file mode 100644
index 0000000000..e31caff0bf
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_screen.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <stdio.h>
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "state_tracker/st_public.h"
+#include "state_tracker/st_context.h"
+#include "utils.h"
+#include "xf86drm.h"
+#include "drm.h"
+#include "dri_util.h"
+#include "radeon_screen.h"
+#include "radeon_context.h"
+#include "radeon_buffer.h"
+#include "radeon_bo.h"
+#include "radeon_bo_gem.h"
+#include "radeon_drm.h"
+
+extern const struct dri_extension radeon_card_extensions[];
+
+/* NULL-terminated table of DRI screen extensions; radeon_screen_init()
+ * stores its address in dri_screen->extensions. */
+static const __DRIextension *radeon_screen_extensions[] = {
+ &driReadDrawableExtension,
+ &driCopySubBufferExtension.base,
+ &driSwapControlExtension.base,
+ &driFrameTrackingExtension.base,
+ &driMediaStreamCounterExtension.base,
+ NULL
+};
+
+/**
+ * Build the framebuffer configs for one colour depth.
+ *
+ * \param pixel_bits        16 selects GL_RGB / GL_UNSIGNED_SHORT_5_6_5; any
+ *                          other value selects GL_BGRA /
+ *                          GL_UNSIGNED_INT_8_8_8_8_REV.
+ * \param depth_bits        depth buffer size; 24 adds a third depth/stencil
+ *                          combination with an 8-bit stencil.
+ * \param have_back_buffer  when non-zero all three back_buffer_modes are
+ *                          offered, otherwise only the first one.
+ * \return the result of driCreateConfigs(), NULL on failure (an error is
+ *         printed to stderr in that case).
+ */
+static __DRIconfig **radeon_fill_in_modes(unsigned pixel_bits,
+                                          unsigned depth_bits,
+                                          GLboolean have_back_buffer)
+{
+    /* TODO: pageflipping ? */
+    static const GLenum back_buffer_modes[] = {
+        GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
+    };
+    const GLboolean has_stencil = (depth_bits == 24);
+    /* Index i of both arrays describes one depth/stencil combination.
+     * All three slots are always initialised, even though only the first
+     * depth_buffer_factor entries are handed to driCreateConfigs(). */
+    uint8_t depth_bits_array[3];
+    uint8_t stencil_bits_array[3];
+    uint8_t msaa_samples_array[1];
+    unsigned depth_buffer_factor;
+    unsigned back_buffer_factor;
+    GLenum fb_format;
+    GLenum fb_type;
+    __DRIconfig **configs;
+
+    depth_bits_array[0] = 0;
+    depth_bits_array[1] = depth_bits;
+    depth_bits_array[2] = depth_bits;
+
+    stencil_bits_array[0] = 0;
+    stencil_bits_array[1] = 0;
+    stencil_bits_array[2] = has_stencil ? 8 : 0;
+
+    depth_buffer_factor = has_stencil ? 3 : 2;
+    back_buffer_factor = have_back_buffer ? 3 : 1;
+
+    msaa_samples_array[0] = 0;
+
+    if (pixel_bits == 16) {
+        fb_format = GL_RGB;
+        fb_type = GL_UNSIGNED_SHORT_5_6_5;
+    } else {
+        fb_format = GL_BGRA;
+        fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+    }
+
+    configs = (__DRIconfig **)driCreateConfigs(fb_format,
+                                               fb_type,
+                                               depth_bits_array,
+                                               stencil_bits_array,
+                                               depth_buffer_factor,
+                                               back_buffer_modes,
+                                               back_buffer_factor,
+                                               msaa_samples_array,
+                                               1);
+    if (configs == NULL) {
+        fprintf(stderr, "[%s:%u] Error creating FBConfig!\n",
+                __FILE__, __LINE__);
+    }
+    return configs;
+}
+
+/**
+ * Release the per-screen driver state hanging off dri_screen->private.
+ *
+ * Destroys the GEM buffer-object manager, frees the radeon_screen and
+ * resets dri_screen->private. Does nothing if no private state is attached.
+ */
+static void radeon_screen_destroy(__DRIscreenPrivate *dri_screen)
+{
+    struct radeon_screen *radeon_screen = (struct radeon_screen*)dri_screen->private;
+
+    if (radeon_screen == NULL) {
+        return;
+    }
+
+    radeon_bo_manager_gem_dtor(radeon_screen->bom);
+    /* Reset the screen's private pointer (not the local parameter) so it
+     * cannot be followed after the free below. */
+    dri_screen->private = NULL;
+    free(radeon_screen);
+}
+
+/**
+ * Screen initialisation entry point (installed as InitScreen2).
+ *
+ * Allocates the radeon_screen, attaches it to dri_screen, creates the GEM
+ * buffer-object manager for dri_screen->fd and returns the concatenation
+ * of the 16-bit and 32-bit config lists.
+ *
+ * \return the config list, or NULL if allocation or buffer-manager
+ *         creation failed; on failure dri_screen->private is left NULL.
+ */
+static const __DRIconfig **radeon_screen_init(__DRIscreenPrivate *dri_screen)
+{
+    struct radeon_screen *radeon_screen;
+
+    /* Calling driInitExtensions here, with a NULL context pointer,
+     * does not actually enable the extensions. It just makes sure
+     * that all the dispatch offsets for all the extensions that
+     * *might* be enabled are known. This is needed because the
+     * dispatch offsets need to be known when _mesa_context_create is
+     * called, but we can't enable the extensions until we have a
+     * context pointer.
+     *
+     * Hello chicken. Hello egg. How are you two today?
+     */
+    driInitExtensions(NULL, radeon_card_extensions, GL_FALSE);
+
+    radeon_screen = calloc(1, sizeof(struct radeon_screen));
+    if (radeon_screen == NULL) {
+        fprintf(stderr, "\nERROR! Allocating private area failed\n");
+        return NULL;
+    }
+    dri_screen->private = (void*)radeon_screen;
+    dri_screen->extensions = radeon_screen_extensions;
+    radeon_screen->dri_screen = dri_screen;
+
+    radeon_screen->bom = radeon_bo_manager_gem_ctor(dri_screen->fd);
+    if (radeon_screen->bom == NULL) {
+        /* Only the allocation exists at this point: release it directly
+         * and make sure the screen no longer points at freed memory. */
+        dri_screen->private = NULL;
+        free(radeon_screen);
+        return NULL;
+    }
+
+    return driConcatConfigs(radeon_fill_in_modes(16, 16, 1),
+                            radeon_fill_in_modes(32, 24, 1));
+}
+
+/**
+ * Create the state-tracker framebuffer for a DRI drawable.
+ *
+ * Formats are derived from the visual: 5 red bits give R5G6B5, anything
+ * else A8R8G8B8; 24/16 depth bits give S8Z24/Z16; 8 stencil bits force
+ * both depth and stencil to S8Z24. The new radeon_framebuffer is stored in
+ * dri_drawable->driverPrivate.
+ *
+ * \return GL_TRUE on success; GL_FALSE for pixmaps (not implemented) and
+ *         on allocation or framebuffer-creation failure.
+ */
+static boolean radeon_buffer_create(__DRIscreenPrivate *dri_screen,
+                                    __DRIdrawablePrivate *dri_drawable,
+                                    const __GLcontextModes *visual,
+                                    boolean is_pixmap)
+{
+    enum pipe_format color_format, depth_format, stencil_format;
+    struct radeon_framebuffer *fb;
+
+    if (is_pixmap) {
+        /* TODO: implement ? */
+        return GL_FALSE;
+    }
+
+    fb = calloc(1, sizeof(struct radeon_framebuffer));
+    if (fb == NULL) {
+        return GL_FALSE;
+    }
+
+    color_format = (visual->redBits == 5) ? PIPE_FORMAT_R5G6B5_UNORM
+                                          : PIPE_FORMAT_A8R8G8B8_UNORM;
+
+    if (visual->depthBits == 24) {
+        depth_format = PIPE_FORMAT_S8Z24_UNORM;
+    } else if (visual->depthBits == 16) {
+        depth_format = PIPE_FORMAT_Z16_UNORM;
+    } else {
+        depth_format = PIPE_FORMAT_NONE;
+    }
+
+    if (visual->stencilBits == 8) {
+        /* a stencil buffer forces the combined depth/stencil format */
+        depth_format = PIPE_FORMAT_S8Z24_UNORM;
+        stencil_format = PIPE_FORMAT_S8Z24_UNORM;
+    } else {
+        stencil_format = PIPE_FORMAT_NONE;
+    }
+
+    fb->st_framebuffer = st_create_framebuffer(visual,
+                                               color_format,
+                                               depth_format,
+                                               stencil_format,
+                                               dri_drawable->w,
+                                               dri_drawable->h,
+                                               (void*)fb);
+    if (fb->st_framebuffer == NULL) {
+        free(fb);
+        return GL_FALSE;
+    }
+    dri_drawable->driverPrivate = (void *) fb;
+
+    /* The front buffer is always attached; back and depth on demand. */
+    fb->attachments = (1 << __DRI_BUFFER_FRONT_LEFT);
+    if (visual->doubleBufferMode) {
+        fb->attachments |= (1 << __DRI_BUFFER_BACK_LEFT);
+    }
+    if (visual->depthBits || visual->stencilBits) {
+        fb->attachments |= (1 << __DRI_BUFFER_DEPTH);
+    }
+
+    return GL_TRUE;
+}
+
+/**
+ * Destroy the framebuffer attached to a DRI drawable.
+ *
+ * Drops the reference on the state-tracker framebuffer, frees the
+ * radeon_framebuffer and resets dri_drawable->driverPrivate.
+ */
+static void radeon_buffer_destroy(__DRIdrawablePrivate * dri_drawable)
+{
+    struct radeon_framebuffer *radeon_fb;
+
+    radeon_fb = dri_drawable->driverPrivate;
+    /* Check the pointer itself before looking inside it, as
+     * radeon_swap_buffers() does. */
+    assert(radeon_fb);
+    assert(radeon_fb->st_framebuffer);
+    st_unreference_framebuffer(radeon_fb->st_framebuffer);
+    /* Do not leave the drawable pointing at freed memory. */
+    dri_drawable->driverPrivate = NULL;
+    free(radeon_fb);
+}
+
+/**
+ * SwapBuffers entry point: if the drawable has a back-left surface, send
+ * the swap and swap-complete notifications to the state tracker.
+ */
+static void radeon_swap_buffers(__DRIdrawablePrivate *dri_drawable)
+{
+    struct radeon_framebuffer *radeon_fb = dri_drawable->driverPrivate;
+    struct pipe_surface *back = NULL;
+
+    assert(radeon_fb);
+    assert(radeon_fb->st_framebuffer);
+
+    st_get_framebuffer_surface(radeon_fb->st_framebuffer,
+                               ST_SURFACE_BACK_LEFT,
+                               &back);
+    if (back == NULL) {
+        return;
+    }
+
+    st_notify_swapbuffers(radeon_fb->st_framebuffer);
+    /* TODO: do we want to do anything here? */
+    st_notify_swapbuffers_complete(radeon_fb->st_framebuffer);
+}
+
+/**
+ * Called via glXCopySubBufferMESA() to copy a sub-rectangle of the back
+ * buffer to the front buffer/screen. Not implemented yet: every argument
+ * is ignored.
+ */
+static void radeon_copy_sub_buffer(__DRIdrawablePrivate *dri_drawable,
+                                   int x, int y, int w, int h)
+{
+    /* TODO: ... */
+    (void)dri_drawable;
+    (void)x;
+    (void)y;
+    (void)w;
+    (void)h;
+}
+
+/* Table of driver entry points, grouped by the object they act on.
+ * Only InitScreen2 is provided for screen setup; InitScreen stays NULL. */
+const struct __DriverAPIRec driDriverAPI = {
+    /* screen */
+    .InitScreen = NULL,
+    .InitScreen2 = radeon_screen_init,
+    .DestroyScreen = radeon_screen_destroy,
+    /* context */
+    .CreateContext = radeon_context_create,
+    .DestroyContext = radeon_context_destroy,
+    .MakeCurrent = radeon_context_bind,
+    .UnbindContext = radeon_context_unbind,
+    /* drawable */
+    .CreateBuffer = radeon_buffer_create,
+    .DestroyBuffer = radeon_buffer_destroy,
+    .SwapBuffers = radeon_swap_buffers,
+    .CopySubBuffer = radeon_copy_sub_buffer,
+};
diff --git a/src/gallium/winsys/drm/radeon/radeon_screen.h b/src/gallium/winsys/drm/radeon/radeon_screen.h
new file mode 100644
index 0000000000..01b7fa6531
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_screen.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_SCREEN_H
+#define RADEON_SCREEN_H
+
+#include "dri_util.h"
+#include "radeon_bo.h"
+
+/* Per-screen driver state, reachable through dri_screen->private. */
+struct radeon_screen {
+ __DRIscreenPrivate *dri_screen; /* back-pointer to the owning DRI screen */
+ struct radeon_bo_manager *bom; /* GEM buffer-object manager for this screen */
+};
+
+#endif
diff --git a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c b/src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.c
index 20920a2052..8402e1fa5a 100644
--- a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c
+++ b/src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.c
@@ -1,8 +1,8 @@
/**************************************************************************
- *
+ *
* Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,73 +10,68 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
- *
+ *
+ *
**************************************************************************/
/*
* Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com>
*/
-
-#include "intel_context.h"
-#include "intel_winsys_softpipe.h"
+#include <stdio.h>
+#include "imports.h"
#include "pipe/p_defines.h"
#include "pipe/p_format.h"
-#include "util/u_memory.h"
#include "softpipe/sp_winsys.h"
+#include "radeon_context.h"
+#include "radeon_winsys_softpipe.h"
-
-struct intel_softpipe_winsys {
- struct softpipe_winsys sws;
- struct intel_context *intel;
+struct radeon_softpipe_winsys {
+ struct softpipe_winsys sp_winsys;
+ struct radeon_context *radeon_context;
};
/**
* Return list of surface formats supported by this driver.
*/
-static boolean
-intel_is_format_supported(struct softpipe_winsys *sws,
- enum pipe_format format)
+static boolean radeon_is_format_supported(struct softpipe_winsys *sws, uint format)
{
- switch(format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- case PIPE_FORMAT_R5G6B5_UNORM:
- case PIPE_FORMAT_S8Z24_UNORM:
- return TRUE;
- default:
- return FALSE;
- }
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ return TRUE;
+ default:
+ break;
+ };
+ return FALSE;
}
-
-/**
- * Create rendering context which uses software rendering.
- */
-struct pipe_context *
-intel_create_softpipe( struct intel_context *intel,
- struct pipe_winsys *winsys )
+struct pipe_context *radeon_create_softpipe(struct radeon_context *radeon_context)
{
- struct intel_softpipe_winsys *isws = CALLOC_STRUCT( intel_softpipe_winsys );
- struct pipe_screen *screen = softpipe_create_screen(winsys);
+ struct radeon_softpipe_winsys *radeon_sp_ws;
+ struct pipe_screen *pipe_screen;
- /* Fill in this struct with callbacks that softpipe will need to
- * communicate with the window system, buffer manager, etc.
- */
- isws->sws.is_format_supported = intel_is_format_supported;
- isws->intel = intel;
+ pipe_screen = softpipe_create_screen(radeon_context->pipe_winsys);
- /* Create the softpipe context:
- */
- return softpipe_create( screen, winsys, &isws->sws );
+ radeon_sp_ws = CALLOC_STRUCT(radeon_softpipe_winsys);
+ if (radeon_sp_ws == NULL) {
+ return NULL;
+ }
+ radeon_context->pipe_screen = pipe_screen;
+ radeon_sp_ws->radeon_context = radeon_context;
+ radeon_sp_ws->sp_winsys.is_format_supported = radeon_is_format_supported;
+ return softpipe_create(pipe_screen,
+ radeon_context->pipe_winsys,
+ &radeon_sp_ws->sp_winsys);
}
diff --git a/src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.h b/src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.h
new file mode 100644
index 0000000000..519eab769c
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_WINSYS_SOFTPIPE_H
+#define RADEON_WINSYS_SOFTPIPE_H
+
+#include "radeon_context.h"
+
+struct pipe_context *radeon_create_softpipe(struct radeon_context *radeon_context);
+
+#endif
diff --git a/src/gallium/winsys/egl_xlib/Makefile b/src/gallium/winsys/egl_xlib/Makefile
index 76f1b56da4..02ac47caa4 100644
--- a/src/gallium/winsys/egl_xlib/Makefile
+++ b/src/gallium/winsys/egl_xlib/Makefile
@@ -34,7 +34,7 @@ LIBS = \
# mesa code, as done for ES 1.x, 2.x, OpenVG, etc)
UNUSED_LIBS = \
$(TOP)/src/mesa/libglapi.a \
- $(TOP)/src/mesa/libmesa.a \
+ $(TOP)/src/mesa/libmesagallium.a \
LOCAL_CFLAGS = -D_EGL_PLATFORM_X=1
diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c
index e9f821d276..9ceb67d2ac 100644
--- a/src/gallium/winsys/egl_xlib/egl_xlib.c
+++ b/src/gallium/winsys/egl_xlib/egl_xlib.c
@@ -38,9 +38,10 @@
#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
#include "pipe/p_state.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "util/u_memory.h"
#include "softpipe/sp_winsys.h"
+#include "softpipe/sp_texture.h"
#include "eglconfig.h"
#include "eglconfigutil.h"
@@ -276,6 +277,7 @@ display_surface(struct pipe_winsys *pws,
struct pipe_surface *psurf,
struct xlib_egl_surface *xsurf)
{
+ struct softpipe_texture *spt = softpipe_texture(psurf->texture);
XImage *ximage;
void *data;
@@ -292,13 +294,13 @@ display_surface(struct pipe_winsys *pws,
assert(ximage->format);
assert(ximage->bitmap_unit);
- data = pws->buffer_map(pws, psurf->buffer, 0);
+ data = pws->buffer_map(pws, spt->buffer, 0);
/* update XImage's fields */
ximage->data = data;
ximage->width = psurf->width;
ximage->height = psurf->height;
- ximage->bytes_per_line = psurf->stride;
+ ximage->bytes_per_line = spt->stride[psurf->level];
XPutImage(xsurf->Dpy, xsurf->Win, xsurf->Gc,
ximage, 0, 0, 0, 0, psurf->width, psurf->height);
@@ -308,7 +310,7 @@ display_surface(struct pipe_winsys *pws,
ximage->data = NULL;
XDestroyImage(ximage);
- pws->buffer_unmap(pws, psurf->buffer);
+ pws->buffer_unmap(pws, spt->buffer);
}
@@ -537,7 +539,7 @@ xlib_eglDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface)
}
else {
XFreeGC(surf->Dpy, surf->Gc);
- st_unreference_framebuffer(&surf->Framebuffer);
+ st_unreference_framebuffer(surf->Framebuffer);
free(surf);
}
return EGL_TRUE;
@@ -559,8 +561,10 @@ xlib_eglSwapBuffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw)
{
struct xlib_egl_surface *xsurf = lookup_surface(draw);
struct pipe_winsys *pws = xsurf->winsys;
- struct pipe_surface *psurf =
- st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT);
+ struct pipe_surface *psurf;
+
+ st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT,
+ &psurf);
st_notify_swapbuffers(xsurf->Framebuffer);
diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c
index 2fd190da52..739bfa1c1a 100644
--- a/src/gallium/winsys/egl_xlib/sw_winsys.c
+++ b/src/gallium/winsys/egl_xlib/sw_winsys.c
@@ -35,7 +35,7 @@
*/
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_state.h"
#include "pipe/p_inlines.h"
#include "util/u_math.h"
@@ -161,65 +161,25 @@ buffer_destroy(struct pipe_winsys *pws, struct pipe_buffer *buf)
}
-/**
- * Called via winsys->surface_alloc() to create new surfaces.
- */
-static struct pipe_surface *
-surface_alloc(struct pipe_winsys *ws)
-{
- struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface);
- if (!surf)
- return NULL;
-
- surf->refcount = 1;
- surf->winsys = ws;
-
- return surf;
-}
-
-
-static int
-surface_alloc_storage(struct pipe_winsys *winsys,
- struct pipe_surface *surf,
+static struct pipe_buffer *
+surface_buffer_create(struct pipe_winsys *winsys,
unsigned width, unsigned height,
enum pipe_format format,
- unsigned flags,
- unsigned tex_usage)
+ unsigned usage,
+ unsigned *stride)
{
const unsigned alignment = 64;
+ struct pipe_format_block block;
+ unsigned nblocksx, nblocksy;
- surf->width = width;
- surf->height = height;
- surf->format = format;
- pf_get_block(surf->format, &surf->block);
- surf->nblocksx = pf_get_nblocksx(&surf->block, width);
- surf->nblocksy = pf_get_nblocksy(&surf->block, height);
- surf->stride = round_up(surf->nblocksx * surf->block.size, alignment);
- surf->usage = flags;
-
- assert(!surf->buffer);
- surf->buffer = winsys->buffer_create(winsys, alignment,
- PIPE_BUFFER_USAGE_PIXEL,
- surf->stride * height);
- if(!surf->buffer)
- return -1;
-
- return 0;
-}
-
+ pf_get_block(format, &block);
+ nblocksx = pf_get_nblocksx(&block, width);
+ nblocksy = pf_get_nblocksy(&block, height);
+ *stride = round_up(nblocksx * block.size, alignment);
-static void
-surface_release(struct pipe_winsys *winsys, struct pipe_surface **s)
-{
- struct pipe_surface *surf = *s;
- assert(!surf->texture);
- surf->refcount--;
- if (surf->refcount == 0) {
- if (surf->buffer)
- winsys_buffer_reference(winsys, &surf->buffer, NULL);
- free(surf);
- }
- *s = NULL;
+ return winsys->buffer_create(winsys, alignment,
+ usage,
+ *stride * nblocksy);
}
@@ -268,9 +228,7 @@ create_sw_winsys(void)
ws->Base.buffer_unmap = buffer_unmap;
ws->Base.buffer_destroy = buffer_destroy;
- ws->Base.surface_alloc = surface_alloc;
- ws->Base.surface_alloc_storage = surface_alloc_storage;
- ws->Base.surface_release = surface_release;
+ ws->Base.surface_buffer_create = surface_buffer_create;
ws->Base.fence_reference = fence_reference;
ws->Base.fence_signalled = fence_signalled;
diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile
new file mode 100644
index 0000000000..2997f6b79c
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/nouveau/Makefile
@@ -0,0 +1,50 @@
+TARGET = libnouveau_dri.so
+GALLIUMDIR = ../../..
+DRMDIR ?= /usr
+DRIDIR = ../../../../driclient
+
+OBJECTS = nouveau_screen_vl.o nouveau_context_vl.o nouveau_swapbuffers.o
+
+CFLAGS += -g -Wall -Werror=implicit-function-declaration -fPIC \
+ -I${GALLIUMDIR}/include \
+ -I${GALLIUMDIR}/winsys/g3dvl \
+ -I${GALLIUMDIR}/winsys/drm/nouveau \
+ -I${DRMDIR}/include \
+ -I${DRMDIR}/include/drm \
+ -I${DRMDIR}/include/nouveau \
+ -I${GALLIUMDIR}/drivers \
+ -I${GALLIUMDIR}/auxiliary \
+ -I${DRIDIR}/include
+
+LDFLAGS += -L${DRMDIR}/lib \
+ -L${DRIDIR}/lib \
+ -L${GALLIUMDIR}/winsys/drm/nouveau/common \
+ -L${GALLIUMDIR}/auxiliary/draw \
+ -L${GALLIUMDIR}/auxiliary/tgsi \
+ -L${GALLIUMDIR}/auxiliary/translate \
+ -L${GALLIUMDIR}/auxiliary/rtasm \
+ -L${GALLIUMDIR}/auxiliary/cso_cache \
+ -L${GALLIUMDIR}/drivers/nv04 \
+ -L${GALLIUMDIR}/drivers/nv10 \
+ -L${GALLIUMDIR}/drivers/nv20 \
+ -L${GALLIUMDIR}/drivers/nv30 \
+ -L${GALLIUMDIR}/drivers/nv40 \
+ -L${GALLIUMDIR}/drivers/nv50
+
+LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lm
+
+#############################################
+
+# .PHONY is a special target, not a variable: it takes its names as prerequisites.
+.PHONY: all clean libdriclient
+
+all: ${TARGET}
+
+${TARGET}: ${OBJECTS} libdriclient
+ $(CC) ${LDFLAGS} -shared -o $@ ${OBJECTS} ${LIBS}
+
+libdriclient:
+ cd ${DRIDIR}/src; ${MAKE}
+
+clean:
+ cd ${DRIDIR}/src; ${MAKE} clean
+ rm -rf ${OBJECTS} ${TARGET}
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c
new file mode 100644
index 0000000000..dfc4905bc0
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c
@@ -0,0 +1,172 @@
+#include "nouveau_context_vl.h"
+#include <pipe/p_defines.h>
+#include <pipe/p_context.h>
+#include <pipe/p_screen.h>
+#include <util/u_memory.h>
+#include <common/nouveau_dri.h>
+#include <common/nouveau_local.h>
+#include <common/nouveau_winsys_pipe.h>
+#include "nouveau_screen_vl.h"
+
+/*
+#ifdef DEBUG
+static const struct dri_debug_control debug_control[] = {
+ { "bo", DEBUG_BO },
+ { NULL, 0 }
+};
+int __nouveau_debug = 0;
+#endif
+*/
+
+/*
+ * Allocate the driver context for dri_context, initialise it through
+ * nouveau_context_init() and link it with the DRI context, the screen and
+ * the pipe context selected by pctx_id.
+ * Returns 0 on success and 1 on allocation or initialisation failure.
+ */
+int
+nouveau_context_create(dri_context_t *dri_context)
+{
+    dri_screen_t *dri_screen;
+    struct nouveau_screen_vl *nv_screen;
+    struct nouveau_context_vl *nv;
+
+    assert (dri_context);
+
+    nv = CALLOC_STRUCT(nouveau_context_vl);
+    if (nv == NULL)
+        return 1;
+
+    dri_screen = dri_context->dri_screen;
+    nv_screen = dri_screen->private;
+
+    if (nouveau_context_init(&nv_screen->base, dri_context->drm_context,
+                             (drmLock*)&dri_screen->sarea->lock, NULL,
+                             &nv->base) != 0) {
+        FREE(nv);
+        return 1;
+    }
+
+    nv->nv_screen = nv_screen;
+    nv->dri_context = dri_context;
+    dri_context->private = (void*)nv;
+
+    /*
+    driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache,
+                        nv->dri_screen->myNum, "nouveau");
+#ifdef DEBUG
+    __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"),
+                                          debug_control);
+#endif
+    */
+
+    /* Let the pipe context find its way back to this driver context. */
+    nv->base.nvc->pctx[nv->base.pctx_id]->priv = nv;
+
+    return 0;
+}
+
+/*
+ * Clean up and free the driver context stored in dri_context->private.
+ */
+void
+nouveau_context_destroy(dri_context_t *dri_context)
+{
+    struct nouveau_context_vl *nv;
+
+    /* Validate the argument before it is dereferenced. */
+    assert(dri_context);
+
+    nv = dri_context->private;
+
+    nouveau_context_cleanup(&nv->base);
+
+    FREE(nv);
+}
+
+/*
+ * Make dri_drawable the context's current drawable and point the
+ * drawable's private field back at the context. Nothing is done when the
+ * drawable is already bound. Always returns 0.
+ */
+int
+nouveau_context_bind(struct nouveau_context_vl *nv, dri_drawable_t *dri_drawable)
+{
+    assert(nv);
+    assert(dri_drawable);
+
+    if (nv->dri_drawable == dri_drawable)
+        return 0;
+
+    nv->dri_drawable = dri_drawable;
+    dri_drawable->private = nv;
+
+    return 0;
+}
+
+/*
+ * Forget the context's current drawable. Always returns 0.
+ */
+int
+nouveau_context_unbind(struct nouveau_context_vl *nv)
+{
+    assert(nv);
+    nv->dri_drawable = NULL;
+    return 0;
+}
+
+/* Show starts here */
+
+/*
+ * Create a DRI drawable for the X drawable and bind it to the driver
+ * context found in pipe->priv.
+ * Returns 0 on success, 1 if no DRI drawable was produced.
+ */
+int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable)
+{
+    struct nouveau_context_vl *nv;
+    /* Start from NULL so a driCreateDrawable() call that does not fill in
+     * the out pointer is detected instead of binding an indeterminate
+     * value. */
+    dri_drawable_t *dri_drawable = NULL;
+
+    assert(pipe);
+
+    nv = pipe->priv;
+
+    driCreateDrawable(nv->nv_screen->dri_screen, drawable, &dri_drawable);
+    if (dri_drawable == NULL)
+        return 1;
+
+    nouveau_context_bind(nv, dri_drawable);
+
+    return 0;
+}
+
+/*
+ * Unbind the current drawable from the driver context stored in
+ * pipe->priv. Returns the result of nouveau_context_unbind(), i.e. 0.
+ */
+int unbind_pipe_drawable(struct pipe_context *pipe)
+{
+    assert (pipe);
+
+    return nouveau_context_unbind(pipe->priv);
+}
+
+/*
+ * Create the DRI screen/context for (display, screen), build the nouveau
+ * screen and context on top of them and return the resulting pipe context.
+ *
+ * Returns NULL if any step fails; everything created up to that point is
+ * torn down again in reverse order.
+ */
+struct pipe_context* create_pipe_context(Display *display, int screen)
+{
+    /* Out pointers start as NULL so that a driCreate*() call which leaves
+     * them untouched is noticed.
+     * NOTE(review): the return-value convention of driCreateScreen() and
+     * driCreateContext() is not visible here -- confirm and check it too. */
+    dri_screen_t *dri_screen = NULL;
+    dri_framebuffer_t dri_framebuf;
+    dri_context_t *dri_context = NULL;
+    struct nouveau_context_vl *nv;
+
+    assert(display);
+
+    driCreateScreen(display, screen, &dri_screen, &dri_framebuf);
+    if (dri_screen == NULL)
+        return NULL;
+
+    driCreateContext(dri_screen, XDefaultVisual(display, screen), &dri_context);
+    if (dri_context == NULL)
+        goto fail_dri_screen;
+
+    /* Both nouveau_*_create() functions return non-zero on failure. */
+    if (nouveau_screen_create(dri_screen, &dri_framebuf))
+        goto fail_dri_context;
+    if (nouveau_context_create(dri_context))
+        goto fail_nv_screen;
+
+    nv = dri_context->private;
+
+    return nv->base.nvc->pctx[nv->base.pctx_id];
+
+fail_nv_screen:
+    nouveau_screen_destroy(dri_screen);
+fail_dri_context:
+    driDestroyContext(dri_context);
+fail_dri_screen:
+    driDestroyScreen(dri_screen);
+    return NULL;
+}
+
+/*
+ * Destroy a pipe context together with its screen, its winsys and the
+ * nouveau/DRI context and screen found through pipe->priv.
+ * Always returns 0.
+ */
+int destroy_pipe_context(struct pipe_context *pipe)
+{
+ struct pipe_screen *screen;
+ struct pipe_winsys *winsys;
+ struct nouveau_context_vl *nv;
+ dri_screen_t *dri_screen;
+ dri_context_t *dri_context;
+
+ assert(pipe);
+
+ /* Fetch every pointer that is needed later up front: pipe is not
+ * dereferenced again once pipe->destroy() has been called. */
+ screen = pipe->screen;
+ winsys = pipe->winsys;
+ nv = pipe->priv;
+ dri_context = nv->dri_context;
+ dri_screen = dri_context->dri_screen;
+
+ /* Gallium objects first ... */
+ pipe->destroy(pipe);
+ screen->destroy(screen);
+ FREE(winsys);
+
+ /* ... then the nouveau wrappers, and finally the DRI objects they
+ * were attached to. */
+ nouveau_context_destroy(dri_context);
+ nouveau_screen_destroy(dri_screen);
+ driDestroyContext(dri_context);
+ driDestroyScreen(dri_screen);
+
+ return 0;
+}
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h
new file mode 100644
index 0000000000..1115c3130c
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h
@@ -0,0 +1,39 @@
+#ifndef __NOUVEAU_CONTEXT_VL_H__
+#define __NOUVEAU_CONTEXT_VL_H__
+
+#include <driclient.h>
+#include <nouveau/nouveau_winsys.h>
+#include <common/nouveau_context.h>
+
+/*#include "xmlconfig.h"*/
+
+/* Driver context wrapping the common nouveau_context; an instance is stored
+ * in dri_context->private and in the pipe context's priv field. */
+struct nouveau_context_vl {
+ struct nouveau_context base; /* common context state; passed to nouveau_context_init()/cleanup() */
+ struct nouveau_screen_vl *nv_screen; /* screen this context was created on */
+ dri_context_t *dri_context; /* owning DRI context */
+ dri_drawable_t *dri_drawable; /* currently bound drawable, NULL when unbound */
+ unsigned int last_stamp;
+ /*driOptionCache dri_option_cache;*/
+ drm_context_t drm_context;
+ drmLock drm_lock;
+};
+
+extern int nouveau_context_create(dri_context_t *);
+extern void nouveau_context_destroy(dri_context_t *);
+extern int nouveau_context_bind(struct nouveau_context_vl *, dri_drawable_t *);
+extern int nouveau_context_unbind(struct nouveau_context_vl *);
+
+#ifdef DEBUG
+/* NOTE(review): the only definition of __nouveau_debug visible in
+ * nouveau_context_vl.c is commented out -- confirm one exists before
+ * using DBG() in a DEBUG build. */
+extern int __nouveau_debug;
+
+#define DEBUG_BO (1 << 0)
+
+/* DBG(flag, ...): forward the arguments to NOUVEAU_ERR when the
+ * DEBUG_<flag> bit is set in __nouveau_debug. Expands to nothing when
+ * DEBUG is not defined. */
+#define DBG(flag, ...) do { \
+ if (__nouveau_debug & (DEBUG_##flag)) \
+ NOUVEAU_ERR(__VA_ARGS__); \
+} while(0)
+#else
+#define DBG(flag, ...)
+#endif
+
+#endif
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c
new file mode 100644
index 0000000000..b7c74f8299
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c
@@ -0,0 +1,88 @@
+#include "nouveau_screen_vl.h"
+#include <util/u_memory.h>
+#include <nouveau_drm.h>
+#include <common/nouveau_dri.h>
+#include <common/nouveau_local.h>
+
+#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 12
+#error nouveau_drm.h version does not match expected version
+#endif
+
+/*
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+DRI_CONF_END;
+static const GLuint __driNConfigOptions = 0;
+*/
+
+/*
+ * Check the DRI, DRM and DDX versions against the versions this driver
+ * was built for: the major number must match exactly and the minor number
+ * must be at least the expected one. The patch level is not compared.
+ * Returns 0 when all three are acceptable, 1 (after logging) otherwise.
+ */
+int nouveau_check_dri_drm_ddx(dri_version_t *dri, dri_version_t *drm, dri_version_t *ddx)
+{
+    static const dri_version_t dri_expected = {4, 0, 0};
+    static const dri_version_t drm_expected = {0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL};
+    static const dri_version_t ddx_expected = {0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL};
+
+    assert(dri);
+    assert(drm);
+    assert(ddx);
+
+    if (!(dri->major == dri_expected.major && dri->minor >= dri_expected.minor)) {
+        NOUVEAU_ERR("Unexpected DRI version.\n");
+        return 1;
+    }
+
+    if (!(drm->major == drm_expected.major && drm->minor >= drm_expected.minor)) {
+        NOUVEAU_ERR("Unexpected DRM version.\n");
+        return 1;
+    }
+
+    if (!(ddx->major == ddx_expected.major && ddx->minor >= ddx_expected.minor)) {
+        NOUVEAU_ERR("Unexpected DDX version.\n");
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Create the driver screen for dri_screen.
+ *
+ * Verifies the DRI/DRM/DDX versions, allocates a nouveau_screen_vl,
+ * initialises it from dri_framebuf->private and dri_screen->fd, and stores
+ * it in dri_screen->private.
+ * Returns 0 on success, 1 on version mismatch, allocation failure or
+ * nouveau_screen_init() failure.
+ */
+int
+nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_framebuf)
+{
+    struct nouveau_dri *nv_dri;
+    struct nouveau_screen_vl *nv_screen;
+
+    /* Validate the arguments before either of them is dereferenced. */
+    assert(dri_screen);
+    assert(dri_framebuf);
+
+    nv_dri = dri_framebuf->private;
+
+    if (nouveau_check_dri_drm_ddx(&dri_screen->dri, &dri_screen->drm, &dri_screen->ddx))
+        return 1;
+
+    nv_screen = CALLOC_STRUCT(nouveau_screen_vl);
+
+    if (!nv_screen)
+        return 1;
+
+    if (nouveau_screen_init(nv_dri, dri_screen->fd, &nv_screen->base))
+    {
+        FREE(nv_screen);
+        return 1;
+    }
+
+    /*
+    driParseOptionInfo(&nv_screen->option_cache,
+                       __driConfigOptions, __driNConfigOptions);
+    */
+
+    nv_screen->dri_screen = dri_screen;
+    dri_screen->private = (void*)nv_screen;
+
+    return 0;
+}
+
+/*
+ * Clean up and free the driver screen stored in dri_screen->private.
+ */
+void
+nouveau_screen_destroy(dri_screen_t *dri_screen)
+{
+    struct nouveau_screen_vl *screen_vl = dri_screen->private;
+
+    nouveau_screen_cleanup(&screen_vl->base);
+    FREE(screen_vl);
+}
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h
new file mode 100644
index 0000000000..0c1ceca6de
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h
@@ -0,0 +1,20 @@
+#ifndef __NOUVEAU_SCREEN_VL_H__
+#define __NOUVEAU_SCREEN_VL_H__
+
+#include <driclient.h>
+#include <common/nouveau_screen.h>
+
+/* TODO: Investigate using DRI options for interesting things */
+/*#include "xmlconfig.h"*/
+
+/* Driver screen wrapping the common nouveau_screen; an instance is stored
+ * in dri_screen->private by nouveau_screen_create(). */
+struct nouveau_screen_vl
+{
+ struct nouveau_screen base; /* common screen state; passed to nouveau_screen_init()/cleanup() */
+ dri_screen_t *dri_screen; /* owning DRI screen */
+ /*driOptionCache option_cache;*/
+};
+
+int nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_framebuf);
+void nouveau_screen_destroy(dri_screen_t *dri_screen);
+
+#endif
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c
new file mode 100644
index 0000000000..864be37871
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c
@@ -0,0 +1,94 @@
+#include <driclient.h>
+#include <common/nouveau_local.h>
+#include <common/nouveau_screen.h>
+#include "nouveau_context_vl.h"
+#include "nouveau_swapbuffers.h"
+
+/*
+ * Copy surf into the context's front buffer, issuing one surface_copy per
+ * cliprect of dri_drawable and then firing the channel's ring. The hardware
+ * lock is taken before the cliprect list is read and held until the end.
+ *
+ * Note: rect is accepted but never consulted; every cliprect is copied
+ * in full.
+ */
+void
+nouveau_copy_buffer(dri_drawable_t *dri_drawable, struct pipe_surface *surf,
+                    const drm_clip_rect_t *rect)
+{
+    struct nouveau_context_vl *nv = dri_drawable->private;
+    struct pipe_context *pipe = nv->base.nvc->pctx[nv->base.pctx_id];
+    int i;
+
+    LOCK_HARDWARE(&nv->base);
+
+    if (dri_drawable->num_cliprects) {
+        drm_clip_rect_t *boxes = dri_drawable->cliprects;
+        int count = dri_drawable->num_cliprects;
+
+        for (i = 0; i < count; i++) {
+            drm_clip_rect_t *box = &boxes[i];
+            /* Source offset: cliprect origin minus the drawable origin. */
+            int src_x = box->x1 - dri_drawable->x;
+            int src_y = box->y1 - dri_drawable->y;
+            /* Destination: the cliprect origin itself. */
+            int dst_x = box->x1;
+            int dst_y = box->y1;
+            int width = box->x2 - box->x1;
+            int height = box->y2 - box->y1;
+
+            pipe->surface_copy(pipe, FALSE, nv->base.frontbuffer,
+                               dst_x, dst_y, surf, src_x, src_y, width, height);
+        }
+
+        FIRE_RING(nv->base.nvc->channel);
+    }
+
+    UNLOCK_HARDWARE(&nv->base);
+}
+
+/*
+ * Build a clip rectangle from (x, y, w, h) and hand it, together with surf,
+ * to nouveau_copy_buffer(). Does nothing when surf is NULL.
+ */
+void
+nouveau_copy_sub_buffer(dri_drawable_t *dri_drawable, struct pipe_surface *surf, int x, int y, int w, int h)
+{
+    drm_clip_rect_t area;
+
+    if (!surf)
+        return;
+
+    area.x1 = x;
+    area.y1 = y;
+    area.x2 = x + w;
+    area.y2 = y + h;
+
+    nouveau_copy_buffer(dri_drawable, surf, &area);
+}
+
+/*
+ * Present surf on dri_drawable via nouveau_copy_buffer() with no clip
+ * rectangle. A NULL surf is silently ignored.
+ */
+void
+nouveau_swap_buffers(dri_drawable_t *dri_drawable, struct pipe_surface *surf)
+{
+    if (!surf)
+        return;
+
+    nouveau_copy_buffer(dri_drawable, surf, NULL);
+}
+
+/*
+ * Winsys flush_frontbuffer hook: context_private is the nouveau_context_vl
+ * whose drawable receives surf. All three arguments must be non-NULL.
+ */
+void
+nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf,
+                          void *context_private)
+{
+    struct nouveau_context_vl *nv_vl;
+
+    assert(pws);
+    assert(surf);
+    assert(context_private);
+
+    nv_vl = context_private;
+
+    nouveau_copy_buffer(nv_vl->dri_drawable, surf, NULL);
+}
+
+/*
+ * Called with the video-layer context; revalidates the drawable information
+ * of the context's drawable, if it has one.
+ */
+void
+nouveau_contended_lock(struct nouveau_context *nv)
+{
+    struct nouveau_context_vl *nv_vl = (struct nouveau_context_vl*)nv;
+    dri_screen_t *dri_screen = nv_vl->dri_context->dri_screen;
+    dri_drawable_t *dri_drawable = nv_vl->dri_drawable;
+
+    if (!dri_drawable)
+        return;
+
+    /* If the window moved, may need to set a new cliprect now.
+     *
+     * NOTE: This releases and regains the hw lock, so all state
+     * checking must be done *after* this call:
+     */
+    DRI_VALIDATE_DRAWABLE_INFO(dri_screen, dri_drawable);
+}
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h
new file mode 100644
index 0000000000..35e934adba
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h
@@ -0,0 +1,10 @@
+#ifndef __NOUVEAU_SWAPBUFFERS_H__
+#define __NOUVEAU_SWAPBUFFERS_H__
+
+extern void nouveau_copy_buffer(dri_drawable_t *, struct pipe_surface *,
+ const drm_clip_rect_t *);
+extern void nouveau_copy_sub_buffer(dri_drawable_t *, struct pipe_surface *,
+ int x, int y, int w, int h);
+extern void nouveau_swap_buffers(dri_drawable_t *, struct pipe_surface *);
+
+#endif
diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h
new file mode 100644
index 0000000000..c83db28dd9
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/vl_winsys.h
@@ -0,0 +1,14 @@
+#ifndef vl_winsys_h
+#define vl_winsys_h
+
+#include <X11/Xlib.h>
+
+struct pipe_context;
+
+struct pipe_context* create_pipe_context(Display *display, int screen);
+int destroy_pipe_context(struct pipe_context *pipe);
+int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable);
+int unbind_pipe_drawable(struct pipe_context *pipe);
+
+#endif
+
diff --git a/src/gallium/winsys/g3dvl/xsp_winsys.c b/src/gallium/winsys/g3dvl/xsp_winsys.c
new file mode 100644
index 0000000000..40d683234f
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/xsp_winsys.c
@@ -0,0 +1,295 @@
+#include "vl_winsys.h"
+#include <X11/Xutil.h>
+#include <pipe/p_winsys.h>
+#include <pipe/p_state.h>
+#include <pipe/p_inlines.h>
+#include <util/u_memory.h>
+#include <softpipe/sp_winsys.h>
+
+/* pipe_winsys implementation */
+
+/* pipe_winsys subclass used by the X11 softpipe winsys. */
+struct xsp_pipe_winsys
+{
+ /* Base winsys; must stay first because pipe_winsys pointers are cast to this type. */
+ struct pipe_winsys base;
+ /* XImage header copied from a template in create_pipe_context() and refilled on every xsp_flush_frontbuffer(). */
+ XImage fbimage;
+};
+
+/* Per-pipe-context X11 state, stored in pipe_context::priv by create_pipe_context(). */
+struct xsp_context
+{
+ /* X connection and screen number passed to create_pipe_context(). */
+ Display *display;
+ int screen;
+ /* Target of XPutImage; only meaningful while drawable_bound is non-zero. */
+ Drawable drawable;
+ /* Set to 1 by bind_pipe_drawable(), cleared by unbind_pipe_drawable(). */
+ int drawable_bound;
+};
+
+/* pipe_buffer subclass backed by plain system memory. */
+struct xsp_buffer
+{
+ /* Base buffer; must stay first because pipe_buffer pointers are cast to this type. */
+ struct pipe_buffer base;
+ /* TRUE when data was supplied by the caller; such memory is not freed on destroy. */
+ boolean is_user_buffer;
+ /* Backing storage (align_malloc'ed unless is_user_buffer). */
+ void *data;
+ /* Equal to data while mapped, NULL after unmap. */
+ void *mapped_data;
+};
+
+/*
+ * Allocate a system-memory buffer of `size` bytes aligned to `alignment`.
+ *
+ * Returns the new buffer with a reference count of 1, or NULL when either
+ * the buffer object or its storage cannot be allocated.
+ */
+static struct pipe_buffer* xsp_buffer_create(struct pipe_winsys *pws, unsigned alignment, unsigned usage, unsigned size)
+{
+    struct xsp_buffer *buffer;
+
+    assert(pws);
+
+    buffer = calloc(1, sizeof(struct xsp_buffer));
+    if (!buffer)
+        return NULL;
+
+    buffer->data = align_malloc(size, alignment);
+    /* A NULL result is only treated as failure for a non-empty request. */
+    if (!buffer->data && size)
+    {
+        free(buffer);
+        return NULL;
+    }
+
+    buffer->base.refcount = 1;
+    buffer->base.alignment = alignment;
+    buffer->base.usage = usage;
+    buffer->base.size = size;
+
+    return (struct pipe_buffer*)buffer;
+}
+
+/*
+ * Wrap caller-owned memory `data` of `size` bytes in a buffer object.
+ * The memory is not copied and is not freed when the buffer is destroyed.
+ *
+ * Returns the new buffer with a reference count of 1, or NULL when the
+ * buffer object cannot be allocated.
+ */
+static struct pipe_buffer* xsp_user_buffer_create(struct pipe_winsys *pws, void *data, unsigned size)
+{
+    struct xsp_buffer *buffer;
+
+    assert(pws);
+
+    buffer = calloc(1, sizeof(struct xsp_buffer));
+    if (!buffer)
+        return NULL;
+
+    buffer->base.refcount = 1;
+    buffer->base.size = size;
+    buffer->is_user_buffer = TRUE;
+    buffer->data = data;
+
+    return (struct pipe_buffer*)buffer;
+}
+
+/* Map a buffer: records the backing storage as the mapped pointer and returns it. flags is unused. */
+static void* xsp_buffer_map(struct pipe_winsys *pws, struct pipe_buffer *buffer, unsigned flags)
+{
+    struct xsp_buffer *xsp_buf;
+
+    assert(pws);
+    assert(buffer);
+
+    xsp_buf = (struct xsp_buffer*)buffer;
+    xsp_buf->mapped_data = xsp_buf->data;
+
+    return xsp_buf->mapped_data;
+}
+
+/* Unmap a buffer: simply forgets the mapped pointer. */
+static void xsp_buffer_unmap(struct pipe_winsys *pws, struct pipe_buffer *buffer)
+{
+    struct xsp_buffer *xsp_buf;
+
+    assert(pws);
+    assert(buffer);
+
+    xsp_buf = (struct xsp_buffer*)buffer;
+    xsp_buf->mapped_data = NULL;
+}
+
+/* Destroy a buffer object; its storage is released too unless it wraps user memory. */
+static void xsp_buffer_destroy(struct pipe_winsys *pws, struct pipe_buffer *buffer)
+{
+    struct xsp_buffer *xsp_buf;
+
+    assert(pws);
+    assert(buffer);
+
+    xsp_buf = (struct xsp_buffer*)buffer;
+
+    /* User buffers point at caller-owned memory, so only free our own storage. */
+    if (!xsp_buf->is_user_buffer)
+        align_free(xsp_buf->data);
+
+    free(xsp_buf);
+}
+
+/*
+ * Borrowed from Mesa's xm_winsys.
+ * Rounds n up using the bit mask (multiple - 1), so the result is the next
+ * multiple only when `multiple` is a power of two.
+ */
+static unsigned int round_up(unsigned n, unsigned multiple)
+{
+    const unsigned mask = multiple - 1;
+
+    return (n + mask) & ~mask;
+}
+
+/*
+ * Allocate the backing buffer for a width x height surface of `format`.
+ *
+ * The row pitch in bytes (blocks per row times block size, rounded up to
+ * ALIGNMENT) is written to *stride. Returns whatever the winsys
+ * buffer_create hook returns for a buffer of *stride * rows bytes.
+ */
+static struct pipe_buffer* xsp_surface_buffer_create
+(
+    struct pipe_winsys *pws,
+    unsigned width,
+    unsigned height,
+    enum pipe_format format,
+    unsigned usage,
+    unsigned *stride
+)
+{
+    const unsigned int ALIGNMENT = 1;
+    struct pipe_format_block block;
+    unsigned nblocksx, nblocksy;
+
+    assert(pws);
+    assert(stride);
+
+    pf_get_block(format, &block);
+    nblocksx = pf_get_nblocksx(&block, width);
+    nblocksy = pf_get_nblocksy(&block, height);
+    *stride = round_up(nblocksx * block.size, ALIGNMENT);
+
+    /* The winsys parameter is named pws here; there is no `winsys` in scope. */
+    return pws->buffer_create(pws, ALIGNMENT,
+                              usage,
+                              *stride * nblocksy);
+}
+
+/*
+ * Fence stub: only validates its arguments. *ptr is never written and no
+ * reference counting takes place.
+ * NOTE(review): assert(fence) rejects a NULL fence - confirm no caller uses
+ * NULL to drop a reference.
+ */
+static void xsp_fence_reference(struct pipe_winsys *pws, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence)
+{
+ assert(pws);
+ assert(ptr);
+ assert(fence);
+}
+
+/* Fence stub: validates its arguments and always returns 0; flag is ignored. */
+static int xsp_fence_signalled(struct pipe_winsys *pws, struct pipe_fence_handle *fence, unsigned flag)
+{
+ assert(pws);
+ assert(fence);
+
+ return 0;
+}
+
+/* Fence stub: validates its arguments and returns 0 immediately without waiting; flag is ignored. */
+static int xsp_fence_finish(struct pipe_winsys *pws, struct pipe_fence_handle *fence, unsigned flag)
+{
+ assert(pws);
+ assert(fence);
+
+ return 0;
+}
+
+/*
+ * Present `surface` on the drawable bound to the context.
+ *
+ * context_private is the xsp_context of the pipe context. When no drawable
+ * is bound the call returns without doing anything. Otherwise the winsys'
+ * XImage header is pointed at the mapped surface, pushed with XPutImage,
+ * the connection is flushed and the surface is unmapped again.
+ */
+static void xsp_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surface, void *context_private)
+{
+    struct xsp_pipe_winsys *xsp_winsys;
+    struct xsp_context *xsp_context;
+    XImage *image;
+
+    assert(pws);
+    assert(surface);
+    assert(context_private);
+
+    xsp_winsys = (struct xsp_pipe_winsys*)pws;
+    xsp_context = (struct xsp_context*)context_private;
+
+    if (!xsp_context->drawable_bound)
+        return;
+
+    /* Size is only known now, so finish filling in the image header. */
+    image = &xsp_winsys->fbimage;
+    image->width = surface->width;
+    image->height = surface->height;
+    image->bytes_per_line = surface->width * (image->bits_per_pixel >> 3);
+    image->data = pipe_surface_map(surface, 0);
+
+    XPutImage(xsp_context->display, xsp_context->drawable,
+              XDefaultGC(xsp_context->display, xsp_context->screen),
+              image, 0, 0, 0, 0, surface->width, surface->height);
+    XFlush(xsp_context->display);
+    pipe_surface_unmap(surface);
+}
+
+/* Return the constant, human-readable name of this winsys. */
+static const char* xsp_get_name(struct pipe_winsys *pws)
+{
+    static const char *const name = "X11 SoftPipe";
+
+    assert(pws);
+
+    return name;
+}
+
+/* Show starts here */
+
+/*
+ * Make `drawable` the presentation target of `pipe`.
+ * Always returns 0.
+ */
+int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable)
+{
+    struct xsp_context *ctx;
+
+    assert(pipe);
+
+    ctx = pipe->priv;
+    ctx->drawable = drawable;
+    ctx->drawable_bound = 1;
+
+    return 0;
+}
+
+/*
+ * Mark `pipe` as having no bound drawable; the stored Drawable id is left
+ * as it is. Always returns 0.
+ */
+int unbind_pipe_drawable(struct pipe_context *pipe)
+{
+    struct xsp_context *ctx;
+
+    assert(pipe);
+
+    ctx = pipe->priv;
+    ctx->drawable_bound = 0;
+
+    return 0;
+}
+
+/*
+ * Create a softpipe context that presents through X11.
+ *
+ * Allocates the winsys and the per-context X state, prepares an XImage
+ * header for `screen` of `display`, then creates the softpipe screen and
+ * context on top of the winsys. The per-context state is stored in the
+ * returned context's priv field.
+ *
+ * Returns the new pipe context, or NULL when an allocation, XCreateImage
+ * or softpipe creation fails; everything allocated here is released on
+ * failure. Release a successful result with destroy_pipe_context().
+ */
+struct pipe_context* create_pipe_context(Display *display, int screen)
+{
+    struct xsp_pipe_winsys *xsp_winsys;
+    struct xsp_context *xsp_context;
+    struct pipe_screen *sp_screen;
+    struct pipe_context *sp_pipe;
+    XImage *template;
+
+    assert(display);
+
+    xsp_winsys = calloc(1, sizeof(struct xsp_pipe_winsys));
+    xsp_context = calloc(1, sizeof(struct xsp_context));
+    if (!xsp_winsys || !xsp_context)
+        goto fail;
+
+    xsp_winsys->base.buffer_create = xsp_buffer_create;
+    xsp_winsys->base.user_buffer_create = xsp_user_buffer_create;
+    xsp_winsys->base.buffer_map = xsp_buffer_map;
+    xsp_winsys->base.buffer_unmap = xsp_buffer_unmap;
+    xsp_winsys->base.buffer_destroy = xsp_buffer_destroy;
+    xsp_winsys->base.surface_buffer_create = xsp_surface_buffer_create;
+    xsp_winsys->base.fence_reference = xsp_fence_reference;
+    xsp_winsys->base.fence_signalled = xsp_fence_signalled;
+    xsp_winsys->base.fence_finish = xsp_fence_finish;
+    xsp_winsys->base.flush_frontbuffer = xsp_flush_frontbuffer;
+    xsp_winsys->base.get_name = xsp_get_name;
+
+    /* XXX: Can't use the returned XImage* directly,
+       since we don't have control over winsys destruction
+       and we wouldn't be able to free it.
+       The visual and depth are taken from `screen`, the same screen whose
+       default GC xsp_flush_frontbuffer() draws with. */
+    template = XCreateImage
+    (
+        display,
+        XDefaultVisual(display, screen),
+        XDefaultDepth(display, screen),
+        ZPixmap,
+        0,
+        NULL,
+        0, /* Don't know the width and height until flush_frontbuffer */
+        0,
+        32,
+        0
+    );
+    if (!template)
+        goto fail;
+
+    xsp_winsys->fbimage = *template;
+    XInitImage(&xsp_winsys->fbimage);
+
+    XDestroyImage(template);
+
+    sp_screen = softpipe_create_screen((struct pipe_winsys*)xsp_winsys);
+    if (!sp_screen)
+        goto fail;
+
+    sp_pipe = softpipe_create(sp_screen, (struct pipe_winsys*)xsp_winsys, NULL);
+    if (!sp_pipe)
+    {
+        sp_screen->destroy(sp_screen);
+        goto fail;
+    }
+
+    xsp_context->display = display;
+    xsp_context->screen = screen;
+
+    sp_pipe->priv = xsp_context;
+
+    return sp_pipe;
+
+fail:
+    /* free(NULL) is a no-op, so this covers partial allocation too. */
+    free(xsp_context);
+    free(xsp_winsys);
+
+    return NULL;
+}
+
+/*
+ * Destroy a context made by create_pipe_context(): frees the per-context
+ * X state, destroys the pipe context and its screen, then frees the winsys.
+ * Always returns 0.
+ */
+int destroy_pipe_context(struct pipe_context *pipe)
+{
+    struct pipe_screen *owner_screen;
+    struct pipe_winsys *owner_winsys;
+
+    assert(pipe);
+
+    /* Remember both owners first; pipe is gone after pipe->destroy(). */
+    owner_screen = pipe->screen;
+    owner_winsys = pipe->winsys;
+
+    free(pipe->priv);
+    pipe->destroy(pipe);
+    owner_screen->destroy(owner_screen);
+    free(owner_winsys);
+
+    return 0;
+}
diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript
index b463fa6505..72b5df8ca2 100644
--- a/src/gallium/winsys/gdi/SConscript
+++ b/src/gallium/winsys/gdi/SConscript
@@ -8,26 +8,28 @@ if env['platform'] == 'windows':
env = env.Clone()
env.Append(CPPPATH = [
- '#src/mesa/state_tracker/wgl',
+ '#src/gallium/state_trackers/wgl',
])
- env.Append(CPPDEFINES = [
+ env.Append(LIBS = [
+ 'gdi32',
+ 'user32',
+ 'kernel32',
])
sources = [
- '#src/mesa/state_tracker/wgl/opengl32.def',
'gdi_softpipe_winsys.c',
]
+
+ if env['toolchain'] == 'crossmingw':
+ sources += ['#src/gallium/state_trackers/wgl/opengl32.mingw.def']
+ else:
+ sources += ['#src/gallium/state_trackers/wgl/opengl32.def']
drivers = [
softpipe,
]
- env.Append(LIBS = [
- 'gdi32',
- 'user32'
- ])
-
env.SharedLibrary(
target ='opengl32',
source = sources,
diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
index e981b4c5cd..2d961f7087 100644
--- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
+++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
@@ -38,14 +38,15 @@
#include <windows.h>
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_format.h"
#include "pipe/p_context.h"
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "softpipe/sp_winsys.h"
-#include "stw_winsys.h"
+#include "softpipe/sp_texture.h"
+#include "shared/stw_winsys.h"
struct gdi_softpipe_buffer
@@ -161,63 +162,25 @@ round_up(unsigned n, unsigned multiple)
}
-static int
-gdi_softpipe_surface_alloc_storage(struct pipe_winsys *winsys,
- struct pipe_surface *surf,
+static struct pipe_buffer *
+gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
unsigned width, unsigned height,
enum pipe_format format,
- unsigned flags,
- unsigned tex_usage)
+ unsigned usage,
+ unsigned *stride)
{
const unsigned alignment = 64;
+ struct pipe_format_block block;
+ unsigned nblocksx, nblocksy;
- surf->width = width;
- surf->height = height;
- surf->format = format;
- pf_get_block(format, &surf->block);
- surf->nblocksx = pf_get_nblocksx(&surf->block, width);
- surf->nblocksy = pf_get_nblocksy(&surf->block, height);
- surf->stride = round_up(surf->nblocksx * surf->block.size, alignment);
- surf->usage = flags;
-
- assert(!surf->buffer);
- surf->buffer = winsys->buffer_create(winsys, alignment,
- PIPE_BUFFER_USAGE_PIXEL,
- surf->stride * surf->nblocksy);
- if(!surf->buffer)
- return -1;
+ pf_get_block(format, &block);
+ nblocksx = pf_get_nblocksx(&block, width);
+ nblocksy = pf_get_nblocksy(&block, height);
+ *stride = round_up(nblocksx * block.size, alignment);
- return 0;
-}
-
-
-static struct pipe_surface *
-gdi_softpipe_surface_alloc(struct pipe_winsys *winsys)
-{
- struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface);
-
- assert(winsys);
-
- surface->refcount = 1;
- surface->winsys = winsys;
-
- return surface;
-}
-
-
-static void
-gdi_softpipe_surface_release(struct pipe_winsys *winsys,
- struct pipe_surface **s)
-{
- struct pipe_surface *surf = *s;
- assert(!surf->texture);
- surf->refcount--;
- if (surf->refcount == 0) {
- if (surf->buffer)
- winsys_buffer_reference(winsys, &surf->buffer, NULL);
- free(surf);
- }
- *s = NULL;
+ return winsys->buffer_create(winsys, alignment,
+ usage,
+ *stride * nblocksy);
}
@@ -281,9 +244,7 @@ gdi_softpipe_screen_create(void)
winsys->buffer_unmap = gdi_softpipe_buffer_unmap;
winsys->buffer_destroy = gdi_softpipe_buffer_destroy;
- winsys->surface_alloc = gdi_softpipe_surface_alloc;
- winsys->surface_alloc_storage = gdi_softpipe_surface_alloc_storage;
- winsys->surface_release = gdi_softpipe_surface_release;
+ winsys->surface_buffer_create = gdi_softpipe_surface_buffer_create;
winsys->fence_reference = gdi_softpipe_fence_reference;
winsys->fence_signalled = gdi_softpipe_fence_signalled;
@@ -308,18 +269,21 @@ gdi_softpipe_context_create(struct pipe_screen *screen)
static void
-gdi_softpipe_flush_frontbuffer(struct pipe_winsys *winsys,
+gdi_softpipe_flush_frontbuffer(struct pipe_screen *screen,
struct pipe_surface *surface,
HDC hDC)
{
+ struct softpipe_texture *texture;
struct gdi_softpipe_buffer *buffer;
BITMAPINFO bmi;
- buffer = gdi_softpipe_buffer(surface->buffer);
+ texture = softpipe_texture(surface->texture);
+
+ buffer = gdi_softpipe_buffer(texture->buffer);
memset(&bmi, 0, sizeof(BITMAPINFO));
bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
- bmi.bmiHeader.biWidth = surface->stride / pf_get_size(surface->format);
+ bmi.bmiHeader.biWidth = texture->stride[surface->level] / pf_get_size(surface->format);
bmi.bmiHeader.biHeight= -(long)surface->height;
bmi.bmiHeader.biPlanes = 1;
bmi.bmiHeader.biBitCount = pf_get_bits(surface->format);
@@ -356,4 +320,4 @@ DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)
break;
}
return TRUE;
-} \ No newline at end of file
+}
diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile
index 11c7632411..bb187cc14a 100644
--- a/src/gallium/winsys/xlib/Makefile
+++ b/src/gallium/winsys/xlib/Makefile
@@ -19,16 +19,24 @@ INCLUDE_DIRS = \
-I$(TOP)/src/mesa/main \
-I$(TOP)/src/gallium/include \
-I$(TOP)/src/gallium/drivers \
+ -I$(TOP)/src/gallium/state_trackers/glx/xlib \
-I$(TOP)/src/gallium/auxiliary
+DEFINES += \
+ -DGALLIUM_SOFTPIPE \
+ -DGALLIUM_TRACE \
+ -DGALLIUM_BRW
+#-DGALLIUM_CELL will be defined by the config
+
XLIB_WINSYS_SOURCES = \
- glxapi.c \
- fakeglx.c \
- xfonts.c \
- xm_api.c \
- xm_winsys.c \
- xm_winsys_aub.c \
- brw_aub.c
+ xlib.c \
+ xlib_cell.c \
+ xlib_brw_aub.c \
+ xlib_brw_context.c \
+ xlib_brw_screen.c \
+ xlib_softpipe.c \
+ xlib_trace.c
+
XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o)
@@ -37,8 +45,9 @@ XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o)
LIBS = \
$(GALLIUM_DRIVERS) \
+ $(TOP)/src/gallium/state_trackers/glx/xlib/libxlib.a \
$(TOP)/src/mesa/libglapi.a \
- $(TOP)/src/mesa/libmesa.a \
+ $(TOP)/src/mesa/libmesagallium.a \
$(GALLIUM_AUXILIARIES) \
$(CELL_SPU_LIB) \
@@ -46,31 +55,33 @@ LIBS = \
.SUFFIXES : .cpp
.c.o:
- $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
+ $(CC) -c $(INCLUDE_DIRS) $(DEFINES) $(CFLAGS) $< -o $@
.cpp.o:
- $(CXX) -c $(INCLUDE_DIRS) $(CXXFLAGS) $< -o $@
+ $(CXX) -c $(INCLUDE_DIRS) $(DEFINES) $(CXXFLAGS) $< -o $@
-default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME)
+default: $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME)
+$(TOP)/$(LIB_DIR)/gallium:
+ @ mkdir -p $(TOP)/$(LIB_DIR)/gallium
# Make the libGL.so library
-$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(XLIB_WINSYS_OBJECTS) $(LIBS)
+$(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME): $(XLIB_WINSYS_OBJECTS) $(LIBS) Makefile
$(TOP)/bin/mklib -o $(GL_LIB) \
-linker "$(CC)" \
-major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \
- -install $(TOP)/$(LIB_DIR) \
+ -install $(TOP)/$(LIB_DIR)/gallium \
$(MKLIB_OPTIONS) $(XLIB_WINSYS_OBJECTS) \
--start-group $(LIBS) --end-group $(GL_LIB_DEPS)
-depend: $(ALL_SOURCES)
+depend: $(XLIB_WINSYS_SOURCES)
@ echo "running $(MKDEP)"
@ rm -f depend # workaround oops on gutsy?!?
@ touch depend
- @ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \
+ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(XLIB_WINSYS_SOURCES) \
> /dev/null 2>/dev/null
diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript
index 324fbef306..0fb4b50f63 100644
--- a/src/gallium/winsys/xlib/SConscript
+++ b/src/gallium/winsys/xlib/SConscript
@@ -5,8 +5,7 @@ Import('*')
if env['platform'] == 'linux' \
and 'mesa' in env['statetrackers'] \
- and 'softpipe' in env['drivers'] \
- and 'i965simple' in env['drivers'] \
+ and set(('softpipe', 'i915simple', 'trace')).intersection(env['drivers']) \
and not env['dri']:
env = env.Clone()
@@ -14,32 +13,46 @@ if env['platform'] == 'linux' \
env.Append(CPPPATH = [
'#/src/mesa',
'#/src/mesa/main',
+ '#src/gallium/state_trackers/glx/xlib',
])
+ env.Append(CPPDEFINES = ['USE_XSHM'])
+
sources = [
- 'glxapi.c',
- 'fakeglx.c',
- 'xfonts.c',
- 'xm_api.c',
- 'xm_winsys.c',
- 'xm_winsys_aub.c',
- 'brw_aub.c',
+ 'xlib.c',
]
+
+ drivers = []
- drivers = [
- softpipe,
- i965simple,
- ]
-
+ if 'softpipe' in env['drivers']:
+ env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE')
+ sources += ['xlib_softpipe.c']
+ drivers += [softpipe]
+
+ if 'i965simple' in env['drivers']:
+ env.Append(CPPDEFINES = 'GALLIUM_I965SIMPLE')
+ sources += [
+ 'xlib_brw_aub.c',
+ 'xlib_brw_context.c',
+ 'xlib_brw_screen.c',
+ ]
+ drivers += [i965simple]
+
+ if 'cell' in env['drivers']:
+ env.Append(CPPDEFINES = 'GALLIUM_CELL')
+ sources += ['xlib_cell.c']
+ drivers += [cell]
+
if 'trace' in env['drivers']:
env.Append(CPPDEFINES = 'GALLIUM_TRACE')
+ sources += ['xlib_trace.c']
drivers += [trace]
# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
libgl = env.SharedLibrary(
target ='GL',
source = sources,
- LIBS = glapi + mesa + drivers + auxiliaries + env['LIBS'],
+ LIBS = st_xlib + glapi + mesa + drivers + auxiliaries + env['LIBS'],
)
env.InstallSharedLibrary(libgl, version=(1, 5))
diff --git a/src/gallium/winsys/xlib/glxheader.h b/src/gallium/winsys/xlib/glxheader.h
deleted file mode 100644
index a402191f13..0000000000
--- a/src/gallium/winsys/xlib/glxheader.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Mesa 3-D graphics library
- * Version: 6.5.1
- *
- * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef GLX_HEADER_H
-#define GLX_HEADER_H
-
-#ifdef __VMS
-#include <GL/vms_x_fix.h>
-#endif
-
-#include "glheader.h"
-
-#ifdef XFree86Server
-
-# include "resource.h"
-# include "windowstr.h"
-
-#else
-
-# include <X11/Xlib.h>
-# include <X11/Xlibint.h>
-# include <X11/Xutil.h>
-# ifdef USE_XSHM /* was SHM */
-# include <sys/ipc.h>
-# include <sys/shm.h>
-# include <X11/extensions/XShm.h>
-# endif
-# include <GL/glx.h>
-# include <sys/time.h>
-
-#endif
-
-
-
-/* this silences a compiler warning on several systems */
-struct timespec;
-struct itimerspec;
-
-
-#endif /*GLX_HEADER*/
diff --git a/src/gallium/winsys/xlib/realglx.c b/src/gallium/winsys/xlib/realglx.c
deleted file mode 100644
index 30adb7465b..0000000000
--- a/src/gallium/winsys/xlib/realglx.c
+++ /dev/null
@@ -1,180 +0,0 @@
-
-/*
- * Mesa 3-D graphics library
- * Version: 5.1
- *
- * Copyright (C) 1999-2002 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include <assert.h>
-#include <GL/glx.h>
-#include "realglx.h"
-#include "glxapi.h"
-
-
-struct _glxapi_table *
-_real_GetGLXDispatchTable(void)
-{
- static struct _glxapi_table glx;
-
- /* be sure our dispatch table size <= libGL's table */
- {
- GLuint size = sizeof(struct _glxapi_table) / sizeof(void *);
- (void) size;
- assert(_glxapi_get_dispatch_table_size() >= size);
- }
-
- /* initialize the whole table to no-ops */
- _glxapi_set_no_op_table(&glx);
-
- /* now initialize the table with the functions I implement */
-
- /*** GLX_VERSION_1_0 ***/
- glx.ChooseVisual = _real_glXChooseVisual;
- glx.CopyContext = _real_glXCopyContext;
- glx.CreateContext = _real_glXCreateContext;
- glx.CreateGLXPixmap = _real_glXCreateGLXPixmap;
- glx.DestroyContext = _real_glXDestroyContext;
- glx.DestroyGLXPixmap = _real_glXDestroyGLXPixmap;
- glx.GetConfig = _real_glXGetConfig;
- /*glx.GetCurrentContext = _real_glXGetCurrentContext;*/
- /*glx.GetCurrentDrawable = _real_glXGetCurrentDrawable;*/
- glx.IsDirect = _real_glXIsDirect;
- glx.MakeCurrent = _real_glXMakeCurrent;
- glx.QueryExtension = _real_glXQueryExtension;
- glx.QueryVersion = _real_glXQueryVersion;
- glx.SwapBuffers = _real_glXSwapBuffers;
- glx.UseXFont = _real_glXUseXFont;
- glx.WaitGL = _real_glXWaitGL;
- glx.WaitX = _real_glXWaitX;
-
- /*** GLX_VERSION_1_1 ***/
- glx.GetClientString = _real_glXGetClientString;
- glx.QueryExtensionsString = _real_glXQueryExtensionsString;
- glx.QueryServerString = _real_glXQueryServerString;
-
- /*** GLX_VERSION_1_2 ***/
- /*glx.GetCurrentDisplay = _real_glXGetCurrentDisplay;*/
-
- /*** GLX_VERSION_1_3 ***/
- glx.ChooseFBConfig = _real_glXChooseFBConfig;
- glx.CreateNewContext = _real_glXCreateNewContext;
- glx.CreatePbuffer = _real_glXCreatePbuffer;
- glx.CreatePixmap = _real_glXCreatePixmap;
- glx.CreateWindow = _real_glXCreateWindow;
- glx.DestroyPbuffer = _real_glXDestroyPbuffer;
- glx.DestroyPixmap = _real_glXDestroyPixmap;
- glx.DestroyWindow = _real_glXDestroyWindow;
- /*glx.GetCurrentReadDrawable = _real_glXGetCurrentReadDrawable;*/
- glx.GetFBConfigAttrib = _real_glXGetFBConfigAttrib;
- glx.GetFBConfigs = _real_glXGetFBConfigs;
- glx.GetSelectedEvent = _real_glXGetSelectedEvent;
- glx.GetVisualFromFBConfig = _real_glXGetVisualFromFBConfig;
- glx.MakeContextCurrent = _real_glXMakeContextCurrent;
- glx.QueryContext = _real_glXQueryContext;
- glx.QueryDrawable = _real_glXQueryDrawable;
- glx.SelectEvent = _real_glXSelectEvent;
-
- /*** GLX_SGI_swap_control ***/
- glx.SwapIntervalSGI = _real_glXSwapIntervalSGI;
-
- /*** GLX_SGI_video_sync ***/
- glx.GetVideoSyncSGI = _real_glXGetVideoSyncSGI;
- glx.WaitVideoSyncSGI = _real_glXWaitVideoSyncSGI;
-
- /*** GLX_SGI_make_current_read ***/
- glx.MakeCurrentReadSGI = _real_glXMakeCurrentReadSGI;
- /*glx.GetCurrentReadDrawableSGI = _real_glXGetCurrentReadDrawableSGI;*/
-
-#if defined(_VL_H)
- /*** GLX_SGIX_video_source ***/
- glx.CreateGLXVideoSourceSGIX = _real_glXCreateGLXVideoSourceSGIX;
- glx.DestroyGLXVideoSourceSGIX = _real_glXDestroyGLXVideoSourceSGIX;
-#endif
-
- /*** GLX_EXT_import_context ***/
- glx.FreeContextEXT = _real_glXFreeContextEXT;
- /*glx.GetContextIDEXT = _real_glXGetContextIDEXT;*/
- /*glx.GetCurrentDisplayEXT = _real_glXGetCurrentDisplayEXT;*/
- glx.ImportContextEXT = _real_glXImportContextEXT;
- glx.QueryContextInfoEXT = _real_glXQueryContextInfoEXT;
-
- /*** GLX_SGIX_fbconfig ***/
- glx.GetFBConfigAttribSGIX = _real_glXGetFBConfigAttribSGIX;
- glx.ChooseFBConfigSGIX = _real_glXChooseFBConfigSGIX;
- glx.CreateGLXPixmapWithConfigSGIX = _real_glXCreateGLXPixmapWithConfigSGIX;
- glx.CreateContextWithConfigSGIX = _real_glXCreateContextWithConfigSGIX;
- glx.GetVisualFromFBConfigSGIX = _real_glXGetVisualFromFBConfigSGIX;
- glx.GetFBConfigFromVisualSGIX = _real_glXGetFBConfigFromVisualSGIX;
-
- /*** GLX_SGIX_pbuffer ***/
- glx.CreateGLXPbufferSGIX = _real_glXCreateGLXPbufferSGIX;
- glx.DestroyGLXPbufferSGIX = _real_glXDestroyGLXPbufferSGIX;
- glx.QueryGLXPbufferSGIX = _real_glXQueryGLXPbufferSGIX;
- glx.SelectEventSGIX = _real_glXSelectEventSGIX;
- glx.GetSelectedEventSGIX = _real_glXGetSelectedEventSGIX;
-
- /*** GLX_SGI_cushion ***/
- glx.CushionSGI = _real_glXCushionSGI;
-
- /*** GLX_SGIX_video_resize ***/
- glx.BindChannelToWindowSGIX = _real_glXBindChannelToWindowSGIX;
- glx.ChannelRectSGIX = _real_glXChannelRectSGIX;
- glx.QueryChannelRectSGIX = _real_glXQueryChannelRectSGIX;
- glx.QueryChannelDeltasSGIX = _real_glXQueryChannelDeltasSGIX;
- glx.ChannelRectSyncSGIX = _real_glXChannelRectSyncSGIX;
-
-#if defined(_DM_BUFFER_H_)
- /*** (GLX_SGIX_dmbuffer ***/
- glx.AssociateDMPbufferSGIX = NULL;
-#endif
-
- /*** GLX_SGIX_swap_group ***/
- glx.JoinSwapGroupSGIX = _real_glXJoinSwapGroupSGIX;
-
- /*** GLX_SGIX_swap_barrier ***/
- glx.BindSwapBarrierSGIX = _real_glXBindSwapBarrierSGIX;
- glx.QueryMaxSwapBarriersSGIX = _real_glXQueryMaxSwapBarriersSGIX;
-
- /*** GLX_SUN_get_transparent_index ***/
- glx.GetTransparentIndexSUN = _real_glXGetTransparentIndexSUN;
-
- /*** GLX_MESA_copy_sub_buffer ***/
- glx.CopySubBufferMESA = _real_glXCopySubBufferMESA;
-
- /*** GLX_MESA_release_buffers ***/
- glx.ReleaseBuffersMESA = _real_glXReleaseBuffersMESA;
-
- /*** GLX_MESA_pixmap_colormap ***/
- glx.CreateGLXPixmapMESA = _real_glXCreateGLXPixmapMESA;
-
- /*** GLX_MESA_set_3dfx_mode ***/
- glx.Set3DfxModeMESA = _real_glXSet3DfxModeMESA;
-
- /*** GLX_NV_vertex_array_range ***/
- glx.AllocateMemoryNV = _real_glXAllocateMemoryNV;
- glx.FreeMemoryNV = _real_glXFreeMemoryNV;
-
- /*** GLX_MESA_agp_offset ***/
- glx.GetAGPOffsetMESA = _real_glXGetAGPOffsetMESA;
-
- return &glx;
-}
diff --git a/src/gallium/winsys/xlib/realglx.h b/src/gallium/winsys/xlib/realglx.h
deleted file mode 100644
index 150129db68..0000000000
--- a/src/gallium/winsys/xlib/realglx.h
+++ /dev/null
@@ -1,326 +0,0 @@
-
-/*
- * Mesa 3-D graphics library
- * Version: 3.5
- *
- * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef REALGLX_H
-#define REALGLX_H
-
-
-extern struct _glxapi_table *
-_real_GetGLXDispatchTable(void);
-
-
-/*
- * Basically just need these to prevent compiler warnings.
- */
-
-
-extern XVisualInfo *
-_real_glXChooseVisual( Display *dpy, int screen, int *list );
-
-extern GLXContext
-_real_glXCreateContext( Display *dpy, XVisualInfo *visinfo,
- GLXContext share_list, Bool direct );
-
-extern GLXPixmap
-_real_glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap );
-
-extern GLXPixmap
-_real_glXCreateGLXPixmapMESA( Display *dpy, XVisualInfo *visinfo,
- Pixmap pixmap, Colormap cmap );
-
-extern void
-_real_glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap );
-
-extern void
-_real_glXCopyContext( Display *dpy, GLXContext src, GLXContext dst,
- unsigned long mask );
-
-extern Bool
-_real_glXMakeCurrent( Display *dpy, GLXDrawable drawable, GLXContext ctx );
-
-extern Bool
-_real_glXQueryExtension( Display *dpy, int *errorb, int *event );
-
-extern void
-_real_glXDestroyContext( Display *dpy, GLXContext ctx );
-
-extern Bool
-_real_glXIsDirect( Display *dpy, GLXContext ctx );
-
-extern void
-_real_glXSwapBuffers( Display *dpy, GLXDrawable drawable );
-
-extern void
-_real_glXUseXFont( Font font, int first, int count, int listbase );
-
-extern Bool
-_real_glXQueryVersion( Display *dpy, int *maj, int *min );
-
-extern int
-_real_glXGetConfig( Display *dpy, XVisualInfo *visinfo,
- int attrib, int *value );
-
-extern void
-_real_glXWaitGL( void );
-
-
-extern void
-_real_glXWaitX( void );
-
-/* GLX 1.1 and later */
-extern const char *
-_real_glXQueryExtensionsString( Display *dpy, int screen );
-
-/* GLX 1.1 and later */
-extern const char *
-_real_glXQueryServerString( Display *dpy, int screen, int name );
-
-/* GLX 1.1 and later */
-extern const char *
-_real_glXGetClientString( Display *dpy, int name );
-
-
-/*
- * GLX 1.3 and later
- */
-
-extern GLXFBConfig *
-_real_glXChooseFBConfig( Display *dpy, int screen,
- const int *attribList, int *nitems );
-
-extern int
-_real_glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config,
- int attribute, int *value );
-
-extern GLXFBConfig *
-_real_glXGetFBConfigs( Display *dpy, int screen, int *nelements );
-
-extern XVisualInfo *
-_real_glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config );
-
-extern GLXWindow
-_real_glXCreateWindow( Display *dpy, GLXFBConfig config, Window win,
- const int *attribList );
-
-extern void
-_real_glXDestroyWindow( Display *dpy, GLXWindow window );
-
-extern GLXPixmap
-_real_glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap,
- const int *attribList );
-
-extern void
-_real_glXDestroyPixmap( Display *dpy, GLXPixmap pixmap );
-
-extern GLXPbuffer
-_real_glXCreatePbuffer( Display *dpy, GLXFBConfig config,
- const int *attribList );
-
-extern void
-_real_glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf );
-
-extern void
-_real_glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute,
- unsigned int *value );
-
-extern GLXContext
-_real_glXCreateNewContext( Display *dpy, GLXFBConfig config,
- int renderType, GLXContext shareList, Bool direct );
-
-
-extern Bool
-_real_glXMakeContextCurrent( Display *dpy, GLXDrawable draw,
- GLXDrawable read, GLXContext ctx );
-
-extern int
-_real_glXQueryContext( Display *dpy, GLXContext ctx, int attribute, int *value );
-
-extern void
-_real_glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask );
-
-extern void
-_real_glXGetSelectedEvent( Display *dpy, GLXDrawable drawable,
- unsigned long *mask );
-
-#ifdef GLX_SGI_swap_control
-extern int
-_real_glXSwapIntervalSGI(int interval);
-#endif
-
-
-#ifdef GLX_SGI_video_sync
-extern int
-_real_glXGetVideoSyncSGI(unsigned int *count);
-
-extern int
-_real_glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count);
-#endif
-
-
-#ifdef GLX_SGI_make_current_read
-extern Bool
-_real_glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx);
-
-extern GLXDrawable
-_real_glXGetCurrentReadDrawableSGI(void);
-#endif
-
-#if defined(_VL_H) && defined(GLX_SGIX_video_source)
-extern GLXVideoSourceSGIX
-_real_glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server, VLPath path, int nodeClass, VLNode drainNode);
-
-extern void
-_real_glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src);
-#endif
-
-#ifdef GLX_EXT_import_context
-extern void
-_real_glXFreeContextEXT(Display *dpy, GLXContext context);
-
-extern GLXContextID
-_real_glXGetContextIDEXT(const GLXContext context);
-
-extern Display *
-_real_glXGetCurrentDisplayEXT(void);
-
-extern GLXContext
-_real_glXImportContextEXT(Display *dpy, GLXContextID contextID);
-
-extern int
-_real_glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *value);
-#endif
-
-#ifdef GLX_SGIX_fbconfig
-extern int
-_real_glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config, int attribute, int *value);
-
-extern GLXFBConfigSGIX *
-_real_glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list, int *nelements);
-
-extern GLXPixmap
-_real_glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap);
-
-extern GLXContext
-_real_glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct);
-
-extern XVisualInfo *
-_real_glXGetVisualFromFBConfigSGIX(Display *dpy, GLXFBConfigSGIX config);
-
-extern GLXFBConfigSGIX
-_real_glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis);
-#endif
-
-#ifdef GLX_SGIX_pbuffer
-extern GLXPbufferSGIX
-_real_glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config, unsigned int width, unsigned int height, int *attrib_list);
-
-extern void
-_real_glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf);
-
-extern int
-_real_glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value);
-
-extern void
-_real_glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask);
-
-extern void
-_real_glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask);
-#endif
-
-#ifdef GLX_SGI_cushion
-extern void
-_real_glXCushionSGI(Display *dpy, Window win, float cushion);
-#endif
-
-#ifdef GLX_SGIX_video_resize
-extern int
-_real_glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window);
-
-extern int
-_real_glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, int h);
-
-extern int
-_real_glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, int *w, int *h);
-
-extern int
-_real_glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *dy, int *dw, int *dh);
-
-extern int
-_real_glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype);
-#endif
-
-#if defined(_DM_BUFFER_H_) && defined(GLX_SGIX_dmbuffer)
-extern Bool
-_real_glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params, DMbuffer dmbuffer);
-#endif
-
-#ifdef GLX_SGIX_swap_group
-extern void
-_real_glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member);
-#endif
-
-#ifdef GLX_SGIX_swap_barrier
-extern void
-_real_glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier);
-
-extern Bool
-_real_glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max);
-#endif
-
-#ifdef GLX_SUN_get_transparent_index
-extern Status
-_real_glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *pTransparent);
-#endif
-
-#ifdef GLX_MESA_release_buffers
-extern Bool
-_real_glXReleaseBuffersMESA( Display *dpy, GLXDrawable d );
-#endif
-
-#ifdef GLX_MESA_set_3dfx_mode
-extern Bool
-_real_glXSet3DfxModeMESA( int mode );
-#endif
-
-#ifdef GLX_NV_vertex_array_range
-extern void *
-_real_glXAllocateMemoryNV(GLsizei size, GLfloat readfreq, GLfloat writefreq, GLfloat priority);
-extern void
-_real_glXFreeMemoryNV(GLvoid *pointer);
-#endif
-
-#ifdef GLX_MESA_agp_offset
-extern GLuint
-_real_glXGetAGPOffsetMESA(const GLvoid *pointer);
-#endif
-
-#ifdef GLX_MESA_copy_sub_buffer
-extern void
-_real_glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable,
- int x, int y, int width, int height );
-#endif
-
-#endif /* REALGLX_H */
diff --git a/src/gallium/winsys/xlib/xlib.c b/src/gallium/winsys/xlib/xlib.c
new file mode 100644
index 0000000000..da72228215
--- /dev/null
+++ b/src/gallium/winsys/xlib/xlib.c
@@ -0,0 +1,113 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell
+ */
+
+#include "xlib.h"
+#include "xm_winsys.h"
+
+#include <stdlib.h>
+#include <assert.h>
+
+/* TODO: replace all this with callback-structs provided by the
+ * individual implementations.
+ */
+
+enum mode {
+ MODE_TRACE,
+ MODE_BRW,
+ MODE_CELL,
+ MODE_SOFTPIPE
+};
+
+
+static enum mode get_mode()
+{
+ if (getenv("XMESA_TRACE"))
+ return MODE_TRACE;
+
+ if (getenv("XMESA_BRW"))
+ return MODE_BRW;
+
+#ifdef GALLIUM_CELL
+ if (!getenv("GALLIUM_NOCELL"))
+ return MODE_CELL;
+#endif
+
+ return MODE_SOFTPIPE;
+}
+
+static void _init( void ) __attribute__((constructor));
+
+static void _init( void )
+{
+ enum mode xlib_mode = get_mode();
+
+ switch (xlib_mode) {
+ case MODE_TRACE:
+#if defined(GALLIUM_TRACE) && defined(GALLIUM_SOFTPIPE)
+ xmesa_set_driver( &xlib_trace_driver );
+#endif
+ break;
+ case MODE_BRW:
+#if defined(GALLIUM_BRW)
+ xmesa_set_driver( &xlib_brw_driver );
+#endif
+ break;
+ case MODE_CELL:
+#if defined(GALLIUM_CELL)
+ xmesa_set_driver( &xlib_cell_driver );
+#endif
+ break;
+ case MODE_SOFTPIPE:
+#if defined(GALLIUM_SOFTPIPE)
+ xmesa_set_driver( &xlib_softpipe_driver );
+#endif
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+
+/***********************************************************************
+ *
+ * Butt-ugly hack to convince the linker not to throw away public GL
+ * symbols (they are all referenced from glXGetProcAddress, I guess).
+ */
+extern void (*linker_foo(const unsigned char *procName))();
+extern void (*glXGetProcAddress(const unsigned char *procName))();
+
+extern void (*linker_foo(const unsigned char *procName))()
+{
+ return glXGetProcAddress(procName);
+}
diff --git a/src/gallium/winsys/xlib/xlib.h b/src/gallium/winsys/xlib/xlib.h
new file mode 100644
index 0000000000..d602ab0b13
--- /dev/null
+++ b/src/gallium/winsys/xlib/xlib.h
@@ -0,0 +1,14 @@
+
+#ifndef XLIB_H
+#define XLIB_H
+
+#include "pipe/p_compiler.h"
+#include "xm_winsys.h"
+
+extern struct xm_driver xlib_trace_driver;
+extern struct xm_driver xlib_softpipe_driver;
+extern struct xm_driver xlib_cell_driver;
+extern struct xm_driver xlib_brw_driver;
+
+
+#endif
diff --git a/src/gallium/winsys/xlib/xlib_brw.h b/src/gallium/winsys/xlib/xlib_brw.h
new file mode 100644
index 0000000000..be2dd147db
--- /dev/null
+++ b/src/gallium/winsys/xlib/xlib_brw.h
@@ -0,0 +1,30 @@
+#ifndef XLIB_BRW_H
+#define XLIB_BRW_H
+
+struct pipe_winsys;
+struct pipe_buffer;
+struct pipe_surface;
+struct xmesa_buffer;
+
+unsigned xlib_brw_get_buffer_offset( struct pipe_winsys *pws,
+ struct pipe_buffer *buf,
+ unsigned access_flags );
+
+void xlib_brw_buffer_subdata_typed( struct pipe_winsys *pws,
+ struct pipe_buffer *buf,
+ unsigned long offset,
+ unsigned long size,
+ const void *data,
+ unsigned data_type );
+
+
+
+void xlib_brw_commands_aub(struct pipe_winsys *winsys,
+ unsigned *cmds,
+ unsigned nr_dwords);
+
+struct pipe_context *
+xlib_create_brw_context( struct pipe_screen *screen,
+ void *unused );
+
+#endif
diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/xlib_brw_aub.c
index 9e96efaa53..b6bd849ef2 100644
--- a/src/gallium/winsys/xlib/brw_aub.c
+++ b/src/gallium/winsys/xlib/xlib_brw_aub.c
@@ -31,11 +31,12 @@
#include <stdio.h>
#include <stdlib.h>
-#include "brw_aub.h"
+#include "xlib_brw_aub.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
+#include "softpipe/sp_texture.h"
struct brw_aubfile {
@@ -322,10 +323,10 @@ void brw_aub_dump_bmp( struct brw_aubfile *aubfile,
struct aub_dump_bmp db;
unsigned format;
- assert(surface->block.width == 1);
- assert(surface->block.height == 1);
+ assert(surface->texture->block.width == 1);
+ assert(surface->texture->block.height == 1);
- if (surface->block.size == 4)
+ if (surface->texture->block.size == 4)
format = 0x7;
else
format = 0x3;
@@ -334,8 +335,9 @@ void brw_aub_dump_bmp( struct brw_aubfile *aubfile,
db.xmin = 0;
db.ymin = 0;
db.format = format;
- db.bpp = surface->block.size * 8;
- db.pitch = surface->stride/surface->block.size;
+ db.bpp = surface->texture->block.size * 8;
+ db.pitch = softpipe_texture(surface->texture)->stride[surface->level] /
+ surface->texture->block.size;
db.xsize = surface->width;
db.ysize = surface->height;
db.addr = gtt_offset;
diff --git a/src/gallium/winsys/xlib/brw_aub.h b/src/gallium/winsys/xlib/xlib_brw_aub.h
index f5c60c7be2..f5c60c7be2 100644
--- a/src/gallium/winsys/xlib/brw_aub.h
+++ b/src/gallium/winsys/xlib/xlib_brw_aub.h
diff --git a/src/gallium/winsys/xlib/xlib_brw_context.c b/src/gallium/winsys/xlib/xlib_brw_context.c
new file mode 100644
index 0000000000..09599507f4
--- /dev/null
+++ b/src/gallium/winsys/xlib/xlib_brw_context.c
@@ -0,0 +1,209 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell
+ * Brian Paul
+ */
+
+
+//#include "glxheader.h"
+//#include "xmesaP.h"
+
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "i965simple/brw_winsys.h"
+#include "xlib_brw_aub.h"
+#include "xlib_brw.h"
+
+
+
+
+#define XBCWS_BATCHBUFFER_SIZE 1024
+
+
+/* The backend to the brw driver (ie struct brw_winsys) is actually a
+ * per-context entity.
+ */
+struct xlib_brw_context_winsys {
+ struct brw_winsys brw_context_winsys; /**< batch buffer funcs */
+ struct aub_context *aub;
+
+ struct pipe_winsys *pipe_winsys;
+
+ unsigned batch_data[XBCWS_BATCHBUFFER_SIZE];
+ unsigned batch_nr;
+ unsigned batch_size;
+ unsigned batch_alloc;
+};
+
+
+/* Turn a brw_winsys into an xlib_brw_context_winsys:
+ */
+static inline struct xlib_brw_context_winsys *
+xlib_brw_context_winsys( struct brw_winsys *sws )
+{
+ return (struct xlib_brw_context_winsys *)sws;
+}
+
+
+/* Simple batchbuffer interface:
+ */
+
+static unsigned *xbcws_batch_start( struct brw_winsys *sws,
+ unsigned dwords,
+ unsigned relocs )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ if (xbcws->batch_size < xbcws->batch_nr + dwords)
+ return NULL;
+
+ xbcws->batch_alloc = xbcws->batch_nr + dwords;
+ return (void *)1; /* not a valid pointer! */
+}
+
+static void xbcws_batch_dword( struct brw_winsys *sws,
+ unsigned dword )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ assert(xbcws->batch_nr < xbcws->batch_alloc);
+ xbcws->batch_data[xbcws->batch_nr++] = dword;
+}
+
+static void xbcws_batch_reloc( struct brw_winsys *sws,
+ struct pipe_buffer *buf,
+ unsigned access_flags,
+ unsigned delta )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ assert(xbcws->batch_nr < xbcws->batch_alloc);
+ xbcws->batch_data[xbcws->batch_nr++] =
+ ( xlib_brw_get_buffer_offset( NULL, buf, access_flags ) +
+ delta );
+}
+
+static void xbcws_batch_end( struct brw_winsys *sws )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ assert(xbcws->batch_nr <= xbcws->batch_alloc);
+ xbcws->batch_alloc = 0;
+}
+
+static void xbcws_batch_flush( struct brw_winsys *sws,
+ struct pipe_fence_handle **fence )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+ assert(xbcws->batch_nr <= xbcws->batch_size);
+
+ if (xbcws->batch_nr) {
+ xlib_brw_commands_aub( xbcws->pipe_winsys,
+ xbcws->batch_data,
+ xbcws->batch_nr );
+ }
+
+ xbcws->batch_nr = 0;
+}
+
+
+
+/* Really a per-device function, just pass through:
+ */
+static unsigned xbcws_get_buffer_offset( struct brw_winsys *sws,
+ struct pipe_buffer *buf,
+ unsigned access_flags )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ return xlib_brw_get_buffer_offset( xbcws->pipe_winsys,
+ buf,
+ access_flags );
+}
+
+
+/* Really a per-device function, just pass through:
+ */
+static void xbcws_buffer_subdata_typed( struct brw_winsys *sws,
+ struct pipe_buffer *buf,
+ unsigned long offset,
+ unsigned long size,
+ const void *data,
+ unsigned data_type )
+{
+ struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws);
+
+ xlib_brw_buffer_subdata_typed( xbcws->pipe_winsys,
+ buf,
+ offset,
+ size,
+ data,
+ data_type );
+}
+
+
+/**
+ * Create i965 hardware rendering context, but plugged into a
+ * dump-to-aubfile backend.
+ */
+struct pipe_context *
+xlib_create_brw_context( struct pipe_screen *screen,
+ void *unused )
+{
+ struct xlib_brw_context_winsys *xbcws = CALLOC_STRUCT( xlib_brw_context_winsys );
+
+ /* Fill in this struct with callbacks that i965simple will need to
+ * communicate with the window system, buffer manager, etc.
+ */
+ xbcws->brw_context_winsys.batch_start = xbcws_batch_start;
+ xbcws->brw_context_winsys.batch_dword = xbcws_batch_dword;
+ xbcws->brw_context_winsys.batch_reloc = xbcws_batch_reloc;
+ xbcws->brw_context_winsys.batch_end = xbcws_batch_end;
+ xbcws->brw_context_winsys.batch_flush = xbcws_batch_flush;
+ xbcws->brw_context_winsys.buffer_subdata_typed = xbcws_buffer_subdata_typed;
+ xbcws->brw_context_winsys.get_buffer_offset = xbcws_get_buffer_offset;
+
+ xbcws->pipe_winsys = screen->winsys; /* redundant */
+
+ xbcws->batch_size = XBCWS_BATCHBUFFER_SIZE;
+
+ /* Create the i965simple context:
+ */
+#ifdef GALLIUM_CELL
+ return NULL;
+#else
+ return brw_create( screen,
+ &xbcws->brw_context_winsys,
+ 0 );
+#endif
+}
diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xlib_brw_screen.c
index b7c10b6bca..8e1bfab2f5 100644
--- a/src/gallium/winsys/xlib/xm_winsys_aub.c
+++ b/src/gallium/winsys/xlib/xlib_brw_screen.c
@@ -33,19 +33,28 @@
*/
-#include "glxheader.h"
-#include "xmesaP.h"
+//#include "state_trackers/xlib/glxheader.h"
+//#include "state_trackers/xlib/xmesaP.h"
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "i965simple/brw_winsys.h"
#include "i965simple/brw_screen.h"
-#include "brw_aub.h"
-#include "xm_winsys_aub.h"
+#include "i965simple/brw_context.h"
+#include "xlib_brw_aub.h"
+#include "xlib_brw.h"
+#include "xlib.h"
+
+static struct pipe_buffer *
+buffer_from_surface(struct pipe_surface *surface)
+{
+ struct brw_texture *texture = (struct brw_texture *)surface;
+ return texture->buffer;
+}
struct aub_buffer {
char *data;
@@ -142,29 +151,8 @@ aub_buffer_destroy(struct pipe_winsys *winsys,
}
-void xmesa_buffer_subdata_aub(struct pipe_winsys *winsys,
- struct pipe_buffer *buf,
- unsigned long offset,
- unsigned long size,
- const void *data,
- unsigned aub_type,
- unsigned aub_sub_type)
-{
- struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
- struct aub_buffer *sbo = aub_bo(buf);
-
- assert(sbo->size > offset + size);
- memcpy(sbo->data + offset, data, size);
-
- brw_aub_gtt_data( iws->aubfile,
- sbo->offset + offset,
- sbo->data + offset,
- size,
- aub_type,
- aub_sub_type );
-}
-void xmesa_commands_aub(struct pipe_winsys *winsys,
+void xlib_brw_commands_aub(struct pipe_winsys *winsys,
unsigned *cmds,
unsigned nr_dwords)
{
@@ -182,16 +170,10 @@ void xmesa_commands_aub(struct pipe_winsys *winsys,
}
+/* XXX: fix me:
+ */
static struct aub_pipe_winsys *global_winsys = NULL;
-void xmesa_display_aub( /* struct pipe_winsys *winsys, */
- struct pipe_surface *surface )
-{
-// struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
- brw_aub_dump_bmp( global_winsys->aubfile,
- surface,
- aub_bo(surface->buffer)->offset );
-}
@@ -245,21 +227,13 @@ aub_user_buffer_create(struct pipe_winsys *winsys, void *ptr, unsigned bytes)
*/
static void
aub_flush_frontbuffer( struct pipe_winsys *winsys,
- struct pipe_surface *surf,
- void *context_private)
-{
- xmesa_display_aub( surf );
-}
-
-static struct pipe_surface *
-aub_i915_surface_alloc(struct pipe_winsys *winsys)
+ struct pipe_surface *surface,
+ void *context_private)
{
- struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface);
- if (surf) {
- surf->refcount = 1;
- surf->winsys = winsys;
- }
- return surf;
+// struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
+ brw_aub_dump_bmp( global_winsys->aubfile,
+ surface,
+ aub_bo(buffer_from_surface(surface))->offset );
}
@@ -272,58 +246,48 @@ round_up(unsigned n, unsigned multiple)
return (n + multiple - 1) & ~(multiple - 1);
}
-static int
-aub_i915_surface_alloc_storage(struct pipe_winsys *winsys,
- struct pipe_surface *surf,
+static struct pipe_buffer *
+aub_i915_surface_buffer_create(struct pipe_winsys *winsys,
unsigned width, unsigned height,
enum pipe_format format,
- unsigned flags,
- unsigned tex_usage)
+ unsigned usage,
+ unsigned *stride)
{
const unsigned alignment = 64;
+ struct pipe_format_block block;
+ unsigned nblocksx, nblocksy;
- surf->width = width;
- surf->height = height;
- surf->format = format;
- pf_get_block(format, &surf->block);
- surf->nblocksx = pf_get_nblocksx(&surf->block, width);
- surf->nblocksy = pf_get_nblocksy(&surf->block, height);
- surf->stride = round_up(surf->nblocksx * surf->block.size, alignment);
- surf->usage = flags;
-
- assert(!surf->buffer);
- surf->buffer = winsys->buffer_create(winsys, alignment,
- PIPE_BUFFER_USAGE_PIXEL,
- surf->stride * surf->nblocksy);
- if(!surf->buffer)
- return -1;
-
- return 0;
-}
+ pf_get_block(format, &block);
+ nblocksx = pf_get_nblocksx(&block, width);
+ nblocksy = pf_get_nblocksy(&block, height);
+ *stride = round_up(nblocksx * block.size, alignment);
-static void
-aub_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s)
-{
- struct pipe_surface *surf = *s;
- surf->refcount--;
- if (surf->refcount == 0) {
- if (surf->buffer)
- winsys_buffer_reference(winsys, &surf->buffer, NULL);
- free(surf);
- }
- *s = NULL;
+ return winsys->buffer_create(winsys, alignment,
+ usage,
+ *stride * nblocksy);
}
-
static const char *
aub_get_name( struct pipe_winsys *winsys )
{
return "Aub/xlib";
}
-struct pipe_winsys *
-xmesa_create_pipe_winsys_aub( void )
+static void
+xlib_brw_destroy_pipe_winsys_aub( struct pipe_winsys *winsys )
+
+{
+ struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
+ brw_aub_destroy(iws->aubfile);
+ free(iws->pool);
+ free(iws);
+}
+
+
+
+static struct pipe_winsys *
+xlib_create_brw_winsys( void )
{
struct aub_pipe_winsys *iws = CALLOC_STRUCT( aub_pipe_winsys );
@@ -341,10 +305,9 @@ xmesa_create_pipe_winsys_aub( void )
iws->winsys.buffer_destroy = aub_buffer_destroy;
iws->winsys.flush_frontbuffer = aub_flush_frontbuffer;
iws->winsys.get_name = aub_get_name;
+ iws->winsys.destroy = xlib_brw_destroy_pipe_winsys_aub;
- iws->winsys.surface_alloc = aub_i915_surface_alloc;
- iws->winsys.surface_alloc_storage = aub_i915_surface_alloc_storage;
- iws->winsys.surface_release = aub_i915_surface_release;
+ iws->winsys.surface_buffer_create = aub_i915_surface_buffer_create;
iws->aubfile = brw_aubfile_create();
iws->size = AUB_BUF_SIZE;
@@ -359,122 +322,49 @@ xmesa_create_pipe_winsys_aub( void )
}
-void
-xmesa_destroy_pipe_winsys_aub( struct pipe_winsys *winsys )
-
+static struct pipe_screen *
+xlib_create_brw_screen( void )
{
- struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys);
- brw_aub_destroy(iws->aubfile);
- free(iws->pool);
- free(iws);
-}
-
-
-
-
-
+#ifndef GALLIUM_CELL
+ struct pipe_winsys *winsys;
+ struct pipe_screen *screen;
+ winsys = xlib_create_brw_winsys();
+ if (winsys == NULL)
+ return NULL;
-#define IWS_BATCHBUFFER_SIZE 1024
-
-struct aub_brw_winsys {
- struct brw_winsys winsys; /**< batch buffer funcs */
- struct aub_context *aub;
-
- struct pipe_winsys *pipe_winsys;
+ screen = brw_create_screen(winsys, 0/* XXX pci_id */);
+ if (screen == NULL)
+ goto fail;
- unsigned batch_data[IWS_BATCHBUFFER_SIZE];
- unsigned batch_nr;
- unsigned batch_size;
- unsigned batch_alloc;
-};
+ return screen;
+fail:
+ if (winsys)
+ winsys->destroy( winsys );
-/* Turn a i965simple winsys into an aub/i965simple winsys:
- */
-static inline struct aub_brw_winsys *
-aub_brw_winsys( struct brw_winsys *sws )
-{
- return (struct aub_brw_winsys *)sws;
+#endif
+ return NULL;
}
-/* Simple batchbuffer interface:
+/* These per-screen functions are actually made available to the driver
+ * through the brw_winsys (per-context) entity.
*/
-
-static unsigned *aub_i965_batch_start( struct brw_winsys *sws,
- unsigned dwords,
- unsigned relocs )
-{
- struct aub_brw_winsys *iws = aub_brw_winsys(sws);
-
- if (iws->batch_size < iws->batch_nr + dwords)
- return NULL;
-
- iws->batch_alloc = iws->batch_nr + dwords;
- return (void *)1; /* not a valid pointer! */
-}
-
-static void aub_i965_batch_dword( struct brw_winsys *sws,
- unsigned dword )
-{
- struct aub_brw_winsys *iws = aub_brw_winsys(sws);
-
- assert(iws->batch_nr < iws->batch_alloc);
- iws->batch_data[iws->batch_nr++] = dword;
-}
-
-static void aub_i965_batch_reloc( struct brw_winsys *sws,
- struct pipe_buffer *buf,
- unsigned access_flags,
- unsigned delta )
-{
- struct aub_brw_winsys *iws = aub_brw_winsys(sws);
-
- assert(iws->batch_nr < iws->batch_alloc);
- iws->batch_data[iws->batch_nr++] = aub_bo(buf)->offset + delta;
-}
-
-static unsigned aub_i965_get_buffer_offset( struct brw_winsys *sws,
- struct pipe_buffer *buf,
- unsigned access_flags )
+unsigned xlib_brw_get_buffer_offset( struct pipe_winsys *pws,
+ struct pipe_buffer *buf,
+ unsigned access_flags )
{
return aub_bo(buf)->offset;
}
-static void aub_i965_batch_end( struct brw_winsys *sws )
+void xlib_brw_buffer_subdata_typed( struct pipe_winsys *pws,
+ struct pipe_buffer *buf,
+ unsigned long offset,
+ unsigned long size,
+ const void *data,
+ unsigned data_type )
{
- struct aub_brw_winsys *iws = aub_brw_winsys(sws);
-
- assert(iws->batch_nr <= iws->batch_alloc);
- iws->batch_alloc = 0;
-}
-
-static void aub_i965_batch_flush( struct brw_winsys *sws,
- struct pipe_fence_handle **fence )
-{
- struct aub_brw_winsys *iws = aub_brw_winsys(sws);
- assert(iws->batch_nr <= iws->batch_size);
-
- if (iws->batch_nr) {
- xmesa_commands_aub( iws->pipe_winsys,
- iws->batch_data,
- iws->batch_nr );
- }
-
- iws->batch_nr = 0;
-}
-
-
-
-static void aub_i965_buffer_subdata_typed(struct brw_winsys *winsys,
- struct pipe_buffer *buf,
- unsigned long offset,
- unsigned long size,
- const void *data,
- unsigned data_type)
-{
- struct aub_brw_winsys *iws = aub_brw_winsys(winsys);
unsigned aub_type = DW_GENERAL_STATE;
unsigned aub_sub_type;
@@ -545,42 +435,36 @@ static void aub_i965_buffer_subdata_typed(struct brw_winsys *winsys,
break;
}
- xmesa_buffer_subdata_aub( iws->pipe_winsys,
- buf,
- offset,
- size,
- data,
- aub_type,
- aub_sub_type );
-}
-
-/**
- * Create i965 hardware rendering context.
- */
-struct pipe_context *
-xmesa_create_i965simple( struct pipe_winsys *winsys )
-{
- struct aub_brw_winsys *iws = CALLOC_STRUCT( aub_brw_winsys );
- struct pipe_screen *screen = brw_create_screen(winsys, 0/* XXX pci_id */);
-
- /* Fill in this struct with callbacks that i965simple will need to
- * communicate with the window system, buffer manager, etc.
- */
- iws->winsys.batch_start = aub_i965_batch_start;
- iws->winsys.batch_dword = aub_i965_batch_dword;
- iws->winsys.batch_reloc = aub_i965_batch_reloc;
- iws->winsys.batch_end = aub_i965_batch_end;
- iws->winsys.batch_flush = aub_i965_batch_flush;
- iws->winsys.buffer_subdata_typed = aub_i965_buffer_subdata_typed;
- iws->winsys.get_buffer_offset = aub_i965_get_buffer_offset;
+ {
+ struct aub_pipe_winsys *iws = aub_pipe_winsys(pws);
+ struct aub_buffer *sbo = aub_bo(buf);
- iws->pipe_winsys = winsys;
+ assert(sbo->size > offset + size);
+ memcpy(sbo->data + offset, data, size);
- iws->batch_size = IWS_BATCHBUFFER_SIZE;
+ brw_aub_gtt_data( iws->aubfile,
+ sbo->offset + offset,
+ sbo->data + offset,
+ size,
+ aub_type,
+ aub_sub_type );
+ }
+}
+
- /* Create the i965simple context:
- */
- return brw_create( screen,
- &iws->winsys,
- 0 );
+static void
+xlib_brw_display_surface(struct xmesa_buffer *b,
+ struct pipe_surface *surf)
+{
+ brw_aub_dump_bmp( global_winsys->aubfile,
+ surf,
+ aub_bo(buffer_from_surface(surf))->offset );
}
+
+
+struct xm_driver xlib_brw_driver =
+{
+ .create_pipe_screen = xlib_create_brw_screen,
+ .create_pipe_context = xlib_create_brw_context,
+ .display_surface = xlib_brw_display_surface,
+};
diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c
new file mode 100644
index 0000000000..c87564f4dc
--- /dev/null
+++ b/src/gallium/winsys/xlib/xlib_cell.c
@@ -0,0 +1,437 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell
+ * Brian Paul
+ */
+
+#include "xlib.h"
+
+#ifdef GALLIUM_CELL
+
+#include "xm_api.h"
+
+#undef ASSERT
+#undef Elements
+
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_format.h"
+#include "pipe/p_context.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "cell/ppu/cell_context.h"
+#include "cell/ppu/cell_screen.h"
+#include "cell/ppu/cell_winsys.h"
+#include "cell/ppu/cell_texture.h"
+
+
+/**
+ * Subclass of pipe_buffer for Xlib winsys.
+ * Low-level OS/window system memory buffer
+ */
+struct xm_buffer
+{
+ struct pipe_buffer base;
+ boolean userBuffer; /** Is this a user-space buffer? */
+ void *data;
+ void *mapped;
+
+ XImage *tempImage;
+ int shm;
+};
+
+
+/**
+ * Subclass of pipe_winsys for Xlib winsys
+ */
+struct xmesa_pipe_winsys
+{
+ struct pipe_winsys base;
+};
+
+
+
+/** Cast wrapper */
+static INLINE struct xm_buffer *
+xm_buffer( struct pipe_buffer *buf )
+{
+ return (struct xm_buffer *)buf;
+}
+
+
+/* Most callbacks map directly onto dri_bufmgr operations:
+ */
+static void *
+xm_buffer_map(struct pipe_winsys *pws, struct pipe_buffer *buf,
+ unsigned flags)
+{
+ struct xm_buffer *xm_buf = xm_buffer(buf);
+ xm_buf->mapped = xm_buf->data;
+ return xm_buf->mapped;
+}
+
+static void
+xm_buffer_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf)
+{
+ struct xm_buffer *xm_buf = xm_buffer(buf);
+ xm_buf->mapped = NULL;
+}
+
+static void
+xm_buffer_destroy(struct pipe_winsys *pws,
+ struct pipe_buffer *buf)
+{
+ struct xm_buffer *oldBuf = xm_buffer(buf);
+
+ if (oldBuf->data) {
+ {
+ if (!oldBuf->userBuffer) {
+ align_free(oldBuf->data);
+ }
+ }
+
+ oldBuf->data = NULL;
+ }
+
+ free(oldBuf);
+}
+
+
+/**
+ * For Cell. Basically, rearrange the pixels/quads from this layout:
+ * +--+--+--+--+
+ * |p0|p1|p2|p3|....
+ * +--+--+--+--+
+ *
+ * to this layout:
+ * +--+--+
+ * |p0|p1|....
+ * +--+--+
+ * |p2|p3|
+ * +--+--+
+ */
+static void
+twiddle_tile(const uint *tileIn, uint *tileOut)
+{
+ int y, x;
+
+ for (y = 0; y < TILE_SIZE; y+=2) {
+ for (x = 0; x < TILE_SIZE; x+=2) {
+ int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
+ tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
+ tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
+ tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
+ tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
+ }
+ }
+}
+
+
+
+/**
+ * Display a surface that's in a tiled configuration. That is, all the
+ * pixels for a TILE_SIZExTILE_SIZE block are contiguous in memory.
+ */
+static void
+xlib_cell_display_surface(struct xmesa_buffer *b, struct pipe_surface *surf)
+{
+ XImage *ximage;
+ struct xm_buffer *xm_buf = xm_buffer(
+ cell_texture(surf->texture)->buffer);
+ const uint tilesPerRow = (surf->width + TILE_SIZE - 1) / TILE_SIZE;
+ uint x, y;
+
+ ximage = b->tempImage;
+
+ /* check that the XImage has been previously initialized */
+ assert(ximage->format);
+ assert(ximage->bitmap_unit);
+
+ /* update XImage's fields */
+ ximage->width = TILE_SIZE;
+ ximage->height = TILE_SIZE;
+ ximage->bytes_per_line = TILE_SIZE * 4;
+
+ for (y = 0; y < surf->height; y += TILE_SIZE) {
+ for (x = 0; x < surf->width; x += TILE_SIZE) {
+ uint tmpTile[TILE_SIZE * TILE_SIZE];
+ int tx = x / TILE_SIZE;
+ int ty = y / TILE_SIZE;
+ int offset = ty * tilesPerRow + tx;
+ int w = TILE_SIZE;
+ int h = TILE_SIZE;
+
+ if (y + h > surf->height)
+ h = surf->height - y;
+ if (x + w > surf->width)
+ w = surf->width - x;
+
+ /* offset in pixels */
+ offset *= TILE_SIZE * TILE_SIZE;
+
+ /* twiddle from ximage buffer to temp tile */
+ twiddle_tile((uint *) xm_buf->data + offset, tmpTile);
+ /* display temp tile data */
+ ximage->data = (char *) tmpTile;
+ XPutImage(b->xm_visual->display, b->drawable, b->gc,
+ ximage, 0, 0, x, y, w, h);
+ }
+ }
+}
+
+
+
+
+
+static void
+xm_flush_frontbuffer(struct pipe_winsys *pws,
+ struct pipe_surface *surf,
+ void *context_private)
+{
+ /*
+ * The front color buffer is actually just another XImage buffer.
+ * This function copies that XImage to the actual X Window.
+ */
+ XMesaContext xmctx = (XMesaContext) context_private;
+ xlib_cell_display_surface(xmctx->xm_buffer, surf);
+}
+
+
+
+static const char *
+xm_get_name(struct pipe_winsys *pws)
+{
+ return "Xlib/Cell";
+}
+
+
+static struct pipe_buffer *
+xm_buffer_create(struct pipe_winsys *pws,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size)
+{
+ struct xm_buffer *buffer = CALLOC_STRUCT(xm_buffer);
+
+ buffer->base.refcount = 1;
+ buffer->base.alignment = alignment;
+ buffer->base.usage = usage;
+ buffer->base.size = size;
+
+
+ if (buffer->data == NULL) {
+ buffer->shm = 0;
+
+ /* align to 16-byte multiple for Cell */
+ buffer->data = align_malloc(size, max(alignment, 16));
+ }
+
+ return &buffer->base;
+}
+
+
+/**
+ * Create buffer which wraps user-space data.
+ */
+static struct pipe_buffer *
+xm_user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes)
+{
+ struct xm_buffer *buffer = CALLOC_STRUCT(xm_buffer);
+ buffer->base.refcount = 1;
+ buffer->base.size = bytes;
+ buffer->userBuffer = TRUE;
+ buffer->data = ptr;
+ buffer->shm = 0;
+
+ return &buffer->base;
+}
+
+
+
+/**
+ * Round n up to next multiple.
+ */
+static INLINE unsigned
+round_up(unsigned n, unsigned multiple)
+{
+ return (n + multiple - 1) & ~(multiple - 1);
+}
+
+static struct pipe_buffer *
+xm_surface_buffer_create(struct pipe_winsys *winsys,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned usage,
+ unsigned *stride)
+{
+ const unsigned alignment = 64;
+ struct pipe_format_block block;
+ unsigned nblocksx, nblocksy;
+
+ pf_get_block(format, &block);
+ nblocksx = pf_get_nblocksx(&block, width);
+ nblocksy = pf_get_nblocksy(&block, height);
+ *stride = round_up(nblocksx * block.size, alignment);
+
+ return winsys->buffer_create(winsys, alignment,
+ usage,
+ /* XXX a bit of a hack */
+ *stride * round_up(nblocksy, TILE_SIZE));
+}
+
+
+/*
+ * Fence functions - basically nothing to do, as we don't create any actual
+ * fence objects.
+ */
+
+static void
+xm_fence_reference(struct pipe_winsys *sws, struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+}
+
+
+static int
+xm_fence_signalled(struct pipe_winsys *sws, struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ return 0;
+}
+
+
+static int
+xm_fence_finish(struct pipe_winsys *sws, struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ return 0;
+}
+
+
+
+static struct pipe_winsys *
+xlib_create_cell_winsys( void )
+{
+ static struct xmesa_pipe_winsys *ws = NULL;
+
+ if (!ws) {
+ ws = CALLOC_STRUCT(xmesa_pipe_winsys);
+
+ /* Fill in this struct with callbacks that pipe will need to
+ * communicate with the window system, buffer manager, etc.
+ */
+ ws->base.buffer_create = xm_buffer_create;
+ ws->base.user_buffer_create = xm_user_buffer_create;
+ ws->base.buffer_map = xm_buffer_map;
+ ws->base.buffer_unmap = xm_buffer_unmap;
+ ws->base.buffer_destroy = xm_buffer_destroy;
+
+ ws->base.surface_buffer_create = xm_surface_buffer_create;
+
+ ws->base.fence_reference = xm_fence_reference;
+ ws->base.fence_signalled = xm_fence_signalled;
+ ws->base.fence_finish = xm_fence_finish;
+
+ ws->base.flush_frontbuffer = xm_flush_frontbuffer;
+ ws->base.get_name = xm_get_name;
+ }
+
+ return &ws->base;
+}
+
+
+static struct pipe_screen *
+xlib_create_cell_screen( struct pipe_winsys *pws )
+{
+ struct pipe_winsys *winsys;
+ struct pipe_screen *screen;
+
+ winsys = xlib_create_cell_winsys();
+ if (winsys == NULL)
+ return NULL;
+
+ screen = cell_create_screen(winsys);
+ if (screen == NULL)
+ goto fail;
+
+ return screen;
+
+fail:
+ if (winsys)
+ winsys->destroy( winsys );
+
+ return NULL;
+}
+
+
+static struct pipe_context *
+xlib_create_cell_context( struct pipe_screen *screen,
+ void *priv )
+{
+ struct pipe_context *pipe;
+
+
+ /* This takes a cell_winsys pointer, but probably that should be
+ * created and stored at screen creation, not context creation.
+ *
+ * The actual cell_winsys value isn't used for anything, so just
+ * passing NULL for now.
+ */
+ pipe = cell_create_context( screen, NULL);
+ if (pipe == NULL)
+ goto fail;
+
+ pipe->priv = priv;
+
+ return pipe;
+
+fail:
+ return NULL;
+}
+
+struct xm_driver xlib_cell_driver =
+{
+ .create_pipe_screen = xlib_create_cell_screen,
+ .create_pipe_context = xlib_create_cell_context,
+ .display_surface = xlib_cell_display_surface,
+};
+
+#else
+
+struct xm_driver xlib_cell_driver =
+{
+ .create_pipe_screen = NULL,
+ .create_pipe_context = NULL,
+ .display_surface = NULL,
+};
+
+#endif
diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xlib_softpipe.c
index 3334af175b..71f12b2b47 100644
--- a/src/gallium/winsys/xlib/xm_winsys.c
+++ b/src/gallium/winsys/xlib/xlib_softpipe.c
@@ -33,35 +33,21 @@
*/
-#include "glxheader.h"
-#include "xmesaP.h"
+#include "xm_api.h"
#undef ASSERT
#undef Elements
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_format.h"
#include "pipe/p_context.h"
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "softpipe/sp_winsys.h"
+#include "softpipe/sp_texture.h"
-#ifdef GALLIUM_CELL
-#include "cell/ppu/cell_context.h"
-#include "cell/ppu/cell_screen.h"
-#include "cell/ppu/cell_winsys.h"
-#else
-#define TILE_SIZE 32 /* avoid compilation errors */
-#endif
-
-#ifdef GALLIUM_TRACE
-#include "trace/tr_screen.h"
-#include "trace/tr_context.h"
-#endif
-
-#include "xm_winsys_aub.h"
-
+#include "xlib.h"
/**
* Subclass of pipe_buffer for Xlib winsys.
@@ -73,10 +59,10 @@ struct xm_buffer
boolean userBuffer; /** Is this a user-space buffer? */
void *data;
void *mapped;
-
+
XImage *tempImage;
+#ifdef USE_XSHM
int shm;
-#if defined(USE_XSHM) && !defined(XFree86Server)
XShmSegmentInfo shminfo;
#endif
};
@@ -88,8 +74,10 @@ struct xm_buffer
struct xmesa_pipe_winsys
{
struct pipe_winsys base;
- struct xmesa_visual *xm_visual;
+/* struct xmesa_visual *xm_visual; */
+#ifdef USE_XSHM
int shm;
+#endif
};
@@ -105,9 +93,13 @@ xm_buffer( struct pipe_buffer *buf )
/**
* X Shared Memory Image extension code
*/
-#if defined(USE_XSHM) && !defined(XFree86Server)
-
+#ifdef USE_XSHM
#define XSHM_ENABLED(b) ((b)->shm)
+#else
+#define XSHM_ENABLED(b) 0
+#endif
+
+#ifdef USE_XSHM
static volatile int mesaXErrorFlag = 0;
@@ -115,7 +107,7 @@ static volatile int mesaXErrorFlag = 0;
* Catches potential Xlib errors.
*/
static int
-mesaHandleXError(XMesaDisplay *dpy, XErrorEvent *event)
+mesaHandleXError(Display *dpy, XErrorEvent *event)
{
(void) dpy;
(void) event;
@@ -157,10 +149,7 @@ alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb,
* errors at different points if the extension won't work. Therefore
* we have to be very careful...
*/
-#if 0
- GC gc;
-#endif
- int (*old_handler)(XMesaDisplay *, XErrorEvent *);
+ int (*old_handler)(Display *, XErrorEvent *);
b->tempImage = XShmCreateImage(xmb->xm_visual->display,
xmb->xm_visual->visinfo->visual,
@@ -192,41 +181,13 @@ alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb,
return;
}
-
- /* Finally, try an XShmPutImage to be really sure the extension works */
-#if 0
- gc = XCreateGC(xmb->xm_visual->display, xmb->drawable, 0, NULL);
- XShmPutImage(xmb->xm_visual->display, xmb->drawable, gc,
- b->tempImage, 0, 0, 0, 0, 1, 1 /*one pixel*/, False);
- XSync(xmb->xm_visual->display, False);
- XFreeGC(xmb->xm_visual->display, gc);
- (void) XSetErrorHandler(old_handler);
- if (mesaXErrorFlag) {
- XFlush(xmb->xm_visual->display);
- mesaXErrorFlag = 0;
- XDestroyImage(b->tempImage);
- b->tempImage = NULL;
- b->shm = 0;
- return;
- }
-#endif
+ b->shm = 1;
}
-#else
-
-#define XSHM_ENABLED(b) 0
-
-static void
-alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb,
- unsigned width, unsigned height)
-{
- b->shm = 0;
-}
#endif /* USE_XSHM */
-
/* Most callbacks map direcly onto dri_bufmgr operations:
*/
static void *
@@ -252,7 +213,7 @@ xm_buffer_destroy(struct pipe_winsys *pws,
struct xm_buffer *oldBuf = xm_buffer(buf);
if (oldBuf->data) {
-#if defined(USE_XSHM) && !defined(XFree86Server)
+#ifdef USE_XSHM
if (oldBuf->shminfo.shmid >= 0) {
shmdt(oldBuf->shminfo.shmaddr);
shmctl(oldBuf->shminfo.shmid, IPC_RMID, 0);
@@ -276,154 +237,47 @@ xm_buffer_destroy(struct pipe_winsys *pws,
/**
- * For Cell. Basically, rearrange the pixels/quads from this layout:
- * +--+--+--+--+
- * |p0|p1|p2|p3|....
- * +--+--+--+--+
- *
- * to this layout:
- * +--+--+
- * |p0|p1|....
- * +--+--+
- * |p2|p3|
- * +--+--+
- */
-static void
-twiddle_tile(const uint *tileIn, uint *tileOut)
-{
- int y, x;
-
- for (y = 0; y < TILE_SIZE; y+=2) {
- for (x = 0; x < TILE_SIZE; x+=2) {
- int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
- tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
- tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
- tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
- tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
- }
- }
-}
-
-
-
-/**
- * Display a surface that's in a tiled configuration. That is, all the
- * pixels for a TILE_SIZExTILE_SIZE block are contiguous in memory.
- */
-static void
-xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
-{
- XImage *ximage;
- struct xm_buffer *xm_buf = xm_buffer(surf->buffer);
- const uint tilesPerRow = (surf->width + TILE_SIZE - 1) / TILE_SIZE;
- uint x, y;
-
- if (XSHM_ENABLED(xm_buf) && (xm_buf->tempImage == NULL)) {
- alloc_shm_ximage(xm_buf, b, TILE_SIZE, TILE_SIZE);
- }
-
- ximage = (XSHM_ENABLED(xm_buf)) ? xm_buf->tempImage : b->tempImage;
-
- /* check that the XImage has been previously initialized */
- assert(ximage->format);
- assert(ximage->bitmap_unit);
-
- if (!XSHM_ENABLED(xm_buf)) {
- /* update XImage's fields */
- ximage->width = TILE_SIZE;
- ximage->height = TILE_SIZE;
- ximage->bytes_per_line = TILE_SIZE * 4;
- }
-
- for (y = 0; y < surf->height; y += TILE_SIZE) {
- for (x = 0; x < surf->width; x += TILE_SIZE) {
- uint tmpTile[TILE_SIZE * TILE_SIZE];
- int tx = x / TILE_SIZE;
- int ty = y / TILE_SIZE;
- int offset = ty * tilesPerRow + tx;
- int w = TILE_SIZE;
- int h = TILE_SIZE;
-
- if (y + h > surf->height)
- h = surf->height - y;
- if (x + w > surf->width)
- w = surf->width - x;
-
- /* offset in pixels */
- offset *= TILE_SIZE * TILE_SIZE;
-
- if (XSHM_ENABLED(xm_buf)) {
- ximage->data = (char *) xm_buf->data + 4 * offset;
- /* make copy of tile data */
- memcpy(tmpTile, (uint *) ximage->data, sizeof(tmpTile));
- /* twiddle from temp to ximage in shared memory */
- twiddle_tile(tmpTile, (uint *) ximage->data);
- /* display image in shared memory */
-#if defined(USE_XSHM) && !defined(XFree86Server)
- XShmPutImage(b->xm_visual->display, b->drawable, b->gc,
- ximage, 0, 0, x, y, w, h, False);
-#endif
- }
- else {
- /* twiddel from ximage buffer to temp tile */
- twiddle_tile((uint *) xm_buf->data + offset, tmpTile);
- /* display temp tile data */
- ximage->data = (char *) tmpTile;
- XPutImage(b->xm_visual->display, b->drawable, b->gc,
- ximage, 0, 0, x, y, w, h);
- }
- }
- }
-}
-
-
-/**
* Display/copy the image in the surface into the X window specified
* by the XMesaBuffer.
*/
-void
-xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf)
+static void
+xlib_softpipe_display_surface(struct xmesa_buffer *b,
+ struct pipe_surface *surf)
{
XImage *ximage;
- struct xm_buffer *xm_buf = xm_buffer(surf->buffer);
+ struct softpipe_texture *spt = softpipe_texture(surf->texture);
+ struct xm_buffer *xm_buf = xm_buffer(spt->buffer);
static boolean no_swap = 0;
static boolean firsttime = 1;
- static int tileSize = 0;
if (firsttime) {
no_swap = getenv("SP_NO_RAST") != NULL;
-#ifdef GALLIUM_CELL
- if (!getenv("GALLIUM_NOCELL")) {
- tileSize = 32; /** probably temporary */
- }
-#endif
firsttime = 0;
}
if (no_swap)
return;
- if (tileSize) {
- xmesa_display_surface_tiled(b, surf);
- return;
- }
-
+#ifdef USE_XSHM
if (XSHM_ENABLED(xm_buf) && (xm_buf->tempImage == NULL)) {
- assert(surf->block.width == 1);
- assert(surf->block.height == 1);
- alloc_shm_ximage(xm_buf, b, surf->stride/surf->block.size, surf->height);
+ assert(surf->texture->block.width == 1);
+ assert(surf->texture->block.height == 1);
+ alloc_shm_ximage(xm_buf, b, spt->stride[surf->level] /
+ surf->texture->block.size, surf->height);
}
+#endif
ximage = (XSHM_ENABLED(xm_buf)) ? xm_buf->tempImage : b->tempImage;
ximage->data = xm_buf->data;
/* display image in Window */
+#ifdef USE_XSHM
if (XSHM_ENABLED(xm_buf)) {
-#if defined(USE_XSHM) && !defined(XFree86Server)
XShmPutImage(b->xm_visual->display, b->drawable, b->gc,
ximage, 0, 0, 0, 0, surf->width, surf->height, False);
+ } else
#endif
- } else {
+ {
/* check that the XImage has been previously initialized */
assert(ximage->format);
assert(ximage->bitmap_unit);
@@ -431,7 +285,7 @@ xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf)
/* update XImage's fields */
ximage->width = surf->width;
ximage->height = surf->height;
- ximage->bytes_per_line = surf->stride;
+ ximage->bytes_per_line = spt->stride[surf->level];
XPutImage(b->xm_visual->display, b->drawable, b->gc,
ximage, 0, 0, 0, 0, surf->width, surf->height);
@@ -449,7 +303,7 @@ xm_flush_frontbuffer(struct pipe_winsys *pws,
* This function copies that XImage to the actual X Window.
*/
XMesaContext xmctx = (XMesaContext) context_private;
- xmesa_display_surface(xmctx->xm_buffer, surf);
+ xlib_softpipe_display_surface(xmctx->xm_buffer, surf);
}
@@ -468,17 +322,9 @@ xm_buffer_create(struct pipe_winsys *pws,
unsigned size)
{
struct xm_buffer *buffer = CALLOC_STRUCT(xm_buffer);
-#if defined(USE_XSHM) && !defined(XFree86Server)
+#ifdef USE_XSHM
struct xmesa_pipe_winsys *xpws = (struct xmesa_pipe_winsys *) pws;
-#endif
-
- buffer->base.refcount = 1;
- buffer->base.alignment = alignment;
- buffer->base.usage = usage;
- buffer->base.size = size;
-
-#if defined(USE_XSHM) && !defined(XFree86Server)
buffer->shminfo.shmid = -1;
buffer->shminfo.shmaddr = (char *) -1;
@@ -487,13 +333,17 @@ xm_buffer_create(struct pipe_winsys *pws,
if (alloc_shm(buffer, size)) {
buffer->data = buffer->shminfo.shmaddr;
+ buffer->shm = 1;
}
}
#endif
- if (buffer->data == NULL) {
- buffer->shm = 0;
+ buffer->base.refcount = 1;
+ buffer->base.alignment = alignment;
+ buffer->base.usage = usage;
+ buffer->base.size = size;
+ if (buffer->data == NULL) {
/* align to 16-byte multiple for Cell */
buffer->data = align_malloc(size, max(alignment, 16));
}
@@ -513,87 +363,33 @@ xm_user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes)
buffer->base.size = bytes;
buffer->userBuffer = TRUE;
buffer->data = ptr;
+#ifdef USE_XSHM
buffer->shm = 0;
+#endif
return &buffer->base;
}
-
-/**
- * Round n up to next multiple.
- */
-static INLINE unsigned
-round_up(unsigned n, unsigned multiple)
-{
- return (n + multiple - 1) & ~(multiple - 1);
-}
-
-static int
-xm_surface_alloc_storage(struct pipe_winsys *winsys,
- struct pipe_surface *surf,
+static struct pipe_buffer *
+xm_surface_buffer_create(struct pipe_winsys *winsys,
unsigned width, unsigned height,
- enum pipe_format format,
- unsigned flags,
- unsigned tex_usage)
+ enum pipe_format format,
+ unsigned usage,
+ unsigned *stride)
{
const unsigned alignment = 64;
+ struct pipe_format_block block;
+ unsigned nblocksx, nblocksy;
- surf->width = width;
- surf->height = height;
- surf->format = format;
- pf_get_block(format, &surf->block);
- surf->nblocksx = pf_get_nblocksx(&surf->block, width);
- surf->nblocksy = pf_get_nblocksy(&surf->block, height);
- surf->stride = round_up(surf->nblocksx * surf->block.size, alignment);
- surf->usage = flags;
-
- assert(!surf->buffer);
- surf->buffer = winsys->buffer_create(winsys, alignment,
- PIPE_BUFFER_USAGE_PIXEL,
-#ifdef GALLIUM_CELL /* XXX a bit of a hack */
- surf->stride * round_up(surf->nblocksy, TILE_SIZE));
-#else
- surf->stride * surf->nblocksy);
-#endif
-
- if(!surf->buffer)
- return -1;
-
- return 0;
-}
-
-
-/**
- * Called via winsys->surface_alloc() to create new surfaces.
- */
-static struct pipe_surface *
-xm_surface_alloc(struct pipe_winsys *ws)
-{
- struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface);
-
- assert(ws);
-
- surface->refcount = 1;
- surface->winsys = ws;
-
- return surface;
-}
-
-
+ pf_get_block(format, &block);
+ nblocksx = pf_get_nblocksx(&block, width);
+ nblocksy = pf_get_nblocksy(&block, height);
+ *stride = align(nblocksx * block.size, alignment);
-static void
-xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s)
-{
- struct pipe_surface *surf = *s;
- assert(!surf->texture);
- surf->refcount--;
- if (surf->refcount == 0) {
- if (surf->buffer)
- winsys_buffer_reference(winsys, &surf->buffer, NULL);
- free(surf);
- }
- *s = NULL;
+ return winsys->buffer_create(winsys, alignment,
+ usage,
+ *stride * nblocksy);
}
@@ -625,34 +421,15 @@ xm_fence_finish(struct pipe_winsys *sws, struct pipe_fence_handle *fence,
}
-/**
- * Return pointer to a pipe_winsys object.
- * For Xlib, this is a singleton object.
- * Nothing special for the Xlib driver so no subclassing or anything.
- */
-struct pipe_winsys *
-xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis)
-{
- static struct xmesa_pipe_winsys *ws = NULL;
-
- if (!ws) {
- ws = (struct xmesa_pipe_winsys *) xmesa_create_pipe_winsys_aub();
- }
- return &ws->base;
-}
-
static struct pipe_winsys *
-xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis)
+xlib_create_softpipe_winsys( void )
{
static struct xmesa_pipe_winsys *ws = NULL;
if (!ws) {
ws = CALLOC_STRUCT(xmesa_pipe_winsys);
- ws->xm_visual = xm_vis;
- ws->shm = xmesa_check_for_xshm(xm_vis->display);
-
/* Fill in this struct with callbacks that pipe will need to
* communicate with the window system, buffer manager, etc.
*/
@@ -662,9 +439,7 @@ xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis)
ws->base.buffer_unmap = xm_buffer_unmap;
ws->base.buffer_destroy = xm_buffer_destroy;
- ws->base.surface_alloc = xm_surface_alloc;
- ws->base.surface_alloc_storage = xm_surface_alloc_storage;
- ws->base.surface_release = xm_surface_release;
+ ws->base.surface_buffer_create = xm_surface_buffer_create;
ws->base.fence_reference = xm_fence_reference;
ws->base.fence_signalled = xm_fence_signalled;
@@ -678,42 +453,54 @@ xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis)
}
-struct pipe_context *
-xmesa_create_pipe_context(XMesaContext xmesa, uint pixelformat)
+static struct pipe_screen *
+xlib_create_softpipe_screen( void )
{
- struct pipe_winsys *pws;
- struct pipe_context *pipe;
-
- if (getenv("XM_AUB")) {
- pws = xmesa_get_pipe_winsys_aub(xmesa->xm_visual);
- }
- else {
- pws = xmesa_get_pipe_winsys(xmesa->xm_visual);
- }
+ struct pipe_winsys *winsys;
+ struct pipe_screen *screen;
-#ifdef GALLIUM_CELL
- if (!getenv("GALLIUM_NOCELL")) {
- struct cell_winsys *cws = cell_get_winsys(pixelformat);
- struct pipe_screen *screen = cell_create_screen(pws);
+ winsys = xlib_create_softpipe_winsys();
+ if (winsys == NULL)
+ return NULL;
- pipe = cell_create_context(screen, cws);
- }
- else
-#endif
- {
- struct pipe_screen *screen = softpipe_create_screen(pws);
+ screen = softpipe_create_screen(winsys);
+ if (screen == NULL)
+ goto fail;
- pipe = softpipe_create(screen, pws, NULL);
+ return screen;
-#ifdef GALLIUM_TRACE
- screen = trace_screen_create(screen);
-
- pipe = trace_context_create(screen, pipe);
-#endif
- }
+fail:
+ if (winsys)
+ winsys->destroy( winsys );
+
+ return NULL;
+}
- if (pipe)
- pipe->priv = xmesa;
+static struct pipe_context *
+xlib_create_softpipe_context( struct pipe_screen *screen,
+ void *context_private )
+{
+ struct pipe_context *pipe;
+
+ pipe = softpipe_create(screen, screen->winsys, NULL);
+ if (pipe == NULL)
+ goto fail;
+
+ pipe->priv = context_private;
return pipe;
+
+fail:
+ /* Free stuff here */
+ return NULL;
}
+
+struct xm_driver xlib_softpipe_driver =
+{
+ .create_pipe_screen = xlib_create_softpipe_screen,
+ .create_pipe_context = xlib_create_softpipe_context,
+ .display_surface = xlib_softpipe_display_surface
+};
+
+
+
diff --git a/src/gallium/winsys/xlib/xlib_trace.c b/src/gallium/winsys/xlib/xlib_trace.c
new file mode 100644
index 0000000000..37095c5d8e
--- /dev/null
+++ b/src/gallium/winsys/xlib/xlib_trace.c
@@ -0,0 +1,107 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell
+ * Brian Paul
+ */
+
+
+#include "xlib.h"
+
+#include "trace/tr_screen.h"
+#include "trace/tr_context.h"
+
+#include "pipe/p_screen.h"
+
+
+
+static struct pipe_screen *
+xlib_create_trace_screen( void )
+{
+ struct pipe_screen *screen, *trace_screen;
+
+ screen = xlib_softpipe_driver.create_pipe_screen();
+ if (screen == NULL)
+ goto fail;
+
+ /* Wrap it:
+ */
+ trace_screen = trace_screen_create(screen);
+ if (trace_screen == NULL)
+ goto fail;
+
+ return trace_screen;
+
+fail:
+ if (screen)
+ screen->destroy( screen );
+ return NULL;
+}
+
+static struct pipe_context *
+xlib_create_trace_context( struct pipe_screen *screen,
+ void *priv )
+{
+ struct pipe_context *pipe, *trace_pipe;
+
+ pipe = xlib_softpipe_driver.create_pipe_context( screen, priv );
+ if (pipe == NULL)
+ goto fail;
+
+ /* Wrap it:
+ */
+ trace_pipe = trace_context_create(screen, pipe);
+ if (trace_pipe == NULL)
+ goto fail;
+
+ trace_pipe->priv = priv;
+
+ return trace_pipe;
+
+fail:
+ return NULL;
+}
+
+static void
+xlib_trace_display_surface( struct xmesa_buffer *buffer,
+ struct pipe_surface *surf )
+{
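+ /* XXX: just forwards to the softpipe driver that this trace driver
+ * wraps; no trace-specific handling happens here. */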
+ xlib_softpipe_driver.display_surface( buffer, surf );
+}
+
+
+struct xm_driver xlib_trace_driver =
+{
+ .create_pipe_screen = xlib_create_trace_screen,
+ .create_pipe_context = xlib_create_trace_context,
+ .display_surface = xlib_trace_display_surface,
+};
diff --git a/src/gallium/winsys/xlib/xm_image.c b/src/gallium/winsys/xlib/xm_image.c
deleted file mode 100644
index 087b4e4c3a..0000000000
--- a/src/gallium/winsys/xlib/xm_image.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/**************************************************************************
-
-Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sub license, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial portions
-of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
-ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Kevin E. Martin <kevin@precisioninsight.com>
- * Brian Paul <brian@precisioninsight.com>
- */
-
-#include <stdlib.h>
-#include <X11/Xmd.h>
-
-#include "glxheader.h"
-#include "xmesaP.h"
-
-#ifdef XFree86Server
-
-#ifdef ROUNDUP
-#undef ROUNDUP
-#endif
-
-#define ROUNDUP(nbytes, pad) ((((nbytes) + ((pad)-1)) / (pad)) * ((pad)>>3))
-
-XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height, char *data)
-{
- XMesaImage *image;
-
- image = (XMesaImage *)xalloc(sizeof(XMesaImage));
-
- if (image) {
- image->width = width;
- image->height = height;
- image->data = data;
- /* Always pad to 32 bits */
- image->bytes_per_line = ROUNDUP((bitsPerPixel * width), 32);
- image->bits_per_pixel = bitsPerPixel;
- }
-
- return image;
-}
-
-void XMesaDestroyImage(XMesaImage *image)
-{
- if (image->data)
- free(image->data);
- xfree(image);
-}
-
-unsigned long XMesaGetPixel(XMesaImage *image, int x, int y)
-{
- CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line);
- CARD8 *i8;
- CARD16 *i16;
- CARD32 *i32;
- switch (image->bits_per_pixel) {
- case 8:
- i8 = (CARD8 *)row;
- return i8[x];
- break;
- case 15:
- case 16:
- i16 = (CARD16 *)row;
- return i16[x];
- break;
- case 24: /* WARNING: architecture specific code */
- i8 = (CARD8 *)row;
- return (((CARD32)i8[x*3]) |
- (((CARD32)i8[x*3+1])<<8) |
- (((CARD32)i8[x*3+2])<<16));
- break;
- case 32:
- i32 = (CARD32 *)row;
- return i32[x];
- break;
- }
- return 0;
-}
-
-#ifndef XMESA_USE_PUTPIXEL_MACRO
-void XMesaPutPixel(XMesaImage *image, int x, int y, unsigned long pixel)
-{
- CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line);
- CARD8 *i8;
- CARD16 *i16;
- CARD32 *i32;
- switch (image->bits_per_pixel) {
- case 8:
- i8 = (CARD8 *)row;
- i8[x] = (CARD8)pixel;
- break;
- case 15:
- case 16:
- i16 = (CARD16 *)row;
- i16[x] = (CARD16)pixel;
- break;
- case 24: /* WARNING: architecture specific code */
- i8 = (CARD8 *)__row;
- i8[x*3] = (CARD8)(p);
- i8[x*3+1] = (CARD8)(p>>8);
- i8[x*3+2] = (CARD8)(p>>16);
- case 32:
- i32 = (CARD32 *)row;
- i32[x] = (CARD32)pixel;
- break;
- }
-}
-#endif
-
-#endif /* XFree86Server */
diff --git a/src/gallium/winsys/xlib/xm_image.h b/src/gallium/winsys/xlib/xm_image.h
deleted file mode 100644
index 2a5e0f3777..0000000000
--- a/src/gallium/winsys/xlib/xm_image.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/**************************************************************************
-
-Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sub license, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial portions
-of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
-ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Kevin E. Martin <kevin@precisioninsight.com>
- * Brian Paul <brian@precisioninsight.com>
- */
-
-#ifndef _XM_IMAGE_H_
-#define _XM_IMAGE_H_
-
-#define XMESA_USE_PUTPIXEL_MACRO
-
-extern XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height,
- char *data);
-extern void XMesaDestroyImage(XMesaImage *image);
-extern unsigned long XMesaGetPixel(XMesaImage *image, int x, int y);
-#ifdef XMESA_USE_PUTPIXEL_MACRO
-#define XMesaPutPixel(__i,__x,__y,__p) \
-{ \
- CARD8 *__row = (CARD8 *)(__i->data + __y*__i->bytes_per_line); \
- CARD8 *__i8; \
- CARD16 *__i16; \
- CARD32 *__i32; \
- switch (__i->bits_per_pixel) { \
- case 8: \
- __i8 = (CARD8 *)__row; \
- __i8[__x] = (CARD8)__p; \
- break; \
- case 15: \
- case 16: \
- __i16 = (CARD16 *)__row; \
- __i16[__x] = (CARD16)__p; \
- break; \
- case 24: /* WARNING: architecture specific code */ \
- __i8 = (CARD8 *)__row; \
- __i8[__x*3] = (CARD8)(__p); \
- __i8[__x*3+1] = (CARD8)(__p>>8); \
- __i8[__x*3+2] = (CARD8)(__p>>16); \
- break; \
- case 32: \
- __i32 = (CARD32 *)__row; \
- __i32[__x] = (CARD32)__p; \
- break; \
- } \
-}
-#else
-extern void XMesaPutPixel(XMesaImage *image, int x, int y,
- unsigned long pixel);
-#endif
-
-#endif /* _XM_IMAGE_H_ */
diff --git a/src/gallium/winsys/xlib/xmesa.h b/src/gallium/winsys/xlib/xmesa.h
new file mode 100644
index 0000000000..98139af833
--- /dev/null
+++ b/src/gallium/winsys/xlib/xmesa.h
@@ -0,0 +1,424 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.1
+ *
+ * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Mesa/X11 interface. This header file serves as the documentation for
+ * the Mesa/X11 interface functions.
+ *
+ * Note: this interface isn't intended for user programs. It's primarily
+ * just for implementing the pseudo-GLX interface.
+ */
+
+
+/* Sample Usage:
+
+In addition to the usual X calls to select a visual, create a colormap
+and create a window, you must do the following to use the X/Mesa interface:
+
+1. Call XMesaCreateVisual() to make an XMesaVisual from an XVisualInfo.
+
+2. Call XMesaCreateContext() to create an X/Mesa rendering context, given
+ the XMesaVisual.
+
+3. Call XMesaCreateWindowBuffer() to create an XMesaBuffer from an X window
+ and XMesaVisual.
+
+4. Call XMesaMakeCurrent() to bind the XMesaBuffer to an XMesaContext and
+ to make the context the current one.
+
+5. Make gl* calls to render your graphics.
+
+6. Use XMesaSwapBuffers() when double buffering to swap front/back buffers.
+
+7. Before the X window is destroyed, call XMesaDestroyBuffer().
+
+8. Before exiting, call XMesaDestroyVisual() and XMesaDestroyContext().
+
+*/
+
+
+
+
+#ifndef XMESA_H
+#define XMESA_H
+
+#ifdef __VMS
+#include <GL/vms_x_fix.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef XFree86Server
+#include "xmesa_xf86.h"
+#else
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+#include "xmesa_x.h"
+#endif
+#include "GL/gl.h"
+
+#ifdef AMIWIN
+#include <pragmas/xlib_pragmas.h>
+extern struct Library *XLibBase;
+#endif
+
+
+#define XMESA_MAJOR_VERSION 6
+#define XMESA_MINOR_VERSION 3
+
+
+
+/*
+ * Values passed to XMesaGetString:
+ */
+#define XMESA_VERSION 1
+#define XMESA_EXTENSIONS 2
+
+
+/*
+ * Values passed to XMesaSetFXmode:
+ */
+#define XMESA_FX_WINDOW 1
+#define XMESA_FX_FULLSCREEN 2
+
+
+
+typedef struct xmesa_context *XMesaContext;
+
+typedef struct xmesa_visual *XMesaVisual;
+
+typedef struct xmesa_buffer *XMesaBuffer;
+
+
+
+/*
+ * Create a new X/Mesa visual.
+ * Input: display - X11 display
+ * visinfo - an XVisualInfo pointer
+ * rgb_flag - GL_TRUE = RGB mode,
+ * GL_FALSE = color index mode
+ * alpha_flag - alpha buffer requested?
+ * db_flag - GL_TRUE = double-buffered,
+ * GL_FALSE = single buffered
+ * stereo_flag - stereo visual?
+ * ximage_flag - GL_TRUE = use an XImage for back buffer,
+ * GL_FALSE = use an off-screen pixmap for back buffer
+ * depth_size - requested bits/depth values, or zero
+ * stencil_size - requested bits/stencil values, or zero
+ * accum_red_size - requested bits/red accum values, or zero
+ * accum_green_size - requested bits/green accum values, or zero
+ * accum_blue_size - requested bits/blue accum values, or zero
+ * accum_alpha_size - requested bits/alpha accum values, or zero
+ * num_samples - number of samples/pixel if multisampling, or zero
+ * level - visual level, usually 0
+ * visualCaveat - ala the GLX extension, usually GLX_NONE_EXT
+ * Return: a new XMesaVisual or 0 if error.
+ */
+extern XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
+ XMesaVisualInfo visinfo,
+ GLboolean rgb_flag,
+ GLboolean alpha_flag,
+ GLboolean db_flag,
+ GLboolean stereo_flag,
+ GLboolean ximage_flag,
+ GLint depth_size,
+ GLint stencil_size,
+ GLint accum_red_size,
+ GLint accum_green_size,
+ GLint accum_blue_size,
+ GLint accum_alpha_size,
+ GLint num_samples,
+ GLint level,
+ GLint visualCaveat );
+
+/*
+ * Destroy an XMesaVisual, but not the associated XVisualInfo.
+ */
+extern void XMesaDestroyVisual( XMesaVisual v );
+
+
+
+/*
+ * Create a new XMesaContext for rendering into an X11 window.
+ *
+ * Input: visual - an XMesaVisual
+ * share_list - another XMesaContext with which to share display
+ * lists or NULL if no sharing is wanted.
+ * Return: an XMesaContext or NULL if error.
+ */
+extern XMesaContext XMesaCreateContext( XMesaVisual v,
+ XMesaContext share_list );
+
+
+/*
+ * Destroy a rendering context as returned by XMesaCreateContext()
+ */
+extern void XMesaDestroyContext( XMesaContext c );
+
+
+#ifdef XFree86Server
+/*
+ * These are the extra routines required for integration with XFree86.
+ * None of these routines should be user visible. -KEM
+ */
+extern GLboolean XMesaForceCurrent( XMesaContext c );
+
+extern GLboolean XMesaLoseCurrent( XMesaContext c );
+
+extern GLboolean XMesaCopyContext( XMesaContext src,
+ XMesaContext dst,
+ GLuint mask );
+#endif /* XFree86Server */
+
+
+/*
+ * Create an XMesaBuffer from an X window.
+ */
+extern XMesaBuffer XMesaCreateWindowBuffer( XMesaVisual v, XMesaWindow w );
+
+
+/*
+ * Create an XMesaBuffer from an X pixmap.
+ */
+extern XMesaBuffer XMesaCreatePixmapBuffer( XMesaVisual v,
+ XMesaPixmap p,
+ XMesaColormap cmap );
+
+
+/*
+ * Destroy an XMesaBuffer, but not the corresponding window or pixmap.
+ */
+extern void XMesaDestroyBuffer( XMesaBuffer b );
+
+
+/*
+ * Return the XMesaBuffer handle which corresponds to an X drawable, if any.
+ *
+ * New in Mesa 2.3.
+ */
+extern XMesaBuffer XMesaFindBuffer( XMesaDisplay *dpy,
+ XMesaDrawable d );
+
+
+
+/*
+ * Bind a buffer to a context and make the context the current one.
+ */
+extern GLboolean XMesaMakeCurrent( XMesaContext c,
+ XMesaBuffer b );
+
+
+/*
+ * Bind two buffers (read and draw) to a context and make the
+ * context the current one.
+ * New in Mesa 3.3
+ */
+extern GLboolean XMesaMakeCurrent2( XMesaContext c,
+ XMesaBuffer drawBuffer,
+ XMesaBuffer readBuffer );
+
+
+/*
+ * Unbind the current context from its buffer.
+ */
+extern GLboolean XMesaUnbindContext( XMesaContext c );
+
+
+/*
+ * Return a handle to the current context.
+ */
+extern XMesaContext XMesaGetCurrentContext( void );
+
+
+/*
+ * Return handle to the current (draw) buffer.
+ */
+extern XMesaBuffer XMesaGetCurrentBuffer( void );
+
+
+/*
+ * Return handle to the current read buffer.
+ * New in Mesa 3.3
+ */
+extern XMesaBuffer XMesaGetCurrentReadBuffer( void );
+
+
+/*
+ * Swap the front and back buffers for the given buffer. No action is
+ * taken if the buffer is not double buffered.
+ */
+extern void XMesaSwapBuffers( XMesaBuffer b );
+
+
+/*
+ * Copy a sub-region of the back buffer to the front buffer.
+ *
+ * New in Mesa 2.6
+ */
+extern void XMesaCopySubBuffer( XMesaBuffer b,
+ int x,
+ int y,
+ int width,
+ int height );
+
+
+/*
+ * Return a pointer to the Pixmap or XImage being used as the back
+ * color buffer of an XMesaBuffer. This function is a way to get "under
+ * the hood" of X/Mesa so one can manipulate the back buffer directly.
+ * Input: b - the XMesaBuffer
+ * Output: pixmap - pointer to back buffer's Pixmap, or 0
+ * ximage - pointer to back buffer's XImage, or NULL
+ * Return: GL_TRUE = context is double buffered
+ * GL_FALSE = context is single buffered
+ */
+extern GLboolean XMesaGetBackBuffer( XMesaBuffer b,
+ XMesaPixmap *pixmap,
+ XMesaImage **ximage );
+
+
+
+/*
+ * Return the depth buffer associated with an XMesaBuffer.
+ * Input: b - the XMesa buffer handle
+ * Output: width, height - size of buffer in pixels
+ * bytesPerValue - bytes per depth value (2 or 4)
+ * buffer - pointer to depth buffer values
+ * Return: GL_TRUE or GL_FALSE to indicate success or failure.
+ *
+ * New in Mesa 2.4.
+ */
+extern GLboolean XMesaGetDepthBuffer( XMesaBuffer b,
+ GLint *width,
+ GLint *height,
+ GLint *bytesPerValue,
+ void **buffer );
+
+
+
+/*
+ * Flush/sync a context
+ */
+extern void XMesaFlush( XMesaContext c );
+
+
+
+/*
+ * Get an X/Mesa-specific string.
+ * Input: name - either XMESA_VERSION or XMESA_EXTENSIONS
+ */
+extern const char *XMesaGetString( XMesaContext c, int name );
+
+
+
+/*
+ * Scan for XMesaBuffers whose window/pixmap has been destroyed, then free
+ * any memory used by that buffer.
+ *
+ * New in Mesa 2.3.
+ */
+extern void XMesaGarbageCollect( void );
+
+
+
+/*
+ * Return a dithered pixel value.
+ * Input: c - XMesaContext
+ * x, y - window coordinate
+ * red, green, blue, alpha - color components in [0,1]
+ * Return: pixel value
+ *
+ * New in Mesa 2.3.
+ */
+extern unsigned long XMesaDitherColor( XMesaContext xmesa,
+ GLint x,
+ GLint y,
+ GLfloat red,
+ GLfloat green,
+ GLfloat blue,
+ GLfloat alpha );
+
+
+
+/*
+ * 3Dfx Glide driver only!
+ * Set 3Dfx/Glide full-screen or window rendering mode.
+ * Input: mode - either XMESA_FX_WINDOW (window rendering mode) or
+ * XMESA_FX_FULLSCREEN (full-screen rendering mode)
+ * Return: GL_TRUE if success
+ * GL_FALSE if invalid mode or if not using 3Dfx driver
+ *
+ * New in Mesa 2.6.
+ */
+extern GLboolean XMesaSetFXmode( GLint mode );
+
+
+
+/*
+ * Reallocate the back/depth/stencil/accum/etc. buffers associated with
+ * buffer <b> if its size has changed.
+ *
+ * New in Mesa 4.0.2
+ */
+extern void XMesaResizeBuffers( XMesaBuffer b );
+
+
+
+/*
+ * Create a pbuffer.
+ * New in Mesa 4.1
+ */
+extern XMesaBuffer XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap,
+ unsigned int width, unsigned int height);
+
+
+
+/*
+ * Texture from Pixmap
+ * New in Mesa 7.1
+ */
+extern void
+XMesaBindTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer,
+ const int *attrib_list);
+
+extern void
+XMesaReleaseTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer);
+
+
+extern XMesaBuffer
+XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p,
+ XMesaColormap cmap,
+ int format, int target, int mipmap);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
diff --git a/src/gallium/winsys/xlib/xmesaP.h b/src/gallium/winsys/xlib/xmesaP.h
deleted file mode 100644
index fcaeee52bc..0000000000
--- a/src/gallium/winsys/xlib/xmesaP.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Mesa 3-D graphics library
- * Version: 7.1
- *
- * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef XMESAP_H
-#define XMESAP_H
-
-
-#include "GL/xmesa.h"
-#include "mtypes.h"
-#ifdef XFree86Server
-#include "xm_image.h"
-#endif
-
-#include "state_tracker/st_context.h"
-#include "state_tracker/st_public.h"
-#include "pipe/p_thread.h"
-
-
-extern pipe_mutex _xmesa_lock;
-
-extern XMesaBuffer XMesaBufferList;
-
-/*
- */
-#define XMESA_SOFTPIPE 1
-#define XMESA_AUB 2
-extern int xmesa_mode;
-
-
-/**
- * Visual inforation, derived from GLvisual.
- * Basically corresponds to an XVisualInfo.
- */
-struct xmesa_visual {
- GLvisual mesa_visual; /* Device independent visual parameters */
- XMesaDisplay *display; /* The X11 display */
-#ifdef XFree86Server
- GLint ColormapEntries;
- GLint nplanes;
-#else
- XMesaVisualInfo visinfo; /* X's visual info (pointer to private copy) */
- XVisualInfo *vishandle; /* Only used in fakeglx.c */
-#endif
- GLint BitsPerPixel; /* True bits per pixel for XImages */
-
- GLboolean ximage_flag; /* Use XImage for back buffer (not pixmap)? */
-};
-
-
-/**
- * Context info, derived from st_context.
- * Basically corresponds to a GLXContext.
- */
-struct xmesa_context {
- struct st_context *st;
- XMesaVisual xm_visual; /** pixel format info */
- XMesaBuffer xm_buffer; /** current drawbuffer */
-};
-
-
-/**
- * Types of X/GLX drawables we might render into.
- */
-typedef enum {
- WINDOW, /* An X window */
- GLXWINDOW, /* GLX window */
- PIXMAP, /* GLX pixmap */
- PBUFFER /* GLX Pbuffer */
-} BufferType;
-
-
-/**
- * Framebuffer information, derived from.
- * Basically corresponds to a GLXDrawable.
- */
-struct xmesa_buffer {
- struct st_framebuffer *stfb;
-
- GLboolean wasCurrent; /* was ever the current buffer? */
- XMesaVisual xm_visual; /* the X/Mesa visual */
- XMesaDrawable drawable; /* Usually the X window ID */
- XMesaColormap cmap; /* the X colormap */
- BufferType type; /* window, pixmap, pbuffer or glxwindow */
-
- XMesaImage *tempImage;
- unsigned long selectedEvents;/* for pbuffers only */
-
- GLuint shm; /* X Shared Memory extension status: */
- /* 0 = not available */
- /* 1 = XImage support available */
- /* 2 = Pixmap support available too */
-#if defined(USE_XSHM) && !defined(XFree86Server)
- XShmSegmentInfo shminfo;
-#endif
-
- XMesaGC gc; /* scratch GC for span, line, tri drawing */
-
- /* GLX_EXT_texture_from_pixmap */
- GLint TextureTarget; /** GLX_TEXTURE_1D_EXT, for example */
- GLint TextureFormat; /** GLX_TEXTURE_FORMAT_RGB_EXT, for example */
- GLint TextureMipmap; /** 0 or 1 */
-
- struct xmesa_buffer *Next; /* Linked list pointer: */
-};
-
-
-
-/** cast wrapper */
-static INLINE XMesaContext
-xmesa_context(GLcontext *ctx)
-{
- return (XMesaContext) ctx->DriverCtx;
-}
-
-
-/** cast wrapper */
-static INLINE XMesaBuffer
-xmesa_buffer(GLframebuffer *fb)
-{
- struct st_framebuffer *stfb = (struct st_framebuffer *) fb;
- return (XMesaBuffer) st_framebuffer_private(stfb);
-}
-
-
-extern void
-xmesa_delete_framebuffer(struct gl_framebuffer *fb);
-
-extern XMesaBuffer
-xmesa_find_buffer(XMesaDisplay *dpy, XMesaColormap cmap, XMesaBuffer notThis);
-
-extern void
-xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer);
-
-extern void
-xmesa_destroy_buffers_on_display(XMesaDisplay *dpy);
-
-extern struct pipe_context *
-xmesa_create_pipe_context(XMesaContext xm, uint pixelformat);
-
-static INLINE GLuint
-xmesa_buffer_width(XMesaBuffer b)
-{
- return b->stfb->Base.Width;
-}
-
-static INLINE GLuint
-xmesa_buffer_height(XMesaBuffer b)
-{
- return b->stfb->Base.Height;
-}
-
-extern void
-xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf);
-
-extern int
-xmesa_check_for_xshm(XMesaDisplay *display);
-
-#endif
diff --git a/src/gallium/winsys/xlib/xmesa_x.h b/src/gallium/winsys/xlib/xmesa_x.h
new file mode 100644
index 0000000000..865bab4313
--- /dev/null
+++ b/src/gallium/winsys/xlib/xmesa_x.h
@@ -0,0 +1,86 @@
+
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Kevin E. Martin <kevin@precisioninsight.com>
+ *
+ * When we're building the XMesa driver for stand-alone Mesa we
+ * include this file when building the xm_*.c files.
+ * We need to define some types and macros differently when building
+ * in the Xserver vs. stand-alone Mesa.
+ */
+
+#ifndef _XMESA_X_H_
+#define _XMESA_X_H_
+
+typedef Display XMesaDisplay;
+typedef Pixmap XMesaPixmap;
+typedef Colormap XMesaColormap;
+typedef Drawable XMesaDrawable;
+typedef Window XMesaWindow;
+typedef GC XMesaGC;
+typedef XVisualInfo *XMesaVisualInfo;
+typedef XImage XMesaImage;
+typedef XPoint XMesaPoint;
+typedef XColor XMesaColor;
+
+#define XMesaDestroyImage XDestroyImage
+
+#define XMesaPutPixel XPutPixel
+#define XMesaGetPixel XGetPixel
+
+#define XMesaSetForeground XSetForeground
+#define XMesaSetBackground XSetBackground
+#define XMesaSetPlaneMask XSetPlaneMask
+#define XMesaSetFunction XSetFunction
+#define XMesaSetFillStyle XSetFillStyle
+#define XMesaSetTile XSetTile
+
+#define XMesaDrawPoint XDrawPoint
+#define XMesaDrawPoints XDrawPoints
+#define XMesaDrawLine XDrawLine
+#define XMesaFillRectangle XFillRectangle
+#define XMesaGetImage XGetImage
+#define XMesaPutImage XPutImage
+#define XMesaCopyArea XCopyArea
+
+#define XMesaCreatePixmap XCreatePixmap
+#define XMesaFreePixmap XFreePixmap
+#define XMesaFreeGC XFreeGC
+
+#define GET_COLORMAP_SIZE(__v) __v->visinfo->colormap_size
+#define GET_REDMASK(__v) __v->mesa_visual.redMask
+#define GET_GREENMASK(__v) __v->mesa_visual.greenMask
+#define GET_BLUEMASK(__v) __v->mesa_visual.blueMask
+#define GET_VISUAL_DEPTH(__v) __v->visinfo->depth
+#define GET_BLACK_PIXEL(__v) BlackPixel(__v->display, __v->mesa_visual.screen)
+#define CHECK_BYTE_ORDER(__v) host_byte_order()==ImageByteOrder(__v->display)
+#define CHECK_FOR_HPCR(__v) XInternAtom(__v->display, "_HP_RGB_SMOOTH_MAP_LIST", True)
+
+#endif