summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/Makefile20
-rw-r--r--src/gallium/drivers/cell/Makefile12
-rw-r--r--src/gallium/drivers/cell/common.h376
-rw-r--r--src/gallium/drivers/cell/ppu/Makefile86
-rw-r--r--src/gallium/drivers/cell/ppu/cell_batch.c260
-rw-r--r--src/gallium/drivers/cell/ppu/cell_batch.h54
-rw-r--r--src/gallium/drivers/cell/ppu/cell_clear.c123
-rw-r--r--src/gallium/drivers/cell/ppu/cell_clear.h43
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.c183
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h205
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.c191
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.h36
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.c168
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.h57
-rw-r--r--src/gallium/drivers/cell/ppu/cell_flush.c112
-rw-r--r--src/gallium/drivers/cell/ppu/cell_flush.h45
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c2046
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.h42
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.c2181
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.h38
-rw-r--r--src/gallium/drivers/cell/ppu/cell_pipe_state.c358
-rw-r--r--src/gallium/drivers/cell/ppu/cell_pipe_state.h39
-rw-r--r--src/gallium/drivers/cell/ppu/cell_render.c211
-rw-r--r--src/gallium/drivers/cell/ppu/cell_render.h39
-rw-r--r--src/gallium/drivers/cell/ppu/cell_screen.c174
-rw-r--r--src/gallium/drivers/cell/ppu/cell_screen.h41
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.c219
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.h79
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state.h65
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_derived.c170
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c340
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.h36
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_per_fragment.c1430
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_per_fragment.h39
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_shader.c221
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_vertex.c79
-rw-r--r--src/gallium/drivers/cell/ppu/cell_surface.c38
-rw-r--r--src/gallium/drivers/cell/ppu/cell_surface.h42
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.c528
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.h72
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vbuf.c309
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vbuf.h38
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_fetch.c346
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_shader.c146
-rw-r--r--src/gallium/drivers/cell/ppu/cell_winsys.h50
-rw-r--r--src/gallium/drivers/cell/spu/.gitignore1
-rw-r--r--src/gallium/drivers/cell/spu/Makefile82
-rw-r--r--src/gallium/drivers/cell/spu/spu_colorpack.h145
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c815
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.h35
-rw-r--r--src/gallium/drivers/cell/spu/spu_dcache.c145
-rw-r--r--src/gallium/drivers/cell/spu/spu_dcache.h37
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c1933
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.h172
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.c173
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.h35
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c117
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h254
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c631
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.h44
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c295
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.h38
-rw-r--r--src/gallium/drivers/cell/spu/spu_shuffle.h186
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.c641
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.h67
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.c126
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.h75
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c809
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.h37
-rw-r--r--src/gallium/drivers/cell/spu/spu_util.c167
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_fetch.c145
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_shader.c244
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_shader.h66
-rw-r--r--src/gallium/drivers/failover/Makefile14
-rw-r--r--src/gallium/drivers/failover/SConscript13
-rw-r--r--src/gallium/drivers/failover/fo_context.c159
-rw-r--r--src/gallium/drivers/failover/fo_context.h118
-rw-r--r--src/gallium/drivers/failover/fo_state.c483
-rw-r--r--src/gallium/drivers/failover/fo_state_emit.c117
-rw-r--r--src/gallium/drivers/failover/fo_winsys.h45
-rw-r--r--src/gallium/drivers/i915simple/Makefile31
-rw-r--r--src/gallium/drivers/i915simple/SConscript29
-rw-r--r--src/gallium/drivers/i915simple/i915_batch.h116
-rw-r--r--src/gallium/drivers/i915simple/i915_blit.c157
-rw-r--r--src/gallium/drivers/i915simple/i915_blit.h55
-rw-r--r--src/gallium/drivers/i915simple/i915_clear.c48
-rw-r--r--src/gallium/drivers/i915simple/i915_context.c193
-rw-r--r--src/gallium/drivers/i915simple/i915_context.h345
-rw-r--r--src/gallium/drivers/i915simple/i915_debug.c900
-rw-r--r--src/gallium/drivers/i915simple/i915_debug.h115
-rw-r--r--src/gallium/drivers/i915simple/i915_debug_fp.c363
-rw-r--r--src/gallium/drivers/i915simple/i915_flush.c78
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc.h207
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc_emit.c375
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc_translate.c1190
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_emit.c220
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_vbuf.c545
-rw-r--r--src/gallium/drivers/i915simple/i915_reg.h978
-rw-r--r--src/gallium/drivers/i915simple/i915_screen.c284
-rw-r--r--src/gallium/drivers/i915simple/i915_screen.h69
-rw-r--r--src/gallium/drivers/i915simple/i915_state.c788
-rw-r--r--src/gallium/drivers/i915simple/i915_state.h50
-rw-r--r--src/gallium/drivers/i915simple/i915_state_derived.c183
-rw-r--r--src/gallium/drivers/i915simple/i915_state_dynamic.c310
-rw-r--r--src/gallium/drivers/i915simple/i915_state_emit.c402
-rw-r--r--src/gallium/drivers/i915simple/i915_state_immediate.c225
-rw-r--r--src/gallium/drivers/i915simple/i915_state_inlines.h230
-rw-r--r--src/gallium/drivers/i915simple/i915_state_sampler.c299
-rw-r--r--src/gallium/drivers/i915simple/i915_surface.c123
-rw-r--r--src/gallium/drivers/i915simple/i915_texture.c774
-rw-r--r--src/gallium/drivers/i915simple/i915_texture.h43
-rw-r--r--src/gallium/drivers/i915simple/i915_winsys.h121
-rw-r--r--src/gallium/drivers/i965simple/Makefile54
-rw-r--r--src/gallium/drivers/i965simple/SConscript54
-rw-r--r--src/gallium/drivers/i965simple/brw_batch.h59
-rw-r--r--src/gallium/drivers/i965simple/brw_blit.c218
-rw-r--r--src/gallium/drivers/i965simple/brw_blit.h33
-rw-r--r--src/gallium/drivers/i965simple/brw_cc.c269
-rw-r--r--src/gallium/drivers/i965simple/brw_clip.c206
-rw-r--r--src/gallium/drivers/i965simple/brw_clip.h170
-rw-r--r--src/gallium/drivers/i965simple/brw_clip_line.c245
-rw-r--r--src/gallium/drivers/i965simple/brw_clip_point.c47
-rw-r--r--src/gallium/drivers/i965simple/brw_clip_state.c93
-rw-r--r--src/gallium/drivers/i965simple/brw_clip_tri.c566
-rw-r--r--src/gallium/drivers/i965simple/brw_clip_unfilled.c477
-rw-r--r--src/gallium/drivers/i965simple/brw_clip_util.c351
-rw-r--r--src/gallium/drivers/i965simple/brw_context.c114
-rw-r--r--src/gallium/drivers/i965simple/brw_context.h684
-rw-r--r--src/gallium/drivers/i965simple/brw_curbe.c369
-rw-r--r--src/gallium/drivers/i965simple/brw_defines.h852
-rw-r--r--src/gallium/drivers/i965simple/brw_draw.c239
-rw-r--r--src/gallium/drivers/i965simple/brw_draw.h55
-rw-r--r--src/gallium/drivers/i965simple/brw_draw_upload.c300
-rw-r--r--src/gallium/drivers/i965simple/brw_eu.c130
-rw-r--r--src/gallium/drivers/i965simple/brw_eu.h888
-rw-r--r--src/gallium/drivers/i965simple/brw_eu_debug.c90
-rw-r--r--src/gallium/drivers/i965simple/brw_eu_emit.c1080
-rw-r--r--src/gallium/drivers/i965simple/brw_eu_util.c126
-rw-r--r--src/gallium/drivers/i965simple/brw_flush.c73
-rw-r--r--src/gallium/drivers/i965simple/brw_gs.c196
-rw-r--r--src/gallium/drivers/i965simple/brw_gs.h75
-rw-r--r--src/gallium/drivers/i965simple/brw_gs_emit.c148
-rw-r--r--src/gallium/drivers/i965simple/brw_gs_state.c90
-rw-r--r--src/gallium/drivers/i965simple/brw_misc_state.c488
-rw-r--r--src/gallium/drivers/i965simple/brw_reg.h76
-rw-r--r--src/gallium/drivers/i965simple/brw_screen.c244
-rw-r--r--src/gallium/drivers/i965simple/brw_screen.h68
-rw-r--r--src/gallium/drivers/i965simple/brw_sf.c351
-rw-r--r--src/gallium/drivers/i965simple/brw_sf.h122
-rw-r--r--src/gallium/drivers/i965simple/brw_sf_emit.c382
-rw-r--r--src/gallium/drivers/i965simple/brw_sf_state.c181
-rw-r--r--src/gallium/drivers/i965simple/brw_shader_info.c48
-rw-r--r--src/gallium/drivers/i965simple/brw_state.c469
-rw-r--r--src/gallium/drivers/i965simple/brw_state.h151
-rw-r--r--src/gallium/drivers/i965simple/brw_state_batch.c113
-rw-r--r--src/gallium/drivers/i965simple/brw_state_cache.c443
-rw-r--r--src/gallium/drivers/i965simple/brw_state_pool.c138
-rw-r--r--src/gallium/drivers/i965simple/brw_state_upload.c202
-rw-r--r--src/gallium/drivers/i965simple/brw_structs.h1348
-rw-r--r--src/gallium/drivers/i965simple/brw_surface.c124
-rw-r--r--src/gallium/drivers/i965simple/brw_tex_layout.c401
-rw-r--r--src/gallium/drivers/i965simple/brw_tex_layout.h44
-rw-r--r--src/gallium/drivers/i965simple/brw_urb.c186
-rw-r--r--src/gallium/drivers/i965simple/brw_util.c104
-rw-r--r--src/gallium/drivers/i965simple/brw_util.h43
-rw-r--r--src/gallium/drivers/i965simple/brw_vs.c120
-rw-r--r--src/gallium/drivers/i965simple/brw_vs.h82
-rw-r--r--src/gallium/drivers/i965simple/brw_vs_emit.c1330
-rw-r--r--src/gallium/drivers/i965simple/brw_vs_state.c103
-rw-r--r--src/gallium/drivers/i965simple/brw_winsys.h209
-rw-r--r--src/gallium/drivers/i965simple/brw_wm.c209
-rw-r--r--src/gallium/drivers/i965simple/brw_wm.h142
-rw-r--r--src/gallium/drivers/i965simple/brw_wm_decl.c392
-rw-r--r--src/gallium/drivers/i965simple/brw_wm_glsl.c1076
-rw-r--r--src/gallium/drivers/i965simple/brw_wm_iz.c214
-rw-r--r--src/gallium/drivers/i965simple/brw_wm_sampler_state.c275
-rw-r--r--src/gallium/drivers/i965simple/brw_wm_state.c195
-rw-r--r--src/gallium/drivers/i965simple/brw_wm_surface_state.c304
-rw-r--r--src/gallium/drivers/nouveau/nouveau_bo.h53
-rw-r--r--src/gallium/drivers/nouveau/nouveau_channel.h40
-rw-r--r--src/gallium/drivers/nouveau/nouveau_class.h8006
-rw-r--r--src/gallium/drivers/nouveau/nouveau_device.h30
-rw-r--r--src/gallium/drivers/nouveau/nouveau_gldefs.h196
-rw-r--r--src/gallium/drivers/nouveau/nouveau_grobj.h35
-rw-r--r--src/gallium/drivers/nouveau/nouveau_notifier.h43
-rw-r--r--src/gallium/drivers/nouveau/nouveau_push.h82
-rw-r--r--src/gallium/drivers/nouveau/nouveau_pushbuf.h32
-rw-r--r--src/gallium/drivers/nouveau/nouveau_resource.h37
-rw-r--r--src/gallium/drivers/nouveau/nouveau_stateobj.h158
-rw-r--r--src/gallium/drivers/nouveau/nouveau_util.h91
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h98
-rw-r--r--src/gallium/drivers/nv04/Makefile28
-rw-r--r--src/gallium/drivers/nv04/nv04_clear.c12
-rw-r--r--src/gallium/drivers/nv04/nv04_context.c106
-rw-r--r--src/gallium/drivers/nv04/nv04_context.h146
-rw-r--r--src/gallium/drivers/nv04/nv04_fragprog.c21
-rw-r--r--src/gallium/drivers/nv04/nv04_fragtex.c73
-rw-r--r--src/gallium/drivers/nv04/nv04_miptree.c136
-rw-r--r--src/gallium/drivers/nv04/nv04_prim_vbuf.c309
-rw-r--r--src/gallium/drivers/nv04/nv04_screen.c214
-rw-r--r--src/gallium/drivers/nv04/nv04_screen.h25
-rw-r--r--src/gallium/drivers/nv04/nv04_state.c495
-rw-r--r--src/gallium/drivers/nv04/nv04_state.h71
-rw-r--r--src/gallium/drivers/nv04/nv04_state_emit.c156
-rw-r--r--src/gallium/drivers/nv04/nv04_surface.c64
-rw-r--r--src/gallium/drivers/nv04/nv04_vbo.c71
-rw-r--r--src/gallium/drivers/nv10/Makefile28
-rw-r--r--src/gallium/drivers/nv10/nv10_clear.c12
-rw-r--r--src/gallium/drivers/nv10/nv10_context.c296
-rw-r--r--src/gallium/drivers/nv10/nv10_context.h153
-rw-r--r--src/gallium/drivers/nv10/nv10_fragprog.c21
-rw-r--r--src/gallium/drivers/nv10/nv10_fragtex.c124
-rw-r--r--src/gallium/drivers/nv10/nv10_miptree.c149
-rw-r--r--src/gallium/drivers/nv10/nv10_prim_vbuf.c245
-rw-r--r--src/gallium/drivers/nv10/nv10_screen.c210
-rw-r--r--src/gallium/drivers/nv10/nv10_screen.h22
-rw-r--r--src/gallium/drivers/nv10/nv10_state.c588
-rw-r--r--src/gallium/drivers/nv10/nv10_state.h139
-rw-r--r--src/gallium/drivers/nv10/nv10_state_emit.c303
-rw-r--r--src/gallium/drivers/nv10/nv10_surface.c64
-rw-r--r--src/gallium/drivers/nv10/nv10_vbo.c77
-rw-r--r--src/gallium/drivers/nv20/Makefile29
-rw-r--r--src/gallium/drivers/nv20/nv20_clear.c12
-rw-r--r--src/gallium/drivers/nv20/nv20_context.c419
-rw-r--r--src/gallium/drivers/nv20/nv20_context.h153
-rw-r--r--src/gallium/drivers/nv20/nv20_fragprog.c21
-rw-r--r--src/gallium/drivers/nv20/nv20_fragtex.c124
-rw-r--r--src/gallium/drivers/nv20/nv20_miptree.c155
-rw-r--r--src/gallium/drivers/nv20/nv20_prim_vbuf.c402
-rw-r--r--src/gallium/drivers/nv20/nv20_screen.c206
-rw-r--r--src/gallium/drivers/nv20/nv20_screen.h22
-rw-r--r--src/gallium/drivers/nv20/nv20_state.c581
-rw-r--r--src/gallium/drivers/nv20/nv20_state.h139
-rw-r--r--src/gallium/drivers/nv20/nv20_state_emit.c359
-rw-r--r--src/gallium/drivers/nv20/nv20_surface.c64
-rw-r--r--src/gallium/drivers/nv20/nv20_vbo.c77
-rw-r--r--src/gallium/drivers/nv20/nv20_vertprog.c838
-rw-r--r--src/gallium/drivers/nv30/Makefile37
-rw-r--r--src/gallium/drivers/nv30/nv30_clear.c13
-rw-r--r--src/gallium/drivers/nv30/nv30_context.c72
-rw-r--r--src/gallium/drivers/nv30/nv30_context.h212
-rw-r--r--src/gallium/drivers/nv30/nv30_draw.c61
-rw-r--r--src/gallium/drivers/nv30/nv30_fragprog.c911
-rw-r--r--src/gallium/drivers/nv30/nv30_fragtex.c163
-rw-r--r--src/gallium/drivers/nv30/nv30_miptree.c195
-rw-r--r--src/gallium/drivers/nv30/nv30_query.c122
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.c386
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.h35
-rw-r--r--src/gallium/drivers/nv30/nv30_shader.h490
-rw-r--r--src/gallium/drivers/nv30/nv30_state.c725
-rw-r--r--src/gallium/drivers/nv30/nv30_state.h88
-rw-r--r--src/gallium/drivers/nv30/nv30_state_blend.c40
-rw-r--r--src/gallium/drivers/nv30/nv30_state_emit.c118
-rw-r--r--src/gallium/drivers/nv30/nv30_state_fb.c140
-rw-r--r--src/gallium/drivers/nv30/nv30_state_rasterizer.c17
-rw-r--r--src/gallium/drivers/nv30/nv30_state_scissor.c35
-rw-r--r--src/gallium/drivers/nv30/nv30_state_stipple.c39
-rw-r--r--src/gallium/drivers/nv30/nv30_state_viewport.c70
-rw-r--r--src/gallium/drivers/nv30/nv30_state_zsa.c17
-rw-r--r--src/gallium/drivers/nv30/nv30_surface.c77
-rw-r--r--src/gallium/drivers/nv30/nv30_vbo.c556
-rw-r--r--src/gallium/drivers/nv30/nv30_vertprog.c838
-rw-r--r--src/gallium/drivers/nv40/Makefile37
-rw-r--r--src/gallium/drivers/nv40/nv40_clear.c13
-rw-r--r--src/gallium/drivers/nv40/nv40_context.c72
-rw-r--r--src/gallium/drivers/nv40/nv40_context.h233
-rw-r--r--src/gallium/drivers/nv40/nv40_draw.c349
-rw-r--r--src/gallium/drivers/nv40/nv40_fragprog.c991
-rw-r--r--src/gallium/drivers/nv40/nv40_fragtex.c168
-rw-r--r--src/gallium/drivers/nv40/nv40_miptree.c197
-rw-r--r--src/gallium/drivers/nv40/nv40_query.c122
-rw-r--r--src/gallium/drivers/nv40/nv40_screen.c369
-rw-r--r--src/gallium/drivers/nv40/nv40_screen.h35
-rw-r--r--src/gallium/drivers/nv40/nv40_shader.h556
-rw-r--r--src/gallium/drivers/nv40/nv40_state.c740
-rw-r--r--src/gallium/drivers/nv40/nv40_state.h91
-rw-r--r--src/gallium/drivers/nv40/nv40_state_blend.c40
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c184
-rw-r--r--src/gallium/drivers/nv40/nv40_state_fb.c155
-rw-r--r--src/gallium/drivers/nv40/nv40_state_rasterizer.c17
-rw-r--r--src/gallium/drivers/nv40/nv40_state_scissor.c35
-rw-r--r--src/gallium/drivers/nv40/nv40_state_stipple.c39
-rw-r--r--src/gallium/drivers/nv40/nv40_state_viewport.c67
-rw-r--r--src/gallium/drivers/nv40/nv40_state_zsa.c17
-rw-r--r--src/gallium/drivers/nv40/nv40_surface.c77
-rw-r--r--src/gallium/drivers/nv40/nv40_vbo.c555
-rw-r--r--src/gallium/drivers/nv40/nv40_vertprog.c1070
-rw-r--r--src/gallium/drivers/nv50/Makefile29
-rw-r--r--src/gallium/drivers/nv50/nv50_clear.c90
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c90
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h199
-rw-r--r--src/gallium/drivers/nv50/nv50_draw.c89
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c298
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c1779
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h45
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c129
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c329
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h33
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c664
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c309
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c92
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c161
-rw-r--r--src/gallium/drivers/nv50/nv50_texture.h129
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c241
-rw-r--r--src/gallium/drivers/softpipe/Makefile47
-rw-r--r--src/gallium/drivers/softpipe/SConscript46
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.c107
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.h43
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c288
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h174
-rw-r--r--src/gallium/drivers/softpipe/sp_draw_arrays.c202
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.c92
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.h37
-rw-r--r--src/gallium/drivers/softpipe/sp_fs.h54
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_exec.c164
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_llvm.c200
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_sse.c169
-rw-r--r--src/gallium/drivers/softpipe/sp_headers.h95
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_setup.c190
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_setup.h85
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.c410
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.h38
-rw-r--r--src/gallium/drivers/softpipe/sp_quad.c118
-rw-r--r--src/gallium/drivers/softpipe/sp_quad.h69
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_alpha_test.c108
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_blend.c759
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_bufloop.c74
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_colormask.c116
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_coverage.c93
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_depth_test.c290
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_earlyz.c88
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_fs.c189
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_occlusion.c85
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_output.c103
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_stencil.c352
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_stipple.c94
-rw-r--r--src/gallium/drivers/softpipe/sp_query.c107
-rw-r--r--src/gallium/drivers/softpipe/sp_query.h39
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c179
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.h58
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c1569
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.h53
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h206
-rw-r--r--src/gallium/drivers/softpipe/sp_state_blend.c98
-rw-r--r--src/gallium/drivers/softpipe/sp_state_clip.c79
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c210
-rw-r--r--src/gallium/drivers/softpipe/sp_state_fs.c161
-rw-r--r--src/gallium/drivers/softpipe/sp_state_rasterizer.c62
-rw-r--r--src/gallium/drivers/softpipe/sp_state_sampler.c118
-rw-r--r--src/gallium/drivers/softpipe/sp_state_surface.c130
-rw-r--r--src/gallium/drivers/softpipe/sp_state_vertex.c73
-rw-r--r--src/gallium/drivers/softpipe/sp_surface.c38
-rw-r--r--src/gallium/drivers/softpipe/sp_surface.h42
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c1229
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.h73
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c343
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.h70
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c614
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.h105
-rw-r--r--src/gallium/drivers/softpipe/sp_winsys.h73
-rw-r--r--src/gallium/drivers/trace/Makefile18
-rw-r--r--src/gallium/drivers/trace/README64
-rw-r--r--src/gallium/drivers/trace/SConscript16
-rw-r--r--src/gallium/drivers/trace/tr_context.c1072
-rw-r--r--src/gallium/drivers/trace/tr_context.h68
-rw-r--r--src/gallium/drivers/trace/tr_dump.c404
-rw-r--r--src/gallium/drivers/trace/tr_dump.h132
-rw-r--r--src/gallium/drivers/trace/tr_screen.c469
-rw-r--r--src/gallium/drivers/trace/tr_screen.h60
-rw-r--r--src/gallium/drivers/trace/tr_state.c464
-rw-r--r--src/gallium/drivers/trace/tr_state.h76
-rw-r--r--src/gallium/drivers/trace/tr_texture.c111
-rw-r--r--src/gallium/drivers/trace/tr_texture.h95
-rw-r--r--src/gallium/drivers/trace/tr_winsys.c450
-rw-r--r--src/gallium/drivers/trace/tr_winsys.h76
-rw-r--r--src/gallium/drivers/trace/trace.xsl185
376 files changed, 96145 insertions, 0 deletions
diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile
new file mode 100644
index 0000000000..6161cb6ff8
--- /dev/null
+++ b/src/gallium/drivers/Makefile
@@ -0,0 +1,20 @@
+TOP = ../../..
+include $(TOP)/configs/current
+
+
+SUBDIRS = $(GALLIUM_DRIVER_DIRS)
+
+
+default: subdirs
+
+
+subdirs:
+ @for dir in $(SUBDIRS) ; do \
+ if [ -d $$dir ] ; then \
+ (cd $$dir && $(MAKE)) || exit 1 ; \
+ fi \
+ done
+
+
+clean:
+ rm -f `find . -name \*.[oa]`
diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile
new file mode 100644
index 0000000000..47aef7b05f
--- /dev/null
+++ b/src/gallium/drivers/cell/Makefile
@@ -0,0 +1,12 @@
+# Cell Gallium driver Makefile
+
+
+default:
+ ( cd spu ; make )
+ ( cd ppu ; make )
+
+
+
+clean:
+ ( cd spu ; make clean )
+ ( cd ppu ; make clean )
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
new file mode 100644
index 0000000000..1f6860da11
--- /dev/null
+++ b/src/gallium/drivers/cell/common.h
@@ -0,0 +1,376 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Types and tokens which are common to the SPU and PPU code.
+ */
+
+
+#ifndef CELL_COMMON_H
+#define CELL_COMMON_H
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+
+
+/** The standard assert macro doesn't seem to work reliably */
+#define ASSERT(x) \
+ if (!(x)) { \
+ ubyte *p = NULL; \
+ fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \
+ __FILE__, __LINE__, __FUNCTION__, #x); \
+ *p = 0; \
+ exit(1); \
+ }
+
+
+
+#define JOIN(x, y) JOIN_AGAIN(x, y)
+#define JOIN_AGAIN(x, y) x ## y
+
+#define STATIC_ASSERT(e) \
+{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 1 : -1];}
+
+
+
+/** for sanity checking */
+#define ASSERT_ALIGN16(ptr) \
+ ASSERT((((unsigned long) (ptr)) & 0xf) == 0);
+
+
+/** round up value to next multiple of 4 */
+#define ROUNDUP4(k) (((k) + 0x3) & ~0x3)
+
+/** round up value to next multiple of 8 */
+#define ROUNDUP8(k) (((k) + 0x7) & ~0x7)
+
+/** round up value to next multiple of 16 */
+#define ROUNDUP16(k) (((k) + 0xf) & ~0xf)
+
+
+#define CELL_MAX_SPUS 8
+
+#define CELL_MAX_SAMPLERS 4
+#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */
+#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */
+#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */
+#define CELL_MAX_HEIGHT 1024 /**< max framebuffer width */
+
+#define TILE_SIZE 32
+
+
+/**
+ * The low byte of a mailbox word contains the command opcode.
+ * Remaining higher bytes are command specific.
+ */
+#define CELL_CMD_OPCODE_MASK 0xff
+
+#define CELL_CMD_EXIT 1
+#define CELL_CMD_CLEAR_SURFACE 2
+#define CELL_CMD_FINISH 3
+#define CELL_CMD_RENDER 4
+#define CELL_CMD_BATCH 5
+#define CELL_CMD_RELEASE_VERTS 6
+#define CELL_CMD_STATE_FRAMEBUFFER 10
+#define CELL_CMD_STATE_FRAGMENT_OPS 11
+#define CELL_CMD_STATE_SAMPLER 12
+#define CELL_CMD_STATE_TEXTURE 13
+#define CELL_CMD_STATE_VERTEX_INFO 14
+#define CELL_CMD_STATE_VIEWPORT 15
+#define CELL_CMD_STATE_UNIFORMS 16
+#define CELL_CMD_STATE_VS_ARRAY_INFO 17
+#define CELL_CMD_STATE_BIND_VS 18
+#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
+#define CELL_CMD_STATE_ATTRIB_FETCH 20
+#define CELL_CMD_STATE_FS_CONSTANTS 21
+#define CELL_CMD_STATE_RASTERIZER 22
+#define CELL_CMD_VS_EXECUTE 23
+#define CELL_CMD_FLUSH_BUFFER_RANGE 24
+#define CELL_CMD_FENCE 25
+
+
+/** Command/batch buffers */
+#define CELL_NUM_BUFFERS 4
+#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */
+
+#define CELL_BUFFER_STATUS_FREE 10
+#define CELL_BUFFER_STATUS_USED 20
+
+/** Debug flags */
+#define CELL_DEBUG_CHECKER (1 << 0)
+#define CELL_DEBUG_ASM (1 << 1)
+#define CELL_DEBUG_SYNC (1 << 2)
+#define CELL_DEBUG_FRAGMENT_OPS (1 << 3)
+#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4)
+#define CELL_DEBUG_CMD (1 << 5)
+#define CELL_DEBUG_CACHE (1 << 6)
+
+#define CELL_FENCE_IDLE 0
+#define CELL_FENCE_EMITTED 1
+#define CELL_FENCE_SIGNALLED 2
+
+#define CELL_FACING_FRONT 0
+#define CELL_FACING_BACK 1
+
+struct cell_fence
+{
+ /** There's a 16-byte status qword per SPU */
+ volatile uint status[CELL_MAX_SPUS][4];
+};
+
+#ifdef __SPU__
+typedef vector unsigned int opcode_t;
+#else
+typedef unsigned int opcode_t[4];
+#endif
+
+/**
+ * Fence command sent to SPUs. In response, the SPUs will write
+ * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory.
+ */
+struct cell_command_fence
+{
+ opcode_t opcode; /**< CELL_CMD_FENCE */
+ struct cell_fence *fence;
+ uint32_t pad_[3];
+};
+
+
+/**
+ * Command to specify per-fragment operations state and generated code.
+ * Note that this is a variant-length structure, allocated with as
+ * much memory as needed to hold the generated code; the "code"
+ * field *must* be the last field in the structure. Also, the entire
+ * length of the structure (including the variant code field) must be
+ * a multiple of 8 bytes; we require that this structure itself be
+ * a multiple of 8 bytes, and that the generated code also be a multiple
+ * of 8 bytes.
+ *
+ * Also note that the dsa, blend, blend_color fields are really only needed
+ * for the fallback/C per-pixel code. They're not used when we generate
+ * dynamic SPU fragment code (which is the normal case), and will eventually
+ * be removed from this structure.
+ */
+struct cell_command_fragment_ops
+{
+ opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
+
+ /* Fields for the fallback case */
+ struct pipe_depth_stencil_alpha_state dsa;
+ struct pipe_blend_state blend;
+ struct pipe_blend_color blend_color;
+
+ /* Fields for the generated SPU code */
+ unsigned total_code_size;
+ unsigned front_code_index;
+ unsigned back_code_index;
+ /* this field has variant length, and must be the last field in
+ * the structure
+ */
+ unsigned code[0];
+};
+
+
+/** Max instructions for fragment programs */
+#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512
+
+/**
+ * Command to send a fragment program to SPUs.
+ */
+struct cell_command_fragment_program
+{
+ opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
+ uint num_inst; /**< Number of instructions */
+ uint32_t pad[3];
+ unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
+};
+
+
+/**
+ * Tell SPUs about the framebuffer size, location
+ */
+struct cell_command_framebuffer
+{
+ opcode_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */
+ int width, height;
+ void *color_start, *depth_start;
+ enum pipe_format color_format, depth_format;
+ uint32_t pad_[2];
+};
+
+
+/**
+ * Tell SPUs about rasterizer state.
+ */
+struct cell_command_rasterizer
+{
+ opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */
+ struct pipe_rasterizer_state rasterizer;
+};
+
+
+/**
+ * Clear framebuffer to the given value/color.
+ */
+struct cell_command_clear_surface
+{
+ opcode_t opcode; /**< CELL_CMD_CLEAR_SURFACE */
+ uint surface; /**< Temporary: 0=color, 1=Z */
+ uint value;
+ uint32_t pad[2];
+};
+
+
+/**
+ * Array info used by the vertex shader's vertex puller.
+ */
+struct cell_array_info
+{
+ uint64_t base; /**< Base address of the 0th element. */
+ uint attr; /**< Attribute that this state is for. */
+ uint pitch; /**< Byte pitch from one entry to the next. */
+ uint size;
+ uint function_offset;
+};
+
+
+struct cell_attribute_fetch_code
+{
+ uint64_t base;
+ uint size;
+};
+
+
+struct cell_buffer_range
+{
+ uint64_t base;
+ unsigned size;
+};
+
+
+struct cell_shader_info
+{
+ uint64_t declarations;
+ uint64_t instructions;
+ uint64_t immediates;
+
+ unsigned num_outputs;
+ unsigned num_declarations;
+ unsigned num_instructions;
+ unsigned num_immediates;
+};
+
+
+#define SPU_VERTS_PER_BATCH 64
+struct cell_command_vs
+{
+ opcode_t opcode; /**< CELL_CMD_VS_EXECUTE */
+ uint64_t vOut[SPU_VERTS_PER_BATCH];
+ unsigned num_elts;
+ unsigned elts[SPU_VERTS_PER_BATCH];
+ float plane[12][4];
+ unsigned nr_planes;
+ unsigned nr_attrs;
+};
+
+
+struct cell_command_render
+{
+ opcode_t opcode; /**< CELL_CMD_RENDER */
+ uint prim_type; /**< PIPE_PRIM_x */
+ uint num_verts;
+ uint vertex_size; /**< bytes per vertex */
+ uint num_indexes;
+ uint vertex_buf; /**< which cell->buffer[] contains the vertex data */
+ float xmin, ymin, xmax, ymax; /* XXX another dummy field */
+ uint min_index;
+ boolean inline_verts;
+ uint32_t pad_[1];
+};
+
+
+struct cell_command_release_verts
+{
+ opcode_t opcode; /**< CELL_CMD_RELEASE_VERTS */
+ uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
+ uint32_t pad_[3];
+};
+
+
+struct cell_command_sampler
+{
+ opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */
+ uint unit;
+ struct pipe_sampler_state state;
+ uint32_t pad_[1];
+};
+
+
+struct cell_command_texture
+{
+ opcode_t opcode; /**< CELL_CMD_STATE_TEXTURE */
+ uint target; /**< PIPE_TEXTURE_x */
+ uint unit;
+ void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */
+ ushort width[CELL_MAX_TEXTURE_LEVELS];
+ ushort height[CELL_MAX_TEXTURE_LEVELS];
+ ushort depth[CELL_MAX_TEXTURE_LEVELS];
+};
+
+
+#define MAX_SPU_FUNCTIONS 12
+/**
+ * Used to tell the PPU about the address of particular functions in the
+ * SPU's address space.
+ */
+struct cell_spu_function_info
+{
+ uint num;
+ char names[MAX_SPU_FUNCTIONS][16];
+ uint addrs[MAX_SPU_FUNCTIONS];
+ char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */
+};
+
+
+/** This is the object passed to spe_create_thread() */
+struct cell_init_info
+{
+ unsigned id;
+ unsigned num_spus;
+ unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */
+ float inv_timebase; /**< 1.0/timebase, for perf measurement */
+
+ /** Buffers for command batches, vertex/index data */
+ ubyte *buffers[CELL_NUM_BUFFERS];
+ uint *buffer_status; /**< points at cell_context->buffer_status */
+
+ struct cell_spu_function_info *spu_functions;
+} ALIGN16_ATTRIB;
+
+
+#endif /* CELL_COMMON_H */
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile
new file mode 100644
index 0000000000..c92f8e5cba
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/Makefile
@@ -0,0 +1,86 @@
+# Gallium3D Cell driver: PPU code
+
+# This makefile builds the libcell.a library which gets pulled into
+# the main libGL.so library
+
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+
+# This is the "top-level" cell PPU driver code, will get pulled into libGL.so
+# by the winsys Makefile.
+CELL_LIB = ../libcell.a
+
+
+# This is the SPU code. We'd like to be able to put this into the libcell.a
+# archive with the PPU code, but nesting .a libs doesn't seem to work.
+# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile
+SPU_CODE_MODULE = ../spu/g3d_spu.a
+
+
+SOURCES = \
+ cell_batch.c \
+ cell_clear.c \
+ cell_context.c \
+ cell_draw_arrays.c \
+ cell_fence.c \
+ cell_flush.c \
+ cell_gen_fragment.c \
+ cell_gen_fp.c \
+ cell_state_derived.c \
+ cell_state_emit.c \
+ cell_state_shader.c \
+ cell_pipe_state.c \
+ cell_screen.c \
+ cell_state_vertex.c \
+ cell_spu.c \
+ cell_surface.c \
+ cell_texture.c \
+ cell_vbuf.c \
+ cell_vertex_fetch.c \
+ cell_vertex_shader.c
+
+
+OBJECTS = $(SOURCES:.c=.o) \
+
+INCLUDE_DIRS = \
+ -I$(TOP)/src/mesa \
+ -I$(TOP)/src/gallium/include \
+ -I$(TOP)/src/gallium/auxiliary \
+ -I$(TOP)/src/gallium/drivers
+
+.c.o:
+ $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
+
+
+.c.s:
+ $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
+
+
+default: $(CELL_LIB)
+
+
+$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE)
+# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work
+ ar -ru $(CELL_LIB) $(OBJECTS)
+
+#$(PROG): $(PPU_OBJECTS)
+# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS)
+
+
+
+clean:
+ rm -f *.o *~ $(CELL_LIB)
+
+
+
+depend: $(SOURCES)
+ rm -f depend
+ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null
+
+include depend
+
+
+
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c
new file mode 100644
index 0000000000..fe144f8b84
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_batch.c
@@ -0,0 +1,260 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "cell_context.h"
+#include "cell_batch.h"
+#include "cell_fence.h"
+#include "cell_spu.h"
+
+
+
+/**
+ * Search the buffer pool for an empty/free buffer and return its index.
+ * Buffers are used for storing vertex data, state and commands which
+ * will be sent to the SPUs.
+ * If no empty buffers are available, wait for one.
+ * \return buffer index in [0, CELL_NUM_BUFFERS-1]
+ */
+uint
+cell_get_empty_buffer(struct cell_context *cell)
+{
+ static uint prev_buffer = 0;
+ uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS;
+ uint tries = 0;
+
+ /* Find a buffer that's marked as free by all SPUs */
+ while (1) {
+ uint spu, num_free = 0;
+
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) {
+ num_free++;
+
+ if (num_free == cell->num_spus) {
+ /* found a free buffer, now mark status as used */
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
+ }
+ /*
+ printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries);
+ */
+ prev_buffer = buf;
+
+ /* release tex buffer associated w/ prev use of this batch buf */
+ cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]);
+
+ return buf;
+ }
+ }
+ else {
+ break;
+ }
+ }
+
+ /* try next buf */
+ buf = (buf + 1) % CELL_NUM_BUFFERS;
+
+ tries++;
+ if (tries == 100) {
+ /*
+ printf("PPU WAITING for buffer...\n");
+ */
+ }
+ }
+}
+
+
+/**
+ * Append a fence command to the current batch buffer.
+ * Note that we're sure there's always room for this because of the
+ * adjusted size check in cell_batch_free_space().
+ */
+static void
+emit_fence(struct cell_context *cell)
+{
+ const uint batch = cell->cur_batch;
+ const uint size = cell->buffer_size[batch];
+ struct cell_command_fence *fence_cmd;
+ struct cell_fence *fence = &cell->fenced_buffers[batch].fence;
+ uint i;
+
+ /* set fence status to emitted, not yet signalled */
+ for (i = 0; i < cell->num_spus; i++) {
+ fence->status[i][0] = CELL_FENCE_EMITTED;
+ }
+
+ STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0);
+ ASSERT(size % 16 == 0);
+ ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE);
+
+ fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size);
+ fence_cmd->opcode[0] = CELL_CMD_FENCE;
+ fence_cmd->fence = fence;
+
+ /* update batch buffer size */
+ cell->buffer_size[batch] = size + sizeof(struct cell_command_fence);
+}
+
+
+/**
+ * Flush the current batch buffer to the SPUs.
+ * An empty buffer will be found and set as the new current batch buffer
+ * for subsequent commands/data.
+ */
+void
+cell_batch_flush(struct cell_context *cell)
+{
+ static boolean flushing = FALSE;
+ uint batch = cell->cur_batch;
+ uint size = cell->buffer_size[batch];
+ uint spu, cmd_word;
+
+ assert(!flushing);
+
+ if (size == 0)
+ return;
+
+ /* Before we use this batch buffer, make sure any fenced texture buffers
+ * are released.
+ */
+ if (cell->fenced_buffers[batch].head) {
+ emit_fence(cell);
+ size = cell->buffer_size[batch];
+ }
+
+ flushing = TRUE;
+
+ assert(batch < CELL_NUM_BUFFERS);
+
+ /*
+ printf("cell_batch_dispatch: buf %u at %p, size %u\n",
+ batch, &cell->buffer[batch][0], size);
+ */
+
+ /*
+ * Build "BATCH" command and send to all SPUs.
+ */
+ cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16);
+
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED);
+ send_mbox_message(cell_global.spe_contexts[spu], cmd_word);
+ }
+
+ /* When the SPUs are done copying the buffer into their locals stores
+ * they'll write a BUFFER_STATUS_FREE message into the buffer_status[]
+ * array indicating that the PPU can re-use the buffer.
+ */
+
+ batch = cell_get_empty_buffer(cell);
+
+ cell->buffer_size[batch] = 0; /* empty */
+ cell->cur_batch = batch;
+
+ flushing = FALSE;
+}
+
+
+/**
+ * Return the number of bytes free in the current batch buffer.
+ */
+uint
+cell_batch_free_space(const struct cell_context *cell)
+{
+ uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch];
+ free -= sizeof(struct cell_command_fence);
+ return free;
+}
+
+
+/**
+ * Allocate space in the current batch buffer for 'bytes' space.
+ * Bytes must be a multiple of 16 bytes. Allocation will be 16 byte aligned.
+ * \return address in batch buffer to put data
+ */
+void *
+cell_batch_alloc16(struct cell_context *cell, uint bytes)
+{
+ void *pos;
+ uint size;
+
+ ASSERT(bytes % 16 == 0);
+ ASSERT(bytes <= CELL_BUFFER_SIZE);
+ ASSERT(cell->cur_batch >= 0);
+
+#ifdef ASSERT
+ {
+ uint spu;
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ ASSERT(cell->buffer_status[spu][cell->cur_batch][0]
+ == CELL_BUFFER_STATUS_USED);
+ }
+ }
+#endif
+
+ size = cell->buffer_size[cell->cur_batch];
+
+ if (bytes > cell_batch_free_space(cell)) {
+ cell_batch_flush(cell);
+ size = 0;
+ }
+
+ ASSERT(size % 16 == 0);
+ ASSERT(size + bytes <= CELL_BUFFER_SIZE);
+
+ pos = (void *) (cell->buffer[cell->cur_batch] + size);
+
+ cell->buffer_size[cell->cur_batch] = size + bytes;
+
+ return pos;
+}
+
+
+/**
+ * One-time init of batch buffers.
+ */
+void
+cell_init_batch_buffers(struct cell_context *cell)
+{
+ uint spu, buf;
+
+ /* init command, vertex/index buffer info */
+ for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) {
+ cell->buffer_size[buf] = 0;
+
+ /* init batch buffer status values,
+ * mark 0th buffer as used, rest as free.
+ */
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ if (buf == 0)
+ cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
+ else
+ cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE;
+ }
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h
new file mode 100644
index 0000000000..290136031a
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_batch.h
@@ -0,0 +1,54 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_BATCH_H
+#define CELL_BATCH_H
+
+#include "pipe/p_compiler.h"
+
+
+struct cell_context;
+
+
+extern uint
+cell_get_empty_buffer(struct cell_context *cell);
+
+extern void
+cell_batch_flush(struct cell_context *cell);
+
+extern uint
+cell_batch_free_space(const struct cell_context *cell);
+
+extern void *
+cell_batch_alloc16(struct cell_context *cell, uint bytes);
+
+extern void
+cell_init_batch_buffers(struct cell_context *cell);
+
+
+#endif /* CELL_BATCH_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c
new file mode 100644
index 0000000000..c2e276988c
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_clear.c
@@ -0,0 +1,123 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Authors
+ * Brian Paul
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdint.h>
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+#include "cell/common.h"
+#include "cell_clear.h"
+#include "cell_context.h"
+#include "cell_batch.h"
+#include "cell_flush.h"
+#include "cell_spu.h"
+#include "cell_state.h"
+
+
+/**
+ * Convert packed pixel from one format to another.
+ */
+static unsigned
+convert_color(enum pipe_format srcFormat, unsigned srcColor,
+ enum pipe_format dstFormat)
+{
+ ubyte r, g, b, a;
+ unsigned dstColor;
+
+ util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a);
+ util_pack_color_ub(r, g, b, a, dstFormat, &dstColor);
+
+ return dstColor;
+}
+
+
+
+/**
+ * Called via pipe->clear()
+ */
+void
+cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ struct pipe_screen *screen = pipe->screen;
+ struct cell_context *cell = cell_context(pipe);
+ uint surfIndex;
+
+ if (cell->dirty)
+ cell_update_derived(cell);
+
+
+ if (!cell->cbuf_map[0])
+ cell->cbuf_map[0] = screen->surface_map(screen, ps,
+ PIPE_BUFFER_USAGE_GPU_WRITE);
+
+ if (ps == cell->framebuffer.zsbuf) {
+ /* clear z/stencil buffer */
+ surfIndex = 1;
+ }
+ else {
+ /* clear color buffer */
+ surfIndex = 0;
+
+ if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) {
+ clearValue = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue,
+ ps->format);
+ }
+ }
+
+
+ /* Build a CLEAR command and place it in the current batch buffer */
+ {
+ STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0);
+ struct cell_command_clear_surface *clr
+ = (struct cell_command_clear_surface *)
+ cell_batch_alloc16(cell, sizeof(*clr));
+ clr->opcode[0] = CELL_CMD_CLEAR_SURFACE;
+ clr->surface = surfIndex;
+ clr->value = clearValue;
+ }
+
+ /* Technically, the surface's contents are now known and cleared,
+ * so we could set the status to PIPE_SURFACE_STATUS_CLEAR. But
+ * it turns out it's quite painful to recognize when any particular
+ * surface goes from PIPE_SURFACE_STATUS_CLEAR to
+ * PIPE_SURFACE_STATUS_DEFINED (i.e. with known contents), because
+ * the drawing commands could be operating on numerous draw buffers,
+ * which we'd have to iterate through to set all their stati...
+ * For now, we cheat a bit and set the surface's status to DEFINED
+ * right here. Later we should revisit this and set the status to
+ * CLEAR here, and find a better place to set the status to DEFINED.
+ */
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h
new file mode 100644
index 0000000000..ff47d43f4c
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_clear.h
@@ -0,0 +1,43 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_CLEAR_H
+#define CELL_CLEAR_H
+
+
+struct pipe_context;
+struct pipe_surface;
+
+
+extern void
+cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+
+
+#endif /* CELL_CLEAR_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
new file mode 100644
index 0000000000..8f502823f9
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -0,0 +1,183 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Authors
+ * Brian Paul
+ */
+
+
+#include <stdio.h>
+
+#include "pipe/p_defines.h"
+#include "pipe/p_format.h"
+#include "util/u_memory.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_screen.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+
+#include "cell/common.h"
+#include "cell_batch.h"
+#include "cell_clear.h"
+#include "cell_context.h"
+#include "cell_draw_arrays.h"
+#include "cell_fence.h"
+#include "cell_flush.h"
+#include "cell_state.h"
+#include "cell_surface.h"
+#include "cell_spu.h"
+#include "cell_pipe_state.h"
+#include "cell_texture.h"
+#include "cell_vbuf.h"
+
+
+
+static void
+cell_destroy_context( struct pipe_context *pipe )
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ util_delete_keymap(cell->fragment_ops_cache, NULL);
+
+ cell_spu_exit(cell);
+
+ align_free(cell);
+}
+
+
+static struct draw_context *
+cell_draw_create(struct cell_context *cell)
+{
+ struct draw_context *draw = draw_create();
+
+#if 0 /* broken */
+ if (getenv("GALLIUM_CELL_VS")) {
+ /* plug in SPU-based vertex transformation code */
+ draw->shader_queue_flush = cell_vertex_shader_queue_flush;
+ draw->driver_private = cell;
+ }
+#endif
+
+ return draw;
+}
+
+
+static const struct debug_named_value cell_debug_flags[] = {
+ {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */
+ {"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */
+ {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */
+ {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/
+ {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/
+ {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */
+ {"cache", CELL_DEBUG_CACHE}, /**< report texture cache stats on exit */
+ {NULL, 0}
+};
+
+
+struct pipe_context *
+cell_create_context(struct pipe_screen *screen,
+ struct cell_winsys *cws)
+{
+ struct cell_context *cell;
+ uint i;
+
+ /* some fields need to be 16-byte aligned, so align the whole object */
+ cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
+ if (!cell)
+ return NULL;
+
+ memset(cell, 0, sizeof(*cell));
+
+ cell->winsys = cws;
+ cell->pipe.winsys = screen->winsys;
+ cell->pipe.screen = screen;
+ cell->pipe.destroy = cell_destroy_context;
+
+ cell->pipe.clear = cell_clear_surface;
+ cell->pipe.flush = cell_flush;
+
+#if 0
+ cell->pipe.begin_query = cell_begin_query;
+ cell->pipe.end_query = cell_end_query;
+ cell->pipe.wait_query = cell_wait_query;
+#endif
+
+ cell_init_draw_functions(cell);
+ cell_init_state_functions(cell);
+ cell_init_shader_functions(cell);
+ cell_init_surface_functions(cell);
+ cell_init_vertex_functions(cell);
+
+ cell->draw = cell_draw_create(cell);
+
+ /* Create cache of fragment ops generated code */
+ cell->fragment_ops_cache =
+ util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL);
+
+ cell_init_vbuf(cell);
+
+ draw_set_rasterize_stage(cell->draw, cell->vbuf);
+
+ /* convert all points/lines to tris for the time being */
+ draw_wide_point_threshold(cell->draw, 0.0);
+ draw_wide_line_threshold(cell->draw, 0.0);
+
+ /* get env vars or read config file to get debug flags */
+ cell->debug_flags = debug_get_flags_option("CELL_DEBUG",
+ cell_debug_flags,
+ 0 );
+
+ for (i = 0; i < CELL_NUM_BUFFERS; i++)
+ cell_fence_init(&cell->fenced_buffers[i].fence);
+
+
+ /*
+ * SPU stuff
+ */
+ /* This call only works with SDK 3.0. Anyone still using 2.1??? */
+ cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1);
+ cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
+ if (cell->debug_flags) {
+ printf("Cell: found %d Cell(s) with %u SPUs\n",
+ cell->num_cells, cell->num_spus);
+ }
+ if (getenv("CELL_NUM_SPUS")) {
+ cell->num_spus = atoi(getenv("CELL_NUM_SPUS"));
+ assert(cell->num_spus > 0);
+ }
+
+ cell_start_spus(cell);
+
+ cell_init_batch_buffers(cell);
+
+ /* make sure SPU initializations are done before proceeding */
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
+
+ return &cell->pipe;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
new file mode 100644
index 0000000000..eb1397bb3f
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -0,0 +1,205 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_CONTEXT_H
+#define CELL_CONTEXT_H
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_vbuf.h"
+#include "cell_winsys.h"
+#include "cell/common.h"
+#include "rtasm/rtasm_ppc_spe.h"
+#include "tgsi/tgsi_scan.h"
+#include "util/u_keymap.h"
+
+
+struct cell_vbuf_render;
+
+
+/**
+ * Cell vertex shader state, subclass of pipe_shader_state.
+ */
+struct cell_vertex_shader_state
+{
+ struct pipe_shader_state shader;
+ struct tgsi_shader_info info;
+ void *draw_data;
+};
+
+
+/**
+ * Cell fragment shader state, subclass of pipe_shader_state.
+ */
+struct cell_fragment_shader_state
+{
+ struct pipe_shader_state shader;
+ struct tgsi_shader_info info;
+ struct spe_function code;
+ void *data;
+};
+
+
+/**
+ * Key for mapping per-fragment state to cached SPU machine code.
+ * keymap(cell_fragment_ops_key) => cell_command_fragment_ops
+ */
+struct cell_fragment_ops_key
+{
+ struct pipe_blend_state blend;
+ struct pipe_blend_color blend_color;
+ struct pipe_depth_stencil_alpha_state dsa;
+ enum pipe_format color_format;
+ enum pipe_format zs_format;
+};
+
+
+struct cell_buffer_node;
+
+/**
+ * Fenced buffer list. List of buffers which can be unreferenced after
+ * the fence has been executed/signalled.
+ */
+struct cell_buffer_list
+{
+ struct cell_fence fence ALIGN16_ATTRIB;
+ struct cell_buffer_node *head;
+};
+
+
+/**
+ * Per-context state, subclass of pipe_context.
+ */
+struct cell_context
+{
+ struct pipe_context pipe;
+
+ struct cell_winsys *winsys;
+
+ const struct pipe_blend_state *blend;
+ const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ uint num_samplers;
+ const struct pipe_depth_stencil_alpha_state *depth_stencil;
+ const struct pipe_rasterizer_state *rasterizer;
+ const struct cell_vertex_shader_state *vs;
+ const struct cell_fragment_shader_state *fs;
+
+ struct spe_function logic_op;
+
+ struct pipe_blend_color blend_color;
+ struct pipe_clip_state clip;
+ struct pipe_constant_buffer constants[2];
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_poly_stipple poly_stipple;
+ struct pipe_scissor_state scissor;
+ struct cell_texture *texture[PIPE_MAX_SAMPLERS];
+ uint num_textures;
+ struct pipe_viewport_state viewport;
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ uint num_vertex_buffers;
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ uint num_vertex_elements;
+
+ ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
+ ubyte *zsbuf_map;
+
+ struct pipe_surface *tex_surf;
+ uint *tex_map;
+
+ uint dirty;
+ uint dirty_textures; /* bitmask of texture units */
+ uint dirty_samplers; /* bitmask of sampler units */
+
+ /** Cache of code generated for per-fragment ops */
+ struct keymap *fragment_ops_cache;
+
+ /** The primitive drawing context */
+ struct draw_context *draw;
+ struct draw_stage *render_stage;
+
+ /** For post-transformed vertex buffering: */
+ struct cell_vbuf_render *vbuf_render;
+ struct draw_stage *vbuf;
+
+ struct vertex_info vertex_info;
+
+ /** Mapped constant buffers */
+ void *mapped_constants[PIPE_SHADER_TYPES];
+
+ struct cell_spu_function_info spu_functions ALIGN16_ATTRIB;
+
+ uint num_cells, num_spus;
+
+ /** Buffers for command batches, vertex/index data */
+ uint buffer_size[CELL_NUM_BUFFERS];
+ ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB;
+
+ int cur_batch; /**< which buffer is being filled w/ commands */
+
+ /** [4] to ensure 16-byte alignment for each status word */
+ uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB;
+
+
+ /** Associated with each command/batch buffer is a list of pipe_buffers
+ * that are fenced. When the last command in a buffer is executed, the
+ * fence will be signalled, indicating that any pipe_buffers preceeding
+ * that fence can be unreferenced (and probably freed).
+ */
+ struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS];
+
+
+ struct spe_function attrib_fetch;
+ unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS];
+
+ unsigned debug_flags;
+};
+
+
+
+
+static INLINE struct cell_context *
+cell_context(struct pipe_context *pipe)
+{
+ return (struct cell_context *) pipe;
+}
+
+
+extern struct pipe_context *
+cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws);
+
+extern void
+cell_vertex_shader_queue_flush(struct draw_context *draw);
+
+
+/* XXX find a better home for this */
+extern void cell_update_vertex_fetch(struct draw_context *draw);
+
+
+#endif /* CELL_CONTEXT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
new file mode 100644
index 0000000000..880d535320
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -0,0 +1,191 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ * Keith Whitwell
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_context.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+
+#include "cell_context.h"
+#include "cell_draw_arrays.h"
+#include "cell_state.h"
+#include "cell_flush.h"
+
+#include "draw/draw_context.h"
+
+
+
+static void
+cell_map_constant_buffers(struct cell_context *sp)
+{
+ struct pipe_winsys *ws = sp->pipe.winsys;
+ uint i;
+ for (i = 0; i < 2; i++) {
+ if (sp->constants[i].size) {
+ sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ cell_flush_buffer_range(sp, sp->mapped_constants[i],
+ sp->constants[i].buffer->size);
+ }
+ }
+
+ draw_set_mapped_constant_buffer(sp->draw,
+ sp->mapped_constants[PIPE_SHADER_VERTEX],
+ sp->constants[PIPE_SHADER_VERTEX].size);
+}
+
+static void
+cell_unmap_constant_buffers(struct cell_context *sp)
+{
+ struct pipe_winsys *ws = sp->pipe.winsys;
+ uint i;
+ for (i = 0; i < 2; i++) {
+ if (sp->constants[i].size)
+ ws->buffer_unmap(ws, sp->constants[i].buffer);
+ sp->mapped_constants[i] = NULL;
+ }
+}
+
+
+
+/**
+ * Draw vertex arrays, with optional indexing.
+ * Basically, map the vertex buffers (and drawing surfaces), then hand off
+ * the drawing to the 'draw' module.
+ *
+ * XXX should the element buffer be specified/bound with a separate function?
+ */
+static boolean
+cell_draw_range_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct cell_context *sp = cell_context(pipe);
+ struct draw_context *draw = sp->draw;
+ unsigned i;
+
+ if (sp->dirty)
+ cell_update_derived( sp );
+
+#if 0
+ cell_map_surfaces(sp);
+#endif
+ cell_map_constant_buffers(sp);
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < sp->num_vertex_buffers; i++) {
+ void *buf = pipe_buffer_map(pipe->screen,
+ sp->vertex_buffer[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes = pipe_buffer_map(pipe->screen,
+ indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+
+ /* draw! */
+ draw_arrays(draw, mode, start, count);
+
+ /*
+ * unmap vertex/index buffers - will cause draw module to flush
+ */
+ for (i = 0; i < sp->num_vertex_buffers; i++) {
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer);
+ }
+ if (indexBuffer) {
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ pipe_buffer_unmap(pipe->screen, indexBuffer);
+ }
+
+ /* Note: leave drawing surfaces mapped */
+ cell_unmap_constant_buffers(sp);
+
+ return TRUE;
+}
+
+
+static boolean
+cell_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ return cell_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ mode, start, count );
+}
+
+
+static boolean
+cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count)
+{
+ return cell_draw_elements(pipe, NULL, 0, mode, start, count);
+}
+
+
+static void
+cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
+{
+ struct cell_context *cell = cell_context(pipe);
+ draw_set_edgeflags(cell->draw, edgeflags);
+}
+
+
+
+void
+cell_init_draw_functions(struct cell_context *cell)
+{
+ cell->pipe.draw_arrays = cell_draw_arrays;
+ cell->pipe.draw_elements = cell_draw_elements;
+ cell->pipe.draw_range_elements = cell_draw_range_elements;
+ cell->pipe.set_edgeflags = cell_set_edgeflags;
+}
+
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
new file mode 100644
index 0000000000..148873aa67
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
@@ -0,0 +1,36 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef CELL_DRAW_ARRAYS_H
+#define CELL_DRAW_ARRAYS_H
+
+
+extern void
+cell_init_draw_functions(struct cell_context *cell);
+
+
+#endif /* CELL_DRAW_ARRAYS_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c
new file mode 100644
index 0000000000..867b5dcaa0
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_fence.c
@@ -0,0 +1,168 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <unistd.h>
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "cell_context.h"
+#include "cell_batch.h"
+#include "cell_fence.h"
+#include "cell_texture.h"
+
+
+void
+cell_fence_init(struct cell_fence *fence)
+{
+ uint i;
+ ASSERT_ALIGN16(fence->status);
+ for (i = 0; i < CELL_MAX_SPUS; i++) {
+ fence->status[i][0] = CELL_FENCE_IDLE;
+ }
+}
+
+
+boolean
+cell_fence_signalled(const struct cell_context *cell,
+ const struct cell_fence *fence)
+{
+ uint i;
+ for (i = 0; i < cell->num_spus; i++) {
+ if (fence->status[i][0] != CELL_FENCE_SIGNALLED)
+ return FALSE;
+ /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/
+ }
+ return TRUE;
+}
+
+
+void
+cell_fence_finish(const struct cell_context *cell,
+ const struct cell_fence *fence)
+{
+ while (!cell_fence_signalled(cell, fence)) {
+ usleep(10);
+ }
+
+#ifdef DEBUG
+ {
+ uint i;
+ for (i = 0; i < cell->num_spus; i++) {
+ assert(fence->status[i][0] == CELL_FENCE_SIGNALLED);
+ }
+ }
+#endif
+}
+
+
+
+
+struct cell_buffer_node
+{
+ struct pipe_buffer *buffer;
+ struct cell_buffer_node *next;
+};
+
+
+static void
+cell_add_buffer_to_list(struct cell_context *cell,
+ struct cell_buffer_list *list,
+ struct pipe_buffer *buffer)
+{
+ struct pipe_screen *ps = cell->pipe.screen;
+ struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node);
+ /* create new list node which references the buffer, insert at head */
+ if (node) {
+ pipe_buffer_reference(ps, &node->buffer, buffer);
+ node->next = list->head;
+ list->head = node;
+ }
+}
+
+
+/**
+ * Wait for completion of the given fence, then unreference any buffers
+ * on the list.
+ * This typically unrefs/frees texture buffers after any rendering which uses
+ * them has completed.
+ */
+void
+cell_free_fenced_buffers(struct cell_context *cell,
+ struct cell_buffer_list *list)
+{
+ if (list->head) {
+ struct pipe_screen *ps = cell->pipe.screen;
+ struct cell_buffer_node *node;
+
+ cell_fence_finish(cell, &list->fence);
+
+ /* traverse the list, unreferencing buffers, freeing nodes */
+ node = list->head;
+ while (node) {
+ struct cell_buffer_node *next = node->next;
+ assert(node->buffer);
+ pipe_buffer_unmap(ps, node->buffer);
+#if 0
+ printf("Unref buffer %p\n", node->buffer);
+ if (node->buffer->refcount == 1)
+ printf(" Delete!\n");
+#endif
+ pipe_buffer_reference(ps, &node->buffer, NULL);
+ FREE(node);
+ node = next;
+ }
+ list->head = NULL;
+ }
+}
+
+
+/**
+ * This should be called for each render command.
+ * Any texture buffers that are current bound will be added to a fenced
+ * list to be freed later when the fence is executed/signalled.
+ */
+void
+cell_add_fenced_textures(struct cell_context *cell)
+{
+ struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];
+ uint i;
+
+ for (i = 0; i < cell->num_textures; i++) {
+ struct cell_texture *ct = cell->texture[i];
+ if (ct) {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ if (ct->tiled_buffer[level]) {
+#if 0
+ printf("Adding texture %p buffer %p to list\n",
+ ct, ct->tiled_buffer[level]);
+#endif
+ cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]);
+ }
+ }
+ }
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h
new file mode 100644
index 0000000000..536b4ba411
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_fence.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_FENCE_H
+#define CELL_FENCE_H
+
+
+extern void
+cell_fence_init(struct cell_fence *fence);
+
+
+extern boolean
+cell_fence_signalled(const struct cell_context *cell,
+ const struct cell_fence *fence);
+
+
+extern void
+cell_fence_finish(const struct cell_context *cell,
+ const struct cell_fence *fence);
+
+
+
+extern void
+cell_free_fenced_buffers(struct cell_context *cell,
+ struct cell_buffer_list *list);
+
+
+extern void
+cell_add_fenced_textures(struct cell_context *cell);
+
+
+#endif /* CELL_FENCE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c
new file mode 100644
index 0000000000..8275c9dc9c
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_flush.c
@@ -0,0 +1,112 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "cell_context.h"
+#include "cell_batch.h"
+#include "cell_flush.h"
+#include "cell_spu.h"
+#include "cell_render.h"
+#include "draw/draw_context.h"
+
+
+/**
+ * Called via pipe->flush()
+ */
+void
+cell_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ if (fence) {
+ *fence = NULL;
+ /* XXX: Implement real fencing */
+ flags |= CELL_FLUSH_WAIT;
+ }
+
+ if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE))
+ flags |= CELL_FLUSH_WAIT;
+
+ draw_flush( cell->draw );
+ cell_flush_int(cell, flags);
+}
+
+
+/**
+ * Cell internal flush function. Send the current batch buffer to all SPUs.
+ * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle.
+ * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero
+ */
+void
+cell_flush_int(struct cell_context *cell, unsigned flags)
+{
+ static boolean flushing = FALSE; /* recursion catcher */
+ uint i;
+
+ ASSERT(!flushing);
+ flushing = TRUE;
+
+ if (flags & CELL_FLUSH_WAIT) {
+ STATIC_ASSERT(sizeof(opcode_t) % 16 == 0);
+ opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t));
+ *cmd[0] = CELL_CMD_FINISH;
+ }
+
+ cell_batch_flush(cell);
+
+#if 0
+ /* Send CMD_FINISH to all SPUs */
+ for (i = 0; i < cell->num_spus; i++) {
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH);
+ }
+#endif
+
+ if (flags & CELL_FLUSH_WAIT) {
+ /* Wait for ack */
+ for (i = 0; i < cell->num_spus; i++) {
+ uint k = wait_mbox_message(cell_global.spe_contexts[i]);
+ assert(k == CELL_CMD_FINISH);
+ }
+ }
+
+ flushing = FALSE;
+}
+
+
+void
+cell_flush_buffer_range(struct cell_context *cell, void *ptr,
+ unsigned size)
+{
+ STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 16 == 0);
+ uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell,
+ sizeof(opcode_t) + sizeof(struct cell_buffer_range));
+ struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4];
+ batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE;
+ br->base = (uintptr_t) ptr;
+ br->size = size;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h
new file mode 100644
index 0000000000..509ae6239a
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_flush.h
@@ -0,0 +1,45 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_FLUSH
+#define CELL_FLUSH
+
+#define CELL_FLUSH_WAIT 0x80000000
+
+extern void
+cell_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence);
+
+extern void
+cell_flush_int(struct cell_context *cell, unsigned flags);
+
+extern void
+cell_flush_buffer_range(struct cell_context *cell, void *ptr,
+ unsigned size);
+
+#endif
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
new file mode 100644
index 0000000000..5a889a6119
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -0,0 +1,2046 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+/**
+ * Generate SPU fragment program/shader code.
+ *
+ * Note that we generate SOA-style code here. So each TGSI instruction
+ * operates on four pixels (and is translated into four SPU instructions,
+ * generally speaking).
+ *
+ * \author Brian Paul
+ */
+
+#include <math.h>
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
+#include "rtasm/rtasm_ppc_spe.h"
+#include "util/u_memory.h"
+#include "cell_context.h"
+#include "cell_gen_fp.h"
+
+
+#define MAX_TEMPS 16
+#define MAX_IMMED 8
+
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+/**
+ * Context needed during code generation.
+ */
+struct codegen
+{
+ struct cell_context *cell;
+ int inputs_reg; /**< 1st function parameter */
+ int outputs_reg; /**< 2nd function parameter */
+ int constants_reg; /**< 3rd function parameter */
+ int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
+ int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */
+
+ int num_imm; /**< number of immediates */
+
+ int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
+
+ int addr_reg; /**< address register, integer values */
+
+ /** Per-instruction temps / intermediate temps */
+ int num_itemps;
+ int itemps[12];
+
+ /** Current IF/ELSE/ENDIF nesting level */
+ int if_nesting;
+ /** Current BGNLOOP/ENDLOOP nesting level */
+ int loop_nesting;
+ /** Location of start of current loop */
+ int loop_start;
+
+ /** Index of if/conditional mask register */
+ int cond_mask_reg;
+ /** Index of loop mask register */
+ int loop_mask_reg;
+
+ /** Index of master execution mask register */
+ int exec_mask_reg;
+
+ /** KIL mask: indicates which fragments have been killed */
+ int kill_mask_reg;
+
+ int frame_size; /**< Stack frame size, in words */
+
+ struct spe_function *f;
+ boolean error;
+};
+
+
+/**
+ * Allocate an intermediate temporary register.
+ */
+static int
+get_itemp(struct codegen *gen)
+{
+ int t = spe_allocate_available_register(gen->f);
+ assert(gen->num_itemps < Elements(gen->itemps));
+ gen->itemps[gen->num_itemps++] = t;
+ return t;
+}
+
+/**
+ * Free all intermediate temporary registers. To be called after each
+ * instruction has been emitted.
+ */
+static void
+free_itemps(struct codegen *gen)
+{
+ int i;
+ for (i = 0; i < gen->num_itemps; i++) {
+ spe_release_register(gen->f, gen->itemps[i]);
+ }
+ gen->num_itemps = 0;
+}
+
+
+/**
+ * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
+ * The register is allocated and initialized upon the first call.
+ */
+static int
+get_const_one_reg(struct codegen *gen)
+{
+ if (gen->one_reg <= 0) {
+ gen->one_reg = spe_allocate_available_register(gen->f);
+
+ spe_indent(gen->f, 4);
+ spe_comment(gen->f, -4, "init constant reg = 1.0:");
+
+ /* one = {1.0, 1.0, 1.0, 1.0} */
+ spe_load_float(gen->f, gen->one_reg, 1.0f);
+
+ spe_indent(gen->f, -4);
+ }
+
+ return gen->one_reg;
+}
+
+
+/**
+ * Return index of the address register.
+ * Used for indirect register loads/stores.
+ */
+static int
+get_address_reg(struct codegen *gen)
+{
+ if (gen->addr_reg <= 0) {
+ gen->addr_reg = spe_allocate_available_register(gen->f);
+
+ spe_indent(gen->f, 4);
+ spe_comment(gen->f, -4, "init address reg = 0:");
+
+ /* init addr = {0, 0, 0, 0} */
+ spe_zero(gen->f, gen->addr_reg);
+
+ spe_indent(gen->f, -4);
+ }
+
+ return gen->addr_reg;
+}
+
+
+/**
+ * Return index of the master execution mask.
+ * The register is allocated an initialized upon the first call.
+ *
+ * The master execution mask controls which pixels in a quad are
+ * modified, according to surrounding conditionals, loops, etc.
+ */
+static int
+get_exec_mask_reg(struct codegen *gen)
+{
+ if (gen->exec_mask_reg <= 0) {
+ gen->exec_mask_reg = spe_allocate_available_register(gen->f);
+
+ /* XXX this may not be needed */
+ spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0");
+ spe_load_int(gen->f, gen->exec_mask_reg, ~0);
+ }
+
+ return gen->exec_mask_reg;
+}
+
+
+/** Return index of the conditional (if/else) execution mask register */
+static int
+get_cond_mask_reg(struct codegen *gen)
+{
+ if (gen->cond_mask_reg <= 0) {
+ gen->cond_mask_reg = spe_allocate_available_register(gen->f);
+ }
+
+ return gen->cond_mask_reg;
+}
+
+
+/** Return index of the loop execution mask register */
+static int
+get_loop_mask_reg(struct codegen *gen)
+{
+ if (gen->loop_mask_reg <= 0) {
+ gen->loop_mask_reg = spe_allocate_available_register(gen->f);
+ }
+
+ return gen->loop_mask_reg;
+}
+
+
+
+static boolean
+is_register_src(struct codegen *gen, int channel,
+ const struct tgsi_full_src_register *src)
+{
+ int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
+ int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
+
+ if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
+ return FALSE;
+ }
+ if (src->SrcRegister.File == TGSI_FILE_TEMPORARY ||
+ src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
+static boolean
+is_memory_dst(struct codegen *gen, int channel,
+ const struct tgsi_full_dst_register *dst)
+{
+ if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+ return TRUE;
+ }
+ else {
+ return FALSE;
+ }
+}
+
+
+/**
+ * Return the index of the SPU temporary containing the named TGSI
+ * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
+ * just return the corresponding SPE register. If the TGIS register
+ * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
+ * and emit an SPE load instruction.
+ */
+static int
+get_src_reg(struct codegen *gen,
+ int channel,
+ const struct tgsi_full_src_register *src)
+{
+ int reg = -1;
+ int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
+ boolean reg_is_itemp = FALSE;
+ uint sign_op;
+
+ assert(swizzle >= TGSI_SWIZZLE_X);
+ assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
+
+ if (swizzle == TGSI_EXTSWIZZLE_ONE) {
+ /* Load const one float and early out */
+ reg = get_const_one_reg(gen);
+ }
+ else if (swizzle == TGSI_EXTSWIZZLE_ZERO) {
+ /* Load const zero float and early out */
+ reg = get_itemp(gen);
+ spe_xor(gen->f, reg, reg, reg);
+ }
+ else {
+ int index = src->SrcRegister.Index;
+
+ assert(swizzle < 4);
+
+ if (src->SrcRegister.Indirect) {
+ /* XXX unfinished */
+ }
+
+ switch (src->SrcRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ reg = gen->temp_regs[index][swizzle];
+ break;
+ case TGSI_FILE_INPUT:
+ {
+ /* offset is measured in quadwords, not bytes */
+ int offset = index * 4 + swizzle;
+ reg = get_itemp(gen);
+ reg_is_itemp = TRUE;
+ /* Load: reg = memory[(machine_reg) + offset] */
+ spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ reg = gen->imm_regs[index][swizzle];
+ break;
+ case TGSI_FILE_CONSTANT:
+ {
+ /* offset is measured in quadwords, not bytes */
+ int offset = index * 4 + swizzle;
+ reg = get_itemp(gen);
+ reg_is_itemp = TRUE;
+ /* Load: reg = memory[(machine_reg) + offset] */
+ spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ /*
+ * Handle absolute value, negate or set-negative of src register.
+ */
+ sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
+ if (sign_op != TGSI_UTIL_SIGN_KEEP) {
+ /*
+ * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
+ */
+ const int bit31mask_reg = get_itemp(gen);
+ int result_reg;
+
+ if (reg_is_itemp) {
+ /* re-use 'reg' for the result */
+ result_reg = reg;
+ }
+ else {
+ /* alloc a new reg for the result */
+ result_reg = get_itemp(gen);
+ }
+
+ /* mask with bit 31 set, the rest cleared */
+ spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
+
+ if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
+ spe_andc(gen->f, result_reg, reg, bit31mask_reg);
+ }
+ else if (sign_op == TGSI_UTIL_SIGN_SET) {
+ spe_and(gen->f, result_reg, reg, bit31mask_reg);
+ }
+ else {
+ assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
+ spe_xor(gen->f, result_reg, reg, bit31mask_reg);
+ }
+
+ reg = result_reg;
+ }
+
+ return reg;
+}
+
+
+/**
+ * Return the index of an SPE register to use for the given TGSI register.
+ * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
+ * corresponding SPE register is returned. If the TGSI register is
+ * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
+ * See store_dest_reg() below...
+ */
+static int
+get_dst_reg(struct codegen *gen,
+ int channel,
+ const struct tgsi_full_dst_register *dest)
+{
+ int reg = -1;
+
+ switch (dest->DstRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0)
+ reg = get_itemp(gen);
+ else
+ reg = gen->temp_regs[dest->DstRegister.Index][channel];
+ break;
+ case TGSI_FILE_OUTPUT:
+ reg = get_itemp(gen);
+ break;
+ default:
+ assert(0);
+ }
+
+ return reg;
+}
+
+
+/**
+ * When a TGSI instruction is writing to an output register, this
+ * function emits the SPE store instruction to store the value_reg.
+ * \param value_reg the SPE register containing the value to store.
+ * This would have been returned by get_dst_reg().
+ */
+static void
+store_dest_reg(struct codegen *gen,
+ int value_reg, int channel,
+ const struct tgsi_full_dst_register *dest)
+{
+ /*
+ * XXX need to implement dst reg clamping/saturation
+ */
+#if 0
+ switch (inst->Instruction.Saturate) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ break;
+ default:
+ assert( 0 );
+ }
+#endif
+
+ switch (dest->DstRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
+ int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
+ int exec_reg = get_exec_mask_reg(gen);
+ /* Mix d with new value according to exec mask:
+ * d[i] = mask_reg[i] ? value_reg : d_reg
+ */
+ spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
+ }
+ else {
+ /* we're not inside a condition or loop: do nothing special */
+
+ }
+ break;
+ case TGSI_FILE_OUTPUT:
+ {
+ /* offset is measured in quadwords, not bytes */
+ int offset = dest->DstRegister.Index * 4 + channel;
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
+ int exec_reg = get_exec_mask_reg(gen);
+ int curval_reg = get_itemp(gen);
+ /* First read the current value from memory:
+ * Load: curval = memory[(machine_reg) + offset]
+ */
+ spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
+ /* Mix curval with newvalue according to exec mask:
+ * d[i] = mask_reg[i] ? value_reg : d_reg
+ */
+ spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
+ /* Store: memory[(machine_reg) + offset] = curval */
+ spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
+ }
+ else {
+ /* Store: memory[(machine_reg) + offset] = reg */
+ spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+
+static void
+emit_prologue(struct codegen *gen)
+{
+ gen->frame_size = 1024; /* XXX temporary, should be dynamic */
+
+ spe_comment(gen->f, 0, "Function prologue:");
+
+ /* save $lr on stack # stqd $lr,16($sp) */
+ spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
+
+ if (gen->frame_size >= 512) {
+ /* offset is too large for ai instruction */
+ int offset_reg = spe_allocate_available_register(gen->f);
+ int sp_reg = spe_allocate_available_register(gen->f);
+ /* offset = -framesize */
+ spe_load_int(gen->f, offset_reg, -gen->frame_size);
+ /* sp = $sp */
+ spe_move(gen->f, sp_reg, SPE_REG_SP);
+ /* $sp = $sp + offset_reg */
+ spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
+ /* save $sp in stack frame */
+ spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0);
+ /* clean up */
+ spe_release_register(gen->f, offset_reg);
+ spe_release_register(gen->f, sp_reg);
+ }
+ else {
+ /* save stack pointer # stqd $sp,-frameSize($sp) */
+ spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
+
+ /* adjust stack pointer # ai $sp,$sp,-frameSize */
+ spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
+ }
+}
+
+
+static void
+emit_epilogue(struct codegen *gen)
+{
+ const int return_reg = 3;
+
+ spe_comment(gen->f, 0, "Function epilogue:");
+
+ spe_comment(gen->f, 0, "return the killed mask");
+ if (gen->kill_mask_reg > 0) {
+ /* shader called KIL, return the "alive" mask */
+ spe_move(gen->f, return_reg, gen->kill_mask_reg);
+ }
+ else {
+ /* return {0,0,0,0} */
+ spe_load_uint(gen->f, return_reg, 0);
+ }
+
+ spe_comment(gen->f, 0, "restore stack and return");
+ if (gen->frame_size >= 512) {
+ /* offset is too large for ai instruction */
+ int offset_reg = spe_allocate_available_register(gen->f);
+ /* offset = framesize */
+ spe_load_int(gen->f, offset_reg, gen->frame_size);
+ /* $sp = $sp + offset */
+ spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
+ /* clean up */
+ spe_release_register(gen->f, offset_reg);
+ }
+ else {
+ /* restore stack pointer # ai $sp,$sp,frameSize */
+ spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
+ }
+
+ /* restore $lr # lqd $lr,16($sp) */
+ spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
+
+ /* return from function call */
+ spe_bi(gen->f, SPE_REG_RA, 0, 0);
+}
+
+
+#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \
+ for (ch = 0; ch < 4; ch++) \
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch))
+
+
+static boolean
+emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch = 0, src_reg, addr_reg;
+
+ src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ addr_reg = get_address_reg(gen);
+
+ /* convert float to int */
+ spe_cflts(gen->f, addr_reg, src_reg, 0);
+
+ free_itemps(gen);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, src_reg[4], dst_reg[4];
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) &&
+ is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) {
+ /* special-case: register to memory store */
+ store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+ else {
+ spe_move(gen->f, dst_reg[ch], src_reg[ch]);
+ store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+ }
+
+ free_itemps(gen);
+
+ return TRUE;
+}
+
+/**
+ * Emit binary operation
+ */
+static boolean
+emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], s2_reg[4], d_reg[4];
+
+ /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+
+ /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ /* Emit actual SPE instruction: d = s1 + s2 */
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
+ spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ case TGSI_OPCODE_SUB:
+ spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ case TGSI_OPCODE_MUL:
+ spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ default:
+ ;
+ }
+ }
+
+ /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ /* Free any intermediate temps we allocated */
+ free_itemps(gen);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit multiply add. See emit_ADD for comments.
+ */
+static boolean
+emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+ free_itemps(gen);
+ return TRUE;
+}
+
+
+/**
+ * Emit linear interpolate. See emit_ADD for comments.
+ */
+static boolean
+emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
+
+ /* setup/get src/dst/temp regs */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
+ }
+
+ /* d = s3 + s1(s2 - s3) */
+ /* do all subtracts, then all fma, then all stores to better pipeline */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+ free_itemps(gen);
+ return TRUE;
+}
+
+
+
+/**
+ * Emit reciprocal or recip sqrt.
+ */
+static boolean
+emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], d_reg[4], tmp_reg[4];
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) {
+ /* tmp = 1/s1 */
+ spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]);
+ }
+ else {
+ /* tmp = 1/sqrt(s1) */
+ spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]);
+ }
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ /* d = float_interp(s1, tmp) */
+ spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+
+/**
+ * Emit absolute value. See emit_ADD for comments.
+ */
+static boolean
+emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], d_reg[4];
+ const int bit31mask_reg = get_itemp(gen);
+
+ /* mask with bit 31 set, the rest cleared */
+ spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+
+ /* d = sign bit cleared in s1 */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+/**
+ * Emit 3 component dot product. See emit_ADD for comments.
+ */
+static boolean
+emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int s1x_reg, s1y_reg, s1z_reg;
+ int s2x_reg, s2y_reg, s2z_reg;
+ int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
+
+ s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+
+ /* t0 = x0 * x1 */
+ spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
+
+ /* t1 = y0 * y1 */
+ spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
+
+ /* t0 = z0 * z1 + t0 */
+ spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
+
+ /* t0 = t0 + t1 */
+ spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, t0_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+/**
+ * Emit 4 component dot product. See emit_ADD for comments.
+ */
+static boolean
+emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int s0x_reg, s0y_reg, s0z_reg, s0w_reg;
+ int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
+ int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
+
+ s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
+ s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
+
+ /* t0 = x0 * x1 */
+ spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
+
+ /* t1 = y0 * y1 */
+ spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg);
+
+ /* t0 = z0 * z1 + t0 */
+ spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg);
+
+ /* t1 = w0 * w1 + t1 */
+ spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg);
+
+ /* t0 = t0 + t1 */
+ spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, t0_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+/**
+ * Emit homogeneous dot product. See emit_ADD for comments.
+ */
+static boolean
+emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ /* XXX rewrite this function to look more like DP3/DP4 */
+ int ch;
+ int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ int tmp_reg = get_itemp(gen);
+
+ /* t = x0 * x1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ /* t = y0 * y1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ /* t = z0 * z1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
+ /* t = w1 + t */
+ spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, tmp_reg);
+ store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+/**
+ * Emit 3-component vector normalize.
+ */
+static boolean
+emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int src_reg[3];
+ int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
+
+ src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+
+ /* t0 = x * x */
+ spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
+
+ /* t1 = y * y */
+ spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]);
+
+ /* t0 = z * z + t0 */
+ spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg);
+
+ /* t0 = t0 + t1 */
+ spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
+
+ /* t1 = 1.0 / sqrt(t0) */
+ spe_frsqest(gen->f, t1_reg, t0_reg);
+ spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ /* dst = src[ch] * t1 */
+ spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+
+/**
+ * Emit cross product. See emit_ADD for comments.
+ */
+static boolean
+emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ int tmp_reg = get_itemp(gen);
+
+ /* t = z0 * y1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ /* t = y0 * z1 - t */
+ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
+ store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
+ }
+
+ s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ /* t = x0 * z1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ /* t = z0 * x1 - t */
+ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
+ store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
+ }
+
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ /* t = y0 * x1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ /* t = x0 * y1 - t */
+ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
+ store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+
+/**
+ * Emit inequality instruction.
+ * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
+ * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
+ * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
+ */
+static boolean
+emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
+ bool complement = FALSE;
+
+ one_reg = get_const_one_reg(gen);
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_SGT:
+ spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ case TGSI_OPCODE_SLT:
+ spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
+ break;
+ case TGSI_OPCODE_SGE:
+ spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
+ complement = TRUE;
+ break;
+ case TGSI_OPCODE_SLE:
+ spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ complement = TRUE;
+ break;
+ case TGSI_OPCODE_SEQ:
+ spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ case TGSI_OPCODE_SNE:
+ spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ complement = TRUE;
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ /* convert d from 0x0/0xffffffff to 0.0/1.0 */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ /* d = d & one_reg */
+ if (complement)
+ spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]);
+ else
+ spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+
+/**
+ * Emit compare.
+ */
+static boolean
+emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int zero_reg = get_itemp(gen);
+
+ spe_zero(gen->f, zero_reg);
+
+ /* d = (s1 < 0) ? s2 : s3 */
+ spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
+ spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+
+ return TRUE;
+}
+
+/**
+ * Emit trunc.
+ * Convert float to signed int
+ * Convert signed int to float
+ */
+static boolean
+emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], d_reg[4];
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+
+ /* Convert float to int */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0);
+ }
+
+ /* Convert int to float */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+
+/**
+ * Emit floor.
+ * If negative int subtract one
+ * Convert float to signed int
+ * Convert signed int to float
+ */
+static boolean
+emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
+
+ zero_reg = get_itemp(gen);
+ spe_zero(gen->f, zero_reg);
+ one_reg = get_const_one_reg(gen);
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
+ }
+
+ /* If negative, subtract 1.0 */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
+ }
+
+ /* Convert float to int */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
+ }
+
+ /* Convert int to float */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+
+/**
+ * Compute frac = Input - FLR(Input)
+ */
+static boolean
+emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
+
+ zero_reg = get_itemp(gen);
+ spe_zero(gen->f, zero_reg);
+ one_reg = get_const_one_reg(gen);
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
+ }
+
+ /* If negative, subtract 1.0 */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
+ }
+
+ /* Convert float to int */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
+ }
+
+ /* Convert int to float */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
+ }
+
+ /* d = s1 - FLR(s1) */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
+ }
+
+ /* store result */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+
+#if 0
+static void
+print_functions(struct cell_context *cell)
+{
+ struct cell_spu_function_info *funcs = &cell->spu_functions;
+ uint i;
+ for (i = 0; i < funcs->num; i++) {
+ printf("SPU func %u: %s at %u\n",
+ i, funcs->names[i], funcs->addrs[i]);
+ }
+}
+#endif
+
+
+static uint
+lookup_function(struct cell_context *cell, const char *funcname)
+{
+ const struct cell_spu_function_info *funcs = &cell->spu_functions;
+ uint i, addr = 0;
+ for (i = 0; i < funcs->num; i++) {
+ if (strcmp(funcs->names[i], funcname) == 0) {
+ addr = funcs->addrs[i];
+ }
+ }
+ assert(addr && "spu function not found");
+ return addr / 4; /* discard 2 least significant bits */
+}
+
+
+/**
+ * Emit code to call a SPU function.
+ * Used to implement instructions like SIN/COS/POW/TEX/etc.
+ * If scalar, only the X components of the src regs are used, and the
+ * result is replicated across the dest register's XYZW components.
+ */
+static boolean
+emit_function_call(struct codegen *gen,
+ const struct tgsi_full_instruction *inst,
+ char *funcname, uint num_args, boolean scalar)
+{
+ const uint addr = lookup_function(gen->cell, funcname);
+ char comment[100];
+ int s_regs[3];
+ int func_called = FALSE;
+ uint a, ch;
+ int retval_reg = -1;
+
+ assert(num_args <= 3);
+
+ snprintf(comment, sizeof(comment), "CALL %s:", funcname);
+ spe_comment(gen->f, -4, comment);
+
+ if (scalar) {
+ for (a = 0; a < num_args; a++) {
+ s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]);
+ }
+ /* we'll call the function, put the return value in this register,
+ * then replicate it across all write-enabled components in d_reg.
+ */
+ retval_reg = spe_allocate_available_register(gen->f);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg;
+ ubyte usedRegs[SPE_NUM_REGS];
+ uint i, numUsed;
+
+ if (!scalar) {
+ for (a = 0; a < num_args; a++) {
+ s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
+ }
+ }
+
+ d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ if (!scalar || !func_called) {
+ /* for a scalar function, we'll really only call the function once */
+
+ numUsed = spe_get_registers_used(gen->f, usedRegs);
+ assert(numUsed < gen->frame_size / 16 - 2);
+
+ /* save registers to stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ int offset = 2 + i;
+ spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+
+ /* setup function arguments */
+ for (a = 0; a < num_args; a++) {
+ spe_move(gen->f, 3 + a, s_regs[a]);
+ }
+
+ /* branch to function, save return addr */
+ spe_brasl(gen->f, SPE_REG_RA, addr);
+
+ /* save function's return value */
+ if (scalar)
+ spe_move(gen->f, retval_reg, 3);
+ else
+ spe_move(gen->f, d_reg, 3);
+
+ /* restore registers from stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ if (reg != d_reg && reg != retval_reg) {
+ int offset = 2 + i;
+ spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+ }
+
+ func_called = TRUE;
+ }
+
+ if (scalar) {
+ spe_move(gen->f, d_reg, retval_reg);
+ }
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+
+ if (scalar) {
+ spe_release_register(gen->f, retval_reg);
+ }
+
+ return TRUE;
+}
+
+
+static boolean
+emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const uint target = inst->InstructionExtTexture.Texture;
+ const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ uint addr;
+ int ch;
+ int coord_regs[4], d_regs[4];
+
+ switch (target) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_2D:
+ addr = lookup_function(gen->cell, "spu_tex_2d");
+ break;
+ case TGSI_TEXTURE_3D:
+ addr = lookup_function(gen->cell, "spu_tex_3d");
+ break;
+ case TGSI_TEXTURE_CUBE:
+ addr = lookup_function(gen->cell, "spu_tex_cube");
+ break;
+ default:
+ ASSERT(0 && "unsupported texture target");
+ return FALSE;
+ }
+
+ assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER);
+
+ spe_comment(gen->f, -4, "CALL tex:");
+
+ /* get src/dst reg info */
+ for (ch = 0; ch < 4; ch++) {
+ coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+
+ {
+ ubyte usedRegs[SPE_NUM_REGS];
+ uint i, numUsed;
+
+ numUsed = spe_get_registers_used(gen->f, usedRegs);
+ assert(numUsed < gen->frame_size / 16 - 2);
+
+ /* save registers to stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ int offset = 2 + i;
+ spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+
+ /* setup function arguments (XXX depends on target) */
+ for (i = 0; i < 4; i++) {
+ spe_move(gen->f, 3 + i, coord_regs[i]);
+ }
+ spe_load_uint(gen->f, 7, unit); /* sampler unit */
+
+ /* branch to function, save return addr */
+ spe_brasl(gen->f, SPE_REG_RA, addr);
+
+ /* save function's return values (four pixel's colors) */
+ for (i = 0; i < 4; i++) {
+ spe_move(gen->f, d_regs[i], 3 + i);
+ }
+
+ /* restore registers from stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ if (reg != d_regs[0] &&
+ reg != d_regs[1] &&
+ reg != d_regs[2] &&
+ reg != d_regs[3]) {
+ int offset = 2 + i;
+ spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+ }
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * KILL if any of src reg values are less than zero.
+ */
+static boolean
+emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
+
+ spe_comment(gen->f, -4, "CALL kil:");
+
+ /* zero = {0,0,0,0} */
+ zero_reg = get_itemp(gen);
+ spe_zero(gen->f, zero_reg);
+
+ cmp_reg = get_itemp(gen);
+
+ /* get src regs */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ }
+
+ /* test if any src regs are < 0 */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ if (kil_reg >= 0) {
+ /* cmp = 0 > src ? : ~0 : 0 */
+ spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
+ /* kil = kil | cmp */
+ spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
+ }
+ else {
+ kil_reg = get_itemp(gen);
+ /* kil = 0 > src ? : ~0 : 0 */
+ spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
+ }
+ }
+
+ if (gen->if_nesting || gen->loop_nesting) {
+ /* may have been a conditional kil */
+ spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
+ }
+
+ /* allocate the kill mask reg if needed */
+ if (gen->kill_mask_reg <= 0) {
+ gen->kill_mask_reg = spe_allocate_available_register(gen->f);
+ spe_move(gen->f, gen->kill_mask_reg, kil_reg);
+ }
+ else {
+ spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
+ }
+
+ free_itemps(gen);
+
+ return TRUE;
+}
+
+
+
+/**
+ * Emit min or max.
+ */
+static boolean
+emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
+ }
+
+ /* d = (s0 > s1) ? s0 : s1 */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_MAX)
+ spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
+ else
+ spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
+}
+
+
+/**
+ * Emit code to update the execution mask.
+ * This needs to be done whenever the execution status of a conditional
+ * or loop is changed.
+ */
+static void
+emit_update_exec_mask(struct codegen *gen)
+{
+ const int exec_reg = get_exec_mask_reg(gen);
+ const int cond_reg = gen->cond_mask_reg;
+ const int loop_reg = gen->loop_mask_reg;
+
+ spe_comment(gen->f, 0, "Update master execution mask");
+
+ if (gen->if_nesting > 0 && gen->loop_nesting > 0) {
+ /* exec_mask = cond_mask & loop_mask */
+ assert(cond_reg > 0);
+ assert(loop_reg > 0);
+ spe_and(gen->f, exec_reg, cond_reg, loop_reg);
+ }
+ else if (gen->if_nesting > 0) {
+ assert(cond_reg > 0);
+ spe_move(gen->f, exec_reg, cond_reg);
+ }
+ else if (gen->loop_nesting > 0) {
+ assert(loop_reg > 0);
+ spe_move(gen->f, exec_reg, loop_reg);
+ }
+ else {
+ spe_load_int(gen->f, exec_reg, ~0x0);
+ }
+}
+
+
+static boolean
+emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const int channel = 0;
+ int cond_reg;
+
+ cond_reg = get_cond_mask_reg(gen);
+
+ /* XXX push cond exec mask */
+
+ spe_comment(gen->f, 0, "init conditional exec mask = ~0:");
+ spe_load_int(gen->f, cond_reg, ~0);
+
+ /* update conditional execution mask with the predicate register */
+ int tmp_reg = get_itemp(gen);
+ int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
+
+ /* tmp = (s1_reg == 0) */
+ spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
+ /* tmp = !tmp */
+ spe_complement(gen->f, tmp_reg, tmp_reg);
+ /* cond_mask = cond_mask & tmp */
+ spe_and(gen->f, cond_reg, cond_reg, tmp_reg);
+
+ gen->if_nesting++;
+
+ /* update the master execution mask */
+ emit_update_exec_mask(gen);
+
+ free_itemps(gen);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const int cond_reg = get_cond_mask_reg(gen);
+
+ spe_comment(gen->f, 0, "cond exec mask = !cond exec mask");
+ spe_complement(gen->f, cond_reg, cond_reg);
+ emit_update_exec_mask(gen);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ /* XXX todo: pop cond exec mask */
+
+ gen->if_nesting--;
+
+ emit_update_exec_mask(gen);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int exec_reg, loop_reg;
+
+ exec_reg = get_exec_mask_reg(gen);
+ loop_reg = get_loop_mask_reg(gen);
+
+ /* XXX push loop_exec mask */
+
+ spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0");
+ spe_load_int(gen->f, loop_reg, ~0x0);
+
+ gen->loop_nesting++;
+ gen->loop_start = spe_code_size(gen->f); /* in bytes */
+
+ return TRUE;
+}
+
+
+static boolean
+emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const int loop_reg = get_loop_mask_reg(gen);
+ const int tmp_reg = get_itemp(gen);
+ int offset;
+
+ /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */
+ spe_orx(gen->f, tmp_reg, loop_reg);
+
+ offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */
+
+ /* branch back to top of loop if tmp_reg != 0 */
+ spe_brnz(gen->f, tmp_reg, offset / 4);
+
+ /* XXX pop loop_exec mask */
+
+ gen->loop_nesting--;
+
+ emit_update_exec_mask(gen);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const int exec_reg = get_exec_mask_reg(gen);
+ const int loop_reg = get_loop_mask_reg(gen);
+
+ assert(gen->loop_nesting > 0);
+
+ spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask");
+ spe_andc(gen->f, loop_reg, loop_reg, exec_reg);
+
+ emit_update_exec_mask(gen);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ assert(gen->loop_nesting > 0);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
+ boolean ddx)
+{
+ int ch;
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ int t1_reg = get_itemp(gen);
+ int t2_reg = get_itemp(gen);
+
+ spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
+ if (ddx) {
+ spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
+ }
+ else {
+ spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
+ }
+ spe_fs(gen->f, d_reg, t2_reg, t1_reg);
+
+ free_itemps(gen);
+ }
+
+ return TRUE;
+}
+
+
+
+
+/**
+ * Emit END instruction.
+ * We just return from the shader function at this point.
+ *
+ * Note that there may be more code after this that would be
+ * called by TGSI_OPCODE_CALL.
+ */
+static boolean
+emit_END(struct codegen *gen)
+{
+ emit_epilogue(gen);
+ return TRUE;
+}
+
+
+/**
+ * Emit code for the given instruction. Just a big switch stmt.
+ */
+static boolean
+emit_instruction(struct codegen *gen,
+ const struct tgsi_full_instruction *inst)
+{
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL:
+ return emit_ARL(gen, inst);
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_SWZ:
+ return emit_MOV(gen, inst);
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_MUL:
+ return emit_binop(gen, inst);
+ case TGSI_OPCODE_MAD:
+ return emit_MAD(gen, inst);
+ case TGSI_OPCODE_LERP:
+ return emit_LERP(gen, inst);
+ case TGSI_OPCODE_DP3:
+ return emit_DP3(gen, inst);
+ case TGSI_OPCODE_DP4:
+ return emit_DP4(gen, inst);
+ case TGSI_OPCODE_DPH:
+ return emit_DPH(gen, inst);
+ case TGSI_OPCODE_NRM:
+ return emit_NRM3(gen, inst);
+ case TGSI_OPCODE_XPD:
+ return emit_XPD(gen, inst);
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ return emit_RCP_RSQ(gen, inst);
+ case TGSI_OPCODE_ABS:
+ return emit_ABS(gen, inst);
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SNE:
+ return emit_inequality(gen, inst);
+ case TGSI_OPCODE_CMP:
+ return emit_CMP(gen, inst);
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MAX:
+ return emit_MIN_MAX(gen, inst);
+ case TGSI_OPCODE_TRUNC:
+ return emit_TRUNC(gen, inst);
+ case TGSI_OPCODE_FLR:
+ return emit_FLR(gen, inst);
+ case TGSI_OPCODE_FRC:
+ return emit_FRC(gen, inst);
+ case TGSI_OPCODE_END:
+ return emit_END(gen);
+
+ case TGSI_OPCODE_COS:
+ return emit_function_call(gen, inst, "spu_cos", 1, TRUE);
+ case TGSI_OPCODE_SIN:
+ return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
+ case TGSI_OPCODE_POW:
+ return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
+ case TGSI_OPCODE_EXPBASE2:
+ return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
+ case TGSI_OPCODE_LOGBASE2:
+ return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
+ case TGSI_OPCODE_TEX:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXD:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXB:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXL:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXP:
+ return emit_TEX(gen, inst);
+ case TGSI_OPCODE_KIL:
+ return emit_KIL(gen, inst);
+
+ case TGSI_OPCODE_IF:
+ return emit_IF(gen, inst);
+ case TGSI_OPCODE_ELSE:
+ return emit_ELSE(gen, inst);
+ case TGSI_OPCODE_ENDIF:
+ return emit_ENDIF(gen, inst);
+
+ case TGSI_OPCODE_BGNLOOP2:
+ return emit_BGNLOOP(gen, inst);
+ case TGSI_OPCODE_ENDLOOP2:
+ return emit_ENDLOOP(gen, inst);
+ case TGSI_OPCODE_BRK:
+ return emit_BRK(gen, inst);
+ case TGSI_OPCODE_CONT:
+ return emit_CONT(gen, inst);
+
+ case TGSI_OPCODE_DDX:
+ return emit_DDX_DDY(gen, inst, TRUE);
+ case TGSI_OPCODE_DDY:
+ return emit_DDX_DDY(gen, inst, FALSE);
+
+ /* XXX lots more cases to do... */
+
+ default:
+ fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
+ inst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+
+/**
+ * Emit code for a TGSI immediate value (vector of four floats).
+ * This involves register allocation and initialization.
+ * XXX the initialization should be done by a "prepare" stage, not
+ * per quad execution!
+ */
+static boolean
+emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
+{
+ int ch;
+
+ assert(gen->num_imm < MAX_TEMPS);
+
+ for (ch = 0; ch < 4; ch++) {
+ float val = immed->u.ImmediateFloat32[ch].Float;
+
+ if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) {
+ /* re-use previous register */
+ gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
+ }
+ else {
+ char str[100];
+ int reg = spe_allocate_available_register(gen->f);
+
+ if (reg < 0)
+ return FALSE;
+
+ sprintf(str, "init $%d = %f", reg, val);
+ spe_comment(gen->f, 0, str);
+
+ /* update immediate map */
+ gen->imm_regs[gen->num_imm][ch] = reg;
+
+ /* emit initializer instruction */
+ spe_load_float(gen->f, reg, val);
+ }
+ }
+
+ gen->num_imm++;
+
+ return TRUE;
+}
+
+
+
+/**
+ * Emit "code" for a TGSI declaration.
+ * We only care about TGSI TEMPORARY register declarations at this time.
+ * For each TGSI TEMPORARY we allocate four SPE registers.
+ */
+static boolean
+emit_declaration(struct cell_context *cell,
+ struct codegen *gen, const struct tgsi_full_declaration *decl)
+{
+ int i, ch;
+
+ switch (decl->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last;
+ i++) {
+ assert(i < MAX_TEMPS);
+ for (ch = 0; ch < 4; ch++) {
+ gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
+ if (gen->temp_regs[i][ch] < 0)
+ return FALSE; /* out of regs */
+ }
+
+ /* XXX if we run out of SPE registers, we need to spill
+ * to SPU memory. someday...
+ */
+
+ {
+ char buf[100];
+ sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
+ gen->temp_regs[i][0], gen->temp_regs[i][1],
+ gen->temp_regs[i][2], gen->temp_regs[i][3]);
+ spe_comment(gen->f, 0, buf);
+ }
+ }
+ break;
+ default:
+ ; /* ignore */
+ }
+
+ return TRUE;
+}
+
+
+
+/**
+ * Translate TGSI shader code to SPE instructions. This is done when
+ * the state tracker gives us a new shader (via pipe->create_fs_state()).
+ *
+ * \param cell the rendering context (in)
+ * \param tokens the TGSI shader (in)
+ * \param f the generated function (out)
+ */
+boolean
+cell_gen_fragment_program(struct cell_context *cell,
+ const struct tgsi_token *tokens,
+ struct spe_function *f)
+{
+ struct tgsi_parse_context parse;
+ struct codegen gen;
+ uint ic = 0;
+
+ memset(&gen, 0, sizeof(gen));
+ gen.cell = cell;
+ gen.f = f;
+
+ /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
+ gen.inputs_reg = 3; /* pointer to inputs array */
+ gen.outputs_reg = 4; /* pointer to outputs array */
+ gen.constants_reg = 5; /* pointer to constants array */
+
+ spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
+ spe_allocate_register(f, gen.inputs_reg);
+ spe_allocate_register(f, gen.outputs_reg);
+ spe_allocate_register(f, gen.constants_reg);
+
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ spe_print_code(f, TRUE);
+ spe_indent(f, 2*8);
+ printf("Begin %s\n", __FUNCTION__);
+ tgsi_dump(tokens, 0);
+ }
+
+ tgsi_parse_init(&parse, tokens);
+
+ emit_prologue(&gen);
+
+ while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ if (f->print) {
+ _debug_printf(" # ");
+ tgsi_dump_immediate(&parse.FullToken.FullImmediate);
+ }
+ if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
+ gen.error = TRUE;
+ break;
+
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ if (f->print) {
+ _debug_printf(" # ");
+ tgsi_dump_declaration(&parse.FullToken.FullDeclaration);
+ }
+ if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
+ gen.error = TRUE;
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (f->print) {
+ _debug_printf(" # ");
+ ic++;
+ tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
+ }
+ if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
+ gen.error = TRUE;
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ if (gen.error) {
+ /* terminate the SPE code */
+ return emit_END(&gen);
+ }
+
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
+ printf("End %s\n", __FUNCTION__);
+ }
+
+ tgsi_parse_free( &parse );
+
+ return !gen.error;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h
new file mode 100644
index 0000000000..99faea7046
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+#ifndef CELL_GEN_FP_H
+#define CELL_GEN_FP_H
+
+
+
+extern boolean
+cell_gen_fragment_program(struct cell_context *cell,
+ const struct tgsi_token *tokens,
+ struct spe_function *f);
+
+
+#endif /* CELL_GEN_FP_H */
+
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
new file mode 100644
index 0000000000..0ea8f017ef
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -0,0 +1,2181 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Generate SPU per-fragment code (actually per-quad code).
+ * \author Brian Paul
+ * \author Bob Ellison
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "rtasm/rtasm_ppc_spe.h"
+#include "cell_context.h"
+#include "cell_gen_fragment.h"
+
+
+
+/** Do extra optimizations? */
+#define OPTIMIZATIONS 1
+
+
+/**
+ * Generate SPE code to perform Z/depth testing.
+ *
+ * \param dsa Gallium depth/stencil/alpha state to gen code for
+ * \param f SPE function to append instruction onto.
+ * \param mask_reg register containing quad/pixel "alive" mask (in/out)
+ * \param ifragZ_reg register containing integer fragment Z values (in)
+ * \param ifbZ_reg register containing integer frame buffer Z values (in/out)
+ * \param zmask_reg register containing result of Z test/comparison (out)
+ *
+ * Returns TRUE if the Z-buffer needs to be updated.
+ */
+static boolean
+gen_depth_test(struct spe_function *f,
+ const struct pipe_depth_stencil_alpha_state *dsa,
+ int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
+{
+ /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_
+ * quantities. This only makes a difference for 32-bit Z values though.
+ */
+ ASSERT(dsa->depth.enabled);
+
+ switch (dsa->depth.func) {
+ case PIPE_FUNC_EQUAL:
+ /* zmask = (ifragZ == ref) */
+ spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & zmask) */
+ spe_and(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_NOTEQUAL:
+ /* zmask = (ifragZ == ref) */
+ spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & ~zmask) */
+ spe_andc(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_GREATER:
+ /* zmask = (ifragZ > ref) */
+ spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & zmask) */
+ spe_and(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_LESS:
+ /* zmask = (ref > ifragZ) */
+ spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
+ /* mask = (mask & zmask) */
+ spe_and(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ /* zmask = (ifragZ > ref) */
+ spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & ~zmask) */
+ spe_andc(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_GEQUAL:
+ /* zmask = (ref > ifragZ) */
+ spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
+ /* mask = (mask & ~zmask) */
+ spe_andc(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_NEVER:
+ spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */
+ spe_move(f, zmask_reg, mask_reg); /* zmask = mask */
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ /* mask unchanged */
+ spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */
+ break;
+
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ if (dsa->depth.writemask) {
+ /*
+ * If (ztest passed) {
+ * framebufferZ = fragmentZ;
+ * }
+ * OR,
+ * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
+ */
+ spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+/**
+ * Generate SPE code to perform alpha testing.
+ *
+ * \param dsa Gallium depth/stencil/alpha state to gen code for
+ * \param f SPE function to append instruction onto.
+ * \param mask_reg register containing quad/pixel "alive" mask (in/out)
+ * \param fragA_reg register containing four fragment alpha values (in)
+ */
+static void
+gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
+ struct spe_function *f, int mask_reg, int fragA_reg)
+{
+ int ref_reg = spe_allocate_available_register(f);
+ int amask_reg = spe_allocate_available_register(f);
+
+ ASSERT(dsa->alpha.enabled);
+
+ if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
+ (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
+ /* load/splat the alpha reference float value */
+ spe_load_float(f, ref_reg, dsa->alpha.ref);
+ }
+
+ /* emit code to do the alpha comparison, updating 'mask' */
+ switch (dsa->alpha.func) {
+ case PIPE_FUNC_EQUAL:
+ /* amask = (fragA == ref) */
+ spe_fceq(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & amask) */
+ spe_and(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_NOTEQUAL:
+ /* amask = (fragA == ref) */
+ spe_fceq(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & ~amask) */
+ spe_andc(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_GREATER:
+ /* amask = (fragA > ref) */
+ spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & amask) */
+ spe_and(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_LESS:
+ /* amask = (ref > fragA) */
+ spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
+ /* mask = (mask & amask) */
+ spe_and(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ /* amask = (fragA > ref) */
+ spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & ~amask) */
+ spe_andc(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_GEQUAL:
+ /* amask = (ref > fragA) */
+ spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
+ /* mask = (mask & ~amask) */
+ spe_andc(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_NEVER:
+ spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ /* no-op, mask unchanged */
+ break;
+
+ default:
+ ASSERT(0);
+ break;
+ }
+
+#if OPTIMIZATIONS
+ /* if mask == {0,0,0,0} we're all done, return */
+ {
+ /* re-use amask reg here */
+ int tmp_reg = amask_reg;
+ /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
+ spe_orx(f, tmp_reg, mask_reg);
+ /* if tmp[0] == 0 then return from function call */
+ spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
+ }
+#endif
+
+ spe_release_register(f, ref_reg);
+ spe_release_register(f, amask_reg);
+}
+
+
+/**
+ * This pair of functions is used inline to allocate and deallocate
+ * optional constant registers. Once a constant is discovered to be
+ * needed, we will likely need it again, so we don't want to deallocate
+ * it and have to allocate and load it again unnecessarily.
+ */
+static INLINE void
+setup_optional_register(struct spe_function *f,
+ int *r)
+{
+ if (*r < 0)
+ *r = spe_allocate_available_register(f);
+}
+
+static INLINE void
+release_optional_register(struct spe_function *f,
+ int r)
+{
+ if (r >= 0)
+ spe_release_register(f, r);
+}
+
+static INLINE void
+setup_const_register(struct spe_function *f,
+ int *r,
+ float value)
+{
+ if (*r >= 0)
+ return;
+ setup_optional_register(f, r);
+ spe_load_float(f, *r, value);
+}
+
+static INLINE void
+release_const_register(struct spe_function *f,
+ int r)
+{
+ release_optional_register(f, r);
+}
+
+
+
+/**
+ * Unpack/convert framebuffer colors from four 32-bit packed colors
+ * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
+ * Each 8-bit color component is expanded into a float in [0.0, 1.0].
+ */
+static void
+unpack_colors(struct spe_function *f,
+ enum pipe_format color_format,
+ int fbRGBA_reg,
+ int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg)
+{
+ int mask0_reg = spe_allocate_available_register(f);
+ int mask1_reg = spe_allocate_available_register(f);
+ int mask2_reg = spe_allocate_available_register(f);
+ int mask3_reg = spe_allocate_available_register(f);
+
+ spe_load_int(f, mask0_reg, 0xff);
+ spe_load_int(f, mask1_reg, 0xff00);
+ spe_load_int(f, mask2_reg, 0xff0000);
+ spe_load_int(f, mask3_reg, 0xff000000);
+
+ spe_comment(f, 0, "Unpack framebuffer colors, convert to floats");
+
+ switch (color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ /* fbB = fbRGBA & mask */
+ spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg);
+
+ /* fbG = fbRGBA & mask */
+ spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg);
+
+ /* fbR = fbRGBA & mask */
+ spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg);
+
+ /* fbA = fbRGBA & mask */
+ spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg);
+
+ /* fbG = fbG >> 8 */
+ spe_roti(f, fbG_reg, fbG_reg, -8);
+
+ /* fbR = fbR >> 16 */
+ spe_roti(f, fbR_reg, fbR_reg, -16);
+
+ /* fbA = fbA >> 24 */
+ spe_roti(f, fbA_reg, fbA_reg, -24);
+ break;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /* fbA = fbRGBA & mask */
+ spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg);
+
+ /* fbR = fbRGBA & mask */
+ spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg);
+
+ /* fbG = fbRGBA & mask */
+ spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg);
+
+ /* fbB = fbRGBA & mask */
+ spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg);
+
+ /* fbR = fbR >> 8 */
+ spe_roti(f, fbR_reg, fbR_reg, -8);
+
+ /* fbG = fbG >> 16 */
+ spe_roti(f, fbG_reg, fbG_reg, -16);
+
+ /* fbB = fbB >> 24 */
+ spe_roti(f, fbB_reg, fbB_reg, -24);
+ break;
+
+ default:
+ ASSERT(0);
+ }
+
+ /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
+ spe_cuflt(f, fbR_reg, fbR_reg, 8);
+ spe_cuflt(f, fbG_reg, fbG_reg, 8);
+ spe_cuflt(f, fbB_reg, fbB_reg, 8);
+ spe_cuflt(f, fbA_reg, fbA_reg, 8);
+
+ spe_release_register(f, mask0_reg);
+ spe_release_register(f, mask1_reg);
+ spe_release_register(f, mask2_reg);
+ spe_release_register(f, mask3_reg);
+}
+
+
+/**
+ * Generate SPE code to implement the given blend mode for a quad of pixels.
+ * \param f SPE function to append instruction onto.
+ * \param fragR_reg register with fragment red values (float) (in/out)
+ * \param fragG_reg register with fragment green values (float) (in/out)
+ * \param fragB_reg register with fragment blue values (float) (in/out)
+ * \param fragA_reg register with fragment alpha values (float) (in/out)
+ * \param fbRGBA_reg register with packed framebuffer colors (integer) (in)
+ */
+static void
+gen_blend(const struct pipe_blend_state *blend,
+ const struct pipe_blend_color *blend_color,
+ struct spe_function *f,
+ enum pipe_format color_format,
+ int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
+ int fbRGBA_reg)
+{
+ int term1R_reg = spe_allocate_available_register(f);
+ int term1G_reg = spe_allocate_available_register(f);
+ int term1B_reg = spe_allocate_available_register(f);
+ int term1A_reg = spe_allocate_available_register(f);
+
+ int term2R_reg = spe_allocate_available_register(f);
+ int term2G_reg = spe_allocate_available_register(f);
+ int term2B_reg = spe_allocate_available_register(f);
+ int term2A_reg = spe_allocate_available_register(f);
+
+ int fbR_reg = spe_allocate_available_register(f);
+ int fbG_reg = spe_allocate_available_register(f);
+ int fbB_reg = spe_allocate_available_register(f);
+ int fbA_reg = spe_allocate_available_register(f);
+
+ int tmp_reg = spe_allocate_available_register(f);
+
+ /* Optional constant registers we might or might not end up using;
+ * if we do use them, make sure we only allocate them once by
+ * keeping a flag on each one.
+ */
+ int one_reg = -1;
+ int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1;
+
+ ASSERT(blend->blend_enable);
+
+ /* packed RGBA -> float colors */
+ unpack_colors(f, color_format, fbRGBA_reg,
+ fbR_reg, fbG_reg, fbB_reg, fbA_reg);
+
+ /*
+ * Compute Src RGB terms. We're actually looking for the value
+ * of (the appropriate RGB factors) * (the incoming source RGB color),
+ * because in some cases (like PIPE_BLENDFACTOR_ONE and
+ * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math.
+ */
+ switch (blend->rgb_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ /* factors = (1,1,1), so term = (R,G,B) */
+ spe_move(f, term1R_reg, fragR_reg);
+ spe_move(f, term1G_reg, fragG_reg);
+ spe_move(f, term1B_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ /* factors = (0,0,0), so term = (0,0,0) */
+ spe_load_float(f, term1R_reg, 0.0f);
+ spe_load_float(f, term1G_reg, 0.0f);
+ spe_load_float(f, term1B_reg, 0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factors = (R,G,B), so term = (R*R, G*G, B*B) */
+ spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ /* factors = (A,A,A), so term = (R*A, G*A, B*A) */
+ spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B))
+ * or in other words term = (R-R*R, G-G*G, B-B*B)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */
+ spe_fm(f, term1R_reg, fragR_reg, fbR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fbG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb))
+ * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A))
+ * or term = (R-R*A,G-G*A,B-B*A)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */
+ spe_fm(f, term1R_reg, fragR_reg, fbA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fbA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fbA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb))
+ * or term = (R-R*Afb,G-G*Afb,b-B*Afb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg, blend_color->color[2]);
+ /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
+ spe_fm(f, term1R_reg, fragR_reg, constR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, constG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, constB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ /* we'll need the optional constant alpha register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
+ spe_fm(f, term1R_reg, fragR_reg, constA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, constA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, constA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg, blend_color->color[2]);
+ /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc))
+ * or term = (R-R*Rc, G-G*Gc, B-B*Bc)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg, blend_color->color[2]);
+ /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac))
+ * or term = (R-R*Ac,G-G*Ac,B-B*Ac)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ /* We'll need the optional {1,1,1,1} register */
+ setup_const_register(f, &one_reg, 1.0f);
+ /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so
+ * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
+ * We could expand the term (as a*min(b,c) == min(a*b,a*c)
+ * as long as a is positive), but then we'd have to do three
+ * spe_float_min() functions instead of one, so this is simpler.
+ */
+ /* tmp = 1 - Afb */
+ spe_fs(f, tmp_reg, one_reg, fbA_reg);
+ /* tmp = min(A,tmp) */
+ spe_float_min(f, tmp_reg, fragA_reg, tmp_reg);
+ /* term = R*tmp */
+ spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+ spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+ spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Src Alpha term. Like the above, we're looking for
+ * the full term A*factor, not just the factor itself, because
+ * in many cases we can avoid doing unnecessary multiplies.
+ */
+ switch (blend->alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ /* factor = 0, so term = 0 */
+ spe_load_float(f, term1A_reg, 0.0f);
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */
+ case PIPE_BLENDFACTOR_ONE:
+ /* factor = 1, so term = A */
+ spe_move(f, term1A_reg, fragA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factor = A, so term = A*A */
+ spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factor = 1-A, so term = A*(1-A) = A-A*A */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factor = Afb, so term = A*Afb */
+ spe_fm(f, term1A_reg, fragA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = Ac, so term = A*Ac */
+ spe_fm(f, term1A_reg, fragA_reg, constA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest RGB term. Like the above, we're looking for
+ * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because
+ * in many cases we can avoid doing unnecessary multiplies.
+ */
+ switch (blend->rgb_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */
+ spe_move(f, term2R_reg, fbR_reg);
+ spe_move(f, term2G_reg, fbG_reg);
+ spe_move(f, term2B_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ /* factor s= (0,0,0), so term = (0,0,0) */
+ spe_load_float(f, term2R_reg, 0.0f);
+ spe_load_float(f, term2G_reg, 0.0f);
+ spe_load_float(f, term2B_reg, 0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */
+ spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B))
+ * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */
+ spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */
+ spe_fm(f, term2R_reg, fbR_reg, fbR_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fbG_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb))
+ * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */
+ spe_fm(f, term2R_reg, fbR_reg, fbA_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fbA_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fbA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb))
+ * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg, blend_color->color[2]);
+ /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */
+ spe_fm(f, term2R_reg, fbR_reg, constR_reg);
+ spe_fm(f, term2G_reg, fbG_reg, constG_reg);
+ spe_fm(f, term2B_reg, fbB_reg, constB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ /* we'll need the optional constant alpha register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */
+ spe_fm(f, term2R_reg, fbR_reg, constA_reg);
+ spe_fm(f, term2G_reg, fbG_reg, constA_reg);
+ spe_fm(f, term2B_reg, fbB_reg, constA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg, blend_color->color[2]);
+ /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc))
+ * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg, blend_color->color[2]);
+ /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac))
+ * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */
+ ASSERT(0);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest Alpha term. Like the above, we're looking for
+ * the full term Afb*factor, not just the factor itself, because
+ * in many cases we can avoid doing unnecessary multiplies.
+ */
+ switch (blend->alpha_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ /* factor = 1, so term = Afb */
+ spe_move(f, term2A_reg, fbA_reg);
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ /* factor = 0, so term = 0 */
+ spe_load_float(f, term2A_reg, 0.0f);
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factor = A, so term = Afb*A */
+ spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factor = Afb, so term = Afb*Afb */
+ spe_fm(f, term2A_reg, fbA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = Ac, so term = Afb*Ac */
+ spe_fm(f, term2A_reg, fbA_reg, constA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg, blend_color->color[3]);
+ /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */
+ ASSERT(0);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest RGB terms as per the blend equation.
+ */
+ switch (blend->rgb_func) {
+ case PIPE_BLEND_ADD:
+ spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ spe_fs(f, fragR_reg, term2R_reg, term1R_reg);
+ spe_fs(f, fragG_reg, term2G_reg, term1G_reg);
+ spe_fs(f, fragB_reg, term2B_reg, term1B_reg);
+ break;
+ case PIPE_BLEND_MIN:
+ spe_float_min(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_float_min(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_float_min(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
+ case PIPE_BLEND_MAX:
+ spe_float_max(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_float_max(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_float_max(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest A term
+ */
+ switch (blend->alpha_func) {
+ case PIPE_BLEND_ADD:
+ spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ spe_fs(f, fragA_reg, term2A_reg, term1A_reg);
+ break;
+ case PIPE_BLEND_MIN:
+ spe_float_min(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
+ case PIPE_BLEND_MAX:
+ spe_float_max(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ spe_release_register(f, term1R_reg);
+ spe_release_register(f, term1G_reg);
+ spe_release_register(f, term1B_reg);
+ spe_release_register(f, term1A_reg);
+
+ spe_release_register(f, term2R_reg);
+ spe_release_register(f, term2G_reg);
+ spe_release_register(f, term2B_reg);
+ spe_release_register(f, term2A_reg);
+
+ spe_release_register(f, fbR_reg);
+ spe_release_register(f, fbG_reg);
+ spe_release_register(f, fbB_reg);
+ spe_release_register(f, fbA_reg);
+
+ spe_release_register(f, tmp_reg);
+
+ /* Free any optional registers that actually got used */
+ release_const_register(f, one_reg);
+ release_const_register(f, constR_reg);
+ release_const_register(f, constG_reg);
+ release_const_register(f, constB_reg);
+ release_const_register(f, constA_reg);
+}
+
+
+static void
+gen_logicop(const struct pipe_blend_state *blend,
+ struct spe_function *f,
+ int fragRGBA_reg, int fbRGBA_reg)
+{
+ /* We've got four 32-bit RGBA packed pixels in each of
+ * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
+ * reds, greens, blues, and alphas.
+ * */
+ ASSERT(blend->logicop_enable);
+
+ switch(blend->logicop_func) {
+ case PIPE_LOGICOP_CLEAR: /* 0 */
+ spe_zero(f, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_NOR: /* ~(s | d) */
+ spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */
+ /* andc R, A, B computes R = A & ~B */
+ spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED: /* ~s */
+ spe_complement(f, fragRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */
+ /* andc R, A, B computes R = A & ~B */
+ spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_INVERT: /* ~d */
+ /* Note that (A nor A) == ~(A|A) == ~A */
+ spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_XOR: /* s ^ d */
+ spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_NAND: /* ~(s & d) */
+ spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_AND: /* s & d */
+ spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */
+ spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ spe_complement(f, fragRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_NOOP: /* d */
+ spe_move(f, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */
+ /* orc R, A, B computes R = A | ~B */
+ spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_COPY: /* s */
+ break;
+ case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */
+ /* orc R, A, B computes R = A | ~B */
+ spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_OR: /* s | d */
+ spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_SET: /* 1 */
+ spe_load_int(f, fragRGBA_reg, 0xffffffff);
+ break;
+ default:
+ ASSERT(0);
+ }
+}
+
+
+/**
+ * Generate code to pack a quad of float colors into four 32-bit integers.
+ *
+ * \param f SPE function to append instruction onto.
+ * \param color_format the dest color packing format
+ * \param r_reg register containing four red values (in/clobbered)
+ * \param g_reg register containing four green values (in/clobbered)
+ * \param b_reg register containing four blue values (in/clobbered)
+ * \param a_reg register containing four alpha values (in/clobbered)
+ * \param rgba_reg register to store the packed RGBA colors (out)
+ */
+static void
+gen_pack_colors(struct spe_function *f,
+ enum pipe_format color_format,
+ int r_reg, int g_reg, int b_reg, int a_reg,
+ int rgba_reg)
+{
+ int rg_reg = spe_allocate_available_register(f);
+ int ba_reg = spe_allocate_available_register(f);
+
+ /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
+ spe_cfltu(f, r_reg, r_reg, 32);
+ spe_cfltu(f, g_reg, g_reg, 32);
+ spe_cfltu(f, b_reg, b_reg, 32);
+ spe_cfltu(f, a_reg, a_reg, 32);
+
+ /* Shift the most significant bytes to the least significant positions.
+ * I.e.: reg = reg >> 24
+ */
+ spe_rotmi(f, r_reg, r_reg, -24);
+ spe_rotmi(f, g_reg, g_reg, -24);
+ spe_rotmi(f, b_reg, b_reg, -24);
+ spe_rotmi(f, a_reg, a_reg, -24);
+
+ /* Shift the color bytes according to the surface format */
+ if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) {
+ spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */
+ spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */
+ spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */
+ }
+ else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) {
+ spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */
+ spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */
+ spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */
+ }
+ else {
+ ASSERT(0);
+ }
+
+ /* Merge red, green, blue, alpha registers to make packed RGBA colors.
+ * Eg: after shifting according to color_format we might have:
+ * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
+ * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
+ * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
+ * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
+ * OR-ing all those together gives us four packed colors:
+ * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
+ */
+ spe_or(f, rg_reg, r_reg, g_reg);
+ spe_or(f, ba_reg, a_reg, b_reg);
+ spe_or(f, rgba_reg, rg_reg, ba_reg);
+
+ spe_release_register(f, rg_reg);
+ spe_release_register(f, ba_reg);
+}
+
+
+static void
+gen_colormask(struct spe_function *f,
+ uint colormask,
+ enum pipe_format color_format,
+ int fragRGBA_reg, int fbRGBA_reg)
+{
+ /* We've got four 32-bit RGBA packed pixels in each of
+ * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
+ * reds, greens, blues, and alphas. Further, the pixels
+ * are packed according to the given color format, not
+ * necessarily RGBA...
+ */
+ uint r_mask;
+ uint g_mask;
+ uint b_mask;
+ uint a_mask;
+
+ /* Calculate exactly where the bits for any particular color
+ * end up, so we can mask them correctly.
+ */
+ switch(color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ /* ARGB */
+ a_mask = 0xff000000;
+ r_mask = 0x00ff0000;
+ g_mask = 0x0000ff00;
+ b_mask = 0x000000ff;
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /* BGRA */
+ b_mask = 0xff000000;
+ g_mask = 0x00ff0000;
+ r_mask = 0x0000ff00;
+ a_mask = 0x000000ff;
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ /* For each R, G, B, and A component we're supposed to mask out,
+ * clear its bits. Then our mask operation later will work
+ * as expected.
+ */
+ if (!(colormask & PIPE_MASK_R)) {
+ r_mask = 0;
+ }
+ if (!(colormask & PIPE_MASK_G)) {
+ g_mask = 0;
+ }
+ if (!(colormask & PIPE_MASK_B)) {
+ b_mask = 0;
+ }
+ if (!(colormask & PIPE_MASK_A)) {
+ a_mask = 0;
+ }
+
+ /* Get a temporary register to hold the mask that will be applied
+ * to the fragment
+ */
+ int colormask_reg = spe_allocate_available_register(f);
+
+ /* The actual mask we're going to use is an OR of the remaining R, G, B,
+ * and A masks. Load the result value into our temporary register.
+ */
+ spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask);
+
+ /* Use the mask register to select between the fragment color
+ * values and the frame buffer color values. Wherever the
+ * mask has a 0 bit, the current frame buffer color should override
+ * the fragment color. Wherever the mask has a 1 bit, the
+ * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM)
+ * instruction will select bits from its first operand rA wherever the
+ * the mask bits rM are 0, and from its second operand rB wherever the
+ * mask bits rM are 1. That means that the frame buffer color is the
+ * first operand, and the fragment color the second.
+ */
+ spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg);
+
+ /* Release the temporary register and we're done */
+ spe_release_register(f, colormask_reg);
+}
+
+
+/**
+ * This function is annoyingly similar to gen_depth_test(), above, except
+ * that instead of comparing two varying values (i.e. fragment and buffer),
+ * we're comparing a varying value with a static value. As such, we have
+ * access to the Compare Immediate instructions where we don't in
+ * gen_depth_test(), which is what makes us very different.
+ *
+ * There's some added complexity if there's a non-trivial state->mask
+ * value; then stencil and reference both must be masked
+ *
+ * The return value in the stencil_pass_reg is a bitmask of valid
+ * fragments that also passed the stencil test. The bitmask of valid
+ * fragments that failed would be found in
+ * (fragment_mask_reg & ~stencil_pass_reg).
+ */
+static void
+gen_stencil_test(struct spe_function *f,
+ const struct pipe_stencil_state *state,
+ uint stencil_max_value,
+ int fragment_mask_reg,
+ int fbS_reg,
+ int stencil_pass_reg)
+{
+ /* Generate code that puts the set of passing fragments into the
+ * stencil_pass_reg register, taking into account whether each fragment
+ * was active to begin with.
+ */
+ switch (state->func) {
+ case PIPE_FUNC_EQUAL:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (s == reference) */
+ spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */
+ uint tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil,
+ state->value_mask & state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_NOTEQUAL:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & ~(s == reference) */
+ spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil,
+ state->value_mask & state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_LESS:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference < s) */
+ spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil,
+ state->value_mask & state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_GREATER:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference > s) */
+ /* There's no convenient Compare Less Than Immediate instruction, so
+ * we'll have to do this one the harder way, by loading a register and
+ * comparing directly. Compare Logical Greater Than Word (clgt)
+ * treats its operands as unsigned - no sign extension.
+ */
+ int tmp_reg = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->ref_value);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */
+ int tmp_reg = spe_allocate_available_register(f);
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->value_mask & state->ref_value);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_GEQUAL:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference >= s)
+ * = fragment_mask & ~(s > reference) */
+ spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg,
+ state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil,
+ state->value_mask & state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference <= s) ]
+ * = fragment_mask & ~(reference > s) */
+ /* As above, we have to do this by loading a register */
+ int tmp_reg = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->ref_value);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */
+ int tmp_reg = spe_allocate_available_register(f);
+ int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->ref_value & state->value_mask);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_NEVER:
+ /* stencil_pass = fragment_mask & 0 = 0 */
+ spe_load_uint(f, stencil_pass_reg, 0);
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ /* stencil_pass = fragment_mask & 1 = fragment_mask */
+ spe_move(f, stencil_pass_reg, fragment_mask_reg);
+ break;
+ }
+
+ /* The fragments that passed the stencil test are now in stencil_pass_reg.
+ * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg).
+ */
+}
+
+
+/**
+ * This function generates code that calculates a set of new stencil values
+ * given the earlier values and the operation to apply. It does not
+ * apply any tests. It is intended to be called up to 3 times
+ * (for the stencil fail operation, for the stencil pass-z fail operation,
+ * and for the stencil pass-z pass operation) to collect up to three
+ * possible sets of values, and for the caller to combine them based
+ * on the result of the tests.
+ *
+ * stencil_max_value should be (2^n - 1) where n is the number of bits
+ * in the stencil buffer - in other words, it should be usable as a mask.
+ */
+static void
+gen_stencil_values(struct spe_function *f,
+ uint stencil_op,
+ uint stencil_ref_value,
+ uint stencil_max_value,
+ int fbS_reg,
+ int newS_reg)
+{
+ /* The code below assumes that newS_reg and fbS_reg are not the same
+ * register; if they can be, the calculations below will have to use
+ * an additional temporary register. For now, mark the assumption
+ * with an assertion that will fail if they are the same.
+ */
+ ASSERT(fbS_reg != newS_reg);
+
+ /* The code also assumes the the stencil_max_value is of the form
+ * 2^n-1 and can therefore be used as a mask for the valid bits in
+ * addition to a maximum. Make sure this is the case as well.
+ * The clever math below exploits the fact that incrementing a
+ * binary number serves to flip all the bits of a number starting at
+ * the LSB and continuing to (and including) the first zero bit
+ * found. That means that a number and its increment will always
+ * have at least one bit in common (the high order bit, if nothing
+ * else) *unless* the number is zero, *or* the number is of a form
+ * consisting of some number of 1s in the low-order bits followed
+ * by nothing but 0s in the high-order bits. The latter case
+ * implies it's of the form 2^n-1.
+ */
+ ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0);
+
+ switch(stencil_op) {
+ case PIPE_STENCIL_OP_KEEP:
+ /* newS = S */
+ spe_move(f, newS_reg, fbS_reg);
+ break;
+
+ case PIPE_STENCIL_OP_ZERO:
+ /* newS = 0 */
+ spe_zero(f, newS_reg);
+ break;
+
+ case PIPE_STENCIL_OP_REPLACE:
+ /* newS = stencil reference value */
+ spe_load_uint(f, newS_reg, stencil_ref_value);
+ break;
+
+ case PIPE_STENCIL_OP_INCR: {
+ /* newS = (s == max ? max : s + 1) */
+ int equals_reg = spe_allocate_available_register(f);
+
+ spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value);
+ /* Add Word Immediate computes rT = rA + 10-bit signed immediate */
+ spe_ai(f, newS_reg, fbS_reg, 1);
+ /* Select from the current value or the new value based on the equality test */
+ spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
+
+ spe_release_register(f, equals_reg);
+ break;
+ }
+ case PIPE_STENCIL_OP_DECR: {
+ /* newS = (s == 0 ? 0 : s - 1) */
+ int equals_reg = spe_allocate_available_register(f);
+
+ spe_compare_equal_uint(f, equals_reg, fbS_reg, 0);
+ /* Add Word Immediate with a (-1) value works */
+ spe_ai(f, newS_reg, fbS_reg, -1);
+ /* Select from the current value or the new value based on the equality test */
+ spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
+
+ spe_release_register(f, equals_reg);
+ break;
+ }
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can
+ * do a normal add and mask off the correct bits
+ */
+ spe_ai(f, newS_reg, fbS_reg, 1);
+ spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
+ break;
+
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */
+ spe_ai(f, newS_reg, fbS_reg, -1);
+ spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
+ break;
+
+ case PIPE_STENCIL_OP_INVERT:
+ /* newS = ~s. We take advantage of the mask/max value to invert only
+ * the valid bits for the field so we don't have to do an extra "and".
+ */
+ spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value);
+ break;
+
+ default:
+ ASSERT(0);
+ }
+}
+
+
+/**
+ * This function generates code to get all the necessary possible
+ * stencil values. For each of the output registers (fail_reg,
+ * zfail_reg, and zpass_reg), it either allocates a new register
+ * and calculates a new set of values based on the stencil operation,
+ * or it reuses a register allocation and calculation done for an
+ * earlier (matching) operation, or it reuses the fbS_reg register
+ * (if the stencil operation is KEEP, which doesn't change the
+ * stencil buffer).
+ *
+ * Since this function allocates a variable number of registers,
+ * to avoid incurring complex logic to free them, they should
+ * be allocated after a spe_allocate_register_set() call
+ * and released by the corresponding spe_release_register_set() call.
+ */
+static void
+gen_get_stencil_values(struct spe_function *f,
+ const struct pipe_stencil_state *stencil,
+ const uint depth_enabled,
+ int fbS_reg,
+ int *fail_reg,
+ int *zfail_reg,
+ int *zpass_reg)
+{
+ uint zfail_op;
+
+ /* Stenciling had better be enabled here */
+ ASSERT(stencil->enabled);
+
+ /* If the depth test is not enabled, it is treated as though it always
+ * passes, which means that the zfail_op is not considered - a
+ * failing stencil test triggers the fail_op, and a passing one
+ * triggers the zpass_op
+ *
+ * As an optimization, override calculation of the zfail_op values
+ * if they aren't going to be used. By setting the value of
+ * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed
+ * to match the incoming stencil values, and no calculation will
+ * be done.
+ */
+ if (depth_enabled) {
+ zfail_op = stencil->zfail_op;
+ }
+ else {
+ zfail_op = PIPE_STENCIL_OP_KEEP;
+ }
+
+ /* One-sided or front-facing stencil */
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) {
+ *fail_reg = fbS_reg;
+ }
+ else {
+ *fail_reg = spe_allocate_available_register(f);
+ gen_stencil_values(f, stencil->fail_op, stencil->ref_value,
+ 0xff, fbS_reg, *fail_reg);
+ }
+
+ /* Check the possibly overridden value, not the structure value */
+ if (zfail_op == PIPE_STENCIL_OP_KEEP) {
+ *zfail_reg = fbS_reg;
+ }
+ else if (zfail_op == stencil->fail_op) {
+ *zfail_reg = *fail_reg;
+ }
+ else {
+ *zfail_reg = spe_allocate_available_register(f);
+ gen_stencil_values(f, stencil->zfail_op, stencil->ref_value,
+ 0xff, fbS_reg, *zfail_reg);
+ }
+
+ if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
+ *zpass_reg = fbS_reg;
+ }
+ else if (stencil->zpass_op == stencil->fail_op) {
+ *zpass_reg = *fail_reg;
+ }
+ else if (stencil->zpass_op == zfail_op) {
+ *zpass_reg = *zfail_reg;
+ }
+ else {
+ *zpass_reg = spe_allocate_available_register(f);
+ gen_stencil_values(f, stencil->zpass_op, stencil->ref_value,
+ 0xff, fbS_reg, *zpass_reg);
+ }
+}
+
+/**
+ * Note that fbZ_reg may *not* be set on entry, if in fact
+ * the depth test is not enabled. This function must not use
+ * the register if depth is not enabled.
+ */
+static boolean
+gen_stencil_depth_test(struct spe_function *f,
+ const struct pipe_depth_stencil_alpha_state *dsa,
+ const uint facing,
+ const int mask_reg, const int fragZ_reg,
+ const int fbZ_reg, const int fbS_reg)
+{
+ /* True if we've generated code that could require writeback to the
+ * depth and/or stencil buffers
+ */
+ boolean modified_buffers = FALSE;
+
+ boolean need_to_calculate_stencil_values;
+ boolean need_to_writemask_stencil_values;
+
+ struct pipe_stencil_state *stencil;
+
+ /* Registers. We may or may not actually allocate these, depending
+ * on whether the state values indicate that we need them.
+ */
+ int stencil_pass_reg, stencil_fail_reg;
+ int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values;
+ int stencil_writemask_reg;
+ int zmask_reg;
+ int newS_reg;
+
+ /* Stenciling is quite complex: up to six different configurable stencil
+ * operations/calculations can be required (three each for front-facing
+ * and back-facing fragments). Many of those operations will likely
+ * be identical, so there's good reason to try to avoid calculating
+ * the same values more than once (which unfortunately makes the code less
+ * straightforward).
+ *
+ * To make register management easier, we start a new
+ * register set; we can release all the registers in the set at
+ * once, and avoid having to keep track of exactly which registers
+ * we allocate. We can still allocate and free registers as
+ * desired (if we know we no longer need a register), but we don't
+ * have to spend the complexity to track the more difficult variant
+ * register usage scenarios.
+ */
+ spe_comment(f, 0, "Allocating stencil register set");
+ spe_allocate_register_set(f);
+
+ /* The facing we're given is the fragment facing; it doesn't
+ * exactly match the stencil facing. If stencil is enabled,
+ * but two-sided stencil is *not* enabled, we use the same
+ * stencil settings for both front- and back-facing fragments.
+ * We only use the "back-facing" stencil for backfacing fragments
+ * if two-sided stenciling is enabled.
+ */
+ if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) {
+ stencil = &dsa->stencil[1];
+ }
+ else {
+ stencil = &dsa->stencil[0];
+ }
+
+ /* Calculate the writemask. If the writemask is trivial (either
+ * all 0s, meaning that we don't need to calculate any stencil values
+ * because they're not going to change the stencil anyway, or all 1s,
+ * meaning that we have to calculate the stencil values but do not
+ * need to mask them), we can avoid generating code. Don't forget
+ * that we need to consider backfacing stencil, if enabled.
+ *
+ * Note that if the backface stencil is *not* enabled, the backface
+ * stencil will have the same values as the frontface stencil.
+ */
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zfail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
+ need_to_calculate_stencil_values = FALSE;
+ need_to_writemask_stencil_values = FALSE;
+ }
+ else if (stencil->write_mask == 0x0) {
+ /* All changes are writemasked out, so no need to calculate
+ * what those changes might be, and no need to write anything back.
+ */
+ need_to_calculate_stencil_values = FALSE;
+ need_to_writemask_stencil_values = FALSE;
+ }
+ else if (stencil->write_mask == 0xff) {
+ /* Still trivial, but a little less so. We need to write the stencil
+ * values, but we don't need to mask them.
+ */
+ need_to_calculate_stencil_values = TRUE;
+ need_to_writemask_stencil_values = FALSE;
+ }
+ else {
+ /* The general case: calculate, mask, and write */
+ need_to_calculate_stencil_values = TRUE;
+ need_to_writemask_stencil_values = TRUE;
+
+ /* While we're here, generate code that calculates what the
+ * writemask should be. If backface stenciling is enabled,
+ * and the backface writemask is not the same as the frontface
+ * writemask, we'll have to generate code that merges the
+ * two masks into a single effective mask based on fragment facing.
+ */
+ spe_comment(f, 0, "Computing stencil writemask");
+ stencil_writemask_reg = spe_allocate_available_register(f);
+ spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].write_mask);
+ }
+
+ /* At least one-sided stenciling must be on. Generate code that
+ * runs the stencil test on the basic/front-facing stencil, leaving
+ * the mask of passing stencil bits in stencil_pass_reg. This mask will
+ * be used both to mask the set of active pixels, and also to
+ * determine how the stencil buffer changes.
+ *
+ * This test will *not* change the value in mask_reg (because we don't
+ * yet know whether to apply the two-sided stencil or one-sided stencil).
+ */
+ spe_comment(f, 0, "Running basic stencil test");
+ stencil_pass_reg = spe_allocate_available_register(f);
+ gen_stencil_test(f, stencil, 0xff, mask_reg, fbS_reg, stencil_pass_reg);
+
+ /* Generate code that, given the mask of valid fragments and the
+ * mask of valid fragments that passed the stencil test, computes
+ * the mask of valid fragments that failed the stencil test. We
+ * have to do this before we run a depth test (because the
+ * depth test should not be performed on fragments that failed the
+ * stencil test, and because the depth test will update the
+ * mask of valid fragments based on the results of the depth test).
+ */
+ spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask");
+ stencil_fail_reg = spe_allocate_available_register(f);
+ spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg);
+ /* Now remove the stenciled-out pixels from the valid fragment mask,
+ * so we can later use the valid fragment mask in the depth test.
+ */
+ spe_and(f, mask_reg, mask_reg, stencil_pass_reg);
+
+ /* We may not need to calculate stencil values, if the writemask is off */
+ if (need_to_calculate_stencil_values) {
+ /* Generate code that calculates exactly which stencil values we need,
+ * without calculating the same value twice (say, if two different
+ * stencil ops have the same value). This code will work for one-sided
+ * and two-sided stenciling (so that we take into account that operations
+ * may match between front and back stencils), and will also take into
+ * account whether the depth test is enabled (if the depth test is off,
+ * we don't need any of the zfail results, because the depth test always
+ * is considered to pass if it is disabled). Any register value that
+ * does not need to be calculated will come back with the same value
+ * that's in fbS_reg.
+ *
+ * This function will allocate a variant number of registers that
+ * will be released as part of the register set.
+ */
+ spe_comment(f, 0, facing == CELL_FACING_FRONT
+ ? "Computing front-facing stencil values"
+ : "Computing back-facing stencil values");
+ gen_get_stencil_values(f, stencil, dsa->depth.enabled, fbS_reg,
+ &stencil_fail_values, &stencil_pass_depth_fail_values,
+ &stencil_pass_depth_pass_values);
+ }
+
+ /* We now have all the stencil values we need. We also need
+ * the results of the depth test to figure out which
+ * stencil values will become the new stencil values. (Even if
+ * we aren't actually calculating stencil values, we need to apply
+ * the depth test if it's enabled.)
+ *
+ * The code generated by gen_depth_test() returns the results of the
+ * test in the given register, but also alters the mask_reg based
+ * on the results of the test.
+ */
+ if (dsa->depth.enabled) {
+ spe_comment(f, 0, "Running stencil depth test");
+ zmask_reg = spe_allocate_available_register(f);
+ modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg,
+ fbZ_reg, zmask_reg);
+ }
+
+ if (need_to_calculate_stencil_values) {
+
+ /* If we need to writemask the stencil values before going into
+ * the stencil buffer, we'll have to use a new register to
+ * hold the new values. If not, we can just keep using the
+ * current register.
+ */
+ if (need_to_writemask_stencil_values) {
+ newS_reg = spe_allocate_available_register(f);
+ spe_comment(f, 0, "Saving current stencil values for writemasking");
+ spe_move(f, newS_reg, fbS_reg);
+ }
+ else {
+ newS_reg = fbS_reg;
+ }
+
+ /* Merge in the selected stencil fail values */
+ if (stencil_fail_values != fbS_reg) {
+ spe_comment(f, 0, "Loading stencil fail values");
+ spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg);
+ modified_buffers = TRUE;
+ }
+
+ /* Same for the stencil pass/depth fail values. If this calculation
+ * is not needed (say, if depth test is off), then the
+ * stencil_pass_depth_fail_values register will be equal to fbS_reg
+ * and we'll skip the calculation.
+ */
+ if (stencil_pass_depth_fail_values != fbS_reg) {
+ /* We don't actually have a stencil pass/depth fail mask yet.
+ * Calculate it here from the stencil passing mask and the
+ * depth passing mask. Note that zmask_reg *must* have been
+ * set above if we're here.
+ */
+ uint stencil_pass_depth_fail_mask =
+ spe_allocate_available_register(f);
+
+ spe_comment(f, 0, "Loading stencil pass/depth fail values");
+ spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg);
+
+ spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values,
+ stencil_pass_depth_fail_mask);
+
+ spe_release_register(f, stencil_pass_depth_fail_mask);
+ modified_buffers = TRUE;
+ }
+
+ /* Same for the stencil pass/depth pass mask. Note that we
+ * *can* get here with zmask_reg being unset (if the depth
+ * test is off but the stencil test is on). In this case,
+ * we assume the depth test passes, and don't need to mask
+ * the stencil pass mask with the Z mask.
+ */
+ if (stencil_pass_depth_pass_values != fbS_reg) {
+ if (dsa->depth.enabled) {
+ uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f);
+ /* We'll need a separate register */
+ spe_comment(f, 0, "Loading stencil pass/depth pass values");
+ spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg);
+ spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask);
+ spe_release_register(f, stencil_pass_depth_pass_mask);
+ }
+ else {
+ /* We can use the same stencil-pass register */
+ spe_comment(f, 0, "Loading stencil pass values");
+ spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg);
+ }
+ modified_buffers = TRUE;
+ }
+
+ /* Almost done. If we need to writemask, do it now, leaving the
+ * results in the fbS_reg register passed in. If we don't need
+ * to writemask, then the results are *already* in the fbS_reg,
+ * so there's nothing more to do.
+ */
+
+ if (need_to_writemask_stencil_values && modified_buffers) {
+ /* The Select Bytes command makes a fine writemask. Where
+ * the mask is 0, the first (original) values are retained,
+ * effectively masking out changes. Where the mask is 1, the
+ * second (new) values are retained, incorporating changes.
+ */
+ spe_comment(f, 0, "Writemasking new stencil values");
+ spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg);
+ }
+
+ } /* done calculating stencil values */
+
+ /* The stencil and/or depth values have been applied, and the
+ * mask_reg, fbS_reg, and fbZ_reg values have been updated.
+ * We're all done, except that we've allocated a fair number
+ * of registers that we didn't bother tracking. Release all
+ * those registers as part of the register set, and go home.
+ */
+ spe_comment(f, 0, "Releasing stencil register set");
+ spe_release_register_set(f);
+
+ /* Return TRUE if we could have modified the stencil and/or
+ * depth buffers.
+ */
+ return modified_buffers;
+}
+
+
+/**
+ * Generate depth and/or stencil test code.
+ * \param cell context
+ * \param dsa depth/stencil/alpha state
+ * \param f spe function to emit
+ * \param facing either CELL_FACING_FRONT or CELL_FACING_BACK
+ * \param mask_reg register containing the pixel alive/dead mask
+ * \param depth_tile_reg register containing address of z/stencil tile
+ * \param quad_offset_reg offset to quad from start of tile
+ * \param fragZ_reg register containg fragment Z values
+ */
+static void
+gen_depth_stencil(struct cell_context *cell,
+ const struct pipe_depth_stencil_alpha_state *dsa,
+ struct spe_function *f,
+ uint facing,
+ int mask_reg,
+ int depth_tile_reg,
+ int quad_offset_reg,
+ int fragZ_reg)
+
+{
+ const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
+ boolean write_depth_stencil;
+
+ /* framebuffer's combined z/stencil values register */
+ int fbZS_reg = spe_allocate_available_register(f);
+
+ /* Framebufer Z values register */
+ int fbZ_reg = spe_allocate_available_register(f);
+
+ /* Framebuffer stencil values register (may not be used) */
+ int fbS_reg = spe_allocate_available_register(f);
+
+ /* 24-bit mask register (may not be used) */
+ int zmask_reg = spe_allocate_available_register(f);
+
+ /**
+ * The following code:
+ * 1. fetch quad of packed Z/S values from the framebuffer tile.
+ * 2. extract the separate the Z and S values from packed values
+ * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints
+ *
+ * The instructions for doing this are interleaved for better performance.
+ */
+ spe_comment(f, 0, "Fetch Z/stencil quad from tile");
+
+ switch(zs_format) {
+ case PIPE_FORMAT_S8Z24_UNORM: /* fall through */
+ case PIPE_FORMAT_X8Z24_UNORM:
+ /* prepare mask to extract Z vals from ZS vals */
+ spe_load_uint(f, zmask_reg, 0x00ffffff);
+
+ /* convert fragment Z from [0,1] to 32-bit ints */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+
+ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
+ spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+
+ /* right shift 32-bit fragment Z to 24 bits */
+ spe_rotmi(f, fragZ_reg, fragZ_reg, -8);
+
+ /* extract 24-bit Z values from ZS values by masking */
+ spe_and(f, fbZ_reg, fbZS_reg, zmask_reg);
+
+ /* extract 8-bit stencil values by shifting */
+ spe_rotmi(f, fbS_reg, fbZS_reg, -24);
+ break;
+
+ case PIPE_FORMAT_Z24S8_UNORM: /* fall through */
+ case PIPE_FORMAT_Z24X8_UNORM:
+ /* convert fragment Z from [0,1] to 32-bit ints */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+
+ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
+ spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+
+ /* right shift 32-bit fragment Z to 24 bits */
+ spe_rotmi(f, fragZ_reg, fragZ_reg, -8);
+
+ /* extract 24-bit Z values from ZS values by shifting */
+ spe_rotmi(f, fbZ_reg, fbZS_reg, -8);
+
+ /* extract 8-bit stencil values by masking */
+ spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff);
+ break;
+
+ case PIPE_FORMAT_Z32_UNORM:
+ /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */
+ spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg);
+
+ /* convert fragment Z from [0,1] to 32-bit ints */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+
+ /* No stencil, so can't do anything there */
+ break;
+
+ case PIPE_FORMAT_Z16_UNORM:
+ /* XXX This code for 16bpp Z is broken! */
+
+ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
+ spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+
+ /* Copy over 4 32-bit values */
+ spe_move(f, fbZ_reg, fbZS_reg);
+
+ /* convert Z from [0,1] to 16-bit ints */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+ spe_rotmi(f, fragZ_reg, fragZ_reg, -16);
+ /* No stencil */
+ break;
+
+ default:
+ ASSERT(0); /* invalid format */
+ }
+
+ /* If stencil is enabled, use the stencil-specific code
+ * generator to generate both the stencil and depth (if needed)
+ * tests. Otherwise, if only depth is enabled, generate
+ * a quick depth test. The test generators themselves will
+ * report back whether the depth/stencil buffer has to be
+ * written back.
+ */
+ if (dsa->stencil[0].enabled) {
+ /* This will perform the stencil and depth tests, and update
+ * the mask_reg, fbZ_reg, and fbS_reg as required by the
+ * tests.
+ */
+ ASSERT(fbS_reg >= 0);
+ spe_comment(f, 0, "Perform stencil test");
+
+ /* Note that fbZ_reg may not be set on entry, if stenciling
+ * is enabled but there's no Z-buffer. The
+ * gen_stencil_depth_test() function must ignore the
+ * fbZ_reg register if depth is not enabled.
+ */
+ write_depth_stencil = gen_stencil_depth_test(f, dsa, facing,
+ mask_reg, fragZ_reg,
+ fbZ_reg, fbS_reg);
+ }
+ else if (dsa->depth.enabled) {
+ int zmask_reg = spe_allocate_available_register(f);
+ ASSERT(fbZ_reg >= 0);
+ spe_comment(f, 0, "Perform depth test");
+ write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg,
+ fbZ_reg, zmask_reg);
+ spe_release_register(f, zmask_reg);
+ }
+ else {
+ write_depth_stencil = FALSE;
+ }
+
+ if (write_depth_stencil) {
+ /* Merge latest Z and Stencil values into fbZS_reg.
+ * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
+ * fbS_reg has four 8-bit Z values in bits [7..0].
+ */
+ spe_comment(f, 0, "Store quad's depth/stencil values in tile");
+ if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+ zs_format == PIPE_FORMAT_X8Z24_UNORM) {
+ spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
+ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
+ }
+ else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
+ zs_format == PIPE_FORMAT_Z24X8_UNORM) {
+ spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */
+ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
+ }
+ else if (zs_format == PIPE_FORMAT_Z32_UNORM) {
+ spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
+ }
+ else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
+ spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
+ }
+ else if (zs_format == PIPE_FORMAT_S8_UNORM) {
+ ASSERT(0); /* XXX to do */
+ }
+ else {
+ ASSERT(0); /* bad zs_format */
+ }
+
+ /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
+ spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+ }
+
+ /* Don't need these any more */
+ spe_release_register(f, fbZS_reg);
+ spe_release_register(f, fbZ_reg);
+ spe_release_register(f, fbS_reg);
+ spe_release_register(f, zmask_reg);
+}
+
+
+
+/**
+ * Generate SPE code to implement the fragment operations (alpha test,
+ * depth test, stencil test, blending, colormask, and final
+ * framebuffer write) as specified by the current context state.
+ *
+ * Logically, this code will be called after running the fragment
+ * shader. But under some circumstances we could run some of this
+ * code before the fragment shader to cull fragments/quads that are
+ * totally occluded/discarded.
+ *
+ * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now.
+ *
+ * See the spu_default_fragment_ops() function to see how the per-fragment
+ * operations would be done with ordinary C code.
+ * The code we generate here though has no branches, is SIMD, etc and
+ * should be much faster.
+ *
+ * \param cell the rendering context (in)
+ * \param facing whether the generated code is for front-facing or
+ * back-facing fragments
+ * \param f the generated function (in/out); on input, the function
+ * must already have been initialized. On exit, whatever
+ * instructions within the generated function have had
+ * the fragment ops appended.
+ */
+void
+cell_gen_fragment_function(struct cell_context *cell,
+ const uint facing,
+ struct spe_function *f)
+{
+ const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil;
+ const struct pipe_blend_state *blend = cell->blend;
+ const struct pipe_blend_color *blend_color = &cell->blend_color;
+ const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
+
+ /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
+ const int x_reg = 3; /* uint */
+ const int y_reg = 4; /* uint */
+ const int color_tile_reg = 5; /* tile_t * */
+ const int depth_tile_reg = 6; /* tile_t * */
+ const int fragZ_reg = 7; /* vector float */
+ const int fragR_reg = 8; /* vector float */
+ const int fragG_reg = 9; /* vector float */
+ const int fragB_reg = 10; /* vector float */
+ const int fragA_reg = 11; /* vector float */
+ const int mask_reg = 12; /* vector uint */
+
+ ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK);
+
+ /* offset of quad from start of tile
+ * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
+ */
+ int quad_offset_reg;
+
+ int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
+
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ spe_print_code(f, TRUE);
+ spe_indent(f, 8);
+ spe_comment(f, -4, facing == CELL_FACING_FRONT
+ ? "Begin front-facing per-fragment ops"
+ : "Begin back-facing per-fragment ops");
+ }
+
+ spe_allocate_register(f, x_reg);
+ spe_allocate_register(f, y_reg);
+ spe_allocate_register(f, color_tile_reg);
+ spe_allocate_register(f, depth_tile_reg);
+ spe_allocate_register(f, fragZ_reg);
+ spe_allocate_register(f, fragR_reg);
+ spe_allocate_register(f, fragG_reg);
+ spe_allocate_register(f, fragB_reg);
+ spe_allocate_register(f, fragA_reg);
+ spe_allocate_register(f, mask_reg);
+
+ quad_offset_reg = spe_allocate_available_register(f);
+ fbRGBA_reg = spe_allocate_available_register(f);
+
+ /* compute offset of quad from start of tile, in bytes */
+ {
+ int x2_reg = spe_allocate_available_register(f);
+ int y2_reg = spe_allocate_available_register(f);
+
+ ASSERT(TILE_SIZE == 32);
+
+ spe_comment(f, 0, "Compute quad offset within tile");
+ spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */
+ spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
+ spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */
+ spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */
+ spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */
+
+ spe_release_register(f, x2_reg);
+ spe_release_register(f, y2_reg);
+ }
+
+ /* Generate the alpha test, if needed. */
+ if (dsa->alpha.enabled) {
+ gen_alpha_test(dsa, f, mask_reg, fragA_reg);
+ }
+
+ /* generate depth and/or stencil test code */
+ if (dsa->depth.enabled || dsa->stencil[0].enabled) {
+ gen_depth_stencil(cell, dsa, f,
+ facing,
+ mask_reg,
+ depth_tile_reg,
+ quad_offset_reg,
+ fragZ_reg);
+ }
+
+ /* Get framebuffer quad/colors. We'll need these for blending,
+ * color masking, and to obey the quad/pixel mask.
+ * Load: fbRGBA_reg = memory[color_tile + quad_offset]
+ * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
+ * we could skip this load.
+ */
+ spe_comment(f, 0, "Fetch quad colors from tile");
+ spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
+
+ if (blend->blend_enable) {
+ spe_comment(f, 0, "Perform blending");
+ gen_blend(blend, blend_color, f, color_format,
+ fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
+ }
+
+ /*
+ * Write fragment colors to framebuffer/tile.
+ * This involves converting the fragment colors from float[4] to the
+ * tile's specific format and obeying the quad/pixel mask.
+ */
+ {
+ int rgba_reg = spe_allocate_available_register(f);
+
+ /* Pack four float colors as four 32-bit int colors */
+ spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors");
+ gen_pack_colors(f, color_format,
+ fragR_reg, fragG_reg, fragB_reg, fragA_reg,
+ rgba_reg);
+
+ if (blend->logicop_enable) {
+ spe_comment(f, 0, "Compute logic op");
+ gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
+ }
+
+ if (blend->colormask != PIPE_MASK_RGBA) {
+ spe_comment(f, 0, "Compute color mask");
+ gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg);
+ }
+
+ /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
+ * if (mask[i])
+ * rgba[i] = rgba[i];
+ * else
+ * rgba[i] = framebuffer[i];
+ */
+ spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
+
+ /* Store updated quad in tile:
+ * memory[color_tile + quad_offset] = rgba_reg;
+ */
+ spe_comment(f, 0, "Store quad colors into color tile");
+ spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
+
+ spe_release_register(f, rgba_reg);
+ }
+
+ //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
+
+ spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
+
+ spe_release_register(f, fbRGBA_reg);
+ spe_release_register(f, quad_offset_reg);
+
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ char buffer[1024];
+ sprintf(buffer, "End %s-facing per-fragment ops: %d instructions",
+ facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst);
+ spe_comment(f, -4, buffer);
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
new file mode 100644
index 0000000000..21b35d1faf
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_GEN_FRAGMENT_H
+#define CELL_GEN_FRAGMENT_H
+
+
+extern void
+cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f);
+
+
+#endif /* CELL_GEN_FRAGMENT_H */
+
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
new file mode 100644
index 0000000000..81efd137c7
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -0,0 +1,358 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "draw/draw_context.h"
+#include "cell_context.h"
+#include "cell_flush.h"
+#include "cell_pipe_state.h"
+#include "cell_state.h"
+#include "cell_texture.h"
+
+
+
+static void *
+cell_create_blend_state(struct pipe_context *pipe,
+ const struct pipe_blend_state *blend)
+{
+ return mem_dup(blend, sizeof(*blend));
+}
+
+
+static void
+cell_bind_blend_state(struct pipe_context *pipe, void *blend)
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ draw_flush(cell->draw);
+
+ cell->blend = (struct pipe_blend_state *) blend;
+ cell->dirty |= CELL_NEW_BLEND;
+}
+
+
+static void
+cell_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+ FREE(blend);
+}
+
+
+static void
+cell_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color)
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ draw_flush(cell->draw);
+
+ cell->blend_color = *blend_color;
+
+ cell->dirty |= CELL_NEW_BLEND;
+}
+
+
+
+
+static void *
+cell_create_depth_stencil_alpha_state(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *dsa)
+{
+ return mem_dup(dsa, sizeof(*dsa));
+}
+
+
+static void
+cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
+ void *dsa)
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ draw_flush(cell->draw);
+
+ cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa;
+ cell->dirty |= CELL_NEW_DEPTH_STENCIL;
+}
+
+
+static void
+cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa)
+{
+ FREE(dsa);
+}
+
+
+static void
+cell_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ /* pass the clip state to the draw module */
+ draw_set_clip_state(cell->draw, clip);
+}
+
+
+
+/* Called when driver state tracker notices changes to the viewport
+ * matrix:
+ */
+static void
+cell_set_viewport_state( struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport )
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ cell->viewport = *viewport; /* struct copy */
+ cell->dirty |= CELL_NEW_VIEWPORT;
+
+ /* pass the viewport info to the draw module */
+ draw_set_viewport_state(cell->draw, viewport);
+
+ /* Using tnl/ and vf/ modules is temporary while getting started.
+ * Full pipe will have vertex shader, vertex fetch of its own.
+ */
+}
+
+
+static void
+cell_set_scissor_state( struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor )
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ memcpy( &cell->scissor, scissor, sizeof(*scissor) );
+ cell->dirty |= CELL_NEW_SCISSOR;
+}
+
+
+static void
+cell_set_polygon_stipple( struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple )
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) );
+ cell->dirty |= CELL_NEW_STIPPLE;
+}
+
+
+
+static void *
+cell_create_rasterizer_state(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *rasterizer)
+{
+ return mem_dup(rasterizer, sizeof(*rasterizer));
+}
+
+
+static void
+cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast)
+{
+ struct pipe_rasterizer_state *rasterizer =
+ (struct pipe_rasterizer_state *) rast;
+ struct cell_context *cell = cell_context(pipe);
+
+ /* pass-through to draw module */
+ draw_set_rasterizer_state(cell->draw, rasterizer);
+
+ cell->rasterizer = rasterizer;
+
+ cell->dirty |= CELL_NEW_RASTERIZER;
+}
+
+
+static void
+cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
+{
+ FREE(rasterizer);
+}
+
+
+
+static void *
+cell_create_sampler_state(struct pipe_context *pipe,
+ const struct pipe_sampler_state *sampler)
+{
+ return mem_dup(sampler, sizeof(*sampler));
+}
+
+
+static void
+cell_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **samplers)
+{
+ struct cell_context *cell = cell_context(pipe);
+ uint i, changed = 0x0;
+
+ assert(num <= CELL_MAX_SAMPLERS);
+
+ draw_flush(cell->draw);
+
+ for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
+ struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL;
+ if (cell->sampler[i] != new_samp) {
+ cell->sampler[i] = new_samp;
+ changed |= (1 << i);
+ }
+ }
+
+ if (changed) {
+ cell->dirty |= CELL_NEW_SAMPLER;
+ cell->dirty_samplers |= changed;
+ }
+}
+
+
+static void
+cell_delete_sampler_state(struct pipe_context *pipe,
+ void *sampler)
+{
+ FREE( sampler );
+}
+
+
+
+static void
+cell_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num, struct pipe_texture **texture)
+{
+ struct cell_context *cell = cell_context(pipe);
+ uint i, changed = 0x0;
+
+ assert(num <= CELL_MAX_SAMPLERS);
+
+ for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
+ struct pipe_texture *new_tex = i < num ? texture[i] : NULL;
+ if ((struct pipe_texture *) cell->texture[i] != new_tex) {
+ pipe_texture_reference((struct pipe_texture **) &cell->texture[i],
+ new_tex);
+ changed |= (1 << i);
+ }
+ }
+
+ cell->num_textures = num;
+
+ if (changed) {
+ cell->dirty |= CELL_NEW_TEXTURE;
+ cell->dirty_textures |= changed;
+ }
+}
+
+
+
+static void
+cell_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) {
+ struct pipe_surface *csurf = fb->cbufs[0];
+ struct pipe_surface *zsurf = fb->zsbuf;
+ uint i;
+ uint flags = (PIPE_BUFFER_USAGE_GPU_WRITE |
+ PIPE_BUFFER_USAGE_GPU_READ);
+
+ /* unmap old surfaces */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) {
+ pipe_surface_unmap(cell->framebuffer.cbufs[i]);
+ cell->cbuf_map[i] = NULL;
+ }
+ }
+
+ if (cell->framebuffer.zsbuf && cell->zsbuf_map) {
+ pipe_surface_unmap(cell->framebuffer.zsbuf);
+ cell->zsbuf_map = NULL;
+ }
+
+ /* Finish any pending rendering to the current surface before
+ * installing a new surface!
+ */
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
+
+ /* update my state
+ * (this is also where old surfaces will finally get freed)
+ */
+ cell->framebuffer.width = fb->width;
+ cell->framebuffer.height = fb->height;
+ cell->framebuffer.num_cbufs = fb->num_cbufs;
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]);
+ }
+ pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf);
+
+ /* map new surfaces */
+ if (csurf)
+ cell->cbuf_map[0] = pipe_surface_map(csurf, flags);
+
+ if (zsurf)
+ cell->zsbuf_map = pipe_surface_map(zsurf, flags);
+
+ cell->dirty |= CELL_NEW_FRAMEBUFFER;
+ }
+}
+
+
+
+void
+cell_init_state_functions(struct cell_context *cell)
+{
+ cell->pipe.create_blend_state = cell_create_blend_state;
+ cell->pipe.bind_blend_state = cell_bind_blend_state;
+ cell->pipe.delete_blend_state = cell_delete_blend_state;
+
+ cell->pipe.create_sampler_state = cell_create_sampler_state;
+ cell->pipe.bind_sampler_states = cell_bind_sampler_states;
+ cell->pipe.delete_sampler_state = cell_delete_sampler_state;
+
+ cell->pipe.set_sampler_textures = cell_set_sampler_textures;
+
+ cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state;
+ cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state;
+ cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state;
+
+ cell->pipe.create_rasterizer_state = cell_create_rasterizer_state;
+ cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state;
+ cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state;
+
+ cell->pipe.set_blend_color = cell_set_blend_color;
+ cell->pipe.set_clip_state = cell_set_clip_state;
+
+ cell->pipe.set_framebuffer_state = cell_set_framebuffer_state;
+
+ cell->pipe.set_polygon_stipple = cell_set_polygon_stipple;
+ cell->pipe.set_scissor_state = cell_set_scissor_state;
+ cell->pipe.set_viewport_state = cell_set_viewport_state;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h
new file mode 100644
index 0000000000..1889bd52ff
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.h
@@ -0,0 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_PIPE_STATE_H
+#define CELL_PIPE_STATE_H
+
+
+struct cell_context;
+
+extern void
+cell_init_state_functions(struct cell_context *cell);
+
+
+#endif /* CELL_PIPE_STATE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c
new file mode 100644
index 0000000000..79cb8df82f
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_render.c
@@ -0,0 +1,211 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Last stage of 'draw' pipeline: send tris to SPUs.
+ * \author Brian Paul
+ */
+
+#include "cell_context.h"
+#include "cell_render.h"
+#include "cell_spu.h"
+#include "util/u_memory.h"
+#include "draw/draw_private.h"
+
+
+struct render_stage {
+ struct draw_stage stage; /**< This must be first (base class) */
+
+ struct cell_context *cell;
+};
+
+
+static INLINE struct render_stage *
+render_stage(struct draw_stage *stage)
+{
+ return (struct render_stage *) stage;
+}
+
+
+static void render_begin( struct draw_stage *stage )
+{
+#if 0
+ struct render_stage *render = render_stage(stage);
+ struct cell_context *sp = render->cell;
+ const struct pipe_shader_state *fs = &render->cell->fs->shader;
+ render->quad.nr_attrs = render->cell->nr_frag_attrs;
+
+ render->firstFpInput = fs->input_semantic_name[0];
+
+ sp->quad.first->begin(sp->quad.first);
+#endif
+}
+
+
+static void render_end( struct draw_stage *stage )
+{
+}
+
+
+static void reset_stipple_counter( struct draw_stage *stage )
+{
+ struct render_stage *render = render_stage(stage);
+ /*render->cell->line_stipple_counter = 0;*/
+}
+
+
+static void
+render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+}
+
+
+static void
+render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+}
+
+
+/** Write a vertex into the prim buffer */
+static void
+save_vertex(struct cell_prim_buffer *buf, uint pos,
+ const struct vertex_header *vert)
+{
+ uint attr, j;
+
+ for (attr = 0; attr < 2; attr++) {
+ for (j = 0; j < 4; j++) {
+ buf->vertex[pos][attr][j] = vert->data[attr][j];
+ }
+ }
+
+ /* update bounding box */
+ if (vert->data[0][0] < buf->xmin)
+ buf->xmin = vert->data[0][0];
+ if (vert->data[0][0] > buf->xmax)
+ buf->xmax = vert->data[0][0];
+ if (vert->data[0][1] < buf->ymin)
+ buf->ymin = vert->data[0][1];
+ if (vert->data[0][1] > buf->ymax)
+ buf->ymax = vert->data[0][1];
+}
+
+
+static void
+render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+ struct render_stage *rs = render_stage(stage);
+ struct cell_context *cell = rs->cell;
+ struct cell_prim_buffer *buf = &cell->prim_buffer;
+ uint i;
+
+ if (buf->num_verts + 3 > CELL_MAX_VERTS) {
+ cell_flush_prim_buffer(cell);
+ }
+
+ i = buf->num_verts;
+ assert(i+2 <= CELL_MAX_VERTS);
+ save_vertex(buf, i+0, prim->v[0]);
+ save_vertex(buf, i+1, prim->v[1]);
+ save_vertex(buf, i+2, prim->v[2]);
+ buf->num_verts += 3;
+}
+
+
+/**
+ * Send the a RENDER command to all SPUs to have them render the prims
+ * in the current prim_buffer.
+ */
+void
+cell_flush_prim_buffer(struct cell_context *cell)
+{
+ uint i;
+
+ if (cell->prim_buffer.num_verts == 0)
+ return;
+
+ for (i = 0; i < cell->num_spus; i++) {
+ struct cell_command_render *render = &cell_global.command[i].render;
+ render->prim_type = PIPE_PRIM_TRIANGLES;
+ render->num_verts = cell->prim_buffer.num_verts;
+ render->front_winding = cell->rasterizer->front_winding;
+ render->vertex_size = cell->vertex_info->size * 4;
+ render->xmin = cell->prim_buffer.xmin;
+ render->ymin = cell->prim_buffer.ymin;
+ render->xmax = cell->prim_buffer.xmax;
+ render->ymax = cell->prim_buffer.ymax;
+ render->vertex_data = &cell->prim_buffer.vertex;
+ ASSERT_ALIGN16(render->vertex_data);
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER);
+ }
+
+ cell->prim_buffer.num_verts = 0;
+
+ cell->prim_buffer.xmin = 1e100;
+ cell->prim_buffer.ymin = 1e100;
+ cell->prim_buffer.xmax = -1e100;
+ cell->prim_buffer.ymax = -1e100;
+
+ /* XXX temporary, need to double-buffer the prim buffer until we get
+ * a real command buffer/list system.
+ */
+ cell_flush(&cell->pipe, 0x0);
+}
+
+
+
+static void render_destroy( struct draw_stage *stage )
+{
+ FREE( stage );
+}
+
+
+/**
+ * Create a new draw/render stage. This will be plugged into the
+ * draw module as the last pipeline stage.
+ */
+struct draw_stage *cell_draw_render_stage( struct cell_context *cell )
+{
+ struct render_stage *render = CALLOC_STRUCT(render_stage);
+
+ render->cell = cell;
+ render->stage.draw = cell->draw;
+ render->stage.begin = render_begin;
+ render->stage.point = render_point;
+ render->stage.line = render_line;
+ render->stage.tri = render_tri;
+ render->stage.end = render_end;
+ render->stage.reset_stipple_counter = reset_stipple_counter;
+ render->stage.destroy = render_destroy;
+
+ /*
+ render->quad.coef = render->coef;
+ render->quad.posCoef = &render->posCoef;
+ */
+
+ return &render->stage;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h
new file mode 100644
index 0000000000..826dcbafeb
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_render.h
@@ -0,0 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef CELL_RENDER_H
+#define CELL_RENDER_H
+
+struct cell_context;
+struct draw_stage;
+
+extern void
+cell_flush_prim_buffer(struct cell_context *cell);
+
+extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell );
+
+#endif /* CELL_RENDER_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c
new file mode 100644
index 0000000000..6fc2257e2a
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_screen.c
@@ -0,0 +1,174 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+
+#include "cell/common.h"
+#include "cell_screen.h"
+#include "cell_texture.h"
+#include "cell_winsys.h"
+
+
+static const char *
+cell_get_vendor(struct pipe_screen *screen)
+{
+ return "Tungsten Graphics, Inc.";
+}
+
+
+static const char *
+cell_get_name(struct pipe_screen *screen)
+{
+ return "Cell";
+}
+
+
+static int
+cell_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return CELL_MAX_SAMPLERS;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 1;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return CELL_MAX_TEXTURE_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 8; /* max 128x128x128 */
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return CELL_MAX_TEXTURE_LEVELS;
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1; /* XXX not really true */
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return 0; /* XXX to do */
+ default:
+ return 0;
+ }
+}
+
+
+static float
+cell_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 255.0; /* arbitrary */
+
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 255.0; /* arbitrary */
+
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 0.0;
+
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0; /* arbitrary */
+
+ default:
+ return 0;
+ }
+}
+
+
+static boolean
+cell_is_format_supported( struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage,
+ unsigned geom_flags )
+{
+ /* cell supports most formats, XXX for now anyway */
+ if (format == PIPE_FORMAT_DXT5_RGBA ||
+ format == PIPE_FORMAT_R8G8B8A8_SRGB)
+ return FALSE;
+ else
+ return TRUE;
+}
+
+
+static void
+cell_destroy_screen( struct pipe_screen *screen )
+{
+ struct pipe_winsys *winsys = screen->winsys;
+
+ if(winsys->destroy)
+ winsys->destroy(winsys);
+
+ FREE(screen);
+}
+
+
+/**
+ * Create a new pipe_screen object
+ * Note: we're not presently subclassing pipe_screen (no cell_screen) but
+ * that would be the place to put SPU thread/context info...
+ */
+struct pipe_screen *
+cell_create_screen(struct pipe_winsys *winsys)
+{
+ struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen);
+
+ if (!screen)
+ return NULL;
+
+ screen->winsys = winsys;
+
+ screen->destroy = cell_destroy_screen;
+
+ screen->get_name = cell_get_name;
+ screen->get_vendor = cell_get_vendor;
+ screen->get_param = cell_get_param;
+ screen->get_paramf = cell_get_paramf;
+ screen->is_format_supported = cell_is_format_supported;
+
+ cell_init_screen_texture_funcs(screen);
+
+ return screen;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h
new file mode 100644
index 0000000000..c7e15889d6
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_screen.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_SCREEN_H
+#define CELL_SCREEN_H
+
+
+struct pipe_screen;
+struct pipe_winsys;
+
+
+extern struct pipe_screen *
+cell_create_screen(struct pipe_winsys *winsys);
+
+
+#endif /* CELL_SCREEN_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c
new file mode 100644
index 0000000000..28e5e6d706
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_spu.c
@@ -0,0 +1,219 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Utility/wrappers for communicating with the SPUs.
+ */
+
+
+#include <pthread.h>
+
+#include "cell_spu.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+#include "cell/common.h"
+
+
+/*
+helpful headers:
+/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h
+*/
+
+
+/**
+ * Cell/SPU info that's not per-context.
+ */
+struct cell_global_info cell_global;
+
+
+/**
+ * Scan /proc/cpuinfo to determine the timebase for the system.
+ * This is used by the SPUs to convert 'decrementer' ticks to seconds.
+ * There may be a better way to get this value...
+ */
+static unsigned
+get_timebase(void)
+{
+ FILE *f = fopen("/proc/cpuinfo", "r");
+ unsigned timebase;
+
+ assert(f);
+ while (!feof(f)) {
+ char line[80];
+ fgets(line, sizeof(line), f);
+ if (strncmp(line, "timebase", 8) == 0) {
+ char *colon = strchr(line, ':');
+ if (colon) {
+ timebase = atoi(colon + 2);
+ break;
+ }
+ }
+ }
+ fclose(f);
+
+ return timebase;
+}
+
+
+/**
+ * Write a 1-word message to the given SPE mailbox.
+ */
+void
+send_mbox_message(spe_context_ptr_t ctx, unsigned int msg)
+{
+ spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING);
+}
+
+
+/**
+ * Wait for a 1-word message to arrive in given mailbox.
+ */
+uint
+wait_mbox_message(spe_context_ptr_t ctx)
+{
+ do {
+ unsigned data;
+ int count = spe_out_mbox_read(ctx, &data, 1);
+
+ if (count == 1) {
+ return data;
+ }
+
+ if (count < 0) {
+ /* error */ ;
+ }
+ } while (1);
+}
+
+
+/**
+ * Called by pthread_create() to spawn an SPU thread.
+ */
+static void *
+cell_thread_function(void *arg)
+{
+ struct cell_init_info *init = (struct cell_init_info *) arg;
+ unsigned entry = SPE_DEFAULT_ENTRY;
+
+ ASSERT_ALIGN16(init);
+
+ if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0,
+ init, NULL, NULL) < 0) {
+ fprintf(stderr, "spe_context_run() failed\n");
+ exit(1);
+ }
+
+ pthread_exit(NULL);
+}
+
+
+/**
+ * Create the SPU threads. This is done once during driver initialization.
+ * This involves setting the the "init" message which is sent to each SPU.
+ * The init message specifies an SPU id, total number of SPUs, location
+ * and number of batch buffers, etc.
+ */
+void
+cell_start_spus(struct cell_context *cell)
+{
+ static boolean one_time_init = FALSE;
+ uint i, j;
+ uint timebase = get_timebase();
+
+ if (one_time_init) {
+ fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
+ "on Cell.\n");
+ abort();
+ }
+
+ one_time_init = TRUE;
+
+ assert(cell->num_spus <= CELL_MAX_SPUS);
+
+ ASSERT_ALIGN16(&cell_global.inits[0]);
+ ASSERT_ALIGN16(&cell_global.inits[1]);
+
+ /*
+ * Initialize the global 'inits' structure for each SPU.
+ * A pointer to the init struct will be passed to each SPU.
+ * The SPUs will then each grab their init info with mfc_get().
+ */
+ for (i = 0; i < cell->num_spus; i++) {
+ cell_global.inits[i].id = i;
+ cell_global.inits[i].num_spus = cell->num_spus;
+ cell_global.inits[i].debug_flags = cell->debug_flags;
+ cell_global.inits[i].inv_timebase = 1000.0f / timebase;
+
+ for (j = 0; j < CELL_NUM_BUFFERS; j++) {
+ cell_global.inits[i].buffers[j] = cell->buffer[j];
+ }
+ cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0];
+
+ cell_global.inits[i].spu_functions = &cell->spu_functions;
+
+ cell_global.spe_contexts[i] = spe_context_create(0, NULL);
+ if (!cell_global.spe_contexts[i]) {
+ fprintf(stderr, "spe_context_create() failed\n");
+ exit(1);
+ }
+
+ if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) {
+ fprintf(stderr, "spe_program_load() failed\n");
+ exit(1);
+ }
+
+ pthread_create(&cell_global.spe_threads[i], /* returned thread handle */
+ NULL, /* pthread attribs */
+ &cell_thread_function, /* start routine */
+ &cell_global.inits[i]); /* thread argument */
+ }
+}
+
+
+/**
+ * Tell all the SPUs to stop/exit.
+ * This is done when the driver's exiting / cleaning up.
+ */
+void
+cell_spu_exit(struct cell_context *cell)
+{
+ uint i;
+
+ for (i = 0; i < cell->num_spus; i++) {
+ send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT);
+ }
+
+ /* wait for threads to exit */
+ for (i = 0; i < cell->num_spus; i++) {
+ void *value;
+ pthread_join(cell_global.spe_threads[i], &value);
+ cell_global.spe_threads[i] = 0;
+ cell_global.spe_contexts[i] = 0;
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h
new file mode 100644
index 0000000000..c93958a9ed
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_spu.h
@@ -0,0 +1,79 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef CELL_SPU
+#define CELL_SPU
+
+
+#include <libspe2.h>
+#include <pthread.h>
+#include "cell/common.h"
+
+#include "cell_context.h"
+
+
+/**
+ * Global vars, for now anyway.
+ */
+struct cell_global_info
+{
+ /**
+ * SPU/SPE handles, etc
+ */
+ spe_context_ptr_t spe_contexts[CELL_MAX_SPUS];
+ pthread_t spe_threads[CELL_MAX_SPUS];
+
+ /**
+ * Data sent to SPUs at start-up
+ */
+ struct cell_init_info inits[CELL_MAX_SPUS];
+};
+
+
+extern struct cell_global_info cell_global;
+
+
+/** This is the handle for the actual SPE code */
+extern spe_program_handle_t g3d_spu;
+
+
+extern void
+send_mbox_message(spe_context_ptr_t ctx, unsigned int msg);
+
+extern uint
+wait_mbox_message(spe_context_ptr_t ctx);
+
+
+extern void
+cell_start_spus(struct cell_context *cell);
+
+
+extern void
+cell_spu_exit(struct cell_context *cell);
+
+
+#endif /* CELL_SPU */
diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h
new file mode 100644
index 0000000000..b193170f9c
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state.h
@@ -0,0 +1,65 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_STATE_H
+#define CELL_STATE_H
+
+
+#define CELL_NEW_VIEWPORT 0x1
+#define CELL_NEW_RASTERIZER 0x2
+#define CELL_NEW_FS 0x4
+#define CELL_NEW_BLEND 0x8
+#define CELL_NEW_CLIP 0x10
+#define CELL_NEW_SCISSOR 0x20
+#define CELL_NEW_STIPPLE 0x40
+#define CELL_NEW_FRAMEBUFFER 0x80
+#define CELL_NEW_ALPHA_TEST 0x100
+#define CELL_NEW_DEPTH_STENCIL 0x200
+#define CELL_NEW_SAMPLER 0x400
+#define CELL_NEW_TEXTURE 0x800
+#define CELL_NEW_VERTEX 0x1000
+#define CELL_NEW_VS 0x2000
+#define CELL_NEW_VS_CONSTANTS 0x4000
+#define CELL_NEW_FS_CONSTANTS 0x8000
+#define CELL_NEW_VERTEX_INFO 0x10000
+
+
+extern void
+cell_update_derived( struct cell_context *softpipe );
+
+
+extern void
+cell_init_shader_functions(struct cell_context *cell);
+
+
+extern void
+cell_init_vertex_functions(struct cell_context *cell);
+
+
+#endif /* CELL_STATE_H */
+
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c
new file mode 100644
index 0000000000..efc4f78364
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c
@@ -0,0 +1,170 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "cell_context.h"
+#include "cell_batch.h"
+#include "cell_state.h"
+#include "cell_state_emit.h"
+
+
+/**
+ * Determine how to map vertex program outputs to fragment program inputs.
+ * Basically, this will be used when computing the triangle interpolation
+ * coefficients from the post-transform vertex attributes.
+ */
+static void
+calculate_vertex_layout( struct cell_context *cell )
+{
+ const struct cell_fragment_shader_state *fs = cell->fs;
+ const enum interp_mode colorInterp
+ = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
+ struct vertex_info *vinfo = &cell->vertex_info;
+ uint i;
+ int src;
+
+#if 0
+ if (cell->vbuf) {
+ /* if using the post-transform vertex buffer, tell draw_vbuf to
+ * simply emit the whole post-xform vertex as-is:
+ */
+ struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf;
+ vinfo_vbuf->num_attribs = 0;
+ draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0);
+ vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4;
+ }
+#endif
+
+ /* reset vinfo */
+ vinfo->num_attribs = 0;
+
+ /* we always want to emit vertex pos */
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
+ assert(src >= 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
+
+
+ /*
+ * Loop over fragment shader inputs, searching for the matching output
+ * from the vertex shader.
+ */
+ for (i = 0; i < fs->info.num_inputs; i++) {
+ switch (fs->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ /* already done above */
+ break;
+
+ case TGSI_SEMANTIC_COLOR:
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR,
+ fs->info.input_semantic_index[i]);
+ assert(src >= 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ break;
+
+ case TGSI_SEMANTIC_FOG:
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
+#if 1
+ if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */
+ src = 0;
+#endif
+ assert(src >= 0);
+ draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ break;
+
+ case TGSI_SEMANTIC_GENERIC:
+ /* this includes texcoords and varying vars */
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC,
+ fs->info.input_semantic_index[i]);
+ assert(src >= 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ draw_compute_vertex_size(vinfo);
+
+ /* XXX only signal this if format really changes */
+ cell->dirty |= CELL_NEW_VERTEX_INFO;
+}
+
+
+#if 0
+/**
+ * Recompute cliprect from scissor bounds, scissor enable and surface size.
+ */
+static void
+compute_cliprect(struct cell_context *sp)
+{
+ uint surfWidth = sp->framebuffer.width;
+ uint surfHeight = sp->framebuffer.height;
+
+ if (sp->rasterizer->scissor) {
+ /* clip to scissor rect */
+ sp->cliprect.minx = MAX2(sp->scissor.minx, 0);
+ sp->cliprect.miny = MAX2(sp->scissor.miny, 0);
+ sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth);
+ sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight);
+ }
+ else {
+ /* clip to surface bounds */
+ sp->cliprect.minx = 0;
+ sp->cliprect.miny = 0;
+ sp->cliprect.maxx = surfWidth;
+ sp->cliprect.maxy = surfHeight;
+ }
+}
+#endif
+
+
+
+/**
+ * Update derived state, send current state to SPUs prior to rendering.
+ */
+void cell_update_derived( struct cell_context *cell )
+{
+ if (cell->dirty & (CELL_NEW_RASTERIZER |
+ CELL_NEW_FS |
+ CELL_NEW_VS))
+ calculate_vertex_layout( cell );
+
+#if 0
+ if (cell->dirty & (CELL_NEW_SCISSOR |
+ CELL_NEW_DEPTH_STENCIL_ALPHA |
+ CELL_NEW_FRAMEBUFFER))
+ compute_cliprect(cell);
+#endif
+
+ cell_emit_state(cell);
+
+ cell->dirty = 0;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
new file mode 100644
index 0000000000..39b85faeb8
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -0,0 +1,340 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "cell_context.h"
+#include "cell_gen_fragment.h"
+#include "cell_state.h"
+#include "cell_state_emit.h"
+#include "cell_batch.h"
+#include "cell_texture.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+
+
+/**
+ * Find/create a cell_command_fragment_ops object corresponding to the
+ * current blend/stencil/z/colormask/etc. state.
+ */
+static struct cell_command_fragment_ops *
+lookup_fragment_ops(struct cell_context *cell)
+{
+ struct cell_fragment_ops_key key;
+ struct cell_command_fragment_ops *ops;
+
+ /*
+ * Build key
+ */
+ memset(&key, 0, sizeof(key));
+ key.blend = *cell->blend;
+ key.blend_color = cell->blend_color;
+ key.dsa = *cell->depth_stencil;
+
+ if (cell->framebuffer.cbufs[0])
+ key.color_format = cell->framebuffer.cbufs[0]->format;
+ else
+ key.color_format = PIPE_FORMAT_NONE;
+
+ if (cell->framebuffer.zsbuf)
+ key.zs_format = cell->framebuffer.zsbuf->format;
+ else
+ key.zs_format = PIPE_FORMAT_NONE;
+
+ /*
+ * Look up key in cache.
+ */
+ ops = (struct cell_command_fragment_ops *)
+ util_keymap_lookup(cell->fragment_ops_cache, &key);
+
+ /*
+ * If not found, create/save new fragment ops command.
+ */
+ if (!ops) {
+ struct spe_function spe_code_front, spe_code_back;
+ unsigned int facing_dependent, total_code_size;
+
+ if (0)
+ debug_printf("**** Create New Fragment Ops\n");
+
+ /* Prepare the buffer that will hold the generated code. The
+ * "0" passed in for the size means that the SPE code will
+ * use a default size.
+ */
+ spe_init_func(&spe_code_front, 0);
+ spe_init_func(&spe_code_back, 0);
+
+ /* Generate new code. Always generate new code for both front-facing
+ * and back-facing fragments, even if it's the same code in both
+ * cases.
+ */
+ cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front);
+ cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back);
+
+ /* Make sure the code is a multiple of 8 bytes long; this is
+ * required to ensure that the dual pipe instruction alignment
+ * is correct. It's also important for the SPU unpacking,
+ * which assumes 8-byte boundaries.
+ */
+ unsigned int front_code_size = spe_code_size(&spe_code_front);
+ while (front_code_size % 8 != 0) {
+ spe_lnop(&spe_code_front);
+ front_code_size = spe_code_size(&spe_code_front);
+ }
+ unsigned int back_code_size = spe_code_size(&spe_code_back);
+ while (back_code_size % 8 != 0) {
+ spe_lnop(&spe_code_back);
+ back_code_size = spe_code_size(&spe_code_back);
+ }
+
+ /* Determine whether the code we generated is facing-dependent, by
+ * determining whether the generated code is different for the front-
+ * and back-facing fragments.
+ */
+ if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) {
+ /* Code is identical; only need one copy. */
+ facing_dependent = 0;
+ total_code_size = front_code_size;
+ }
+ else {
+ /* Code is different for front-facing and back-facing fragments.
+ * Need to send both copies.
+ */
+ facing_dependent = 1;
+ total_code_size = front_code_size + back_code_size;
+ }
+
+ /* alloc new fragment ops command. Note that this structure
+ * has variant length based on the total code size required.
+ */
+ ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size);
+ /* populate the new cell_command_fragment_ops object */
+ ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS;
+ ops->total_code_size = total_code_size;
+ ops->front_code_index = 0;
+ memcpy(ops->code, spe_code_front.store, front_code_size);
+ if (facing_dependent) {
+ /* We have separate front- and back-facing code. Append the
+ * back-facing code to the buffer. Be careful because the code
+ * size is in bytes, but the buffer is of unsigned elements.
+ */
+ ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]);
+ memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size);
+ }
+ else {
+ /* Use the same code for front- and back-facing fragments */
+ ops->back_code_index = ops->front_code_index;
+ }
+
+ /* Set the fields for the fallback case. Note that these fields
+ * (and the whole fallback case) will eventually go away.
+ */
+ ops->dsa = *cell->depth_stencil;
+ ops->blend = *cell->blend;
+ ops->blend_color = cell->blend_color;
+
+ /* insert cell_command_fragment_ops object into keymap/cache */
+ util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL);
+
+ /* release rtasm buffer */
+ spe_release_func(&spe_code_front);
+ spe_release_func(&spe_code_back);
+ }
+ else {
+ if (0)
+ debug_printf("**** Re-use Fragment Ops\n");
+ }
+
+ return ops;
+}
+
+
+
+static void
+emit_state_cmd(struct cell_context *cell, uint cmd,
+ const void *state, uint state_size)
+{
+ uint32_t *dst = (uint32_t *)
+ cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size));
+ *dst = cmd;
+ memcpy(dst + 4, state, state_size);
+}
+
+
+/**
+ * For state marked as 'dirty', construct a state-update command block
+ * and insert it into the current batch buffer.
+ */
+void
+cell_emit_state(struct cell_context *cell)
+{
+ if (cell->dirty & CELL_NEW_FRAMEBUFFER) {
+ struct pipe_surface *cbuf = cell->framebuffer.cbufs[0];
+ struct pipe_surface *zbuf = cell->framebuffer.zsbuf;
+ STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0);
+ struct cell_command_framebuffer *fb
+ = cell_batch_alloc16(cell, sizeof(*fb));
+ fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER;
+ fb->color_start = cell->cbuf_map[0];
+ fb->color_format = cbuf->format;
+ fb->depth_start = cell->zsbuf_map;
+ fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE;
+ fb->width = cell->framebuffer.width;
+ fb->height = cell->framebuffer.height;
+#if 0
+ printf("EMIT color format %s\n", pf_name(fb->color_format));
+ printf("EMIT depth format %s\n", pf_name(fb->depth_format));
+#endif
+ }
+
+ if (cell->dirty & (CELL_NEW_RASTERIZER)) {
+ STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0);
+ struct cell_command_rasterizer *rast =
+ cell_batch_alloc16(cell, sizeof(*rast));
+ rast->opcode[0] = CELL_CMD_STATE_RASTERIZER;
+ rast->rasterizer = *cell->rasterizer;
+ }
+
+ if (cell->dirty & (CELL_NEW_FS)) {
+ /* Send new fragment program to SPUs */
+ STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0);
+ struct cell_command_fragment_program *fp
+ = cell_batch_alloc16(cell, sizeof(*fp));
+ fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM;
+ fp->num_inst = cell->fs->code.num_inst;
+ memcpy(&fp->code, cell->fs->code.store,
+ SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
+ if (0) {
+ int i;
+ printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n");
+ for (i = 0; i < fp->num_inst; i++) {
+ printf(" %3d: 0x%08x\n", i, fp->code[i]);
+ }
+ }
+ }
+
+ if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) {
+ const uint shader = PIPE_SHADER_FRAGMENT;
+ const uint num_const = cell->constants[shader].size / sizeof(float);
+ uint i, j;
+ float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float)));
+ uint32_t *ibuf = (uint32_t *) buf;
+ const float *constants = pipe_buffer_map(cell->pipe.screen,
+ cell->constants[shader].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS;
+ ibuf[4] = num_const;
+ j = 8;
+ for (i = 0; i < num_const; i++) {
+ buf[j++] = constants[i];
+ }
+ pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer);
+ }
+
+ if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
+ CELL_NEW_DEPTH_STENCIL |
+ CELL_NEW_BLEND)) {
+ struct cell_command_fragment_ops *fops, *fops_cmd;
+ /* Note that cell_command_fragment_ops is a variant-sized record */
+ fops = lookup_fragment_ops(cell);
+ fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + fops->total_code_size));
+ memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size);
+ }
+
+ if (cell->dirty & CELL_NEW_SAMPLER) {
+ uint i;
+ for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
+ if (cell->dirty_samplers & (1 << i)) {
+ if (cell->sampler[i]) {
+ STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0);
+ struct cell_command_sampler *sampler
+ = cell_batch_alloc16(cell, sizeof(*sampler));
+ sampler->opcode[0] = CELL_CMD_STATE_SAMPLER;
+ sampler->unit = i;
+ sampler->state = *cell->sampler[i];
+ }
+ }
+ }
+ cell->dirty_samplers = 0x0;
+ }
+
+ if (cell->dirty & CELL_NEW_TEXTURE) {
+ uint i;
+ for (i = 0;i < CELL_MAX_SAMPLERS; i++) {
+ if (cell->dirty_textures & (1 << i)) {
+ STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0);
+ struct cell_command_texture *texture
+ = (struct cell_command_texture *)cell_batch_alloc16(cell, sizeof(*texture));
+ texture->opcode[0] = CELL_CMD_STATE_TEXTURE;
+ texture->unit = i;
+ if (cell->texture[i]) {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ texture->start[level] = cell->texture[i]->tiled_mapped[level];
+ texture->width[level] = cell->texture[i]->base.width[level];
+ texture->height[level] = cell->texture[i]->base.height[level];
+ texture->depth[level] = cell->texture[i]->base.depth[level];
+ }
+ texture->target = cell->texture[i]->base.target;
+ }
+ else {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ texture->start[level] = NULL;
+ texture->width[level] = 0;
+ texture->height[level] = 0;
+ texture->depth[level] = 0;
+ }
+ texture->target = 0;
+ }
+ }
+ }
+ cell->dirty_textures = 0x0;
+ }
+
+ if (cell->dirty & CELL_NEW_VERTEX_INFO) {
+ emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO,
+ &cell->vertex_info, sizeof(struct vertex_info));
+ }
+
+#if 0
+ if (cell->dirty & CELL_NEW_VS) {
+ const struct draw_context *const draw = cell->draw;
+ struct cell_shader_info info;
+
+ info.num_outputs = draw_num_vs_outputs(draw);
+ info.declarations = (uintptr_t) draw->vs.machine.Declarations;
+ info.num_declarations = draw->vs.machine.NumDeclarations;
+ info.instructions = (uintptr_t) draw->vs.machine.Instructions;
+ info.num_instructions = draw->vs.machine.NumInstructions;
+ info.immediates = (uintptr_t) draw->vs.machine.Imms;
+ info.num_immediates = draw->vs.machine.ImmLimit / 4;
+
+ emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info));
+ }
+#endif
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h
new file mode 100644
index 0000000000..59f8affe8d
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.h
@@ -0,0 +1,36 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef CELL_STATE_EMIT_H
+#define CELL_STATE_EMIT_H
+
+
+extern void
+cell_emit_state(struct cell_context *cell);
+
+
+#endif /* CELL_STATE_EMIT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
new file mode 100644
index 0000000000..78cb446c14
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
@@ -0,0 +1,1430 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file
+ * Generate code to perform all per-fragment operations.
+ *
+ * Code generated by these functions perform both alpha, depth, and stencil
+ * testing as well as alpha blending.
+ *
+ * \note
+ * Occlusion query is not supported, but this is the right place to add that
+ * support.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "cell_context.h"
+
+#include "rtasm/rtasm_ppc_spe.h"
+
+
+/**
+ * Generate code to perform alpha testing.
+ *
+ * The code generated by this function uses the register specificed by
+ * \c mask as both an input and an output.
+ *
+ * \param dsa Current alpha-test state
+ * \param f Function to which code should be appended
+ * \param mask Index of register containing active fragment mask
+ * \param alphas Index of register containing per-fragment alpha values
+ *
+ * \note Emits a maximum of 6 instructions.
+ */
+static void
+emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
+ struct spe_function *f, int mask, int alphas)
+{
+ /* If the alpha function is either NEVER or ALWAYS, there is no need to
+ * load the reference value into a register. ALWAYS is a fairly common
+ * case, and this optimization saves 2 instructions.
+ */
+ if (dsa->alpha.enabled
+ && (dsa->alpha.func != PIPE_FUNC_NEVER)
+ && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
+ int ref = spe_allocate_available_register(f);
+ int tmp_a = spe_allocate_available_register(f);
+ int tmp_b = spe_allocate_available_register(f);
+ union {
+ float f;
+ unsigned u;
+ } ref_val;
+ boolean complement = FALSE;
+
+ ref_val.f = dsa->alpha.ref;
+
+ spe_il(f, ref, ref_val.u & 0x0000ffff);
+ spe_ilh(f, ref, ref_val.u >> 16);
+
+ switch (dsa->alpha.func) {
+ case PIPE_FUNC_NOTEQUAL:
+ complement = TRUE;
+ /* FALLTHROUGH */
+
+ case PIPE_FUNC_EQUAL:
+ spe_fceq(f, tmp_a, ref, alphas);
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ complement = TRUE;
+ /* FALLTHROUGH */
+
+ case PIPE_FUNC_GREATER:
+ spe_fcgt(f, tmp_a, ref, alphas);
+ break;
+
+ case PIPE_FUNC_LESS:
+ complement = TRUE;
+ /* FALLTHROUGH */
+
+ case PIPE_FUNC_GEQUAL:
+ spe_fcgt(f, tmp_a, ref, alphas);
+ spe_fceq(f, tmp_b, ref, alphas);
+ spe_or(f, tmp_a, tmp_b, tmp_a);
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ case PIPE_FUNC_NEVER:
+ default:
+ assert(0);
+ break;
+ }
+
+ if (complement) {
+ spe_andc(f, mask, mask, tmp_a);
+ } else {
+ spe_and(f, mask, mask, tmp_a);
+ }
+
+ spe_release_register(f, ref);
+ spe_release_register(f, tmp_a);
+ spe_release_register(f, tmp_b);
+ } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
+ spe_il(f, mask, 0);
+ }
+}
+
+
+/**
+ * Generate code to perform Z testing. Four Z values are tested at once.
+ * \param dsa Current depth-test state
+ * \param f Function to which code should be appended
+ * \param mask Index of register to contain depth-pass mask
+ * \param stored Index of register containing values from depth buffer
+ * \param calculated Index of register containing per-fragment depth values
+ *
+ * \return
+ * If the calculated depth comparison mask is the actual mask, \c FALSE is
+ * returned. If the calculated depth comparison mask is the compliment of
+ * the actual mask, \c TRUE is returned.
+ *
+ * \note Emits a maximum of 3 instructions.
+ */
+static boolean
+emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
+ struct spe_function *f, int mask, int stored, int calculated)
+{
+ unsigned func = (dsa->depth.enabled)
+ ? dsa->depth.func : PIPE_FUNC_ALWAYS;
+ int tmp = spe_allocate_available_register(f);
+ boolean compliment = FALSE;
+
+ switch (func) {
+ case PIPE_FUNC_NEVER:
+ spe_il(f, mask, 0);
+ break;
+
+ case PIPE_FUNC_NOTEQUAL:
+ compliment = TRUE;
+ /* FALLTHROUGH */
+ case PIPE_FUNC_EQUAL:
+ spe_ceq(f, mask, calculated, stored);
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ compliment = TRUE;
+ /* FALLTHROUGH */
+ case PIPE_FUNC_GREATER:
+ spe_clgt(f, mask, calculated, stored);
+ break;
+
+ case PIPE_FUNC_LESS:
+ compliment = TRUE;
+ /* FALLTHROUGH */
+ case PIPE_FUNC_GEQUAL:
+ spe_clgt(f, mask, calculated, stored);
+ spe_ceq(f, tmp, calculated, stored);
+ spe_or(f, mask, mask, tmp);
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ spe_il(f, mask, ~0);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ spe_release_register(f, tmp);
+ return compliment;
+}
+
+
+/**
+ * Generate code to apply the stencil operation (after testing).
+ * \note Emits a maximum of 5 instructions.
+ *
+ * \warning
+ * Since \c out and \c in might be the same register, this routine cannot
+ * generate code that uses \c out as a temporary.
+ */
+static void
+emit_stencil_op(struct spe_function *f,
+ int out, int in, int mask, unsigned op, unsigned ref)
+{
+ const int clamp = spe_allocate_available_register(f);
+ const int clamp_mask = spe_allocate_available_register(f);
+ const int result = spe_allocate_available_register(f);
+
+ switch(op) {
+ case PIPE_STENCIL_OP_KEEP:
+ assert(0);
+ case PIPE_STENCIL_OP_ZERO:
+ spe_il(f, result, 0);
+ break;
+ case PIPE_STENCIL_OP_REPLACE:
+ spe_il(f, result, ref);
+ break;
+ case PIPE_STENCIL_OP_INCR:
+ /* clamp = [0xff, 0xff, 0xff, 0xff] */
+ spe_il(f, clamp, 0x0ff);
+ /* result[i] = in[i] + 1 */
+ spe_ai(f, result, in, 1);
+ /* clamp_mask[i] = (result[i] > 0xff) */
+ spe_clgti(f, clamp_mask, result, 0x0ff);
+ /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
+ spe_selb(f, result, result, clamp, clamp_mask);
+ break;
+ case PIPE_STENCIL_OP_DECR:
+ spe_il(f, clamp, 0);
+ spe_ai(f, result, in, -1);
+
+ /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned
+ * arithmetic.
+ */
+ spe_clgti(f, clamp_mask, result, 0x0ff);
+ spe_selb(f, result, result, clamp, clamp_mask);
+ break;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ spe_ai(f, result, in, 1);
+ break;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ spe_ai(f, result, in, -1);
+ break;
+ case PIPE_STENCIL_OP_INVERT:
+ spe_nor(f, result, in, in);
+ break;
+ default:
+ assert(0);
+ }
+
+ spe_selb(f, out, in, result, mask);
+
+ spe_release_register(f, result);
+ spe_release_register(f, clamp_mask);
+ spe_release_register(f, clamp);
+}
+
+
+/**
+ * Generate code to do stencil test. Four pixels are tested at once.
+ * \param dsa Depth / stencil test state
+ * \param face 0 for front face, 1 for back face
+ * \param f Function to append instructions to
+ * \param mask Register containing mask of fragments passing the
+ * alpha test
+ * \param depth_mask Register containing mask of fragments passing the
+ * depth test
+ * \param depth_compliment Is \c depth_mask the compliment of the actual mask?
+ * \param stencil Register containing values from stencil buffer
+ * \param depth_pass Register to store mask of fragments passing stencil test
+ * and depth test
+ *
+ * \note
+ * Emits a maximum of 10 + (3 * 5) = 25 instructions.
+ */
+static int
+emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
+ unsigned face,
+ struct spe_function *f,
+ int mask,
+ int depth_mask,
+ boolean depth_complement,
+ int stencil,
+ int depth_pass)
+{
+ int stencil_fail = spe_allocate_available_register(f);
+ int depth_fail = spe_allocate_available_register(f);
+ int stencil_mask = spe_allocate_available_register(f);
+ int stencil_pass = spe_allocate_available_register(f);
+ int face_stencil = spe_allocate_available_register(f);
+ int stencil_src = stencil;
+ const unsigned ref = (dsa->stencil[face].ref_value
+ & dsa->stencil[face].value_mask);
+ boolean complement = FALSE;
+ int stored;
+ int tmp = spe_allocate_available_register(f);
+
+
+ if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
+ && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
+ && (dsa->stencil[face].value_mask != 0x0ff)) {
+ stored = spe_allocate_available_register(f);
+ spe_andi(f, stored, stencil, dsa->stencil[face].value_mask);
+ } else {
+ stored = stencil;
+ }
+
+
+ switch (dsa->stencil[face].func) {
+ case PIPE_FUNC_NEVER:
+ spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
+ break;
+
+ case PIPE_FUNC_NOTEQUAL:
+ complement = TRUE;
+ /* FALLTHROUGH */
+ case PIPE_FUNC_EQUAL:
+ /* stencil_mask[i] = (stored[i] == ref) */
+ spe_ceqi(f, stencil_mask, stored, ref);
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ complement = TRUE;
+ /* FALLTHROUGH */
+ case PIPE_FUNC_GREATER:
+ complement = TRUE;
+ /* stencil_mask[i] = (stored[i] > ref) */
+ spe_clgti(f, stencil_mask, stored, ref);
+ break;
+
+ case PIPE_FUNC_LESS:
+ complement = TRUE;
+ /* FALLTHROUGH */
+ case PIPE_FUNC_GEQUAL:
+ /* stencil_mask[i] = (stored[i] > ref) */
+ spe_clgti(f, stencil_mask, stored, ref);
+ /* tmp[i] = (stored[i] == ref) */
+ spe_ceqi(f, tmp, stored, ref);
+ /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
+ spe_or(f, stencil_mask, stencil_mask, tmp);
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ /* See comment below. */
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ if (stored != stencil) {
+ spe_release_register(f, stored);
+ }
+ spe_release_register(f, tmp);
+
+
+ /* ALWAYS is a very common stencil-test, so some effort is applied to
+ * optimize that case. The stencil-pass mask is the same as the input
+ * fragment mask. This makes the stencil-test (above) a no-op, and the
+ * input fragment mask can be "renamed" the stencil-pass mask.
+ */
+ if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
+ spe_release_register(f, stencil_pass);
+ stencil_pass = mask;
+ } else {
+ if (complement) {
+ spe_andc(f, stencil_pass, mask, stencil_mask);
+ } else {
+ spe_and(f, stencil_pass, mask, stencil_mask);
+ }
+ }
+
+ if (depth_complement) {
+ spe_andc(f, depth_pass, stencil_pass, depth_mask);
+ } else {
+ spe_and(f, depth_pass, stencil_pass, depth_mask);
+ }
+
+
+ /* Conditionally emit code to update the stencil value under various
+ * condititons. Note that there is no need to generate code under the
+ * following circumstances:
+ *
+ * - Stencil write mask is zero.
+ * - For stencil-fail if the stencil test is ALWAYS
+ * - For depth-fail if the stencil test is NEVER
+ * - For depth-pass if the stencil test is NEVER
+ * - Any of the 3 conditions if the operation is KEEP
+ */
+ if (dsa->stencil[face].write_mask != 0) {
+ if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
+ && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
+ if (complement) {
+ spe_and(f, stencil_fail, mask, stencil_mask);
+ } else {
+ spe_andc(f, stencil_fail, mask, stencil_mask);
+ }
+
+ emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
+ dsa->stencil[face].fail_op,
+ dsa->stencil[face].ref_value);
+
+ stencil_src = face_stencil;
+ }
+
+ if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
+ && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
+ if (depth_complement) {
+ spe_and(f, depth_fail, stencil_pass, depth_mask);
+ } else {
+ spe_andc(f, depth_fail, stencil_pass, depth_mask);
+ }
+
+ emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
+ dsa->stencil[face].zfail_op,
+ dsa->stencil[face].ref_value);
+ stencil_src = face_stencil;
+ }
+
+ if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
+ && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
+ emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
+ dsa->stencil[face].zpass_op,
+ dsa->stencil[face].ref_value);
+ stencil_src = face_stencil;
+ }
+ }
+
+ spe_release_register(f, stencil_fail);
+ spe_release_register(f, depth_fail);
+ spe_release_register(f, stencil_mask);
+ if (stencil_pass != mask) {
+ spe_release_register(f, stencil_pass);
+ }
+
+ /* If all of the stencil operations were KEEP or the stencil write mask was
+ * zero, "stencil_src" will still be set to "stencil". In this case
+ * release the "face_stencil" register. Otherwise apply the stencil write
+ * mask to select bits from the calculated stencil value and the previous
+ * stencil value.
+ */
+ if (stencil_src == stencil) {
+ spe_release_register(f, face_stencil);
+ } else if (dsa->stencil[face].write_mask != 0x0ff) {
+ int tmp = spe_allocate_available_register(f);
+
+ spe_il(f, tmp, dsa->stencil[face].write_mask);
+ spe_selb(f, stencil_src, stencil, stencil_src, tmp);
+
+ spe_release_register(f, tmp);
+ }
+
+ return stencil_src;
+}
+
+
+void
+cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
+{
+ struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
+ struct spe_function *const f = &cdsa->code;
+
+ /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
+ * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
+ * up to 64 to make it a happy power-of-two.
+ */
+ spe_init_func(f, SPE_INST_SIZE * 64);
+
+
+ /* Allocate registers for the function's input parameters. Cleverly (and
+ * clever code is usually dangerous, but I couldn't resist) the generated
+ * function returns a structure. Returned structures start with register
+ * 3, and the structure fields are ordered to match up exactly with the
+ * input parameters.
+ */
+ int mask = spe_allocate_register(f, 3);
+ int depth = spe_allocate_register(f, 4);
+ int stencil = spe_allocate_register(f, 5);
+ int zvals = spe_allocate_register(f, 6);
+ int frag_a = spe_allocate_register(f, 7);
+ int facing = spe_allocate_register(f, 8);
+
+ int depth_mask = spe_allocate_available_register(f);
+
+ boolean depth_complement;
+
+
+ emit_alpha_test(dsa, f, mask, frag_a);
+
+ depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
+
+ if (dsa->stencil[0].enabled) {
+ const int front_depth_pass = spe_allocate_available_register(f);
+ int front_stencil = emit_stencil_test(dsa, 0, f, mask,
+ depth_mask, depth_complement,
+ stencil, front_depth_pass);
+
+ if (dsa->stencil[1].enabled) {
+ const int back_depth_pass = spe_allocate_available_register(f);
+ int back_stencil = emit_stencil_test(dsa, 1, f, mask,
+ depth_mask, depth_complement,
+ stencil, back_depth_pass);
+
+ /* If the front facing stencil value and the back facing stencil
+ * value are stored in the same register, there is no need to select
+ * a value based on the facing. This can happen if the stencil value
+ * was not modified due to the write masks being zero, the stencil
+ * operations being KEEP, etc.
+ */
+ if (front_stencil != back_stencil) {
+ spe_selb(f, stencil, back_stencil, front_stencil, facing);
+ }
+
+ if (back_stencil != stencil) {
+ spe_release_register(f, back_stencil);
+ }
+
+ if (front_stencil != stencil) {
+ spe_release_register(f, front_stencil);
+ }
+
+ spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
+
+ spe_release_register(f, back_depth_pass);
+ } else {
+ if (front_stencil != stencil) {
+ spe_or(f, stencil, front_stencil, front_stencil);
+ spe_release_register(f, front_stencil);
+ }
+ spe_or(f, mask, front_depth_pass, front_depth_pass);
+ }
+
+ spe_release_register(f, front_depth_pass);
+ } else if (dsa->depth.enabled) {
+ if (depth_complement) {
+ spe_andc(f, mask, mask, depth_mask);
+ } else {
+ spe_and(f, mask, mask, depth_mask);
+ }
+ }
+
+ if (dsa->depth.writemask) {
+ spe_selb(f, depth, depth, zvals, mask);
+ }
+
+ spe_bi(f, 0, 0, 0); /* return from function call */
+
+
+#if 0
+ {
+ const uint32_t *p = f->store;
+ unsigned i;
+
+ printf("# alpha (%sabled)\n",
+ (dsa->alpha.enabled) ? "en" : "dis");
+ printf("# func: %u\n", dsa->alpha.func);
+ printf("# ref: %.2f\n", dsa->alpha.ref);
+
+ printf("# depth (%sabled)\n",
+ (dsa->depth.enabled) ? "en" : "dis");
+ printf("# func: %u\n", dsa->depth.func);
+
+ for (i = 0; i < 2; i++) {
+ printf("# %s stencil (%sabled)\n",
+ (i == 0) ? "front" : "back",
+ (dsa->stencil[i].enabled) ? "en" : "dis");
+
+ printf("# func: %u\n", dsa->stencil[i].func);
+ printf("# op (sf, zf, zp): %u %u %u\n",
+ dsa->stencil[i].fail_op,
+ dsa->stencil[i].zfail_op,
+ dsa->stencil[i].zpass_op);
+ printf("# ref value / value mask / write mask: %02x %02x %02x\n",
+ dsa->stencil[i].ref_value,
+ dsa->stencil[i].value_mask,
+ dsa->stencil[i].write_mask);
+ }
+
+ printf("\t.text\n");
+ for (/* empty */; p < f->csr; p++) {
+ printf("\t.long\t0x%04x\n", *p);
+ }
+ fflush(stdout);
+ }
+#endif
+}
+
+
+/**
+ * \note Emits a maximum of 3 instructions
+ */
+static int
+emit_alpha_factor_calculation(struct spe_function *f,
+ unsigned factor,
+ int src_alpha, int dst_alpha, int const_alpha)
+{
+ int factor_reg;
+ int tmp;
+
+
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ factor_reg = -1;
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ factor_reg = spe_allocate_available_register(f);
+
+ spe_or(f, factor_reg, src_alpha, src_alpha);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ factor_reg = dst_alpha;
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ factor_reg = -1;
+ break;
+
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ factor_reg = spe_allocate_available_register(f);
+
+ tmp = spe_allocate_available_register(f);
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+ spe_fs(f, factor_reg, tmp, const_alpha);
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ factor_reg = const_alpha;
+ break;
+
+ case PIPE_BLENDFACTOR_ZERO:
+ factor_reg = -1;
+ break;
+
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ tmp = spe_allocate_available_register(f);
+ factor_reg = spe_allocate_available_register(f);
+
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+ spe_fs(f, factor_reg, tmp, src_alpha);
+
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ tmp = spe_allocate_available_register(f);
+ factor_reg = spe_allocate_available_register(f);
+
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+ spe_fs(f, factor_reg, tmp, dst_alpha);
+
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ default:
+ assert(0);
+ factor_reg = -1;
+ break;
+ }
+
+ return factor_reg;
+}
+
+
+/**
+ * \note Emits a maximum of 6 instructions
+ */
+static void
+emit_color_factor_calculation(struct spe_function *f,
+ unsigned sF, unsigned mask,
+ const int *src,
+ const int *dst,
+ const int *const_color,
+ int *factor)
+{
+ int tmp;
+ unsigned i;
+
+
+ factor[0] = -1;
+ factor[1] = -1;
+ factor[2] = -1;
+ factor[3] = -1;
+
+ switch (sF) {
+ case PIPE_BLENDFACTOR_ONE:
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ for (i = 0; i < 3; ++i) {
+ if ((mask & (1U << i)) != 0) {
+ factor[i] = spe_allocate_available_register(f);
+ spe_or(f, factor[i], src[i], src[i]);
+ }
+ }
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ factor[0] = spe_allocate_available_register(f);
+ factor[1] = factor[0];
+ factor[2] = factor[0];
+
+ spe_or(f, factor[0], src[3], src[3]);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ factor[0] = dst[3];
+ factor[1] = dst[3];
+ factor[2] = dst[3];
+ break;
+
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ factor[0] = dst[0];
+ factor[1] = dst[1];
+ factor[2] = dst[2];
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ tmp = spe_allocate_available_register(f);
+ factor[0] = spe_allocate_available_register(f);
+ factor[1] = factor[0];
+ factor[2] = factor[0];
+
+ /* Alpha saturate means min(As, 1-Ad).
+ */
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+ spe_fs(f, tmp, tmp, dst[3]);
+ spe_fcgt(f, factor[0], tmp, src[3]);
+ spe_selb(f, factor[0], src[3], tmp, factor[0]);
+
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ tmp = spe_allocate_available_register(f);
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+
+ for (i = 0; i < 3; i++) {
+ factor[i] = spe_allocate_available_register(f);
+
+ spe_fs(f, factor[i], tmp, const_color[i]);
+ }
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ for (i = 0; i < 3; i++) {
+ factor[i] = const_color[i];
+ }
+ break;
+
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ factor[0] = spe_allocate_available_register(f);
+ factor[1] = factor[0];
+ factor[2] = factor[0];
+
+ tmp = spe_allocate_available_register(f);
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+ spe_fs(f, factor[0], tmp, const_color[3]);
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ factor[0] = const_color[3];
+ factor[1] = factor[0];
+ factor[2] = factor[0];
+ break;
+
+ case PIPE_BLENDFACTOR_ZERO:
+ break;
+
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ tmp = spe_allocate_available_register(f);
+
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+
+ for (i = 0; i < 3; ++i) {
+ if ((mask & (1U << i)) != 0) {
+ factor[i] = spe_allocate_available_register(f);
+ spe_fs(f, factor[i], tmp, src[i]);
+ }
+ }
+
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ tmp = spe_allocate_available_register(f);
+ factor[0] = spe_allocate_available_register(f);
+ factor[1] = factor[0];
+ factor[2] = factor[0];
+
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+ spe_fs(f, factor[0], tmp, src[3]);
+
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ tmp = spe_allocate_available_register(f);
+ factor[0] = spe_allocate_available_register(f);
+ factor[1] = factor[0];
+ factor[2] = factor[0];
+
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+ spe_fs(f, factor[0], tmp, dst[3]);
+
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ tmp = spe_allocate_available_register(f);
+
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+
+ for (i = 0; i < 3; ++i) {
+ if ((mask & (1U << i)) != 0) {
+ factor[i] = spe_allocate_available_register(f);
+ spe_fs(f, factor[i], tmp, dst[i]);
+ }
+ }
+
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ default:
+ assert(0);
+ }
+}
+
+
+static void
+emit_blend_calculation(struct spe_function *f,
+ unsigned func, unsigned sF, unsigned dF,
+ int src, int src_factor, int dst, int dst_factor)
+{
+ int tmp = spe_allocate_available_register(f);
+
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ if (sF == PIPE_BLENDFACTOR_ONE) {
+ if (dF == PIPE_BLENDFACTOR_ZERO) {
+ /* Do nothing. */
+ } else if (dF == PIPE_BLENDFACTOR_ONE) {
+ spe_fa(f, src, src, dst);
+ }
+ } else if (sF == PIPE_BLENDFACTOR_ZERO) {
+ if (dF == PIPE_BLENDFACTOR_ZERO) {
+ spe_il(f, src, 0);
+ } else if (dF == PIPE_BLENDFACTOR_ONE) {
+ spe_or(f, src, dst, dst);
+ } else {
+ spe_fm(f, src, dst, dst_factor);
+ }
+ } else if (dF == PIPE_BLENDFACTOR_ZERO) {
+ spe_fm(f, src, src, src_factor);
+ } else {
+ spe_fm(f, tmp, dst, dst_factor);
+ spe_fma(f, src, src, src_factor, tmp);
+ }
+ break;
+
+ case PIPE_BLEND_SUBTRACT:
+ if (sF == PIPE_BLENDFACTOR_ONE) {
+ if (dF == PIPE_BLENDFACTOR_ZERO) {
+ /* Do nothing. */
+ } else if (dF == PIPE_BLENDFACTOR_ONE) {
+ spe_fs(f, src, src, dst);
+ }
+ } else if (sF == PIPE_BLENDFACTOR_ZERO) {
+ if (dF == PIPE_BLENDFACTOR_ZERO) {
+ spe_il(f, src, 0);
+ } else if (dF == PIPE_BLENDFACTOR_ONE) {
+ spe_il(f, tmp, 0);
+ spe_fs(f, src, tmp, dst);
+ } else {
+ spe_fm(f, src, dst, dst_factor);
+ }
+ } else if (dF == PIPE_BLENDFACTOR_ZERO) {
+ spe_fm(f, src, src, src_factor);
+ } else {
+ spe_fm(f, tmp, dst, dst_factor);
+ spe_fms(f, src, src, src_factor, tmp);
+ }
+ break;
+
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ if (sF == PIPE_BLENDFACTOR_ONE) {
+ if (dF == PIPE_BLENDFACTOR_ZERO) {
+ spe_il(f, tmp, 0);
+ spe_fs(f, src, tmp, src);
+ } else if (dF == PIPE_BLENDFACTOR_ONE) {
+ spe_fs(f, src, dst, src);
+ }
+ } else if (sF == PIPE_BLENDFACTOR_ZERO) {
+ if (dF == PIPE_BLENDFACTOR_ZERO) {
+ spe_il(f, src, 0);
+ } else if (dF == PIPE_BLENDFACTOR_ONE) {
+ spe_or(f, src, dst, dst);
+ } else {
+ spe_fm(f, src, dst, dst_factor);
+ }
+ } else if (dF == PIPE_BLENDFACTOR_ZERO) {
+ spe_fm(f, src, src, src_factor);
+ } else {
+ spe_fm(f, tmp, src, src_factor);
+ spe_fms(f, src, src, dst_factor, tmp);
+ }
+ break;
+
+ case PIPE_BLEND_MIN:
+ spe_cgt(f, tmp, src, dst);
+ spe_selb(f, src, src, dst, tmp);
+ break;
+
+ case PIPE_BLEND_MAX:
+ spe_cgt(f, tmp, src, dst);
+ spe_selb(f, src, dst, src, tmp);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ spe_release_register(f, tmp);
+}
+
+
+/**
+ * Generate code to perform alpha blending on the SPE
+ */
+void
+cell_generate_alpha_blend(struct cell_blend_state *cb)
+{
+ struct pipe_blend_state *const b = &cb->base;
+ struct spe_function *const f = &cb->code;
+
+ /* This code generates a maximum of 3 (source alpha factor)
+ * + 3 (destination alpha factor) + (3 * 6) (source color factor)
+ * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
+ * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to
+ * make it a happy power-of-two.
+ */
+ spe_init_func(f, SPE_INST_SIZE * 64);
+
+
+ const int frag[4] = {
+ spe_allocate_register(f, 3),
+ spe_allocate_register(f, 4),
+ spe_allocate_register(f, 5),
+ spe_allocate_register(f, 6),
+ };
+ const int pixel[4] = {
+ spe_allocate_register(f, 7),
+ spe_allocate_register(f, 8),
+ spe_allocate_register(f, 9),
+ spe_allocate_register(f, 10),
+ };
+ const int const_color[4] = {
+ spe_allocate_register(f, 11),
+ spe_allocate_register(f, 12),
+ spe_allocate_register(f, 13),
+ spe_allocate_register(f, 14),
+ };
+ unsigned func[4];
+ unsigned sF[4];
+ unsigned dF[4];
+ unsigned i;
+ int src_factor[4];
+ int dst_factor[4];
+
+
+ /* Does the selected blend mode make use of the source / destination
+ * color (RGB) blend factors?
+ */
+ boolean need_color_factor = b->blend_enable
+ && (b->rgb_func != PIPE_BLEND_MIN)
+ && (b->rgb_func != PIPE_BLEND_MAX);
+
+ /* Does the selected blend mode make use of the source / destination
+ * alpha blend factors?
+ */
+ boolean need_alpha_factor = b->blend_enable
+ && (b->alpha_func != PIPE_BLEND_MIN)
+ && (b->alpha_func != PIPE_BLEND_MAX);
+
+
+ if (b->blend_enable) {
+ sF[0] = b->rgb_src_factor;
+ sF[1] = sF[0];
+ sF[2] = sF[0];
+ switch (b->alpha_src_factor & 0x0f) {
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ sF[3] = PIPE_BLENDFACTOR_ONE;
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ sF[3] = b->alpha_src_factor + 1;
+ break;
+ default:
+ sF[3] = b->alpha_src_factor;
+ }
+
+ dF[0] = b->rgb_dst_factor;
+ dF[1] = dF[0];
+ dF[2] = dF[0];
+ switch (b->alpha_dst_factor & 0x0f) {
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ dF[3] = b->alpha_dst_factor + 1;
+ break;
+ default:
+ dF[3] = b->alpha_dst_factor;
+ }
+
+ func[0] = b->rgb_func;
+ func[1] = func[0];
+ func[2] = func[0];
+ func[3] = b->alpha_func;
+ } else {
+ sF[0] = PIPE_BLENDFACTOR_ONE;
+ sF[1] = PIPE_BLENDFACTOR_ONE;
+ sF[2] = PIPE_BLENDFACTOR_ONE;
+ sF[3] = PIPE_BLENDFACTOR_ONE;
+ dF[0] = PIPE_BLENDFACTOR_ZERO;
+ dF[1] = PIPE_BLENDFACTOR_ZERO;
+ dF[2] = PIPE_BLENDFACTOR_ZERO;
+ dF[3] = PIPE_BLENDFACTOR_ZERO;
+
+ func[0] = PIPE_BLEND_ADD;
+ func[1] = PIPE_BLEND_ADD;
+ func[2] = PIPE_BLEND_ADD;
+ func[3] = PIPE_BLEND_ADD;
+ }
+
+
+ /* If alpha writing is enabled and the alpha blend mode requires use of
+ * the alpha factor, calculate the alpha factor.
+ */
+ if (((b->colormask & 8) != 0) && need_alpha_factor) {
+ src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
+ frag[3], pixel[3]);
+
+ /* If the alpha destination blend factor is the same as the alpha source
+ * blend factor, re-use the previously calculated value.
+ */
+ dst_factor[3] = (dF[3] == sF[3])
+ ? src_factor[3]
+ : emit_alpha_factor_calculation(f, dF[3], const_color[3],
+ frag[3], pixel[3]);
+ }
+
+
+ if (sF[0] == sF[3]) {
+ src_factor[0] = src_factor[3];
+ src_factor[1] = src_factor[3];
+ src_factor[2] = src_factor[3];
+ } else if (sF[0] == dF[3]) {
+ src_factor[0] = dst_factor[3];
+ src_factor[1] = dst_factor[3];
+ src_factor[2] = dst_factor[3];
+ } else if (need_color_factor) {
+ emit_color_factor_calculation(f,
+ b->rgb_src_factor,
+ b->colormask,
+ frag, pixel, const_color, src_factor);
+ }
+
+
+ if (dF[0] == sF[3]) {
+ dst_factor[0] = src_factor[3];
+ dst_factor[1] = src_factor[3];
+ dst_factor[2] = src_factor[3];
+ } else if (dF[0] == dF[3]) {
+ dst_factor[0] = dst_factor[3];
+ dst_factor[1] = dst_factor[3];
+ dst_factor[2] = dst_factor[3];
+ } else if (dF[0] == sF[0]) {
+ dst_factor[0] = src_factor[0];
+ dst_factor[1] = src_factor[1];
+ dst_factor[2] = src_factor[2];
+ } else if (need_color_factor) {
+ emit_color_factor_calculation(f,
+ b->rgb_dst_factor,
+ b->colormask,
+ frag, pixel, const_color, dst_factor);
+ }
+
+
+
+ for (i = 0; i < 4; ++i) {
+ if ((b->colormask & (1U << i)) != 0) {
+ emit_blend_calculation(f,
+ func[i], sF[i], dF[i],
+ frag[i], src_factor[i],
+ pixel[i], dst_factor[i]);
+ }
+ }
+
+ spe_bi(f, 0, 0, 0);
+
+#if 0
+ {
+ const uint32_t *p = f->store;
+
+ printf("# %u instructions\n", f->csr - f->store);
+ printf("# blend (%sabled)\n",
+ (cb->base.blend_enable) ? "en" : "dis");
+ printf("# RGB func / sf / df: %u %u %u\n",
+ cb->base.rgb_func,
+ cb->base.rgb_src_factor,
+ cb->base.rgb_dst_factor);
+ printf("# ALP func / sf / df: %u %u %u\n",
+ cb->base.alpha_func,
+ cb->base.alpha_src_factor,
+ cb->base.alpha_dst_factor);
+
+ printf("\t.text\n");
+ for (/* empty */; p < f->csr; p++) {
+ printf("\t.long\t0x%04x\n", *p);
+ }
+ fflush(stdout);
+ }
+#endif
+}
+
+
+static int
+PC_OFFSET(const struct spe_function *f, const void *d)
+{
+ const intptr_t pc = (intptr_t) &f->store[f->num_inst];
+ const intptr_t ea = ~0x0f & (intptr_t) d;
+
+ return (ea - pc) >> 2;
+}
+
+
+/**
+ * Generate code to perform color conversion and logic op
+ *
+ * \bug
+ * The code generated by this function should also perform dithering.
+ *
+ * \bug
+ * The code generated by this function should also perform color-write
+ * masking.
+ *
+ * \bug
+ * Only two framebuffer formats are supported at this time.
+ */
+void
+cell_generate_logic_op(struct spe_function *f,
+ const struct pipe_blend_state *blend,
+ struct pipe_surface *surf)
+{
+ const unsigned logic_op = (blend->logicop_enable)
+ ? blend->logicop_func : PIPE_LOGICOP_COPY;
+
+ /* This code generates a maximum of 37 instructions. An additional 32
+ * bytes (equiv. to 8 instructions) are needed for data storage. Round up
+ * to 64 to make it a happy power-of-two.
+ */
+ spe_init_func(f, SPE_INST_SIZE * 64);
+
+
+ /* Pixel colors in framebuffer format in AoS layout.
+ */
+ const int pixel[4] = {
+ spe_allocate_register(f, 3),
+ spe_allocate_register(f, 4),
+ spe_allocate_register(f, 5),
+ spe_allocate_register(f, 6),
+ };
+
+ /* Fragment colors stored as floats in SoA layout.
+ */
+ const int frag[4] = {
+ spe_allocate_register(f, 7),
+ spe_allocate_register(f, 8),
+ spe_allocate_register(f, 9),
+ spe_allocate_register(f, 10),
+ };
+
+ const int mask = spe_allocate_register(f, 11);
+
+
+ /* Short-circuit the noop and invert cases.
+ */
+ if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) {
+ spe_bi(f, 0, 0, 0);
+ return;
+ } else if (logic_op == PIPE_LOGICOP_INVERT) {
+ spe_nor(f, pixel[0], pixel[0], pixel[0]);
+ spe_nor(f, pixel[1], pixel[1], pixel[1]);
+ spe_nor(f, pixel[2], pixel[2], pixel[2]);
+ spe_nor(f, pixel[3], pixel[3], pixel[3]);
+ spe_bi(f, 0, 0, 0);
+ return;
+ }
+
+
+ const int tmp[4] = {
+ spe_allocate_available_register(f),
+ spe_allocate_available_register(f),
+ spe_allocate_available_register(f),
+ spe_allocate_available_register(f),
+ };
+
+ const int shuf_xpose_hi = spe_allocate_available_register(f);
+ const int shuf_xpose_lo = spe_allocate_available_register(f);
+ const int shuf_color = spe_allocate_available_register(f);
+
+
+ /* Pointer to the begining of the function's private data area.
+ */
+ uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
+
+
+ /* Convert fragment colors to framebuffer format in AoS layout.
+ */
+ switch (surf->format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ data[0] = 0x00010203;
+ data[1] = 0x10111213;
+ data[2] = 0x04050607;
+ data[3] = 0x14151617;
+ data[4] = 0x0c000408;
+ data[5] = 0x80808080;
+ data[6] = 0x80808080;
+ data[7] = 0x80808080;
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ data[0] = 0x03020100;
+ data[1] = 0x13121110;
+ data[2] = 0x07060504;
+ data[3] = 0x17161514;
+ data[4] = 0x0804000c;
+ data[5] = 0x80808080;
+ data[6] = 0x80808080;
+ data[7] = 0x80808080;
+ break;
+ default:
+ fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
+ ASSERT(0);
+ }
+
+ spe_ilh(f, tmp[0], 0x0808);
+ spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
+ spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
+ spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
+
+ spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
+ spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
+ spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
+ spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
+
+ spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
+ spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
+ spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
+ spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
+
+ spe_cfltu(f, frag[0], frag[0], 32);
+ spe_cfltu(f, frag[1], frag[1], 32);
+ spe_cfltu(f, frag[2], frag[2], 32);
+ spe_cfltu(f, frag[3], frag[3], 32);
+
+ spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
+ spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
+ spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
+ spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
+
+
+ /* If logic op is enabled, perform the requested logical operation on the
+ * converted fragment colors and the pixel colors.
+ */
+ switch (logic_op) {
+ case PIPE_LOGICOP_CLEAR:
+ spe_il(f, frag[0], 0);
+ spe_il(f, frag[1], 0);
+ spe_il(f, frag[2], 0);
+ spe_il(f, frag[3], 0);
+ break;
+ case PIPE_LOGICOP_NOR:
+ spe_nor(f, frag[0], frag[0], pixel[0]);
+ spe_nor(f, frag[1], frag[1], pixel[1]);
+ spe_nor(f, frag[2], frag[2], pixel[2]);
+ spe_nor(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_AND_INVERTED:
+ spe_andc(f, frag[0], pixel[0], frag[0]);
+ spe_andc(f, frag[1], pixel[1], frag[1]);
+ spe_andc(f, frag[2], pixel[2], frag[2]);
+ spe_andc(f, frag[3], pixel[3], frag[3]);
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ spe_nor(f, frag[0], frag[0], frag[0]);
+ spe_nor(f, frag[1], frag[1], frag[1]);
+ spe_nor(f, frag[2], frag[2], frag[2]);
+ spe_nor(f, frag[3], frag[3], frag[3]);
+ break;
+ case PIPE_LOGICOP_AND_REVERSE:
+ spe_andc(f, frag[0], frag[0], pixel[0]);
+ spe_andc(f, frag[1], frag[1], pixel[1]);
+ spe_andc(f, frag[2], frag[2], pixel[2]);
+ spe_andc(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_XOR:
+ spe_xor(f, frag[0], frag[0], pixel[0]);
+ spe_xor(f, frag[1], frag[1], pixel[1]);
+ spe_xor(f, frag[2], frag[2], pixel[2]);
+ spe_xor(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_NAND:
+ spe_nand(f, frag[0], frag[0], pixel[0]);
+ spe_nand(f, frag[1], frag[1], pixel[1]);
+ spe_nand(f, frag[2], frag[2], pixel[2]);
+ spe_nand(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_AND:
+ spe_and(f, frag[0], frag[0], pixel[0]);
+ spe_and(f, frag[1], frag[1], pixel[1]);
+ spe_and(f, frag[2], frag[2], pixel[2]);
+ spe_and(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_EQUIV:
+ spe_eqv(f, frag[0], frag[0], pixel[0]);
+ spe_eqv(f, frag[1], frag[1], pixel[1]);
+ spe_eqv(f, frag[2], frag[2], pixel[2]);
+ spe_eqv(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_OR_INVERTED:
+ spe_orc(f, frag[0], pixel[0], frag[0]);
+ spe_orc(f, frag[1], pixel[1], frag[1]);
+ spe_orc(f, frag[2], pixel[2], frag[2]);
+ spe_orc(f, frag[3], pixel[3], frag[3]);
+ break;
+ case PIPE_LOGICOP_COPY:
+ break;
+ case PIPE_LOGICOP_OR_REVERSE:
+ spe_orc(f, frag[0], frag[0], pixel[0]);
+ spe_orc(f, frag[1], frag[1], pixel[1]);
+ spe_orc(f, frag[2], frag[2], pixel[2]);
+ spe_orc(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_OR:
+ spe_or(f, frag[0], frag[0], pixel[0]);
+ spe_or(f, frag[1], frag[1], pixel[1]);
+ spe_or(f, frag[2], frag[2], pixel[2]);
+ spe_or(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_SET:
+ spe_il(f, frag[0], ~0);
+ spe_il(f, frag[1], ~0);
+ spe_il(f, frag[2], ~0);
+ spe_il(f, frag[3], ~0);
+ break;
+
+ /* These two cases are short-circuited above.
+ */
+ case PIPE_LOGICOP_INVERT:
+ case PIPE_LOGICOP_NOOP:
+ default:
+ assert(0);
+ }
+
+
+ /* Apply fragment mask.
+ */
+ spe_ilh(f, tmp[0], 0x0000);
+ spe_ilh(f, tmp[1], 0x0404);
+ spe_ilh(f, tmp[2], 0x0808);
+ spe_ilh(f, tmp[3], 0x0c0c);
+
+ spe_shufb(f, tmp[0], mask, mask, tmp[0]);
+ spe_shufb(f, tmp[1], mask, mask, tmp[1]);
+ spe_shufb(f, tmp[2], mask, mask, tmp[2]);
+ spe_shufb(f, tmp[3], mask, mask, tmp[3]);
+
+ spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
+ spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
+ spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
+ spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
+
+ spe_bi(f, 0, 0, 0);
+
+#if 0
+ {
+ const uint32_t *p = f->store;
+ unsigned i;
+
+ printf("# %u instructions\n", f->csr - f->store);
+
+ printf("\t.text\n");
+ for (i = 0; i < 64; i++) {
+ printf("\t.long\t0x%04x\n", p[i]);
+ }
+ fflush(stdout);
+ }
+#endif
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h
new file mode 100644
index 0000000000..a8267a5133
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h
@@ -0,0 +1,39 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CELL_STATE_PER_FRAGMENT_H
+#define CELL_STATE_PER_FRAGMENT_H
+
+extern void
+cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa);
+
+extern void
+cell_generate_alpha_blend(struct cell_blend_state *cb);
+
+extern void
+cell_generate_logic_op(struct spe_function *f,
+ const struct pipe_blend_state *blend,
+ struct pipe_surface *surf);
+
+#endif /* CELL_STATE_PER_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
new file mode 100644
index 0000000000..cda39f8d59
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -0,0 +1,221 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "draw/draw_context.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "cell_context.h"
+#include "cell_state.h"
+#include "cell_gen_fp.h"
+
+
+/** cast wrapper */
+static INLINE struct cell_fragment_shader_state *
+cell_fragment_shader_state(void *shader)
+{
+ return (struct cell_fragment_shader_state *) shader;
+}
+
+
+/** cast wrapper */
+static INLINE struct cell_vertex_shader_state *
+cell_vertex_shader_state(void *shader)
+{
+ return (struct cell_vertex_shader_state *) shader;
+}
+
+
+/**
+ * Create fragment shader state.
+ * Called via pipe->create_fs_state()
+ */
+static void *
+cell_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct cell_context *cell = cell_context(pipe);
+ struct cell_fragment_shader_state *cfs;
+
+ cfs = CALLOC_STRUCT(cell_fragment_shader_state);
+ if (!cfs)
+ return NULL;
+
+ cfs->shader.tokens = tgsi_dup_tokens(templ->tokens);
+ if (!cfs->shader.tokens) {
+ FREE(cfs);
+ return NULL;
+ }
+
+ tgsi_scan_shader(templ->tokens, &cfs->info);
+
+ cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code);
+
+ return cfs;
+}
+
+
+/**
+ * Called via pipe->bind_fs_state()
+ */
+static void
+cell_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ cell->fs = cell_fragment_shader_state(fs);
+
+ cell->dirty |= CELL_NEW_FS;
+}
+
+
+/**
+ * Called via pipe->delete_fs_state()
+ */
+static void
+cell_delete_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs);
+
+ spe_release_func(&cfs->code);
+
+ FREE((void *) cfs->shader.tokens);
+ FREE(cfs);
+}
+
+
+/**
+ * Create vertex shader state.
+ * Called via pipe->create_vs_state()
+ */
+static void *
+cell_create_vs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct cell_context *cell = cell_context(pipe);
+ struct cell_vertex_shader_state *cvs;
+
+ cvs = CALLOC_STRUCT(cell_vertex_shader_state);
+ if (!cvs)
+ return NULL;
+
+ cvs->shader.tokens = tgsi_dup_tokens(templ->tokens);
+ if (!cvs->shader.tokens) {
+ FREE(cvs);
+ return NULL;
+ }
+
+ tgsi_scan_shader(templ->tokens, &cvs->info);
+
+ cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader);
+ if (cvs->draw_data == NULL) {
+ FREE( (void *) cvs->shader.tokens );
+ FREE( cvs );
+ return NULL;
+ }
+
+ return cvs;
+}
+
+
+/**
+ * Called via pipe->bind_vs_state()
+ */
+static void
+cell_bind_vs_state(struct pipe_context *pipe, void *vs)
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ cell->vs = cell_vertex_shader_state(vs);
+
+ draw_bind_vertex_shader(cell->draw,
+ (cell->vs ? cell->vs->draw_data : NULL));
+
+ cell->dirty |= CELL_NEW_VS;
+}
+
+
+/**
+ * Called via pipe->delete_vs_state()
+ */
+static void
+cell_delete_vs_state(struct pipe_context *pipe, void *vs)
+{
+ struct cell_context *cell = cell_context(pipe);
+ struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs);
+
+ draw_delete_vertex_shader(cell->draw, cvs->draw_data);
+ FREE( (void *) cvs->shader.tokens );
+ FREE( cvs );
+}
+
+
+/**
+ * Called via pipe->set_constant_buffer()
+ */
+static void
+cell_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf)
+{
+ struct cell_context *cell = cell_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index == 0);
+
+ draw_flush(cell->draw);
+
+ /* note: reference counting */
+ winsys_buffer_reference(ws,
+ &cell->constants[shader].buffer,
+ buf->buffer);
+ cell->constants[shader].size = buf->size;
+
+ if (shader == PIPE_SHADER_VERTEX)
+ cell->dirty |= CELL_NEW_VS_CONSTANTS;
+ else if (shader == PIPE_SHADER_FRAGMENT)
+ cell->dirty |= CELL_NEW_FS_CONSTANTS;
+}
+
+
+void
+cell_init_shader_functions(struct cell_context *cell)
+{
+ cell->pipe.create_fs_state = cell_create_fs_state;
+ cell->pipe.bind_fs_state = cell_bind_fs_state;
+ cell->pipe.delete_fs_state = cell_delete_fs_state;
+
+ cell->pipe.create_vs_state = cell_create_vs_state;
+ cell->pipe.bind_vs_state = cell_bind_vs_state;
+ cell->pipe.delete_vs_state = cell_delete_vs_state;
+
+ cell->pipe.set_constant_buffer = cell_set_constant_buffer;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
new file mode 100644
index 0000000000..fbe55c8472
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -0,0 +1,79 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "cell_context.h"
+#include "cell_state.h"
+
+#include "draw/draw_context.h"
+
+
+static void
+cell_set_vertex_elements(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *elements)
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ assert(count <= PIPE_MAX_ATTRIBS);
+
+ memcpy(cell->vertex_element, elements, count * sizeof(elements[0]));
+ cell->num_vertex_elements = count;
+
+ cell->dirty |= CELL_NEW_VERTEX;
+
+ draw_set_vertex_elements(cell->draw, count, elements);
+}
+
+
+static void
+cell_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct cell_context *cell = cell_context(pipe);
+
+ assert(count <= PIPE_MAX_ATTRIBS);
+
+ memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0]));
+ cell->num_vertex_buffers = count;
+
+ cell->dirty |= CELL_NEW_VERTEX;
+
+ draw_set_vertex_buffers(cell->draw, count, buffers);
+}
+
+
+void
+cell_init_vertex_functions(struct cell_context *cell)
+{
+ cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
+ cell->pipe.set_vertex_elements = cell_set_vertex_elements;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c
new file mode 100644
index 0000000000..c9203fee08
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_surface.c
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_rect.h"
+#include "cell_context.h"
+#include "cell_surface.h"
+
+
+void
+cell_init_surface_functions(struct cell_context *cell)
+{
+ cell->pipe.surface_copy = util_surface_copy;
+ cell->pipe.surface_fill = util_surface_fill;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h
new file mode 100644
index 0000000000..9e58f32944
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_surface.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef CELL_SURFACE_H
+#define CELL_SURFACE_H
+
+
+struct cell_context;
+
+
+extern void
+cell_init_surface_functions(struct cell_context *cell);
+
+
+#endif /* SP_SURFACE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
new file mode 100644
index 0000000000..9f83ab8fa4
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -0,0 +1,528 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Michel Dänzer <michel@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "cell_context.h"
+#include "cell_state.h"
+#include "cell_texture.h"
+
+
+
+static unsigned
+minify(unsigned d)
+{
+ return MAX2(1, d>>1);
+}
+
+
+static void
+cell_texture_layout(struct cell_texture *ct)
+{
+ struct pipe_texture *pt = &ct->base;
+ unsigned level;
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+ unsigned depth = pt->depth[0];
+
+ ct->buffer_size = 0;
+
+ for ( level = 0 ; level <= pt->last_level ; level++ ) {
+ unsigned size;
+ unsigned w_tile, h_tile;
+
+ assert(level < CELL_MAX_TEXTURE_LEVELS);
+
+ /* width, height, rounded up to tile size */
+ w_tile = align(width, TILE_SIZE);
+ h_tile = align(height, TILE_SIZE);
+
+ pt->width[level] = width;
+ pt->height[level] = height;
+ pt->depth[level] = depth;
+ pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile);
+ pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile);
+
+ ct->stride[level] = pt->nblocksx[level] * pt->block.size;
+
+ ct->level_offset[level] = ct->buffer_size;
+
+ size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size;
+ if (pt->target == PIPE_TEXTURE_CUBE)
+ size *= 6;
+ else
+ size *= depth;
+
+ ct->buffer_size += size;
+
+ width = minify(width);
+ height = minify(height);
+ depth = minify(depth);
+ }
+}
+
+
+static struct pipe_texture *
+cell_texture_create(struct pipe_screen *screen,
+ const struct pipe_texture *templat)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct cell_texture *ct = CALLOC_STRUCT(cell_texture);
+ if (!ct)
+ return NULL;
+
+ ct->base = *templat;
+ ct->base.refcount = 1;
+ ct->base.screen = screen;
+
+ cell_texture_layout(ct);
+
+ ct->buffer = ws->buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL,
+ ct->buffer_size);
+
+ if (!ct->buffer) {
+ FREE(ct);
+ return NULL;
+ }
+
+ return &ct->base;
+}
+
+
+static void
+cell_texture_release(struct pipe_screen *screen,
+ struct pipe_texture **pt)
+{
+ if (!*pt)
+ return;
+
+ /*
+ DBG("%s %p refcount will be %d\n",
+ __FUNCTION__, (void *) *pt, (*pt)->refcount - 1);
+ */
+ if (--(*pt)->refcount <= 0) {
+ /* Delete this texture now.
+ * But note that the underlying pipe_buffer may linger...
+ */
+ struct cell_texture *ct = cell_texture(*pt);
+ uint i;
+
+ /*
+ DBG("%s deleting %p\n", __FUNCTION__, (void *) ct);
+ */
+
+ pipe_buffer_reference(screen, &ct->buffer, NULL);
+
+ for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
+ /* Unreference the tiled image buffer.
+ * It may not actually be deleted until a fence is hit.
+ */
+ if (ct->tiled_buffer[i]) {
+ ct->tiled_mapped[i] = NULL;
+ winsys_buffer_reference(screen->winsys, &ct->tiled_buffer[i], NULL);
+ }
+ }
+
+ FREE(ct);
+ }
+ *pt = NULL;
+}
+
+
+
+/**
+ * Convert image from linear layout to tiled layout. 4-byte pixels.
+ */
+static void
+twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
+ uint src_stride, const uint *src)
+{
+ const uint tile_size2 = tile_size * tile_size;
+ const uint h_t = (h + tile_size - 1) / tile_size;
+ const uint w_t = (w + tile_size - 1) / tile_size;
+
+ uint it, jt; /* tile counters */
+ uint i, j; /* intra-tile counters */
+
+ src_stride /= 4; /* convert from bytes to pixels */
+
+ /* loop over dest tiles */
+ for (it = 0; it < h_t; it++) {
+ for (jt = 0; jt < w_t; jt++) {
+ /* start of dest tile: */
+ uint *tdst = dst + (it * w_t + jt) * tile_size2;
+
+ /* compute size of this tile (may be smaller than tile_size) */
+ /* XXX note: a compiler bug was found here. That's why the code
+ * looks as it does.
+ */
+ uint tile_width = w - jt * tile_size;
+ tile_width = MIN2(tile_width, tile_size);
+ uint tile_height = h - it * tile_size;
+ tile_height = MIN2(tile_height, tile_size);
+
+ /* loop over texels in the tile */
+ for (i = 0; i < tile_height; i++) {
+ for (j = 0; j < tile_width; j++) {
+ const uint srci = it * tile_size + i;
+ const uint srcj = jt * tile_size + j;
+ ASSERT(srci < h);
+ ASSERT(srcj < w);
+ tdst[i * tile_size + j] = src[srci * src_stride + srcj];
+ }
+ }
+ }
+ }
+}
+
+
+/**
+ * For Cell. Basically, rearrange the pixels/quads from this layout:
+ * +--+--+--+--+
+ * |p0|p1|p2|p3|....
+ * +--+--+--+--+
+ *
+ * to this layout:
+ * +--+--+
+ * |p0|p1|....
+ * +--+--+
+ * |p2|p3|
+ * +--+--+
+ */
+static void
+twiddle_tile(const uint *tileIn, uint *tileOut)
+{
+ int y, x;
+
+ for (y = 0; y < TILE_SIZE; y+=2) {
+ for (x = 0; x < TILE_SIZE; x+=2) {
+ int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
+ tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
+ tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
+ tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
+ tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
+ }
+ }
+}
+
+
+/**
+ * Convert image from tiled layout to linear layout. 4-byte pixels.
+ */
+static void
+untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
+ uint dst_stride, const uint *src)
+{
+ const uint tile_size2 = tile_size * tile_size;
+ const uint h_t = (h + tile_size - 1) / tile_size;
+ const uint w_t = (w + tile_size - 1) / tile_size;
+ uint *tile_buf;
+ uint it, jt; /* tile counters */
+ uint i, j; /* intra-tile counters */
+
+ dst_stride /= 4; /* convert from bytes to pixels */
+
+ tile_buf = align_malloc(tile_size * tile_size * 4, 16);
+
+ /* loop over src tiles */
+ for (it = 0; it < h_t; it++) {
+ for (jt = 0; jt < w_t; jt++) {
+ /* start of src tile: */
+ const uint *tsrc = src + (it * w_t + jt) * tile_size2;
+
+ twiddle_tile(tsrc, tile_buf);
+ tsrc = tile_buf;
+
+ /* compute size of this tile (may be smaller than tile_size) */
+ /* XXX note: a compiler bug was found here. That's why the code
+ * looks as it does.
+ */
+ uint tile_width = w - jt * tile_size;
+ tile_width = MIN2(tile_width, tile_size);
+ uint tile_height = h - it * tile_size;
+ tile_height = MIN2(tile_height, tile_size);
+
+ /* loop over texels in the tile */
+ for (i = 0; i < tile_height; i++) {
+ for (j = 0; j < tile_width; j++) {
+ uint dsti = it * tile_size + i;
+ uint dstj = jt * tile_size + j;
+ ASSERT(dsti < h);
+ ASSERT(dstj < w);
+ dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j];
+ }
+ }
+ }
+ }
+
+ align_free(tile_buf);
+}
+
+
+/**
+ * Convert linear texture image data to tiled format for SPU usage.
+ */
+static void
+cell_twiddle_texture(struct pipe_screen *screen,
+ struct pipe_surface *surface)
+{
+ struct cell_texture *ct = cell_texture(surface->texture);
+ const uint level = surface->level;
+ const uint texWidth = ct->base.width[level];
+ const uint texHeight = ct->base.height[level];
+ const uint bufWidth = align(texWidth, TILE_SIZE);
+ const uint bufHeight = align(texHeight, TILE_SIZE);
+ const void *map = pipe_buffer_map(screen, surface->buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ const uint *src = (const uint *) ((const ubyte *) map + surface->offset);
+
+ switch (ct->base.format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ {
+ int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1;
+ int offset = bufWidth * bufHeight * 4 * surface->face;
+ uint *dst;
+
+ if (!ct->tiled_buffer[level]) {
+ /* allocate buffer for tiled data now */
+ struct pipe_winsys *ws = screen->winsys;
+ uint bytes = bufWidth * bufHeight * 4 * numFaces;
+ ct->tiled_buffer[level] = ws->buffer_create(ws, 16,
+ PIPE_BUFFER_USAGE_PIXEL,
+ bytes);
+ /* and map it */
+ ct->tiled_mapped[level] = ws->buffer_map(ws, ct->tiled_buffer[level],
+ PIPE_BUFFER_USAGE_GPU_READ);
+ }
+ dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset);
+
+ twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst,
+ surface->stride, src);
+ }
+ break;
+ default:
+ printf("Cell: twiddle unsupported texture format %s\n", pf_name(ct->base.format));
+ ;
+ }
+
+ pipe_buffer_unmap(screen, surface->buffer);
+}
+
+
+/**
+ * Convert SPU tiled texture image data to linear format for app usage.
+ */
+static void
+cell_untwiddle_texture(struct pipe_screen *screen,
+ struct pipe_surface *surface)
+{
+ struct cell_texture *ct = cell_texture(surface->texture);
+ const uint level = surface->level;
+ const uint texWidth = ct->base.width[level];
+ const uint texHeight = ct->base.height[level];
+ const void *map = pipe_buffer_map(screen, surface->buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ const uint *src = (const uint *) ((const ubyte *) map + surface->offset);
+
+ switch (ct->base.format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ {
+ int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1;
+ int offset = surface->stride * texHeight * 4 * surface->face;
+ uint *dst;
+
+ if (!ct->untiled_data[level]) {
+ ct->untiled_data[level] =
+ align_malloc(surface->stride * texHeight * 4 * numFaces, 16);
+ }
+
+ dst = (uint *) ((ubyte *) ct->untiled_data[level] + offset);
+
+ untwiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst,
+ surface->stride, src);
+ }
+ break;
+ default:
+ {
+ ct->untiled_data[level] = NULL;
+ printf("Cell: untwiddle unsupported texture format %s\n", pf_name(ct->base.format));
+ }
+ }
+
+ pipe_buffer_unmap(screen, surface->buffer);
+}
+
+
+static struct pipe_surface *
+cell_get_tex_surface(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned usage)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct cell_texture *ct = cell_texture(pt);
+ struct pipe_surface *ps;
+
+ ps = ws->surface_alloc(ws);
+ if (ps) {
+ assert(ps->refcount);
+ assert(ps->winsys);
+ winsys_buffer_reference(ws, &ps->buffer, ct->buffer);
+ ps->format = pt->format;
+ ps->block = pt->block;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = ct->stride[level];
+ ps->offset = ct->level_offset[level];
+ ps->usage = usage;
+
+ /* XXX may need to override usage flags (see sp_texture.c) */
+
+ pipe_texture_reference(&ps->texture, pt);
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
+
+ if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) {
+ ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) *
+ ps->nblocksy *
+ ps->stride;
+ }
+ else {
+ assert(face == 0);
+ assert(zslice == 0);
+ }
+
+ if (ps->usage & PIPE_BUFFER_USAGE_CPU_READ) {
+ /* convert from tiled to linear layout */
+ cell_untwiddle_texture(screen, ps);
+ }
+ }
+ return ps;
+}
+
+
+static void
+cell_tex_surface_release(struct pipe_screen *screen,
+ struct pipe_surface **s)
+{
+ struct cell_texture *ct = cell_texture((*s)->texture);
+ const uint level = (*s)->level;
+
+ if (((*s)->usage & PIPE_BUFFER_USAGE_CPU_READ) && (ct->untiled_data[level]))
+ {
+ align_free(ct->untiled_data[level]);
+ ct->untiled_data[level] = NULL;
+ }
+
+ /* XXX if done rendering to teximage, re-tile */
+
+ pipe_texture_reference(&(*s)->texture, NULL);
+
+ screen->winsys->surface_release(screen->winsys, s);
+}
+
+
+static void *
+cell_surface_map(struct pipe_screen *screen,
+ struct pipe_surface *surface,
+ unsigned flags)
+{
+ ubyte *map;
+ struct cell_texture *ct = cell_texture(surface->texture);
+ const uint level = surface->level;
+
+ assert(ct);
+
+ if (flags & ~surface->usage) {
+ assert(0);
+ return NULL;
+ }
+
+ map = pipe_buffer_map( screen, surface->buffer, flags );
+ if (map == NULL)
+ return NULL;
+ else
+ {
+ if ((surface->usage & PIPE_BUFFER_USAGE_CPU_READ) && (ct->untiled_data[level])) {
+ return (void *) ((ubyte *) ct->untiled_data[level] + surface->offset);
+ }
+ else {
+ return (void *) (map + surface->offset);
+ }
+ }
+}
+
+
+static void
+cell_surface_unmap(struct pipe_screen *screen,
+ struct pipe_surface *surface)
+{
+ struct cell_texture *ct = cell_texture(surface->texture);
+
+ assert(ct);
+
+ if ((ct->base.tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) &&
+ (surface->usage & PIPE_BUFFER_USAGE_CPU_WRITE)) {
+ /* convert from linear to tiled layout */
+ cell_twiddle_texture(screen, surface);
+ }
+
+ pipe_buffer_unmap( screen, surface->buffer );
+}
+
+
+
+void
+cell_init_screen_texture_funcs(struct pipe_screen *screen)
+{
+ screen->texture_create = cell_texture_create;
+ screen->texture_release = cell_texture_release;
+
+ screen->get_tex_surface = cell_get_tex_surface;
+ screen->tex_surface_release = cell_tex_surface_release;
+
+ screen->surface_map = cell_surface_map;
+ screen->surface_unmap = cell_surface_unmap;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h
new file mode 100644
index 0000000000..7018b0c9bf
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_texture.h
@@ -0,0 +1,72 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef CELL_TEXTURE_H
+#define CELL_TEXTURE_H
+
+
+struct cell_context;
+struct pipe_texture;
+
+
+/**
+ * Subclass of pipe_texture
+ */
+struct cell_texture
+{
+ struct pipe_texture base;
+
+ unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS];
+ unsigned long stride[CELL_MAX_TEXTURE_LEVELS];
+
+ /* The data is held here:
+ */
+ struct pipe_buffer *buffer;
+ unsigned long buffer_size;
+
+ /** Texture data in tiled layout is held here */
+ struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS];
+ /** Mapped, tiled texture data */
+ void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS];
+ void *untiled_data[CELL_MAX_TEXTURE_LEVELS];
+};
+
+
+/** cast wrapper */
+static INLINE struct cell_texture *
+cell_texture(struct pipe_texture *pt)
+{
+ return (struct cell_texture *) pt;
+}
+
+
+
+extern void
+cell_init_screen_texture_funcs(struct pipe_screen *screen);
+
+
+#endif /* CELL_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c
new file mode 100644
index 0000000000..ab54e79689
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c
@@ -0,0 +1,309 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Vertex buffer code. The draw module transforms vertices to window
+ * coords, etc. and emits the vertices into buffer supplied by this module.
+ * When a vertex buffer is full, or we flush, we'll send the vertex data
+ * to the SPUs.
+ *
+ * Authors
+ * Brian Paul
+ */
+
+
+#include "cell_batch.h"
+#include "cell_context.h"
+#include "cell_fence.h"
+#include "cell_flush.h"
+#include "cell_spu.h"
+#include "cell_vbuf.h"
+#include "draw/draw_vbuf.h"
+#include "util/u_memory.h"
+
+
+/** Allow vertex data to be inlined after RENDER command */
+#define ALLOW_INLINE_VERTS 1
+
+
+/**
+ * Subclass of vbuf_render because we need a cell_context pointer in
+ * a few places.
+ */
+struct cell_vbuf_render
+{
+ struct vbuf_render base;
+ struct cell_context *cell;
+ uint prim; /**< PIPE_PRIM_x */
+ uint vertex_size; /**< in bytes */
+ void *vertex_buffer; /**< just for debug, really */
+ uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
+};
+
+
+/** cast wrapper */
+static struct cell_vbuf_render *
+cell_vbuf_render(struct vbuf_render *vbr)
+{
+ return (struct cell_vbuf_render *) vbr;
+}
+
+
+
+static const struct vertex_info *
+cell_vbuf_get_vertex_info(struct vbuf_render *vbr)
+{
+ struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
+ return &cvbr->cell->vertex_info;
+}
+
+
+static void *
+cell_vbuf_allocate_vertices(struct vbuf_render *vbr,
+ ushort vertex_size, ushort nr_vertices)
+{
+ struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
+ /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/
+
+ assert(cvbr->vertex_buf == ~0);
+ cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell);
+ cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf];
+ cvbr->vertex_size = vertex_size;
+ return cvbr->vertex_buffer;
+}
+
+
+static void
+cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
+ unsigned vertex_size, unsigned vertices_used)
+{
+ struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
+ struct cell_context *cell = cvbr->cell;
+
+ /*
+ printf("%s vertex_buf = %u count = %u\n",
+ __FUNCTION__, cvbr->vertex_buf, vertices_used);
+ */
+
+ /* Make sure texture buffers aren't released until we're done rendering
+ * with them.
+ */
+ cell_add_fenced_textures(cell);
+
+ /* Tell SPUs they can release the vert buf */
+ if (cvbr->vertex_buf != ~0U) {
+ STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0);
+ struct cell_command_release_verts *release
+ = (struct cell_command_release_verts *)
+ cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts));
+ release->opcode[0] = CELL_CMD_RELEASE_VERTS;
+ release->vertex_buf = cvbr->vertex_buf;
+ }
+
+ cvbr->vertex_buf = ~0;
+ cell_flush_int(cell, 0x0);
+
+ assert(vertices == cvbr->vertex_buffer);
+ cvbr->vertex_buffer = NULL;
+}
+
+
+
+static boolean
+cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
+{
+ struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
+ cvbr->prim = prim;
+ /*printf("cell_set_prim %u\n", prim);*/
+ return TRUE;
+}
+
+
+static void
+cell_vbuf_draw(struct vbuf_render *vbr,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
+ struct cell_context *cell = cvbr->cell;
+ float xmin, ymin, xmax, ymax;
+ uint i;
+ uint nr_vertices = 0, min_index = ~0;
+ const void *vertices = cvbr->vertex_buffer;
+ const uint vertex_size = cvbr->vertex_size;
+
+ for (i = 0; i < nr_indices; i++) {
+ if (indices[i] > nr_vertices)
+ nr_vertices = indices[i];
+ if (indices[i] < min_index)
+ min_index = indices[i];
+ }
+ nr_vertices++;
+
+#if 0
+ /*if (min_index > 0)*/
+ printf("%s min_index = %u\n", __FUNCTION__, min_index);
+#endif
+
+#if 0
+ printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n",
+ nr_indices, nr_vertices);
+ printf(" ");
+ for (i = 0; i < nr_indices; i += 3) {
+ printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]);
+ }
+ printf("\n");
+#elif 0
+ printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n",
+ nr_indices, nr_vertices,
+ indices[0], indices[1], indices[2]);
+ printf("ind space = %u, vert space = %u, space = %u\n",
+ nr_indices * 2,
+ nr_vertices * 4 * cell->vertex_info.size,
+ cell_batch_free_space(cell));
+#endif
+
+ /* compute x/y bounding box */
+ xmin = ymin = 1e50;
+ xmax = ymax = -1e50;
+ for (i = min_index; i < nr_vertices; i++) {
+ const float *v = (float *) ((ubyte *) vertices + i * vertex_size);
+ if (v[0] < xmin)
+ xmin = v[0];
+ if (v[0] > xmax)
+ xmax = v[0];
+ if (v[1] < ymin)
+ ymin = v[1];
+ if (v[1] > ymax)
+ ymax = v[1];
+ }
+#if 0
+ printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax);
+ fflush(stdout);
+#endif
+
+ if (cvbr->prim != PIPE_PRIM_TRIANGLES)
+ return; /* only render tris for now */
+
+ /* build/insert batch RENDER command */
+ {
+ const uint index_bytes = ROUNDUP16(nr_indices * 2);
+ const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size);
+ STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0);
+ const uint batch_size = sizeof(struct cell_command_render) + index_bytes;
+
+ struct cell_command_render *render
+ = (struct cell_command_render *)
+ cell_batch_alloc16(cell, batch_size);
+
+ render->opcode[0] = CELL_CMD_RENDER;
+ render->prim_type = cvbr->prim;
+
+ render->num_indexes = nr_indices;
+ render->min_index = min_index;
+
+ /* append indices after render command */
+ memcpy(render + 1, indices, nr_indices * 2);
+
+ /* if there's room, append vertices after the indices, else leave
+ * vertices in the original/separate buffer.
+ */
+ render->vertex_size = 4 * cell->vertex_info.size;
+ render->num_verts = nr_vertices;
+ if (ALLOW_INLINE_VERTS &&
+ min_index == 0 &&
+ vertex_bytes + 16 <= cell_batch_free_space(cell)) {
+ /* vertex data inlined, after indices, at 16-byte boundary */
+ void *dst = cell_batch_alloc16(cell, vertex_bytes);
+ memcpy(dst, vertices, vertex_bytes);
+ render->inline_verts = TRUE;
+ render->vertex_buf = ~0;
+ }
+ else {
+ /* vertex data in separate buffer */
+ render->inline_verts = FALSE;
+ ASSERT(cvbr->vertex_buf >= 0);
+ render->vertex_buf = cvbr->vertex_buf;
+ }
+
+ render->xmin = xmin;
+ render->ymin = ymin;
+ render->xmax = xmax;
+ render->ymax = ymax;
+ }
+
+#if 0
+ /* helpful for debug */
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
+#endif
+}
+
+
+static void
+cell_vbuf_destroy(struct vbuf_render *vbr)
+{
+ struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
+ cvbr->cell->vbuf_render = NULL;
+ FREE(cvbr);
+}
+
+
+/**
+ * Initialize the post-transform vertex buffer information for the given
+ * context.
+ */
+void
+cell_init_vbuf(struct cell_context *cell)
+{
+ assert(cell->draw);
+
+ cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render);
+
+ /* The max number of indexes is what can fix into a batch buffer,
+ * minus the render and release-verts commands.
+ */
+ cell->vbuf_render->base.max_indices
+ = (CELL_BUFFER_SIZE
+ - sizeof(struct cell_command_render)
+ - sizeof(struct cell_command_release_verts))
+ / sizeof(ushort);
+ cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE;
+
+ cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info;
+ cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices;
+ cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive;
+ cell->vbuf_render->base.draw = cell_vbuf_draw;
+ cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices;
+ cell->vbuf_render->base.destroy = cell_vbuf_destroy;
+
+ cell->vbuf_render->cell = cell;
+#if 1
+ cell->vbuf_render->vertex_buf = ~0;
+#endif
+
+ cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base);
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h
new file mode 100644
index 0000000000..d265cbf770
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_vbuf.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef CELL_VBUF_H
+#define CELL_VBUF_H
+
+
+struct cell_context;
+
+extern void
+cell_init_vbuf(struct cell_context *cell);
+
+
+#endif /* CELL_VBUF_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
new file mode 100644
index 0000000000..9cba537d9e
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
@@ -0,0 +1,346 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include "pipe/p_defines.h"
+#include "pipe/p_context.h"
+#include "pipe/p_format.h"
+
+#include "../auxiliary/draw/draw_context.h"
+#include "../auxiliary/draw/draw_private.h"
+
+#include "cell_context.h"
+#include "rtasm/rtasm_ppc_spe.h"
+
+
+/**
+ * Emit a 4x4 matrix transpose operation
+ *
+ * \param p Function that the transpose operation is to be appended to
+ * \param row0 Register containing row 0 of the source matrix
+ * \param row1 Register containing row 1 of the source matrix
+ * \param row2 Register containing row 2 of the source matrix
+ * \param row3 Register containing row 3 of the source matrix
+ * \param dest_ptr Register containing the address of the destination matrix
+ * \param shuf_ptr Register containing the address of the shuffled data
+ * \param count Number of colums to actually be written to the destination
+ *
+ * \note
+ * This function assumes that the registers named by \c row0, \c row1,
+ * \c row2, and \c row3 are scratch and can be modified by the generated code.
+ * Furthermore, these registers will be released, via calls to
+ * \c release_register, by this function.
+ *
+ * \note
+ * This function requires that four temporary are available on entry.
+ */
+static void
+emit_matrix_transpose(struct spe_function *p,
+ unsigned row0, unsigned row1, unsigned row2,
+ unsigned row3, unsigned dest_ptr,
+ unsigned shuf_ptr, unsigned count)
+{
+ int shuf_hi = spe_allocate_available_register(p);
+ int shuf_lo = spe_allocate_available_register(p);
+ int t1 = spe_allocate_available_register(p);
+ int t2 = spe_allocate_available_register(p);
+ int t3;
+ int t4;
+ int col0;
+ int col1;
+ int col2;
+ int col3;
+
+
+ spe_lqd(p, shuf_hi, shuf_ptr, 3*16);
+ spe_lqd(p, shuf_lo, shuf_ptr, 4*16);
+ spe_shufb(p, t1, row0, row2, shuf_hi);
+ spe_shufb(p, t2, row0, row2, shuf_lo);
+
+
+ /* row0 and row2 are now no longer needed. Re-use those registers as
+ * temporaries.
+ */
+ t3 = row0;
+ t4 = row2;
+
+ spe_shufb(p, t3, row1, row3, shuf_hi);
+ spe_shufb(p, t4, row1, row3, shuf_lo);
+
+
+ /* row1 and row3 are now no longer needed. Re-use those registers as
+ * temporaries.
+ */
+ col0 = row1;
+ col1 = row3;
+
+ spe_shufb(p, col0, t1, t3, shuf_hi);
+ if (count > 1) {
+ spe_shufb(p, col1, t1, t3, shuf_lo);
+ }
+
+ /* t1 and t3 are now no longer needed. Re-use those registers as
+ * temporaries.
+ */
+ col2 = t1;
+ col3 = t3;
+
+ if (count > 2) {
+ spe_shufb(p, col2, t2, t4, shuf_hi);
+ }
+
+ if (count > 3) {
+ spe_shufb(p, col3, t2, t4, shuf_lo);
+ }
+
+
+ /* Store the results. Remember that the stqd instruction is encoded using
+ * the qword offset (stand-alone assemblers to the byte-offset to
+ * qword-offset conversion for you), so the byte-offset needs be divided by
+ * 16.
+ */
+ switch (count) {
+ case 4:
+ spe_stqd(p, col3, dest_ptr, 3 * 16);
+ case 3:
+ spe_stqd(p, col2, dest_ptr, 2 * 16);
+ case 2:
+ spe_stqd(p, col1, dest_ptr, 1 * 16);
+ case 1:
+ spe_stqd(p, col0, dest_ptr, 0 * 16);
+ }
+
+
+ /* Release all of the temporary registers used.
+ */
+ spe_release_register(p, col0);
+ spe_release_register(p, col1);
+ spe_release_register(p, col2);
+ spe_release_register(p, col3);
+ spe_release_register(p, shuf_hi);
+ spe_release_register(p, shuf_lo);
+ spe_release_register(p, t2);
+ spe_release_register(p, t4);
+}
+
+
+#if 0
+/* This appears to not be used currently */
+static void
+emit_fetch(struct spe_function *p,
+ unsigned in_ptr, unsigned *offset,
+ unsigned out_ptr, unsigned shuf_ptr,
+ enum pipe_format format)
+{
+ const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0)
+ + (pf_size_z(format) != 0) + (pf_size_w(format) != 0);
+ const unsigned type = pf_type(format);
+ const unsigned bytes = pf_size_x(format);
+
+ int v0 = spe_allocate_available_register(p);
+ int v1 = spe_allocate_available_register(p);
+ int v2 = spe_allocate_available_register(p);
+ int v3 = spe_allocate_available_register(p);
+ int tmp = spe_allocate_available_register(p);
+ int float_zero = -1;
+ int float_one = -1;
+ float scale_signed = 0.0;
+ float scale_unsigned = 0.0;
+
+ spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16);
+ spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16);
+ spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16);
+ spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16);
+ offset[0] += 4;
+
+ switch (bytes) {
+ case 1:
+ scale_signed = 1.0f / 127.0f;
+ scale_unsigned = 1.0f / 255.0f;
+ spe_lqd(p, tmp, shuf_ptr, 1 * 16);
+ spe_shufb(p, v0, v0, v0, tmp);
+ spe_shufb(p, v1, v1, v1, tmp);
+ spe_shufb(p, v2, v2, v2, tmp);
+ spe_shufb(p, v3, v3, v3, tmp);
+ break;
+ case 2:
+ scale_signed = 1.0f / 32767.0f;
+ scale_unsigned = 1.0f / 65535.0f;
+ spe_lqd(p, tmp, shuf_ptr, 2 * 16);
+ spe_shufb(p, v0, v0, v0, tmp);
+ spe_shufb(p, v1, v1, v1, tmp);
+ spe_shufb(p, v2, v2, v2, tmp);
+ spe_shufb(p, v3, v3, v3, tmp);
+ break;
+ case 4:
+ scale_signed = 1.0f / 2147483647.0f;
+ scale_unsigned = 1.0f / 4294967295.0f;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (type) {
+ case PIPE_FORMAT_TYPE_FLOAT:
+ break;
+ case PIPE_FORMAT_TYPE_UNORM:
+ spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16);
+ spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff);
+ spe_cuflt(p, v0, v0, 0);
+ spe_fm(p, v0, v0, tmp);
+ break;
+ case PIPE_FORMAT_TYPE_SNORM:
+ spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16);
+ spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff);
+ spe_csflt(p, v0, v0, 0);
+ spe_fm(p, v0, v0, tmp);
+ break;
+ case PIPE_FORMAT_TYPE_USCALED:
+ spe_cuflt(p, v0, v0, 0);
+ break;
+ case PIPE_FORMAT_TYPE_SSCALED:
+ spe_csflt(p, v0, v0, 0);
+ break;
+ }
+
+
+ if (count < 4) {
+ float_one = spe_allocate_available_register(p);
+ spe_il(p, float_one, 1);
+ spe_cuflt(p, float_one, float_one, 0);
+
+ if (count < 3) {
+ float_zero = spe_allocate_available_register(p);
+ spe_il(p, float_zero, 0);
+ }
+ }
+
+ spe_release_register(p, tmp);
+
+ emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count);
+
+ switch (count) {
+ case 1:
+ spe_stqd(p, float_zero, out_ptr, 1 * 16);
+ case 2:
+ spe_stqd(p, float_zero, out_ptr, 2 * 16);
+ case 3:
+ spe_stqd(p, float_one, out_ptr, 3 * 16);
+ }
+
+ if (float_zero != -1) {
+ spe_release_register(p, float_zero);
+ }
+
+ if (float_one != -1) {
+ spe_release_register(p, float_one);
+ }
+}
+#endif
+
+
+void cell_update_vertex_fetch(struct draw_context *draw)
+{
+#if 0
+ struct cell_context *const cell =
+ (struct cell_context *) draw->driver_private;
+ struct spe_function *p = &cell->attrib_fetch;
+ unsigned function_index[PIPE_MAX_ATTRIBS];
+ unsigned unique_attr_formats;
+ int out_ptr;
+ int in_ptr;
+ int shuf_ptr;
+ unsigned i;
+ unsigned j;
+
+
+ /* Determine how many unique input attribute formats there are. At the
+ * same time, store the index of the lowest numbered attribute that has
+ * the same format as any non-unique format.
+ */
+ unique_attr_formats = 1;
+ function_index[0] = 0;
+ for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) {
+ const enum pipe_format curr_fmt = draw->vertex_element[i].src_format;
+
+ for (j = 0; j < i; j++) {
+ if (curr_fmt == draw->vertex_element[j].src_format) {
+ break;
+ }
+ }
+
+ if (j == i) {
+ unique_attr_formats++;
+ }
+
+ function_index[i] = j;
+ }
+
+
+ /* Each fetch function can be a maximum of 34 instructions (note: this is
+ * actually a slight over-estimate).
+ */
+ spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);
+
+
+ /* Allocate registers for the function's input parameters.
+ */
+ out_ptr = spe_allocate_register(p, 3);
+ in_ptr = spe_allocate_register(p, 4);
+ shuf_ptr = spe_allocate_register(p, 5);
+
+
+ /* Generate code for the individual attribute fetch functions.
+ */
+ for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
+ unsigned offset;
+
+ if (function_index[i] == i) {
+ cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr
+ - (void *) p->store);
+
+ offset = 0;
+ emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr,
+ draw->vertex_element[i].src_format);
+ spe_bi(p, 0, 0, 0);
+
+ /* Round up to the next 16-byte boundary.
+ */
+ if ((((unsigned) p->store) & 0x0f) != 0) {
+ const unsigned align = ((unsigned) p->store) & 0x0f;
+ p->store = (uint32_t *) (((void *) p->store) + align);
+ }
+ } else {
+ /* Use the same function entry-point as a previously seen attribute
+ * with the same format.
+ */
+ cell->attrib_fetch_offsets[i] =
+ cell->attrib_fetch_offsets[function_index[i]];
+ }
+ }
+#else
+ assert(0);
+#endif
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
new file mode 100644
index 0000000000..2b10c116fa
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
@@ -0,0 +1,146 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file cell_vertex_shader.c
+ * Vertex shader interface routines for Cell.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_context.h"
+#include "pipe/p_winsys.h"
+#include "util/u_math.h"
+
+#include "cell_context.h"
+#include "cell_draw_arrays.h"
+#include "cell_flush.h"
+#include "cell_spu.h"
+#include "cell_batch.h"
+
+#include "cell/common.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+
+/**
+ * Run the vertex shader on all vertices in the vertex queue.
+ * Called by the draw module when the vertx cache needs to be flushed.
+ */
+void
+cell_vertex_shader_queue_flush(struct draw_context *draw)
+{
+#if 0
+ struct cell_context *const cell =
+ (struct cell_context *) draw->driver_private;
+ struct cell_command_vs *const vs = &cell_global.command[0].vs;
+ uint64_t *batch;
+ struct cell_array_info *array_info;
+ unsigned i, j;
+ struct cell_attribute_fetch_code *cf;
+
+ assert(draw->vs.queue_nr != 0);
+
+ /* XXX: do this on statechange:
+ */
+ draw_update_vertex_fetch(draw);
+ cell_update_vertex_fetch(draw);
+
+
+ batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf));
+ batch[0] = CELL_CMD_STATE_ATTRIB_FETCH;
+ cf = (struct cell_attribute_fetch_code *) (&batch[1]);
+ cf->base = (uint64_t) cell->attrib_fetch.store;
+ cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr
+ - (void *) cell->attrib_fetch.store));
+
+
+ for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
+ const enum pipe_format format = draw->vertex_element[i].src_format;
+ const unsigned count = ((pf_size_x(format) != 0)
+ + (pf_size_y(format) != 0)
+ + (pf_size_z(format) != 0)
+ + (pf_size_w(format) != 0));
+ const unsigned size = pf_size_x(format) * count;
+
+ batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info));
+
+ batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO;
+
+ array_info = (struct cell_array_info *) &batch[1];
+ assert(draw->vertex_fetch.src_ptr[i] != NULL);
+ array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i];
+ array_info->attr = i;
+ array_info->pitch = draw->vertex_fetch.pitch[i];
+ array_info->size = size;
+ array_info->function_offset = cell->attrib_fetch_offsets[i];
+ }
+
+ batch = cell_batch_alloc(cell, sizeof(batch[0])
+ + sizeof(struct pipe_viewport_state));
+ batch[0] = CELL_CMD_STATE_VIEWPORT;
+ (void) memcpy(&batch[1], &draw->viewport,
+ sizeof(struct pipe_viewport_state));
+
+ {
+ uint64_t uniforms = (uintptr_t) draw->user.constants;
+
+ batch = cell_batch_alloc(cell, 2 *sizeof(batch[0]));
+ batch[0] = CELL_CMD_STATE_UNIFORMS;
+ batch[1] = uniforms;
+ }
+
+ cell_batch_flush(cell);
+
+ vs->opcode = CELL_CMD_VS_EXECUTE;
+ vs->nr_attrs = draw->vertex_fetch.nr_attrs;
+
+ (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane));
+ vs->nr_planes = draw->nr_planes;
+
+ for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) {
+ const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i);
+
+ for (j = 0; j < n; j++) {
+ vs->elts[j] = draw->vs.queue[i + j].elt;
+ vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
+ }
+
+ for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) {
+ vs->elts[j] = vs->elts[0];
+ vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
+ }
+
+ vs->num_elts = n;
+ send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE);
+
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
+ }
+
+ draw->vs.post_nr = draw->vs.queue_nr;
+ draw->vs.queue_nr = 0;
+#else
+ assert(0);
+#endif
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.h b/src/gallium/drivers/cell/ppu/cell_winsys.h
new file mode 100644
index 0000000000..ae2af5696b
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_winsys.h
@@ -0,0 +1,50 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_WINSYS_H
+#define CELL_WINSYS_H
+
+#include "pipe/p_compiler.h"
+
+
+/**
+ * Very simple winsys at this time.
+ * Will probably eventually add SPU control info.
+ */
+struct cell_winsys
+{
+ uint preferredFormat;
+};
+
+
+extern struct cell_winsys *
+cell_get_winsys(uint format);
+
+
+
+#endif
diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore
new file mode 100644
index 0000000000..2be9a2d324
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/.gitignore
@@ -0,0 +1 @@
+g3d_spu
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
new file mode 100644
index 0000000000..116453b79c
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/Makefile
@@ -0,0 +1,82 @@
+# Gallium3D Cell driver: SPU code
+
+# This makefile builds the g3d_spu.a file that's linked into the
+# PPU code/library.
+
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+
+PROG = g3d
+
+PROG_SPU = $(PROG)_spu
+PROG_SPU_A = $(PROG)_spu.a
+PROG_SPU_EMBED_O = $(PROG)_spu-embed.o
+
+
+SOURCES = \
+ spu_command.c \
+ spu_dcache.c \
+ spu_funcs.c \
+ spu_main.c \
+ spu_per_fragment_op.c \
+ spu_render.c \
+ spu_texture.c \
+ spu_tile.c \
+ spu_tri.c
+
+OLD_SOURCES = \
+ spu_exec.c \
+ spu_util.c \
+ spu_vertex_fetch.c \
+ spu_vertex_shader.c
+
+
+SPU_OBJECTS = $(SOURCES:.c=.o) \
+
+SPU_ASM_OUT = $(SOURCES:.c=.s) \
+
+INCLUDE_DIRS = \
+ -I$(TOP)/src/mesa \
+ -I$(TOP)/src/gallium/include \
+ -I$(TOP)/src/gallium/auxiliary \
+ -I$(TOP)/src/gallium/drivers
+
+
+.c.o:
+ $(SPU_CC) $(SPU_CFLAGS) -c $<
+
+.c.s:
+ $(SPU_CC) $(SPU_CFLAGS) -O3 -S $<
+
+
+# The .a file will be linked into the main/PPU executable
+default: $(PROG_SPU_A)
+
+$(PROG_SPU_A): $(PROG_SPU_EMBED_O)
+ $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O)
+
+$(PROG_SPU_EMBED_O): $(PROG_SPU)
+ $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O)
+
+$(PROG_SPU): $(SPU_OBJECTS)
+ $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS)
+
+
+
+asmfiles: $(SPU_ASM_OUT)
+
+
+clean:
+ rm -f *~ *.o *.a *.d *.s $(PROG_SPU)
+
+
+
+depend: $(SOURCES)
+ rm -f depend
+ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null
+
+include depend
+
diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h
new file mode 100644
index 0000000000..d7ce005524
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_colorpack.h
@@ -0,0 +1,145 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+#ifndef SPU_COLORPACK_H
+#define SPU_COLORPACK_H
+
+
+#include <transpose_matrix4x4.h>
+#include <spu_intrinsics.h>
+
+
+static INLINE unsigned int
+spu_pack_R8G8B8A8(vector float rgba)
+{
+ vector unsigned int out = spu_convtu(rgba, 32);
+
+ out = spu_shuffle(out, out, ((vector unsigned char) {
+ 0, 4, 8, 12, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 }) );
+
+ return spu_extract(out, 0);
+}
+
+
+static INLINE unsigned int
+spu_pack_A8R8G8B8(vector float rgba)
+{
+ vector unsigned int out = spu_convtu(rgba, 32);
+ out = spu_shuffle(out, out, ((vector unsigned char) {
+ 12, 0, 4, 8, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0}) );
+ return spu_extract(out, 0);
+}
+
+
+static INLINE unsigned int
+spu_pack_B8G8R8A8(vector float rgba)
+{
+ vector unsigned int out = spu_convtu(rgba, 32);
+ out = spu_shuffle(out, out, ((vector unsigned char) {
+ 8, 4, 0, 12, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0}) );
+ return spu_extract(out, 0);
+}
+
+
+static INLINE unsigned int
+spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle)
+{
+ vector unsigned int out = spu_convtu(rgba, 32);
+ out = spu_shuffle(out, out, shuffle);
+ return spu_extract(out, 0);
+}
+
+
+static INLINE vector float
+spu_unpack_B8G8R8A8(uint color)
+{
+ vector unsigned int color_u4 = spu_splats(color);
+ color_u4 = spu_shuffle(color_u4, color_u4,
+ ((vector unsigned char) {
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 3, 3, 3, 3}) );
+ return spu_convtf(color_u4, 32);
+}
+
+
+static INLINE vector float
+spu_unpack_A8R8G8B8(uint color)
+{
+ vector unsigned int color_u4 = spu_splats(color);
+ color_u4 = spu_shuffle(color_u4, color_u4,
+ ((vector unsigned char) {
+ 1, 1, 1, 1,
+ 2, 2, 2, 2,
+ 3, 3, 3, 3,
+ 0, 0, 0, 0}) );
+ return spu_convtf(color_u4, 32);
+}
+
+
+/**
+ * \param color_in - array of 32-bit packed ARGB colors
+ * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order
+ */
+static INLINE void
+spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4],
+ vector float color_out[4])
+{
+ vector unsigned int c0;
+
+ c0 = spu_shuffle(color_in[0], color_in[0],
+ ((vector unsigned char) {
+ 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
+ color_out[0] = spu_convtf(c0, 32);
+
+ c0 = spu_shuffle(color_in[1], color_in[1],
+ ((vector unsigned char) {
+ 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
+ color_out[1] = spu_convtf(c0, 32);
+
+ c0 = spu_shuffle(color_in[2], color_in[2],
+ ((vector unsigned char) {
+ 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
+ color_out[2] = spu_convtf(c0, 32);
+
+ c0 = spu_shuffle(color_in[3], color_in[3],
+ ((vector unsigned char) {
+ 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
+ color_out[3] = spu_convtf(c0, 32);
+
+ _transpose_matrix4x4(color_out, color_out);
+}
+
+
+
+#endif /* SPU_COLORPACK_H */
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
new file mode 100644
index 0000000000..5c0179d954
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -0,0 +1,815 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * SPU command processing code
+ */
+
+
+#include <stdio.h>
+#include <libmisc.h>
+
+#include "pipe/p_defines.h"
+
+#include "spu_command.h"
+#include "spu_main.h"
+#include "spu_render.h"
+#include "spu_per_fragment_op.h"
+#include "spu_texture.h"
+#include "spu_tile.h"
+#include "spu_vertex_shader.h"
+#include "spu_dcache.h"
+#include "cell/common.h"
+
+
+struct spu_vs_context draw;
+
+
+/**
+ * Buffers containing dynamically generated SPU code:
+ */
+static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
+ ALIGN16_ATTRIB;
+
+
+
+static INLINE int
+align(int value, int alignment)
+{
+ return (value + alignment - 1) & ~(alignment - 1);
+}
+
+
+
+/**
+ * Tell the PPU that this SPU has finished copying a buffer to
+ * local store and that it may be reused by the PPU.
+ * This is done by writting a 16-byte batch-buffer-status block back into
+ * main memory (in cell_context->buffer_status[]).
+ */
+static void
+release_buffer(uint buffer)
+{
+ /* Evidently, using less than a 16-byte status doesn't work reliably */
+ static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE,
+ CELL_BUFFER_STATUS_FREE,
+ CELL_BUFFER_STATUS_FREE,
+ CELL_BUFFER_STATUS_FREE};
+ const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
+ uint *dst = spu.init.buffer_status + index;
+
+ ASSERT(buffer < CELL_NUM_BUFFERS);
+
+ mfc_put((void *) &status, /* src in local memory */
+ (unsigned int) dst, /* dst in main memory */
+ sizeof(status), /* size */
+ TAG_MISC, /* tag is unimportant */
+ 0, /* tid */
+ 0 /* rid */);
+}
+
+
+/**
+ * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory.
+ * There's a qword of status per SPU.
+ */
+static void
+cmd_fence(struct cell_command_fence *fence_cmd)
+{
+ static const vector unsigned int status = {CELL_FENCE_SIGNALLED,
+ CELL_FENCE_SIGNALLED,
+ CELL_FENCE_SIGNALLED,
+ CELL_FENCE_SIGNALLED};
+ uint *dst = (uint *) fence_cmd->fence;
+ dst += 4 * spu.init.id; /* main store/memory address, not local store */
+ ASSERT_ALIGN16(dst);
+ mfc_put((void *) &status, /* src in local memory */
+ (unsigned int) dst, /* dst in main memory */
+ sizeof(status), /* size */
+ TAG_FENCE, /* tag */
+ 0, /* tid */
+ 0 /* rid */);
+}
+
+
+static void
+cmd_clear_surface(const struct cell_command_clear_surface *clear)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
+
+ if (clear->surface == 0) {
+ spu.fb.color_clear_value = clear->value;
+ if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
+ uint x = (spu.init.id << 4) | (spu.init.id << 12) |
+ (spu.init.id << 20) | (spu.init.id << 28);
+ spu.fb.color_clear_value ^= x;
+ }
+ }
+ else {
+ spu.fb.depth_clear_value = clear->value;
+ }
+
+#define CLEAR_OPT 1
+#if CLEAR_OPT
+
+ /* Simply set all tiles' status to CLEAR.
+ * When we actually begin rendering into a tile, we'll initialize it to
+ * the clear value. If any tiles go untouched during the frame,
+ * really_clear_tiles() will set them to the clear value.
+ */
+ if (clear->surface == 0) {
+ memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
+ }
+ else {
+ memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
+ }
+
+#else
+
+ /*
+ * This path clears the whole framebuffer to the clear color right now.
+ */
+
+ /*
+ printf("SPU: %s num=%d w=%d h=%d\n",
+ __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
+ */
+
+ /* init a single tile to the clear value */
+ if (clear->surface == 0) {
+ clear_c_tile(&spu.ctile);
+ }
+ else {
+ clear_z_tile(&spu.ztile);
+ }
+
+ /* walk over my tiles, writing the 'clear' tile's data */
+ {
+ const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+ uint i;
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (clear->surface == 0)
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+ else
+ put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
+ }
+ }
+
+ if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
+ wait_on_mask(1 << TAG_SURFACE_CLEAR);
+ }
+
+#endif /* CLEAR_OPT */
+
+ D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n");
+}
+
+
+static void
+cmd_release_verts(const struct cell_command_release_verts *release)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf);
+ ASSERT(release->vertex_buf != ~0U);
+ release_buffer(release->vertex_buf);
+}
+
+
+/**
+ * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
+ * This involves installing new fragment ops SPU code.
+ * If this function is never called, we'll use a regular C fallback function
+ * for fragment processing.
+ */
+static void
+cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");
+
+ /* Copy state info (for fallback case only - this will eventually
+ * go away when the fallback case goes away)
+ */
+ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
+ memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
+ memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));
+
+ /* Make sure the SPU knows which buffers it's expected to read when
+ * it's told to pull tiles.
+ */
+ spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);
+
+ /* If we're forcing the fallback code to be used (for debug purposes),
+ * install that. Otherwise install the incoming SPU code.
+ */
+ if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) {
+ static unsigned int warned = 0;
+ if (!warned) {
+ fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
+ warned = 1;
+ }
+ /* The following two lines aren't really necessary if you
+ * know the debug flags won't change during a run, and if you
+ * know that the function pointers are initialized correctly.
+ * We set them here to allow a person to change the debug
+ * flags during a run (from inside a debugger).
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+ return;
+ }
+
+ /* Make sure the SPU code buffer is large enough to hold the incoming code.
+ * Note that we *don't* use align_malloc() and align_free(), because
+ * those utility functions are *not* available in SPU code.
+ * */
+ if (spu.fragment_ops_code_size < fops->total_code_size) {
+ if (spu.fragment_ops_code != NULL) {
+ free(spu.fragment_ops_code);
+ }
+ spu.fragment_ops_code_size = fops->total_code_size;
+ spu.fragment_ops_code = malloc(fops->total_code_size);
+ if (spu.fragment_ops_code == NULL) {
+ /* Whoops. */
+ fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size);
+ spu.fragment_ops_code = NULL;
+ spu.fragment_ops_code_size = 0;
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+ return;
+ }
+ }
+
+ /* Copy the SPU code from the command buffer to the spu buffer */
+ memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size);
+
+ /* Set the pointers for the front-facing and back-facing fragments
+ * to the specified offsets within the code. Note that if the
+ * front-facing and back-facing code are the same, they'll have
+ * the same offset.
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index];
+ spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index];
+}
+
+static void
+cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n");
+ /* Copy SPU code from batch buffer to spu buffer */
+ memcpy(spu.fragment_program_code, fp->code,
+ SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
+#if 01
+ /* Point function pointer at new code */
+ spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
+#endif
+}
+
+
+static uint
+cmd_state_fs_constants(const qword *buffer, uint pos)
+{
+ const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0);
+ const float *constants = (const float *) &buffer[pos+2];
+ uint i;
+
+ D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const);
+
+ /* Expand each float to float[4] for SOA execution */
+ for (i = 0; i < num_const; i++) {
+ D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]);
+ spu.constants[i] = spu_splats(constants[i]);
+ }
+
+ /* return new buffer pos (in 16-byte words) */
+ return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16);
+}
+
+
+static void
+cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
+ cmd->width,
+ cmd->height,
+ cmd->color_start,
+ cmd->color_format,
+ cmd->depth_format);
+
+ ASSERT_ALIGN16(cmd->color_start);
+ ASSERT_ALIGN16(cmd->depth_start);
+
+ spu.fb.color_start = cmd->color_start;
+ spu.fb.depth_start = cmd->depth_start;
+ spu.fb.color_format = cmd->color_format;
+ spu.fb.depth_format = cmd->depth_format;
+ spu.fb.width = cmd->width;
+ spu.fb.height = cmd->height;
+ spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
+ spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
+
+ switch (spu.fb.depth_format) {
+ case PIPE_FORMAT_Z32_UNORM:
+ spu.fb.zsize = 4;
+ spu.fb.zscale = (float) 0xffffffffu;
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ spu.fb.zsize = 4;
+ spu.fb.zscale = (float) 0x00ffffffu;
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ spu.fb.zsize = 2;
+ spu.fb.zscale = (float) 0xffffu;
+ break;
+ default:
+ spu.fb.zsize = 0;
+ break;
+ }
+}
+
+
+/**
+ * Tex texture mask_s/t and scale_s/t fields depend on the texture size and
+ * sampler wrap modes.
+ */
+static void
+update_tex_masks(struct spu_texture *texture,
+ const struct pipe_sampler_state *sampler)
+{
+ uint i;
+
+ for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
+ int width = texture->level[i].width;
+ int height = texture->level[i].height;
+
+ if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT)
+ texture->level[i].mask_s = spu_splats(width - 1);
+ else
+ texture->level[i].mask_s = spu_splats(~0);
+
+ if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT)
+ texture->level[i].mask_t = spu_splats(height - 1);
+ else
+ texture->level[i].mask_t = spu_splats(~0);
+
+ if (sampler->normalized_coords) {
+ texture->level[i].scale_s = spu_splats((float) width);
+ texture->level[i].scale_t = spu_splats((float) height);
+ }
+ else {
+ texture->level[i].scale_s = spu_splats(1.0f);
+ texture->level[i].scale_t = spu_splats(1.0f);
+ }
+ }
+}
+
+
+static void
+cmd_state_sampler(const struct cell_command_sampler *sampler)
+{
+ uint unit = sampler->unit;
+
+ D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit);
+
+ spu.sampler[unit] = sampler->state;
+
+ switch (spu.sampler[unit].min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
+ break;
+ case PIPE_TEX_FILTER_ANISO:
+ /* fall-through, for now */
+ case PIPE_TEX_FILTER_NEAREST:
+ spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ switch (spu.sampler[sampler->unit].mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
+ break;
+ case PIPE_TEX_FILTER_ANISO:
+ /* fall-through, for now */
+ case PIPE_TEX_FILTER_NEAREST:
+ spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ switch (spu.sampler[sampler->unit].min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ spu.sample_texture_2d[unit] = sample_texture_2d_lod;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit];
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
+}
+
+
+static void
+cmd_state_texture(const struct cell_command_texture *texture)
+{
+ const uint unit = texture->unit;
+ uint i;
+
+ D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit);
+
+ spu.texture[unit].max_level = 0;
+ spu.texture[unit].target = texture->target;
+
+ for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
+ uint width = texture->width[i];
+ uint height = texture->height[i];
+ uint depth = texture->depth[i];
+
+ D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i,
+ texture->start[i], texture->width[i], texture->height[i]);
+
+ spu.texture[unit].level[i].start = texture->start[i];
+ spu.texture[unit].level[i].width = width;
+ spu.texture[unit].level[i].height = height;
+ spu.texture[unit].level[i].depth = depth;
+
+ spu.texture[unit].level[i].tiles_per_row =
+ (width + TILE_SIZE - 1) / TILE_SIZE;
+
+ spu.texture[unit].level[i].bytes_per_image =
+ 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth;
+
+ spu.texture[unit].level[i].max_s = spu_splats((int) width - 1);
+ spu.texture[unit].level[i].max_t = spu_splats((int) height - 1);
+
+ if (texture->start[i])
+ spu.texture[unit].max_level = i;
+ }
+
+ update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
+}
+
+
+static void
+cmd_state_vertex_info(const struct vertex_info *vinfo)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
+ ASSERT(vinfo->num_attribs >= 1);
+ ASSERT(vinfo->num_attribs <= 8);
+ memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
+}
+
+
+static void
+cmd_state_vs_array_info(const struct cell_array_info *vs_info)
+{
+ const unsigned attr = vs_info->attr;
+
+ ASSERT(attr < PIPE_MAX_ATTRIBS);
+ draw.vertex_fetch.src_ptr[attr] = vs_info->base;
+ draw.vertex_fetch.pitch[attr] = vs_info->pitch;
+ draw.vertex_fetch.size[attr] = vs_info->size;
+ draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
+ draw.vertex_fetch.dirty = 1;
+}
+
+
+static void
+cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
+{
+ mfc_get(attribute_fetch_code_buffer,
+ (unsigned int) code->base, /* src */
+ code->size,
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
+
+ draw.vertex_fetch.code = attribute_fetch_code_buffer;
+}
+
+
+static void
+cmd_finish(void)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "FINISH\n");
+ really_clear_tiles(0);
+ /* wait for all outstanding DMAs to finish */
+ mfc_write_tag_mask(~0);
+ mfc_read_tag_status_all();
+ /* send mbox message to PPU */
+ spu_write_out_mbox(CELL_CMD_FINISH);
+}
+
+
+/**
+ * Execute a batch of commands which was sent to us by the PPU.
+ * See the cell_emit_state.c code to see where the commands come from.
+ *
+ * The opcode param encodes the location of the buffer and its size.
+ */
+static void
+cmd_batch(uint opcode)
+{
+ const uint buf = (opcode >> 8) & 0xff;
+ uint size = (opcode >> 16);
+ qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB;
+ const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]);
+ uint pos;
+
+ D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n",
+ buf, size, spu.init.buffers[buf]);
+
+ ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
+
+ ASSERT_ALIGN16(spu.init.buffers[buf]);
+
+ size = ROUNDUP16(size);
+
+ ASSERT_ALIGN16(spu.init.buffers[buf]);
+
+ mfc_get(buffer, /* dest */
+ (unsigned int) spu.init.buffers[buf], /* src */
+ size,
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
+
+ /* Tell PPU we're done copying the buffer to local store */
+ D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf);
+ release_buffer(buf);
+
+ /*
+ * Loop over commands in the batch buffer
+ */
+ for (pos = 0; pos < usize; /* no incr */) {
+ switch (si_to_uint(buffer[pos])) {
+ /*
+ * rendering commands
+ */
+ case CELL_CMD_CLEAR_SURFACE:
+ {
+ struct cell_command_clear_surface *clr
+ = (struct cell_command_clear_surface *) &buffer[pos];
+ cmd_clear_surface(clr);
+ pos += sizeof(*clr) / 16;
+ }
+ break;
+ case CELL_CMD_RENDER:
+ {
+ struct cell_command_render *render
+ = (struct cell_command_render *) &buffer[pos];
+ uint pos_incr;
+ cmd_render(render, &pos_incr);
+ pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return
+ }
+ break;
+ /*
+ * state-update commands
+ */
+ case CELL_CMD_STATE_FRAMEBUFFER:
+ {
+ struct cell_command_framebuffer *fb
+ = (struct cell_command_framebuffer *) &buffer[pos];
+ cmd_state_framebuffer(fb);
+ pos += sizeof(*fb) / 16;
+ }
+ break;
+ case CELL_CMD_STATE_FRAGMENT_OPS:
+ {
+ struct cell_command_fragment_ops *fops
+ = (struct cell_command_fragment_ops *) &buffer[pos];
+ cmd_state_fragment_ops(fops);
+ /* This is a variant-sized command */
+ pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16;
+ }
+ break;
+ case CELL_CMD_STATE_FRAGMENT_PROGRAM:
+ {
+ struct cell_command_fragment_program *fp
+ = (struct cell_command_fragment_program *) &buffer[pos];
+ cmd_state_fragment_program(fp);
+ pos += sizeof(*fp) / 16;
+ }
+ break;
+ case CELL_CMD_STATE_FS_CONSTANTS:
+ pos = cmd_state_fs_constants(buffer, pos);
+ break;
+ case CELL_CMD_STATE_RASTERIZER:
+ {
+ struct cell_command_rasterizer *rast =
+ (struct cell_command_rasterizer *) &buffer[pos];
+ spu.rasterizer = rast->rasterizer;
+ pos += sizeof(*rast) / 16;
+ }
+ break;
+ case CELL_CMD_STATE_SAMPLER:
+ {
+ struct cell_command_sampler *sampler
+ = (struct cell_command_sampler *) &buffer[pos];
+ cmd_state_sampler(sampler);
+ pos += sizeof(*sampler) / 16;
+ }
+ break;
+ case CELL_CMD_STATE_TEXTURE:
+ {
+ struct cell_command_texture *texture
+ = (struct cell_command_texture *) &buffer[pos];
+ cmd_state_texture(texture);
+ pos += sizeof(*texture) / 16;
+ }
+ break;
+ case CELL_CMD_STATE_VERTEX_INFO:
+ cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
+ pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16;
+ break;
+ case CELL_CMD_STATE_VIEWPORT:
+ (void) memcpy(& draw.viewport, &buffer[pos+1],
+ sizeof(struct pipe_viewport_state));
+ pos += 1 + ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16;
+ break;
+ case CELL_CMD_STATE_UNIFORMS:
+ draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0);
+ pos += 2;
+ break;
+ case CELL_CMD_STATE_VS_ARRAY_INFO:
+ cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
+ pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16;
+ break;
+ case CELL_CMD_STATE_BIND_VS:
+#if 0
+ spu_bind_vertex_shader(&draw,
+ (struct cell_shader_info *) &buffer[pos+1]);
+#endif
+ pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16;
+ break;
+ case CELL_CMD_STATE_ATTRIB_FETCH:
+ cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
+ &buffer[pos+1]);
+ pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16;
+ break;
+ /*
+ * misc commands
+ */
+ case CELL_CMD_FINISH:
+ cmd_finish();
+ pos += 1;
+ break;
+ case CELL_CMD_FENCE:
+ {
+ struct cell_command_fence *fence_cmd =
+ (struct cell_command_fence *) &buffer[pos];
+ cmd_fence(fence_cmd);
+ pos += sizeof(*fence_cmd) / 16;
+ }
+ break;
+ case CELL_CMD_RELEASE_VERTS:
+ {
+ struct cell_command_release_verts *release
+ = (struct cell_command_release_verts *) &buffer[pos];
+ cmd_release_verts(release);
+ pos += sizeof(*release) / 16;
+ }
+ break;
+ case CELL_CMD_FLUSH_BUFFER_RANGE: {
+ struct cell_buffer_range *br = (struct cell_buffer_range *)
+ &buffer[pos+1];
+
+ spu_dcache_mark_dirty((unsigned) br->base, br->size);
+ pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16;
+ break;
+ }
+ default:
+ printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos]));
+ ASSERT(0);
+ break;
+ }
+ }
+
+ D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n");
+}
+
+
+#define PERF 0
+
+
+/**
+ * Main loop for SPEs: Get a command, execute it, repeat.
+ */
+void
+command_loop(void)
+{
+ int exitFlag = 0;
+ uint t0, t1;
+
+ D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n");
+
+ while (!exitFlag) {
+ unsigned opcode;
+
+ D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n");
+
+ if (PERF)
+ spu_write_decrementer(~0);
+
+ /* read/wait from mailbox */
+ opcode = (unsigned int) spu_read_in_mbox();
+ D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode);
+
+ if (PERF)
+ t0 = spu_read_decrementer();
+
+ switch (opcode & CELL_CMD_OPCODE_MASK) {
+ case CELL_CMD_EXIT:
+ D_PRINTF(CELL_DEBUG_CMD, "EXIT\n");
+ exitFlag = 1;
+ break;
+ case CELL_CMD_VS_EXECUTE:
+#if 0
+ spu_execute_vertex_shader(&draw, &cmd.vs);
+#endif
+ break;
+ case CELL_CMD_BATCH:
+ cmd_batch(opcode);
+ break;
+ default:
+ printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
+ }
+
+ if (PERF) {
+ t1 = spu_read_decrementer();
+ printf("wait mbox time: %gms batch time: %gms\n",
+ (~0u - t0) * spu.init.inv_timebase,
+ (t0 - t1) * spu.init.inv_timebase);
+ }
+ }
+
+ D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n");
+
+ if (spu.init.debug_flags & CELL_DEBUG_CACHE)
+ spu_dcache_report();
+}
+
+/* Initialize this module; we manage the fragment ops buffer here. */
+void
+spu_command_init(void)
+{
+ /* Install default/fallback fragment processing function.
+ * This will normally be overriden by a code-gen'd function
+ * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+
+ /* Set up the basic empty buffer for code-gen'ed fragment ops */
+ spu.fragment_ops_code = NULL;
+ spu.fragment_ops_code_size = 0;
+}
+
+void
+spu_command_close(void)
+{
+ /* Deallocate the code-gen buffer for fragment ops, and reset the
+ * fragment ops functions to their initial setting (just to leave
+ * things in a good state).
+ */
+ if (spu.fragment_ops_code != NULL) {
+ free(spu.fragment_ops_code);
+ }
+ spu_command_init();
+}
diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h
new file mode 100644
index 0000000000..83dcdade28
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_command.h
@@ -0,0 +1,35 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+extern void
+command_loop(void);
+
+extern void
+spu_command_init(void);
+
+extern void
+spu_command_close(void);
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c
new file mode 100644
index 0000000000..a6d67634fd
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_dcache.c
@@ -0,0 +1,145 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "cell/common.h"
+#include "spu_main.h"
+#include "spu_dcache.h"
+
+#define CACHELINE_LOG2SIZE 7
+#define LINE_SIZE (1U << 7)
+#define ALIGN_MASK (~(LINE_SIZE - 1))
+
+#define CACHE_NAME data
+#define CACHED_TYPE qword
+#define CACHE_TYPE CACHE_TYPE_RO
+#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0)
+#define CACHE_LOG2NNWAY 2
+#define CACHE_LOG2NSETS 6
+#ifdef DEBUG
+#define CACHE_STATS 1
+#endif
+#include <cache-api.h>
+
+/* Yes folks, this is ugly.
+ */
+#undef CACHE_NWAY
+#undef CACHE_NSETS
+#define CACHE_NAME data
+#define CACHE_NWAY 4
+#define CACHE_NSETS (1U << 6)
+
+
+/**
+ * Fetch between arbitrary number of bytes from an unaligned address
+ *
+ * \param dst Destination data buffer
+ * \param ea Main memory effective address of source data
+ * \param size Number of bytes to read
+ *
+ * \warning
+ * As is hinted by the type of the \c dst pointer, this function writes
+ * multiples of 16-bytes.
+ */
+void
+spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size)
+{
+ const int shift = ea & 0x0f;
+ const unsigned read_size = ROUNDUP16(size + shift);
+ const unsigned last_read = ROUNDUP16(ea + size);
+ const qword *const last_write = dst + (ROUNDUP16(size) / 16);
+ unsigned i;
+
+
+ if (shift == 0) {
+ /* Data is already aligned. Fetch directly into the destination buffer.
+ */
+ for (i = 0; i < size; i += 16) {
+ *(dst++) = cache_rd(data, ea + i);
+ }
+ } else {
+ qword hi;
+
+
+ /* Please exercise extreme caution when modifying this code. This code
+ * must not read past the end of the page containing the source data,
+ * and it must not write more than ((size + 15) / 16) qwords to the
+ * destination buffer.
+ */
+ ea &= ~0x0f;
+ hi = cache_rd(data, ea);
+ for (i = 16; i < read_size; i += 16) {
+ qword lo = cache_rd(data, ea + i);
+
+ *(dst++) = si_or((qword) spu_slqwbyte(hi, shift),
+ (qword) spu_rlmaskqwbyte(lo, shift - 16));
+ hi = lo;
+ }
+
+ if (dst != last_write) {
+ *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0));
+ }
+ }
+
+ ASSERT((ea + i) == last_read);
+ ASSERT(dst == last_write);
+}
+
+
+/**
+ * Notify the cache that a range of main memory may have been modified
+ */
+void
+spu_dcache_mark_dirty(unsigned ea, unsigned size)
+{
+ unsigned i;
+ const unsigned aligned_start = (ea & ALIGN_MASK);
+ const unsigned aligned_end = (ea + size + (LINE_SIZE - 1))
+ & ALIGN_MASK;
+
+
+ for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) {
+ const unsigned entry = __cache_dir[i];
+ const unsigned addr = entry & ~0x0f;
+
+ __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end))
+ ? (entry & ~CACHELINE_VALID) : entry;
+ }
+}
+
+
+/**
+ * Print cache utilization report
+ */
+void
+spu_dcache_report(void)
+{
+#ifdef CACHE_STATS
+ if (spu.init.id == 0) {
+ printf("SPU 0: Texture cache report:\n");
+ cache_pr_stats(data);
+ }
+#endif
+}
+
+
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h
new file mode 100644
index 0000000000..39a19eb31b
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_dcache.h
@@ -0,0 +1,37 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SPU_DCACHE_H
+#define SPU_DCACHE_H
+
+extern void
+spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size);
+
+extern void
+spu_dcache_mark_dirty(unsigned ea, unsigned size);
+
+extern void
+spu_dcache_report(void);
+
+#endif /* SPU_DCACHE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
new file mode 100644
index 0000000000..e27df2dfb3
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -0,0 +1,1933 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * TGSI interpretor/executor.
+ *
+ * Flow control information:
+ *
+ * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
+ * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
+ * care since a condition may be true for some quad components but false
+ * for other components.
+ *
+ * We basically execute all statements (even if they're in the part of
+ * an IF/ELSE clause that's "not taken") and use a special mask to
+ * control writing to destination registers. This is the ExecMask.
+ * See store_dest().
+ *
+ * The ExecMask is computed from three other masks (CondMask, LoopMask and
+ * ContMask) which are controlled by the flow control instructions (namely:
+ * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
+ *
+ *
+ * Authors:
+ * Michal Krol
+ * Brian Paul
+ */
+
+#include <transpose_matrix4x4.h>
+#include <simdmath/ceilf4.h>
+#include <simdmath/cosf4.h>
+#include <simdmath/divf4.h>
+#include <simdmath/floorf4.h>
+#include <simdmath/log2f4.h>
+#include <simdmath/powf4.h>
+#include <simdmath/sinf4.h>
+#include <simdmath/sqrtf4.h>
+#include <simdmath/truncf4.h>
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "spu_exec.h"
+#include "spu_main.h"
+#include "spu_vertex_shader.h"
+#include "spu_dcache.h"
+#include "cell/common.h"
+
+#define TILE_TOP_LEFT 0
+#define TILE_TOP_RIGHT 1
+#define TILE_BOTTOM_LEFT 2
+#define TILE_BOTTOM_RIGHT 3
+
+/*
+ * Shorthand locations of various utility registers (_I = Index, _C = Channel)
+ */
+#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
+#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
+#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
+#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
+#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
+#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
+#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
+#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
+#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
+#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
+#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
+#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
+#define TEMP_128_I TGSI_EXEC_TEMP_128_I
+#define TEMP_128_C TGSI_EXEC_TEMP_128_C
+#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
+#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
+#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
+#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
+#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
+#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
+#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
+#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_R0 TGSI_EXEC_TEMP_R0
+
+#define FOR_EACH_CHANNEL(CHAN)\
+ for (CHAN = 0; CHAN < 4; CHAN++)
+
+#define IS_CHANNEL_ENABLED(INST, CHAN)\
+ ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define IS_CHANNEL_ENABLED2(INST, CHAN)\
+ ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
+ FOR_EACH_CHANNEL( CHAN )\
+ if (IS_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
+ FOR_EACH_CHANNEL( CHAN )\
+ if (IS_CHANNEL_ENABLED2( INST, CHAN ))
+
+
+/** The execution mask depends on the conditional mask and the loop mask */
+#define UPDATE_EXEC_MASK(MACH) \
+ MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
+
+
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+
+
+/**
+ * Initialize machine state by expanding tokens to full instructions,
+ * allocating temporary storage, setting up constants, etc.
+ * After this, we can call spu_exec_machine_run() many times.
+ */
+void
+spu_exec_machine_init(struct spu_exec_machine *mach,
+ uint numSamplers,
+ struct spu_sampler *samplers,
+ unsigned processor)
+{
+ const qword zero = si_il(0);
+ const qword not_zero = si_il(~0);
+
+ (void) numSamplers;
+ mach->Samplers = samplers;
+ mach->Processor = processor;
+ mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
+
+ /* Setup constants. */
+ mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero;
+ mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero;
+ mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1);
+ mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31);
+
+ mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
+ mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
+ mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
+ mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
+}
+
+
+static INLINE qword
+micro_abs(qword src)
+{
+ return si_rotmi(si_shli(src, 1), -1);
+}
+
+static INLINE qword
+micro_ceil(qword src)
+{
+ return (qword) _ceilf4((vec_float4) src);
+}
+
+static INLINE qword
+micro_cos(qword src)
+{
+ return (qword) _cosf4((vec_float4) src);
+}
+
+static const qword br_shuf = {
+ TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
+ TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
+ TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
+ TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
+ TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
+ TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
+ TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
+ TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
+};
+
+static const qword bl_shuf = {
+ TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
+ TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
+ TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
+ TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
+ TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
+ TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
+ TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
+ TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
+};
+
+static const qword tl_shuf = {
+ TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
+ TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
+ TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
+ TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
+ TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
+ TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
+ TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
+ TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
+};
+
+static qword
+micro_ddx(qword src)
+{
+ qword bottom_right = si_shufb(src, src, br_shuf);
+ qword bottom_left = si_shufb(src, src, bl_shuf);
+
+ return si_fs(bottom_right, bottom_left);
+}
+
+static qword
+micro_ddy(qword src)
+{
+ qword top_left = si_shufb(src, src, tl_shuf);
+ qword bottom_left = si_shufb(src, src, bl_shuf);
+
+ return si_fs(top_left, bottom_left);
+}
+
+static INLINE qword
+micro_div(qword src0, qword src1)
+{
+ return (qword) _divf4((vec_float4) src0, (vec_float4) src1);
+}
+
+static qword
+micro_flr(qword src)
+{
+ return (qword) _floorf4((vec_float4) src);
+}
+
+static qword
+micro_frc(qword src)
+{
+ return si_fs(src, (qword) _floorf4((vec_float4) src));
+}
+
+static INLINE qword
+micro_ge(qword src0, qword src1)
+{
+ return si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
+}
+
+static qword
+micro_lg2(qword src)
+{
+ return (qword) _log2f4((vec_float4) src);
+}
+
+static INLINE qword
+micro_lt(qword src0, qword src1)
+{
+ const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
+
+ return si_xori(tmp, 0xff);
+}
+
+static INLINE qword
+micro_max(qword src0, qword src1)
+{
+ return si_selb(src1, src0, si_fcgt(src0, src1));
+}
+
+static INLINE qword
+micro_min(qword src0, qword src1)
+{
+ return si_selb(src0, src1, si_fcgt(src0, src1));
+}
+
+static qword
+micro_neg(qword src)
+{
+ return si_xor(src, (qword) spu_splats(0x80000000));
+}
+
+static qword
+micro_set_sign(qword src)
+{
+ return si_or(src, (qword) spu_splats(0x80000000));
+}
+
+static qword
+micro_pow(qword src0, qword src1)
+{
+ return (qword) _powf4((vec_float4) src0, (vec_float4) src1);
+}
+
+static qword
+micro_rnd(qword src)
+{
+ const qword half = (qword) spu_splats(0.5f);
+
+ /* May be able to use _roundf4. There may be some difference, though.
+ */
+ return (qword) _floorf4((vec_float4) si_fa(src, half));
+}
+
+static INLINE qword
+micro_ishr(qword src0, qword src1)
+{
+ return si_rotma(src0, si_sfi(src1, 0));
+}
+
+static qword
+micro_trunc(qword src)
+{
+ return (qword) _truncf4((vec_float4) src);
+}
+
+static qword
+micro_sin(qword src)
+{
+ return (qword) _sinf4((vec_float4) src);
+}
+
+static INLINE qword
+micro_sqrt(qword src)
+{
+ return (qword) _sqrtf4((vec_float4) src);
+}
+
+static void
+fetch_src_file_channel(
+ const struct spu_exec_machine *mach,
+ const uint file,
+ const uint swizzle,
+ const union spu_exec_channel *index,
+ union spu_exec_channel *chan )
+{
+ switch( swizzle ) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ switch( file ) {
+ case TGSI_FILE_CONSTANT: {
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ const float *ptr = mach->Consts[index->i[i]];
+ float tmp[4];
+
+ spu_dcache_fetch_unaligned((qword *) tmp,
+ (uintptr_t)(ptr + swizzle),
+ sizeof(float));
+
+ chan->f[i] = tmp[0];
+ }
+ break;
+ }
+
+ case TGSI_FILE_INPUT:
+ chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ case TGSI_FILE_IMMEDIATE:
+ ASSERT( index->i[0] < (int) mach->ImmLimit );
+ ASSERT( index->i[1] < (int) mach->ImmLimit );
+ ASSERT( index->i[2] < (int) mach->ImmLimit );
+ ASSERT( index->i[3] < (int) mach->ImmLimit );
+
+ chan->f[0] = mach->Imms[index->i[0]][swizzle];
+ chan->f[1] = mach->Imms[index->i[1]][swizzle];
+ chan->f[2] = mach->Imms[index->i[2]][swizzle];
+ chan->f[3] = mach->Imms[index->i[3]][swizzle];
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ case TGSI_FILE_OUTPUT:
+ /* vertex/fragment output vars can be read too */
+ chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ default:
+ ASSERT( 0 );
+ }
+ break;
+
+ case TGSI_EXTSWIZZLE_ZERO:
+ *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
+ break;
+
+ default:
+ ASSERT( 0 );
+ }
+}
+
+static void
+fetch_source(
+ const struct spu_exec_machine *mach,
+ union spu_exec_channel *chan,
+ const struct tgsi_full_src_register *reg,
+ const uint chan_index )
+{
+ union spu_exec_channel index;
+ uint swizzle;
+
+ index.i[0] =
+ index.i[1] =
+ index.i[2] =
+ index.i[3] = reg->SrcRegister.Index;
+
+ if (reg->SrcRegister.Indirect) {
+ union spu_exec_channel index2;
+ union spu_exec_channel indir_index;
+
+ index2.i[0] =
+ index2.i[1] =
+ index2.i[2] =
+ index2.i[3] = reg->SrcRegisterInd.Index;
+
+ swizzle = tgsi_util_get_src_register_swizzle(&reg->SrcRegisterInd,
+ CHAN_X);
+ fetch_src_file_channel(
+ mach,
+ reg->SrcRegisterInd.File,
+ swizzle,
+ &index2,
+ &indir_index );
+
+ index.q = si_a(index.q, indir_index.q);
+ }
+
+ if( reg->SrcRegister.Dimension ) {
+ switch( reg->SrcRegister.File ) {
+ case TGSI_FILE_INPUT:
+ index.q = si_mpyi(index.q, 17);
+ break;
+ case TGSI_FILE_CONSTANT:
+ index.q = si_shli(index.q, 12);
+ break;
+ default:
+ ASSERT( 0 );
+ }
+
+ index.i[0] += reg->SrcRegisterDim.Index;
+ index.i[1] += reg->SrcRegisterDim.Index;
+ index.i[2] += reg->SrcRegisterDim.Index;
+ index.i[3] += reg->SrcRegisterDim.Index;
+
+ if (reg->SrcRegisterDim.Indirect) {
+ union spu_exec_channel index2;
+ union spu_exec_channel indir_index;
+
+ index2.i[0] =
+ index2.i[1] =
+ index2.i[2] =
+ index2.i[3] = reg->SrcRegisterDimInd.Index;
+
+ swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
+ fetch_src_file_channel(
+ mach,
+ reg->SrcRegisterDimInd.File,
+ swizzle,
+ &index2,
+ &indir_index );
+
+ index.q = si_a(index.q, indir_index.q);
+ }
+ }
+
+ swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+ fetch_src_file_channel(
+ mach,
+ reg->SrcRegister.File,
+ swizzle,
+ &index,
+ chan );
+
+ switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
+ case TGSI_UTIL_SIGN_CLEAR:
+ chan->q = micro_abs(chan->q);
+ break;
+
+ case TGSI_UTIL_SIGN_SET:
+ chan->q = micro_set_sign(chan->q);
+ break;
+
+ case TGSI_UTIL_SIGN_TOGGLE:
+ chan->q = micro_neg(chan->q);
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+ }
+
+ if (reg->SrcRegisterExtMod.Complement) {
+ chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
+ }
+}
+
+static void
+store_dest(
+ struct spu_exec_machine *mach,
+ const union spu_exec_channel *chan,
+ const struct tgsi_full_dst_register *reg,
+ const struct tgsi_full_instruction *inst,
+ uint chan_index )
+{
+ union spu_exec_channel *dst;
+
+ switch( reg->DstRegister.File ) {
+ case TGSI_FILE_NULL:
+ return;
+
+ case TGSI_FILE_OUTPUT:
+ dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
+ + reg->DstRegister.Index].xyzw[chan_index];
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
+ break;
+
+ default:
+ ASSERT( 0 );
+ return;
+ }
+
+ switch (inst->Instruction.Saturate)
+ {
+ case TGSI_SAT_NONE:
+ if (mach->ExecMask & 0x1)
+ dst->i[0] = chan->i[0];
+ if (mach->ExecMask & 0x2)
+ dst->i[1] = chan->i[1];
+ if (mach->ExecMask & 0x4)
+ dst->i[2] = chan->i[2];
+ if (mach->ExecMask & 0x8)
+ dst->i[3] = chan->i[3];
+ break;
+
+ case TGSI_SAT_ZERO_ONE:
+ /* XXX need to obey ExecMask here */
+ dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
+ dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
+ break;
+
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ ASSERT( 0 );
+ break;
+
+ default:
+ ASSERT( 0 );
+ }
+}
+
+#define FETCH(VAL,INDEX,CHAN)\
+ fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
+
+#define STORE(VAL,INDEX,CHAN)\
+ store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
+
+
+/**
+ * Execute ARB-style KIL which is predicated by a src register.
+ * Kill fragment if any of the four values is less than zero.
+ */
+static void
+exec_kil(struct spu_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ uint uniquemask;
+ uint chan_index;
+ uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+ union spu_exec_channel r[1];
+
+ /* This mask stores component bits that were already tested. Note that
+ * we test if the value is less than zero, so 1.0 and 0.0 need not to be
+ * tested. */
+ uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+
+ for (chan_index = 0; chan_index < 4; chan_index++)
+ {
+ uint swizzle;
+ uint i;
+
+ /* unswizzle channel */
+ swizzle = tgsi_util_get_full_src_register_extswizzle (
+ &inst->FullSrcRegisters[0],
+ chan_index);
+
+ /* check if the component has not been already tested */
+ if (uniquemask & (1 << swizzle))
+ continue;
+ uniquemask |= 1 << swizzle;
+
+ FETCH(&r[0], 0, chan_index);
+ for (i = 0; i < 4; i++)
+ if (r[0].f[i] < 0.0f)
+ kilmask |= 1 << i;
+ }
+
+ mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
+}
+
+/**
+ * Execute NVIDIA-style KIL which is predicated by a condition code.
+ * Kill fragment if the condition code is TRUE.
+ */
+static void
+exec_kilp(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+
+ /* TODO: build kilmask from CC mask */
+
+ mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
+}
+
+/*
+ * Fetch a texel using STR texture coordinates.
+ */
+static void
+fetch_texel( struct spu_sampler *sampler,
+ const union spu_exec_channel *s,
+ const union spu_exec_channel *t,
+ const union spu_exec_channel *p,
+ float lodbias, /* XXX should be float[4] */
+ union spu_exec_channel *r,
+ union spu_exec_channel *g,
+ union spu_exec_channel *b,
+ union spu_exec_channel *a )
+{
+ qword rgba[4];
+ qword out[4];
+
+ sampler->get_samples(sampler, s->f, t->f, p->f, lodbias,
+ (float (*)[4]) rgba);
+
+ _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
+ r->q = out[0];
+ g->q = out[1];
+ b->q = out[2];
+ a->q = out[3];
+}
+
+
+static void
+exec_tex(struct spu_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ boolean biasLod, boolean projected)
+{
+ const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ union spu_exec_channel r[8];
+ uint chan_index;
+ float lodBias;
+
+ /* printf("Sampler %u unit %u\n", sampler, unit); */
+
+ switch (inst->InstructionExtTexture.Texture) {
+ case TGSI_TEXTURE_1D:
+
+ FETCH(&r[0], 0, CHAN_X);
+
+ if (projected) {
+ FETCH(&r[1], 0, CHAN_W);
+ r[0].q = micro_div(r[0].q, r[1].q);
+ }
+
+ if (biasLod) {
+ FETCH(&r[1], 0, CHAN_W);
+ lodBias = r[2].f[0];
+ }
+ else
+ lodBias = 0.0;
+
+ fetch_texel(&mach->Samplers[unit],
+ &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
+ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+ break;
+
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ if (projected) {
+ FETCH(&r[3], 0, CHAN_W);
+ r[0].q = micro_div(r[0].q, r[3].q);
+ r[1].q = micro_div(r[1].q, r[3].q);
+ r[2].q = micro_div(r[2].q, r[3].q);
+ }
+
+ if (biasLod) {
+ FETCH(&r[3], 0, CHAN_W);
+ lodBias = r[3].f[0];
+ }
+ else
+ lodBias = 0.0;
+
+ fetch_texel(&mach->Samplers[unit],
+ &r[0], &r[1], &r[2], lodBias, /* inputs */
+ &r[0], &r[1], &r[2], &r[3]); /* outputs */
+ break;
+
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ if (projected) {
+ FETCH(&r[3], 0, CHAN_W);
+ r[0].q = micro_div(r[0].q, r[3].q);
+ r[1].q = micro_div(r[1].q, r[3].q);
+ r[2].q = micro_div(r[2].q, r[3].q);
+ }
+
+ if (biasLod) {
+ FETCH(&r[3], 0, CHAN_W);
+ lodBias = r[3].f[0];
+ }
+ else
+ lodBias = 0.0;
+
+ fetch_texel(&mach->Samplers[unit],
+ &r[0], &r[1], &r[2], lodBias,
+ &r[0], &r[1], &r[2], &r[3]);
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[chan_index], 0, chan_index );
+ }
+}
+
+
+
+static void
+constant_interpolation(
+ struct spu_exec_machine *mach,
+ unsigned attrib,
+ unsigned chan )
+{
+ unsigned i;
+
+ for( i = 0; i < QUAD_SIZE; i++ ) {
+ mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
+ }
+}
+
+static void
+linear_interpolation(
+ struct spu_exec_machine *mach,
+ unsigned attrib,
+ unsigned chan )
+{
+ const float x = mach->QuadPos.xyzw[0].f[0];
+ const float y = mach->QuadPos.xyzw[1].f[0];
+ const float dadx = mach->InterpCoefs[attrib].dadx[chan];
+ const float dady = mach->InterpCoefs[attrib].dady[chan];
+ const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
+ mach->Inputs[attrib].xyzw[chan].f[0] = a0;
+ mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
+ mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
+ mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
+}
+
+static void
+perspective_interpolation(
+ struct spu_exec_machine *mach,
+ unsigned attrib,
+ unsigned chan )
+{
+ const float x = mach->QuadPos.xyzw[0].f[0];
+ const float y = mach->QuadPos.xyzw[1].f[0];
+ const float dadx = mach->InterpCoefs[attrib].dadx[chan];
+ const float dady = mach->InterpCoefs[attrib].dady[chan];
+ const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
+ const float *w = mach->QuadPos.xyzw[3].f;
+ /* divide by W here */
+ mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
+ mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
+ mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
+ mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
+}
+
+
+typedef void (* interpolation_func)(
+ struct spu_exec_machine *mach,
+ unsigned attrib,
+ unsigned chan );
+
+static void
+exec_declaration(struct spu_exec_machine *mach,
+ const struct tgsi_full_declaration *decl)
+{
+ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
+ if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+ unsigned first, last, mask;
+ interpolation_func interp;
+
+ first = decl->DeclarationRange.First;
+ last = decl->DeclarationRange.Last;
+ mask = decl->Declaration.UsageMask;
+
+ switch( decl->Declaration.Interpolate ) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ interp = constant_interpolation;
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ interp = linear_interpolation;
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ interp = perspective_interpolation;
+ break;
+
+ default:
+ ASSERT( 0 );
+ }
+
+ if( mask == TGSI_WRITEMASK_XYZW ) {
+ unsigned i, j;
+
+ for( i = first; i <= last; i++ ) {
+ for( j = 0; j < NUM_CHANNELS; j++ ) {
+ interp( mach, i, j );
+ }
+ }
+ }
+ else {
+ unsigned i, j;
+
+ for( j = 0; j < NUM_CHANNELS; j++ ) {
+ if( mask & (1 << j) ) {
+ for( i = first; i <= last; i++ ) {
+ interp( mach, i, j );
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+static void
+exec_instruction(
+ struct spu_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ int *pc )
+{
+ uint chan_index;
+ union spu_exec_channel r[8];
+
+ (*pc)++;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ r[0].q = si_cflts(r[0].q, 0);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_SWZ:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LIT:
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ FETCH( &r[0], 0, CHAN_X );
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
+ STORE( &r[0], 0, CHAN_Y );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ FETCH( &r[1], 0, CHAN_Y );
+ r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
+
+ FETCH( &r[2], 0, CHAN_W );
+ r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
+ r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
+ r[1].q = micro_pow(r[1].q, r[2].q);
+
+ /* r0 = (r0 > 0.0) ? r1 : 0.0
+ */
+ r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
+ r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
+ r[0].q);
+ STORE( &r[0], 0, CHAN_Z );
+ }
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_RCP:
+ /* TGSI_OPCODE_RECIP */
+ FETCH( &r[0], 0, CHAN_X );
+ r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_RSQ:
+ /* TGSI_OPCODE_RECIPSQRT */
+ FETCH( &r[0], 0, CHAN_X );
+ r[0].q = micro_sqrt(r[0].q);
+ r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_EXP:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_LOG:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_MUL:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
+ {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+
+ r[0].q = si_fm(r[0].q, r[1].q);
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_ADD:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ r[0].q = si_fa(r[0].q, r[1].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DP3:
+ /* TGSI_OPCODE_DOT3 */
+ FETCH( &r[0], 0, CHAN_X );
+ FETCH( &r[1], 1, CHAN_X );
+ r[0].q = si_fm(r[0].q, r[1].q);
+
+ FETCH( &r[1], 0, CHAN_Y );
+ FETCH( &r[2], 1, CHAN_Y );
+ r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+
+ FETCH( &r[1], 0, CHAN_Z );
+ FETCH( &r[2], 1, CHAN_Z );
+ r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DP4:
+ /* TGSI_OPCODE_DOT4 */
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 1, CHAN_X);
+
+ r[0].q = si_fm(r[0].q, r[1].q);
+
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 1, CHAN_Y);
+
+ r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+ FETCH(&r[1], 0, CHAN_Z);
+ FETCH(&r[2], 1, CHAN_Z);
+
+ r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+ FETCH(&r[1], 0, CHAN_W);
+ FETCH(&r[2], 1, CHAN_W);
+
+ r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DST:
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ FETCH( &r[0], 0, CHAN_Y );
+ FETCH( &r[1], 1, CHAN_Y);
+ r[0].q = si_fm(r[0].q, r[1].q);
+ STORE( &r[0], 0, CHAN_Y );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ FETCH( &r[0], 0, CHAN_Z );
+ STORE( &r[0], 0, CHAN_Z );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ FETCH( &r[0], 1, CHAN_W );
+ STORE( &r[0], 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_MIN:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+
+ r[0].q = micro_min(r[0].q, r[1].q);
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_MAX:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+
+ r[0].q = micro_max(r[0].q, r[1].q);
+
+ STORE(&r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SLT:
+ /* TGSI_OPCODE_SETLT */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+
+ r[0].q = micro_ge(r[0].q, r[1].q);
+ r[0].q = si_xori(r[0].q, 0xff);
+
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SGE:
+ /* TGSI_OPCODE_SETGE */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ r[0].q = micro_ge(r[0].q, r[1].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_MAD:
+ /* TGSI_OPCODE_MADD */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ FETCH( &r[2], 2, chan_index );
+ r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SUB:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+
+ r[0].q = si_fs(r[0].q, r[1].q);
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_LERP:
+ /* TGSI_OPCODE_LRP */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+ FETCH(&r[2], 2, chan_index);
+
+ r[1].q = si_fs(r[1].q, r[2].q);
+ r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_CND:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_CND0:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_DOT2ADD:
+ /* TGSI_OPCODE_DP2A */
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_INDEX:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_NEGATE:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_FRAC:
+ /* TGSI_OPCODE_FRC */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ r[0].q = micro_frc(r[0].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_CLAMP:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_FLOOR:
+ /* TGSI_OPCODE_FLR */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ r[0].q = micro_flr(r[0].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_ROUND:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ r[0].q = micro_rnd(r[0].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_EXPBASE2:
+ /* TGSI_OPCODE_EX2 */
+ FETCH(&r[0], 0, CHAN_X);
+
+ r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LOGBASE2:
+ /* TGSI_OPCODE_LG2 */
+ FETCH( &r[0], 0, CHAN_X );
+ r[0].q = micro_lg2(r[0].q);
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_POWER:
+ /* TGSI_OPCODE_POW */
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 1, CHAN_X);
+
+ r[0].q = micro_pow(r[0].q, r[1].q);
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_CROSSPRODUCT:
+ /* TGSI_OPCODE_XPD */
+ FETCH(&r[0], 0, CHAN_Y);
+ FETCH(&r[1], 1, CHAN_Z);
+ FETCH(&r[3], 0, CHAN_Z);
+ FETCH(&r[4], 1, CHAN_Y);
+
+ /* r2 = (r0 * r1) - (r3 * r5)
+ */
+ r[2].q = si_fm(r[3].q, r[5].q);
+ r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &r[2], 0, CHAN_X );
+ }
+
+ FETCH(&r[2], 1, CHAN_X);
+ FETCH(&r[5], 0, CHAN_X);
+
+ /* r3 = (r3 * r2) - (r1 * r5)
+ */
+ r[1].q = si_fm(r[1].q, r[5].q);
+ r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ STORE( &r[3], 0, CHAN_Y );
+ }
+
+ /* r5 = (r5 * r4) - (r0 * r2)
+ */
+ r[0].q = si_fm(r[0].q, r[2].q);
+ r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ STORE( &r[5], 0, CHAN_Z );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_MULTIPLYMATRIX:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_ABS:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+
+ r[0].q = micro_abs(r[0].q);
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_RCC:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_DPH:
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 1, CHAN_X);
+
+ r[0].q = si_fm(r[0].q, r[1].q);
+
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 1, CHAN_Y);
+
+ r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+ FETCH(&r[1], 0, CHAN_Z);
+ FETCH(&r[2], 1, CHAN_Z);
+
+ r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+ FETCH(&r[1], 1, CHAN_W);
+
+ r[0].q = si_fa(r[0].q, r[1].q);
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_COS:
+ FETCH(&r[0], 0, CHAN_X);
+
+ r[0].q = micro_cos(r[0].q);
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DDX:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ r[0].q = micro_ddx(r[0].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DDY:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ r[0].q = micro_ddy(r[0].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_KILP:
+ exec_kilp (mach, inst);
+ break;
+
+ case TGSI_OPCODE_KIL:
+ exec_kil (mach, inst);
+ break;
+
+ case TGSI_OPCODE_PK2H:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_PK2US:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_PK4B:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_PK4UB:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_RFL:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_SEQ:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+
+ r[0].q = si_fceq(r[0].q, r[1].q);
+
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SFL:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_SGT:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ r[0].q = si_fcgt(r[0].q, r[1].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SIN:
+ FETCH( &r[0], 0, CHAN_X );
+ r[0].q = micro_sin(r[0].q);
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SLE:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+
+ r[0].q = si_fcgt(r[0].q, r[1].q);
+ r[0].q = si_xori(r[0].q, 0xff);
+
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SNE:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+
+ r[0].q = si_fceq(r[0].q, r[1].q);
+ r[0].q = si_xori(r[0].q, 0xff);
+
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_STR:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_TEX:
+ /* simple texture lookup */
+ /* src[0] = texcoord */
+ /* src[1] = sampler unit */
+ exec_tex(mach, inst, FALSE, FALSE);
+ break;
+
+ case TGSI_OPCODE_TXB:
+ /* Texture lookup with lod bias */
+ /* src[0] = texcoord (src[0].w = load bias) */
+ /* src[1] = sampler unit */
+ exec_tex(mach, inst, TRUE, FALSE);
+ break;
+
+ case TGSI_OPCODE_TXD:
+ /* Texture lookup with explict partial derivatives */
+ /* src[0] = texcoord */
+ /* src[1] = d[strq]/dx */
+ /* src[2] = d[strq]/dy */
+ /* src[3] = sampler unit */
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_TXL:
+ /* Texture lookup with explit LOD */
+ /* src[0] = texcoord (src[0].w = load bias) */
+ /* src[1] = sampler unit */
+ exec_tex(mach, inst, TRUE, FALSE);
+ break;
+
+ case TGSI_OPCODE_TXP:
+ /* Texture lookup with projection */
+ /* src[0] = texcoord (src[0].w = projection) */
+ /* src[1] = sampler unit */
+ exec_tex(mach, inst, TRUE, TRUE);
+ break;
+
+ case TGSI_OPCODE_UP2H:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_UP2US:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_UP4B:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_UP4UB:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_X2D:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_ARA:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_ARR:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_BRA:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_CAL:
+ /* skip the call if no execution channels are enabled */
+ if (mach->ExecMask) {
+ /* do the call */
+
+ /* push the Cond, Loop, Cont stacks */
+ ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+ mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+ ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+
+ ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
+ mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
+
+ /* note that PC was already incremented above */
+ mach->CallStack[mach->CallStackTop++] = *pc;
+ *pc = inst->InstructionExtLabel.Label;
+ }
+ break;
+
+ case TGSI_OPCODE_RET:
+ mach->FuncMask &= ~mach->ExecMask;
+ UPDATE_EXEC_MASK(mach);
+
+ if (mach->ExecMask == 0x0) {
+ /* really return now (otherwise, keep executing */
+
+ if (mach->CallStackTop == 0) {
+ /* returning from main() */
+ *pc = -1;
+ return;
+ }
+ *pc = mach->CallStack[--mach->CallStackTop];
+
+ /* pop the Cond, Loop, Cont stacks */
+ ASSERT(mach->CondStackTop > 0);
+ mach->CondMask = mach->CondStack[--mach->CondStackTop];
+ ASSERT(mach->LoopStackTop > 0);
+ mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+ ASSERT(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ ASSERT(mach->FuncStackTop > 0);
+ mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+
+ UPDATE_EXEC_MASK(mach);
+ }
+ break;
+
+ case TGSI_OPCODE_SSG:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_CMP:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+ FETCH(&r[2], 2, chan_index);
+
+ /* r0 = (r0 < 0.0) ? r1 : r2
+ */
+ r[3].q = si_xor(r[3].q, r[3].q);
+ r[0].q = micro_lt(r[0].q, r[3].q);
+ r[0].q = si_selb(r[1].q, r[2].q, r[0].q);
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_SCS:
+ if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+ FETCH( &r[0], 0, CHAN_X );
+ }
+ if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+ r[1].q = micro_cos(r[0].q);
+ STORE( &r[1], 0, CHAN_X );
+ }
+ if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+ r[1].q = micro_sin(r[0].q);
+ STORE( &r[1], 0, CHAN_Y );
+ }
+ if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
+ }
+ if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_NRM:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_DIV:
+ ASSERT( 0 );
+ break;
+
+ case TGSI_OPCODE_DP2:
+ FETCH( &r[0], 0, CHAN_X );
+ FETCH( &r[1], 1, CHAN_X );
+ r[0].q = si_fm(r[0].q, r[1].q);
+
+ FETCH( &r[1], 0, CHAN_Y );
+ FETCH( &r[2], 1, CHAN_Y );
+ r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_IF:
+ /* push CondMask */
+ ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+ mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+ FETCH( &r[0], 0, CHAN_X );
+ /* update CondMask */
+ if( ! r[0].u[0] ) {
+ mach->CondMask &= ~0x1;
+ }
+ if( ! r[0].u[1] ) {
+ mach->CondMask &= ~0x2;
+ }
+ if( ! r[0].u[2] ) {
+ mach->CondMask &= ~0x4;
+ }
+ if( ! r[0].u[3] ) {
+ mach->CondMask &= ~0x8;
+ }
+ UPDATE_EXEC_MASK(mach);
+ /* Todo: If CondMask==0, jump to ELSE */
+ break;
+
+ case TGSI_OPCODE_ELSE:
+ /* invert CondMask wrt previous mask */
+ {
+ uint prevMask;
+ ASSERT(mach->CondStackTop > 0);
+ prevMask = mach->CondStack[mach->CondStackTop - 1];
+ mach->CondMask = ~mach->CondMask & prevMask;
+ UPDATE_EXEC_MASK(mach);
+ /* Todo: If CondMask==0, jump to ENDIF */
+ }
+ break;
+
+ case TGSI_OPCODE_ENDIF:
+ /* pop CondMask */
+ ASSERT(mach->CondStackTop > 0);
+ mach->CondMask = mach->CondStack[--mach->CondStackTop];
+ UPDATE_EXEC_MASK(mach);
+ break;
+
+ case TGSI_OPCODE_END:
+ /* halt execution */
+ *pc = -1;
+ break;
+
+ case TGSI_OPCODE_REP:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_ENDREP:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_PUSHA:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_POPA:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_CEIL:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ r[0].q = micro_ceil(r[0].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_I2F:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ r[0].q = si_csflt(r[0].q, 0);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_NOT:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ r[0].q = si_xorbi(r[0].q, 0xff);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_TRUNC:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ r[0].q = micro_trunc(r[0].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SHL:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+
+ r[0].q = si_shl(r[0].q, r[1].q);
+
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SHR:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ r[0].q = micro_ishr(r[0].q, r[1].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_AND:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ r[0].q = si_and(r[0].q, r[1].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_OR:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ r[0].q = si_or(r[0].q, r[1].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_MOD:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_XOR:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ r[0].q = si_xor(r[0].q, r[1].q);
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SAD:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_TXF:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_TXQ:
+ ASSERT (0);
+ break;
+
+ case TGSI_OPCODE_EMIT:
+ mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
+ mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+ break;
+
+ case TGSI_OPCODE_ENDPRIM:
+ mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
+ mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
+ break;
+
+ case TGSI_OPCODE_LOOP:
+ /* fall-through (for now) */
+ case TGSI_OPCODE_BGNLOOP2:
+ /* push LoopMask and ContMasks */
+ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+ ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+ break;
+
+ case TGSI_OPCODE_ENDLOOP:
+ /* fall-through (for now at least) */
+ case TGSI_OPCODE_ENDLOOP2:
+ /* Restore ContMask, but don't pop */
+ ASSERT(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
+ if (mach->LoopMask) {
+ /* repeat loop: jump to instruction just past BGNLOOP */
+ *pc = inst->InstructionExtLabel.Label + 1;
+ }
+ else {
+ /* exit loop: pop LoopMask */
+ ASSERT(mach->LoopStackTop > 0);
+ mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+ /* pop ContMask */
+ ASSERT(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ }
+ UPDATE_EXEC_MASK(mach);
+ break;
+
+ case TGSI_OPCODE_BRK:
+ /* turn off loop channels for each enabled exec channel */
+ mach->LoopMask &= ~mach->ExecMask;
+ /* Todo: if mach->LoopMask == 0, jump to end of loop */
+ UPDATE_EXEC_MASK(mach);
+ break;
+
+ case TGSI_OPCODE_CONT:
+ /* turn off cont channels for each enabled exec channel */
+ mach->ContMask &= ~mach->ExecMask;
+ /* Todo: if mach->LoopMask == 0, jump to end of loop */
+ UPDATE_EXEC_MASK(mach);
+ break;
+
+ case TGSI_OPCODE_BGNSUB:
+ /* no-op */
+ break;
+
+ case TGSI_OPCODE_ENDSUB:
+ /* no-op */
+ break;
+
+ case TGSI_OPCODE_NOISE1:
+ ASSERT( 0 );
+ break;
+
+ case TGSI_OPCODE_NOISE2:
+ ASSERT( 0 );
+ break;
+
+ case TGSI_OPCODE_NOISE3:
+ ASSERT( 0 );
+ break;
+
+ case TGSI_OPCODE_NOISE4:
+ ASSERT( 0 );
+ break;
+
+ case TGSI_OPCODE_NOP:
+ break;
+
+ default:
+ ASSERT( 0 );
+ }
+}
+
+
+/**
+ * Run TGSI interpreter.
+ * \return bitmask of "alive" quad components
+ */
+uint
+spu_exec_machine_run( struct spu_exec_machine *mach )
+{
+ uint i;
+ int pc = 0;
+
+ mach->CondMask = 0xf;
+ mach->LoopMask = 0xf;
+ mach->ContMask = 0xf;
+ mach->FuncMask = 0xf;
+ mach->ExecMask = 0xf;
+
+ mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */
+ ASSERT(mach->CondStackTop == 0);
+ ASSERT(mach->LoopStackTop == 0);
+ ASSERT(mach->ContStackTop == 0);
+ ASSERT(mach->CallStackTop == 0);
+
+ mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
+ mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
+
+ if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
+ mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
+ mach->Primitives[0] = 0;
+ }
+
+
+ /* execute declarations (interpolants) */
+ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
+ for (i = 0; i < mach->NumDeclarations; i++) {
+ union {
+ struct tgsi_full_declaration decl;
+ qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
+ } d ALIGN16_ATTRIB;
+ unsigned ea = (unsigned) (mach->Declarations + pc);
+
+ spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
+
+ exec_declaration( mach, &d.decl );
+ }
+ }
+
+ /* execute instructions, until pc is set to -1 */
+ while (pc != -1) {
+ union {
+ struct tgsi_full_instruction inst;
+ qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
+ } i ALIGN16_ATTRIB;
+ unsigned ea = (unsigned) (mach->Instructions + pc);
+
+ spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
+ exec_instruction( mach, & i.inst, &pc );
+ }
+
+#if 0
+ /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
+ if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+ /*
+ * Scale back depth component.
+ */
+ for (i = 0; i < 4; i++)
+ mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
+ }
+#endif
+
+ return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+}
+
+
diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h
new file mode 100644
index 0000000000..8605679940
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_exec.h
@@ -0,0 +1,172 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#if !defined SPU_EXEC_H
+#define SPU_EXEC_H
+
+#include "pipe/p_compiler.h"
+#include "tgsi/tgsi_exec.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Registers may be treated as float, signed int or unsigned int.
+ */
+union spu_exec_channel
+{
+ float f[QUAD_SIZE];
+ int i[QUAD_SIZE];
+ unsigned u[QUAD_SIZE];
+ qword q;
+};
+
+/**
+ * A vector[RGBA] of channels[4 pixels]
+ */
+struct spu_exec_vector
+{
+ union spu_exec_channel xyzw[NUM_CHANNELS];
+};
+
+/**
+ * For fragment programs, information for computing fragment input
+ * values from plane equation of the triangle/line.
+ */
+struct spu_interp_coef
+{
+ float a0[NUM_CHANNELS]; /* in an xyzw layout */
+ float dadx[NUM_CHANNELS];
+ float dady[NUM_CHANNELS];
+};
+
+
+struct softpipe_tile_cache; /**< Opaque to TGSI */
+
+/**
+ * Information for sampling textures, which must be implemented
+ * by code outside the TGSI executor.
+ */
+struct spu_sampler
+{
+ const struct pipe_sampler_state *state;
+ struct pipe_texture *texture;
+ /** Get samples for four fragments in a quad */
+ void (*get_samples)(struct spu_sampler *sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE]);
+ void *pipe; /*XXX temporary*/
+ struct softpipe_tile_cache *cache;
+};
+
+
+/**
+ * Run-time virtual machine state for executing TGSI shader.
+ */
+struct spu_exec_machine
+{
+ /*
+ * 32 program temporaries
+ * 4 internal temporaries
+ * 1 address
+ */
+ struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS
+ + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]
+ ALIGN16_ATTRIB;
+
+ struct spu_exec_vector *Addrs;
+
+ struct spu_sampler *Samplers;
+
+ float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
+ unsigned ImmLimit;
+ float (*Consts)[4];
+ struct spu_exec_vector *Inputs;
+ struct spu_exec_vector *Outputs;
+ unsigned Processor;
+
+ /* GEOMETRY processor only. */
+ unsigned *Primitives;
+
+ /* FRAGMENT processor only. */
+ const struct spu_interp_coef *InterpCoefs;
+ struct spu_exec_vector QuadPos;
+
+ /* Conditional execution masks */
+ uint CondMask; /**< For IF/ELSE/ENDIF */
+ uint LoopMask; /**< For BGNLOOP/ENDLOOP */
+ uint ContMask; /**< For loop CONT statements */
+ uint FuncMask; /**< For function calls */
+ uint ExecMask; /**< = CondMask & LoopMask */
+
+ /** Condition mask stack (for nested conditionals) */
+ uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
+ int CondStackTop;
+
+ /** Loop mask stack (for nested loops) */
+ uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
+ int LoopStackTop;
+
+ /** Loop continue mask stack (see comments in tgsi_exec.c) */
+ uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
+ int ContStackTop;
+
+ /** Function execution mask stack (for executing subroutine code) */
+ uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
+ int FuncStackTop;
+
+ /** Function call stack for saving/restoring the program counter */
+ uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
+ int CallStackTop;
+
+ struct tgsi_full_instruction *Instructions;
+ uint NumInstructions;
+
+ struct tgsi_full_declaration *Declarations;
+ uint NumDeclarations;
+};
+
+
+extern void
+spu_exec_machine_init(struct spu_exec_machine *mach,
+ uint numSamplers,
+ struct spu_sampler *samplers,
+ unsigned processor);
+
+extern uint
+spu_exec_machine_run( struct spu_exec_machine *mach );
+
+
+#if defined __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* SPU_EXEC_H */
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c
new file mode 100644
index 0000000000..ff3d609d25
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_funcs.c
@@ -0,0 +1,173 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * SPU functions accessed by shaders.
+ *
+ * Authors: Brian Paul
+ */
+
+
+#include <string.h>
+#include <libmisc.h>
+#include <math.h>
+#include <cos14_v.h>
+#include <sin14_v.h>
+#include <simdmath/exp2f4.h>
+#include <simdmath/log2f4.h>
+#include <simdmath/powf4.h>
+
+#include "cell/common.h"
+#include "spu_main.h"
+#include "spu_funcs.h"
+#include "spu_texture.h"
+
+
+/** For "return"-ing four vectors */
+struct vec_4x4
+{
+ vector float v[4];
+};
+
+
+static vector float
+spu_cos(vector float x)
+{
+ return _cos14_v(x);
+}
+
+static vector float
+spu_sin(vector float x)
+{
+ return _sin14_v(x);
+}
+
+static vector float
+spu_pow(vector float x, vector float y)
+{
+ return _powf4(x, y);
+}
+
+static vector float
+spu_exp2(vector float x)
+{
+ return _exp2f4(x);
+}
+
+static vector float
+spu_log2(vector float x)
+{
+ return _log2f4(x);
+}
+
+
+static struct vec_4x4
+spu_tex_2d(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit)
+{
+ struct vec_4x4 colors;
+ (void) r;
+ (void) q;
+ spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
+ return colors;
+}
+
+static struct vec_4x4
+spu_tex_3d(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit)
+{
+ struct vec_4x4 colors;
+ (void) r;
+ (void) q;
+ spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
+ return colors;
+}
+
+static struct vec_4x4
+spu_tex_cube(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit)
+{
+ struct vec_4x4 colors;
+ (void) q;
+ sample_texture_cube(s, t, r, unit, colors.v);
+ return colors;
+}
+
+
+/**
+ * Add named function to list of "exported" functions that will be
+ * made available to the PPU-hosted code generator.
+ */
+static void
+export_func(struct cell_spu_function_info *spu_functions,
+ const char *name, void *addr)
+{
+ uint n = spu_functions->num;
+ ASSERT(strlen(name) < 16);
+ strcpy(spu_functions->names[n], name);
+ spu_functions->addrs[n] = (uint) addr;
+ spu_functions->num++;
+ ASSERT(spu_functions->num <= 16);
+}
+
+
+/**
+ * Return info about the SPU's function to the PPU / main memory.
+ * The PPU needs to know the address of some SPU-side functions so
+ * that we can generate shader code with function calls.
+ */
+void
+return_function_info(void)
+{
+ struct cell_spu_function_info funcs ALIGN16_ATTRIB;
+ int tag = TAG_MISC;
+
+ ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */
+
+ funcs.num = 0;
+ export_func(&funcs, "spu_cos", &spu_cos);
+ export_func(&funcs, "spu_sin", &spu_sin);
+ export_func(&funcs, "spu_pow", &spu_pow);
+ export_func(&funcs, "spu_exp2", &spu_exp2);
+ export_func(&funcs, "spu_log2", &spu_log2);
+ export_func(&funcs, "spu_tex_2d", &spu_tex_2d);
+ export_func(&funcs, "spu_tex_3d", &spu_tex_3d);
+ export_func(&funcs, "spu_tex_cube", &spu_tex_cube);
+
+ /* Send the function info back to the PPU / main memory */
+ mfc_put((void *) &funcs, /* src in local store */
+ (unsigned int) spu.init.spu_functions, /* dst in main memory */
+ sizeof(funcs), /* bytes */
+ tag,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << tag);
+}
+
+
+
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h
new file mode 100644
index 0000000000..3adb6ae99f
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_funcs.h
@@ -0,0 +1,35 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SPU_FUNCS_H
+#define SPU_FUNCS_H
+
+extern void
+return_function_info(void);
+
+#endif
+
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
new file mode 100644
index 0000000000..97c86d194d
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -0,0 +1,117 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/* main() for Cell SPU code */
+
+
+#include <stdio.h>
+#include <libmisc.h>
+
+#include "pipe/p_defines.h"
+
+#include "spu_funcs.h"
+#include "spu_command.h"
+#include "spu_main.h"
+#include "spu_per_fragment_op.h"
+#include "spu_texture.h"
+//#include "spu_test.h"
+#include "cell/common.h"
+
+
+/*
+helpful headers:
+/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
+/opt/cell/sdk/usr/include/libmisc.h
+*/
+
+struct spu_global spu;
+
+
+static void
+one_time_init(void)
+{
+ memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
+ memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
+ invalidate_tex_cache();
+}
+
+/* In some versions of the SDK the SPE main takes 'unsigned long' as a
+ * parameter. In others it takes 'unsigned long long'. Use a define to
+ * select between the two.
+ */
+#ifdef SPU_MAIN_PARAM_LONG_LONG
+typedef unsigned long long main_param_t;
+#else
+typedef unsigned long main_param_t;
+#endif
+
+/**
+ * SPE entrypoint.
+ */
+int
+main(main_param_t speid, main_param_t argp)
+{
+ int tag = 0;
+
+ (void) speid;
+
+ ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
+ ASSERT(sizeof(struct cell_command_render) % 8 == 0);
+ ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0);
+ ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0);
+
+ one_time_init();
+ spu_command_init();
+
+ D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid);
+ D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");
+
+ /* get initialization data */
+ mfc_get(&spu.init, /* dest */
+ (unsigned int) argp, /* src */
+ sizeof(struct cell_init_info), /* bytes */
+ tag,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask( 1 << tag );
+
+ if (spu.init.id == 0) {
+ return_function_info();
+ }
+
+#if 0
+ if (spu.init.id==0)
+ spu_test_misc(spu.init.id);
+#endif
+
+ command_loop();
+
+ spu_command_close();
+
+ return 0;
+}
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
new file mode 100644
index 0000000000..33767e7c51
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -0,0 +1,254 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SPU_MAIN_H
+#define SPU_MAIN_H
+
+
+#include <spu_mfcio.h>
+
+#include "cell/common.h"
+#include "draw/draw_vertex.h"
+#include "pipe/p_state.h"
+
+
+#if DEBUG
+/* These debug macros use the unusual construction ", ##__VA_ARGS__"
+ * which expands to the expected comma + args if variadic arguments
+ * are supplied, but swallows the comma if there are no variadic
+ * arguments (which avoids syntax errors that would otherwise occur).
+ */
+#define D_PRINTF(flag, format,...) \
+ if (spu.init.debug_flags & (flag)) \
+ printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__)
+#else
+#define D_PRINTF(...)
+#endif
+
+
+/**
+ * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels.
+ * The data may be addressed through several different types.
+ */
+typedef union {
+ ushort us[TILE_SIZE][TILE_SIZE];
+ uint ui[TILE_SIZE][TILE_SIZE];
+ vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4];
+ vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2];
+} tile_t;
+
+
+#define TILE_STATUS_CLEAR 1
+#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */
+#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */
+#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */
+#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */
+
+
+/** Function for sampling textures */
+typedef void (*spu_sample_texture_2d_func)(vector float s,
+ vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+
+/** Function for performing per-fragment ops */
+typedef void (*spu_fragment_ops_func)(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragRed,
+ vector float fragGreen,
+ vector float fragBlue,
+ vector float fragAlpha,
+ vector unsigned int mask);
+
+/** Function for running fragment program */
+typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
+ vector float *outputs,
+ vector float *constants);
+
+
+struct spu_framebuffer
+{
+ void *color_start; /**< addr of color surface in main memory */
+ void *depth_start; /**< addr of depth surface in main memory */
+ enum pipe_format color_format;
+ enum pipe_format depth_format;
+ uint width, height; /**< size in pixels */
+ uint width_tiles, height_tiles; /**< width and height in tiles */
+
+ uint color_clear_value;
+ uint depth_clear_value;
+
+ uint zsize; /**< 0, 2 or 4 bytes per Z */
+ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */
+} ALIGN16_ATTRIB;
+
+
+/** per-texture level info */
+struct spu_texture_level
+{
+ void *start;
+ ushort width, height, depth;
+ ushort tiles_per_row;
+ uint bytes_per_image;
+ /** texcoord scale factors */
+ vector float scale_s, scale_t, scale_r;
+ /** texcoord masks (if REPEAT then size-1, else ~0) */
+ vector signed int mask_s, mask_t, mask_r;
+ /** texcoord clamp limits */
+ vector signed int max_s, max_t, max_r;
+} ALIGN16_ATTRIB;
+
+
+struct spu_texture
+{
+ struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS];
+ uint max_level;
+ uint target; /**< PIPE_TEXTURE_x */
+} ALIGN16_ATTRIB;
+
+
+/**
+ * All SPU global/context state will be in a singleton object of this type:
+ */
+struct spu_global
+{
+ /** One-time init/constant info */
+ struct cell_init_info init;
+
+ /*
+ * Current state
+ */
+ struct spu_framebuffer fb;
+ struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
+ struct pipe_blend_state blend;
+ struct pipe_blend_color blend_color;
+ struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_rasterizer_state rasterizer;
+ struct spu_texture texture[PIPE_MAX_SAMPLERS];
+ struct vertex_info vertex_info;
+
+ /** Current color and Z tiles */
+ tile_t ctile ALIGN16_ATTRIB;
+ tile_t ztile ALIGN16_ATTRIB;
+
+ /** Read depth/stencil tiles? */
+ boolean read_depth_stencil;
+
+ /** Current tiles' status */
+ ubyte cur_ctile_status, cur_ztile_status;
+
+ /** Status of all tiles in framebuffer */
+ ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+
+ /** Current fragment ops machine code, at 8-byte boundary */
+ uint *fragment_ops_code;
+ uint fragment_ops_code_size;
+ /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */
+ spu_fragment_ops_func fragment_ops[2];
+
+ /** Current fragment program machine code, at 8-byte boundary */
+ uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB;
+ /** Current fragment ops function */
+ spu_fragment_program_func fragment_program;
+
+ /** Current texture sampler function */
+ spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS];
+
+ /** Fragment program constants */
+ vector float constants[4 * CELL_MAX_CONSTANTS];
+
+} ALIGN16_ATTRIB;
+
+
+extern struct spu_global spu;
+
+
+
+/* DMA TAGS */
+
+#define TAG_SURFACE_CLEAR 10
+#define TAG_VERTEX_BUFFER 11
+#define TAG_READ_TILE_COLOR 12
+#define TAG_READ_TILE_Z 13
+#define TAG_WRITE_TILE_COLOR 14
+#define TAG_WRITE_TILE_Z 15
+#define TAG_INDEX_BUFFER 16
+#define TAG_BATCH_BUFFER 17
+#define TAG_MISC 18
+#define TAG_DCACHE0 20
+#define TAG_DCACHE1 21
+#define TAG_DCACHE2 22
+#define TAG_DCACHE3 23
+#define TAG_FENCE 24
+
+
+static INLINE void
+wait_on_mask(unsigned tagMask)
+{
+ mfc_write_tag_mask( tagMask );
+ /* wait for completion of _any_ DMAs specified by tagMask */
+ mfc_read_tag_status_any();
+}
+
+
+static INLINE void
+wait_on_mask_all(unsigned tagMask)
+{
+ mfc_write_tag_mask( tagMask );
+ /* wait for completion of _any_ DMAs specified by tagMask */
+ mfc_read_tag_status_all();
+}
+
+
+
+
+
+static INLINE void
+memset16(ushort *d, ushort value, uint count)
+{
+ uint i;
+ for (i = 0; i < count; i++)
+ d[i] = value;
+}
+
+
+static INLINE void
+memset32(uint *d, uint value, uint count)
+{
+ uint i;
+ for (i = 0; i < count; i++)
+ d[i] = value;
+}
+
+
+#endif /* SPU_MAIN_H */
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
new file mode 100644
index 0000000000..683664e8a4
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -0,0 +1,631 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \author Brian Paul
+ */
+
+
+#include <transpose_matrix4x4.h>
+#include "pipe/p_format.h"
+#include "spu_main.h"
+#include "spu_colorpack.h"
+#include "spu_per_fragment_op.h"
+
+
+#define LINEAR_QUAD_LAYOUT 1
+
+
+static INLINE vector float
+spu_min(vector float a, vector float b)
+{
+ vector unsigned int m;
+ m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
+ return spu_sel(a, b, m);
+}
+
+
+static INLINE vector float
+spu_max(vector float a, vector float b)
+{
+ vector unsigned int m;
+ m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
+ return spu_sel(b, a, m);
+}
+
+
+/**
+ * Called by rasterizer for each quad after the shader has run. Do
+ * all the per-fragment operations including alpha test, z test,
+ * stencil test, blend, colormask and logicops. This is a
+ * fallback/debug function. In reality we'll use a generated function
+ * produced by the PPU. But this function is useful for
+ * debug/validation.
+ */
+void
+spu_fallback_fragment_ops(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragR,
+ vector float fragG,
+ vector float fragB,
+ vector float fragA,
+ vector unsigned int mask)
+{
+ vector float frag_aos[4];
+ unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
+ unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */
+
+ /*
+ * Do alpha test
+ */
+ if (spu.depth_stencil_alpha.alpha.enabled) {
+ vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
+ vector unsigned int amask;
+
+ switch (spu.depth_stencil_alpha.alpha.func) {
+ case PIPE_FUNC_LESS:
+ amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */
+ break;
+ case PIPE_FUNC_GREATER:
+ amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */
+ break;
+ case PIPE_FUNC_GEQUAL:
+ amask = spu_cmpgt(ref, fragA);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_LEQUAL:
+ amask = spu_cmpgt(fragA, ref);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_EQUAL:
+ amask = spu_cmpeq(ref, fragA);
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ amask = spu_cmpeq(ref, fragA);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_ALWAYS:
+ amask = spu_splats(0xffffffffU);
+ break;
+ case PIPE_FUNC_NEVER:
+ amask = spu_splats( 0x0U);
+ break;
+ default:
+ ;
+ }
+
+ mask = spu_and(mask, amask);
+ }
+
+
+ /*
+ * Z and/or stencil testing...
+ */
+ if (spu.depth_stencil_alpha.depth.enabled ||
+ spu.depth_stencil_alpha.stencil[0].enabled) {
+
+ /* get four Z/Stencil values from tile */
+ vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
+ vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
+ vector unsigned int ifbZ = spu_and(ifbZS, mask24);
+ vector unsigned int ifbS = spu_andc(ifbZS, mask24);
+
+ if (spu.depth_stencil_alpha.stencil[0].enabled) {
+ /* do stencil test */
+ ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM);
+
+ }
+ else if (spu.depth_stencil_alpha.depth.enabled) {
+ /* do depth test */
+
+ ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM ||
+ spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM);
+
+ vector unsigned int ifragZ;
+ vector unsigned int zmask;
+
+ /* convert four fragZ from float to uint */
+ fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
+ ifragZ = spu_convtu(fragZ, 0);
+
+ /* do depth comparison, setting zmask with results */
+ switch (spu.depth_stencil_alpha.depth.func) {
+ case PIPE_FUNC_LESS:
+ zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
+ break;
+ case PIPE_FUNC_GREATER:
+ zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */
+ break;
+ case PIPE_FUNC_GEQUAL:
+ zmask = spu_cmpgt(ifbZ, ifragZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_LEQUAL:
+ zmask = spu_cmpgt(ifragZ, ifbZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_EQUAL:
+ zmask = spu_cmpeq(ifbZ, ifragZ);
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ zmask = spu_cmpeq(ifbZ, ifragZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_ALWAYS:
+ zmask = spu_splats(0xffffffffU);
+ break;
+ case PIPE_FUNC_NEVER:
+ zmask = spu_splats( 0x0U);
+ break;
+ default:
+ ;
+ }
+
+ mask = spu_and(mask, zmask);
+
+ /* merge framebuffer Z and fragment Z according to the mask */
+ ifbZ = spu_or(spu_and(ifragZ, mask),
+ spu_andc(ifbZ, mask));
+ }
+
+ if (spu_extract(spu_orx(mask), 0)) {
+ /* put new fragment Z/Stencil values back into Z/Stencil tile */
+ depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
+
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
+ }
+ }
+
+
+ /*
+ * If we'll need the current framebuffer/tile colors for blending
+ * or logicop or colormask, fetch them now.
+ */
+ if (spu.blend.blend_enable ||
+ spu.blend.logicop_enable ||
+ spu.blend.colormask != 0xf) {
+
+#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
+ fbc0 = colorTile->ui[y][x*2+0];
+ fbc1 = colorTile->ui[y][x*2+1];
+ fbc2 = colorTile->ui[y][x*2+2];
+ fbc3 = colorTile->ui[y][x*2+3];
+#else
+ fbc0 = colorTile->ui[y+0][x+0];
+ fbc1 = colorTile->ui[y+0][x+1];
+ fbc2 = colorTile->ui[y+1][x+0];
+ fbc3 = colorTile->ui[y+1][x+1];
+#endif
+ }
+
+
+ /*
+ * Do blending
+ */
+ if (spu.blend.blend_enable) {
+ /* blending terms, misc regs */
+ vector float term1r, term1g, term1b, term1a;
+ vector float term2r, term2g, term2b, term2a;
+ vector float one, tmp;
+
+ vector float fbRGBA[4]; /* current framebuffer colors */
+
+ /* convert framebuffer colors from packed int to vector float */
+ {
+ vector float temp[4]; /* float colors in AOS form */
+ switch (spu.fb.color_format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ temp[0] = spu_unpack_B8G8R8A8(fbc0);
+ temp[1] = spu_unpack_B8G8R8A8(fbc1);
+ temp[2] = spu_unpack_B8G8R8A8(fbc2);
+ temp[3] = spu_unpack_B8G8R8A8(fbc3);
+ break;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ temp[0] = spu_unpack_A8R8G8B8(fbc0);
+ temp[1] = spu_unpack_A8R8G8B8(fbc1);
+ temp[2] = spu_unpack_A8R8G8B8(fbc2);
+ temp[3] = spu_unpack_A8R8G8B8(fbc3);
+ break;
+ default:
+ ASSERT(0);
+ }
+ _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */
+ }
+
+ /*
+ * Compute Src RGB terms (fragment color * factor)
+ */
+ switch (spu.blend.rgb_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term1r = fragR;
+ term1g = fragG;
+ term1b = fragB;
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ term1r =
+ term1g =
+ term1b = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term1r = spu_mul(fragR, fragR);
+ term1g = spu_mul(fragG, fragG);
+ term1b = spu_mul(fragB, fragB);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term1r = spu_mul(fragR, fragA);
+ term1g = spu_mul(fragG, fragA);
+ term1b = spu_mul(fragB, fragA);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ term1r = spu_mul(fragR, fbRGBA[0]);
+ term1g = spu_mul(fragG, fbRGBA[1]);
+ term1b = spu_mul(fragB, fbRGBA[1]);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term1r = spu_mul(fragR, fbRGBA[3]);
+ term1g = spu_mul(fragG, fbRGBA[3]);
+ term1b = spu_mul(fragB, fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0]));
+ term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1]));
+ term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2]));
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
+ term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3]));
+ term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3]));
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Src Alpha term (fragment alpha * factor)
+ */
+ switch (spu.blend.alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term1a = fragA;
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term1a = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term1a = spu_mul(fragA, fragA);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term1a = spu_mul(fragA, fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest RGB terms (framebuffer color * factor)
+ */
+ switch (spu.blend.rgb_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term2r = fbRGBA[0];
+ term2g = fbRGBA[1];
+ term2b = fbRGBA[2];
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ term2r =
+ term2g =
+ term2b = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term2r = spu_mul(fbRGBA[0], fragR);
+ term2g = spu_mul(fbRGBA[1], fragG);
+ term2b = spu_mul(fbRGBA[2], fragB);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term2r = spu_mul(fbRGBA[0], fragA);
+ term2g = spu_mul(fbRGBA[1], fragA);
+ term2b = spu_mul(fbRGBA[2], fragA);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ one = spu_splats(1.0f);
+ tmp = spu_sub(one, fragA);
+ term2r = spu_mul(fbRGBA[0], tmp);
+ term2g = spu_mul(fbRGBA[1], tmp);
+ term2b = spu_mul(fbRGBA[2], tmp);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ term2r = spu_mul(fbRGBA[0], fbRGBA[0]);
+ term2g = spu_mul(fbRGBA[1], fbRGBA[1]);
+ term2b = spu_mul(fbRGBA[2], fbRGBA[2]);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term2r = spu_mul(fbRGBA[0], fbRGBA[3]);
+ term2g = spu_mul(fbRGBA[1], fbRGBA[3]);
+ term2b = spu_mul(fbRGBA[2], fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0]));
+ term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1]));
+ term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2]));
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3]));
+ term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3]));
+ term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3]));
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest Alpha term (framebuffer alpha * factor)
+ */
+ switch (spu.blend.alpha_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term2a = fbRGBA[3];
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term2a = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term2a = spu_mul(fbRGBA[3], fragA);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ one = spu_splats(1.0f);
+ tmp = spu_sub(one, fragA);
+ term2a = spu_mul(fbRGBA[3], tmp);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term2a = spu_mul(fbRGBA[3], fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3]));
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest RGB terms
+ */
+ switch (spu.blend.rgb_func) {
+ case PIPE_BLEND_ADD:
+ fragR = spu_add(term1r, term2r);
+ fragG = spu_add(term1g, term2g);
+ fragB = spu_add(term1b, term2b);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ fragR = spu_sub(term1r, term2r);
+ fragG = spu_sub(term1g, term2g);
+ fragB = spu_sub(term1b, term2b);
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ fragR = spu_sub(term2r, term1r);
+ fragG = spu_sub(term2g, term1g);
+ fragB = spu_sub(term2b, term1b);
+ break;
+ case PIPE_BLEND_MIN:
+ fragR = spu_min(term1r, term2r);
+ fragG = spu_min(term1g, term2g);
+ fragB = spu_min(term1b, term2b);
+ break;
+ case PIPE_BLEND_MAX:
+ fragR = spu_max(term1r, term2r);
+ fragG = spu_max(term1g, term2g);
+ fragB = spu_max(term1b, term2b);
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest A term
+ */
+ switch (spu.blend.alpha_func) {
+ case PIPE_BLEND_ADD:
+ fragA = spu_add(term1a, term2a);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ fragA = spu_sub(term1a, term2a);
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ fragA = spu_sub(term2a, term1a);
+ break;
+ case PIPE_BLEND_MIN:
+ fragA = spu_min(term1a, term2a);
+ break;
+ case PIPE_BLEND_MAX:
+ fragA = spu_max(term1a, term2a);
+ break;
+ default:
+ ASSERT(0);
+ }
+ }
+
+
+ /*
+ * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
+ */
+#if 0
+ /* original code */
+ {
+ vector float frag_soa[4];
+ frag_soa[0] = fragR;
+ frag_soa[1] = fragG;
+ frag_soa[2] = fragB;
+ frag_soa[3] = fragA;
+ _transpose_matrix4x4(frag_aos, frag_soa);
+ }
+#else
+ /* short-cut relying on function parameter layout: */
+ _transpose_matrix4x4(frag_aos, &fragR);
+ (void) fragG;
+ (void) fragB;
+#endif
+
+ /*
+ * Pack fragment float colors into 32-bit RGBA words.
+ */
+ switch (spu.fb.color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ fragc0 = spu_pack_A8R8G8B8(frag_aos[0]);
+ fragc1 = spu_pack_A8R8G8B8(frag_aos[1]);
+ fragc2 = spu_pack_A8R8G8B8(frag_aos[2]);
+ fragc3 = spu_pack_A8R8G8B8(frag_aos[3]);
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ fragc0 = spu_pack_B8G8R8A8(frag_aos[0]);
+ fragc1 = spu_pack_B8G8R8A8(frag_aos[1]);
+ fragc2 = spu_pack_B8G8R8A8(frag_aos[2]);
+ fragc3 = spu_pack_B8G8R8A8(frag_aos[3]);
+ break;
+ default:
+ fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
+ ASSERT(0);
+ }
+
+
+ /*
+ * Do color masking
+ */
+ if (spu.blend.colormask != 0xf) {
+ uint cmask = 0x0; /* each byte corresponds to a color channel */
+
+ /* Form bitmask depending on color buffer format and colormask bits */
+ switch (spu.fb.color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ if (spu.blend.colormask & PIPE_MASK_R)
+ cmask |= 0x00ff0000; /* red */
+ if (spu.blend.colormask & PIPE_MASK_G)
+ cmask |= 0x0000ff00; /* green */
+ if (spu.blend.colormask & PIPE_MASK_B)
+ cmask |= 0x000000ff; /* blue */
+ if (spu.blend.colormask & PIPE_MASK_A)
+ cmask |= 0xff000000; /* alpha */
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ if (spu.blend.colormask & PIPE_MASK_R)
+ cmask |= 0x0000ff00; /* red */
+ if (spu.blend.colormask & PIPE_MASK_G)
+ cmask |= 0x00ff0000; /* green */
+ if (spu.blend.colormask & PIPE_MASK_B)
+ cmask |= 0xff000000; /* blue */
+ if (spu.blend.colormask & PIPE_MASK_A)
+ cmask |= 0x000000ff; /* alpha */
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Apply color mask to the 32-bit packed colors.
+ * if (cmask[i])
+ * frag color[i] = frag color[i];
+ * else
+ * frag color[i] = framebuffer color[i];
+ */
+ fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask);
+ fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask);
+ fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask);
+ fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask);
+ }
+
+
+ /*
+ * Do logic ops
+ */
+ if (spu.blend.logicop_enable) {
+ /* XXX to do */
+ /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
+ }
+
+
+ /*
+ * If mask is non-zero, mark tile as dirty.
+ */
+ if (spu_extract(spu_orx(mask), 0)) {
+ spu.cur_ctile_status = TILE_STATUS_DIRTY;
+ }
+ else {
+ /* write no fragments */
+ return;
+ }
+
+
+ /*
+ * Write new fragment/quad colors to the framebuffer/tile.
+ * Only write pixels where the corresponding mask word is set.
+ */
+#if LINEAR_QUAD_LAYOUT
+ /*
+ * Quad layout:
+ * +--+--+--+--+
+ * |p0|p1|p2|p3|...
+ * +--+--+--+--+
+ */
+ if (spu_extract(mask, 0))
+ colorTile->ui[y][x*2] = fragc0;
+ if (spu_extract(mask, 1))
+ colorTile->ui[y][x*2+1] = fragc1;
+ if (spu_extract(mask, 2))
+ colorTile->ui[y][x*2+2] = fragc2;
+ if (spu_extract(mask, 3))
+ colorTile->ui[y][x*2+3] = fragc3;
+#else
+ /*
+ * Quad layout:
+ * +--+--+
+ * |p0|p1|...
+ * +--+--+
+ * |p2|p3|...
+ * +--+--+
+ */
+ if (spu_extract(mask, 0))
+ colorTile->ui[y+0][x+0] = fragc0;
+ if (spu_extract(mask, 1))
+ colorTile->ui[y+0][x+1] = fragc1;
+ if (spu_extract(mask, 2))
+ colorTile->ui[y+1][x+0] = fragc2;
+ if (spu_extract(mask, 3))
+ colorTile->ui[y+1][x+1] = fragc3;
+#endif
+}
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
new file mode 100644
index 0000000000..f817abf046
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
@@ -0,0 +1,44 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SPU_PER_FRAGMENT_OP
+#define SPU_PER_FRAGMENT_OP
+
+
+extern void
+spu_fallback_fragment_ops(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragRed,
+ vector float fragGreen,
+ vector float fragBlue,
+ vector float fragAlpha,
+ vector unsigned int mask);
+
+
+#endif /* SPU_PER_FRAGMENT_OP */
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
new file mode 100644
index 0000000000..7c225e2f27
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -0,0 +1,295 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <libmisc.h>
+#include <spu_mfcio.h>
+
+#include "spu_main.h"
+#include "spu_render.h"
+#include "spu_tri.h"
+#include "spu_tile.h"
+#include "cell/common.h"
+#include "util/u_memory.h"
+
+
+/**
+ * Given a rendering command's bounding box (in pixels) compute the
+ * location of the corresponding screen tile bounding box.
+ */
+static INLINE void
+tile_bounding_box(const struct cell_command_render *render,
+ uint *txmin, uint *tymin,
+ uint *box_num_tiles, uint *box_width_tiles)
+{
+#if 0
+ /* Debug: full-window bounding box */
+ uint txmax = spu.fb.width_tiles - 1;
+ uint tymax = spu.fb.height_tiles - 1;
+ *txmin = 0;
+ *tymin = 0;
+ *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+ *box_width_tiles = spu.fb.width_tiles;
+ (void) render;
+ (void) txmax;
+ (void) tymax;
+#else
+ uint txmax, tymax, box_height_tiles;
+
+ *txmin = (uint) render->xmin / TILE_SIZE;
+ *tymin = (uint) render->ymin / TILE_SIZE;
+ txmax = (uint) render->xmax / TILE_SIZE;
+ tymax = (uint) render->ymax / TILE_SIZE;
+ if (txmax >= spu.fb.width_tiles)
+ txmax = spu.fb.width_tiles-1;
+ if (tymax >= spu.fb.height_tiles)
+ tymax = spu.fb.height_tiles-1;
+ *box_width_tiles = txmax - *txmin + 1;
+ box_height_tiles = tymax - *tymin + 1;
+ *box_num_tiles = *box_width_tiles * box_height_tiles;
+#endif
+#if 0
+ printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id,
+ render->xmin, render->ymin, render->xmax, render->ymax);
+ printf("SPU %u: tiles: %u, %u .. %u, %u\n",
+ spu.init.id, *txmin, *tymin, txmax, tymax);
+ ASSERT(render->xmin <= render->xmax);
+ ASSERT(render->ymin <= render->ymax);
+#endif
+}
+
+
+/** Check if the tile at (tx,ty) belongs to this SPU */
+static INLINE boolean
+my_tile(uint tx, uint ty)
+{
+ return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id;
+}
+
+
+/**
+ * Start fetching non-clear color/Z tiles from main memory
+ */
+static INLINE void
+get_cz_tiles(uint tx, uint ty)
+{
+ if (spu.read_depth_stencil) {
+ if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
+ //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
+ get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
+ spu.cur_ztile_status = TILE_STATUS_GETTING;
+ }
+ }
+
+ if (spu.cur_ctile_status != TILE_STATUS_CLEAR) {
+ //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty);
+ get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0);
+ spu.cur_ctile_status = TILE_STATUS_GETTING;
+ }
+}
+
+
+/**
+ * Start putting dirty color/Z tiles back to main memory
+ */
+static INLINE void
+put_cz_tiles(uint tx, uint ty)
+{
+ if (spu.cur_ztile_status == TILE_STATUS_DIRTY) {
+ /* tile was modified and needs to be written back */
+ //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty);
+ put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1);
+ spu.cur_ztile_status = TILE_STATUS_DEFINED;
+ }
+ else if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
+ /* tile was never used */
+ spu.cur_ztile_status = TILE_STATUS_DEFINED;
+ //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty);
+ }
+
+ if (spu.cur_ctile_status == TILE_STATUS_DIRTY) {
+ /* tile was modified and needs to be written back */
+ //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty);
+ put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0);
+ spu.cur_ctile_status = TILE_STATUS_DEFINED;
+ }
+ else if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
+ /* tile was never used */
+ spu.cur_ctile_status = TILE_STATUS_DEFINED;
+ //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty);
+ }
+}
+
+
+/**
+ * Wait for 'put' of color/z tiles to complete.
+ */
+static INLINE void
+wait_put_cz_tiles(void)
+{
+ wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
+ if (spu.read_depth_stencil) {
+ wait_on_mask(1 << TAG_WRITE_TILE_Z);
+ }
+}
+
+
+/**
+ * Render primitives
+ * \param pos_incr returns value indicating how may words to skip after
+ * this command in the batch buffer
+ */
+void
+cmd_render(const struct cell_command_render *render, uint *pos_incr)
+{
+ /* we'll DMA into these buffers */
+ ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB;
+ const uint vertex_size = render->vertex_size; /* in bytes */
+ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
+ uint index_bytes;
+ const ubyte *vertices;
+ const ushort *indexes;
+ uint i, j;
+ uint num_tiles;
+
+ D_PRINTF(CELL_DEBUG_CMD,
+ "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n",
+ render->prim_type,
+ render->num_verts,
+ render->num_indexes,
+ render->inline_verts);
+
+ ASSERT(sizeof(*render) % 4 == 0);
+ ASSERT(total_vertex_bytes % 16 == 0);
+ ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES);
+ ASSERT(render->num_indexes % 3 == 0);
+
+
+ /* indexes are right after the render command in the batch buffer */
+ indexes = (const ushort *) (render + 1);
+ index_bytes = ROUNDUP8(render->num_indexes * 2);
+ *pos_incr = index_bytes / 8 + sizeof(*render) / 8;
+
+
+ if (render->inline_verts) {
+ /* Vertices are after indexes in batch buffer at next 16-byte addr */
+ vertices = (const ubyte *) render + (*pos_incr * 8);
+ vertices = (const ubyte *) align_pointer((void *) vertices, 16);
+ ASSERT_ALIGN16(vertices);
+ *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8;
+ }
+ else {
+ /* Begin DMA fetch of vertex buffer */
+ ubyte *src = spu.init.buffers[render->vertex_buf];
+ ubyte *dest = vertex_data;
+
+ /* skip vertex data we won't use */
+#if 01
+ src += render->min_index * vertex_size;
+ dest += render->min_index * vertex_size;
+ total_vertex_bytes -= render->min_index * vertex_size;
+#endif
+ ASSERT(total_vertex_bytes % 16 == 0);
+ ASSERT_ALIGN16(dest);
+ ASSERT_ALIGN16(src);
+
+ mfc_get(dest, /* in vertex_data[] array */
+ (unsigned int) src, /* src in main memory */
+ total_vertex_bytes, /* size */
+ TAG_VERTEX_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+
+ vertices = vertex_data;
+
+ wait_on_mask(1 << TAG_VERTEX_BUFFER);
+ }
+
+
+ /**
+ ** find tiles which intersect the prim bounding box
+ **/
+ uint txmin, tymin, box_width_tiles, box_num_tiles;
+ tile_bounding_box(render, &txmin, &tymin,
+ &box_num_tiles, &box_width_tiles);
+
+
+ /* make sure any pending clears have completed */
+ wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */
+
+
+ num_tiles = 0;
+
+ /**
+ ** loop over tiles, rendering tris
+ **/
+ for (i = 0; i < box_num_tiles; i++) {
+ const uint tx = txmin + i % box_width_tiles;
+ const uint ty = tymin + i / box_width_tiles;
+
+ ASSERT(tx < spu.fb.width_tiles);
+ ASSERT(ty < spu.fb.height_tiles);
+
+ if (!my_tile(tx, ty))
+ continue;
+
+ num_tiles++;
+
+ spu.cur_ctile_status = spu.ctile_status[ty][tx];
+ spu.cur_ztile_status = spu.ztile_status[ty][tx];
+
+ get_cz_tiles(tx, ty);
+
+ uint drawn = 0;
+
+ /* loop over tris */
+ for (j = 0; j < render->num_indexes; j += 3) {
+ const float *v0, *v1, *v2;
+
+ v0 = (const float *) (vertices + indexes[j+0] * vertex_size);
+ v1 = (const float *) (vertices + indexes[j+1] * vertex_size);
+ v2 = (const float *) (vertices + indexes[j+2] * vertex_size);
+
+ drawn += tri_draw(v0, v1, v2, tx, ty);
+ }
+
+ //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3);
+
+ /* write color/z tiles back to main framebuffer, if dirtied */
+ put_cz_tiles(tx, ty);
+
+ wait_put_cz_tiles(); /* XXX seems unnecessary... */
+
+ spu.ctile_status[ty][tx] = spu.cur_ctile_status;
+ spu.ztile_status[ty][tx] = spu.cur_ztile_status;
+ }
+
+ D_PRINTF(CELL_DEBUG_CMD,
+ "RENDER done (%u tiles hit)\n",
+ num_tiles);
+}
diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h
new file mode 100644
index 0000000000..493434f087
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_render.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef SPU_RENDER_H
+#define SPU_RENDER_H
+
+#include "cell/common.h"
+
+extern void
+cmd_render(const struct cell_command_render *render, uint *pos_incr);
+
+#endif /* SPU_RENDER_H */
+
diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h
new file mode 100644
index 0000000000..74f2a0b6d2
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_shuffle.h
@@ -0,0 +1,186 @@
+#ifndef SPU_SHUFFLE_H
+#define SPU_SHUFFLE_H
+
+/*
+ * Generate shuffle patterns with minimal fuss.
+ *
+ * Based on ideas from
+ * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf
+ *
+ * A-P indicates 0-15th position in first vector
+ * a-p indicates 0-15th position in second vector
+ *
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * | A| B| C| D|
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | A| B| C| D| E| F| G| H|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ *
+ * x or X indicates 0xff
+ * 8 indicates 0x80
+ * 0 indicates 0x00
+ *
+ * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector
+ * unsigned char literal suitable for use with spu_shuffle().
+ *
+ * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector
+ * literal suitable for use with si_shufb().
+ *
+ *
+ * For example :
+ * SHUFB4(A,A,A,A)
+ * expands to :
+ * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3})
+ *
+ * SHUFFLE8(A,B,a,b,C,c,8,8)
+ * expands to :
+ * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,
+ * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0})
+ *
+ */
+
+#include <spu_intrinsics.h>
+
+#define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03
+#define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07
+#define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b
+#define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f
+#define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13
+#define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17
+#define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b
+#define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f
+#define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0
+#define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0
+#define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80
+#define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0
+
+#define SHUFFLE_VECTOR_4__(A, B, C, D) \
+ SHUFFLE_PATTERN_4_##A##__, \
+ SHUFFLE_PATTERN_4_##B##__, \
+ SHUFFLE_PATTERN_4_##C##__, \
+ SHUFFLE_PATTERN_4_##D##__
+
+#define SHUFFLE4(A, B, C, D) \
+ ((const vector unsigned char){ \
+ SHUFFLE_VECTOR_4__(A, B, C, D) \
+ })
+
+#define SHUFB4(A, B, C, D) \
+ ((const qword){ \
+ SHUFFLE_VECTOR_4__(A, B, C, D) \
+ })
+
+
+#define SHUFFLE_PATTERN_8_A__ 0x00, 0x01
+#define SHUFFLE_PATTERN_8_B__ 0x02, 0x03
+#define SHUFFLE_PATTERN_8_C__ 0x04, 0x05
+#define SHUFFLE_PATTERN_8_D__ 0x06, 0x07
+#define SHUFFLE_PATTERN_8_E__ 0x08, 0x09
+#define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b
+#define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d
+#define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f
+#define SHUFFLE_PATTERN_8_a__ 0x10, 0x11
+#define SHUFFLE_PATTERN_8_b__ 0x12, 0x13
+#define SHUFFLE_PATTERN_8_c__ 0x14, 0x15
+#define SHUFFLE_PATTERN_8_d__ 0x16, 0x17
+#define SHUFFLE_PATTERN_8_e__ 0x18, 0x19
+#define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b
+#define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d
+#define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f
+#define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0
+#define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0
+#define SHUFFLE_PATTERN_8_0__ 0x80, 0x80
+#define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0
+
+
+#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
+ SHUFFLE_PATTERN_8_##A##__, \
+ SHUFFLE_PATTERN_8_##B##__, \
+ SHUFFLE_PATTERN_8_##C##__, \
+ SHUFFLE_PATTERN_8_##D##__, \
+ SHUFFLE_PATTERN_8_##E##__, \
+ SHUFFLE_PATTERN_8_##F##__, \
+ SHUFFLE_PATTERN_8_##G##__, \
+ SHUFFLE_PATTERN_8_##H##__
+
+#define SHUFFLE8(A, B, C, D, E, F, G, H) \
+ ((const vector unsigned char){ \
+ SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
+ })
+
+#define SHUFB8(A, B, C, D, E, F, G, H) \
+ ((const qword){ \
+ SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
+ })
+
+
+#define SHUFFLE_PATTERN_16_A__ 0x00
+#define SHUFFLE_PATTERN_16_B__ 0x01
+#define SHUFFLE_PATTERN_16_C__ 0x02
+#define SHUFFLE_PATTERN_16_D__ 0x03
+#define SHUFFLE_PATTERN_16_E__ 0x04
+#define SHUFFLE_PATTERN_16_F__ 0x05
+#define SHUFFLE_PATTERN_16_G__ 0x06
+#define SHUFFLE_PATTERN_16_H__ 0x07
+#define SHUFFLE_PATTERN_16_I__ 0x08
+#define SHUFFLE_PATTERN_16_J__ 0x09
+#define SHUFFLE_PATTERN_16_K__ 0x0a
+#define SHUFFLE_PATTERN_16_L__ 0x0b
+#define SHUFFLE_PATTERN_16_M__ 0x0c
+#define SHUFFLE_PATTERN_16_N__ 0x0d
+#define SHUFFLE_PATTERN_16_O__ 0x0e
+#define SHUFFLE_PATTERN_16_P__ 0x0f
+#define SHUFFLE_PATTERN_16_a__ 0x10
+#define SHUFFLE_PATTERN_16_b__ 0x11
+#define SHUFFLE_PATTERN_16_c__ 0x12
+#define SHUFFLE_PATTERN_16_d__ 0x13
+#define SHUFFLE_PATTERN_16_e__ 0x14
+#define SHUFFLE_PATTERN_16_f__ 0x15
+#define SHUFFLE_PATTERN_16_g__ 0x16
+#define SHUFFLE_PATTERN_16_h__ 0x17
+#define SHUFFLE_PATTERN_16_i__ 0x18
+#define SHUFFLE_PATTERN_16_j__ 0x19
+#define SHUFFLE_PATTERN_16_k__ 0x1a
+#define SHUFFLE_PATTERN_16_l__ 0x1b
+#define SHUFFLE_PATTERN_16_m__ 0x1c
+#define SHUFFLE_PATTERN_16_n__ 0x1d
+#define SHUFFLE_PATTERN_16_o__ 0x1e
+#define SHUFFLE_PATTERN_16_p__ 0x1f
+#define SHUFFLE_PATTERN_16_X__ 0xc0
+#define SHUFFLE_PATTERN_16_x__ 0xc0
+#define SHUFFLE_PATTERN_16_0__ 0x80
+#define SHUFFLE_PATTERN_16_8__ 0xe0
+
+#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ SHUFFLE_PATTERN_16_##A##__, \
+ SHUFFLE_PATTERN_16_##B##__, \
+ SHUFFLE_PATTERN_16_##C##__, \
+ SHUFFLE_PATTERN_16_##D##__, \
+ SHUFFLE_PATTERN_16_##E##__, \
+ SHUFFLE_PATTERN_16_##F##__, \
+ SHUFFLE_PATTERN_16_##G##__, \
+ SHUFFLE_PATTERN_16_##H##__, \
+ SHUFFLE_PATTERN_16_##I##__, \
+ SHUFFLE_PATTERN_16_##J##__, \
+ SHUFFLE_PATTERN_16_##K##__, \
+ SHUFFLE_PATTERN_16_##L##__, \
+ SHUFFLE_PATTERN_16_##M##__, \
+ SHUFFLE_PATTERN_16_##N##__, \
+ SHUFFLE_PATTERN_16_##O##__, \
+ SHUFFLE_PATTERN_16_##P##__
+
+#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ ((const vector unsigned char){ \
+ SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ })
+
+#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ ((const qword){ \
+ SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ })
+
+#endif
diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c
new file mode 100644
index 0000000000..69784c8978
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_texture.c
@@ -0,0 +1,641 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <math.h>
+
+#include "pipe/p_compiler.h"
+#include "spu_main.h"
+#include "spu_texture.h"
+#include "spu_tile.h"
+#include "spu_colorpack.h"
+#include "spu_dcache.h"
+
+
+/**
+ * Mark all tex cache entries as invalid.
+ */
+void
+invalidate_tex_cache(void)
+{
+ uint lvl;
+ for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) {
+ uint unit = 0;
+ uint bytes = 4 * spu.texture[unit].level[lvl].width
+ * spu.texture[unit].level[lvl].height;
+
+ if (spu.texture[unit].target == PIPE_TEXTURE_CUBE)
+ bytes *= 6;
+ else if (spu.texture[unit].target == PIPE_TEXTURE_3D)
+ bytes *= spu.texture[unit].level[lvl].depth;
+
+ spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes);
+ }
+}
+
+
+/**
+ * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
+ *
+ * NOTE: in the typical case of bilinear filtering, the four texels
+ * are in a 2x2 group so we could get by with just two dcache fetches
+ * (two side-by-side texels per fetch). But when bilinear filtering
+ * wraps around a texture edge, we'll probably need code like we have
+ * now.
+ * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
+ * it's quite likely that the four pixels in a quad will need some of the
+ * same texels. So look into doing texture fetches for four pixels at
+ * a time.
+ */
+static void
+get_four_texels(const struct spu_texture_level *tlevel, uint face,
+ vec_int4 x, vec_int4 y,
+ vec_uint4 *texels)
+{
+ unsigned texture_ea = (uintptr_t) tlevel->start;
+ const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
+ const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
+ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */
+ const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */
+
+ const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row);
+ const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
+
+ qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
+ tile_offset = si_mpy((qword) tile_offset, tile_size);
+
+ qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
+ texel_offset = si_mpyui(texel_offset, 4);
+
+ vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
+
+ texture_ea = texture_ea + face * tlevel->bytes_per_image;
+
+ spu_dcache_fetch_unaligned((qword *) & texels[0],
+ texture_ea + spu_extract(offset, 0), 4);
+ spu_dcache_fetch_unaligned((qword *) & texels[1],
+ texture_ea + spu_extract(offset, 1), 4);
+ spu_dcache_fetch_unaligned((qword *) & texels[2],
+ texture_ea + spu_extract(offset, 2), 4);
+ spu_dcache_fetch_unaligned((qword *) & texels[3],
+ texture_ea + spu_extract(offset, 3), 4);
+}
+
+
+/** clamp vec to [0, max] */
+static INLINE vector signed int
+spu_clamp(vector signed int vec, vector signed int max)
+{
+ static const vector signed int zero = {0,0,0,0};
+ vector unsigned int c;
+ c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */
+ vec = spu_sel(zero, vec, c);
+ c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */
+ vec = spu_sel(vec, max, c);
+ return vec;
+}
+
+
+
+/**
+ * Do nearest texture sampling for four pixels.
+ * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
+ */
+void
+sample_texture_2d_nearest(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
+{
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ vector float ss = spu_mul(s, tlevel->scale_s);
+ vector float tt = spu_mul(t, tlevel->scale_t);
+ vector signed int is = spu_convts(ss, 0);
+ vector signed int it = spu_convts(tt, 0);
+ vec_uint4 texels[4];
+
+ /* PIPE_TEX_WRAP_REPEAT */
+ is = spu_and(is, tlevel->mask_s);
+ it = spu_and(it, tlevel->mask_t);
+
+ /* PIPE_TEX_WRAP_CLAMP */
+ is = spu_clamp(is, tlevel->max_s);
+ it = spu_clamp(it, tlevel->max_t);
+
+ get_four_texels(tlevel, face, is, it, texels);
+
+ /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
+ spu_unpack_A8R8G8B8_transpose4(texels, colors);
+}
+
+
+/**
+ * Do bilinear texture sampling for four pixels.
+ * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
+ */
+void
+sample_texture_2d_bilinear(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
+{
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
+
+ vector float ss = spu_madd(s, tlevel->scale_s, half);
+ vector float tt = spu_madd(t, tlevel->scale_t, half);
+
+ vector signed int is0 = spu_convts(ss, 0);
+ vector signed int it0 = spu_convts(tt, 0);
+
+ /* is + 1, it + 1 */
+ vector signed int is1 = spu_add(is0, 1);
+ vector signed int it1 = spu_add(it0, 1);
+
+ /* PIPE_TEX_WRAP_REPEAT */
+ is0 = spu_and(is0, tlevel->mask_s);
+ it0 = spu_and(it0, tlevel->mask_t);
+ is1 = spu_and(is1, tlevel->mask_s);
+ it1 = spu_and(it1, tlevel->mask_t);
+
+ /* PIPE_TEX_WRAP_CLAMP */
+ is0 = spu_clamp(is0, tlevel->max_s);
+ it0 = spu_clamp(it0, tlevel->max_t);
+ is1 = spu_clamp(is1, tlevel->max_s);
+ it1 = spu_clamp(it1, tlevel->max_t);
+
+ /* get packed int texels */
+ vector unsigned int texels[16];
+ get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
+ get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
+ get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
+ get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
+
+ /* convert packed int texels to float colors */
+ vector float ftexels[16];
+ spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0);
+ spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4);
+ spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8);
+ spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12);
+
+ /* Compute weighting factors in [0,1]
+ * Multiply texcoord by 1024, AND with 1023, convert back to float.
+ */
+ vector float ss1024 = spu_mul(ss, spu_splats(1024.0f));
+ vector signed int iss1024 = spu_convts(ss1024, 0);
+ iss1024 = spu_and(iss1024, 1023);
+ vector float sWeights0 = spu_convtf(iss1024, 10);
+
+ vector float tt1024 = spu_mul(tt, spu_splats(1024.0f));
+ vector signed int itt1024 = spu_convts(tt1024, 0);
+ itt1024 = spu_and(itt1024, 1023);
+ vector float tWeights0 = spu_convtf(itt1024, 10);
+
+ /* 1 - sWeight and 1 - tWeight */
+ vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0);
+ vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0);
+
+ /* reds, for four pixels */
+ ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]),
+ spu_add(ftexels[8], ftexels[12]));
+
+ /* greens, for four pixels */
+ ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]),
+ spu_add(ftexels[9], ftexels[13]));
+
+ /* blues, for four pixels */
+ ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]),
+ spu_add(ftexels[10], ftexels[14]));
+
+ /* alphas, for four pixels */
+ ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]),
+ spu_add(ftexels[11], ftexels[15]));
+}
+
+
+
+/**
+ * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
+ */
+static INLINE void
+transpose(vector unsigned int *mOut0,
+ vector unsigned int *mOut1,
+ vector unsigned int *mOut2,
+ vector unsigned int *mOut3,
+ vector unsigned int *mIn)
+{
+ vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */
+ vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */
+ vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */
+
+ vector unsigned char shufflehi = ((vector unsigned char) {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x14, 0x15, 0x16, 0x17});
+ vector unsigned char shufflelo = ((vector unsigned char) {
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x1C, 0x1D, 0x1E, 0x1F});
+ abcd = *(mIn+0);
+ efgh = *(mIn+1);
+ ijkl = *(mIn+2);
+ mnop = *(mIn+3);
+
+ aibj = spu_shuffle(abcd, ijkl, shufflehi);
+ ckdl = spu_shuffle(abcd, ijkl, shufflelo);
+ emfn = spu_shuffle(efgh, mnop, shufflehi);
+ gohp = spu_shuffle(efgh, mnop, shufflelo);
+
+ aeim = spu_shuffle(aibj, emfn, shufflehi);
+ bfjn = spu_shuffle(aibj, emfn, shufflelo);
+ cgko = spu_shuffle(ckdl, gohp, shufflehi);
+ dhlp = spu_shuffle(ckdl, gohp, shufflelo);
+
+ *mOut0 = aeim;
+ *mOut1 = bfjn;
+ *mOut2 = cgko;
+ *mOut3 = dhlp;
+}
+
+
+/**
+ * Bilinear filtering, using int instead of float arithmetic for computing
+ * sample weights.
+ */
+void
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
+{
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
+
+ /* Scale texcoords by size of texture, and add half pixel bias */
+ vector float ss = spu_madd(s, tlevel->scale_s, half);
+ vector float tt = spu_madd(t, tlevel->scale_t, half);
+
+ /* convert float coords to fixed-pt coords with 7 fraction bits */
+ vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */
+ vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */
+
+ /* compute integer texel weights in [0, 127] */
+ vector signed int sWeights0 = spu_and(is, 127);
+ vector signed int tWeights0 = spu_and(it, 127);
+ vector signed int sWeights1 = spu_sub(127, sWeights0);
+ vector signed int tWeights1 = spu_sub(127, tWeights0);
+
+ /* texel coords: is0 = is / 128, it0 = is / 128 */
+ vector signed int is0 = spu_rlmask(is, -7);
+ vector signed int it0 = spu_rlmask(it, -7);
+
+ /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
+ vector signed int is1 = spu_add(is0, 1);
+ vector signed int it1 = spu_add(it0, 1);
+
+ /* PIPE_TEX_WRAP_REPEAT */
+ is0 = spu_and(is0, tlevel->mask_s);
+ it0 = spu_and(it0, tlevel->mask_t);
+ is1 = spu_and(is1, tlevel->mask_s);
+ it1 = spu_and(it1, tlevel->mask_t);
+
+ /* PIPE_TEX_WRAP_CLAMP */
+ is0 = spu_clamp(is0, tlevel->max_s);
+ it0 = spu_clamp(it0, tlevel->max_t);
+ is1 = spu_clamp(is1, tlevel->max_s);
+ it1 = spu_clamp(it1, tlevel->max_t);
+
+ /* get packed int texels */
+ vector unsigned int texels[16];
+ get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
+ get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
+ get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
+ get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
+
+ /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
+ {
+ static const unsigned char ZERO = 0x80;
+ int i;
+ for (i = 0; i < 16; i++) {
+ texels[i] = spu_shuffle(texels[i], texels[i],
+ ((vector unsigned char) {
+ ZERO, ZERO, ZERO, 1,
+ ZERO, ZERO, ZERO, 2,
+ ZERO, ZERO, ZERO, 3,
+ ZERO, ZERO, ZERO, 0}));
+ }
+ }
+
+ /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
+ vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7,
+ texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15;
+ transpose(&texel0, &texel1, &texel2, &texel3, texels + 0);
+ transpose(&texel4, &texel5, &texel6, &texel7, texels + 4);
+ transpose(&texel8, &texel9, &texel10, &texel11, texels + 8);
+ transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);
+
+ /* computed weighted colors */
+ vector unsigned int c0, c1, c2, c3, cSum;
+
+ /* red */
+ c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[0] = spu_convtf(cSum, 22);
+
+ /* green */
+ c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[1] = spu_convtf(cSum, 22);
+
+ /* blue */
+ c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[2] = spu_convtf(cSum, 22);
+
+ /* alpha */
+ c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[3] = spu_convtf(cSum, 22);
+}
+
+
+
+/**
+ * Compute level of detail factor from texcoords.
+ */
+static INLINE float
+compute_lambda_2d(uint unit, vector float s, vector float t)
+{
+ uint baseLevel = 0;
+ float width = spu.texture[unit].level[baseLevel].width;
+ float height = spu.texture[unit].level[baseLevel].width;
+ float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
+ float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
+ float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
+ float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
+#if 0
+ /* ideal value */
+ float x = dsdx * dsdx + dtdx * dtdx;
+ float y = dsdy * dsdy + dtdy * dtdy;
+ float rho = x > y ? x : y;
+ rho = sqrtf(rho);
+#else
+ /* approximation */
+ dsdx = fabsf(dsdx);
+ dsdy = fabsf(dsdy);
+ dtdx = fabsf(dtdx);
+ dtdy = fabsf(dtdy);
+ float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5;
+#endif
+ float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */
+ return lambda;
+}
+
+
+/**
+ * Blend two sets of colors according to weight.
+ */
+static void
+blend_colors(vector float c0[4], const vector float c1[4], float weight)
+{
+ vector float t = spu_splats(weight);
+ vector float dc0 = spu_sub(c1[0], c0[0]);
+ vector float dc1 = spu_sub(c1[1], c0[1]);
+ vector float dc2 = spu_sub(c1[2], c0[2]);
+ vector float dc3 = spu_sub(c1[3], c0[3]);
+ c0[0] = spu_madd(dc0, t, c0[0]);
+ c0[1] = spu_madd(dc1, t, c0[1]);
+ c0[2] = spu_madd(dc2, t, c0[2]);
+ c0[3] = spu_madd(dc3, t, c0[3]);
+}
+
+
+/**
+ * Texture sampling with level of detail selection and possibly mipmap
+ * interpolation.
+ */
+void
+sample_texture_2d_lod(vector float s, vector float t,
+ uint unit, uint level_ignored, uint face,
+ vector float colors[4])
+{
+ /*
+ * Note that we're computing a lambda/lod here that's used for all
+ * four pixels in the quad.
+ */
+ float lambda = compute_lambda_2d(unit, s, t);
+
+ (void) face;
+ (void) level_ignored;
+
+ /* apply lod bias */
+ lambda += spu.sampler[unit].lod_bias;
+
+ /* clamp */
+ if (lambda < spu.sampler[unit].min_lod)
+ lambda = spu.sampler[unit].min_lod;
+ else if (lambda > spu.sampler[unit].max_lod)
+ lambda = spu.sampler[unit].max_lod;
+
+ if (lambda <= 0.0f) {
+ /* magnify */
+ spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors);
+ }
+ else {
+ /* minify */
+ if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
+ /* sample two mipmap levels and interpolate */
+ int level = (int) lambda;
+ if (level > (int) spu.texture[unit].max_level)
+ level = spu.texture[unit].max_level;
+ spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
+ if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
+ /* sample second mipmap level */
+ float weight = lambda - (float) level;
+ level++;
+ if (level <= (int) spu.texture[unit].max_level) {
+ vector float colors2[4];
+ spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2);
+ blend_colors(colors, colors2, weight);
+ }
+ }
+ }
+ else {
+ /* sample one mipmap level */
+ int level = (int) (lambda + 0.5f);
+ if (level > (int) spu.texture[unit].max_level)
+ level = spu.texture[unit].max_level;
+ spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
+ }
+ }
+}
+
+
+/** XXX need a SIMD version of this */
+static unsigned
+choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
+{
+ /*
+ major axis
+ direction target sc tc ma
+ ---------- ------------------------------- --- --- ---
+ +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
+ -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
+ +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
+ -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
+ +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
+ -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
+ */
+ const float arx = fabsf(rx);
+ const float ary = fabsf(ry);
+ const float arz = fabsf(rz);
+ unsigned face;
+ float sc, tc, ma;
+
+ if (arx > ary && arx > arz) {
+ if (rx >= 0.0F) {
+ face = PIPE_TEX_FACE_POS_X;
+ sc = -rz;
+ tc = -ry;
+ ma = arx;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_X;
+ sc = rz;
+ tc = -ry;
+ ma = arx;
+ }
+ }
+ else if (ary > arx && ary > arz) {
+ if (ry >= 0.0F) {
+ face = PIPE_TEX_FACE_POS_Y;
+ sc = rx;
+ tc = rz;
+ ma = ary;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_Y;
+ sc = rx;
+ tc = -rz;
+ ma = ary;
+ }
+ }
+ else {
+ if (rz > 0.0F) {
+ face = PIPE_TEX_FACE_POS_Z;
+ sc = rx;
+ tc = -ry;
+ ma = arz;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_Z;
+ sc = -rx;
+ tc = -ry;
+ ma = arz;
+ }
+ }
+
+ *newS = (sc / ma + 1.0F) * 0.5F;
+ *newT = (tc / ma + 1.0F) * 0.5F;
+
+ return face;
+}
+
+
+
+void
+sample_texture_cube(vector float s, vector float t, vector float r,
+ uint unit, vector float colors[4])
+{
+ uint p, faces[4], level = 0;
+ float newS[4], newT[4];
+
+ /* Compute cube faces referenced by the four sets of texcoords.
+ * XXX we should SIMD-ize this.
+ */
+ for (p = 0; p < 4; p++) {
+ float rx = spu_extract(s, p);
+ float ry = spu_extract(t, p);
+ float rz = spu_extract(r, p);
+ faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]);
+ }
+
+ if (faces[0] == faces[1] &&
+ faces[0] == faces[2] &&
+ faces[0] == faces[3]) {
+ /* GOOD! All four texcoords refer to the same cube face */
+ s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
+ t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
+ spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors);
+ }
+ else {
+ /* BAD! The four texcoords refer to different faces */
+ for (p = 0; p < 4; p++) {
+ vector float c[4];
+
+ spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]),
+ unit, level, faces[p], c);
+
+ float red = spu_extract(c[0], p);
+ float green = spu_extract(c[1], p);
+ float blue = spu_extract(c[2], p);
+ float alpha = spu_extract(c[3], p);
+
+ colors[0] = spu_insert(red, colors[0], p);
+ colors[1] = spu_insert(green, colors[1], p);
+ colors[2] = spu_insert(blue, colors[2], p);
+ colors[3] = spu_insert(alpha, colors[3], p);
+ }
+ }
+}
diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h
new file mode 100644
index 0000000000..7b75b007b5
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_texture.h
@@ -0,0 +1,67 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SPU_TEXTURE_H
+#define SPU_TEXTURE_H
+
+
+#include "pipe/p_compiler.h"
+
+
+extern void
+invalidate_tex_cache(void);
+
+
+extern void
+sample_texture_2d_nearest(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+
+extern void
+sample_texture_2d_bilinear(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+extern void
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+
+extern void
+sample_texture_2d_lod(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+
+extern void
+sample_texture_cube(vector float s, vector float t, vector float r,
+ uint unit, vector float colors[4]);
+
+
+#endif /* SPU_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c
new file mode 100644
index 0000000000..6905015a48
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_tile.c
@@ -0,0 +1,126 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+#include "spu_tile.h"
+#include "spu_main.h"
+
+
+/**
+ * Get tile of color or Z values from main memory, put into SPU memory.
+ */
+void
+get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
+{
+ const uint offset = ty * spu.fb.width_tiles + tx;
+ const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
+ const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start;
+
+ src += offset * bytesPerTile;
+
+ ASSERT(tx < spu.fb.width_tiles);
+ ASSERT(ty < spu.fb.height_tiles);
+ ASSERT_ALIGN16(tile);
+ /*
+ printf("get_tile: dest: %p src: 0x%x size: %d\n",
+ tile, (unsigned int) src, bytesPerTile);
+ */
+ mfc_get(tile->ui, /* dest in local memory */
+ (unsigned int) src, /* src in main memory */
+ bytesPerTile,
+ tag,
+ 0, /* tid */
+ 0 /* rid */);
+}
+
+
+/**
+ * Move tile of color or Z values from SPU memory to main memory.
+ */
+void
+put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
+{
+ const uint offset = ty * spu.fb.width_tiles + tx;
+ const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
+ ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start;
+
+ dst += offset * bytesPerTile;
+
+ ASSERT(tx < spu.fb.width_tiles);
+ ASSERT(ty < spu.fb.height_tiles);
+ ASSERT_ALIGN16(tile);
+ /*
+ printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n",
+ spu.init.id,
+ tile, (unsigned int) dst, bytesPerTile);
+ */
+ mfc_put((void *) tile->ui, /* src in local memory */
+ (unsigned int) dst, /* dst in main memory */
+ bytesPerTile,
+ tag,
+ 0, /* tid */
+ 0 /* rid */);
+}
+
+
+/**
+ * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
+ * tiles back to the main framebuffer.
+ */
+void
+really_clear_tiles(uint surfaceIndex)
+{
+ const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+ uint i;
+
+ if (surfaceIndex == 0) {
+ clear_c_tile(&spu.ctile);
+
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+ }
+ }
+ }
+ else {
+ clear_z_tile(&spu.ztile);
+
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR)
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1);
+ }
+ }
+
+#if 0
+ wait_on_mask(1 << TAG_SURFACE_CLEAR);
+#endif
+}
diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h
new file mode 100644
index 0000000000..7bfb52be8f
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_tile.h
@@ -0,0 +1,75 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SPU_TILE_H
+#define SPU_TILE_H
+
+
+#include <libmisc.h>
+#include <spu_mfcio.h>
+#include "spu_main.h"
+#include "cell/common.h"
+
+
+
+extern void
+get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf);
+
+extern void
+put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf);
+
+extern void
+really_clear_tiles(uint surfaceIndex);
+
+
+static INLINE void
+clear_c_tile(tile_t *ctile)
+{
+ memset32((uint*) ctile->ui,
+ spu.fb.color_clear_value,
+ TILE_SIZE * TILE_SIZE);
+}
+
+
+static INLINE void
+clear_z_tile(tile_t *ztile)
+{
+ if (spu.fb.zsize == 2) {
+ memset16((ushort*) ztile->us,
+ spu.fb.depth_clear_value,
+ TILE_SIZE * TILE_SIZE);
+ }
+ else {
+ ASSERT(spu.fb.zsize != 0);
+ memset32((uint*) ztile->ui,
+ spu.fb.depth_clear_value,
+ TILE_SIZE * TILE_SIZE);
+ }
+}
+
+
+#endif /* SPU_TILE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
new file mode 100644
index 0000000000..0d9fcb9997
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -0,0 +1,809 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Triangle rendering within a tile.
+ */
+
+#include <transpose_matrix4x4.h>
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "util/u_math.h"
+#include "spu_colorpack.h"
+#include "spu_main.h"
+#include "spu_shuffle.h"
+#include "spu_texture.h"
+#include "spu_tile.h"
+#include "spu_tri.h"
+
+
+/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
+typedef vector unsigned int mask_t;
+
+
+
+/**
+ * Simplified types taken from other parts of Gallium
+ */
+struct vertex_header {
+ vector float data[1];
+};
+
+
+
+/* XXX fix this */
+#undef CEILF
+#define CEILF(X) ((float) (int) ((X) + 0.99999f))
+
+
+#define QUAD_TOP_LEFT 0
+#define QUAD_TOP_RIGHT 1
+#define QUAD_BOTTOM_LEFT 2
+#define QUAD_BOTTOM_RIGHT 3
+#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
+#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
+#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
+#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
+#define MASK_ALL 0xf
+
+
+#define DEBUG_VERTS 0
+
+/**
+ * Triangle edge info
+ */
+struct edge {
+ union {
+ struct {
+ float dx; /**< X(v1) - X(v0), used only during setup */
+ float dy; /**< Y(v1) - Y(v0), used only during setup */
+ };
+ vec_float4 ds; /**< vector accessor for dx and dy */
+ };
+ float dxdy; /**< dx/dy */
+ float sx, sy; /**< first sample point coord */
+ int lines; /**< number of lines on this edge */
+};
+
+
+struct interp_coef
+{
+ vector float a0;
+ vector float dadx;
+ vector float dady;
+};
+
+
+/**
+ * Triangle setup info (derived from draw_stage).
+ * Also used for line drawing (taking some liberties).
+ */
+struct setup_stage {
+
+ /* Vertices are just an array of floats making up each attribute in
+ * turn. Currently fixed at 4 floats, but should change in time.
+ * Codegen will help cope with this.
+ */
+ union {
+ struct {
+ const struct vertex_header *vmin;
+ const struct vertex_header *vmid;
+ const struct vertex_header *vmax;
+ const struct vertex_header *vprovoke;
+ };
+ qword vertex_headers;
+ };
+
+ struct edge ebot;
+ struct edge etop;
+ struct edge emaj;
+
+ float oneOverArea; /* XXX maybe make into vector? */
+
+ uint facing;
+
+ uint tx, ty; /**< position of current tile (x, y) */
+
+ int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
+
+ struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
+
+ struct {
+ vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */
+ int y;
+ unsigned y_flags;
+ unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
+ } span;
+};
+
+
+static struct setup_stage setup;
+
+
+/**
+ * Evaluate attribute coefficients (plane equations) to compute
+ * attribute values for the four fragments in a quad.
+ * Eg: four colors will be computed (in AoS format).
+ */
+static INLINE void
+eval_coeff(uint slot, float x, float y, vector float w, vector float result[4])
+{
+ switch (spu.vertex_info.attrib[slot].interp_mode) {
+ case INTERP_CONSTANT:
+ result[QUAD_TOP_LEFT] =
+ result[QUAD_TOP_RIGHT] =
+ result[QUAD_BOTTOM_LEFT] =
+ result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0;
+ break;
+ case INTERP_LINEAR:
+ {
+ vector float dadx = setup.coef[slot].dadx;
+ vector float dady = setup.coef[slot].dady;
+ vector float topLeft =
+ spu_add(setup.coef[slot].a0,
+ spu_add(spu_mul(spu_splats(x), dadx),
+ spu_mul(spu_splats(y), dady)));
+
+ result[QUAD_TOP_LEFT] = topLeft;
+ result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx);
+ result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady);
+ result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady);
+ }
+ break;
+ case INTERP_PERSPECTIVE:
+ {
+ vector float dadx = setup.coef[slot].dadx;
+ vector float dady = setup.coef[slot].dady;
+ vector float topLeft =
+ spu_add(setup.coef[slot].a0,
+ spu_add(spu_mul(spu_splats(x), dadx),
+ spu_mul(spu_splats(y), dady)));
+
+ vector float wInv = spu_re(w); /* 1.0 / w */
+
+ result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv);
+ result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv);
+ result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv);
+ result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv);
+ }
+ break;
+ case INTERP_POS:
+ case INTERP_NONE:
+ break;
+ default:
+ ASSERT(0);
+ }
+}
+
+
+/**
+ * As above, but return 4 vectors in SOA format.
+ * XXX this will all be re-written someday.
+ */
+static INLINE void
+eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4])
+{
+ eval_coeff(slot, x, y, w, result);
+ _transpose_matrix4x4(result, result);
+}
+
+
+/** Evalute coefficients to get Z for four pixels in a quad */
+static INLINE vector float
+eval_z(float x, float y)
+{
+ const uint slot = 0;
+ const float dzdx = spu_extract(setup.coef[slot].dadx, 2);
+ const float dzdy = spu_extract(setup.coef[slot].dady, 2);
+ const float topLeft = spu_extract(setup.coef[slot].a0, 2) + x * dzdx + y * dzdy;
+ const vector float topLeftv = spu_splats(topLeft);
+ const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
+ return spu_add(topLeftv, derivs);
+}
+
+
+/** Evalute coefficients to get W for four pixels in a quad */
+static INLINE vector float
+eval_w(float x, float y)
+{
+ const uint slot = 0;
+ const float dwdx = spu_extract(setup.coef[slot].dadx, 3);
+ const float dwdy = spu_extract(setup.coef[slot].dady, 3);
+ const float topLeft = spu_extract(setup.coef[slot].a0, 3) + x * dwdx + y * dwdy;
+ const vector float topLeftv = spu_splats(topLeft);
+ const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy };
+ return spu_add(topLeftv, derivs);
+}
+
+
+/**
+ * Emit a quad (pass to next stage). No clipping is done.
+ * Note: about 1/5 to 1/7 of the time, mask is zero and this function
+ * should be skipped. But adding the test for that slows things down
+ * overall.
+ */
+static INLINE void
+emit_quad( int x, int y, mask_t mask)
+{
+ /* If any bits in mask are set... */
+ if (spu_extract(spu_orx(mask), 0)) {
+ const int ix = x - setup.cliprect_minx;
+ const int iy = y - setup.cliprect_miny;
+
+ spu.cur_ctile_status = TILE_STATUS_DIRTY;
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
+
+ {
+ /*
+ * Run fragment shader, execute per-fragment ops, update fb/tile.
+ */
+ vector float inputs[4*4], outputs[2*4];
+ vector float fragZ = eval_z((float) x, (float) y);
+ vector float fragW = eval_w((float) x, (float) y);
+ vector unsigned int kill_mask;
+
+ /* setup inputs */
+#if 0
+ eval_coeff_soa(1, (float) x, (float) y, fragW, inputs);
+#else
+ uint i;
+ for (i = 0; i < spu.vertex_info.num_attribs; i++) {
+ eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4);
+ }
+#endif
+ ASSERT(spu.fragment_program);
+ ASSERT(spu.fragment_ops);
+
+ /* Execute the current fragment program */
+ kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
+
+ mask = spu_andc(mask, kill_mask);
+
+ /* Execute per-fragment/quad operations, including:
+ * alpha test, z test, stencil test, blend and framebuffer writing.
+ * Note that there are two different fragment operations functions
+ * that can be called, one for front-facing fragments, and one
+ * for back-facing fragments. (Often the two are the same;
+ * but in some cases, like two-sided stenciling, they can be
+ * very different.) So choose the correct function depending
+ * on the calculated facing.
+ */
+ spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
+ fragZ,
+ outputs[0*4+0],
+ outputs[0*4+1],
+ outputs[0*4+2],
+ outputs[0*4+3],
+ mask);
+ }
+ }
+}
+
+
+/**
+ * Given an X or Y coordinate, return the block/quad coordinate that it
+ * belongs to.
+ */
+static INLINE int
+block(int x)
+{
+ return x & ~1;
+}
+
+
+/**
+ * Render a horizontal span of quads
+ */
+static void
+flush_spans(void)
+{
+ int minleft, maxright;
+
+ const int l0 = spu_extract(setup.span.quad, 0);
+ const int l1 = spu_extract(setup.span.quad, 1);
+ const int r0 = spu_extract(setup.span.quad, 2);
+ const int r1 = spu_extract(setup.span.quad, 3);
+
+ switch (setup.span.y_flags) {
+ case 0x3:
+ /* both odd and even lines written (both quad rows) */
+ minleft = MIN2(l0, l1);
+ maxright = MAX2(r0, r1);
+ break;
+
+ case 0x1:
+ /* only even line written (quad top row) */
+ minleft = l0;
+ maxright = r0;
+ break;
+
+ case 0x2:
+ /* only odd line written (quad bottom row) */
+ minleft = l1;
+ maxright = r1;
+ break;
+
+ default:
+ return;
+ }
+
+ /* OK, we're very likely to need the tile data now.
+ * clear or finish waiting if needed.
+ */
+ if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
+ /* wait for mfc_get() to complete */
+ //printf("SPU: %u: waiting for ctile\n", spu.init.id);
+ wait_on_mask(1 << TAG_READ_TILE_COLOR);
+ spu.cur_ctile_status = TILE_STATUS_CLEAN;
+ }
+ else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
+ //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
+ clear_c_tile(&spu.ctile);
+ spu.cur_ctile_status = TILE_STATUS_DIRTY;
+ }
+ ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
+
+ if (spu.read_depth_stencil) {
+ if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
+ /* wait for mfc_get() to complete */
+ //printf("SPU: %u: waiting for ztile\n", spu.init.id);
+ wait_on_mask(1 << TAG_READ_TILE_Z);
+ spu.cur_ztile_status = TILE_STATUS_CLEAN;
+ }
+ else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
+ //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
+ clear_z_tile(&spu.ztile);
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
+ }
+ ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
+ }
+
+ /* XXX this loop could be moved into the above switch cases... */
+
+ /* Setup for mask calculation */
+ const vec_int4 quad_LlRr = setup.span.quad;
+ const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8);
+ const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B));
+ const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B));
+
+ const vec_int4 twos = spu_splats(2);
+
+ const int x = block(minleft);
+ vec_int4 xs = {x, x+1, x, x+1};
+
+ for (; spu_extract(xs, 0) <= block(maxright); xs += twos) {
+ /**
+ * Computes mask to indicate which pixels in the 2x2 quad are actually
+ * inside the triangle's bounds.
+ */
+
+ /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */
+ const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs);
+ const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs);
+
+ /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */
+ const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs);
+
+ /* Combine results to create mask */
+ const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs);
+
+ emit_quad(spu_extract(xs, 0), setup.span.y, mask);
+ }
+
+ setup.span.y = 0;
+ setup.span.y_flags = 0;
+ /* Zero right elements */
+ setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
+}
+
+
+#if DEBUG_VERTS
+static void
+print_vertex(const struct vertex_header *v)
+{
+ uint i;
+ fprintf(stderr, " Vertex: (%p)\n", v);
+ for (i = 0; i < spu.vertex_info.num_attribs; i++) {
+ fprintf(stderr, " %d: %f %f %f %f\n", i,
+ spu_extract(v->data[i], 0),
+ spu_extract(v->data[i], 1),
+ spu_extract(v->data[i], 2),
+ spu_extract(v->data[i], 3));
+ }
+}
+#endif
+
+
+/**
+ * Sort vertices from top to bottom.
+ * Compute area and determine front vs. back facing.
+ * Do coarse clip test against tile bounds
+ * \return FALSE if tri is totally outside tile, TRUE otherwise
+ */
+static boolean
+setup_sort_vertices(const struct vertex_header *v0,
+ const struct vertex_header *v1,
+ const struct vertex_header *v2)
+{
+ float area, sign;
+
+#if DEBUG_VERTS
+ if (spu.init.id==0) {
+ fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
+ print_vertex(v0);
+ print_vertex(v1);
+ print_vertex(v2);
+ }
+#endif
+
+ /* determine bottom to top order of vertices */
+ {
+ /* A table of shuffle patterns for putting vertex_header pointers into
+ correct order. Quite magical. */
+ const vec_uchar16 sort_order_patterns[] = {
+ SHUFFLE4(A,B,C,C),
+ SHUFFLE4(C,A,B,C),
+ SHUFFLE4(A,C,B,C),
+ SHUFFLE4(B,C,A,C),
+ SHUFFLE4(B,A,C,C),
+ SHUFFLE4(C,B,A,C) };
+
+ /* The vertex_header pointers, packed for easy shuffling later */
+ const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2};
+
+ /* Collate y values into two vectors for comparison.
+ Using only one shuffle constant! ;) */
+ const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C));
+ const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C));
+ const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C));
+ const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C));
+
+ /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
+ const vec_uint4 compare = spu_cmpgt(y_012, y_120);
+ /* Compress the result of the comparison into 4 bits */
+ const vec_uint4 gather = spu_gather(compare);
+ /* Subtract one to attain the index into the LUT. Magical. */
+ const unsigned int index = spu_extract(gather, 0) - 1;
+
+ /* Load the appropriate pattern and construct the desired vector. */
+ setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]);
+
+ /* Using the result of the comparison, set sign.
+ Very magical. */
+ sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f);
+ }
+
+ /* Check if triangle is completely outside the tile bounds */
+ if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy)
+ return FALSE;
+ if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny)
+ return FALSE;
+ if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx &&
+ spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx &&
+ spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx)
+ return FALSE;
+ if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx &&
+ spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx &&
+ spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
+ return FALSE;
+
+ setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
+ setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
+ setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);
+
+ /*
+ * Compute triangle's area. Use 1/area to compute partial
+ * derivatives of attributes later.
+ */
+ area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;
+
+ setup.oneOverArea = 1.0f / area;
+
+ /* The product of area * sign indicates front/back orientation (0/1).
+ * Just in case someone gets the bright idea of switching the front
+ * and back constants without noticing that we're assuming their
+ * values in this operation, also assert that the values are
+ * what we think they are.
+ */
+ ASSERT(CELL_FACING_FRONT == 0);
+ ASSERT(CELL_FACING_BACK == 1);
+ setup.facing = (area * sign > 0.0f)
+ ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
+
+ return TRUE;
+}
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ * The value value comes from vertex->data[slot].
+ * The result will be put into setup.coef[slot].a0.
+ * \param slot which attribute slot
+ */
+static INLINE void
+const_coeff4(uint slot)
+{
+ setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
+ setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
+ setup.coef[slot].a0 = setup.vprovoke->data[slot];
+}
+
+
+/**
+ * As above, but interp setup all four vector components.
+ */
+static INLINE void
+tri_linear_coeff4(uint slot)
+{
+ const vector float vmin_d = setup.vmin->data[slot];
+ const vector float vmid_d = setup.vmid->data[slot];
+ const vector float vmax_d = setup.vmax->data[slot];
+ const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
+ const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
+
+ vector float botda = vmid_d - vmin_d;
+ vector float majda = vmax_d - vmin_d;
+
+ vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
+ spu_mul(botda, spu_splats(setup.emaj.dy)));
+ vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
+ spu_mul(majda, spu_splats(setup.ebot.dx)));
+
+ setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
+ setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
+
+ vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
+ vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
+
+ setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void
+tri_persp_coeff4(uint slot)
+{
+ const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
+ const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
+
+ const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3));
+ const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3));
+ const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3));
+
+ vector float vmin_d = setup.vmin->data[slot];
+ vector float vmid_d = setup.vmid->data[slot];
+ vector float vmax_d = setup.vmax->data[slot];
+
+ vmin_d = spu_mul(vmin_d, vmin_w);
+ vmid_d = spu_mul(vmid_d, vmid_w);
+ vmax_d = spu_mul(vmax_d, vmax_w);
+
+ vector float botda = vmid_d - vmin_d;
+ vector float majda = vmax_d - vmin_d;
+
+ vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
+ spu_mul(botda, spu_splats(setup.emaj.dy)));
+ vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
+ spu_mul(majda, spu_splats(setup.ebot.dx)));
+
+ setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
+ setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
+
+ vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
+ vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
+
+ setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
+}
+
+
+
+/**
+ * Compute the setup.coef[] array dadx, dady, a0 values.
+ * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
+ */
+static void
+setup_tri_coefficients(void)
+{
+ uint i;
+
+ for (i = 0; i < spu.vertex_info.num_attribs; i++) {
+ switch (spu.vertex_info.attrib[i].interp_mode) {
+ case INTERP_NONE:
+ break;
+ case INTERP_CONSTANT:
+ const_coeff4(i);
+ break;
+ case INTERP_POS:
+ /* fall-through */
+ case INTERP_LINEAR:
+ tri_linear_coeff4(i);
+ break;
+ case INTERP_PERSPECTIVE:
+ tri_persp_coeff4(i);
+ break;
+ default:
+ ASSERT(0);
+ }
+ }
+}
+
+
+static void
+setup_tri_edges(void)
+{
+ float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
+ float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
+
+ float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
+ float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
+ float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
+
+ setup.emaj.sy = CEILF(vmin_y);
+ setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
+ setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
+ setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
+
+ setup.etop.sy = CEILF(vmid_y);
+ setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
+ setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
+ setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
+
+ setup.ebot.sy = CEILF(vmin_y);
+ setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
+ setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
+ setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
+}
+
+
+/**
+ * Render the upper or lower half of a triangle.
+ * Scissoring/cliprect is applied here too.
+ */
+static void
+subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
+{
+ const int minx = setup.cliprect_minx;
+ const int maxx = setup.cliprect_maxx;
+ const int miny = setup.cliprect_miny;
+ const int maxy = setup.cliprect_maxy;
+ int y, start_y, finish_y;
+ int sy = (int)eleft->sy;
+
+ ASSERT((int)eleft->sy == (int) eright->sy);
+
+ /* clip top/bottom */
+ start_y = sy;
+ finish_y = sy + lines;
+
+ if (start_y < miny)
+ start_y = miny;
+
+ if (finish_y > maxy)
+ finish_y = maxy;
+
+ start_y -= sy;
+ finish_y -= sy;
+
+ /*
+ _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
+ */
+
+ for (y = start_y; y < finish_y; y++) {
+
+ /* avoid accumulating adds as floats don't have the precision to
+ * accurately iterate large triangle edges that way. luckily we
+ * can just multiply these days.
+ *
+ * this is all drowned out by the attribute interpolation anyway.
+ */
+ int left = (int)(eleft->sx + y * eleft->dxdy);
+ int right = (int)(eright->sx + y * eright->dxdy);
+
+ /* clip left/right */
+ if (left < minx)
+ left = minx;
+ if (right > maxx)
+ right = maxx;
+
+ if (left < right) {
+ int _y = sy + y;
+ if (block(_y) != setup.span.y) {
+ flush_spans();
+ setup.span.y = block(_y);
+ }
+
+ int offset = _y&1;
+ vec_int4 quad_LlRr = {left, left, right, right};
+ /* Store left and right in 0 or 1 row of quad based on offset */
+ setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset));
+ setup.span.y_flags |= 1<<offset;
+ }
+ }
+
+
+ /* save the values so that emaj can be restarted:
+ */
+ eleft->sx += lines * eleft->dxdy;
+ eright->sx += lines * eright->dxdy;
+ eleft->sy += lines;
+ eright->sy += lines;
+}
+
+
+/**
+ * Draw triangle into tile at (tx, ty) (tile coords)
+ * The tile data should have already been fetched.
+ */
+boolean
+tri_draw(const float *v0, const float *v1, const float *v2,
+ uint tx, uint ty)
+{
+ setup.tx = tx;
+ setup.ty = ty;
+
+ /* set clipping bounds to tile bounds */
+ setup.cliprect_minx = tx * TILE_SIZE;
+ setup.cliprect_miny = ty * TILE_SIZE;
+ setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
+ setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
+
+ if (!setup_sort_vertices((struct vertex_header *) v0,
+ (struct vertex_header *) v1,
+ (struct vertex_header *) v2)) {
+ return FALSE; /* totally clipped */
+ }
+
+ setup_tri_coefficients();
+ setup_tri_edges();
+
+ setup.span.y = 0;
+ setup.span.y_flags = 0;
+ /* Zero right elements */
+ setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
+
+ if (setup.oneOverArea < 0.0) {
+ /* emaj on left */
+ subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
+ subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
+ }
+ else {
+ /* emaj on right */
+ subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
+ subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
+ }
+
+ flush_spans();
+
+ return TRUE;
+}
diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h
new file mode 100644
index 0000000000..aa694dd7c9
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_tri.h
@@ -0,0 +1,37 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef SPU_TRI_H
+#define SPU_TRI_H
+
+
+extern boolean
+tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty);
+
+
+#endif /* SPU_TRI_H */
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
new file mode 100644
index 0000000000..b8a0d4a265
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -0,0 +1,167 @@
+
+#include "cell/common.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_debug.h"
+#include "tgsi/tgsi_parse.h"
+//#include "tgsi_build.h"
+#include "tgsi/tgsi_util.h"
+
+unsigned
+tgsi_util_get_src_register_swizzle(
+ const struct tgsi_src_register *reg,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ return reg->SwizzleX;
+ case 1:
+ return reg->SwizzleY;
+ case 2:
+ return reg->SwizzleZ;
+ case 3:
+ return reg->SwizzleW;
+ default:
+ ASSERT( 0 );
+ }
+ return 0;
+}
+
+unsigned
+tgsi_util_get_src_register_extswizzle(
+ const struct tgsi_src_register_ext_swz *reg,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ return reg->ExtSwizzleX;
+ case 1:
+ return reg->ExtSwizzleY;
+ case 2:
+ return reg->ExtSwizzleZ;
+ case 3:
+ return reg->ExtSwizzleW;
+ default:
+ ASSERT( 0 );
+ }
+ return 0;
+}
+
+unsigned
+tgsi_util_get_full_src_register_extswizzle(
+ const struct tgsi_full_src_register *reg,
+ unsigned component )
+{
+ unsigned swizzle;
+
+ /*
+ * First, calculate the extended swizzle for a given channel. This will give
+ * us either a channel index into the simple swizzle or a constant 1 or 0.
+ */
+ swizzle = tgsi_util_get_src_register_extswizzle(
+ &reg->SrcRegisterExtSwz,
+ component );
+
+ ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
+ ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
+ ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
+ ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
+ ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
+ ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
+
+ /*
+ * Second, calculate the simple swizzle for the unswizzled channel index.
+ * Leave the constants intact, they are not affected by the simple swizzle.
+ */
+ if( swizzle <= TGSI_SWIZZLE_W ) {
+ swizzle = tgsi_util_get_src_register_swizzle(
+ &reg->SrcRegister,
+ component );
+ }
+
+ return swizzle;
+}
+
+unsigned
+tgsi_util_get_src_register_extnegate(
+ const struct tgsi_src_register_ext_swz *reg,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ return reg->NegateX;
+ case 1:
+ return reg->NegateY;
+ case 2:
+ return reg->NegateZ;
+ case 3:
+ return reg->NegateW;
+ default:
+ ASSERT( 0 );
+ }
+ return 0;
+}
+
+void
+tgsi_util_set_src_register_extnegate(
+ struct tgsi_src_register_ext_swz *reg,
+ unsigned negate,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ reg->NegateX = negate;
+ break;
+ case 1:
+ reg->NegateY = negate;
+ break;
+ case 2:
+ reg->NegateZ = negate;
+ break;
+ case 3:
+ reg->NegateW = negate;
+ break;
+ default:
+ ASSERT( 0 );
+ }
+}
+
+unsigned
+tgsi_util_get_full_src_register_sign_mode(
+ const struct tgsi_full_src_register *reg,
+ unsigned component )
+{
+ unsigned sign_mode;
+
+ if( reg->SrcRegisterExtMod.Absolute ) {
+ /* Consider only the post-abs negation. */
+
+ if( reg->SrcRegisterExtMod.Negate ) {
+ sign_mode = TGSI_UTIL_SIGN_SET;
+ }
+ else {
+ sign_mode = TGSI_UTIL_SIGN_CLEAR;
+ }
+ }
+ else {
+ /* Accumulate the three negations. */
+
+ unsigned negate;
+
+ negate = reg->SrcRegister.Negate;
+ if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) {
+ negate = !negate;
+ }
+ if( reg->SrcRegisterExtMod.Negate ) {
+ negate = !negate;
+ }
+
+ if( negate ) {
+ sign_mode = TGSI_UTIL_SIGN_TOGGLE;
+ }
+ else {
+ sign_mode = TGSI_UTIL_SIGN_KEEP;
+ }
+ }
+
+ return sign_mode;
+}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
new file mode 100644
index 0000000000..03375d84a5
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
@@ -0,0 +1,145 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Ian Romanick <idr@us.ibm.com>
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "spu_exec.h"
+#include "spu_vertex_shader.h"
+#include "spu_main.h"
+#include "spu_dcache.h"
+
+typedef void (*spu_fetch_func)(qword *out, const qword *in,
+ const qword *shuffle_data);
+
+
+static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = {
+ /* Shuffle used by CVT_64_FLOAT
+ */
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ },
+
+ /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED
+ */
+ {
+ 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
+ 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
+ },
+
+ /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED
+ */
+ {
+ 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
+ 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
+ },
+
+ /* High value shuffle used by trans4x4.
+ */
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+ 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17
+ },
+
+ /* Low value shuffle used by trans4x4.
+ */
+ {
+ 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F
+ }
+};
+
+
+/**
+ * Fetch vertex attributes for 'count' vertices.
+ */
+static void generic_vertex_fetch(struct spu_vs_context *draw,
+ struct spu_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count)
+{
+ unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
+ unsigned attr;
+
+ ASSERT(count <= 4);
+
+#if DRAW_DBG
+ printf("SPU: %s count = %u, nr_attrs = %u\n",
+ __FUNCTION__, count, nr_attrs);
+#endif
+
+ /* loop over vertex attributes (vertex shader inputs)
+ */
+ for (attr = 0; attr < nr_attrs; attr++) {
+ const unsigned pitch = draw->vertex_fetch.pitch[attr];
+ const uint64_t src = draw->vertex_fetch.src_ptr[attr];
+ const spu_fetch_func fetch = (spu_fetch_func)
+ (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]);
+ unsigned i;
+ unsigned idx;
+ const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
+ const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
+ qword in[2 * 4] ALIGN16_ATTRIB;
+
+
+ /* Fetch four attributes for four vertices.
+ */
+ idx = 0;
+ for (i = 0; i < count; i++) {
+ const uint64_t addr = src + (elts[i] * pitch);
+
+#if DRAW_DBG
+ printf("SPU: fetching = 0x%llx\n", addr);
+#endif
+
+ spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry);
+ idx += quads_per_entry;
+ }
+
+ /* Be nice and zero out any missing vertices.
+ */
+ (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword));
+
+
+ /* Convert all 4 vertices to vectors of float.
+ */
+ (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data);
+ }
+}
+
+
+void spu_update_vertex_fetch( struct spu_vs_context *draw )
+{
+ draw->vertex_fetch.fetch_func = generic_vertex_fetch;
+}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
new file mode 100644
index 0000000000..fbe5b34d39
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
@@ -0,0 +1,244 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ * Ian Romanick <idr@us.ibm.com>
+ */
+
+#include <spu_mfcio.h>
+
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "draw/draw_private.h"
+#include "draw/draw_context.h"
+#include "cell/common.h"
+#include "spu_vertex_shader.h"
+#include "spu_exec.h"
+#include "spu_main.h"
+
+
+#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
+
+
+#define CLIP_RIGHT_BIT 0x01
+#define CLIP_LEFT_BIT 0x02
+#define CLIP_TOP_BIT 0x04
+#define CLIP_BOTTOM_BIT 0x08
+#define CLIP_FAR_BIT 0x10
+#define CLIP_NEAR_BIT 0x20
+
+
+static INLINE float
+dot4(const float *a, const float *b)
+{
+ return (a[0]*b[0] +
+ a[1]*b[1] +
+ a[2]*b[2] +
+ a[3]*b[3]);
+}
+
+static INLINE unsigned
+compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
+{
+ unsigned mask = 0;
+ unsigned i;
+
+ /* Do the hardwired planes first:
+ */
+ if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
+ if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
+ if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
+ if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
+ if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
+ if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
+
+ /* Followed by any remaining ones:
+ */
+ for (i = 6; i < nr; i++) {
+ if (dot4(clip, plane[i]) < 0)
+ mask |= (1<<i);
+ }
+
+ return mask;
+}
+
+
+/**
+ * Transform vertices with the current vertex program/shader
+ * Up to four vertices can be shaded at a time.
+ * \param vbuffer the input vertex data
+ * \param elts indexes of four input vertices
+ * \param count number of vertices to shade [1..4]
+ * \param vOut array of pointers to four output vertices
+ */
+static void
+run_vertex_program(struct spu_vs_context *draw,
+ unsigned elts[4], unsigned count,
+ const uint64_t *vOut)
+{
+ struct spu_exec_machine *machine = &draw->machine;
+ unsigned int j;
+
+ ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS);
+ ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS);
+ const float *scale = draw->viewport.scale;
+ const float *trans = draw->viewport.translate;
+
+ ASSERT(count <= 4);
+
+ machine->Processor = TGSI_PROCESSOR_VERTEX;
+
+ ASSERT_ALIGN16(draw->constants);
+ machine->Consts = (float (*)[4]) draw->constants;
+
+ machine->Inputs = ALIGN16_ASSIGN(inputs);
+ machine->Outputs = ALIGN16_ASSIGN(outputs);
+
+ spu_vertex_fetch( draw, machine, elts, count );
+
+ /* run shader */
+ spu_exec_machine_run( machine );
+
+
+ /* store machine results */
+ for (j = 0; j < count; j++) {
+ unsigned slot;
+ float x, y, z, w;
+ unsigned char buffer[sizeof(struct vertex_header)
+ + MAX_VERTEX_SIZE] ALIGN16_ATTRIB;
+ struct vertex_header *const tmpOut =
+ (struct vertex_header *) buffer;
+ const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header)
+ + (sizeof(float) * 4
+ * draw->num_vs_outputs));
+
+ mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
+ wait_on_mask(1 << TAG_VERTEX_BUFFER);
+
+
+ /* Handle attr[0] (position) specially:
+ *
+ * XXX: Computing the clipmask should be done in the vertex
+ * program as a set of DP4 instructions appended to the
+ * user-provided code.
+ */
+ x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j];
+ y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j];
+ z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j];
+ w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j];
+
+ tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane,
+ draw->nr_planes);
+ tmpOut->edgeflag = 1;
+
+ /* divide by w */
+ w = 1.0f / w;
+ x *= w;
+ y *= w;
+ z *= w;
+
+ /* Viewport mapping */
+ tmpOut->data[0][0] = x * scale[0] + trans[0];
+ tmpOut->data[0][1] = y * scale[1] + trans[1];
+ tmpOut->data[0][2] = z * scale[2] + trans[2];
+ tmpOut->data[0][3] = w;
+
+ /* Remaining attributes are packed into sequential post-transform
+ * vertex attrib slots.
+ */
+ for (slot = 1; slot < draw->num_vs_outputs; slot++) {
+ tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ }
+
+ mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
+ } /* loop over vertices */
+}
+
+
+unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]
+ ALIGN16_ATTRIB;
+
+
+void
+spu_bind_vertex_shader(struct spu_vs_context *draw,
+ struct cell_shader_info *vs)
+{
+ const unsigned immediate_addr = vs->immediates;
+ const unsigned immediate_size =
+ ROUNDUP16((sizeof(float) * 4 * vs->num_immediates)
+ + (immediate_addr & 0x0f));
+
+
+ mfc_get(immediates, immediate_addr & ~0x0f, immediate_size,
+ TAG_VERTEX_BUFFER, 0, 0);
+
+ draw->machine.Instructions = (struct tgsi_full_instruction *)
+ vs->instructions;
+ draw->machine.NumInstructions = vs->num_instructions;
+
+ draw->machine.Declarations = (struct tgsi_full_declaration *)
+ vs->declarations;
+ draw->machine.NumDeclarations = vs->num_declarations;
+
+ draw->num_vs_outputs = vs->num_outputs;
+
+ /* specify the shader to interpret/execute */
+ spu_exec_machine_init(&draw->machine,
+ PIPE_MAX_SAMPLERS,
+ NULL /*samplers*/,
+ PIPE_SHADER_VERTEX);
+
+ wait_on_mask(1 << TAG_VERTEX_BUFFER);
+
+ (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f],
+ sizeof(float) * 4 * vs->num_immediates);
+}
+
+
+void
+spu_execute_vertex_shader(struct spu_vs_context *draw,
+ const struct cell_command_vs *vs)
+{
+ unsigned i;
+
+ (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes);
+ draw->nr_planes = vs->nr_planes;
+ draw->vertex_fetch.nr_attrs = vs->nr_attrs;
+
+ for (i = 0; i < vs->num_elts; i += 4) {
+ const unsigned batch_size = MIN2(vs->num_elts - i, 4);
+
+ run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]);
+ }
+}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h
new file mode 100644
index 0000000000..4c74f5e74d
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h
@@ -0,0 +1,66 @@
+#ifndef SPU_VERTEX_SHADER_H
+#define SPU_VERTEX_SHADER_H
+
+#include "cell/common.h"
+#include "pipe/p_format.h"
+#include "spu_exec.h"
+
+struct spu_vs_context;
+
+typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw,
+ struct spu_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count );
+
+struct spu_vs_context {
+ struct pipe_viewport_state viewport;
+
+ struct {
+ uint64_t src_ptr[PIPE_MAX_ATTRIBS];
+ unsigned pitch[PIPE_MAX_ATTRIBS];
+ unsigned size[PIPE_MAX_ATTRIBS];
+ unsigned code_offset[PIPE_MAX_ATTRIBS];
+ unsigned nr_attrs;
+ boolean dirty;
+
+ spu_full_fetch_func fetch_func;
+ void *code;
+ } vertex_fetch;
+
+ /* Clip derived state:
+ */
+ float plane[12][4];
+ unsigned nr_planes;
+
+ struct spu_exec_machine machine;
+ const float (*constants)[4];
+
+ unsigned num_vs_outputs;
+};
+
+extern void spu_update_vertex_fetch(struct spu_vs_context *draw);
+
+static INLINE void spu_vertex_fetch(struct spu_vs_context *draw,
+ struct spu_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count)
+{
+ if (draw->vertex_fetch.dirty) {
+ spu_update_vertex_fetch(draw);
+ draw->vertex_fetch.dirty = 0;
+ }
+
+ (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count);
+}
+
+struct cell_command_vs;
+
+extern void
+spu_bind_vertex_shader(struct spu_vs_context *draw,
+ struct cell_shader_info *vs);
+
+extern void
+spu_execute_vertex_shader(struct spu_vs_context *draw,
+ const struct cell_command_vs *vs);
+
+#endif /* SPU_VERTEX_SHADER_H */
diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile
new file mode 100644
index 0000000000..f08b8df07a
--- /dev/null
+++ b/src/gallium/drivers/failover/Makefile
@@ -0,0 +1,14 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = failover
+
+C_SOURCES = \
+ fo_state.c \
+ fo_state_emit.c \
+ fo_context.c
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/failover/SConscript b/src/gallium/drivers/failover/SConscript
new file mode 100644
index 0000000000..f8e9b1b491
--- /dev/null
+++ b/src/gallium/drivers/failover/SConscript
@@ -0,0 +1,13 @@
+Import('*')
+
+env = env.Clone()
+
+failover = env.ConvenienceLibrary(
+ target = 'failover',
+ source = [
+ 'fo_state.c',
+ 'fo_state_emit.c',
+ 'fo_context.c',
+ ])
+
+Export('failover')
diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c
new file mode 100644
index 0000000000..10c4ffc209
--- /dev/null
+++ b/src/gallium/drivers/failover/fo_context.c
@@ -0,0 +1,159 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_memory.h"
+#include "pipe/p_context.h"
+
+#include "fo_context.h"
+#include "fo_winsys.h"
+
+
+
+static void failover_destroy( struct pipe_context *pipe )
+{
+ struct failover_context *failover = failover_context( pipe );
+
+ free( failover );
+}
+
+
+
+static boolean failover_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct failover_context *failover = failover_context( pipe );
+
+ /* If there has been any statechange since last time, try hardware
+ * rendering again:
+ */
+ if (failover->dirty) {
+ failover->mode = FO_HW;
+ }
+
+ /* Try hardware:
+ */
+ if (failover->mode == FO_HW) {
+ if (!failover->hw->draw_elements( failover->hw,
+ indexBuffer,
+ indexSize,
+ prim,
+ start,
+ count )) {
+
+ failover->hw->flush( failover->hw, ~0, NULL );
+ failover->mode = FO_SW;
+ }
+ }
+
+ /* Possibly try software:
+ */
+ if (failover->mode == FO_SW) {
+
+ if (failover->dirty)
+ failover_state_emit( failover );
+
+ failover->sw->draw_elements( failover->sw,
+ indexBuffer,
+ indexSize,
+ prim,
+ start,
+ count );
+
+ /* Be ready to switch back to hardware rendering without an
+ * intervening flush. Unlikely to be much performance impact to
+ * this:
+ */
+ failover->sw->flush( failover->sw, ~0, NULL );
+ }
+
+ return TRUE;
+}
+
+
+static boolean failover_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
+{
+ return failover_draw_elements(pipe, NULL, 0, prim, start, count);
+}
+
+
+
+struct pipe_context *failover_create( struct pipe_context *hw,
+ struct pipe_context *sw )
+{
+ struct failover_context *failover = CALLOC_STRUCT(failover_context);
+ if (failover == NULL)
+ return NULL;
+
+ failover->hw = hw;
+ failover->sw = sw;
+ failover->pipe.winsys = hw->winsys;
+ failover->pipe.screen = hw->screen;
+ failover->pipe.destroy = failover_destroy;
+#if 0
+ failover->pipe.is_format_supported = hw->is_format_supported;
+ failover->pipe.get_name = hw->get_name;
+ failover->pipe.get_vendor = hw->get_vendor;
+ failover->pipe.get_param = hw->get_param;
+ failover->pipe.get_paramf = hw->get_paramf;
+#endif
+
+ failover->pipe.draw_arrays = failover_draw_arrays;
+ failover->pipe.draw_elements = failover_draw_elements;
+ failover->pipe.clear = hw->clear;
+
+ /* No software occlusion fallback (or other optional functionality)
+ * at this point - if the hardware doesn't support it, don't
+ * advertise it to the application.
+ */
+ failover->pipe.begin_query = hw->begin_query;
+ failover->pipe.end_query = hw->end_query;
+
+ failover_init_state_functions( failover );
+
+ failover->pipe.surface_copy = hw->surface_copy;
+ failover->pipe.surface_fill = hw->surface_fill;
+
+#if 0
+ failover->pipe.texture_create = hw->texture_create;
+ failover->pipe.texture_release = hw->texture_release;
+ failover->pipe.get_tex_surface = hw->get_tex_surface;
+ failover->pipe.texture_update = hw->texture_update;
+#endif
+
+ failover->pipe.flush = hw->flush;
+
+ failover->dirty = 0;
+
+ return &failover->pipe;
+}
+
diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h
new file mode 100644
index 0000000000..c6409fe1e1
--- /dev/null
+++ b/src/gallium/drivers/failover/fo_context.h
@@ -0,0 +1,118 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef FO_CONTEXT_H
+#define FO_CONTEXT_H
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+
+
+#define FO_NEW_VIEWPORT 0x1
+#define FO_NEW_RASTERIZER 0x2
+#define FO_NEW_FRAGMENT_SHADER 0x4
+#define FO_NEW_BLEND 0x8
+#define FO_NEW_CLIP 0x10
+#define FO_NEW_SCISSOR 0x20
+#define FO_NEW_STIPPLE 0x40
+#define FO_NEW_FRAMEBUFFER 0x80
+#define FO_NEW_ALPHA_TEST 0x100
+#define FO_NEW_DEPTH_STENCIL 0x200
+#define FO_NEW_SAMPLER 0x400
+#define FO_NEW_TEXTURE 0x800
+#define FO_NEW_VERTEX 0x2000
+#define FO_NEW_VERTEX_SHADER 0x4000
+#define FO_NEW_BLEND_COLOR 0x8000
+#define FO_NEW_CLEAR_COLOR 0x10000
+#define FO_NEW_VERTEX_BUFFER 0x20000
+#define FO_NEW_VERTEX_ELEMENT 0x40000
+
+
+
+#define FO_HW 0
+#define FO_SW 1
+
+struct fo_state {
+ void *sw_state;
+ void *hw_state;
+};
+struct failover_context {
+ struct pipe_context pipe; /**< base class */
+
+
+ /* The most recent drawing state as set by the driver:
+ */
+ const struct fo_state *blend;
+ const struct fo_state *sampler[PIPE_MAX_SAMPLERS];
+ const struct fo_state *depth_stencil;
+ const struct fo_state *rasterizer;
+ const struct fo_state *fragment_shader;
+ const struct fo_state *vertex_shader;
+
+ struct pipe_blend_color blend_color;
+ struct pipe_clip_state clip;
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_poly_stipple poly_stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_viewport_state viewport;
+ struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS];
+
+ uint num_vertex_buffers;
+ uint num_vertex_elements;
+
+ void *sw_sampler_state[PIPE_MAX_SAMPLERS];
+ void *hw_sampler_state[PIPE_MAX_SAMPLERS];
+
+ unsigned dirty;
+
+ unsigned num_samplers;
+ unsigned num_textures;
+
+ unsigned mode;
+ struct pipe_context *hw;
+ struct pipe_context *sw;
+};
+
+
+
+void failover_init_state_functions( struct failover_context *failover );
+void failover_state_emit( struct failover_context *failover );
+
+static INLINE struct failover_context *
+failover_context( struct pipe_context *pipe )
+{
+ return (struct failover_context *)pipe;
+}
+
+
+#endif /* FO_CONTEXT_H */
diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c
new file mode 100644
index 0000000000..6a79706632
--- /dev/null
+++ b/src/gallium/drivers/failover/fo_state.c
@@ -0,0 +1,483 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_inlines.h"
+
+#include "fo_context.h"
+
+
+/* This looks like a lot of work at the moment - we're keeping a
+ * duplicate copy of the state up-to-date.
+ *
+ * This can change in two ways:
+ * - With constant state objects we would only need to save a pointer,
+ * not the whole object.
+ * - By adding a callback in the state tracker to re-emit state. The
+ * state tracker knows the current state already and can re-emit it
+ * without additional complexity.
+ *
+ * This works as a proof-of-concept, but a final version will have
+ * lower overheads.
+ */
+
+
+
+static void *
+failover_create_blend_state( struct pipe_context *pipe,
+ const struct pipe_blend_state *blend )
+{
+ struct fo_state *state = malloc(sizeof(struct fo_state));
+ struct failover_context *failover = failover_context(pipe);
+
+ state->sw_state = failover->sw->create_blend_state(failover->sw, blend);
+ state->hw_state = failover->hw->create_blend_state(failover->hw, blend);
+
+ return state;
+}
+
+static void
+failover_bind_blend_state( struct pipe_context *pipe,
+ void *blend )
+{
+ struct failover_context *failover = failover_context(pipe);
+ struct fo_state *state = (struct fo_state *)blend;
+ failover->blend = state;
+ failover->dirty |= FO_NEW_BLEND;
+ failover->sw->bind_blend_state( failover->sw, state->sw_state );
+ failover->hw->bind_blend_state( failover->hw, state->hw_state );
+}
+
+static void
+failover_delete_blend_state( struct pipe_context *pipe,
+ void *blend )
+{
+ struct fo_state *state = (struct fo_state*)blend;
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->sw->delete_blend_state(failover->sw, state->sw_state);
+ failover->hw->delete_blend_state(failover->hw, state->hw_state);
+ state->sw_state = 0;
+ state->hw_state = 0;
+ free(state);
+}
+
+static void
+failover_set_blend_color( struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color )
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->blend_color = *blend_color;
+ failover->dirty |= FO_NEW_BLEND_COLOR;
+ failover->sw->set_blend_color( failover->sw, blend_color );
+ failover->hw->set_blend_color( failover->hw, blend_color );
+}
+
+static void
+failover_set_clip_state( struct pipe_context *pipe,
+ const struct pipe_clip_state *clip )
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->clip = *clip;
+ failover->dirty |= FO_NEW_CLIP;
+ failover->sw->set_clip_state( failover->sw, clip );
+ failover->hw->set_clip_state( failover->hw, clip );
+}
+
+
+static void *
+failover_create_depth_stencil_state(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *templ)
+{
+ struct fo_state *state = malloc(sizeof(struct fo_state));
+ struct failover_context *failover = failover_context(pipe);
+
+ state->sw_state = failover->sw->create_depth_stencil_alpha_state(failover->sw, templ);
+ state->hw_state = failover->hw->create_depth_stencil_alpha_state(failover->hw, templ);
+
+ return state;
+}
+
+static void
+failover_bind_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ struct failover_context *failover = failover_context(pipe);
+ struct fo_state *state = (struct fo_state *)depth_stencil;
+ failover->depth_stencil = state;
+ failover->dirty |= FO_NEW_DEPTH_STENCIL;
+ failover->sw->bind_depth_stencil_alpha_state(failover->sw, state->sw_state);
+ failover->hw->bind_depth_stencil_alpha_state(failover->hw, state->hw_state);
+}
+
+static void
+failover_delete_depth_stencil_state(struct pipe_context *pipe,
+ void *ds)
+{
+ struct fo_state *state = (struct fo_state*)ds;
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->sw->delete_depth_stencil_alpha_state(failover->sw, state->sw_state);
+ failover->hw->delete_depth_stencil_alpha_state(failover->hw, state->hw_state);
+ state->sw_state = 0;
+ state->hw_state = 0;
+ free(state);
+}
+
+static void
+failover_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *framebuffer)
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->framebuffer = *framebuffer;
+ failover->dirty |= FO_NEW_FRAMEBUFFER;
+ failover->sw->set_framebuffer_state( failover->sw, framebuffer );
+ failover->hw->set_framebuffer_state( failover->hw, framebuffer );
+}
+
+
+static void *
+failover_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct fo_state *state = malloc(sizeof(struct fo_state));
+ struct failover_context *failover = failover_context(pipe);
+
+ state->sw_state = failover->sw->create_fs_state(failover->sw, templ);
+ state->hw_state = failover->hw->create_fs_state(failover->hw, templ);
+
+ return state;
+}
+
+static void
+failover_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct failover_context *failover = failover_context(pipe);
+ struct fo_state *state = (struct fo_state*)fs;
+ failover->fragment_shader = state;
+ failover->dirty |= FO_NEW_FRAGMENT_SHADER;
+ failover->sw->bind_fs_state(failover->sw, state->sw_state);
+ failover->hw->bind_fs_state(failover->hw, state->hw_state);
+}
+
+static void
+failover_delete_fs_state(struct pipe_context *pipe,
+ void *fs)
+{
+ struct fo_state *state = (struct fo_state*)fs;
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->sw->delete_fs_state(failover->sw, state->sw_state);
+ failover->hw->delete_fs_state(failover->hw, state->hw_state);
+ state->sw_state = 0;
+ state->hw_state = 0;
+ free(state);
+}
+
+static void *
+failover_create_vs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct fo_state *state = malloc(sizeof(struct fo_state));
+ struct failover_context *failover = failover_context(pipe);
+
+ state->sw_state = failover->sw->create_vs_state(failover->sw, templ);
+ state->hw_state = failover->hw->create_vs_state(failover->hw, templ);
+
+ return state;
+}
+
+static void
+failover_bind_vs_state(struct pipe_context *pipe,
+ void *vs)
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ struct fo_state *state = (struct fo_state*)vs;
+ failover->vertex_shader = state;
+ failover->dirty |= FO_NEW_VERTEX_SHADER;
+ failover->sw->bind_vs_state(failover->sw, state->sw_state);
+ failover->hw->bind_vs_state(failover->hw, state->hw_state);
+}
+
+static void
+failover_delete_vs_state(struct pipe_context *pipe,
+ void *vs)
+{
+ struct fo_state *state = (struct fo_state*)vs;
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->sw->delete_vs_state(failover->sw, state->sw_state);
+ failover->hw->delete_vs_state(failover->hw, state->hw_state);
+ state->sw_state = 0;
+ state->hw_state = 0;
+ free(state);
+}
+
+static void
+failover_set_polygon_stipple( struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple )
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->poly_stipple = *stipple;
+ failover->dirty |= FO_NEW_STIPPLE;
+ failover->sw->set_polygon_stipple( failover->sw, stipple );
+ failover->hw->set_polygon_stipple( failover->hw, stipple );
+}
+
+
+static void *
+failover_create_rasterizer_state(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *templ)
+{
+ struct fo_state *state = malloc(sizeof(struct fo_state));
+ struct failover_context *failover = failover_context(pipe);
+
+ state->sw_state = failover->sw->create_rasterizer_state(failover->sw, templ);
+ state->hw_state = failover->hw->create_rasterizer_state(failover->hw, templ);
+
+ return state;
+}
+
+static void
+failover_bind_rasterizer_state(struct pipe_context *pipe,
+ void *raster)
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ struct fo_state *state = (struct fo_state*)raster;
+ failover->rasterizer = state;
+ failover->dirty |= FO_NEW_RASTERIZER;
+ failover->sw->bind_rasterizer_state(failover->sw, state->sw_state);
+ failover->hw->bind_rasterizer_state(failover->hw, state->hw_state);
+}
+
+static void
+failover_delete_rasterizer_state(struct pipe_context *pipe,
+ void *raster)
+{
+ struct fo_state *state = (struct fo_state*)raster;
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->sw->delete_rasterizer_state(failover->sw, state->sw_state);
+ failover->hw->delete_rasterizer_state(failover->hw, state->hw_state);
+ state->sw_state = 0;
+ state->hw_state = 0;
+ free(state);
+}
+
+
+static void
+failover_set_scissor_state( struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor )
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->scissor = *scissor;
+ failover->dirty |= FO_NEW_SCISSOR;
+ failover->sw->set_scissor_state( failover->sw, scissor );
+ failover->hw->set_scissor_state( failover->hw, scissor );
+}
+
+
+static void *
+failover_create_sampler_state(struct pipe_context *pipe,
+ const struct pipe_sampler_state *templ)
+{
+ struct fo_state *state = malloc(sizeof(struct fo_state));
+ struct failover_context *failover = failover_context(pipe);
+
+ state->sw_state = failover->sw->create_sampler_state(failover->sw, templ);
+ state->hw_state = failover->hw->create_sampler_state(failover->hw, templ);
+
+ return state;
+}
+
+static void
+failover_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct failover_context *failover = failover_context(pipe);
+ struct fo_state *state = (struct fo_state*)sampler;
+ uint i;
+ assert(num <= PIPE_MAX_SAMPLERS);
+ /* Check for no-op */
+ if (num == failover->num_samplers &&
+ !memcmp(failover->sampler, sampler, num * sizeof(void *)))
+ return;
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ failover->sw_sampler_state[i] = i < num ? state[i].sw_state : NULL;
+ failover->hw_sampler_state[i] = i < num ? state[i].hw_state : NULL;
+ }
+ failover->dirty |= FO_NEW_SAMPLER;
+ failover->num_samplers = num;
+ failover->sw->bind_sampler_states(failover->sw, num,
+ failover->sw_sampler_state);
+ failover->hw->bind_sampler_states(failover->hw, num,
+ failover->hw_sampler_state);
+}
+
+static void
+failover_delete_sampler_state(struct pipe_context *pipe, void *sampler)
+{
+ struct fo_state *state = (struct fo_state*)sampler;
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->sw->delete_sampler_state(failover->sw, state->sw_state);
+ failover->hw->delete_sampler_state(failover->hw, state->hw_state);
+ state->sw_state = 0;
+ state->hw_state = 0;
+ free(state);
+}
+
+
+static void
+failover_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_texture **texture)
+{
+ struct failover_context *failover = failover_context(pipe);
+ uint i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == failover->num_textures &&
+ !memcmp(failover->texture, texture, num * sizeof(struct pipe_texture *)))
+ return;
+ for (i = 0; i < num; i++)
+ pipe_texture_reference((struct pipe_texture **) &failover->texture[i],
+ texture[i]);
+ for (i = num; i < failover->num_textures; i++)
+ pipe_texture_reference((struct pipe_texture **) &failover->texture[i],
+ NULL);
+ failover->dirty |= FO_NEW_TEXTURE;
+ failover->num_textures = num;
+ failover->sw->set_sampler_textures( failover->sw, num, texture );
+ failover->hw->set_sampler_textures( failover->hw, num, texture );
+}
+
+
+static void
+failover_set_viewport_state( struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport )
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->viewport = *viewport;
+ failover->dirty |= FO_NEW_VIEWPORT;
+ failover->sw->set_viewport_state( failover->sw, viewport );
+ failover->hw->set_viewport_state( failover->hw, viewport );
+}
+
+
+static void
+failover_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *vertex_buffers)
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ memcpy(failover->vertex_buffers, vertex_buffers,
+ count * sizeof(vertex_buffers[0]));
+ failover->dirty |= FO_NEW_VERTEX_BUFFER;
+ failover->num_vertex_buffers = count;
+ failover->sw->set_vertex_buffers( failover->sw, count, vertex_buffers );
+ failover->hw->set_vertex_buffers( failover->hw, count, vertex_buffers );
+}
+
+
+static void
+failover_set_vertex_elements(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *vertex_elements)
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ memcpy(failover->vertex_elements, vertex_elements,
+ count * sizeof(vertex_elements[0]));
+
+ failover->dirty |= FO_NEW_VERTEX_ELEMENT;
+ failover->num_vertex_elements = count;
+ failover->sw->set_vertex_elements( failover->sw, count, vertex_elements );
+ failover->hw->set_vertex_elements( failover->hw, count, vertex_elements );
+}
+
+void
+failover_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf)
+{
+ struct failover_context *failover = failover_context(pipe);
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index == 0);
+
+ failover->sw->set_constant_buffer(failover->sw, shader, index, buf);
+ failover->hw->set_constant_buffer(failover->hw, shader, index, buf);
+}
+
+
+void
+failover_init_state_functions( struct failover_context *failover )
+{
+ failover->pipe.create_blend_state = failover_create_blend_state;
+ failover->pipe.bind_blend_state = failover_bind_blend_state;
+ failover->pipe.delete_blend_state = failover_delete_blend_state;
+ failover->pipe.create_sampler_state = failover_create_sampler_state;
+ failover->pipe.bind_sampler_states = failover_bind_sampler_states;
+ failover->pipe.delete_sampler_state = failover_delete_sampler_state;
+ failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state;
+ failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state;
+ failover->pipe.delete_depth_stencil_alpha_state = failover_delete_depth_stencil_state;
+ failover->pipe.create_rasterizer_state = failover_create_rasterizer_state;
+ failover->pipe.bind_rasterizer_state = failover_bind_rasterizer_state;
+ failover->pipe.delete_rasterizer_state = failover_delete_rasterizer_state;
+ failover->pipe.create_fs_state = failover_create_fs_state;
+ failover->pipe.bind_fs_state = failover_bind_fs_state;
+ failover->pipe.delete_fs_state = failover_delete_fs_state;
+ failover->pipe.create_vs_state = failover_create_vs_state;
+ failover->pipe.bind_vs_state = failover_bind_vs_state;
+ failover->pipe.delete_vs_state = failover_delete_vs_state;
+
+ failover->pipe.set_blend_color = failover_set_blend_color;
+ failover->pipe.set_clip_state = failover_set_clip_state;
+ failover->pipe.set_framebuffer_state = failover_set_framebuffer_state;
+ failover->pipe.set_polygon_stipple = failover_set_polygon_stipple;
+ failover->pipe.set_scissor_state = failover_set_scissor_state;
+ failover->pipe.set_sampler_textures = failover_set_sampler_textures;
+ failover->pipe.set_viewport_state = failover_set_viewport_state;
+ failover->pipe.set_vertex_buffers = failover_set_vertex_buffers;
+ failover->pipe.set_vertex_elements = failover_set_vertex_elements;
+ failover->pipe.set_constant_buffer = failover_set_constant_buffer;
+}
diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c
new file mode 100644
index 0000000000..bd4fce9d20
--- /dev/null
+++ b/src/gallium/drivers/failover/fo_state_emit.c
@@ -0,0 +1,117 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "fo_context.h"
+
+/* This looks like a lot of work at the moment - we're keeping a
+ * duplicate copy of the state up-to-date.
+ *
+ * This can change in two ways:
+ * - With constant state objects we would only need to save a pointer,
+ * not the whole object.
+ * - By adding a callback in the state tracker to re-emit state. The
+ * state tracker knows the current state already and can re-emit it
+ * without additional complexity.
+ *
+ * This works as a proof-of-concept, but a final version will have
+ * lower overheads.
+ */
+
+
+/* Bring the software pipe uptodate with current state.
+ *
+ * With constant state objects we would probably just send all state
+ * to both rasterizers all the time???
+ */
+void
+failover_state_emit( struct failover_context *failover )
+{
+ if (failover->dirty & FO_NEW_BLEND)
+ failover->sw->bind_blend_state( failover->sw,
+ failover->blend->sw_state );
+
+ if (failover->dirty & FO_NEW_BLEND_COLOR)
+ failover->sw->set_blend_color( failover->sw, &failover->blend_color );
+
+ if (failover->dirty & FO_NEW_CLIP)
+ failover->sw->set_clip_state( failover->sw, &failover->clip );
+
+ if (failover->dirty & FO_NEW_DEPTH_STENCIL)
+ failover->sw->bind_depth_stencil_alpha_state( failover->sw,
+ failover->depth_stencil->sw_state );
+
+ if (failover->dirty & FO_NEW_FRAMEBUFFER)
+ failover->sw->set_framebuffer_state( failover->sw, &failover->framebuffer );
+
+ if (failover->dirty & FO_NEW_FRAGMENT_SHADER)
+ failover->sw->bind_fs_state( failover->sw,
+ failover->fragment_shader->sw_state );
+
+ if (failover->dirty & FO_NEW_VERTEX_SHADER)
+ failover->sw->bind_vs_state( failover->sw,
+ failover->vertex_shader->sw_state );
+
+ if (failover->dirty & FO_NEW_STIPPLE)
+ failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple );
+
+ if (failover->dirty & FO_NEW_RASTERIZER)
+ failover->sw->bind_rasterizer_state( failover->sw,
+ failover->rasterizer->sw_state );
+
+ if (failover->dirty & FO_NEW_SCISSOR)
+ failover->sw->set_scissor_state( failover->sw, &failover->scissor );
+
+ if (failover->dirty & FO_NEW_VIEWPORT)
+ failover->sw->set_viewport_state( failover->sw, &failover->viewport );
+
+ if (failover->dirty & FO_NEW_SAMPLER) {
+ failover->sw->bind_sampler_states( failover->sw, failover->num_samplers,
+ failover->sw_sampler_state );
+ }
+
+ if (failover->dirty & FO_NEW_TEXTURE) {
+ failover->sw->set_sampler_textures( failover->sw, failover->num_textures,
+ failover->texture );
+ }
+
+ if (failover->dirty & FO_NEW_VERTEX_BUFFER) {
+ failover->sw->set_vertex_buffers( failover->sw,
+ failover->num_vertex_buffers,
+ failover->vertex_buffers );
+ }
+
+ if (failover->dirty & FO_NEW_VERTEX_ELEMENT) {
+ failover->sw->set_vertex_elements( failover->sw,
+ failover->num_vertex_elements,
+ failover->vertex_elements );
+ }
+
+ failover->dirty = 0;
+}
diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h
new file mode 100644
index 0000000000..a8ce997a1f
--- /dev/null
+++ b/src/gallium/drivers/failover/fo_winsys.h
@@ -0,0 +1,45 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef FO_WINSYS_H
+#define FO_WINSYS_H
+
+
+/* This is the interface that failover requires any window system
+ * hosting it to implement. This is the only include file in failover
+ * which is public.
+ */
+
+
+struct pipe_context;
+
+
+struct pipe_context *failover_create( struct pipe_context *hw,
+ struct pipe_context *sw );
+
+
+#endif /* FO_WINSYS_H */
diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile
new file mode 100644
index 0000000000..41a61a0020
--- /dev/null
+++ b/src/gallium/drivers/i915simple/Makefile
@@ -0,0 +1,31 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i915simple
+
+C_SOURCES = \
+ i915_blit.c \
+ i915_clear.c \
+ i915_flush.c \
+ i915_context.c \
+ i915_context.c \
+ i915_debug.c \
+ i915_debug_fp.c \
+ i915_state.c \
+ i915_state_immediate.c \
+ i915_state_dynamic.c \
+ i915_state_derived.c \
+ i915_state_emit.c \
+ i915_state_sampler.c \
+ i915_screen.c \
+ i915_prim_emit.c \
+ i915_prim_vbuf.c \
+ i915_texture.c \
+ i915_fpc_emit.c \
+ i915_fpc_translate.c \
+ i915_surface.c
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915simple/SConscript
new file mode 100644
index 0000000000..2366e1247f
--- /dev/null
+++ b/src/gallium/drivers/i915simple/SConscript
@@ -0,0 +1,29 @@
+Import('*')
+
+env = env.Clone()
+
+i915simple = env.ConvenienceLibrary(
+ target = 'i915simple',
+ source = [
+ 'i915_blit.c',
+ 'i915_clear.c',
+ 'i915_context.c',
+ 'i915_debug.c',
+ 'i915_debug_fp.c',
+ 'i915_flush.c',
+ 'i915_fpc_emit.c',
+ 'i915_fpc_translate.c',
+ 'i915_prim_emit.c',
+ 'i915_prim_vbuf.c',
+ 'i915_screen.c',
+ 'i915_state.c',
+ 'i915_state_derived.c',
+ 'i915_state_dynamic.c',
+ 'i915_state_emit.c',
+ 'i915_state_immediate.c',
+ 'i915_state_sampler.c',
+ 'i915_surface.c',
+ 'i915_texture.c',
+ ])
+
+Export('i915simple')
diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h
new file mode 100644
index 0000000000..45bf4f4028
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_batch.h
@@ -0,0 +1,116 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef I915_BATCH_H
+#define I915_BATCH_H
+
+#include "i915_winsys.h"
+
+struct i915_batchbuffer
+{
+ struct pipe_buffer *buffer;
+ struct i915_winsys *winsys;
+
+ unsigned char *map;
+ unsigned char *ptr;
+
+ size_t size;
+ size_t actual_size;
+
+ size_t relocs;
+ size_t max_relocs;
+};
+
+static INLINE boolean
+i915_batchbuffer_check( struct i915_batchbuffer *batch,
+ size_t dwords,
+ size_t relocs )
+{
+ /** TODO JB: Check relocs */
+ return dwords * 4 <= batch->size - (batch->ptr - batch->map);
+}
+
+static INLINE size_t
+i915_batchbuffer_space( struct i915_batchbuffer *batch )
+{
+ return batch->size - (batch->ptr - batch->map);
+}
+
+static INLINE void
+i915_batchbuffer_dword( struct i915_batchbuffer *batch,
+ unsigned dword )
+{
+ if (i915_batchbuffer_space(batch) < 4)
+ return;
+
+ *(unsigned *)batch->ptr = dword;
+ batch->ptr += 4;
+}
+
+static INLINE void
+i915_batchbuffer_write( struct i915_batchbuffer *batch,
+ void *data,
+ size_t size )
+{
+ if (i915_batchbuffer_space(batch) < size)
+ return;
+
+ memcpy(data, batch->ptr, size);
+ batch->ptr += size;
+}
+
+static INLINE void
+i915_batchbuffer_reloc( struct i915_batchbuffer *batch,
+ struct pipe_buffer *buffer,
+ size_t flags,
+ size_t offset )
+{
+ batch->winsys->batch_reloc( batch->winsys, buffer, flags, offset );
+}
+
+static INLINE void
+i915_batchbuffer_flush( struct i915_batchbuffer *batch,
+ struct pipe_fence_handle **fence )
+{
+ batch->winsys->batch_flush( batch->winsys, fence );
+}
+
+#define BEGIN_BATCH( dwords, relocs ) \
+ (i915_batchbuffer_check( i915->batch, dwords, relocs ))
+
+#define OUT_BATCH( dword ) \
+ i915_batchbuffer_dword( i915->batch, dword )
+
+#define OUT_RELOC( buf, flags, delta ) \
+ i915_batchbuffer_reloc( i915->batch, buf, flags, delta )
+
+#define FLUSH_BATCH(fence) do { \
+ i915->winsys->batch_flush( i915->winsys, fence ); \
+ i915->hardware_dirty = ~0; \
+} while (0)
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c
new file mode 100644
index 0000000000..45fae4c999
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_blit.c
@@ -0,0 +1,157 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "i915_context.h"
+#include "i915_winsys.h"
+#include "i915_blit.h"
+#include "i915_reg.h"
+#include "i915_batch.h"
+#include "i915_debug.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BLIT
+
+void
+i915_fill_blit(struct i915_context *i915,
+ unsigned cpp,
+ short dst_pitch,
+ struct pipe_buffer *dst_buffer,
+ unsigned dst_offset,
+ short x, short y,
+ short w, short h,
+ unsigned color)
+{
+ unsigned BR13, CMD;
+
+ switch (cpp) {
+ case 1:
+ case 2:
+ case 3:
+ BR13 = dst_pitch | (0xF0 << 16) | (1 << 24);
+ CMD = XY_COLOR_BLT_CMD;
+ break;
+ case 4:
+ BR13 = dst_pitch | (0xF0 << 16) | (1 << 24) | (1 << 25);
+ CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
+ XY_COLOR_BLT_WRITE_RGB);
+ break;
+ default:
+ return;
+ }
+
+// DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+// __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
+
+
+ if (!BEGIN_BATCH(6, 1)) {
+ FLUSH_BATCH(NULL);
+ assert(BEGIN_BATCH(6, 1));
+ }
+ OUT_BATCH(CMD);
+ OUT_BATCH(BR13);
+ OUT_BATCH((y << 16) | x);
+ OUT_BATCH(((y + h) << 16) | (x + w));
+ OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset);
+ OUT_BATCH(color);
+}
+
+
+void
+i915_copy_blit( struct i915_context *i915,
+ unsigned do_flip,
+ unsigned cpp,
+ short src_pitch,
+ struct pipe_buffer *src_buffer,
+ unsigned src_offset,
+ short dst_pitch,
+ struct pipe_buffer *dst_buffer,
+ unsigned dst_offset,
+ short src_x, short src_y,
+ short dst_x, short dst_y,
+ short w, short h )
+{
+ unsigned CMD, BR13;
+ int dst_y2 = dst_y + h;
+ int dst_x2 = dst_x + w;
+
+
+ I915_DBG(i915,
+ "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+ __FUNCTION__,
+ src_buffer, src_pitch, src_offset, src_x, src_y,
+ dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
+
+ src_pitch *= (short) cpp;
+ dst_pitch *= (short) cpp;
+
+ switch (cpp) {
+ case 1:
+ case 2:
+ case 3:
+ BR13 = (((int) dst_pitch) & 0xffff) |
+ (0xCC << 16) | (1 << 24);
+ CMD = XY_SRC_COPY_BLT_CMD;
+ break;
+ case 4:
+ BR13 =
+ (((int) dst_pitch) & 0xffff) |
+ (0xCC << 16) | (1 << 24) | (1 << 25);
+ CMD =
+ (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB);
+ break;
+ default:
+ return;
+ }
+
+ if (dst_y2 < dst_y ||
+ dst_x2 < dst_x) {
+ return;
+ }
+
+ /* Hardware can handle negative pitches but loses the ability to do
+ * proper overlapping blits in that case. We don't really have a
+ * need for either at this stage.
+ */
+ assert (dst_pitch > 0 && src_pitch > 0);
+
+
+ if (!BEGIN_BATCH(8, 2)) {
+ FLUSH_BATCH(NULL);
+ assert(BEGIN_BATCH(8, 2));
+ }
+ OUT_BATCH(CMD);
+ OUT_BATCH(BR13);
+ OUT_BATCH((dst_y << 16) | dst_x);
+ OUT_BATCH((dst_y2 << 16) | dst_x2);
+ OUT_RELOC(dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset);
+ OUT_BATCH((src_y << 16) | src_x);
+ OUT_BATCH(((int) src_pitch & 0xffff));
+ OUT_RELOC(src_buffer, I915_BUFFER_ACCESS_READ, src_offset);
+}
+
+
diff --git a/src/gallium/drivers/i915simple/i915_blit.h b/src/gallium/drivers/i915simple/i915_blit.h
new file mode 100644
index 0000000000..6e5b44e124
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_blit.h
@@ -0,0 +1,55 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef I915_BLIT_H
+#define I915_BLIT_H
+
+#include "i915_context.h"
+
+extern void i915_copy_blit(struct i915_context *i915,
+ unsigned do_flip,
+ unsigned cpp,
+ short src_pitch,
+ struct pipe_buffer *src_buffer,
+ unsigned src_offset,
+ short dst_pitch,
+ struct pipe_buffer *dst_buffer,
+ unsigned dst_offset,
+ short srcx, short srcy,
+ short dstx, short dsty,
+ short w, short h );
+
+extern void i915_fill_blit(struct i915_context *i915,
+ unsigned cpp,
+ short dst_pitch,
+ struct pipe_buffer *dst_buffer,
+ unsigned dst_offset,
+ short x, short y,
+ short w, short h, unsigned color);
+
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_clear.c b/src/gallium/drivers/i915simple/i915_clear.c
new file mode 100644
index 0000000000..8a2d3ca43f
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_clear.c
@@ -0,0 +1,48 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ */
+
+
+#include "pipe/p_defines.h"
+#include "i915_context.h"
+#include "i915_state.h"
+
+
+/**
+ * Clear the given surface to the specified value.
+ * No masking, no scissor (clear entire buffer).
+ */
+void
+i915_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+}
diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c
new file mode 100644
index 0000000000..6dd3eda85d
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_context.c
@@ -0,0 +1,193 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "i915_context.h"
+#include "i915_winsys.h"
+#include "i915_state.h"
+#include "i915_batch.h"
+#include "i915_texture.h"
+#include "i915_reg.h"
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "pipe/p_screen.h"
+
+
+static void i915_destroy( struct pipe_context *pipe )
+{
+ struct i915_context *i915 = i915_context( pipe );
+
+ draw_destroy( i915->draw );
+
+ if(i915->winsys->destroy)
+ i915->winsys->destroy(i915->winsys);
+
+ FREE( i915 );
+}
+
+
+static boolean
+i915_draw_range_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct i915_context *i915 = i915_context( pipe );
+ struct draw_context *draw = i915->draw;
+ unsigned i;
+
+ if (i915->dirty)
+ i915_update_derived( i915 );
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < i915->num_vertex_buffers; i++) {
+ void *buf
+ = pipe_buffer_map(pipe->screen,
+ i915->vertex_buffer[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes
+ = pipe_buffer_map(pipe->screen, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer_range(draw, indexSize,
+ min_index,
+ max_index,
+ mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+
+ draw_set_mapped_constant_buffer(draw,
+ i915->current.constants[PIPE_SHADER_VERTEX],
+ ( i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
+ 4 * sizeof(float) ));
+
+ /* draw! */
+ draw_arrays(i915->draw, prim, start, count);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < i915->num_vertex_buffers; i++) {
+ pipe_buffer_unmap(pipe->screen, i915->vertex_buffer[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ if (indexBuffer) {
+ pipe_buffer_unmap(pipe->screen, indexBuffer);
+ draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL);
+ }
+
+ return TRUE;
+}
+
+static boolean
+i915_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count)
+{
+ return i915_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ prim, start, count );
+}
+
+static boolean i915_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
+{
+ return i915_draw_elements(pipe, NULL, 0, prim, start, count);
+}
+
+
+
+struct pipe_context *i915_create_context( struct pipe_screen *screen,
+ struct pipe_winsys *pipe_winsys,
+ struct i915_winsys *i915_winsys )
+{
+ struct i915_context *i915;
+
+ i915 = CALLOC_STRUCT(i915_context);
+ if (i915 == NULL)
+ return NULL;
+
+ i915->winsys = i915_winsys;
+ i915->pipe.winsys = pipe_winsys;
+ i915->pipe.screen = screen;
+
+ i915->pipe.destroy = i915_destroy;
+
+ i915->pipe.clear = i915_clear;
+
+
+ i915->pipe.draw_arrays = i915_draw_arrays;
+ i915->pipe.draw_elements = i915_draw_elements;
+ i915->pipe.draw_range_elements = i915_draw_range_elements;
+
+ /*
+ * Create drawing context and plug our rendering stage into it.
+ */
+ i915->draw = draw_create();
+ assert(i915->draw);
+ if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) {
+ draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915));
+ }
+ else {
+ draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915));
+ }
+
+ i915_init_surface_functions(i915);
+ i915_init_state_functions(i915);
+ i915_init_flush_functions(i915);
+ i915_init_texture_functions(i915);
+
+ draw_install_aaline_stage(i915->draw, &i915->pipe);
+ draw_install_aapoint_stage(i915->draw, &i915->pipe);
+
+ i915->dirty = ~0;
+ i915->hardware_dirty = ~0;
+
+ /* Batch stream debugging is a bit hacked up at the moment:
+ */
+ i915->batch = i915_winsys->batch_get(i915_winsys);
+ i915->batch->winsys = i915_winsys;
+
+ return &i915->pipe;
+}
+
diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h
new file mode 100644
index 0000000000..3cdabe45f9
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_context.h
@@ -0,0 +1,345 @@
+ /**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef I915_CONTEXT_H
+#define I915_CONTEXT_H
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "draw/draw_vertex.h"
+
+#include "tgsi/tgsi_scan.h"
+
+
+#define I915_TEX_UNITS 8
+
+#define I915_DYNAMIC_MODES4 0
+#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */
+#define I915_DYNAMIC_DEPTHSCALE_1 2
+#define I915_DYNAMIC_IAB 3
+#define I915_DYNAMIC_BC_0 4 /* just the header */
+#define I915_DYNAMIC_BC_1 5
+#define I915_DYNAMIC_BFO_0 6
+#define I915_DYNAMIC_BFO_1 7
+#define I915_DYNAMIC_STP_0 8
+#define I915_DYNAMIC_STP_1 9
+#define I915_DYNAMIC_SC_ENA_0 10
+#define I915_DYNAMIC_SC_RECT_0 11
+#define I915_DYNAMIC_SC_RECT_1 12
+#define I915_DYNAMIC_SC_RECT_2 13
+#define I915_MAX_DYNAMIC 14
+
+
+#define I915_IMMEDIATE_S0 0
+#define I915_IMMEDIATE_S1 1
+#define I915_IMMEDIATE_S2 2
+#define I915_IMMEDIATE_S3 3
+#define I915_IMMEDIATE_S4 4
+#define I915_IMMEDIATE_S5 5
+#define I915_IMMEDIATE_S6 6
+#define I915_IMMEDIATE_S7 7
+#define I915_MAX_IMMEDIATE 8
+
+/* These must mach the order of LI0_STATE_* bits, as they will be used
+ * to generate hardware packets:
+ */
+#define I915_CACHE_STATIC 0
+#define I915_CACHE_DYNAMIC 1 /* handled specially */
+#define I915_CACHE_SAMPLER 2
+#define I915_CACHE_MAP 3
+#define I915_CACHE_PROGRAM 4
+#define I915_CACHE_CONSTANTS 5
+#define I915_MAX_CACHE 6
+
+#define I915_MAX_CONSTANT 32
+
+
+/** See constant_flags[] below */
+#define I915_CONSTFLAG_USER 0x1f
+
+
+/**
+ * Subclass of pipe_shader_state
+ */
+struct i915_fragment_shader
+{
+ struct pipe_shader_state state;
+
+ struct tgsi_shader_info info;
+
+ uint *program;
+ uint program_len;
+
+ /**
+ * constants introduced during translation.
+ * These are placed at the end of the constant buffer and grow toward
+ * the beginning (eg: slot 31, 30 29, ...)
+ * User-provided constants start at 0.
+ * This allows both types of constants to co-exist (until there's too many)
+ * and doesn't require regenerating/changing the fragment program to
+ * shuffle constants around.
+ */
+ uint num_constants;
+ float constants[I915_MAX_CONSTANT][4];
+
+ /**
+ * Status of each constant
+ * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding
+ * slot of the user's constant buffer. (set by pipe->set_constant_buffer())
+ * Else, the bitmask indicates which components are occupied by immediates.
+ */
+ ubyte constant_flags[I915_MAX_CONSTANT];
+};
+
+
+struct i915_cache_context;
+
+/* Use to calculate differences between state emitted to hardware and
+ * current driver-calculated state.
+ */
+struct i915_state
+{
+ unsigned immediate[I915_MAX_IMMEDIATE];
+ unsigned dynamic[I915_MAX_DYNAMIC];
+
+ float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4];
+ /** number of constants passed in through a constant buffer */
+ uint num_user_constants[PIPE_SHADER_TYPES];
+
+ /* texture sampler state */
+ unsigned sampler[I915_TEX_UNITS][3];
+ unsigned sampler_enable_flags;
+ unsigned sampler_enable_nr;
+
+ /* texture image buffers */
+ unsigned texbuffer[I915_TEX_UNITS][2];
+
+ /** Describes the current hardware vertex layout */
+ struct vertex_info vertex_info;
+
+ unsigned id; /* track lost context events */
+};
+
+struct i915_blend_state {
+ unsigned iab;
+ unsigned modes4;
+ unsigned LIS5;
+ unsigned LIS6;
+};
+
+struct i915_depth_stencil_state {
+ unsigned stencil_modes4;
+ unsigned bfo[2];
+ unsigned stencil_LIS5;
+ unsigned depth_LIS6;
+};
+
+struct i915_rasterizer_state {
+ int light_twoside : 1;
+ unsigned st;
+ enum interp_mode color_interp;
+
+ unsigned LIS4;
+ unsigned LIS7;
+ unsigned sc[1];
+
+ const struct pipe_rasterizer_state *templ;
+
+ union { float f; unsigned u; } ds[2];
+};
+
+struct i915_sampler_state {
+ unsigned state[3];
+ const struct pipe_sampler_state *templ;
+ unsigned minlod;
+ unsigned maxlod;
+};
+
+
+struct i915_texture {
+ struct pipe_texture base;
+
+ /* Derived from the above:
+ */
+ unsigned stride;
+ unsigned depth_stride; /* per-image on i945? */
+ unsigned total_nblocksy;
+
+ unsigned tiled;
+
+ unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* Explicitly store the offset of each image for each cube face or
+ * depth value. Pretty much have to accept that hardware formats
+ * are going to be so diverse that there is no unified way to
+ * compute the offsets of depth/cube images within a mipmap level,
+ * so have to store them as a lookup table:
+ */
+ unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */
+
+ /* The data is held here:
+ */
+ struct pipe_buffer *buffer;
+};
+
+struct i915_batchbuffer;
+
+struct i915_context
+{
+ struct pipe_context pipe;
+ struct i915_winsys *winsys;
+ struct draw_context *draw;
+
+ /* The most recent drawing state as set by the driver:
+ */
+ const struct i915_blend_state *blend;
+ const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ const struct i915_depth_stencil_state *depth_stencil;
+ const struct i915_rasterizer_state *rasterizer;
+
+ struct i915_fragment_shader *fs;
+
+ struct pipe_blend_color blend_color;
+ struct pipe_clip_state clip;
+ struct pipe_constant_buffer constants[PIPE_SHADER_TYPES];
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_poly_stipple poly_stipple;
+ struct pipe_scissor_state scissor;
+ struct i915_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_viewport_state viewport;
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+
+ unsigned dirty;
+
+ unsigned num_samplers;
+ unsigned num_textures;
+ unsigned num_vertex_elements;
+ unsigned num_vertex_buffers;
+
+ struct i915_batchbuffer *batch;
+
+ /** Vertex buffer */
+ struct pipe_buffer *vbo;
+ size_t vbo_offset;
+ unsigned vbo_flushed;
+
+ struct i915_state current;
+ unsigned hardware_dirty;
+
+ unsigned debug;
+};
+
+/* A flag for each state_tracker state object:
+ */
+#define I915_NEW_VIEWPORT 0x1
+#define I915_NEW_RASTERIZER 0x2
+#define I915_NEW_FS 0x4
+#define I915_NEW_BLEND 0x8
+#define I915_NEW_CLIP 0x10
+#define I915_NEW_SCISSOR 0x20
+#define I915_NEW_STIPPLE 0x40
+#define I915_NEW_FRAMEBUFFER 0x80
+#define I915_NEW_ALPHA_TEST 0x100
+#define I915_NEW_DEPTH_STENCIL 0x200
+#define I915_NEW_SAMPLER 0x400
+#define I915_NEW_TEXTURE 0x800
+#define I915_NEW_CONSTANTS 0x1000
+#define I915_NEW_VBO 0x2000
+#define I915_NEW_VS 0x4000
+
+
+/* Driver's internally generated state flags:
+ */
+#define I915_NEW_VERTEX_FORMAT 0x10000
+
+
+/* Dirty flags for hardware emit
+ */
+#define I915_HW_STATIC (1<<I915_CACHE_STATIC)
+#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC)
+#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER)
+#define I915_HW_MAP (1<<I915_CACHE_MAP)
+#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM)
+#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS)
+#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0))
+#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1))
+
+
+/***********************************************************************
+ * i915_prim_emit.c:
+ */
+struct draw_stage *i915_draw_render_stage( struct i915_context *i915 );
+
+
+/***********************************************************************
+ * i915_prim_vbuf.c:
+ */
+struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 );
+
+
+/***********************************************************************
+ * i915_state_emit.c:
+ */
+void i915_emit_hardware_state(struct i915_context *i915 );
+
+
+
+/***********************************************************************
+ * i915_clear.c:
+ */
+void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+
+/***********************************************************************
+ * i915_surface.c:
+ */
+void i915_init_surface_functions( struct i915_context *i915 );
+
+void i915_init_state_functions( struct i915_context *i915 );
+void i915_init_flush_functions( struct i915_context *i915 );
+void i915_init_string_functions( struct i915_context *i915 );
+
+
+
+
+/***********************************************************************
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct i915_context *
+i915_context( struct pipe_context *pipe )
+{
+ return (struct i915_context *)pipe;
+}
+
+
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c
new file mode 100644
index 0000000000..7a4e7051d2
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_debug.c
@@ -0,0 +1,900 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_winsys.h"
+#include "i915_debug.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_debug.h"
+
+
+static void
+PRINTF(
+ struct debug_stream *stream,
+ const char *fmt,
+ ... )
+{
+ va_list args;
+
+ va_start( args, fmt );
+ debug_vprintf( fmt, args );
+ va_end( args );
+}
+
+
+static boolean debug( struct debug_stream *stream, const char *name, unsigned len )
+{
+ unsigned i;
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+
+ if (len == 0) {
+ PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]);
+ assert(0);
+ return FALSE;
+ }
+
+ if (stream->print_addresses)
+ PRINTF(stream, "%08x: ", stream->offset);
+
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ for (i = 0; i < len; i++)
+ PRINTF(stream, "\t0x%08x\n", ptr[i]);
+ PRINTF(stream, "\n");
+
+ stream->offset += len * sizeof(unsigned);
+
+ return TRUE;
+}
+
+
+static const char *get_prim_name( unsigned val )
+{
+ switch (val & PRIM3D_MASK) {
+ case PRIM3D_TRILIST: return "TRILIST"; break;
+ case PRIM3D_TRISTRIP: return "TRISTRIP"; break;
+ case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; break;
+ case PRIM3D_TRIFAN: return "TRIFAN"; break;
+ case PRIM3D_POLY: return "POLY"; break;
+ case PRIM3D_LINELIST: return "LINELIST"; break;
+ case PRIM3D_LINESTRIP: return "LINESTRIP"; break;
+ case PRIM3D_RECTLIST: return "RECTLIST"; break;
+ case PRIM3D_POINTLIST: return "POINTLIST"; break;
+ case PRIM3D_DIB: return "DIB"; break;
+ case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; break;
+ case PRIM3D_ZONE_INIT: return "ZONE_INIT"; break;
+ default: return "????"; break;
+ }
+}
+
+static boolean debug_prim( struct debug_stream *stream, const char *name,
+ boolean dump_floats,
+ unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ const char *prim = get_prim_name( ptr[0] );
+ unsigned i;
+
+
+
+ PRINTF(stream, "%s %s (%d dwords):\n", name, prim, len);
+ PRINTF(stream, "\t0x%08x\n", ptr[0]);
+ for (i = 1; i < len; i++) {
+ if (dump_floats)
+ PRINTF(stream, "\t0x%08x // %f\n", ptr[i], *(float *)&ptr[i]);
+ else
+ PRINTF(stream, "\t0x%08x\n", ptr[i]);
+ }
+
+
+ PRINTF(stream, "\n");
+
+ stream->offset += len * sizeof(unsigned);
+
+ return TRUE;
+}
+
+
+
+
+static boolean debug_program( struct debug_stream *stream, const char *name, unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+
+ if (len == 0) {
+ PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]);
+ assert(0);
+ return FALSE;
+ }
+
+ if (stream->print_addresses)
+ PRINTF(stream, "%08x: ", stream->offset);
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ i915_disassemble_program( stream, ptr, len );
+
+ stream->offset += len * sizeof(unsigned);
+ return TRUE;
+}
+
+
+static boolean debug_chain( struct debug_stream *stream, const char *name, unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ unsigned old_offset = stream->offset + len * sizeof(unsigned);
+ unsigned i;
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ for (i = 0; i < len; i++)
+ PRINTF(stream, "\t0x%08x\n", ptr[i]);
+
+ stream->offset = ptr[1] & ~0x3;
+
+ if (stream->offset < old_offset)
+ PRINTF(stream, "\n... skipping backwards from 0x%x --> 0x%x ...\n\n",
+ old_offset, stream->offset );
+ else
+ PRINTF(stream, "\n... skipping from 0x%x --> 0x%x ...\n\n",
+ old_offset, stream->offset );
+
+
+ return TRUE;
+}
+
+
+static boolean debug_variable_length_prim( struct debug_stream *stream )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ const char *prim = get_prim_name( ptr[0] );
+ unsigned i, len;
+
+ ushort *idx = (ushort *)(ptr+1);
+ for (i = 0; idx[i] != 0xffff; i++)
+ ;
+
+ len = 1+(i+2)/2;
+
+ PRINTF(stream, "3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len);
+ for (i = 0; i < len; i++)
+ PRINTF(stream, "\t0x%08x\n", ptr[i]);
+ PRINTF(stream, "\n");
+
+ stream->offset += len * sizeof(unsigned);
+ return TRUE;
+}
+
+
+static void
+BITS(
+ struct debug_stream *stream,
+ unsigned dw,
+ unsigned hi,
+ unsigned lo,
+ const char *fmt,
+ ... )
+{
+ va_list args;
+ unsigned himask = ~0UL >> (31 - (hi));
+
+ PRINTF(stream, "\t\t ");
+
+ va_start( args, fmt );
+ debug_vprintf( fmt, args );
+ va_end( args );
+
+ PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo));
+}
+
+#ifdef DEBUG
+#define MBZ( dw, hi, lo) do { \
+ unsigned x = (dw) >> (lo); \
+ unsigned lomask = (1 << (lo)) - 1; \
+ unsigned himask; \
+ himask = (1UL << (hi)) - 1; \
+ assert ((x & himask & ~lomask) == 0); \
+} while (0)
+#else
+#define MBZ( dw, hi, lo) do { \
+} while (0)
+#endif
+
+static void
+FLAG(
+ struct debug_stream *stream,
+ unsigned dw,
+ unsigned bit,
+ const char *fmt,
+ ... )
+{
+ if (((dw) >> (bit)) & 1) {
+ va_list args;
+
+ PRINTF(stream, "\t\t ");
+
+ va_start( args, fmt );
+ debug_vprintf( fmt, args );
+ va_end( args );
+
+ PRINTF(stream, "\n");
+ }
+}
+
+static boolean debug_load_immediate( struct debug_stream *stream,
+ const char *name,
+ unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ unsigned bits = (ptr[0] >> 4) & 0xff;
+ unsigned j = 0;
+
+ PRINTF(stream, "%s (%d dwords, flags: %x):\n", name, len, bits);
+ PRINTF(stream, "\t0x%08x\n", ptr[j++]);
+
+ if (bits & (1<<0)) {
+ PRINTF(stream, "\t LIS0: 0x%08x\n", ptr[j]);
+ PRINTF(stream, "\t vb address: 0x%08x\n", (ptr[j] & ~0x3));
+ BITS(stream, ptr[j], 0, 0, "vb invalidate disable");
+ j++;
+ }
+ if (bits & (1<<1)) {
+ PRINTF(stream, "\t LIS1: 0x%08x\n", ptr[j]);
+ BITS(stream, ptr[j], 29, 24, "vb dword width");
+ BITS(stream, ptr[j], 21, 16, "vb dword pitch");
+ BITS(stream, ptr[j], 15, 0, "vb max index");
+ j++;
+ }
+ if (bits & (1<<2)) {
+ int i;
+ PRINTF(stream, "\t LIS2: 0x%08x\n", ptr[j]);
+ for (i = 0; i < 8; i++) {
+ unsigned tc = (ptr[j] >> (i * 4)) & 0xf;
+ if (tc != 0xf)
+ BITS(stream, tc, 3, 0, "tex coord %d", i);
+ }
+ j++;
+ }
+ if (bits & (1<<3)) {
+ PRINTF(stream, "\t LIS3: 0x%08x\n", ptr[j]);
+ j++;
+ }
+ if (bits & (1<<4)) {
+ PRINTF(stream, "\t LIS4: 0x%08x\n", ptr[j]);
+ BITS(stream, ptr[j], 31, 23, "point width");
+ BITS(stream, ptr[j], 22, 19, "line width");
+ FLAG(stream, ptr[j], 18, "alpha flatshade");
+ FLAG(stream, ptr[j], 17, "fog flatshade");
+ FLAG(stream, ptr[j], 16, "spec flatshade");
+ FLAG(stream, ptr[j], 15, "rgb flatshade");
+ BITS(stream, ptr[j], 14, 13, "cull mode");
+ FLAG(stream, ptr[j], 12, "vfmt: point width");
+ FLAG(stream, ptr[j], 11, "vfmt: specular/fog");
+ FLAG(stream, ptr[j], 10, "vfmt: rgba");
+ FLAG(stream, ptr[j], 9, "vfmt: depth offset");
+ BITS(stream, ptr[j], 8, 6, "vfmt: position (2==xyzw)");
+ FLAG(stream, ptr[j], 5, "force dflt diffuse");
+ FLAG(stream, ptr[j], 4, "force dflt specular");
+ FLAG(stream, ptr[j], 3, "local depth offset enable");
+ FLAG(stream, ptr[j], 2, "vfmt: fp32 fog coord");
+ FLAG(stream, ptr[j], 1, "sprite point");
+ FLAG(stream, ptr[j], 0, "antialiasing");
+ j++;
+ }
+ if (bits & (1<<5)) {
+ PRINTF(stream, "\t LIS5: 0x%08x\n", ptr[j]);
+ BITS(stream, ptr[j], 31, 28, "rgba write disables");
+ FLAG(stream, ptr[j], 27, "force dflt point width");
+ FLAG(stream, ptr[j], 26, "last pixel enable");
+ FLAG(stream, ptr[j], 25, "global z offset enable");
+ FLAG(stream, ptr[j], 24, "fog enable");
+ BITS(stream, ptr[j], 23, 16, "stencil ref");
+ BITS(stream, ptr[j], 15, 13, "stencil test");
+ BITS(stream, ptr[j], 12, 10, "stencil fail op");
+ BITS(stream, ptr[j], 9, 7, "stencil pass z fail op");
+ BITS(stream, ptr[j], 6, 4, "stencil pass z pass op");
+ FLAG(stream, ptr[j], 3, "stencil write enable");
+ FLAG(stream, ptr[j], 2, "stencil test enable");
+ FLAG(stream, ptr[j], 1, "color dither enable");
+ FLAG(stream, ptr[j], 0, "logiop enable");
+ j++;
+ }
+ if (bits & (1<<6)) {
+ PRINTF(stream, "\t LIS6: 0x%08x\n", ptr[j]);
+ FLAG(stream, ptr[j], 31, "alpha test enable");
+ BITS(stream, ptr[j], 30, 28, "alpha func");
+ BITS(stream, ptr[j], 27, 20, "alpha ref");
+ FLAG(stream, ptr[j], 19, "depth test enable");
+ BITS(stream, ptr[j], 18, 16, "depth func");
+ FLAG(stream, ptr[j], 15, "blend enable");
+ BITS(stream, ptr[j], 14, 12, "blend func");
+ BITS(stream, ptr[j], 11, 8, "blend src factor");
+ BITS(stream, ptr[j], 7, 4, "blend dst factor");
+ FLAG(stream, ptr[j], 3, "depth write enable");
+ FLAG(stream, ptr[j], 2, "color write enable");
+ BITS(stream, ptr[j], 1, 0, "provoking vertex");
+ j++;
+ }
+
+
+ PRINTF(stream, "\n");
+
+ assert(j == len);
+
+ stream->offset += len * sizeof(unsigned);
+
+ return TRUE;
+}
+
+
+
+static boolean debug_load_indirect( struct debug_stream *stream,
+ const char *name,
+ unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ unsigned bits = (ptr[0] >> 8) & 0x3f;
+ unsigned i, j = 0;
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ PRINTF(stream, "\t0x%08x\n", ptr[j++]);
+
+ for (i = 0; i < 6; i++) {
+ if (bits & (1<<i)) {
+ switch (1<<(8+i)) {
+ case LI0_STATE_STATIC_INDIRECT:
+ PRINTF(stream, " STATIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+ PRINTF(stream, " 0x%08x\n", ptr[j++]);
+ break;
+ case LI0_STATE_DYNAMIC_INDIRECT:
+ PRINTF(stream, " DYNAMIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+ break;
+ case LI0_STATE_SAMPLER:
+ PRINTF(stream, " SAMPLER: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+ PRINTF(stream, " 0x%08x\n", ptr[j++]);
+ break;
+ case LI0_STATE_MAP:
+ PRINTF(stream, " MAP: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+ PRINTF(stream, " 0x%08x\n", ptr[j++]);
+ break;
+ case LI0_STATE_PROGRAM:
+ PRINTF(stream, " PROGRAM: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+ PRINTF(stream, " 0x%08x\n", ptr[j++]);
+ break;
+ case LI0_STATE_CONSTANTS:
+ PRINTF(stream, " CONSTANTS: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+ PRINTF(stream, " 0x%08x\n", ptr[j++]);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+ }
+
+ if (bits == 0) {
+ PRINTF(stream, "\t DUMMY: 0x%08x\n", ptr[j++]);
+ }
+
+ PRINTF(stream, "\n");
+
+
+ assert(j == len);
+
+ stream->offset += len * sizeof(unsigned);
+
+ return TRUE;
+}
+
+static void BR13( struct debug_stream *stream,
+ unsigned val )
+{
+ PRINTF(stream, "\t0x%08x\n", val);
+ FLAG(stream, val, 30, "clipping enable");
+ BITS(stream, val, 25, 24, "color depth (3==32bpp)");
+ BITS(stream, val, 23, 16, "raster op");
+ BITS(stream, val, 15, 0, "dest pitch");
+}
+
+
+static void BR22( struct debug_stream *stream,
+ unsigned val )
+{
+ PRINTF(stream, "\t0x%08x\n", val);
+ BITS(stream, val, 31, 16, "dest y1");
+ BITS(stream, val, 15, 0, "dest x1");
+}
+
+static void BR23( struct debug_stream *stream,
+ unsigned val )
+{
+ PRINTF(stream, "\t0x%08x\n", val);
+ BITS(stream, val, 31, 16, "dest y2");
+ BITS(stream, val, 15, 0, "dest x2");
+}
+
+static void BR09( struct debug_stream *stream,
+ unsigned val )
+{
+ PRINTF(stream, "\t0x%08x -- dest address\n", val);
+}
+
+static void BR26( struct debug_stream *stream,
+ unsigned val )
+{
+ PRINTF(stream, "\t0x%08x\n", val);
+ BITS(stream, val, 31, 16, "src y1");
+ BITS(stream, val, 15, 0, "src x1");
+}
+
+static void BR11( struct debug_stream *stream,
+ unsigned val )
+{
+ PRINTF(stream, "\t0x%08x\n", val);
+ BITS(stream, val, 15, 0, "src pitch");
+}
+
+static void BR12( struct debug_stream *stream,
+ unsigned val )
+{
+ PRINTF(stream, "\t0x%08x -- src address\n", val);
+}
+
+static void BR16( struct debug_stream *stream,
+ unsigned val )
+{
+ PRINTF(stream, "\t0x%08x -- color\n", val);
+}
+
+static boolean debug_copy_blit( struct debug_stream *stream,
+ const char *name,
+ unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ int j = 0;
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ PRINTF(stream, "\t0x%08x\n", ptr[j++]);
+
+ BR13(stream, ptr[j++]);
+ BR22(stream, ptr[j++]);
+ BR23(stream, ptr[j++]);
+ BR09(stream, ptr[j++]);
+ BR26(stream, ptr[j++]);
+ BR11(stream, ptr[j++]);
+ BR12(stream, ptr[j++]);
+
+ stream->offset += len * sizeof(unsigned);
+ assert(j == len);
+ return TRUE;
+}
+
+static boolean debug_color_blit( struct debug_stream *stream,
+ const char *name,
+ unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ int j = 0;
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ PRINTF(stream, "\t0x%08x\n", ptr[j++]);
+
+ BR13(stream, ptr[j++]);
+ BR22(stream, ptr[j++]);
+ BR23(stream, ptr[j++]);
+ BR09(stream, ptr[j++]);
+ BR16(stream, ptr[j++]);
+
+ stream->offset += len * sizeof(unsigned);
+ assert(j == len);
+ return TRUE;
+}
+
+static boolean debug_modes4( struct debug_stream *stream,
+ const char *name,
+ unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ int j = 0;
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ PRINTF(stream, "\t0x%08x\n", ptr[j]);
+ BITS(stream, ptr[j], 21, 18, "logicop func");
+ FLAG(stream, ptr[j], 17, "stencil test mask modify-enable");
+ FLAG(stream, ptr[j], 16, "stencil write mask modify-enable");
+ BITS(stream, ptr[j], 15, 8, "stencil test mask");
+ BITS(stream, ptr[j], 7, 0, "stencil write mask");
+ j++;
+
+ stream->offset += len * sizeof(unsigned);
+ assert(j == len);
+ return TRUE;
+}
+
+static boolean debug_map_state( struct debug_stream *stream,
+ const char *name,
+ unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ unsigned j = 0;
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ PRINTF(stream, "\t0x%08x\n", ptr[j++]);
+
+ {
+ PRINTF(stream, "\t0x%08x\n", ptr[j]);
+ BITS(stream, ptr[j], 15, 0, "map mask");
+ j++;
+ }
+
+ while (j < len) {
+ {
+ PRINTF(stream, "\t TMn.0: 0x%08x\n", ptr[j]);
+ PRINTF(stream, "\t map address: 0x%08x\n", (ptr[j] & ~0x3));
+ FLAG(stream, ptr[j], 1, "vertical line stride");
+ FLAG(stream, ptr[j], 0, "vertical line stride offset");
+ j++;
+ }
+
+ {
+ PRINTF(stream, "\t TMn.1: 0x%08x\n", ptr[j]);
+ BITS(stream, ptr[j], 31, 21, "height");
+ BITS(stream, ptr[j], 20, 10, "width");
+ BITS(stream, ptr[j], 9, 7, "surface format");
+ BITS(stream, ptr[j], 6, 3, "texel format");
+ FLAG(stream, ptr[j], 2, "use fence regs");
+ FLAG(stream, ptr[j], 1, "tiled surface");
+ FLAG(stream, ptr[j], 0, "tile walk ymajor");
+ j++;
+ }
+ {
+ PRINTF(stream, "\t TMn.2: 0x%08x\n", ptr[j]);
+ BITS(stream, ptr[j], 31, 21, "dword pitch");
+ BITS(stream, ptr[j], 20, 15, "cube face enables");
+ BITS(stream, ptr[j], 14, 9, "max lod");
+ FLAG(stream, ptr[j], 8, "mip layout right");
+ BITS(stream, ptr[j], 7, 0, "depth");
+ j++;
+ }
+ }
+
+ stream->offset += len * sizeof(unsigned);
+ assert(j == len);
+ return TRUE;
+}
+
+static boolean debug_sampler_state( struct debug_stream *stream,
+ const char *name,
+ unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ unsigned j = 0;
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ PRINTF(stream, "\t0x%08x\n", ptr[j++]);
+
+ {
+ PRINTF(stream, "\t0x%08x\n", ptr[j]);
+ BITS(stream, ptr[j], 15, 0, "sampler mask");
+ j++;
+ }
+
+ while (j < len) {
+ {
+ PRINTF(stream, "\t TSn.0: 0x%08x\n", ptr[j]);
+ FLAG(stream, ptr[j], 31, "reverse gamma");
+ FLAG(stream, ptr[j], 30, "planar to packed");
+ FLAG(stream, ptr[j], 29, "yuv->rgb");
+ BITS(stream, ptr[j], 28, 27, "chromakey index");
+ BITS(stream, ptr[j], 26, 22, "base mip level");
+ BITS(stream, ptr[j], 21, 20, "mip mode filter");
+ BITS(stream, ptr[j], 19, 17, "mag mode filter");
+ BITS(stream, ptr[j], 16, 14, "min mode filter");
+ BITS(stream, ptr[j], 13, 5, "lod bias (s4.4)");
+ FLAG(stream, ptr[j], 4, "shadow enable");
+ FLAG(stream, ptr[j], 3, "max-aniso-4");
+ BITS(stream, ptr[j], 2, 0, "shadow func");
+ j++;
+ }
+
+ {
+ PRINTF(stream, "\t TSn.1: 0x%08x\n", ptr[j]);
+ BITS(stream, ptr[j], 31, 24, "min lod");
+ MBZ( ptr[j], 23, 18 );
+ FLAG(stream, ptr[j], 17, "kill pixel enable");
+ FLAG(stream, ptr[j], 16, "keyed tex filter mode");
+ FLAG(stream, ptr[j], 15, "chromakey enable");
+ BITS(stream, ptr[j], 14, 12, "tcx wrap mode");
+ BITS(stream, ptr[j], 11, 9, "tcy wrap mode");
+ BITS(stream, ptr[j], 8, 6, "tcz wrap mode");
+ FLAG(stream, ptr[j], 5, "normalized coords");
+ BITS(stream, ptr[j], 4, 1, "map (surface) index");
+ FLAG(stream, ptr[j], 0, "EAST deinterlacer enable");
+ j++;
+ }
+ {
+ PRINTF(stream, "\t TSn.2: 0x%08x (default color)\n", ptr[j]);
+ j++;
+ }
+ }
+
+ stream->offset += len * sizeof(unsigned);
+ assert(j == len);
+ return TRUE;
+}
+
+static boolean debug_dest_vars( struct debug_stream *stream,
+ const char *name,
+ unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ int j = 0;
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ PRINTF(stream, "\t0x%08x\n", ptr[j++]);
+
+ {
+ PRINTF(stream, "\t0x%08x\n", ptr[j]);
+ FLAG(stream, ptr[j], 31, "early classic ztest");
+ FLAG(stream, ptr[j], 30, "opengl tex default color");
+ FLAG(stream, ptr[j], 29, "bypass iz");
+ FLAG(stream, ptr[j], 28, "lod preclamp");
+ BITS(stream, ptr[j], 27, 26, "dither pattern");
+ FLAG(stream, ptr[j], 25, "linear gamma blend");
+ FLAG(stream, ptr[j], 24, "debug dither");
+ BITS(stream, ptr[j], 23, 20, "dstorg x");
+ BITS(stream, ptr[j], 19, 16, "dstorg y");
+ MBZ (ptr[j], 15, 15 );
+ BITS(stream, ptr[j], 14, 12, "422 write select");
+ BITS(stream, ptr[j], 11, 8, "cbuf format");
+ BITS(stream, ptr[j], 3, 2, "zbuf format");
+ FLAG(stream, ptr[j], 1, "vert line stride");
+ FLAG(stream, ptr[j], 1, "vert line stride offset");
+ j++;
+ }
+
+ stream->offset += len * sizeof(unsigned);
+ assert(j == len);
+ return TRUE;
+}
+
+static boolean debug_buf_info( struct debug_stream *stream,
+ const char *name,
+ unsigned len )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ int j = 0;
+
+ PRINTF(stream, "%s (%d dwords):\n", name, len);
+ PRINTF(stream, "\t0x%08x\n", ptr[j++]);
+
+ {
+ PRINTF(stream, "\t0x%08x\n", ptr[j]);
+ BITS(stream, ptr[j], 28, 28, "aux buffer id");
+ BITS(stream, ptr[j], 27, 24, "buffer id (7=depth, 3=back)");
+ FLAG(stream, ptr[j], 23, "use fence regs");
+ FLAG(stream, ptr[j], 22, "tiled surface");
+ FLAG(stream, ptr[j], 21, "tile walk ymajor");
+ MBZ (ptr[j], 20, 14);
+ BITS(stream, ptr[j], 13, 2, "dword pitch");
+ MBZ (ptr[j], 2, 0);
+ j++;
+ }
+
+ PRINTF(stream, "\t0x%08x -- buffer base address\n", ptr[j++]);
+
+ stream->offset += len * sizeof(unsigned);
+ assert(j == len);
+ return TRUE;
+}
+
+static boolean i915_debug_packet( struct debug_stream *stream )
+{
+ unsigned *ptr = (unsigned *)(stream->ptr + stream->offset);
+ unsigned cmd = *ptr;
+
+ switch (((cmd >> 29) & 0x7)) {
+ case 0x0:
+ switch ((cmd >> 23) & 0x3f) {
+ case 0x0:
+ return debug(stream, "MI_NOOP", 1);
+ case 0x3:
+ return debug(stream, "MI_WAIT_FOR_EVENT", 1);
+ case 0x4:
+ return debug(stream, "MI_FLUSH", 1);
+ case 0xA:
+ debug(stream, "MI_BATCH_BUFFER_END", 1);
+ return FALSE;
+ case 0x22:
+ return debug(stream, "MI_LOAD_REGISTER_IMM", 3);
+ case 0x31:
+ return debug_chain(stream, "MI_BATCH_BUFFER_START", 2);
+ default:
+ (void)debug(stream, "UNKNOWN 0x0 case!", 1);
+ assert(0);
+ break;
+ }
+ break;
+ case 0x1:
+ (void) debug(stream, "UNKNOWN 0x1 case!", 1);
+ assert(0);
+ break;
+ case 0x2:
+ switch ((cmd >> 22) & 0xff) {
+ case 0x50:
+ return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2);
+ case 0x53:
+ return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2);
+ default:
+ return debug(stream, "blit command", (cmd & 0xff) + 2);
+ }
+ break;
+ case 0x3:
+ switch ((cmd >> 24) & 0x1f) {
+ case 0x6:
+ return debug(stream, "3DSTATE_ANTI_ALIASING", 1);
+ case 0x7:
+ return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1);
+ case 0x8:
+ return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2);
+ case 0x9:
+ return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1);
+ case 0xb:
+ return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1);
+ case 0xc:
+ return debug(stream, "3DSTATE_MODES5", 1);
+ case 0xd:
+ return debug_modes4(stream, "3DSTATE_MODES4", 1);
+ case 0x15:
+ return debug(stream, "3DSTATE_FOG_COLOR", 1);
+ case 0x16:
+ return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1);
+ case 0x1c:
+ /* 3DState16NP */
+ switch((cmd >> 19) & 0x1f) {
+ case 0x10:
+ return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1);
+ case 0x11:
+ return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1);
+ default:
+ (void) debug(stream, "UNKNOWN 0x1c case!", 1);
+ assert(0);
+ break;
+ }
+ break;
+ case 0x1d:
+ /* 3DStateMW */
+ switch ((cmd >> 16) & 0xff) {
+ case 0x0:
+ return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2);
+ case 0x1:
+ return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2);
+ case 0x4:
+ return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2);
+ case 0x5:
+ return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2);
+ case 0x6:
+ return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2);
+ case 0x7:
+ return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2);
+ case 0x80:
+ return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2);
+ case 0x81:
+ return debug(stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2);
+ case 0x83:
+ return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2);
+ case 0x85:
+ return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2);
+ case 0x88:
+ return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2);
+ case 0x89:
+ return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2);
+ case 0x8e:
+ return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2);
+ case 0x97:
+ return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2);
+ case 0x98:
+ return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2);
+ case 0x99:
+ return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2);
+ case 0x9a:
+ return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2);
+ case 0x9c:
+ return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2);
+ default:
+ assert(0);
+ return 0;
+ }
+ break;
+ case 0x1e:
+ if (cmd & (1 << 23))
+ return debug(stream, "???", (cmd & 0xffff) + 1);
+ else
+ return debug(stream, "", 1);
+ break;
+ case 0x1f:
+ if ((cmd & (1 << 23)) == 0)
+ return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2);
+ else if (cmd & (1 << 17))
+ {
+ if ((cmd & 0xffff) == 0)
+ return debug_variable_length_prim(stream);
+ else
+ return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1);
+ }
+ else
+ return debug_prim(stream, "3DPRIM (indirect sequential)", 0, 2);
+ break;
+ default:
+ return debug(stream, "", 0);
+ }
+ default:
+ assert(0);
+ return 0;
+ }
+
+ assert(0);
+ return 0;
+}
+
+
+
+void
+i915_dump_batchbuffer( struct i915_context *i915 )
+{
+ struct debug_stream stream;
+ /* TODO fix me */
+ unsigned *start = 0;/*i915->batch_start;*/
+ unsigned *end = 0;/*i915->winsys->batch_start( i915->winsys, 0, 0 );*/
+ unsigned long bytes = (unsigned long) (end - start) * 4;
+ boolean done = FALSE;
+
+ stream.offset = 0;
+ stream.ptr = (char *)start;
+ stream.print_addresses = 0;
+ stream.winsys = i915->pipe.winsys;
+
+ if (!start || !end) {
+ debug_printf( "\n\nBATCH: ???\n");
+ return;
+ }
+
+ debug_printf( "\n\nBATCH: (%d)\n", bytes / 4);
+
+ while (!done &&
+ stream.offset < bytes)
+ {
+ if (!i915_debug_packet( &stream ))
+ break;
+
+ assert(stream.offset <= bytes &&
+ stream.offset >= 0);
+ }
+
+ debug_printf( "END-BATCH\n\n\n");
+}
+
+
diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h
new file mode 100644
index 0000000000..afb63edabf
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_debug.h
@@ -0,0 +1,115 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef I915_DEBUG_H
+#define I915_DEBUG_H
+
+#include <stdarg.h>
+
+struct i915_context;
+
+struct debug_stream
+{
+ unsigned offset; /* current gtt offset */
+ char *ptr; /* pointer to gtt offset zero */
+ char *end; /* pointer to gtt offset zero */
+ unsigned print_addresses;
+ struct pipe_winsys *winsys;
+};
+
+
+/* Internal functions
+ */
+void i915_disassemble_program(struct debug_stream *stream,
+ const unsigned *program, unsigned sz);
+
+void i915_print_ureg(const char *msg, unsigned ureg);
+
+
+#define DEBUG_BATCH 0x1
+#define DEBUG_BLIT 0x2
+#define DEBUG_BUFFER 0x4
+#define DEBUG_CONSTANTS 0x8
+#define DEBUG_CONTEXT 0x10
+#define DEBUG_DRAW 0x20
+#define DEBUG_DYNAMIC 0x40
+#define DEBUG_FLUSH 0x80
+#define DEBUG_MAP 0x100
+#define DEBUG_PROGRAM 0x200
+#define DEBUG_REGIONS 0x400
+#define DEBUG_SAMPLER 0x800
+#define DEBUG_STATIC 0x1000
+#define DEBUG_SURFACE 0x2000
+#define DEBUG_WINSYS 0x4000
+
+#include "pipe/p_compiler.h"
+
+#if defined(DEBUG) && defined(FILE_DEBUG_FLAG)
+
+#include "pipe/p_winsys.h"
+
+static INLINE void
+I915_DBG(
+ struct i915_context *i915,
+ const char *fmt,
+ ... )
+{
+ if ((i915)->debug & FILE_DEBUG_FLAG) {
+ va_list args;
+
+ va_start( args, fmt );
+ debug_vprintf( fmt, args );
+ va_end( args );
+ }
+}
+
+#else
+
+static INLINE void
+I915_DBG(
+ struct i915_context *i915,
+ const char *fmt,
+ ... )
+{
+ (void) i915;
+ (void) fmt;
+}
+
+#endif
+
+
+void i915_dump_batchbuffer( struct i915_context *i915 );
+
+
+
+void i915_debug_init( struct i915_context *i915 );
+
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c
new file mode 100644
index 0000000000..48be3e1472
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_debug_fp.c
@@ -0,0 +1,363 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "i915_reg.h"
+#include "i915_debug.h"
+#include "pipe/p_winsys.h"
+#include "util/u_memory.h"
+
+
+static void
+PRINTF(
+ struct debug_stream *stream,
+ const char *fmt,
+ ... )
+{
+ va_list args;
+
+ va_start( args, fmt );
+ debug_vprintf( fmt, args );
+ va_end( args );
+}
+
+
+static const char *opcodes[0x20] = {
+ "NOP",
+ "ADD",
+ "MOV",
+ "MUL",
+ "MAD",
+ "DP2ADD",
+ "DP3",
+ "DP4",
+ "FRC",
+ "RCP",
+ "RSQ",
+ "EXP",
+ "LOG",
+ "CMP",
+ "MIN",
+ "MAX",
+ "FLR",
+ "MOD",
+ "TRC",
+ "SGE",
+ "SLT",
+ "TEXLD",
+ "TEXLDP",
+ "TEXLDB",
+ "TEXKILL",
+ "DCL",
+ "0x1a",
+ "0x1b",
+ "0x1c",
+ "0x1d",
+ "0x1e",
+ "0x1f",
+};
+
+
+static const int args[0x20] = {
+ 0, /* 0 nop */
+ 2, /* 1 add */
+ 1, /* 2 mov */
+ 2, /* 3 m ul */
+ 3, /* 4 mad */
+ 3, /* 5 dp2add */
+ 2, /* 6 dp3 */
+ 2, /* 7 dp4 */
+ 1, /* 8 frc */
+ 1, /* 9 rcp */
+ 1, /* a rsq */
+ 1, /* b exp */
+ 1, /* c log */
+ 3, /* d cmp */
+ 2, /* e min */
+ 2, /* f max */
+ 1, /* 10 flr */
+ 1, /* 11 mod */
+ 1, /* 12 trc */
+ 2, /* 13 sge */
+ 2, /* 14 slt */
+ 1,
+ 1,
+ 1,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+};
+
+
+static const char *regname[0x8] = {
+ "R",
+ "T",
+ "CONST",
+ "S",
+ "OC",
+ "OD",
+ "U",
+ "UNKNOWN",
+};
+
+static void
+print_reg_type_nr(struct debug_stream *stream, unsigned type, unsigned nr)
+{
+ switch (type) {
+ case REG_TYPE_T:
+ switch (nr) {
+ case T_DIFFUSE:
+ PRINTF(stream, "T_DIFFUSE");
+ return;
+ case T_SPECULAR:
+ PRINTF(stream, "T_SPECULAR");
+ return;
+ case T_FOG_W:
+ PRINTF(stream, "T_FOG_W");
+ return;
+ default:
+ PRINTF(stream, "T_TEX%d", nr);
+ return;
+ }
+ case REG_TYPE_OC:
+ if (nr == 0) {
+ PRINTF(stream, "oC");
+ return;
+ }
+ break;
+ case REG_TYPE_OD:
+ if (nr == 0) {
+ PRINTF(stream, "oD");
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+
+ PRINTF(stream, "%s[%d]", regname[type], nr);
+}
+
+#define REG_SWIZZLE_MASK 0x7777
+#define REG_NEGATE_MASK 0x8888
+
+#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \
+ (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \
+ (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \
+ (SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
+
+
+static void
+print_reg_neg_swizzle(struct debug_stream *stream, unsigned reg)
+{
+ int i;
+
+ if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW &&
+ (reg & REG_NEGATE_MASK) == 0)
+ return;
+
+ PRINTF(stream, ".");
+
+ for (i = 3; i >= 0; i--) {
+ if (reg & (1 << ((i * 4) + 3)))
+ PRINTF(stream, "-");
+
+ switch ((reg >> (i * 4)) & 0x7) {
+ case 0:
+ PRINTF(stream, "x");
+ break;
+ case 1:
+ PRINTF(stream, "y");
+ break;
+ case 2:
+ PRINTF(stream, "z");
+ break;
+ case 3:
+ PRINTF(stream, "w");
+ break;
+ case 4:
+ PRINTF(stream, "0");
+ break;
+ case 5:
+ PRINTF(stream, "1");
+ break;
+ default:
+ PRINTF(stream, "?");
+ break;
+ }
+ }
+}
+
+
+static void
+print_src_reg(struct debug_stream *stream, unsigned dword)
+{
+ unsigned nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK;
+ unsigned type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK;
+ print_reg_type_nr(stream, type, nr);
+ print_reg_neg_swizzle(stream, dword);
+}
+
+
+static void
+print_dest_reg(struct debug_stream *stream, unsigned dword)
+{
+ unsigned nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK;
+ unsigned type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK;
+ print_reg_type_nr(stream, type, nr);
+ if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL)
+ return;
+ PRINTF(stream, ".");
+ if (dword & A0_DEST_CHANNEL_X)
+ PRINTF(stream, "x");
+ if (dword & A0_DEST_CHANNEL_Y)
+ PRINTF(stream, "y");
+ if (dword & A0_DEST_CHANNEL_Z)
+ PRINTF(stream, "z");
+ if (dword & A0_DEST_CHANNEL_W)
+ PRINTF(stream, "w");
+}
+
+
+#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT))
+#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT))
+#define GET_SRC2_REG(r) (r)
+
+
+static void
+print_arith_op(struct debug_stream *stream,
+ unsigned opcode, const unsigned * program)
+{
+ if (opcode != A0_NOP) {
+ print_dest_reg(stream, program[0]);
+ if (program[0] & A0_DEST_SATURATE)
+ PRINTF(stream, " = SATURATE ");
+ else
+ PRINTF(stream, " = ");
+ }
+
+ PRINTF(stream, "%s ", opcodes[opcode]);
+
+ print_src_reg(stream, GET_SRC0_REG(program[0], program[1]));
+ if (args[opcode] == 1) {
+ PRINTF(stream, "\n");
+ return;
+ }
+
+ PRINTF(stream, ", ");
+ print_src_reg(stream, GET_SRC1_REG(program[1], program[2]));
+ if (args[opcode] == 2) {
+ PRINTF(stream, "\n");
+ return;
+ }
+
+ PRINTF(stream, ", ");
+ print_src_reg(stream, GET_SRC2_REG(program[2]));
+ PRINTF(stream, "\n");
+ return;
+}
+
+
+static void
+print_tex_op(struct debug_stream *stream,
+ unsigned opcode, const unsigned * program)
+{
+ print_dest_reg(stream, program[0] | A0_DEST_CHANNEL_ALL);
+ PRINTF(stream, " = ");
+
+ PRINTF(stream, "%s ", opcodes[opcode]);
+
+ PRINTF(stream, "S[%d],", program[0] & T0_SAMPLER_NR_MASK);
+
+ print_reg_type_nr(stream,
+ (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) &
+ REG_TYPE_MASK,
+ (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK);
+ PRINTF(stream, "\n");
+}
+
+static void
+print_texkil_op(struct debug_stream *stream,
+ unsigned opcode, const unsigned * program)
+{
+ PRINTF(stream, "TEXKIL ");
+
+ print_reg_type_nr(stream,
+ (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) &
+ REG_TYPE_MASK,
+ (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK);
+ PRINTF(stream, "\n");
+}
+
+static void
+print_dcl_op(struct debug_stream *stream,
+ unsigned opcode, const unsigned * program)
+{
+ PRINTF(stream, "%s ", opcodes[opcode]);
+ print_dest_reg(stream,
+ program[0] | A0_DEST_CHANNEL_ALL);
+ PRINTF(stream, "\n");
+}
+
+
+void
+i915_disassemble_program(struct debug_stream *stream,
+ const unsigned * program, unsigned sz)
+{
+ unsigned i;
+
+ PRINTF(stream, "\t\tBEGIN\n");
+
+ assert((program[0] & 0x1ff) + 2 == sz);
+
+ program++;
+ for (i = 1; i < sz; i += 3, program += 3) {
+ unsigned opcode = program[0] & (0x1f << 24);
+
+ PRINTF(stream, "\t\t");
+
+ if ((int) opcode >= A0_NOP && opcode <= A0_SLT)
+ print_arith_op(stream, opcode >> 24, program);
+ else if (opcode >= T0_TEXLD && opcode < T0_TEXKILL)
+ print_tex_op(stream, opcode >> 24, program);
+ else if (opcode == T0_TEXKILL)
+ print_texkil_op(stream, opcode >> 24, program);
+ else if (opcode == D0_DCL)
+ print_dcl_op(stream, opcode >> 24, program);
+ else
+ PRINTF(stream, "Unknown opcode 0x%x\n", opcode);
+ }
+
+ PRINTF(stream, "\t\tEND\n\n");
+}
+
+
diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c
new file mode 100644
index 0000000000..472e0ab774
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_flush.c
@@ -0,0 +1,78 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_defines.h"
+#include "draw/draw_context.h"
+#include "i915_context.h"
+#include "i915_reg.h"
+#include "i915_batch.h"
+
+
+static void i915_flush( struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence )
+{
+ struct i915_context *i915 = i915_context(pipe);
+
+ draw_flush(i915->draw);
+
+ /* Do we need to emit an MI_FLUSH command to flush the hardware
+ * caches?
+ */
+ if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) {
+ unsigned flush = MI_FLUSH;
+
+ if (!(flags & PIPE_FLUSH_RENDER_CACHE))
+ flush |= INHIBIT_FLUSH_RENDER_CACHE;
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE)
+ flush |= FLUSH_MAP_CACHE;
+
+ if (!BEGIN_BATCH(1, 0)) {
+ FLUSH_BATCH(NULL);
+ assert(BEGIN_BATCH(1, 0));
+ }
+ OUT_BATCH( flush );
+ }
+
+ /* If there are no flags, just flush pending commands to hardware:
+ */
+ FLUSH_BATCH(fence);
+ i915->vbo_flushed = 1;
+}
+
+
+
+void i915_init_flush_functions( struct i915_context *i915 )
+{
+ i915->pipe.flush = i915_flush;
+}
diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h
new file mode 100644
index 0000000000..2f0f99d046
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_fpc.h
@@ -0,0 +1,207 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef I915_FPC_H
+#define I915_FPC_H
+
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+
+
+#define I915_PROGRAM_SIZE 192
+
+
+
+/**
+ * Program translation state
+ */
+struct i915_fp_compile {
+ struct i915_fragment_shader *shader; /* the shader we're compiling */
+
+ boolean used_constants[I915_MAX_CONSTANT];
+
+ /** maps TGSI immediate index to constant slot */
+ uint num_immediates;
+ uint immediates_map[I915_MAX_CONSTANT];
+ float immediates[I915_MAX_CONSTANT][4];
+
+ boolean first_instruction;
+
+ uint declarations[I915_PROGRAM_SIZE];
+ uint program[I915_PROGRAM_SIZE];
+
+ uint *csr; /**< Cursor, points into program. */
+
+ uint *decl; /**< Cursor, points into declarations. */
+
+ uint decl_s; /**< flags for which s regs need to be decl'd */
+ uint decl_t; /**< flags for which t regs need to be decl'd */
+
+ uint temp_flag; /**< Tracks temporary regs which are in use */
+ uint utemp_flag; /**< Tracks TYPE_U temporary regs which are in use */
+
+ uint nr_tex_indirect;
+ uint nr_tex_insn;
+ uint nr_alu_insn;
+ uint nr_decl_insn;
+
+ boolean error; /**< Set if i915_program_error() is called */
+ uint wpos_tex;
+ uint NumNativeInstructions;
+ uint NumNativeAluInstructions;
+ uint NumNativeTexInstructions;
+ uint NumNativeTexIndirections;
+};
+
+
+/* Having zero and one in here makes the definition of swizzle a lot
+ * easier.
+ */
+#define UREG_TYPE_SHIFT 29
+#define UREG_NR_SHIFT 24
+#define UREG_CHANNEL_X_NEGATE_SHIFT 23
+#define UREG_CHANNEL_X_SHIFT 20
+#define UREG_CHANNEL_Y_NEGATE_SHIFT 19
+#define UREG_CHANNEL_Y_SHIFT 16
+#define UREG_CHANNEL_Z_NEGATE_SHIFT 15
+#define UREG_CHANNEL_Z_SHIFT 12
+#define UREG_CHANNEL_W_NEGATE_SHIFT 11
+#define UREG_CHANNEL_W_SHIFT 8
+#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5
+#define UREG_CHANNEL_ZERO_SHIFT 4
+#define UREG_CHANNEL_ONE_NEGATE_MBZ 1
+#define UREG_CHANNEL_ONE_SHIFT 0
+
+#define UREG_BAD 0xffffffff /* not a valid ureg */
+
+#define X SRC_X
+#define Y SRC_Y
+#define Z SRC_Z
+#define W SRC_W
+#define ZERO SRC_ZERO
+#define ONE SRC_ONE
+
+/* Construct a ureg:
+ */
+#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \
+ ((nr) << UREG_NR_SHIFT) | \
+ (X << UREG_CHANNEL_X_SHIFT) | \
+ (Y << UREG_CHANNEL_Y_SHIFT) | \
+ (Z << UREG_CHANNEL_Z_SHIFT) | \
+ (W << UREG_CHANNEL_W_SHIFT) | \
+ (ZERO << UREG_CHANNEL_ZERO_SHIFT) | \
+ (ONE << UREG_CHANNEL_ONE_SHIFT))
+
+#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & (0xf<<20))
+#define CHANNEL_SRC( src, channel ) (src>>(channel*4))
+
+#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&REG_TYPE_MASK)
+#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)&REG_NR_MASK)
+
+
+
+#define UREG_XYZW_CHANNEL_MASK 0x00ffff00
+
+/* One neat thing about the UREG representation:
+ */
+static INLINE int
+swizzle(int reg, uint x, uint y, uint z, uint w)
+{
+ assert(x <= SRC_ONE);
+ assert(y <= SRC_ONE);
+ assert(z <= SRC_ONE);
+ assert(w <= SRC_ONE);
+ return ((reg & ~UREG_XYZW_CHANNEL_MASK) |
+ CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) |
+ CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) |
+ CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) |
+ CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3));
+}
+
+
+
+/***********************************************************************
+ * Public interface for the compiler
+ */
+extern void
+i915_translate_fragment_program( struct i915_context *i915,
+ struct i915_fragment_shader *fs);
+
+
+
+extern uint i915_get_temp(struct i915_fp_compile *p);
+extern uint i915_get_utemp(struct i915_fp_compile *p);
+extern void i915_release_utemps(struct i915_fp_compile *p);
+
+
+extern uint i915_emit_texld(struct i915_fp_compile *p,
+ uint dest,
+ uint destmask,
+ uint sampler, uint coord, uint op);
+
+extern uint i915_emit_arith(struct i915_fp_compile *p,
+ uint op,
+ uint dest,
+ uint mask,
+ uint saturate,
+ uint src0, uint src1, uint src2);
+
+extern uint i915_emit_decl(struct i915_fp_compile *p,
+ uint type, uint nr, uint d0_flags);
+
+
+extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0);
+
+extern uint i915_emit_const2f(struct i915_fp_compile *p,
+ float c0, float c1);
+
+extern uint i915_emit_const4fv(struct i915_fp_compile *p,
+ const float * c);
+
+extern uint i915_emit_const4f(struct i915_fp_compile *p,
+ float c0, float c1,
+ float c2, float c3);
+
+
+/*======================================================================
+ * i915_fpc_debug.c
+ */
+extern void i915_disassemble_program(const uint * program, uint sz);
+
+
+/*======================================================================
+ * i915_fpc_translate.c
+ */
+
+extern void
+i915_program_error(struct i915_fp_compile *p, const char *msg, ...);
+
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c
new file mode 100644
index 0000000000..b054ce41d3
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c
@@ -0,0 +1,375 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_fpc.h"
+#include "util/u_math.h"
+
+
+#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT)
+#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT)
+#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT)
+#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT)
+#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT)
+
+/* These are special, and don't have swizzle/negate bits.
+ */
+#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT)
+#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \
+ (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT))
+
+
+/* Macros for translating UREG's into the various register fields used
+ * by the I915 programmable unit.
+ */
+#define UREG_A0_DEST_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT)
+#define UREG_A0_SRC0_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT)
+#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
+#define UREG_A1_SRC1_SHIFT_LEFT (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT)
+#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
+#define UREG_A2_SRC2_SHIFT_LEFT (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT)
+
+#define UREG_MASK 0xffffff00
+#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \
+ (REG_NR_MASK << UREG_NR_SHIFT))
+
+
+uint
+i915_get_temp(struct i915_fp_compile *p)
+{
+ int bit = ffs(~p->temp_flag);
+ if (!bit) {
+ i915_program_error(p, "i915_get_temp: out of temporaries\n");
+ return 0;
+ }
+
+ p->temp_flag |= 1 << (bit - 1);
+ return bit - 1;
+}
+
+
+static void
+i915_release_temp(struct i915_fp_compile *p, int reg)
+{
+ p->temp_flag &= ~(1 << reg);
+}
+
+
+/**
+ * Get unpreserved temporary, a temp whose value is not preserved between
+ * PS program phases.
+ */
+uint
+i915_get_utemp(struct i915_fp_compile * p)
+{
+ int bit = ffs(~p->utemp_flag);
+ if (!bit) {
+ i915_program_error(p, "i915_get_utemp: out of temporaries\n");
+ return 0;
+ }
+
+ p->utemp_flag |= 1 << (bit - 1);
+ return UREG(REG_TYPE_U, (bit - 1));
+}
+
+void
+i915_release_utemps(struct i915_fp_compile *p)
+{
+ p->utemp_flag = ~0x7;
+}
+
+
+uint
+i915_emit_decl(struct i915_fp_compile *p,
+ uint type, uint nr, uint d0_flags)
+{
+ uint reg = UREG(type, nr);
+
+ if (type == REG_TYPE_T) {
+ if (p->decl_t & (1 << nr))
+ return reg;
+
+ p->decl_t |= (1 << nr);
+ }
+ else if (type == REG_TYPE_S) {
+ if (p->decl_s & (1 << nr))
+ return reg;
+
+ p->decl_s |= (1 << nr);
+ }
+ else
+ return reg;
+
+ *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);
+ *(p->decl++) = D1_MBZ;
+ *(p->decl++) = D2_MBZ;
+
+ p->nr_decl_insn++;
+ return reg;
+}
+
+uint
+i915_emit_arith(struct i915_fp_compile * p,
+ uint op,
+ uint dest,
+ uint mask,
+ uint saturate, uint src0, uint src1, uint src2)
+{
+ uint c[3];
+ uint nr_const = 0;
+
+ assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+ dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
+ assert(dest);
+
+ if (GET_UREG_TYPE(src0) == REG_TYPE_CONST)
+ c[nr_const++] = 0;
+ if (GET_UREG_TYPE(src1) == REG_TYPE_CONST)
+ c[nr_const++] = 1;
+ if (GET_UREG_TYPE(src2) == REG_TYPE_CONST)
+ c[nr_const++] = 2;
+
+ /* Recursively call this function to MOV additional const values
+ * into temporary registers. Use utemp registers for this -
+ * currently shouldn't be possible to run out, but keep an eye on
+ * this.
+ */
+ if (nr_const > 1) {
+ uint s[3], first, i, old_utemp_flag;
+
+ s[0] = src0;
+ s[1] = src1;
+ s[2] = src2;
+ old_utemp_flag = p->utemp_flag;
+
+ first = GET_UREG_NR(s[c[0]]);
+ for (i = 1; i < nr_const; i++) {
+ if (GET_UREG_NR(s[c[i]]) != first) {
+ uint tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+ s[c[i]], 0, 0);
+ s[c[i]] = tmp;
+ }
+ }
+
+ src0 = s[0];
+ src1 = s[1];
+ src2 = s[2];
+ p->utemp_flag = old_utemp_flag; /* restore */
+ }
+
+ *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
+ *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
+ *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
+
+ p->nr_alu_insn++;
+ return dest;
+}
+
+
+/**
+ * Emit a texture load or texkill instruction.
+ * \param dest the dest i915 register
+ * \param destmask the dest register writemask
+ * \param sampler the i915 sampler register
+ * \param coord the i915 source texcoord operand
+ * \param opcode the instruction opcode
+ */
+uint i915_emit_texld( struct i915_fp_compile *p,
+ uint dest,
+ uint destmask,
+ uint sampler,
+ uint coord,
+ uint opcode )
+{
+ const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
+ int temp = -1;
+
+ if (coord != k) {
+ /* texcoord is swizzled or negated. Need to allocate a new temporary
+ * register (a utemp / unpreserved temp) won't do.
+ */
+ uint tempReg;
+
+ temp = i915_get_temp(p); /* get temp reg index */
+ tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */
+
+ i915_emit_arith( p, A0_MOV,
+ tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */
+ 0, /* saturate */
+ coord, 0, 0 ); /* src0, src1, src2 */
+
+ /* new src texcoord is tempReg */
+ coord = tempReg;
+ }
+
+ /* Don't worry about saturate as we only support
+ */
+ if (destmask != A0_DEST_CHANNEL_ALL) {
+ /* if not writing to XYZW... */
+ uint tmp = i915_get_utemp(p);
+ i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode );
+ i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
+ /* XXX release utemp here? */
+ }
+ else {
+ assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+ assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
+
+ /* is the sampler coord a texcoord input reg? */
+ if (GET_UREG_TYPE(coord) != REG_TYPE_T) {
+ p->nr_tex_indirect++;
+ }
+
+ *(p->csr++) = (opcode |
+ T0_DEST( dest ) |
+ T0_SAMPLER( sampler ));
+
+ *(p->csr++) = T1_ADDRESS_REG( coord );
+ *(p->csr++) = T2_MBZ;
+
+ p->nr_tex_insn++;
+ }
+
+ if (temp >= 0)
+ i915_release_temp(p, temp);
+
+ return dest;
+}
+
+
+uint
+i915_emit_const1f(struct i915_fp_compile * p, float c0)
+{
+ struct i915_fragment_shader *ifs = p->shader;
+ unsigned reg, idx;
+
+ if (c0 == 0.0)
+ return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
+ if (c0 == 1.0)
+ return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
+
+ for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
+ if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
+ continue;
+ for (idx = 0; idx < 4; idx++) {
+ if (!(ifs->constant_flags[reg] & (1 << idx)) ||
+ ifs->constants[reg][idx] == c0) {
+ ifs->constants[reg][idx] = c0;
+ ifs->constant_flags[reg] |= 1 << idx;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
+ return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
+ }
+ }
+ }
+
+ i915_program_error(p, "i915_emit_const1f: out of constants\n");
+ return 0;
+}
+
+uint
+i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
+{
+ struct i915_fragment_shader *ifs = p->shader;
+ unsigned reg, idx;
+
+ if (c0 == 0.0)
+ return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
+ if (c0 == 1.0)
+ return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);
+
+ if (c1 == 0.0)
+ return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
+ if (c1 == 1.0)
+ return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
+
+ for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
+ if (ifs->constant_flags[reg] == 0xf ||
+ ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
+ continue;
+ for (idx = 0; idx < 3; idx++) {
+ if (!(ifs->constant_flags[reg] & (3 << idx))) {
+ ifs->constants[reg][idx + 0] = c0;
+ ifs->constants[reg][idx + 1] = c1;
+ ifs->constant_flags[reg] |= 3 << idx;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
+ return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE);
+ }
+ }
+ }
+
+ i915_program_error(p, "i915_emit_const2f: out of constants\n");
+ return 0;
+}
+
+
+
+uint
+i915_emit_const4f(struct i915_fp_compile * p,
+ float c0, float c1, float c2, float c3)
+{
+ struct i915_fragment_shader *ifs = p->shader;
+ unsigned reg;
+
+ for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
+ if (ifs->constant_flags[reg] == 0xf &&
+ ifs->constants[reg][0] == c0 &&
+ ifs->constants[reg][1] == c1 &&
+ ifs->constants[reg][2] == c2 &&
+ ifs->constants[reg][3] == c3) {
+ return UREG(REG_TYPE_CONST, reg);
+ }
+ else if (ifs->constant_flags[reg] == 0) {
+
+ ifs->constants[reg][0] = c0;
+ ifs->constants[reg][1] = c1;
+ ifs->constants[reg][2] = c2;
+ ifs->constants[reg][3] = c3;
+ ifs->constant_flags[reg] = 0xf;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
+ return UREG(REG_TYPE_CONST, reg);
+ }
+ }
+
+ i915_program_error(p, "i915_emit_const4f: out of constants\n");
+ return 0;
+}
+
+
+uint
+i915_emit_const4fv(struct i915_fp_compile * p, const float * c)
+{
+ return i915_emit_const4f(p, c[0], c[1], c[2], c[3]);
+}
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
new file mode 100644
index 0000000000..43d62c5176
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -0,0 +1,1190 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdarg.h>
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_fpc.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "draw/draw_vertex.h"
+
+
+/**
+ * Simple pass-through fragment shader to use when we don't have
+ * a real shader (or it fails to compile for some reason).
+ */
+static unsigned passthrough[] =
+{
+ _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1),
+
+ /* declare input color:
+ */
+ (D0_DCL |
+ (REG_TYPE_T << D0_TYPE_SHIFT) |
+ (T_DIFFUSE << D0_NR_SHIFT) |
+ D0_CHANNEL_ALL),
+ 0,
+ 0,
+
+ /* move to output color:
+ */
+ (A0_MOV |
+ (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
+ A0_DEST_CHANNEL_ALL |
+ (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) |
+ (T_DIFFUSE << A0_SRC0_NR_SHIFT)),
+ 0x01230000, /* .xyzw */
+ 0
+};
+
+
+/* 1, -1/3!, 1/5!, -1/7! */
+static const float sin_constants[4] = { 1.0,
+ -1.0f / (3 * 2 * 1),
+ 1.0f / (5 * 4 * 3 * 2 * 1),
+ -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1)
+};
+
+/* 1, -1/2!, 1/4!, -1/6! */
+static const float cos_constants[4] = { 1.0,
+ -1.0f / (2 * 1),
+ 1.0f / (4 * 3 * 2 * 1),
+ -1.0f / (6 * 5 * 4 * 3 * 2 * 1)
+};
+
+
+
+/**
+ * component-wise negation of ureg
+ */
+static INLINE int
+negate(int reg, int x, int y, int z, int w)
+{
+ /* Another neat thing about the UREG representation */
+ return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
+ ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
+ ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
+ ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
+}
+
+
+/**
+ * In the event of a translation failure, we'll generate a simple color
+ * pass-through program.
+ */
+static void
+i915_use_passthrough_shader(struct i915_fragment_shader *fs)
+{
+ fs->program = (uint *) MALLOC(sizeof(passthrough));
+ if (fs->program) {
+ memcpy(fs->program, passthrough, sizeof(passthrough));
+ fs->program_len = Elements(passthrough);
+ }
+ fs->num_constants = 0;
+}
+
+
+void
+i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
+{
+ va_list args;
+ char buffer[1024];
+
+ debug_printf("i915_program_error: ");
+ va_start( args, msg );
+ util_vsnprintf( buffer, sizeof(buffer), msg, args );
+ va_end( args );
+ debug_printf(buffer);
+ debug_printf("\n");
+
+ p->error = 1;
+}
+
+
+
+/**
+ * Construct a ureg for the given source register. Will emit
+ * constants, apply swizzling and negation as needed.
+ */
+static uint
+src_vector(struct i915_fp_compile *p,
+ const struct tgsi_full_src_register *source)
+{
+ uint index = source->SrcRegister.Index;
+ uint src = 0, sem_name, sem_ind;
+
+ switch (source->SrcRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) {
+ i915_program_error(p, "Exceeded max temporary reg");
+ return 0;
+ }
+ src = UREG(REG_TYPE_R, index);
+ break;
+ case TGSI_FILE_INPUT:
+ /* XXX: Packing COL1, FOGC into a single attribute works for
+ * texenv programs, but will fail for real fragment programs
+ * that use these attributes and expect them to be a full 4
+ * components wide. Could use a texcoord to pass these
+ * attributes if necessary, but that won't work in the general
+ * case.
+ *
+ * We also use a texture coordinate to pass wpos when possible.
+ */
+
+ sem_name = p->shader->info.input_semantic_name[index];
+ sem_ind = p->shader->info.input_semantic_index[index];
+
+ switch (sem_name) {
+ case TGSI_SEMANTIC_POSITION:
+ debug_printf("SKIP SEM POS\n");
+ /*
+ assert(p->wpos_tex != -1);
+ src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
+ */
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (sem_ind == 0) {
+ src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
+ }
+ else {
+ /* secondary color */
+ assert(sem_ind == 1);
+ src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
+ src = swizzle(src, X, Y, Z, ONE);
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
+ src = swizzle(src, W, W, W, W);
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ /* usually a texcoord */
+ src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL);
+ break;
+ default:
+ i915_program_error(p, "Bad source->Index");
+ return 0;
+ }
+ break;
+
+ case TGSI_FILE_IMMEDIATE:
+ assert(index < p->num_immediates);
+ index = p->immediates_map[index];
+ /* fall-through */
+ case TGSI_FILE_CONSTANT:
+ src = UREG(REG_TYPE_CONST, index);
+ break;
+
+ default:
+ i915_program_error(p, "Bad source->File");
+ return 0;
+ }
+
+ if (source->SrcRegister.Extended) {
+ src = swizzle(src,
+ source->SrcRegisterExtSwz.ExtSwizzleX,
+ source->SrcRegisterExtSwz.ExtSwizzleY,
+ source->SrcRegisterExtSwz.ExtSwizzleZ,
+ source->SrcRegisterExtSwz.ExtSwizzleW);
+ }
+ else {
+ src = swizzle(src,
+ source->SrcRegister.SwizzleX,
+ source->SrcRegister.SwizzleY,
+ source->SrcRegister.SwizzleZ,
+ source->SrcRegister.SwizzleW);
+ }
+
+
+ /* There's both negate-all-components and per-component negation.
+ * Try to handle both here.
+ */
+ {
+ int nx = source->SrcRegisterExtSwz.NegateX;
+ int ny = source->SrcRegisterExtSwz.NegateY;
+ int nz = source->SrcRegisterExtSwz.NegateZ;
+ int nw = source->SrcRegisterExtSwz.NegateW;
+ if (source->SrcRegister.Negate) {
+ nx = !nx;
+ ny = !ny;
+ nz = !nz;
+ nw = !nw;
+ }
+ src = negate(src, nx, ny, nz, nw);
+ }
+
+ /* no abs() or post-abs negation */
+#if 0
+ /* XXX assertions disabled to allow arbfplight.c to run */
+ /* XXX enable these assertions, or fix things */
+ assert(!source->SrcRegisterExtMod.Absolute);
+ assert(!source->SrcRegisterExtMod.Negate);
+#endif
+ return src;
+}
+
+
+/**
+ * Construct a ureg for a destination register.
+ */
+static uint
+get_result_vector(struct i915_fp_compile *p,
+ const struct tgsi_full_dst_register *dest)
+{
+ switch (dest->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ {
+ uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index];
+ switch (sem_name) {
+ case TGSI_SEMANTIC_POSITION:
+ return UREG(REG_TYPE_OD, 0);
+ case TGSI_SEMANTIC_COLOR:
+ return UREG(REG_TYPE_OC, 0);
+ default:
+ i915_program_error(p, "Bad inst->DstReg.Index/semantics");
+ return 0;
+ }
+ }
+ case TGSI_FILE_TEMPORARY:
+ return UREG(REG_TYPE_R, dest->DstRegister.Index);
+ default:
+ i915_program_error(p, "Bad inst->DstReg.File");
+ return 0;
+ }
+}
+
+
+/**
+ * Compute flags for saturation and writemask.
+ */
+static uint
+get_result_flags(const struct tgsi_full_instruction *inst)
+{
+ const uint writeMask
+ = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ uint flags = 0x0;
+
+ if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
+ flags |= A0_DEST_SATURATE;
+
+ if (writeMask & TGSI_WRITEMASK_X)
+ flags |= A0_DEST_CHANNEL_X;
+ if (writeMask & TGSI_WRITEMASK_Y)
+ flags |= A0_DEST_CHANNEL_Y;
+ if (writeMask & TGSI_WRITEMASK_Z)
+ flags |= A0_DEST_CHANNEL_Z;
+ if (writeMask & TGSI_WRITEMASK_W)
+ flags |= A0_DEST_CHANNEL_W;
+
+ return flags;
+}
+
+
+/**
+ * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
+ */
+static uint
+translate_tex_src_target(struct i915_fp_compile *p, uint tex)
+{
+ switch (tex) {
+ case TGSI_TEXTURE_1D:
+ return D0_SAMPLE_TYPE_2D;
+ case TGSI_TEXTURE_2D:
+ return D0_SAMPLE_TYPE_2D;
+ case TGSI_TEXTURE_RECT:
+ return D0_SAMPLE_TYPE_2D;
+ case TGSI_TEXTURE_3D:
+ return D0_SAMPLE_TYPE_VOLUME;
+ case TGSI_TEXTURE_CUBE:
+ return D0_SAMPLE_TYPE_CUBE;
+ default:
+ i915_program_error(p, "TexSrc type");
+ return 0;
+ }
+}
+
+
+/**
+ * Generate texel lookup instruction.
+ */
+static void
+emit_tex(struct i915_fp_compile *p,
+ const struct tgsi_full_instruction *inst,
+ uint opcode)
+{
+ uint texture = inst->InstructionExtTexture.Texture;
+ uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ uint tex = translate_tex_src_target( p, texture );
+ uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
+ uint coord = src_vector( p, &inst->FullSrcRegisters[0]);
+
+ i915_emit_texld( p,
+ get_result_vector( p, &inst->FullDstRegisters[0] ),
+ get_result_flags( inst ),
+ sampler,
+ coord,
+ opcode);
+}
+
+
+/**
+ * Generate a simple arithmetic instruction
+ * \param opcode the i915 opcode
+ * \param numArgs the number of input/src arguments
+ */
+static void
+emit_simple_arith(struct i915_fp_compile *p,
+ const struct tgsi_full_instruction *inst,
+ uint opcode, uint numArgs)
+{
+ uint arg1, arg2, arg3;
+
+ assert(numArgs <= 3);
+
+ arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] );
+ arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] );
+ arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] );
+
+ i915_emit_arith( p,
+ opcode,
+ get_result_vector( p, &inst->FullDstRegisters[0]),
+ get_result_flags( inst ), 0,
+ arg1,
+ arg2,
+ arg3 );
+}
+
+
+/** As above, but swap the first two src regs */
+static void
+emit_simple_arith_swap2(struct i915_fp_compile *p,
+ const struct tgsi_full_instruction *inst,
+ uint opcode, uint numArgs)
+{
+ struct tgsi_full_instruction inst2;
+
+ assert(numArgs == 2);
+
+ /* transpose first two registers */
+ inst2 = *inst;
+ inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1];
+ inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0];
+
+ emit_simple_arith(p, &inst2, opcode, numArgs);
+}
+
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+/*
+ * Translate TGSI instruction to i915 instruction.
+ *
+ * Possible concerns:
+ *
+ * SIN, COS -- could use another taylor step?
+ * LIT -- results seem a little different to sw mesa
+ * LOG -- different to mesa on negative numbers, but this is conformant.
+ */
+static void
+i915_translate_instruction(struct i915_fp_compile *p,
+ const struct tgsi_full_instruction *inst)
+{
+ uint writemask;
+ uint src0, src1, src2, flags;
+ uint tmp = 0;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ i915_emit_arith(p,
+ A0_MAX,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ src0, negate(src0, 1, 1, 1, 1), 0);
+ break;
+
+ case TGSI_OPCODE_ADD:
+ emit_simple_arith(p, inst, A0_ADD, 2);
+ break;
+
+ case TGSI_OPCODE_CMP:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+ i915_emit_arith(p, A0_CMP,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst),
+ 0, src0, src2, src1); /* NOTE: order of src2, src1 */
+ break;
+
+ case TGSI_OPCODE_COS:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_X, 0,
+ src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);
+
+ i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+ /* By choosing different taylor constants, could get rid of this mul:
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_X, 0,
+ tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0);
+
+ /*
+ * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
+ * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
+ * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
+ * result = DP4 t0, cos_constants
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XY, 0,
+ swizzle(tmp, X, X, ONE, ONE),
+ swizzle(tmp, X, ONE, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XYZ, 0,
+ swizzle(tmp, X, Y, X, ONE),
+ swizzle(tmp, X, X, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XYZ, 0,
+ swizzle(tmp, X, X, Z, ONE),
+ swizzle(tmp, Z, ONE, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_DP4,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(tmp, ONE, Z, Y, X),
+ i915_emit_const4fv(p, cos_constants), 0);
+ break;
+
+ case TGSI_OPCODE_DP3:
+ emit_simple_arith(p, inst, A0_DP3, 2);
+ break;
+
+ case TGSI_OPCODE_DP4:
+ emit_simple_arith(p, inst, A0_DP4, 2);
+ break;
+
+ case TGSI_OPCODE_DPH:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+
+ i915_emit_arith(p,
+ A0_DP4,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, Y, Z, ONE), src1, 0);
+ break;
+
+ case TGSI_OPCODE_DST:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+
+ /* result[0] = 1 * 1;
+ * result[1] = a[1] * b[1];
+ * result[2] = a[2] * 1;
+ * result[3] = 1 * b[3];
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, ONE, Y, Z, ONE),
+ swizzle(src1, ONE, Y, ONE, W), 0);
+ break;
+
+ case TGSI_OPCODE_END:
+ /* no-op */
+ break;
+
+ case TGSI_OPCODE_EX2:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+ i915_emit_arith(p,
+ A0_EXP,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, X, X, X), 0, 0);
+ break;
+
+ case TGSI_OPCODE_FLR:
+ emit_simple_arith(p, inst, A0_FLR, 1);
+ break;
+
+ case TGSI_OPCODE_FRC:
+ emit_simple_arith(p, inst, A0_FRC, 1);
+ break;
+
+ case TGSI_OPCODE_KIL:
+ /* kill if src[0].x < 0 || src[0].y < 0 ... */
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_texld(p,
+ tmp, /* dest reg: a dummy reg */
+ A0_DEST_CHANNEL_ALL, /* dest writemask */
+ 0, /* sampler */
+ src0, /* coord*/
+ T0_TEXKILL); /* opcode */
+ break;
+
+ case TGSI_OPCODE_KILP:
+ assert(0); /* not tested yet */
+ break;
+
+ case TGSI_OPCODE_LG2:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+ i915_emit_arith(p,
+ A0_LOG,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, X, X, X), 0, 0);
+ break;
+
+ case TGSI_OPCODE_LIT:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ tmp = i915_get_utemp(p);
+
+ /* tmp = max( a.xyzw, a.00zw )
+ * XXX: Clamp tmp.w to -128..128
+ * tmp.y = log(tmp.y)
+ * tmp.y = tmp.w * tmp.y
+ * tmp.y = exp(tmp.y)
+ * result = cmp (a.11-x1, a.1x01, a.1xy1 )
+ */
+ i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
+ src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
+
+ i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
+ swizzle(tmp, Y, Y, Y, Y), 0, 0);
+
+ i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
+ swizzle(tmp, ZERO, Y, ZERO, ZERO),
+ swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
+
+ i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
+ swizzle(tmp, Y, Y, Y, Y), 0, 0);
+
+ i915_emit_arith(p, A0_CMP,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
+ swizzle(tmp, ONE, X, ZERO, ONE),
+ swizzle(tmp, ONE, X, Y, ONE));
+
+ break;
+
+ case TGSI_OPCODE_LRP:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+ flags = get_result_flags(inst);
+ tmp = i915_get_utemp(p);
+
+ /* b*a + c*(1-a)
+ *
+ * b*a + c - ca
+ *
+ * tmp = b*a + c,
+ * result = (-c)*a + tmp
+ */
+ i915_emit_arith(p, A0_MAD, tmp,
+ flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
+
+ i915_emit_arith(p, A0_MAD,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
+ break;
+
+ case TGSI_OPCODE_MAD:
+ emit_simple_arith(p, inst, A0_MAD, 3);
+ break;
+
+ case TGSI_OPCODE_MAX:
+ emit_simple_arith(p, inst, A0_MAX, 2);
+ break;
+
+ case TGSI_OPCODE_MIN:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ tmp = i915_get_utemp(p);
+ flags = get_result_flags(inst);
+
+ i915_emit_arith(p,
+ A0_MAX,
+ tmp, flags & A0_DEST_CHANNEL_ALL, 0,
+ negate(src0, 1, 1, 1, 1),
+ negate(src1, 1, 1, 1, 1), 0);
+
+ i915_emit_arith(p,
+ A0_MOV,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
+ break;
+
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_SWZ:
+ emit_simple_arith(p, inst, A0_MOV, 1);
+ break;
+
+ case TGSI_OPCODE_MUL:
+ emit_simple_arith(p, inst, A0_MUL, 2);
+ break;
+
+ case TGSI_OPCODE_POW:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ tmp = i915_get_utemp(p);
+ flags = get_result_flags(inst);
+
+ /* XXX: masking on intermediate values, here and elsewhere.
+ */
+ i915_emit_arith(p,
+ A0_LOG,
+ tmp, A0_DEST_CHANNEL_X, 0,
+ swizzle(src0, X, X, X, X), 0, 0);
+
+ i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
+
+ i915_emit_arith(p,
+ A0_EXP,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
+ break;
+
+ case TGSI_OPCODE_RET:
+ /* XXX: no-op? */
+ break;
+
+ case TGSI_OPCODE_RCP:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+ i915_emit_arith(p,
+ A0_RCP,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, X, X, X), 0, 0);
+ break;
+
+ case TGSI_OPCODE_RSQ:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+ i915_emit_arith(p,
+ A0_RSQ,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, X, X, X), 0, 0);
+ break;
+
+ case TGSI_OPCODE_SCS:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ tmp = i915_get_utemp(p);
+
+ /*
+ * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
+ * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
+ * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
+ * scs.x = DP4 t1, sin_constants
+ * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
+ * scs.y = DP4 t1, cos_constants
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XY, 0,
+ swizzle(src0, X, X, ONE, ONE),
+ swizzle(src0, X, ONE, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_ALL, 0,
+ swizzle(tmp, X, Y, X, Y),
+ swizzle(tmp, X, X, ONE, ONE), 0);
+
+ writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+
+ if (writemask & TGSI_WRITEMASK_Y) {
+ uint tmp1;
+
+ if (writemask & TGSI_WRITEMASK_X)
+ tmp1 = i915_get_utemp(p);
+ else
+ tmp1 = tmp;
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp1, A0_DEST_CHANNEL_ALL, 0,
+ swizzle(tmp, X, Y, Y, W),
+ swizzle(tmp, X, Z, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_DP4,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ A0_DEST_CHANNEL_Y, 0,
+ swizzle(tmp1, W, Z, Y, X),
+ i915_emit_const4fv(p, sin_constants), 0);
+ }
+
+ if (writemask & TGSI_WRITEMASK_X) {
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XYZ, 0,
+ swizzle(tmp, X, X, Z, ONE),
+ swizzle(tmp, Z, ONE, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_DP4,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ A0_DEST_CHANNEL_X, 0,
+ swizzle(tmp, ONE, Z, Y, X),
+ i915_emit_const4fv(p, cos_constants), 0);
+ }
+ break;
+
+ case TGSI_OPCODE_SGE:
+ emit_simple_arith(p, inst, A0_SGE, 2);
+ break;
+
+ case TGSI_OPCODE_SLE:
+ /* like SGE, but swap reg0, reg1 */
+ emit_simple_arith_swap2(p, inst, A0_SGE, 2);
+ break;
+
+ case TGSI_OPCODE_SIN:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_X, 0,
+ src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);
+
+ i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+ /* By choosing different taylor constants, could get rid of this mul:
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_X, 0,
+ tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0);
+
+ /*
+ * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
+ * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
+ * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
+ * result = DP4 t1.wzyx, sin_constants
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XY, 0,
+ swizzle(tmp, X, X, ONE, ONE),
+ swizzle(tmp, X, ONE, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_ALL, 0,
+ swizzle(tmp, X, Y, X, Y),
+ swizzle(tmp, X, X, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_ALL, 0,
+ swizzle(tmp, X, Y, Y, W),
+ swizzle(tmp, X, Z, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_DP4,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(tmp, W, Z, Y, X),
+ i915_emit_const4fv(p, sin_constants), 0);
+ break;
+
+ case TGSI_OPCODE_SLT:
+ emit_simple_arith(p, inst, A0_SLT, 2);
+ break;
+
+ case TGSI_OPCODE_SGT:
+ /* like SLT, but swap reg0, reg1 */
+ emit_simple_arith_swap2(p, inst, A0_SLT, 2);
+ break;
+
+ case TGSI_OPCODE_SUB:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+
+ i915_emit_arith(p,
+ A0_ADD,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ src0, negate(src1, 1, 1, 1, 1), 0);
+ break;
+
+ case TGSI_OPCODE_TEX:
+ emit_tex(p, inst, T0_TEXLD);
+ break;
+
+ case TGSI_OPCODE_TXB:
+ emit_tex(p, inst, T0_TEXLDB);
+ break;
+
+ case TGSI_OPCODE_TXP:
+ emit_tex(p, inst, T0_TEXLDP);
+ break;
+
+ case TGSI_OPCODE_XPD:
+ /* Cross product:
+ * result.x = src0.y * src1.z - src0.z * src1.y;
+ * result.y = src0.z * src1.x - src0.x * src1.z;
+ * result.z = src0.x * src1.y - src0.y * src1.x;
+ * result.w = undef;
+ */
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_ALL, 0,
+ swizzle(src0, Z, X, Y, ONE),
+ swizzle(src1, Y, Z, X, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MAD,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, Y, Z, X, ONE),
+ swizzle(src1, Z, X, Y, ONE),
+ negate(tmp, 1, 1, 1, 0));
+ break;
+
+ default:
+ i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
+ p->error = 1;
+ return;
+ }
+
+ i915_release_utemps(p);
+}
+
+
+/**
+ * Translate TGSI fragment shader into i915 hardware instructions.
+ * \param p the translation state
+ * \param tokens the TGSI token array
+ */
+static void
+i915_translate_instructions(struct i915_fp_compile *p,
+ const struct tgsi_token *tokens)
+{
+ struct i915_fragment_shader *ifs = p->shader;
+ struct tgsi_parse_context parse;
+
+ tgsi_parse_init( &parse, tokens );
+
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ if (parse.FullToken.FullDeclaration.Declaration.File
+ == TGSI_FILE_CONSTANT) {
+ uint i;
+ for (i = parse.FullToken.FullDeclaration.DeclarationRange.First;
+ i <= parse.FullToken.FullDeclaration.DeclarationRange.Last;
+ i++) {
+ assert(ifs->constant_flags[i] == 0x0);
+ ifs->constant_flags[i] = I915_CONSTFLAG_USER;
+ ifs->num_constants = MAX2(ifs->num_constants, i + 1);
+ }
+ }
+ else if (parse.FullToken.FullDeclaration.Declaration.File
+ == TGSI_FILE_TEMPORARY) {
+ uint i;
+ for (i = parse.FullToken.FullDeclaration.DeclarationRange.First;
+ i <= parse.FullToken.FullDeclaration.DeclarationRange.Last;
+ i++) {
+ assert(i < I915_MAX_TEMPORARY);
+ /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
+ p->temp_flag |= (1 << i); /* mark temp as used */
+ }
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm
+ = &parse.FullToken.FullImmediate;
+ const uint pos = p->num_immediates++;
+ uint j;
+ for (j = 0; j < imm->Immediate.Size; j++) {
+ p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float;
+ }
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (p->first_instruction) {
+ /* resolve location of immediates */
+ uint i, j;
+ for (i = 0; i < p->num_immediates; i++) {
+ /* find constant slot for this immediate */
+ for (j = 0; j < I915_MAX_CONSTANT; j++) {
+ if (ifs->constant_flags[j] == 0x0) {
+ memcpy(ifs->constants[j],
+ p->immediates[i],
+ 4 * sizeof(float));
+ /*printf("immediate %d maps to const %d\n", i, j);*/
+ ifs->constant_flags[j] = 0xf; /* all four comps used */
+ p->immediates_map[i] = j;
+ ifs->num_constants = MAX2(ifs->num_constants, j + 1);
+ break;
+ }
+ }
+ }
+
+ p->first_instruction = FALSE;
+ }
+
+ i915_translate_instruction(p, &parse.FullToken.FullInstruction);
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ } /* while */
+
+ tgsi_parse_free (&parse);
+}
+
+
+static struct i915_fp_compile *
+i915_init_compile(struct i915_context *i915,
+ struct i915_fragment_shader *ifs)
+{
+ struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
+
+ p->shader = ifs;
+
+ /* Put new constants at end of const buffer, growing downward.
+ * The problem is we don't know how many user-defined constants might
+ * be specified with pipe->set_constant_buffer().
+ * Should pre-scan the user's program to determine the highest-numbered
+ * constant referenced.
+ */
+ ifs->num_constants = 0;
+ memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
+
+ p->first_instruction = TRUE;
+
+ p->nr_tex_indirect = 1; /* correct? */
+ p->nr_tex_insn = 0;
+ p->nr_alu_insn = 0;
+ p->nr_decl_insn = 0;
+
+ p->csr = p->program;
+ p->decl = p->declarations;
+ p->decl_s = 0;
+ p->decl_t = 0;
+ p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
+ p->utemp_flag = ~0x7;
+
+ p->wpos_tex = -1;
+
+ /* initialize the first program word */
+ *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
+
+ return p;
+}
+
+
+/* Copy compile results to the fragment program struct and destroy the
+ * compilation context.
+ */
+static void
+i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
+{
+ struct i915_fragment_shader *ifs = p->shader;
+ unsigned long program_size = (unsigned long) (p->csr - p->program);
+ unsigned long decl_size = (unsigned long) (p->decl - p->declarations);
+
+ if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
+ i915_program_error(p, "Exceeded max nr indirect texture lookups");
+
+ if (p->nr_tex_insn > I915_MAX_TEX_INSN)
+ i915_program_error(p, "Exceeded max TEX instructions");
+
+ if (p->nr_alu_insn > I915_MAX_ALU_INSN)
+ i915_program_error(p, "Exceeded max ALU instructions");
+
+ if (p->nr_decl_insn > I915_MAX_DECL_INSN)
+ i915_program_error(p, "Exceeded max DECL instructions");
+
+ if (p->error) {
+ p->NumNativeInstructions = 0;
+ p->NumNativeAluInstructions = 0;
+ p->NumNativeTexInstructions = 0;
+ p->NumNativeTexIndirections = 0;
+
+ i915_use_passthrough_shader(ifs);
+ }
+ else {
+ p->NumNativeInstructions
+ = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
+ p->NumNativeAluInstructions = p->nr_alu_insn;
+ p->NumNativeTexInstructions = p->nr_tex_insn;
+ p->NumNativeTexIndirections = p->nr_tex_indirect;
+
+ /* patch in the program length */
+ p->declarations[0] |= program_size + decl_size - 2;
+
+ /* Copy compilation results to fragment program struct:
+ */
+ assert(!ifs->program);
+ ifs->program
+ = (uint *) MALLOC((program_size + decl_size) * sizeof(uint));
+ if (ifs->program) {
+ ifs->program_len = program_size + decl_size;
+
+ memcpy(ifs->program,
+ p->declarations,
+ decl_size * sizeof(uint));
+
+ memcpy(ifs->program + decl_size,
+ p->program,
+ program_size * sizeof(uint));
+ }
+ }
+
+ /* Release the compilation struct:
+ */
+ FREE(p);
+}
+
+
+/**
+ * Find an unused texture coordinate slot to use for fragment WPOS.
+ * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found).
+ */
+static void
+i915_find_wpos_space(struct i915_fp_compile *p)
+{
+#if 0
+ const uint inputs
+ = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/
+ uint i;
+
+ p->wpos_tex = -1;
+
+ if (inputs & (1 << TGSI_ATTRIB_POS)) {
+ for (i = 0; i < I915_TEX_UNITS; i++) {
+ if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) {
+ p->wpos_tex = i;
+ return;
+ }
+ }
+
+ i915_program_error(p, "No free texcoord for wpos value");
+ }
+#else
+ if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
+ /* frag shader using the fragment position input */
+#if 0
+ assert(0);
+#endif
+ }
+#endif
+}
+
+
+
+
+/**
+ * Rather than trying to intercept and jiggle depth writes during
+ * emit, just move the value into its correct position at the end of
+ * the program:
+ */
+static void
+i915_fixup_depth_write(struct i915_fp_compile *p)
+{
+ /* XXX assuming pos/depth is always in output[0] */
+ if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
+ const uint depth = UREG(REG_TYPE_OD, 0);
+
+ i915_emit_arith(p,
+ A0_MOV, /* opcode */
+ depth, /* dest reg */
+ A0_DEST_CHANNEL_W, /* write mask */
+ 0, /* saturate? */
+ swizzle(depth, X, Y, Z, Z), /* src0 */
+ 0, 0 /* src1, src2 */);
+ }
+}
+
+
+void
+i915_translate_fragment_program( struct i915_context *i915,
+ struct i915_fragment_shader *fs)
+{
+ struct i915_fp_compile *p = i915_init_compile(i915, fs);
+ const struct tgsi_token *tokens = fs->state.tokens;
+
+ i915_find_wpos_space(p);
+
+#if 0
+ tgsi_dump(tokens, 0);
+#endif
+
+ i915_translate_instructions(p, tokens);
+ i915_fixup_depth_write(p);
+
+ i915_fini_compile(i915, p);
+}
diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c
new file mode 100644
index 0000000000..8f1f58b2dd
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_prim_emit.c
@@ -0,0 +1,220 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "draw/draw_pipe.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+
+#include "i915_context.h"
+#include "i915_winsys.h"
+#include "i915_reg.h"
+#include "i915_state.h"
+#include "i915_batch.h"
+
+
+
+/**
+ * Primitive emit to hardware. No support for vertex buffers or any
+ * nice fast paths.
+ */
+struct setup_stage {
+ struct draw_stage stage; /**< This must be first (base class) */
+
+ struct i915_context *i915;
+};
+
+
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
+{
+ return (struct setup_stage *)stage;
+}
+
+
+/**
+ * Extract the needed fields from vertex_header and emit i915 dwords.
+ * Recall that the vertices are constructed by the 'draw' module and
+ * have a couple of slots at the beginning (1-dword header, 4-dword
+ * clip pos) that we ignore here.
+ */
+static INLINE void
+emit_hw_vertex( struct i915_context *i915,
+ const struct vertex_header *vertex)
+{
+ const struct vertex_info *vinfo = &i915->current.vertex_info;
+ uint i;
+ uint count = 0; /* for debug/sanity */
+
+ assert(!i915->dirty);
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ const uint j = vinfo->attrib[i].src_index;
+ const float *attrib = vertex->data[j];
+ switch (vinfo->attrib[i].emit) {
+ case EMIT_1F:
+ OUT_BATCH( fui(attrib[0]) );
+ count++;
+ break;
+ case EMIT_2F:
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
+ count += 2;
+ break;
+ case EMIT_3F:
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
+ OUT_BATCH( fui(attrib[2]) );
+ count += 3;
+ break;
+ case EMIT_4F:
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
+ OUT_BATCH( fui(attrib[2]) );
+ OUT_BATCH( fui(attrib[3]) );
+ count += 4;
+ break;
+ case EMIT_4UB:
+ OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ),
+ float_to_ubyte( attrib[1] ),
+ float_to_ubyte( attrib[0] ),
+ float_to_ubyte( attrib[3] )) );
+ count += 1;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ assert(count == vinfo->size);
+}
+
+
+
+static INLINE void
+emit_prim( struct draw_stage *stage,
+ struct prim_header *prim,
+ unsigned hwprim,
+ unsigned nr )
+{
+ struct i915_context *i915 = setup_stage(stage)->i915;
+ unsigned vertex_size;
+ unsigned i;
+
+ if (i915->dirty)
+ i915_update_derived( i915 );
+
+ if (i915->hardware_dirty)
+ i915_emit_hardware_state( i915 );
+
+ /* need to do this after validation! */
+ vertex_size = i915->current.vertex_info.size * 4; /* in bytes */
+ assert(vertex_size >= 12); /* never smaller than 12 bytes */
+
+ if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) {
+ FLUSH_BATCH(NULL);
+
+ /* Make sure state is re-emitted after a flush:
+ */
+ i915_update_derived( i915 );
+ i915_emit_hardware_state( i915 );
+
+ if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) {
+ assert(0);
+ return;
+ }
+ }
+
+ /* Emit each triangle as a single primitive. I told you this was
+ * simple.
+ */
+ OUT_BATCH(_3DPRIMITIVE |
+ hwprim |
+ ((4 + vertex_size * nr)/4 - 2));
+
+ for (i = 0; i < nr; i++)
+ emit_hw_vertex(i915, prim->v[i]);
+}
+
+
+static void
+setup_tri( struct draw_stage *stage, struct prim_header *prim )
+{
+ emit_prim( stage, prim, PRIM3D_TRILIST, 3 );
+}
+
+
+static void
+setup_line(struct draw_stage *stage, struct prim_header *prim)
+{
+ emit_prim( stage, prim, PRIM3D_LINELIST, 2 );
+}
+
+
+static void
+setup_point(struct draw_stage *stage, struct prim_header *prim)
+{
+ emit_prim( stage, prim, PRIM3D_POINTLIST, 1 );
+}
+
+
+static void setup_flush( struct draw_stage *stage, unsigned flags )
+{
+}
+
+static void reset_stipple_counter( struct draw_stage *stage )
+{
+}
+
+static void render_destroy( struct draw_stage *stage )
+{
+ FREE( stage );
+}
+
+
+/**
+ * Create a new primitive setup/render stage. This gets plugged into
+ * the 'draw' module's pipeline.
+ */
+struct draw_stage *i915_draw_render_stage( struct i915_context *i915 )
+{
+ struct setup_stage *setup = CALLOC_STRUCT(setup_stage);
+
+ setup->i915 = i915;
+ setup->stage.draw = i915->draw;
+ setup->stage.point = setup_point;
+ setup->stage.line = setup_line;
+ setup->stage.tri = setup_tri;
+ setup->stage.flush = setup_flush;
+ setup->stage.reset_stipple_counter = reset_stipple_counter;
+ setup->stage.destroy = render_destroy;
+
+ return &setup->stage;
+}
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
new file mode 100644
index 0000000000..a8e97e7c30
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -0,0 +1,545 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Build post-transformation, post-clipping vertex buffers and element
+ * lists by hooking into the end of the primitive pipeline and
+ * manipulating the vertex_id field in the vertex headers.
+ *
+ * XXX: work in progress
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+#include "i915_winsys.h"
+#include "i915_batch.h"
+#include "i915_state.h"
+
+
+/**
+ * Primitive renderer for i915.
+ */
+struct i915_vbuf_render {
+ struct vbuf_render base;
+
+ struct i915_context *i915;
+
+ /** Vertex size in bytes */
+ unsigned vertex_size;
+
+ /** Software primitive */
+ unsigned prim;
+
+ /** Hardware primitive */
+ unsigned hwprim;
+
+ /** Genereate a vertex list */
+ unsigned fallback;
+
+ /* Stuff for the vbo */
+ struct pipe_buffer *vbo;
+ size_t vbo_size;
+ size_t vbo_offset;
+ void *vbo_ptr;
+ size_t vbo_alloc_size;
+};
+
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct i915_vbuf_render *
+i915_vbuf_render( struct vbuf_render *render )
+{
+ assert(render);
+ return (struct i915_vbuf_render *)render;
+}
+
+
+static const struct vertex_info *
+i915_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+
+ if (i915->dirty) {
+ /* make sure we have up to date vertex layout */
+ i915_update_derived( i915 );
+ }
+
+ return &i915->current.vertex_info;
+}
+
+
+static void *
+i915_vbuf_render_allocate_vertices( struct vbuf_render *render,
+ ushort vertex_size,
+ ushort nr_vertices )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+ struct pipe_screen *screen = i915->pipe.screen;
+ size_t size = (size_t)vertex_size * (size_t)nr_vertices;
+
+ /* FIXME: handle failure */
+ assert(!i915->vbo);
+
+ if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) {
+ } else {
+ i915->vbo_flushed = 0;
+ pipe_buffer_reference(screen, &i915_render->vbo, NULL);
+ }
+
+ if (!i915_render->vbo) {
+ i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size);
+ i915_render->vbo_offset = 0;
+ i915_render->vbo = pipe_buffer_create(screen,
+ 64,
+ I915_BUFFER_USAGE_LIT_VERTEX,
+ i915_render->vbo_size);
+ i915_render->vbo_ptr = pipe_buffer_map(screen,
+ i915_render->vbo,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ pipe_buffer_unmap(screen, i915_render->vbo);
+ }
+
+ i915->vbo = i915_render->vbo;
+ i915->vbo_offset = i915_render->vbo_offset;
+ i915->dirty |= I915_NEW_VBO;
+
+ return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset;
+}
+
+
+static boolean
+i915_vbuf_render_set_primitive( struct vbuf_render *render,
+ unsigned prim )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ i915_render->prim = prim;
+
+ switch(prim) {
+ case PIPE_PRIM_POINTS:
+ i915_render->hwprim = PRIM3D_POINTLIST;
+ i915_render->fallback = 0;
+ return TRUE;
+ case PIPE_PRIM_LINES:
+ i915_render->hwprim = PRIM3D_LINELIST;
+ i915_render->fallback = 0;
+ return TRUE;
+ case PIPE_PRIM_LINE_LOOP:
+ i915_render->hwprim = PRIM3D_LINELIST;
+ i915_render->fallback = PIPE_PRIM_LINE_LOOP;
+ return TRUE;
+ case PIPE_PRIM_LINE_STRIP:
+ i915_render->hwprim = PRIM3D_LINESTRIP;
+ i915_render->fallback = 0;
+ return TRUE;
+ case PIPE_PRIM_TRIANGLES:
+ i915_render->hwprim = PRIM3D_TRILIST;
+ i915_render->fallback = 0;
+ return TRUE;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ i915_render->hwprim = PRIM3D_TRISTRIP;
+ i915_render->fallback = 0;
+ return TRUE;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ i915_render->hwprim = PRIM3D_TRIFAN;
+ i915_render->fallback = 0;
+ return TRUE;
+ case PIPE_PRIM_QUADS:
+ i915_render->hwprim = PRIM3D_TRILIST;
+ i915_render->fallback = PIPE_PRIM_QUADS;
+ return TRUE;
+ case PIPE_PRIM_QUAD_STRIP:
+ i915_render->hwprim = PRIM3D_TRILIST;
+ i915_render->fallback = PIPE_PRIM_QUAD_STRIP;
+ return TRUE;
+ case PIPE_PRIM_POLYGON:
+ i915_render->hwprim = PRIM3D_POLY;
+ i915_render->fallback = 0;
+ return TRUE;
+ default:
+ /* FIXME: Actually, can handle a lot more just fine... */
+ return FALSE;
+ }
+}
+
+
+
+/**
+ * Used for fallbacks in draw_arrays
+ */
+static void
+draw_arrays_generate_indices( struct vbuf_render *render,
+ unsigned start, uint nr,
+ unsigned type )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+ unsigned i;
+ unsigned end = start + nr;
+ switch(type) {
+ case 0:
+ for (i = start; i+1 < end; i += 2)
+ OUT_BATCH( (i+0) | (i+1) << 16 );
+ if (i < end)
+ OUT_BATCH( i );
+ break;
+ case PIPE_PRIM_LINE_LOOP:
+ if (nr >= 2) {
+ for (i = start + 1; i < end; i++)
+ OUT_BATCH( (i-0) | (i+0) << 16 );
+ OUT_BATCH( (i-0) | ( start) << 16 );
+ }
+ break;
+ case PIPE_PRIM_QUADS:
+ for (i = start; i + 3 < end; i += 4) {
+ OUT_BATCH( (i+0) | (i+1) << 16 );
+ OUT_BATCH( (i+3) | (i+1) << 16 );
+ OUT_BATCH( (i+2) | (i+3) << 16 );
+ }
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ for (i = start; i + 3 < end; i += 2) {
+ OUT_BATCH( (i+0) | (i+1) << 16 );
+ OUT_BATCH( (i+3) | (i+2) << 16 );
+ OUT_BATCH( (i+0) | (i+3) << 16 );
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static unsigned
+draw_arrays_calc_nr_indices( uint nr, unsigned type )
+{
+ switch (type) {
+ case 0:
+ return nr;
+ case PIPE_PRIM_LINE_LOOP:
+ if (nr >= 2)
+ return nr * 2;
+ else
+ return 0;
+ case PIPE_PRIM_QUADS:
+ return (nr / 4) * 6;
+ case PIPE_PRIM_QUAD_STRIP:
+ return ((nr - 2) / 2) * 6;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void
+draw_arrays_fallback( struct vbuf_render *render,
+ unsigned start,
+ uint nr )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+ unsigned nr_indices;
+
+ if (i915->dirty)
+ i915_update_derived( i915 );
+
+ if (i915->hardware_dirty)
+ i915_emit_hardware_state( i915 );
+
+ nr_indices = draw_arrays_calc_nr_indices( nr, i915_render->fallback );
+ if (!nr_indices)
+ return;
+
+ if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) {
+ FLUSH_BATCH(NULL);
+
+ /* Make sure state is re-emitted after a flush:
+ */
+ i915_update_derived( i915 );
+ i915_emit_hardware_state( i915 );
+ i915->vbo_flushed = 1;
+
+ if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) {
+ assert(0);
+ goto out;
+ }
+ }
+ OUT_BATCH( _3DPRIMITIVE |
+ PRIM_INDIRECT |
+ i915_render->hwprim |
+ PRIM_INDIRECT_ELTS |
+ nr_indices );
+
+ draw_arrays_generate_indices( render, start, nr, i915_render->fallback );
+
+out:
+ return;
+}
+
+static void
+i915_vbuf_render_draw_arrays( struct vbuf_render *render,
+ unsigned start,
+ uint nr )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+
+ if (i915_render->fallback) {
+ draw_arrays_fallback( render, start, nr );
+ return;
+ }
+
+ /* JB: TODO submit direct cmds */
+ draw_arrays_fallback( render, start, nr );
+}
+
+/**
+ * Used for normal and fallback emitting of indices
+ * If type is zero normal operation assumed.
+ */
+static void
+draw_generate_indices( struct vbuf_render *render,
+ const ushort *indices,
+ uint nr_indices,
+ unsigned type )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+ unsigned i;
+
+ switch(type) {
+ case 0:
+ for (i = 0; i + 1 < nr_indices; i += 2) {
+ OUT_BATCH( indices[i] | indices[i+1] << 16 );
+ }
+ if (i < nr_indices) {
+ OUT_BATCH( indices[i] );
+ }
+ break;
+ case PIPE_PRIM_LINE_LOOP:
+ if (nr_indices >= 2) {
+ for (i = 1; i < nr_indices; i++)
+ OUT_BATCH( indices[i-1] | indices[i] << 16 );
+ OUT_BATCH( indices[i-1] | indices[0] << 16 );
+ }
+ break;
+ case PIPE_PRIM_QUADS:
+ for (i = 0; i + 3 < nr_indices; i += 4) {
+ OUT_BATCH( indices[i+0] | indices[i+1] << 16 );
+ OUT_BATCH( indices[i+3] | indices[i+1] << 16 );
+ OUT_BATCH( indices[i+2] | indices[i+3] << 16 );
+ }
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ for (i = 0; i + 3 < nr_indices; i += 2) {
+ OUT_BATCH( indices[i+0] | indices[i+1] << 16 );
+ OUT_BATCH( indices[i+3] | indices[i+2] << 16 );
+ OUT_BATCH( indices[i+0] | indices[i+3] << 16 );
+ }
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static unsigned
+draw_calc_nr_indices( uint nr_indices, unsigned type )
+{
+ switch (type) {
+ case 0:
+ return nr_indices;
+ case PIPE_PRIM_LINE_LOOP:
+ if (nr_indices >= 2)
+ return nr_indices * 2;
+ else
+ return 0;
+ case PIPE_PRIM_QUADS:
+ return (nr_indices / 4) * 6;
+ case PIPE_PRIM_QUAD_STRIP:
+ return ((nr_indices - 2) / 2) * 6;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void
+i915_vbuf_render_draw( struct vbuf_render *render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+ unsigned save_nr_indices;
+
+ save_nr_indices = nr_indices;
+
+ nr_indices = draw_calc_nr_indices( nr_indices, i915_render->fallback );
+ if (!nr_indices)
+ return;
+
+ if (i915->dirty)
+ i915_update_derived( i915 );
+
+ if (i915->hardware_dirty)
+ i915_emit_hardware_state( i915 );
+
+ if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) {
+ FLUSH_BATCH(NULL);
+
+ /* Make sure state is re-emitted after a flush:
+ */
+ i915_update_derived( i915 );
+ i915_emit_hardware_state( i915 );
+ i915->vbo_flushed = 1;
+
+ if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) {
+ assert(0);
+ goto out;
+ }
+ }
+
+ OUT_BATCH( _3DPRIMITIVE |
+ PRIM_INDIRECT |
+ i915_render->hwprim |
+ PRIM_INDIRECT_ELTS |
+ nr_indices );
+ draw_generate_indices( render,
+ indices,
+ save_nr_indices,
+ i915_render->fallback );
+
+out:
+ return;
+}
+
+
+static void
+i915_vbuf_render_release_vertices( struct vbuf_render *render,
+ void *vertices,
+ unsigned vertex_size,
+ unsigned vertices_used )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ struct i915_context *i915 = i915_render->i915;
+ size_t size = (size_t)vertex_size * (size_t)vertices_used;
+
+ assert(i915->vbo);
+
+ i915_render->vbo_offset += size;
+ i915->vbo = NULL;
+ i915->dirty |= I915_NEW_VBO;
+}
+
+
+static void
+i915_vbuf_render_destroy( struct vbuf_render *render )
+{
+ struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
+ FREE(i915_render);
+}
+
+
+/**
+ * Create a new primitive render.
+ */
+static struct vbuf_render *
+i915_vbuf_render_create( struct i915_context *i915 )
+{
+ struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render);
+ struct pipe_screen *screen = i915->pipe.screen;
+
+ i915_render->i915 = i915;
+
+ i915_render->base.max_vertex_buffer_bytes = 128*1024;
+
+ /* NOTE: it must be such that state and vertices indices fit in a single
+ * batch buffer.
+ */
+ i915_render->base.max_indices = 16*1024;
+
+ i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info;
+ i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices;
+ i915_render->base.set_primitive = i915_vbuf_render_set_primitive;
+ i915_render->base.draw = i915_vbuf_render_draw;
+ i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays;
+ i915_render->base.release_vertices = i915_vbuf_render_release_vertices;
+ i915_render->base.destroy = i915_vbuf_render_destroy;
+
+ i915_render->vbo_alloc_size = 128 * 4096;
+ i915_render->vbo_size = i915_render->vbo_alloc_size;
+ i915_render->vbo_offset = 0;
+ i915_render->vbo = pipe_buffer_create(screen,
+ 64,
+ I915_BUFFER_USAGE_LIT_VERTEX,
+ i915_render->vbo_size);
+ i915_render->vbo_ptr = pipe_buffer_map(screen,
+ i915_render->vbo,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ pipe_buffer_unmap(screen, i915_render->vbo);
+
+ return &i915_render->base;
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ */
+struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 )
+{
+ struct vbuf_render *render;
+ struct draw_stage *stage;
+
+ render = i915_vbuf_render_create(i915);
+ if(!render)
+ return NULL;
+
+ stage = draw_vbuf_stage( i915->draw, render );
+ if(!stage) {
+ render->destroy(render);
+ return NULL;
+ }
+ /** TODO JB: this shouldn't be here */
+ draw_set_render(i915->draw, render);
+
+ return stage;
+}
diff --git a/src/gallium/drivers/i915simple/i915_reg.h b/src/gallium/drivers/i915simple/i915_reg.h
new file mode 100644
index 0000000000..04620fec68
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_reg.h
@@ -0,0 +1,978 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef I915_REG_H
+#define I915_REG_H
+
+
+#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value)
+
+#define CMD_3D (0x3<<29)
+
+#define PRIM3D_INLINE (CMD_3D | (0x1f<<24))
+#define PRIM3D_TRILIST (0x0<<18)
+#define PRIM3D_TRISTRIP (0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE (0x2<<18)
+#define PRIM3D_TRIFAN (0x3<<18)
+#define PRIM3D_POLY (0x4<<18)
+#define PRIM3D_LINELIST (0x5<<18)
+#define PRIM3D_LINESTRIP (0x6<<18)
+#define PRIM3D_RECTLIST (0x7<<18)
+#define PRIM3D_POINTLIST (0x8<<18)
+#define PRIM3D_DIB (0x9<<18)
+#define PRIM3D_CLEAR_RECT (0xa<<18)
+#define PRIM3D_ZONE_INIT (0xd<<18)
+#define PRIM3D_MASK (0x1f<<18)
+
+/* p137 */
+#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24))
+#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16)
+#define AA_LINE_ECAAR_WIDTH_0_5 0
+#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14)
+#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14)
+#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14)
+#define AA_LINE_REGION_WIDTH_ENABLE (1<<8)
+#define AA_LINE_REGION_WIDTH_0_5 0
+#define AA_LINE_REGION_WIDTH_1_0 (1<<6)
+#define AA_LINE_REGION_WIDTH_2_0 (2<<6)
+#define AA_LINE_REGION_WIDTH_4_0 (3<<6)
+
+/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/
+#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24))
+#define BFO_ENABLE_STENCIL_REF (1<<23)
+#define BFO_STENCIL_REF_SHIFT 15
+#define BFO_STENCIL_REF_MASK (0xff<<15)
+#define BFO_ENABLE_STENCIL_FUNCS (1<<14)
+#define BFO_STENCIL_TEST_SHIFT 11
+#define BFO_STENCIL_TEST_MASK (0x7<<11)
+#define BFO_STENCIL_FAIL_SHIFT 8
+#define BFO_STENCIL_FAIL_MASK (0x7<<8)
+#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5
+#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5)
+#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2
+#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2)
+#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1)
+#define BFO_STENCIL_TWO_SIDE (1<<0)
+
+
+/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */
+#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24))
+#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17)
+#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16)
+#define BFM_STENCIL_TEST_MASK_SHIFT 8
+#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8)
+#define BFM_STENCIL_WRITE_MASK_SHIFT 0
+#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0)
+
+
+
+/* 3DSTATE_BIN_CONTROL p141 */
+
+/* p143 */
+#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK (0x3<<24)
+#define BUF_3D_ID_DEPTH (0x7<<24)
+#define BUF_3D_USE_FENCE (1<<23)
+#define BUF_3D_TILED_SURFACE (1<<22)
+#define BUF_3D_TILE_WALK_X 0
+#define BUF_3D_TILE_WALK_Y (1<<21)
+#define BUF_3D_PITCH(x) (((x)/4)<<2)
+/* Dword 2 */
+#define BUF_3D_ADDR(x) ((x) & ~0x3)
+
+
+/* 3DSTATE_CHROMA_KEY */
+
+/* 3DSTATE_CLEAR_PARAMETERS, p150 */
+#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5)
+/* Dword 1 */
+#define CLEARPARAM_CLEAR_RECT (1 << 16)
+#define CLEARPARAM_ZONE_INIT (0 << 16)
+#define CLEARPARAM_WRITE_COLOR (1 << 2)
+#define CLEARPARAM_WRITE_DEPTH (1 << 1)
+#define CLEARPARAM_WRITE_STENCIL (1 << 0)
+
+/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
+#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16))
+
+
+
+/* 3DSTATE_COORD_SET_BINDINGS, p154 */
+#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24))
+#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3))
+
+/* p156 */
+#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16))
+
+/* p157 */
+#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16))
+
+/* p158 */
+#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16))
+
+
+/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */
+#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16))
+/* scale in dword 1 */
+
+
+/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
+#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2)
+
+/* p161 */
+#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
+/* Dword 1 */
+#define TEX_DEFAULT_COLOR_OGL (0<<30)
+#define TEX_DEFAULT_COLOR_D3D (1<<30)
+#define ZR_EARLY_DEPTH (1<<29)
+#define LOD_PRECLAMP_OGL (1<<28)
+#define LOD_PRECLAMP_D3D (0<<28)
+#define DITHER_FULL_ALWAYS (0<<26)
+#define DITHER_FULL_ON_FB_BLEND (1<<26)
+#define DITHER_CLAMPED_ALWAYS (2<<26)
+#define LINEAR_GAMMA_BLEND_32BPP (1<<25)
+#define DEBUG_DISABLE_ENH_DITHER (1<<24)
+#define DSTORG_HORT_BIAS(x) ((x)<<20)
+#define DSTORG_VERT_BIAS(x) ((x)<<16)
+#define COLOR_4_2_2_CHNL_WRT_ALL 0
+#define COLOR_4_2_2_CHNL_WRT_Y (1<<12)
+#define COLOR_4_2_2_CHNL_WRT_CR (2<<12)
+#define COLOR_4_2_2_CHNL_WRT_CB (3<<12)
+#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12)
+#define COLOR_BUF_8BIT 0
+#define COLOR_BUF_RGB555 (1<<8)
+#define COLOR_BUF_RGB565 (2<<8)
+#define COLOR_BUF_ARGB8888 (3<<8)
+#define DEPTH_FRMT_16_FIXED 0
+#define DEPTH_FRMT_16_FLOAT (1<<2)
+#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2)
+#define VERT_LINE_STRIDE_1 (1<<1)
+#define VERT_LINE_STRIDE_0 (0<<1)
+#define VERT_LINE_STRIDE_OFS_1 1
+#define VERT_LINE_STRIDE_OFS_0 0
+
+/* p166 */
+#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
+/* Dword 1 */
+#define DRAW_RECT_DIS_DEPTH_OFS (1<<30)
+#define DRAW_DITHER_OFS_X(x) ((x)<<26)
+#define DRAW_DITHER_OFS_Y(x) ((x)<<24)
+/* Dword 2 */
+#define DRAW_YMIN(x) ((x)<<16)
+#define DRAW_XMIN(x) (x)
+/* Dword 3 */
+#define DRAW_YMAX(x) ((x)<<16)
+#define DRAW_XMAX(x) (x)
+/* Dword 4 */
+#define DRAW_YORG(x) ((x)<<16)
+#define DRAW_XORG(x) (x)
+
+
+/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */
+
+/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */
+
+
+/* _3DSTATE_FOG_COLOR, p173 */
+#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24))
+#define FOG_COLOR_RED(x) ((x)<<16)
+#define FOG_COLOR_GREEN(x) ((x)<<8)
+#define FOG_COLOR_BLUE(x) (x)
+
+/* _3DSTATE_FOG_MODE, p174 */
+#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
+/* Dword 1 */
+#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31)
+#define FMC1_FOGFUNC_VERTEX (0<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP (1<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28)
+#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28)
+#define FMC1_FOGFUNC_MASK (3<<28)
+#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27)
+#define FMC1_FOGINDEX_Z (0<<25)
+#define FMC1_FOGINDEX_W (1<<25)
+#define FMC1_C1_C2_MODIFY_ENABLE (1<<24)
+#define FMC1_DENSITY_MODIFY_ENABLE (1<<23)
+#define FMC1_C1_ONE (1<<13)
+#define FMC1_C1_MASK (0xffff<<4)
+/* Dword 2 */
+#define FMC2_C2_ONE (1<<16)
+/* Dword 3 */
+#define FMC3_D_ONE (1<<16)
+
+
+
+/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */
+#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24))
+#define IAB_MODIFY_ENABLE (1<<23)
+#define IAB_ENABLE (1<<22)
+#define IAB_MODIFY_FUNC (1<<21)
+#define IAB_FUNC_SHIFT 16
+#define IAB_MODIFY_SRC_FACTOR (1<<11)
+#define IAB_SRC_FACTOR_SHIFT 6
+#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6)
+#define IAB_MODIFY_DST_FACTOR (1<<5)
+#define IAB_DST_FACTOR_SHIFT 0
+#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0)
+
+
+#define BLENDFUNC_ADD 0x0
+#define BLENDFUNC_SUBTRACT 0x1
+#define BLENDFUNC_REVERSE_SUBTRACT 0x2
+#define BLENDFUNC_MIN 0x3
+#define BLENDFUNC_MAX 0x4
+#define BLENDFUNC_MASK 0x7
+
+/* 3DSTATE_LOAD_INDIRECT, p180 */
+
+#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16))
+#define LI0_STATE_STATIC_INDIRECT (0x01<<8)
+#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8)
+#define LI0_STATE_SAMPLER (0x04<<8)
+#define LI0_STATE_MAP (0x08<<8)
+#define LI0_STATE_PROGRAM (0x10<<8)
+#define LI0_STATE_CONSTANTS (0x20<<8)
+
+#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define SIS0_FORCE_LOAD (1<<1)
+#define SIS0_BUFFER_VALID (1<<0)
+#define SIS1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define DIS0_BUFFER_RESET (1<<1)
+#define DIS0_BUFFER_VALID (1<<0)
+
+#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define SSB0_FORCE_LOAD (1<<1)
+#define SSB0_BUFFER_VALID (1<<0)
+#define SSB1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define MSB0_FORCE_LOAD (1<<1)
+#define MSB0_BUFFER_VALID (1<<0)
+#define MSB1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define PSP0_FORCE_LOAD (1<<1)
+#define PSP0_BUFFER_VALID (1<<0)
+#define PSP1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define PSC0_FORCE_LOAD (1<<1)
+#define PSC0_BUFFER_VALID (1<<0)
+#define PSC1_BUFFER_LENGTH(x) ((x)&0xff)
+
+
+
+
+
+/* _3DSTATE_RASTERIZATION_RULES */
+#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24))
+#define ENABLE_POINT_RASTER_RULE (1<<15)
+#define OGL_POINT_RASTER_RULE (1<<13)
+#define ENABLE_TEXKILL_3D_4D (1<<10)
+#define TEXKILL_3D (0<<9)
+#define TEXKILL_4D (1<<9)
+#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
+#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
+#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
+
+/* _3DSTATE_SCISSOR_ENABLE, p256 */
+#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19))
+#define ENABLE_SCISSOR_RECT ((1<<1) | 1)
+#define DISABLE_SCISSOR_RECT (1<<1)
+
+/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */
+#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
+/* Dword 1 */
+#define SCISSOR_RECT_0_YMIN(x) ((x)<<16)
+#define SCISSOR_RECT_0_XMIN(x) (x)
+/* Dword 2 */
+#define SCISSOR_RECT_0_YMAX(x) ((x)<<16)
+#define SCISSOR_RECT_0_XMAX(x) (x)
+
+/* p189 */
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16))
+#define I1_LOAD_S(n) (1<<(4+n))
+
+#define S0_VB_OFFSET_MASK 0xffffffc
+#define S0_AUTO_CACHE_INV_DISABLE (1<<0)
+
+#define S1_VERTEX_WIDTH_SHIFT 24
+#define S1_VERTEX_WIDTH_MASK (0x3f<<24)
+#define S1_VERTEX_PITCH_SHIFT 16
+#define S1_VERTEX_PITCH_MASK (0x3f<<16)
+
+#define TEXCOORDFMT_2D 0x0
+#define TEXCOORDFMT_3D 0x1
+#define TEXCOORDFMT_4D 0x2
+#define TEXCOORDFMT_1D 0x3
+#define TEXCOORDFMT_2D_16 0x4
+#define TEXCOORDFMT_4D_16 0x5
+#define TEXCOORDFMT_NOT_PRESENT 0xf
+#define S2_TEXCOORD_FMT0_MASK 0xf
+#define S2_TEXCOORD_FMT1_SHIFT 4
+#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4))
+#define S2_TEXCOORD_NONE (~0)
+
+/* S3 not interesting */
+
+#define S4_POINT_WIDTH_SHIFT 23
+#define S4_POINT_WIDTH_MASK (0x1ff<<23)
+#define S4_LINE_WIDTH_SHIFT 19
+#define S4_LINE_WIDTH_ONE (0x2<<19)
+#define S4_LINE_WIDTH_MASK (0xf<<19)
+#define S4_FLATSHADE_ALPHA (1<<18)
+#define S4_FLATSHADE_FOG (1<<17)
+#define S4_FLATSHADE_SPECULAR (1<<16)
+#define S4_FLATSHADE_COLOR (1<<15)
+#define S4_CULLMODE_BOTH (0<<13)
+#define S4_CULLMODE_NONE (1<<13)
+#define S4_CULLMODE_CW (2<<13)
+#define S4_CULLMODE_CCW (3<<13)
+#define S4_CULLMODE_MASK (3<<13)
+#define S4_VFMT_POINT_WIDTH (1<<12)
+#define S4_VFMT_SPEC_FOG (1<<11)
+#define S4_VFMT_COLOR (1<<10)
+#define S4_VFMT_DEPTH_OFFSET (1<<9)
+#define S4_VFMT_XYZ (1<<6)
+#define S4_VFMT_XYZW (2<<6)
+#define S4_VFMT_XY (3<<6)
+#define S4_VFMT_XYW (4<<6)
+#define S4_VFMT_XYZW_MASK (7<<6)
+#define S4_FORCE_DEFAULT_DIFFUSE (1<<5)
+#define S4_FORCE_DEFAULT_SPECULAR (1<<4)
+#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3)
+#define S4_VFMT_FOG_PARAM (1<<2)
+#define S4_SPRITE_POINT_ENABLE (1<<1)
+#define S4_LINE_ANTIALIAS_ENABLE (1<<0)
+
+#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \
+ S4_VFMT_SPEC_FOG | \
+ S4_VFMT_COLOR | \
+ S4_VFMT_DEPTH_OFFSET | \
+ S4_VFMT_XYZW_MASK | \
+ S4_VFMT_FOG_PARAM)
+
+
+#define S5_WRITEDISABLE_ALPHA (1<<31)
+#define S5_WRITEDISABLE_RED (1<<30)
+#define S5_WRITEDISABLE_GREEN (1<<29)
+#define S5_WRITEDISABLE_BLUE (1<<28)
+#define S5_WRITEDISABLE_MASK (0xf<<28)
+#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27)
+#define S5_LAST_PIXEL_ENABLE (1<<26)
+#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25)
+#define S5_FOG_ENABLE (1<<24)
+#define S5_STENCIL_REF_SHIFT 16
+#define S5_STENCIL_REF_MASK (0xff<<16)
+#define S5_STENCIL_TEST_FUNC_SHIFT 13
+#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13)
+#define S5_STENCIL_FAIL_SHIFT 10
+#define S5_STENCIL_FAIL_MASK (0x7<<10)
+#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7
+#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7)
+#define S5_STENCIL_PASS_Z_PASS_SHIFT 4
+#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4)
+#define S5_STENCIL_WRITE_ENABLE (1<<3)
+#define S5_STENCIL_TEST_ENABLE (1<<2)
+#define S5_COLOR_DITHER_ENABLE (1<<1)
+#define S5_LOGICOP_ENABLE (1<<0)
+
+
+#define S6_ALPHA_TEST_ENABLE (1<<31)
+#define S6_ALPHA_TEST_FUNC_SHIFT 28
+#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28)
+#define S6_ALPHA_REF_SHIFT 20
+#define S6_ALPHA_REF_MASK (0xff<<20)
+#define S6_DEPTH_TEST_ENABLE (1<<19)
+#define S6_DEPTH_TEST_FUNC_SHIFT 16
+#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16)
+#define S6_CBUF_BLEND_ENABLE (1<<15)
+#define S6_CBUF_BLEND_FUNC_SHIFT 12
+#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12)
+#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8
+#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8)
+#define S6_CBUF_DST_BLEND_FACT_SHIFT 4
+#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4)
+#define S6_DEPTH_WRITE_ENABLE (1<<3)
+#define S6_COLOR_WRITE_ENABLE (1<<2)
+#define S6_TRISTRIP_PV_SHIFT 0
+#define S6_TRISTRIP_PV_MASK (0x3<<0)
+
+#define S7_DEPTH_OFFSET_CONST_MASK ~0
+
+
+
+#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT)
+#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT)
+#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT)
+#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT)
+
+
+
+
+/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */
+
+/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
+#define _3DSTATE_MAP_PALETTE_LOAD_32 (CMD_3D|(0x1d<<24)|(0x8f<<16))
+/* subsequent dwords up to length (max 16) are ARGB8888 color values */
+
+/* _3DSTATE_MODES_4, p218 */
+#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24))
+#define ENABLE_LOGIC_OP_FUNC (1<<23)
+#define LOGIC_OP_FUNC(x) ((x)<<18)
+#define LOGICOP_MASK (0xf<<18)
+#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00))
+#define ENABLE_STENCIL_TEST_MASK (1<<17)
+#define STENCIL_TEST_MASK(x) (((x)&0xff)<<8)
+#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff))
+#define ENABLE_STENCIL_WRITE_MASK (1<<16)
+#define STENCIL_WRITE_MASK(x) ((x)&0xff)
+
+/* _3DSTATE_MODES_5, p220 */
+#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24))
+#define PIPELINE_FLUSH_RENDER_CACHE (1<<18)
+#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16)
+
+
+/* p221 */
+#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16))
+#define PS1_REG(n) (1<<(n))
+#define PS2_CONST_X(n) (n)
+#define PS3_CONST_Y(n) (n)
+#define PS4_CONST_Z(n) (n)
+#define PS5_CONST_W(n) (n)
+
+/* p222 */
+
+
+#define I915_MAX_TEX_INDIRECT 4
+#define I915_MAX_TEX_INSN 32
+#define I915_MAX_ALU_INSN 64
+#define I915_MAX_DECL_INSN 27
+#define I915_MAX_TEMPORARY 16
+
+
+/* Each instruction is 3 dwords long, though most don't require all
+ * this space. Maximum of 123 instructions. Smaller maxes per insn
+ * type.
+ */
+#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16))
+
+#define REG_TYPE_R 0 /* temporary regs, no need to
+ * dcl, must be written before
+ * read -- Preserved between
+ * phases.
+ */
+#define REG_TYPE_T 1 /* Interpolated values, must be
+ * dcl'ed before use.
+ *
+ * 0..7: texture coord,
+ * 8: diffuse spec,
+ * 9: specular color,
+ * 10: fog parameter in w.
+ */
+#define REG_TYPE_CONST 2 /* Restriction: only one const
+ * can be referenced per
+ * instruction, though it may be
+ * selected for multiple inputs.
+ * Constants not initialized
+ * default to zero.
+ */
+#define REG_TYPE_S 3 /* sampler */
+#define REG_TYPE_OC 4 /* output color (rgba) */
+#define REG_TYPE_OD 5 /* output depth (w), xyz are
+ * temporaries. If not written,
+ * interpolated depth is used?
+ */
+#define REG_TYPE_U 6 /* unpreserved temporaries */
+#define REG_TYPE_MASK 0x7
+#define REG_NR_MASK 0xf
+
+
+/* REG_TYPE_T:
+ */
+#define T_TEX0 0
+#define T_TEX1 1
+#define T_TEX2 2
+#define T_TEX3 3
+#define T_TEX4 4
+#define T_TEX5 5
+#define T_TEX6 6
+#define T_TEX7 7
+#define T_DIFFUSE 8
+#define T_SPECULAR 9
+#define T_FOG_W 10 /* interpolated fog is in W coord */
+
+/* Arithmetic instructions */
+
+/* .replicate_swizzle == selection and replication of a particular
+ * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
+ */
+#define A0_NOP (0x0<<24) /* no operation */
+#define A0_ADD (0x1<<24) /* dst = src0 + src1 */
+#define A0_MOV (0x2<<24) /* dst = src0 */
+#define A0_MUL (0x3<<24) /* dst = src0 * src1 */
+#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */
+#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
+#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */
+#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */
+#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */
+#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */
+#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
+#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */
+#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
+#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */
+#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */
+#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */
+#define A0_FLR (0x10<<24) /* dst = floor(src0) */
+#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */
+#define A0_TRC (0x12<<24) /* dst = int(src0) */
+#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
+#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */
+#define A0_DEST_SATURATE (1<<22)
+#define A0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+#define A0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define A0_DEST_CHANNEL_X (1<<10)
+#define A0_DEST_CHANNEL_Y (2<<10)
+#define A0_DEST_CHANNEL_Z (4<<10)
+#define A0_DEST_CHANNEL_W (8<<10)
+#define A0_DEST_CHANNEL_ALL (0xf<<10)
+#define A0_DEST_CHANNEL_SHIFT 10
+#define A0_SRC0_TYPE_SHIFT 7
+#define A0_SRC0_NR_SHIFT 2
+
+#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
+#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
+
+
+#define SRC_X 0
+#define SRC_Y 1
+#define SRC_Z 2
+#define SRC_W 3
+#define SRC_ZERO 4
+#define SRC_ONE 5
+
+#define A1_SRC0_CHANNEL_X_NEGATE (1<<31)
+#define A1_SRC0_CHANNEL_X_SHIFT 28
+#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27)
+#define A1_SRC0_CHANNEL_Y_SHIFT 24
+#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23)
+#define A1_SRC0_CHANNEL_Z_SHIFT 20
+#define A1_SRC0_CHANNEL_W_NEGATE (1<<19)
+#define A1_SRC0_CHANNEL_W_SHIFT 16
+#define A1_SRC1_TYPE_SHIFT 13
+#define A1_SRC1_NR_SHIFT 8
+#define A1_SRC1_CHANNEL_X_NEGATE (1<<7)
+#define A1_SRC1_CHANNEL_X_SHIFT 4
+#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3)
+#define A1_SRC1_CHANNEL_Y_SHIFT 0
+
+#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31)
+#define A2_SRC1_CHANNEL_Z_SHIFT 28
+#define A2_SRC1_CHANNEL_W_NEGATE (1<<27)
+#define A2_SRC1_CHANNEL_W_SHIFT 24
+#define A2_SRC2_TYPE_SHIFT 21
+#define A2_SRC2_NR_SHIFT 16
+#define A2_SRC2_CHANNEL_X_NEGATE (1<<15)
+#define A2_SRC2_CHANNEL_X_SHIFT 12
+#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11)
+#define A2_SRC2_CHANNEL_Y_SHIFT 8
+#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7)
+#define A2_SRC2_CHANNEL_Z_SHIFT 4
+#define A2_SRC2_CHANNEL_W_NEGATE (1<<3)
+#define A2_SRC2_CHANNEL_W_SHIFT 0
+
+
+
+/* Texture instructions */
+#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared
+ * sampler and address, and output
+ * filtered texel data to destination
+ * register */
+#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a
+ * perspective divide of the texture
+ * coordinate .xyz values by .w before
+ * sampling. */
+#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the
+ * computed LOD by w. Only S4.6 two's
+ * comp is used. This implies that a
+ * float to fixed conversion is
+ * done. */
+#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling
+ * operation. Simply kills the pixel
+ * if any channel of the address
+ * register is < 0.0. */
+#define T0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+/* Note: U (unpreserved) regs do not retain their values between
+ * phases (cannot be used for feedback)
+ *
+ * Note: oC and OD registers can only be used as the destination of a
+ * texture instruction once per phase (this is an implementation
+ * restriction).
+ */
+#define T0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_MASK (0xf<<0)
+
+#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */
+/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
+#define T1_ADDRESS_REG_NR_SHIFT 17
+#define T2_MBZ 0
+
+/* Declaration instructions */
+#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib)
+ * register or an s (sampler)
+ * register. */
+#define D0_SAMPLE_TYPE_SHIFT 22
+#define D0_SAMPLE_TYPE_2D (0x0<<22)
+#define D0_SAMPLE_TYPE_CUBE (0x1<<22)
+#define D0_SAMPLE_TYPE_VOLUME (0x2<<22)
+#define D0_SAMPLE_TYPE_MASK (0x3<<22)
+
+#define D0_TYPE_SHIFT 19
+/* Allow: T, S */
+#define D0_NR_SHIFT 14
+/* Allow T: 0..10, S: 0..15 */
+#define D0_CHANNEL_X (1<<10)
+#define D0_CHANNEL_Y (2<<10)
+#define D0_CHANNEL_Z (4<<10)
+#define D0_CHANNEL_W (8<<10)
+#define D0_CHANNEL_ALL (0xf<<10)
+#define D0_CHANNEL_NONE (0<<10)
+
+#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y)
+#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z)
+
+/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
+ * or specular declarations.
+ *
+ * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
+ *
+ * Must be zero for S (sampler) dcls
+ */
+#define D1_MBZ 0
+#define D2_MBZ 0
+
+
+
+/* p207 */
+#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16))
+
+#define MS1_MAPMASK_SHIFT 0
+#define MS1_MAPMASK_MASK (0x8fff<<0)
+
+#define MS2_UNTRUSTED_SURFACE (1<<31)
+#define MS2_ADDRESS_MASK 0xfffffffc
+#define MS2_VERTICAL_LINE_STRIDE (1<<1)
+#define MS2_VERTICAL_OFFSET (1<<1)
+
+#define MS3_HEIGHT_SHIFT 21
+#define MS3_WIDTH_SHIFT 10
+#define MS3_PALETTE_SELECT (1<<9)
+#define MS3_MAPSURF_FORMAT_SHIFT 7
+#define MS3_MAPSURF_FORMAT_MASK (0x7<<7)
+#define MAPSURF_8BIT (1<<7)
+#define MAPSURF_16BIT (2<<7)
+#define MAPSURF_32BIT (3<<7)
+#define MAPSURF_422 (5<<7)
+#define MAPSURF_COMPRESSED (6<<7)
+#define MAPSURF_4BIT_INDEXED (7<<7)
+#define MS3_MT_FORMAT_MASK (0x7 << 3)
+#define MS3_MT_FORMAT_SHIFT 3
+#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */
+#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */
+#define MT_8BIT_L8 (1<<3)
+#define MT_8BIT_A8 (4<<3)
+#define MT_8BIT_MONO8 (5<<3)
+#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */
+#define MT_16BIT_ARGB1555 (1<<3)
+#define MT_16BIT_ARGB4444 (2<<3)
+#define MT_16BIT_AY88 (3<<3)
+#define MT_16BIT_88DVDU (5<<3)
+#define MT_16BIT_BUMP_655LDVDU (6<<3)
+#define MT_16BIT_I16 (7<<3)
+#define MT_16BIT_L16 (8<<3)
+#define MT_16BIT_A16 (9<<3)
+#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */
+#define MT_32BIT_ABGR8888 (1<<3)
+#define MT_32BIT_XRGB8888 (2<<3)
+#define MT_32BIT_XBGR8888 (3<<3)
+#define MT_32BIT_QWVU8888 (4<<3)
+#define MT_32BIT_AXVU8888 (5<<3)
+#define MT_32BIT_LXVU8888 (6<<3)
+#define MT_32BIT_XLVU8888 (7<<3)
+#define MT_32BIT_ARGB2101010 (8<<3)
+#define MT_32BIT_ABGR2101010 (9<<3)
+#define MT_32BIT_AWVU2101010 (0xA<<3)
+#define MT_32BIT_GR1616 (0xB<<3)
+#define MT_32BIT_VU1616 (0xC<<3)
+#define MT_32BIT_xI824 (0xD<<3)
+#define MT_32BIT_xA824 (0xE<<3)
+#define MT_32BIT_xL824 (0xF<<3)
+#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */
+#define MT_422_YCRCB_NORMAL (1<<3)
+#define MT_422_YCRCB_SWAPUV (2<<3)
+#define MT_422_YCRCB_SWAPUVY (3<<3)
+#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */
+#define MT_COMPRESS_DXT2_3 (1<<3)
+#define MT_COMPRESS_DXT4_5 (2<<3)
+#define MT_COMPRESS_FXT1 (3<<3)
+#define MT_COMPRESS_DXT1_RGB (4<<3)
+#define MS3_USE_FENCE_REGS (1<<2)
+#define MS3_TILED_SURFACE (1<<1)
+#define MS3_TILE_WALK (1<<0)
+
+#define MS4_PITCH_SHIFT 21
+#define MS4_CUBE_FACE_ENA_NEGX (1<<20)
+#define MS4_CUBE_FACE_ENA_POSX (1<<19)
+#define MS4_CUBE_FACE_ENA_NEGY (1<<18)
+#define MS4_CUBE_FACE_ENA_POSY (1<<17)
+#define MS4_CUBE_FACE_ENA_NEGZ (1<<16)
+#define MS4_CUBE_FACE_ENA_POSZ (1<<15)
+#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15)
+#define MS4_MAX_LOD_SHIFT 9
+#define MS4_MAX_LOD_MASK (0x3f<<9)
+#define MS4_MIP_LAYOUT_LEGACY (0<<8)
+#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8)
+#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8)
+#define MS4_VOLUME_DEPTH_SHIFT 0
+#define MS4_VOLUME_DEPTH_MASK (0xff<<0)
+
+/* p244 */
+#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16))
+
+#define SS1_MAPMASK_SHIFT 0
+#define SS1_MAPMASK_MASK (0x8fff<<0)
+
+#define SS2_REVERSE_GAMMA_ENABLE (1<<31)
+#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30)
+#define SS2_COLORSPACE_CONVERSION (1<<29)
+#define SS2_CHROMAKEY_SHIFT 27
+#define SS2_BASE_MIP_LEVEL_SHIFT 22
+#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22)
+#define SS2_MIP_FILTER_SHIFT 20
+#define SS2_MIP_FILTER_MASK (0x3<<20)
+#define MIPFILTER_NONE 0
+#define MIPFILTER_NEAREST 1
+#define MIPFILTER_LINEAR 3
+#define SS2_MAG_FILTER_SHIFT 17
+#define SS2_MAG_FILTER_MASK (0x7<<17)
+#define FILTER_NEAREST 0
+#define FILTER_LINEAR 1
+#define FILTER_ANISOTROPIC 2
+#define FILTER_4X4_1 3
+#define FILTER_4X4_2 4
+#define FILTER_4X4_FLAT 5
+#define FILTER_6X5_MONO 6 /* XXX - check */
+#define SS2_MIN_FILTER_SHIFT 14
+#define SS2_MIN_FILTER_MASK (0x7<<14)
+#define SS2_LOD_BIAS_SHIFT 5
+#define SS2_LOD_BIAS_ONE (0x10<<5)
+#define SS2_LOD_BIAS_MASK (0x1ff<<5)
+/* Shadow requires:
+ * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format
+ * FILTER_4X4_x MIN and MAG filters
+ */
+#define SS2_SHADOW_ENABLE (1<<4)
+#define SS2_MAX_ANISO_MASK (1<<3)
+#define SS2_MAX_ANISO_2 (0<<3)
+#define SS2_MAX_ANISO_4 (1<<3)
+#define SS2_SHADOW_FUNC_SHIFT 0
+#define SS2_SHADOW_FUNC_MASK (0x7<<0)
+/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */
+
+#define SS3_MIN_LOD_SHIFT 24
+#define SS3_MIN_LOD_ONE (0x10<<24)
+#define SS3_MIN_LOD_MASK (0xff<<24)
+#define SS3_KILL_PIXEL_ENABLE (1<<17)
+#define SS3_TCX_ADDR_MODE_SHIFT 12
+#define SS3_TCX_ADDR_MODE_MASK (0x7<<12)
+#define TEXCOORDMODE_WRAP 0
+#define TEXCOORDMODE_MIRROR 1
+#define TEXCOORDMODE_CLAMP_EDGE 2
+#define TEXCOORDMODE_CUBE 3
+#define TEXCOORDMODE_CLAMP_BORDER 4
+#define TEXCOORDMODE_MIRROR_ONCE 5
+#define SS3_TCY_ADDR_MODE_SHIFT 9
+#define SS3_TCY_ADDR_MODE_MASK (0x7<<9)
+#define SS3_TCZ_ADDR_MODE_SHIFT 6
+#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6)
+#define SS3_NORMALIZED_COORDS (1<<5)
+#define SS3_TEXTUREMAP_INDEX_SHIFT 1
+#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1)
+#define SS3_DEINTERLACER_ENABLE (1<<0)
+
+#define SS4_BORDER_COLOR_MASK (~0)
+
+/* 3DSTATE_SPAN_STIPPLE, p258
+ */
+#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+#define ST1_ENABLE (1<<16)
+#define ST1_MASK (0xffff)
+
+#define _3DSTATE_DEFAULT_Z ((0x3<<29)|(0x1d<<24)|(0x98<<16))
+#define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16))
+#define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16))
+
+
+#define MI_FLUSH ((0<<29)|(4<<23))
+#define FLUSH_MAP_CACHE (1<<0)
+#define INHIBIT_FLUSH_RENDER_CACHE (1<<2)
+
+
+#define CMD_3D (0x3<<29)
+
+
+#define _3DPRIMITIVE ((0x3<<29)|(0x1f<<24))
+#define PRIM_INDIRECT (1<<23)
+#define PRIM_INLINE (0<<23)
+#define PRIM_INDIRECT_SEQUENTIAL (0<<17)
+#define PRIM_INDIRECT_ELTS (1<<17)
+
+#define PRIM3D_TRILIST (0x0<<18)
+#define PRIM3D_TRISTRIP (0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE (0x2<<18)
+#define PRIM3D_TRIFAN (0x3<<18)
+#define PRIM3D_POLY (0x4<<18)
+#define PRIM3D_LINELIST (0x5<<18)
+#define PRIM3D_LINESTRIP (0x6<<18)
+#define PRIM3D_RECTLIST (0x7<<18)
+#define PRIM3D_POINTLIST (0x8<<18)
+#define PRIM3D_DIB (0x9<<18)
+#define PRIM3D_MASK (0x1f<<18)
+
+#define I915PACKCOLOR4444(r,g,b,a) \
+ ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
+
+#define I915PACKCOLOR1555(r,g,b,a) \
+ ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
+ ((a) ? 0x8000 : 0))
+
+#define I915PACKCOLOR565(r,g,b) \
+ ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+
+#define I915PACKCOLOR8888(r,g,b,a) \
+ ((a<<24) | (r<<16) | (g<<8) | b)
+
+
+
+
+#define BR00_BITBLT_CLIENT 0x40000000
+#define BR00_OP_COLOR_BLT 0x10000000
+#define BR00_OP_SRC_COPY_BLT 0x10C00000
+#define BR13_SOLID_PATTERN 0x80000000
+
+#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4)
+#define XY_COLOR_BLT_WRITE_ALPHA (1<<21)
+#define XY_COLOR_BLT_WRITE_RGB (1<<20)
+
+#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
+#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21)
+#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20)
+
+#define MI_WAIT_FOR_EVENT ((0x3<<23))
+#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
+#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
+
+#define MI_BATCH_BUFFER (0x30<<23)
+#define MI_BATCH_BUFFER_START (0x31<<23)
+#define MI_BATCH_BUFFER_END (0xa<<23)
+
+
+
+#define COMPAREFUNC_ALWAYS 0
+#define COMPAREFUNC_NEVER 0x1
+#define COMPAREFUNC_LESS 0x2
+#define COMPAREFUNC_EQUAL 0x3
+#define COMPAREFUNC_LEQUAL 0x4
+#define COMPAREFUNC_GREATER 0x5
+#define COMPAREFUNC_NOTEQUAL 0x6
+#define COMPAREFUNC_GEQUAL 0x7
+
+#define STENCILOP_KEEP 0
+#define STENCILOP_ZERO 0x1
+#define STENCILOP_REPLACE 0x2
+#define STENCILOP_INCRSAT 0x3
+#define STENCILOP_DECRSAT 0x4
+#define STENCILOP_INCR 0x5
+#define STENCILOP_DECR 0x6
+#define STENCILOP_INVERT 0x7
+
+#define LOGICOP_CLEAR 0
+#define LOGICOP_NOR 0x1
+#define LOGICOP_AND_INV 0x2
+#define LOGICOP_COPY_INV 0x3
+#define LOGICOP_AND_RVRSE 0x4
+#define LOGICOP_INV 0x5
+#define LOGICOP_XOR 0x6
+#define LOGICOP_NAND 0x7
+#define LOGICOP_AND 0x8
+#define LOGICOP_EQUIV 0x9
+#define LOGICOP_NOOP 0xa
+#define LOGICOP_OR_INV 0xb
+#define LOGICOP_COPY 0xc
+#define LOGICOP_OR_RVRSE 0xd
+#define LOGICOP_OR 0xe
+#define LOGICOP_SET 0xf
+
+#define BLENDFACT_ZERO 0x01
+#define BLENDFACT_ONE 0x02
+#define BLENDFACT_SRC_COLR 0x03
+#define BLENDFACT_INV_SRC_COLR 0x04
+#define BLENDFACT_SRC_ALPHA 0x05
+#define BLENDFACT_INV_SRC_ALPHA 0x06
+#define BLENDFACT_DST_ALPHA 0x07
+#define BLENDFACT_INV_DST_ALPHA 0x08
+#define BLENDFACT_DST_COLR 0x09
+#define BLENDFACT_INV_DST_COLR 0x0a
+#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b
+#define BLENDFACT_CONST_COLOR 0x0c
+#define BLENDFACT_INV_CONST_COLOR 0x0d
+#define BLENDFACT_CONST_ALPHA 0x0e
+#define BLENDFACT_INV_CONST_ALPHA 0x0f
+#define BLENDFACT_MASK 0x0f
+
+#define PCI_CHIP_I915_G 0x2582
+#define PCI_CHIP_I915_GM 0x2592
+#define PCI_CHIP_I945_G 0x2772
+#define PCI_CHIP_I945_GM 0x27A2
+#define PCI_CHIP_I945_GME 0x27AE
+#define PCI_CHIP_G33_G 0x29C2
+#define PCI_CHIP_Q35_G 0x29B2
+#define PCI_CHIP_Q33_G 0x29D2
+
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c
new file mode 100644
index 0000000000..1c976082df
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_screen.c
@@ -0,0 +1,284 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+#include "util/u_string.h"
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_screen.h"
+#include "i915_texture.h"
+
+
+static const char *
+i915_get_vendor( struct pipe_screen *pscreen )
+{
+ return "Tungsten Graphics, Inc.";
+}
+
+
+static const char *
+i915_get_name( struct pipe_screen *pscreen )
+{
+ static char buffer[128];
+ const char *chipset;
+
+ switch (i915_screen(pscreen)->pci_id) {
+ case PCI_CHIP_I915_G:
+ chipset = "915G";
+ break;
+ case PCI_CHIP_I915_GM:
+ chipset = "915GM";
+ break;
+ case PCI_CHIP_I945_G:
+ chipset = "945G";
+ break;
+ case PCI_CHIP_I945_GM:
+ chipset = "945GM";
+ break;
+ case PCI_CHIP_I945_GME:
+ chipset = "945GME";
+ break;
+ case PCI_CHIP_G33_G:
+ chipset = "G33";
+ break;
+ case PCI_CHIP_Q35_G:
+ chipset = "Q35";
+ break;
+ case PCI_CHIP_Q33_G:
+ chipset = "Q33";
+ break;
+ default:
+ chipset = "unknown";
+ break;
+ }
+
+ util_snprintf(buffer, sizeof(buffer), "i915 (chipset: %s)", chipset);
+ return buffer;
+}
+
+
+static int
+i915_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 8;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 11; /* max 1024x1024 */
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 8; /* max 128x128x128 */
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 11; /* max 1024x1024 */
+ default:
+ return 0;
+ }
+}
+
+
+static float
+i915_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 7.5;
+
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 255.0;
+
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 4.0;
+
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0;
+
+ default:
+ return 0;
+ }
+}
+
+
+static boolean
+i915_is_format_supported( struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage,
+ unsigned geom_flags )
+{
+ static const enum pipe_format tex_supported[] = {
+ PIPE_FORMAT_R8G8B8A8_UNORM,
+ PIPE_FORMAT_A8R8G8B8_UNORM,
+ PIPE_FORMAT_R5G6B5_UNORM,
+ PIPE_FORMAT_L8_UNORM,
+ PIPE_FORMAT_A8_UNORM,
+ PIPE_FORMAT_I8_UNORM,
+ PIPE_FORMAT_A8L8_UNORM,
+ PIPE_FORMAT_YCBCR,
+ PIPE_FORMAT_YCBCR_REV,
+ PIPE_FORMAT_S8Z24_UNORM,
+ PIPE_FORMAT_NONE /* list terminator */
+ };
+ static const enum pipe_format surface_supported[] = {
+ PIPE_FORMAT_A8R8G8B8_UNORM,
+ PIPE_FORMAT_R5G6B5_UNORM,
+ PIPE_FORMAT_S8Z24_UNORM,
+ /*PIPE_FORMAT_R16G16B16A16_SNORM,*/
+ PIPE_FORMAT_NONE /* list terminator */
+ };
+ const enum pipe_format *list;
+ uint i;
+
+ if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET)
+ list = surface_supported;
+ else
+ list = tex_supported;
+
+ for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) {
+ if (list[i] == format)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+static void
+i915_destroy_screen( struct pipe_screen *screen )
+{
+ struct pipe_winsys *winsys = screen->winsys;
+
+ if(winsys->destroy)
+ winsys->destroy(winsys);
+
+ FREE(screen);
+}
+
+
+static void *
+i915_surface_map( struct pipe_screen *screen,
+ struct pipe_surface *surface,
+ unsigned flags )
+{
+ char *map = pipe_buffer_map( screen, surface->buffer, flags );
+ if (map == NULL)
+ return NULL;
+
+ if (surface->texture &&
+ (flags & PIPE_BUFFER_USAGE_CPU_WRITE))
+ {
+ /* Do something to notify contexts of a texture change.
+ */
+ /* i915_screen(screen)->timestamp++; */
+ }
+
+ return map + surface->offset;
+}
+
+static void
+i915_surface_unmap(struct pipe_screen *screen,
+ struct pipe_surface *surface)
+{
+ pipe_buffer_unmap( screen, surface->buffer );
+}
+
+
+
+/**
+ * Create a new i915_screen object
+ */
+struct pipe_screen *
+i915_create_screen(struct pipe_winsys *winsys, uint pci_id)
+{
+ struct i915_screen *i915screen = CALLOC_STRUCT(i915_screen);
+
+ if (!i915screen)
+ return NULL;
+
+ switch (pci_id) {
+ case PCI_CHIP_I915_G:
+ case PCI_CHIP_I915_GM:
+ i915screen->is_i945 = FALSE;
+ break;
+
+ case PCI_CHIP_I945_G:
+ case PCI_CHIP_I945_GM:
+ case PCI_CHIP_I945_GME:
+ case PCI_CHIP_G33_G:
+ case PCI_CHIP_Q33_G:
+ case PCI_CHIP_Q35_G:
+ i915screen->is_i945 = TRUE;
+ break;
+
+ default:
+ debug_printf("%s: unknown pci id 0x%x, cannot create screen\n",
+ __FUNCTION__, pci_id);
+ return NULL;
+ }
+
+ i915screen->pci_id = pci_id;
+
+ i915screen->screen.winsys = winsys;
+
+ i915screen->screen.destroy = i915_destroy_screen;
+
+ i915screen->screen.get_name = i915_get_name;
+ i915screen->screen.get_vendor = i915_get_vendor;
+ i915screen->screen.get_param = i915_get_param;
+ i915screen->screen.get_paramf = i915_get_paramf;
+ i915screen->screen.is_format_supported = i915_is_format_supported;
+ i915screen->screen.surface_map = i915_surface_map;
+ i915screen->screen.surface_unmap = i915_surface_unmap;
+
+ i915_init_screen_texture_functions(&i915screen->screen);
+
+ return &i915screen->screen;
+}
diff --git a/src/gallium/drivers/i915simple/i915_screen.h b/src/gallium/drivers/i915simple/i915_screen.h
new file mode 100644
index 0000000000..73b0ff05ce
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_screen.h
@@ -0,0 +1,69 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef I915_SCREEN_H
+#define I915_SCREEN_H
+
+
+#include "pipe/p_screen.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Subclass of pipe_screen
+ */
+struct i915_screen
+{
+ struct pipe_screen screen;
+
+ boolean is_i945;
+ uint pci_id;
+};
+
+
+/** cast wrapper */
+static INLINE struct i915_screen *
+i915_screen(struct pipe_screen *pscreen)
+{
+ return (struct i915_screen *) pscreen;
+}
+
+
+extern struct pipe_screen *
+i915_create_screen(struct pipe_winsys *winsys, uint pci_id);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* I915_SCREEN_H */
diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c
new file mode 100644
index 0000000000..d2487d8277
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_state.c
@@ -0,0 +1,788 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "draw/draw_context.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+#include "i915_state.h"
+#include "i915_state_inlines.h"
+#include "i915_fpc.h"
+
+/* The i915 (and related graphics cores) do not support GL_CLAMP. The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ */
+static unsigned
+translate_wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return TEXCOORDMODE_WRAP;
+ case PIPE_TEX_WRAP_CLAMP:
+ return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return TEXCOORDMODE_CLAMP_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return TEXCOORDMODE_CLAMP_BORDER;
+// case PIPE_TEX_WRAP_MIRRORED_REPEAT:
+// return TEXCOORDMODE_MIRROR;
+ default:
+ return TEXCOORDMODE_WRAP;
+ }
+}
+
+static unsigned translate_img_filter( unsigned filter )
+{
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ return FILTER_NEAREST;
+ case PIPE_TEX_FILTER_LINEAR:
+ return FILTER_LINEAR;
+ case PIPE_TEX_FILTER_ANISO:
+ return FILTER_ANISOTROPIC;
+ default:
+ assert(0);
+ return FILTER_NEAREST;
+ }
+}
+
+static unsigned translate_mip_filter( unsigned filter )
+{
+ switch (filter) {
+ case PIPE_TEX_MIPFILTER_NONE:
+ return MIPFILTER_NONE;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ return MIPFILTER_NEAREST;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ return MIPFILTER_LINEAR;
+ default:
+ assert(0);
+ return MIPFILTER_NONE;
+ }
+}
+
+
+/* None of this state is actually used for anything yet.
+ */
+static void *
+i915_create_blend_state(struct pipe_context *pipe,
+ const struct pipe_blend_state *blend)
+{
+ struct i915_blend_state *cso_data = CALLOC_STRUCT( i915_blend_state );
+
+ {
+ unsigned eqRGB = blend->rgb_func;
+ unsigned srcRGB = blend->rgb_src_factor;
+ unsigned dstRGB = blend->rgb_dst_factor;
+
+ unsigned eqA = blend->alpha_func;
+ unsigned srcA = blend->alpha_src_factor;
+ unsigned dstA = blend->alpha_dst_factor;
+
+ /* Special handling for MIN/MAX filter modes handled at
+ * state_tracker level.
+ */
+
+ if (srcA != srcRGB ||
+ dstA != dstRGB ||
+ eqA != eqRGB) {
+
+ cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+ IAB_MODIFY_ENABLE |
+ IAB_ENABLE |
+ IAB_MODIFY_FUNC |
+ IAB_MODIFY_SRC_FACTOR |
+ IAB_MODIFY_DST_FACTOR |
+ SRC_ABLND_FACT(i915_translate_blend_factor(srcA)) |
+ DST_ABLND_FACT(i915_translate_blend_factor(dstA)) |
+ (i915_translate_blend_func(eqA) << IAB_FUNC_SHIFT));
+ }
+ else {
+ cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+ IAB_MODIFY_ENABLE |
+ 0);
+ }
+ }
+
+ cso_data->modes4 |= (_3DSTATE_MODES_4_CMD |
+ ENABLE_LOGIC_OP_FUNC |
+ LOGIC_OP_FUNC(i915_translate_logic_op(blend->logicop_func)));
+
+ if (blend->logicop_enable)
+ cso_data->LIS5 |= S5_LOGICOP_ENABLE;
+
+ if (blend->dither)
+ cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE;
+
+ if ((blend->colormask & PIPE_MASK_R) == 0)
+ cso_data->LIS5 |= S5_WRITEDISABLE_RED;
+
+ if ((blend->colormask & PIPE_MASK_G) == 0)
+ cso_data->LIS5 |= S5_WRITEDISABLE_GREEN;
+
+ if ((blend->colormask & PIPE_MASK_B) == 0)
+ cso_data->LIS5 |= S5_WRITEDISABLE_BLUE;
+
+ if ((blend->colormask & PIPE_MASK_A) == 0)
+ cso_data->LIS5 |= S5_WRITEDISABLE_ALPHA;
+
+ if (blend->blend_enable) {
+ unsigned funcRGB = blend->rgb_func;
+ unsigned srcRGB = blend->rgb_src_factor;
+ unsigned dstRGB = blend->rgb_dst_factor;
+
+ cso_data->LIS6 |= (S6_CBUF_BLEND_ENABLE |
+ SRC_BLND_FACT(i915_translate_blend_factor(srcRGB)) |
+ DST_BLND_FACT(i915_translate_blend_factor(dstRGB)) |
+ (i915_translate_blend_func(funcRGB) << S6_CBUF_BLEND_FUNC_SHIFT));
+ }
+
+ return cso_data;
+}
+
+static void i915_bind_blend_state(struct pipe_context *pipe,
+ void *blend)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
+ i915->blend = (struct i915_blend_state*)blend;
+
+ i915->dirty |= I915_NEW_BLEND;
+}
+
+
+static void i915_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+ FREE(blend);
+}
+
+static void i915_set_blend_color( struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color )
+{
+ struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
+ i915->blend_color = *blend_color;
+
+ i915->dirty |= I915_NEW_BLEND;
+}
+
+static void *
+i915_create_sampler_state(struct pipe_context *pipe,
+ const struct pipe_sampler_state *sampler)
+{
+ struct i915_sampler_state *cso = CALLOC_STRUCT( i915_sampler_state );
+ const unsigned ws = sampler->wrap_s;
+ const unsigned wt = sampler->wrap_t;
+ const unsigned wr = sampler->wrap_r;
+ unsigned minFilt, magFilt;
+ unsigned mipFilt;
+
+ cso->templ = sampler;
+
+ mipFilt = translate_mip_filter(sampler->min_mip_filter);
+ minFilt = translate_img_filter( sampler->min_img_filter );
+ magFilt = translate_img_filter( sampler->mag_img_filter );
+
+ if (sampler->max_anisotropy > 2.0) {
+ cso->state[0] |= SS2_MAX_ANISO_4;
+ }
+
+ {
+ int b = (int) (sampler->lod_bias * 16.0);
+ b = CLAMP(b, -256, 255);
+ cso->state[0] |= ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK);
+ }
+
+ /* Shadow:
+ */
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
+ {
+ cso->state[0] |= (SS2_SHADOW_ENABLE |
+ i915_translate_compare_func(sampler->compare_func));
+
+ minFilt = FILTER_4X4_FLAT;
+ magFilt = FILTER_4X4_FLAT;
+ }
+
+ cso->state[0] |= ((minFilt << SS2_MIN_FILTER_SHIFT) |
+ (mipFilt << SS2_MIP_FILTER_SHIFT) |
+ (magFilt << SS2_MAG_FILTER_SHIFT));
+
+ cso->state[1] |=
+ ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) |
+ (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) |
+ (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT));
+
+ if (sampler->normalized_coords)
+ cso->state[1] |= SS3_NORMALIZED_COORDS;
+
+ {
+ int minlod = (int) (16.0 * sampler->min_lod);
+ int maxlod = (int) (16.0 * sampler->max_lod);
+ minlod = CLAMP(minlod, 0, 16 * 11);
+ maxlod = CLAMP(maxlod, 0, 16 * 11);
+
+ if (minlod > maxlod)
+ maxlod = minlod;
+
+ cso->minlod = minlod;
+ cso->maxlod = maxlod;
+ }
+
+ {
+ ubyte r = float_to_ubyte(sampler->border_color[0]);
+ ubyte g = float_to_ubyte(sampler->border_color[1]);
+ ubyte b = float_to_ubyte(sampler->border_color[2]);
+ ubyte a = float_to_ubyte(sampler->border_color[3]);
+ cso->state[2] = I915PACKCOLOR8888(r, g, b, a);
+ }
+ return cso;
+}
+
+static void i915_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ unsigned i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == i915->num_samplers &&
+ !memcmp(i915->sampler, sampler, num * sizeof(void *)))
+ return;
+
+ draw_flush(i915->draw);
+
+ for (i = 0; i < num; ++i)
+ i915->sampler[i] = sampler[i];
+ for (i = num; i < PIPE_MAX_SAMPLERS; ++i)
+ i915->sampler[i] = NULL;
+
+ i915->num_samplers = num;
+
+ i915->dirty |= I915_NEW_SAMPLER;
+}
+
+static void i915_delete_sampler_state(struct pipe_context *pipe,
+ void *sampler)
+{
+ FREE(sampler);
+}
+
+
+/** XXX move someday? Or consolidate all these simple state setters
+ * into one file.
+ */
+
+static void *
+i915_create_depth_stencil_state(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *depth_stencil)
+{
+ struct i915_depth_stencil_state *cso = CALLOC_STRUCT( i915_depth_stencil_state );
+
+ {
+ int testmask = depth_stencil->stencil[0].value_mask & 0xff;
+ int writemask = depth_stencil->stencil[0].write_mask & 0xff;
+
+ cso->stencil_modes4 |= (_3DSTATE_MODES_4_CMD |
+ ENABLE_STENCIL_TEST_MASK |
+ STENCIL_TEST_MASK(testmask) |
+ ENABLE_STENCIL_WRITE_MASK |
+ STENCIL_WRITE_MASK(writemask));
+ }
+
+ if (depth_stencil->stencil[0].enabled) {
+ int test = i915_translate_compare_func(depth_stencil->stencil[0].func);
+ int fop = i915_translate_stencil_op(depth_stencil->stencil[0].fail_op);
+ int dfop = i915_translate_stencil_op(depth_stencil->stencil[0].zfail_op);
+ int dpop = i915_translate_stencil_op(depth_stencil->stencil[0].zpass_op);
+ int ref = depth_stencil->stencil[0].ref_value & 0xff;
+
+ cso->stencil_LIS5 |= (S5_STENCIL_TEST_ENABLE |
+ S5_STENCIL_WRITE_ENABLE |
+ (ref << S5_STENCIL_REF_SHIFT) |
+ (test << S5_STENCIL_TEST_FUNC_SHIFT) |
+ (fop << S5_STENCIL_FAIL_SHIFT) |
+ (dfop << S5_STENCIL_PASS_Z_FAIL_SHIFT) |
+ (dpop << S5_STENCIL_PASS_Z_PASS_SHIFT));
+ }
+
+ if (depth_stencil->stencil[1].enabled) {
+ int test = i915_translate_compare_func(depth_stencil->stencil[1].func);
+ int fop = i915_translate_stencil_op(depth_stencil->stencil[1].fail_op);
+ int dfop = i915_translate_stencil_op(depth_stencil->stencil[1].zfail_op);
+ int dpop = i915_translate_stencil_op(depth_stencil->stencil[1].zpass_op);
+ int ref = depth_stencil->stencil[1].ref_value & 0xff;
+ int tmask = depth_stencil->stencil[1].value_mask & 0xff;
+ int wmask = depth_stencil->stencil[1].write_mask & 0xff;
+
+ cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS |
+ BFO_ENABLE_STENCIL_FUNCS |
+ BFO_ENABLE_STENCIL_TWO_SIDE |
+ BFO_ENABLE_STENCIL_REF |
+ BFO_STENCIL_TWO_SIDE |
+ (ref << BFO_STENCIL_REF_SHIFT) |
+ (test << BFO_STENCIL_TEST_SHIFT) |
+ (fop << BFO_STENCIL_FAIL_SHIFT) |
+ (dfop << BFO_STENCIL_PASS_Z_FAIL_SHIFT) |
+ (dpop << BFO_STENCIL_PASS_Z_PASS_SHIFT));
+
+ cso->bfo[1] = (_3DSTATE_BACKFACE_STENCIL_MASKS |
+ BFM_ENABLE_STENCIL_TEST_MASK |
+ BFM_ENABLE_STENCIL_WRITE_MASK |
+ (tmask << BFM_STENCIL_TEST_MASK_SHIFT) |
+ (wmask << BFM_STENCIL_WRITE_MASK_SHIFT));
+ }
+ else {
+ /* This actually disables two-side stencil: The bit set is a
+ * modify-enable bit to indicate we are changing the two-side
+ * setting. Then there is a symbolic zero to show that we are
+ * setting the flag to zero/off.
+ */
+ cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS |
+ BFO_ENABLE_STENCIL_TWO_SIDE |
+ 0);
+ cso->bfo[1] = 0;
+ }
+
+ if (depth_stencil->depth.enabled) {
+ int func = i915_translate_compare_func(depth_stencil->depth.func);
+
+ cso->depth_LIS6 |= (S6_DEPTH_TEST_ENABLE |
+ (func << S6_DEPTH_TEST_FUNC_SHIFT));
+
+ if (depth_stencil->depth.writemask)
+ cso->depth_LIS6 |= S6_DEPTH_WRITE_ENABLE;
+ }
+
+ if (depth_stencil->alpha.enabled) {
+ int test = i915_translate_compare_func(depth_stencil->alpha.func);
+ ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref);
+
+ cso->depth_LIS6 |= (S6_ALPHA_TEST_ENABLE |
+ (test << S6_ALPHA_TEST_FUNC_SHIFT) |
+ (((unsigned) refByte) << S6_ALPHA_REF_SHIFT));
+ }
+
+ return cso;
+}
+
+static void i915_bind_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
+ i915->depth_stencil = (const struct i915_depth_stencil_state *)depth_stencil;
+
+ i915->dirty |= I915_NEW_DEPTH_STENCIL;
+}
+
+static void i915_delete_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ FREE(depth_stencil);
+}
+
+
+static void i915_set_scissor_state( struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor )
+{
+ struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
+ memcpy( &i915->scissor, scissor, sizeof(*scissor) );
+ i915->dirty |= I915_NEW_SCISSOR;
+}
+
+
+static void i915_set_polygon_stipple( struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple )
+{
+}
+
+
+
+static void *
+i915_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader);
+ if (!ifs)
+ return NULL;
+
+ ifs->state.tokens = tgsi_dup_tokens(templ->tokens);
+
+ tgsi_scan_shader(templ->tokens, &ifs->info);
+
+ /* The shader's compiled to i915 instructions here */
+ i915_translate_fragment_program(i915, ifs);
+
+ return ifs;
+}
+
+static void
+i915_bind_fs_state(struct pipe_context *pipe, void *shader)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
+ i915->fs = (struct i915_fragment_shader*) shader;
+
+ i915->dirty |= I915_NEW_FS;
+}
+
+static
+void i915_delete_fs_state(struct pipe_context *pipe, void *shader)
+{
+ struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader;
+
+ if (ifs->program)
+ FREE(ifs->program);
+ ifs->program_len = 0;
+
+ FREE((struct tgsi_token *)ifs->state.tokens);
+
+ FREE(ifs);
+}
+
+
+static void *
+i915_create_vs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct i915_context *i915 = i915_context(pipe);
+
+ /* just pass-through to draw module */
+ return draw_create_vertex_shader(i915->draw, templ);
+}
+
+static void i915_bind_vs_state(struct pipe_context *pipe, void *shader)
+{
+ struct i915_context *i915 = i915_context(pipe);
+
+ /* just pass-through to draw module */
+ draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader);
+
+ i915->dirty |= I915_NEW_VS;
+}
+
+static void i915_delete_vs_state(struct pipe_context *pipe, void *shader)
+{
+ struct i915_context *i915 = i915_context(pipe);
+
+ /* just pass-through to draw module */
+ draw_delete_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader);
+}
+
+static void i915_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ draw_flush(i915->draw);
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index == 0);
+
+ /* Make a copy of shader constants.
+ * During fragment program translation we may add additional
+ * constants to the array.
+ *
+ * We want to consider the situation where some user constants
+ * (ex: a material color) may change frequently but the shader program
+ * stays the same. In that case we should only be updating the first
+ * N constants, leaving any extras from shader translation alone.
+ */
+ if (buf) {
+ void *mapped;
+ if (buf->size &&
+ (mapped = ws->buffer_map(ws, buf->buffer,
+ PIPE_BUFFER_USAGE_CPU_READ))) {
+ memcpy(i915->current.constants[shader], mapped, buf->size);
+ ws->buffer_unmap(ws, buf->buffer);
+ i915->current.num_user_constants[shader]
+ = buf->size / (4 * sizeof(float));
+ }
+ else {
+ i915->current.num_user_constants[shader] = 0;
+ }
+ }
+
+ i915->dirty |= I915_NEW_CONSTANTS;
+}
+
+
+static void i915_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_texture **texture)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ uint i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == i915->num_textures &&
+ !memcmp(i915->texture, texture, num * sizeof(struct pipe_texture *)))
+ return;
+
+ /* Fixes wrong texture in texobj with VBUF */
+ draw_flush(i915->draw);
+
+ for (i = 0; i < num; i++)
+ pipe_texture_reference((struct pipe_texture **) &i915->texture[i],
+ texture[i]);
+
+ for (i = num; i < i915->num_textures; i++)
+ pipe_texture_reference((struct pipe_texture **) &i915->texture[i],
+ NULL);
+
+ i915->num_textures = num;
+
+ i915->dirty |= I915_NEW_TEXTURE;
+}
+
+
+
+static void i915_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
+ i915->framebuffer = *fb; /* struct copy */
+
+ i915->dirty |= I915_NEW_FRAMEBUFFER;
+}
+
+
+
+static void i915_set_clip_state( struct pipe_context *pipe,
+ const struct pipe_clip_state *clip )
+{
+ struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
+ draw_set_clip_state(i915->draw, clip);
+
+ i915->dirty |= I915_NEW_CLIP;
+}
+
+
+
+/* Called when driver state tracker notices changes to the viewport
+ * matrix:
+ */
+static void i915_set_viewport_state( struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport )
+{
+ struct i915_context *i915 = i915_context(pipe);
+
+ i915->viewport = *viewport; /* struct copy */
+
+ /* pass the viewport info to the draw module */
+ draw_set_viewport_state(i915->draw, &i915->viewport);
+
+ i915->dirty |= I915_NEW_VIEWPORT;
+}
+
+
+static void *
+i915_create_rasterizer_state(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *rasterizer)
+{
+ struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state );
+
+ cso->templ = rasterizer;
+ cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
+ cso->light_twoside = rasterizer->light_twoside;
+ cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE;
+ cso->ds[1].f = rasterizer->offset_scale;
+ if (rasterizer->poly_stipple_enable) {
+ cso->st |= ST1_ENABLE;
+ }
+
+ if (rasterizer->scissor)
+ cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT;
+ else
+ cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT;
+
+ switch (rasterizer->cull_mode) {
+ case PIPE_WINDING_NONE:
+ cso->LIS4 |= S4_CULLMODE_NONE;
+ break;
+ case PIPE_WINDING_CW:
+ cso->LIS4 |= S4_CULLMODE_CW;
+ break;
+ case PIPE_WINDING_CCW:
+ cso->LIS4 |= S4_CULLMODE_CCW;
+ break;
+ case PIPE_WINDING_BOTH:
+ cso->LIS4 |= S4_CULLMODE_BOTH;
+ break;
+ }
+
+ {
+ int line_width = CLAMP((int)(rasterizer->line_width * 2), 1, 0xf);
+
+ cso->LIS4 |= line_width << S4_LINE_WIDTH_SHIFT;
+
+ if (rasterizer->line_smooth)
+ cso->LIS4 |= S4_LINE_ANTIALIAS_ENABLE;
+ }
+
+ {
+ int point_size = CLAMP((int) rasterizer->point_size, 1, 0xff);
+
+ cso->LIS4 |= point_size << S4_POINT_WIDTH_SHIFT;
+ }
+
+ if (rasterizer->flatshade) {
+ cso->LIS4 |= (S4_FLATSHADE_ALPHA |
+ S4_FLATSHADE_COLOR |
+ S4_FLATSHADE_SPECULAR);
+ }
+
+ cso->LIS7 = fui( rasterizer->offset_units );
+
+
+ return cso;
+}
+
+static void i915_bind_rasterizer_state( struct pipe_context *pipe,
+ void *raster )
+{
+ struct i915_context *i915 = i915_context(pipe);
+
+ i915->rasterizer = (struct i915_rasterizer_state *)raster;
+
+ /* pass-through to draw module */
+ draw_set_rasterizer_state(i915->draw,
+ (i915->rasterizer ? i915->rasterizer->templ : NULL));
+
+ i915->dirty |= I915_NEW_RASTERIZER;
+}
+
+static void i915_delete_rasterizer_state(struct pipe_context *pipe,
+ void *raster)
+{
+ FREE(raster);
+}
+
+static void i915_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ /* Because we change state before the draw_set_vertex_buffers call
+ * we need a flush here, just to be sure.
+ */
+ draw_flush(i915->draw);
+
+ memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0]));
+ i915->num_vertex_buffers = count;
+
+ /* pass-through to draw module */
+ draw_set_vertex_buffers(i915->draw, count, buffers);
+}
+
+static void i915_set_vertex_elements(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *elements)
+{
+ struct i915_context *i915 = i915_context(pipe);
+ /* Because we change state before the draw_set_vertex_buffers call
+ * we need a flush here, just to be sure.
+ */
+ draw_flush(i915->draw);
+
+ i915->num_vertex_elements = count;
+ /* pass-through to draw module */
+ draw_set_vertex_elements(i915->draw, count, elements);
+}
+
+
+static void i915_set_edgeflags(struct pipe_context *pipe,
+ const unsigned *bitfield)
+{
+ /* TODO do something here */
+}
+
+void
+i915_init_state_functions( struct i915_context *i915 )
+{
+ i915->pipe.set_edgeflags = i915_set_edgeflags;
+ i915->pipe.create_blend_state = i915_create_blend_state;
+ i915->pipe.bind_blend_state = i915_bind_blend_state;
+ i915->pipe.delete_blend_state = i915_delete_blend_state;
+
+ i915->pipe.create_sampler_state = i915_create_sampler_state;
+ i915->pipe.bind_sampler_states = i915_bind_sampler_states;
+ i915->pipe.delete_sampler_state = i915_delete_sampler_state;
+
+ i915->pipe.create_depth_stencil_alpha_state = i915_create_depth_stencil_state;
+ i915->pipe.bind_depth_stencil_alpha_state = i915_bind_depth_stencil_state;
+ i915->pipe.delete_depth_stencil_alpha_state = i915_delete_depth_stencil_state;
+
+ i915->pipe.create_rasterizer_state = i915_create_rasterizer_state;
+ i915->pipe.bind_rasterizer_state = i915_bind_rasterizer_state;
+ i915->pipe.delete_rasterizer_state = i915_delete_rasterizer_state;
+ i915->pipe.create_fs_state = i915_create_fs_state;
+ i915->pipe.bind_fs_state = i915_bind_fs_state;
+ i915->pipe.delete_fs_state = i915_delete_fs_state;
+ i915->pipe.create_vs_state = i915_create_vs_state;
+ i915->pipe.bind_vs_state = i915_bind_vs_state;
+ i915->pipe.delete_vs_state = i915_delete_vs_state;
+
+ i915->pipe.set_blend_color = i915_set_blend_color;
+ i915->pipe.set_clip_state = i915_set_clip_state;
+ i915->pipe.set_constant_buffer = i915_set_constant_buffer;
+ i915->pipe.set_framebuffer_state = i915_set_framebuffer_state;
+
+ i915->pipe.set_polygon_stipple = i915_set_polygon_stipple;
+ i915->pipe.set_scissor_state = i915_set_scissor_state;
+ i915->pipe.set_sampler_textures = i915_set_sampler_textures;
+ i915->pipe.set_viewport_state = i915_set_viewport_state;
+ i915->pipe.set_vertex_buffers = i915_set_vertex_buffers;
+ i915->pipe.set_vertex_elements = i915_set_vertex_elements;
+}
diff --git a/src/gallium/drivers/i915simple/i915_state.h b/src/gallium/drivers/i915simple/i915_state.h
new file mode 100644
index 0000000000..86c6b0027d
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_state.h
@@ -0,0 +1,50 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef I915_STATE_H
+#define I915_STATE_H
+
+struct i915_context;
+
+
+struct i915_tracked_state {
+ unsigned dirty;
+ void (*update)( struct i915_context * );
+};
+
+void i915_update_immediate( struct i915_context *i915 );
+void i915_update_dynamic( struct i915_context *i915 );
+void i915_update_derived( struct i915_context *i915 );
+void i915_update_samplers( struct i915_context *i915 );
+void i915_update_textures(struct i915_context *i915);
+
+void i915_emit_hardware_state( struct i915_context *i915 );
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c
new file mode 100644
index 0000000000..178d4e8781
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_state_derived.c
@@ -0,0 +1,183 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "i915_context.h"
+#include "i915_state.h"
+#include "i915_reg.h"
+#include "i915_fpc.h"
+
+
+
+/**
+ * Determine the hardware vertex layout.
+ * Depends on vertex/fragment shader state.
+ */
+static void calculate_vertex_layout( struct i915_context *i915 )
+{
+ const struct i915_fragment_shader *fs = i915->fs;
+ const enum interp_mode colorInterp = i915->rasterizer->color_interp;
+ struct vertex_info vinfo;
+ boolean texCoords[8], colors[2], fog, needW;
+ uint i;
+ int src;
+
+ memset(texCoords, 0, sizeof(texCoords));
+ colors[0] = colors[1] = fog = needW = FALSE;
+ memset(&vinfo, 0, sizeof(vinfo));
+
+ /* Determine which fragment program inputs are needed. Setup HW vertex
+ * layout below, in the HW-specific attribute order.
+ */
+ for (i = 0; i < fs->info.num_inputs; i++) {
+ switch (fs->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ assert(fs->info.input_semantic_index[i] < 2);
+ colors[fs->info.input_semantic_index[i]] = TRUE;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ /* usually a texcoord */
+ {
+ const uint unit = fs->info.input_semantic_index[i];
+ assert(unit < 8);
+ texCoords[unit] = TRUE;
+ needW = TRUE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ fog = TRUE;
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+
+ /* pos */
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
+ if (needW) {
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
+ vinfo.hwfmt[0] |= S4_VFMT_XYZW;
+ vinfo.attrib[0].emit = EMIT_4F;
+ }
+ else {
+ draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src);
+ vinfo.hwfmt[0] |= S4_VFMT_XYZ;
+ vinfo.attrib[0].emit = EMIT_3F;
+ }
+
+ /* hardware point size */
+ /* XXX todo */
+
+ /* primary color */
+ if (colors[0]) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
+ draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
+ vinfo.hwfmt[0] |= S4_VFMT_COLOR;
+ }
+
+ /* secondary color */
+ if (colors[1]) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
+ draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
+ vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
+ }
+
+ /* fog coord, not fog blend factor */
+ if (fog) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
+ draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
+ }
+
+ /* texcoords */
+ for (i = 0; i < 8; i++) {
+ uint hwtc;
+ if (texCoords[i]) {
+ hwtc = TEXCOORDFMT_4D;
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ }
+ else {
+ hwtc = TEXCOORDFMT_NOT_PRESENT;
+ }
+ vinfo.hwfmt[1] |= hwtc << (i * 4);
+ }
+
+ draw_compute_vertex_size(&vinfo);
+
+ if (memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) {
+ /* Need to set this flag so that the LIS2/4 registers get set.
+ * It also means the i915_update_immediate() function must be called
+ * after this one, in i915_update_derived().
+ */
+ i915->dirty |= I915_NEW_VERTEX_FORMAT;
+
+ memcpy(&i915->current.vertex_info, &vinfo, sizeof(vinfo));
+ }
+}
+
+
+
+
+/* Hopefully this will remain quite simple, otherwise need to pull in
+ * something like the state tracker mechanism.
+ */
+void i915_update_derived( struct i915_context *i915 )
+{
+ if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS))
+ calculate_vertex_layout( i915 );
+
+ if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE))
+ i915_update_samplers(i915);
+
+ if (i915->dirty & I915_NEW_TEXTURE)
+ i915_update_textures(i915);
+
+ if (i915->dirty)
+ i915_update_immediate( i915 );
+
+ if (i915->dirty)
+ i915_update_dynamic( i915 );
+
+ if (i915->dirty & I915_NEW_FS) {
+ i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */
+ }
+
+ /* HW emit currently references framebuffer state directly:
+ */
+ if (i915->dirty & I915_NEW_FRAMEBUFFER)
+ i915->hardware_dirty |= I915_HW_STATIC;
+
+ i915->dirty = 0;
+}
diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c
new file mode 100644
index 0000000000..86126a5a15
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_state_dynamic.c
@@ -0,0 +1,310 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "i915_batch.h"
+#include "i915_state_inlines.h"
+#include "i915_context.h"
+#include "i915_reg.h"
+#include "i915_state.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+
+#define FILE_DEBUG_FLAG DEBUG_STATE
+
+/* State that we have chosen to store in the DYNAMIC segment of the
+ * i915 indirect state mechanism.
+ *
+ * Can't cache these in the way we do the static state, as there is no
+ * start/size in the command packet, instead an 'end' value that gets
+ * incremented.
+ *
+ * Additionally, there seems to be a requirement to re-issue the full
+ * (active) state every time a 4kb boundary is crossed.
+ */
+
+static INLINE void set_dynamic_indirect( struct i915_context *i915,
+ unsigned offset,
+ const unsigned *src,
+ unsigned dwords )
+{
+ unsigned i;
+
+ for (i = 0; i < dwords; i++)
+ i915->current.dynamic[offset + i] = src[i];
+
+ i915->hardware_dirty |= I915_HW_DYNAMIC;
+}
+
+
+/***********************************************************************
+ * Modes4: stencil masks and logicop
+ */
+static void upload_MODES4( struct i915_context *i915 )
+{
+ unsigned modes4 = 0;
+
+ /* I915_NEW_STENCIL */
+ modes4 |= i915->depth_stencil->stencil_modes4;
+ /* I915_NEW_BLEND */
+ modes4 |= i915->blend->modes4;
+
+ /* Always, so that we know when state is in-active:
+ */
+ set_dynamic_indirect( i915,
+ I915_DYNAMIC_MODES4,
+ &modes4,
+ 1 );
+}
+
+const struct i915_tracked_state i915_upload_MODES4 = {
+ I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL,
+ upload_MODES4
+};
+
+
+
+
+/***********************************************************************
+ */
+
+static void upload_BFO( struct i915_context *i915 )
+{
+ set_dynamic_indirect( i915,
+ I915_DYNAMIC_BFO_0,
+ &(i915->depth_stencil->bfo[0]),
+ 2 );
+}
+
+const struct i915_tracked_state i915_upload_BFO = {
+ I915_NEW_DEPTH_STENCIL,
+ upload_BFO
+};
+
+
+/***********************************************************************
+ */
+
+
+static void upload_BLENDCOLOR( struct i915_context *i915 )
+{
+ unsigned bc[2];
+
+ memset( bc, 0, sizeof(bc) );
+
+ /* I915_NEW_BLEND {_COLOR}
+ */
+ {
+ const float *color = i915->blend_color.color;
+
+ bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD;
+ bc[1] = pack_ui32_float4( color[0],
+ color[1],
+ color[2],
+ color[3] );
+ }
+
+ set_dynamic_indirect( i915,
+ I915_DYNAMIC_BC_0,
+ bc,
+ 2 );
+}
+
+const struct i915_tracked_state i915_upload_BLENDCOLOR = {
+ I915_NEW_BLEND,
+ upload_BLENDCOLOR
+};
+
+/***********************************************************************
+ */
+
+
+static void upload_IAB( struct i915_context *i915 )
+{
+ unsigned iab = i915->blend->iab;
+
+
+ set_dynamic_indirect( i915,
+ I915_DYNAMIC_IAB,
+ &iab,
+ 1 );
+}
+
+const struct i915_tracked_state i915_upload_IAB = {
+ I915_NEW_BLEND,
+ upload_IAB
+};
+
+
+/***********************************************************************
+ */
+
+
+
+static void upload_DEPTHSCALE( struct i915_context *i915 )
+{
+ set_dynamic_indirect( i915,
+ I915_DYNAMIC_DEPTHSCALE_0,
+ &(i915->rasterizer->ds[0].u),
+ 2 );
+}
+
+const struct i915_tracked_state i915_upload_DEPTHSCALE = {
+ I915_NEW_RASTERIZER,
+ upload_DEPTHSCALE
+};
+
+
+
+/***********************************************************************
+ * Polygon stipple
+ *
+ * The i915 supports a 4x4 stipple natively, GL wants 32x32.
+ * Fortunately stipple is usually a repeating pattern.
+ *
+ * XXX: does stipple pattern need to be adjusted according to
+ * the window position?
+ *
+ * XXX: possibly need workaround for conform paths test.
+ */
+
+static void upload_STIPPLE( struct i915_context *i915 )
+{
+ unsigned st[2];
+
+ st[0] = _3DSTATE_STIPPLE;
+ st[1] = 0;
+
+ /* I915_NEW_RASTERIZER
+ */
+ st[1] |= i915->rasterizer->st;
+
+
+ /* I915_NEW_STIPPLE
+ */
+ {
+ const ubyte *mask = (const ubyte *)i915->poly_stipple.stipple;
+ ubyte p[4];
+
+ p[0] = mask[12] & 0xf;
+ p[1] = mask[8] & 0xf;
+ p[2] = mask[4] & 0xf;
+ p[3] = mask[0] & 0xf;
+
+ /* Not sure what to do about fallbacks, so for now just dont:
+ */
+ st[1] |= ((p[0] << 0) |
+ (p[1] << 4) |
+ (p[2] << 8) |
+ (p[3] << 12));
+ }
+
+
+ set_dynamic_indirect( i915,
+ I915_DYNAMIC_STP_0,
+ &st[0],
+ 2 );
+}
+
+
+const struct i915_tracked_state i915_upload_STIPPLE = {
+ I915_NEW_RASTERIZER | I915_NEW_STIPPLE,
+ upload_STIPPLE
+};
+
+
+
+/***********************************************************************
+ * Scissor.
+ */
+static void upload_SCISSOR_ENABLE( struct i915_context *i915 )
+{
+ set_dynamic_indirect( i915,
+ I915_DYNAMIC_SC_ENA_0,
+ &(i915->rasterizer->sc[0]),
+ 1 );
+}
+
+const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = {
+ I915_NEW_RASTERIZER,
+ upload_SCISSOR_ENABLE
+};
+
+
+
+static void upload_SCISSOR_RECT( struct i915_context *i915 )
+{
+ unsigned x1 = i915->scissor.minx;
+ unsigned y1 = i915->scissor.miny;
+ unsigned x2 = i915->scissor.maxx;
+ unsigned y2 = i915->scissor.maxy;
+ unsigned sc[3];
+
+ sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD;
+ sc[1] = (y1 << 16) | (x1 & 0xffff);
+ sc[2] = (y2 << 16) | (x2 & 0xffff);
+
+ set_dynamic_indirect( i915,
+ I915_DYNAMIC_SC_RECT_0,
+ &sc[0],
+ 3 );
+}
+
+
+const struct i915_tracked_state i915_upload_SCISSOR_RECT = {
+ I915_NEW_SCISSOR,
+ upload_SCISSOR_RECT
+};
+
+
+
+
+
+
+static const struct i915_tracked_state *atoms[] = {
+ &i915_upload_MODES4,
+ &i915_upload_BFO,
+ &i915_upload_BLENDCOLOR,
+ &i915_upload_IAB,
+ &i915_upload_DEPTHSCALE,
+ &i915_upload_STIPPLE,
+ &i915_upload_SCISSOR_ENABLE,
+ &i915_upload_SCISSOR_RECT
+};
+
+/* These will be dynamic indirect state commands, but for now just end
+ * up on the batch buffer with everything else.
+ */
+void i915_update_dynamic( struct i915_context *i915 )
+{
+ int i;
+
+ for (i = 0; i < Elements(atoms); i++)
+ if (i915->dirty & atoms[i]->dirty)
+ atoms[i]->update( i915 );
+}
+
diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c
new file mode 100644
index 0000000000..9bd6f92323
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_state_emit.c
@@ -0,0 +1,402 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_winsys.h"
+#include "i915_batch.h"
+#include "i915_reg.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+
+static unsigned translate_format( enum pipe_format format )
+{
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return COLOR_BUF_ARGB8888;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ return COLOR_BUF_RGB565;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static unsigned translate_depth_format( enum pipe_format zformat )
+{
+ switch (zformat) {
+ case PIPE_FORMAT_S8Z24_UNORM:
+ return DEPTH_FRMT_24_FIXED_8_OTHER;
+ case PIPE_FORMAT_Z16_UNORM:
+ return DEPTH_FRMT_16_FIXED;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+
+/**
+ * Examine framebuffer state to determine width, height.
+ */
+static boolean
+framebuffer_size(const struct pipe_framebuffer_state *fb,
+ uint *width, uint *height)
+{
+ if (fb->cbufs[0]) {
+ *width = fb->cbufs[0]->width;
+ *height = fb->cbufs[0]->height;
+ return TRUE;
+ }
+ else if (fb->zsbuf) {
+ *width = fb->zsbuf->width;
+ *height = fb->zsbuf->height;
+ return TRUE;
+ }
+ else {
+ *width = *height = 0;
+ return FALSE;
+ }
+}
+
+
+/* Push the state into the sarea and/or texture memory.
+ */
+void
+i915_emit_hardware_state(struct i915_context *i915 )
+{
+ /* XXX: there must be an easier way */
+ const unsigned dwords = ( 14 +
+ 7 +
+ I915_MAX_DYNAMIC +
+ 8 +
+ 2 + I915_TEX_UNITS*3 +
+ 2 + I915_TEX_UNITS*3 +
+ 2 + I915_MAX_CONSTANT*4 +
+#if 0
+ i915->current.program_len +
+#else
+ i915->fs->program_len +
+#endif
+ 6
+ ) * 3/2; /* plus 50% margin */
+ const unsigned relocs = ( I915_TEX_UNITS +
+ 3
+ ) * 3/2; /* plus 50% margin */
+
+#if 0
+ debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs);
+#endif
+
+ if(!BEGIN_BATCH(dwords, relocs)) {
+ FLUSH_BATCH(NULL);
+ assert(BEGIN_BATCH(dwords, relocs));
+ }
+
+ /* 14 dwords, 0 relocs */
+ if (i915->hardware_dirty & I915_HW_INVARIENT)
+ {
+ OUT_BATCH(_3DSTATE_AA_CMD |
+ AA_LINE_ECAAR_WIDTH_ENABLE |
+ AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+
+ OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) |
+ CSB_TCB(6, 6) |
+ CSB_TCB(7, 7));
+
+ OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) |
+ ENABLE_TEXKILL_3D_4D |
+ TEXKILL_4D);
+
+ /* Need to initialize this to zero.
+ */
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0));
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
+
+ /* disable indirect state for now
+ */
+ OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
+ OUT_BATCH(0);
+ }
+
+ /* 7 dwords, 1 relocs */
+ if (i915->hardware_dirty & I915_HW_IMMEDIATE)
+ {
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ I1_LOAD_S(0) |
+ I1_LOAD_S(1) |
+ I1_LOAD_S(2) |
+ I1_LOAD_S(4) |
+ I1_LOAD_S(5) |
+ I1_LOAD_S(6) |
+ (5));
+
+ if(i915->vbo)
+ OUT_RELOC(i915->vbo,
+ I915_BUFFER_ACCESS_READ,
+ i915->current.immediate[I915_IMMEDIATE_S0]);
+ else
+ /* FIXME: we should not do this */
+ OUT_BATCH(0);
+ OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]);
+ OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]);
+ OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]);
+ OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]);
+ OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]);
+ }
+
+ /* I915_MAX_DYNAMIC dwords, 0 relocs */
+ if (i915->hardware_dirty & I915_HW_DYNAMIC)
+ {
+ int i;
+ for (i = 0; i < I915_MAX_DYNAMIC; i++) {
+ OUT_BATCH(i915->current.dynamic[i]);
+ }
+ }
+
+ /* 8 dwords, 2 relocs */
+ if (i915->hardware_dirty & I915_HW_STATIC)
+ {
+ struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
+ struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
+
+ if (cbuf_surface) {
+ unsigned cpitch = cbuf_surface->stride;
+ unsigned ctile = BUF_3D_USE_FENCE;
+ if (cbuf_surface->texture &&
+ ((struct i915_texture*)(cbuf_surface->texture))->tiled) {
+ ctile = BUF_3D_TILED_SURFACE;
+ }
+
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+
+ OUT_BATCH(BUF_3D_ID_COLOR_BACK |
+ BUF_3D_PITCH(cpitch) | /* pitch in bytes */
+ ctile);
+
+ OUT_RELOC(cbuf_surface->buffer,
+ I915_BUFFER_ACCESS_WRITE,
+ cbuf_surface->offset);
+ }
+
+ /* What happens if no zbuf??
+ */
+ if (depth_surface) {
+ unsigned zpitch = depth_surface->stride;
+ unsigned ztile = BUF_3D_USE_FENCE;
+ if (depth_surface->texture &&
+ ((struct i915_texture*)(depth_surface->texture))->tiled) {
+ ztile = BUF_3D_TILED_SURFACE;
+ }
+
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+
+ OUT_BATCH(BUF_3D_ID_DEPTH |
+ BUF_3D_PITCH(zpitch) | /* pitch in bytes */
+ ztile);
+
+ OUT_RELOC(depth_surface->buffer,
+ I915_BUFFER_ACCESS_WRITE,
+ depth_surface->offset);
+ }
+
+ {
+ unsigned cformat, zformat = 0;
+
+ if (cbuf_surface)
+ cformat = cbuf_surface->format;
+ else
+ cformat = PIPE_FORMAT_A8R8G8B8_UNORM; /* arbitrary */
+ cformat = translate_format(cformat);
+
+ if (depth_surface)
+ zformat = translate_depth_format( i915->framebuffer.zsbuf->format );
+
+ OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+ OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */
+ DSTORG_VERT_BIAS(0x8) | /* .5 */
+ LOD_PRECLAMP_OGL |
+ TEX_DEFAULT_COLOR_OGL |
+ cformat |
+ zformat );
+ }
+ }
+
+#if 01
+ /* texture images */
+ /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */
+ if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER))
+ {
+ const uint nr = i915->current.sampler_enable_nr;
+ if (nr) {
+ const uint enabled = i915->current.sampler_enable_flags;
+ uint unit;
+ uint count = 0;
+ OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
+ OUT_BATCH(enabled);
+ for (unit = 0; unit < I915_TEX_UNITS; unit++) {
+ if (enabled & (1 << unit)) {
+ struct pipe_buffer *buf =
+ i915->texture[unit]->buffer;
+ uint offset = 0;
+ assert(buf);
+
+ count++;
+
+ OUT_RELOC(buf,
+ I915_BUFFER_ACCESS_READ,
+ offset);
+ OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
+ OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
+ }
+ }
+ assert(count == nr);
+ }
+ }
+#endif
+
+#if 01
+ /* samplers */
+ /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */
+ if (i915->hardware_dirty & I915_HW_SAMPLER)
+ {
+ if (i915->current.sampler_enable_nr) {
+ int i;
+
+ OUT_BATCH( _3DSTATE_SAMPLER_STATE |
+ (3 * i915->current.sampler_enable_nr) );
+
+ OUT_BATCH( i915->current.sampler_enable_flags );
+
+ for (i = 0; i < I915_TEX_UNITS; i++) {
+ if (i915->current.sampler_enable_flags & (1<<i)) {
+ OUT_BATCH( i915->current.sampler[i][0] );
+ OUT_BATCH( i915->current.sampler[i][1] );
+ OUT_BATCH( i915->current.sampler[i][2] );
+ }
+ }
+ }
+ }
+#endif
+
+ /* constants */
+ /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */
+ if (i915->hardware_dirty & I915_HW_PROGRAM)
+ {
+ /* Collate the user-defined constants with the fragment shader's
+ * immediates according to the constant_flags[] array.
+ */
+ const uint nr = i915->fs->num_constants;
+ if (nr) {
+ uint i;
+
+ OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
+ OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) );
+
+ for (i = 0; i < nr; i++) {
+ const uint *c;
+ if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
+ /* grab user-defined constant */
+ c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i];
+ }
+ else {
+ /* emit program constant */
+ c = (uint *) i915->fs->constants[i];
+ }
+#if 0 /* debug */
+ {
+ float *f = (float *) c;
+ printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
+ (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
+ ? "user" : "immediate"));
+ }
+#endif
+ OUT_BATCH(*c++);
+ OUT_BATCH(*c++);
+ OUT_BATCH(*c++);
+ OUT_BATCH(*c++);
+ }
+ }
+ }
+
+ /* Fragment program */
+ /* i915->current.program_len dwords, 0 relocs */
+ if (i915->hardware_dirty & I915_HW_PROGRAM)
+ {
+ uint i;
+ /* we should always have, at least, a pass-through program */
+ assert(i915->fs->program_len > 0);
+ for (i = 0; i < i915->fs->program_len; i++) {
+ OUT_BATCH(i915->fs->program[i]);
+ }
+ }
+
+ /* drawing surface size */
+ /* 6 dwords, 0 relocs */
+ {
+ uint w, h;
+ boolean k = framebuffer_size(&i915->framebuffer, &w, &h);
+ (void)k;
+ assert(k);
+
+ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ }
+
+
+ i915->hardware_dirty = 0;
+}
diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c
new file mode 100644
index 0000000000..8c16bb4e27
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_state_immediate.c
@@ -0,0 +1,225 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "i915_state_inlines.h"
+#include "i915_context.h"
+#include "i915_state.h"
+#include "i915_reg.h"
+#include "util/u_memory.h"
+
+
+/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet.
+ * Would like to opportunistically recombine all these fragments into
+ * a single packet containing only what has changed, but for now emit
+ * as multiple packets.
+ */
+
+
+
+
+/***********************************************************************
+ * S0,S1: Vertex buffer state.
+ */
+static void upload_S0S1(struct i915_context *i915)
+{
+ unsigned LIS0, LIS1;
+
+ /* INTEL_NEW_VBO */
+ /* TODO: re-use vertex buffers here? */
+ LIS0 = i915->vbo_offset;
+
+ /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated!
+ */
+ {
+ unsigned vertex_size = i915->current.vertex_info.size;
+
+ LIS1 = ((vertex_size << 24) |
+ (vertex_size << 16));
+ }
+
+ /* INTEL_NEW_VBO */
+ /* TODO: use a vertex generation number to track vbo changes */
+ if (1 ||
+ i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 ||
+ i915->current.immediate[I915_IMMEDIATE_S1] != LIS1)
+ {
+ i915->current.immediate[I915_IMMEDIATE_S0] = LIS0;
+ i915->current.immediate[I915_IMMEDIATE_S1] = LIS1;
+ i915->hardware_dirty |= I915_HW_IMMEDIATE;
+ }
+}
+
+const struct i915_tracked_state i915_upload_S0S1 = {
+ I915_NEW_VBO | I915_NEW_VERTEX_FORMAT,
+ upload_S0S1
+};
+
+
+
+
+/***********************************************************************
+ * S4: Vertex format, rasterization state
+ */
+static void upload_S2S4(struct i915_context *i915)
+{
+ unsigned LIS2, LIS4;
+
+ /* I915_NEW_VERTEX_FORMAT */
+ {
+ LIS2 = i915->current.vertex_info.hwfmt[1];
+ LIS4 = i915->current.vertex_info.hwfmt[0];
+ /*
+ debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4);
+ */
+ assert(LIS4); /* should never be zero? */
+ }
+
+ LIS4 |= i915->rasterizer->LIS4;
+
+ if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] ||
+ LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) {
+
+ i915->current.immediate[I915_IMMEDIATE_S2] = LIS2;
+ i915->current.immediate[I915_IMMEDIATE_S4] = LIS4;
+ i915->hardware_dirty |= I915_HW_IMMEDIATE;
+ }
+}
+
+
+const struct i915_tracked_state i915_upload_S2S4 = {
+ I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT,
+ upload_S2S4
+};
+
+
+
+/***********************************************************************
+ *
+ */
+static void upload_S5( struct i915_context *i915 )
+{
+ unsigned LIS5 = 0;
+
+ LIS5 |= i915->depth_stencil->stencil_LIS5;
+
+ LIS5 |= i915->blend->LIS5;
+
+#if 0
+ /* I915_NEW_RASTERIZER */
+ if (i915->state.Polygon->OffsetFill) {
+ LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE;
+ }
+#endif
+
+
+ if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) {
+ i915->current.immediate[I915_IMMEDIATE_S5] = LIS5;
+ i915->hardware_dirty |= I915_HW_IMMEDIATE;
+ }
+}
+
+const struct i915_tracked_state i915_upload_S5 = {
+ (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER),
+ upload_S5
+};
+
+
+/***********************************************************************
+ */
+static void upload_S6( struct i915_context *i915 )
+{
+ unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT);
+
+ /* I915_NEW_FRAMEBUFFER
+ */
+ if (i915->framebuffer.cbufs[0])
+ LIS6 |= S6_COLOR_WRITE_ENABLE;
+
+ /* I915_NEW_BLEND
+ */
+ LIS6 |= i915->blend->LIS6;
+
+ /* I915_NEW_DEPTH
+ */
+ LIS6 |= i915->depth_stencil->depth_LIS6;
+
+ if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) {
+ i915->current.immediate[I915_IMMEDIATE_S6] = LIS6;
+ i915->hardware_dirty |= I915_HW_IMMEDIATE;
+ }
+}
+
+const struct i915_tracked_state i915_upload_S6 = {
+ I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER,
+ upload_S6
+};
+
+
+/***********************************************************************
+ */
+static void upload_S7( struct i915_context *i915 )
+{
+ unsigned LIS7;
+
+ /* I915_NEW_RASTERIZER
+ */
+ LIS7 = i915->rasterizer->LIS7;
+
+ if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) {
+ i915->current.immediate[I915_IMMEDIATE_S7] = LIS7;
+ i915->hardware_dirty |= I915_HW_IMMEDIATE;
+ }
+}
+
+const struct i915_tracked_state i915_upload_S7 = {
+ I915_NEW_RASTERIZER,
+ upload_S7
+};
+
+
+static const struct i915_tracked_state *atoms[] = {
+ &i915_upload_S0S1,
+ &i915_upload_S2S4,
+ &i915_upload_S5,
+ &i915_upload_S6,
+ &i915_upload_S7
+};
+
+/*
+ */
+void i915_update_immediate( struct i915_context *i915 )
+{
+ int i;
+
+ for (i = 0; i < Elements(atoms); i++)
+ if (i915->dirty & atoms[i]->dirty)
+ atoms[i]->update( i915 );
+}
diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915simple/i915_state_inlines.h
new file mode 100644
index 0000000000..378de8f9c4
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_state_inlines.h
@@ -0,0 +1,230 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef I915_STATE_INLINES_H
+#define I915_STATE_INLINES_H
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+#include "i915_reg.h"
+
+
+static INLINE unsigned
+i915_translate_compare_func(unsigned func)
+{
+ switch (func) {
+ case PIPE_FUNC_NEVER:
+ return COMPAREFUNC_NEVER;
+ case PIPE_FUNC_LESS:
+ return COMPAREFUNC_LESS;
+ case PIPE_FUNC_LEQUAL:
+ return COMPAREFUNC_LEQUAL;
+ case PIPE_FUNC_GREATER:
+ return COMPAREFUNC_GREATER;
+ case PIPE_FUNC_GEQUAL:
+ return COMPAREFUNC_GEQUAL;
+ case PIPE_FUNC_NOTEQUAL:
+ return COMPAREFUNC_NOTEQUAL;
+ case PIPE_FUNC_EQUAL:
+ return COMPAREFUNC_EQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return COMPAREFUNC_ALWAYS;
+ default:
+ return COMPAREFUNC_ALWAYS;
+ }
+}
+
+static INLINE unsigned
+i915_translate_stencil_op(unsigned op)
+{
+ switch (op) {
+ case PIPE_STENCIL_OP_KEEP:
+ return STENCILOP_KEEP;
+ case PIPE_STENCIL_OP_ZERO:
+ return STENCILOP_ZERO;
+ case PIPE_STENCIL_OP_REPLACE:
+ return STENCILOP_REPLACE;
+ case PIPE_STENCIL_OP_INCR:
+ return STENCILOP_INCRSAT;
+ case PIPE_STENCIL_OP_DECR:
+ return STENCILOP_DECRSAT;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ return STENCILOP_INCR;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ return STENCILOP_DECR;
+ case PIPE_STENCIL_OP_INVERT:
+ return STENCILOP_INVERT;
+ default:
+ return STENCILOP_ZERO;
+ }
+}
+
+static INLINE unsigned
+i915_translate_blend_factor(unsigned factor)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ return BLENDFACT_ZERO;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return BLENDFACT_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_ONE:
+ return BLENDFACT_ONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return BLENDFACT_SRC_COLR;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return BLENDFACT_INV_SRC_COLR;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return BLENDFACT_DST_COLR;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return BLENDFACT_INV_DST_COLR;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return BLENDFACT_INV_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return BLENDFACT_DST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return BLENDFACT_INV_DST_ALPHA;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return BLENDFACT_SRC_ALPHA_SATURATE;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return BLENDFACT_CONST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return BLENDFACT_INV_CONST_COLOR;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return BLENDFACT_CONST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return BLENDFACT_INV_CONST_ALPHA;
+ default:
+ return BLENDFACT_ZERO;
+ }
+}
+
+static INLINE unsigned
+i915_translate_blend_func(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_BLEND_ADD:
+ return BLENDFUNC_ADD;
+ case PIPE_BLEND_MIN:
+ return BLENDFUNC_MIN;
+ case PIPE_BLEND_MAX:
+ return BLENDFUNC_MAX;
+ case PIPE_BLEND_SUBTRACT:
+ return BLENDFUNC_SUBTRACT;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return BLENDFUNC_REVERSE_SUBTRACT;
+ default:
+ return 0;
+ }
+}
+
+
+static INLINE unsigned
+i915_translate_logic_op(unsigned opcode)
+{
+ switch (opcode) {
+ case PIPE_LOGICOP_CLEAR:
+ return LOGICOP_CLEAR;
+ case PIPE_LOGICOP_AND:
+ return LOGICOP_AND;
+ case PIPE_LOGICOP_AND_REVERSE:
+ return LOGICOP_AND_RVRSE;
+ case PIPE_LOGICOP_COPY:
+ return LOGICOP_COPY;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ return LOGICOP_COPY_INV;
+ case PIPE_LOGICOP_AND_INVERTED:
+ return LOGICOP_AND_INV;
+ case PIPE_LOGICOP_NOOP:
+ return LOGICOP_NOOP;
+ case PIPE_LOGICOP_XOR:
+ return LOGICOP_XOR;
+ case PIPE_LOGICOP_OR:
+ return LOGICOP_OR;
+ case PIPE_LOGICOP_OR_INVERTED:
+ return LOGICOP_OR_INV;
+ case PIPE_LOGICOP_NOR:
+ return LOGICOP_NOR;
+ case PIPE_LOGICOP_EQUIV:
+ return LOGICOP_EQUIV;
+ case PIPE_LOGICOP_INVERT:
+ return LOGICOP_INV;
+ case PIPE_LOGICOP_OR_REVERSE:
+ return LOGICOP_OR_RVRSE;
+ case PIPE_LOGICOP_NAND:
+ return LOGICOP_NAND;
+ case PIPE_LOGICOP_SET:
+ return LOGICOP_SET;
+ default:
+ return LOGICOP_SET;
+ }
+}
+
+
+
+static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr )
+{
+ boolean ok;
+
+ switch (hw_prim) {
+ case PRIM3D_POINTLIST:
+ ok = (nr >= 1);
+ assert(ok);
+ break;
+ case PRIM3D_LINELIST:
+ ok = (nr >= 2) && (nr % 2) == 0;
+ assert(ok);
+ break;
+ case PRIM3D_LINESTRIP:
+ ok = (nr >= 2);
+ assert(ok);
+ break;
+ case PRIM3D_TRILIST:
+ ok = (nr >= 3) && (nr % 3) == 0;
+ assert(ok);
+ break;
+ case PRIM3D_TRISTRIP:
+ ok = (nr >= 3);
+ assert(ok);
+ break;
+ case PRIM3D_TRIFAN:
+ ok = (nr >= 3);
+ assert(ok);
+ break;
+ case PRIM3D_POLY:
+ ok = (nr >= 3);
+ assert(ok);
+ break;
+ default:
+ assert(0);
+ ok = 0;
+ break;
+ }
+
+ return ok;
+}
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c
new file mode 100644
index 0000000000..c09c10601b
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_state_sampler.c
@@ -0,0 +1,299 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include "i915_state_inlines.h"
+#include "i915_context.h"
+#include "i915_reg.h"
+#include "i915_state.h"
+
+
+/*
+ * A note about min_lod & max_lod.
+ *
+ * There is a circular dependancy between the sampler state
+ * and the map state to be submitted to hw.
+ *
+ * Two condition must be meet:
+ * min_lod =< max_lod == true
+ * max_lod =< last_level == true
+ *
+ *
+ * This is all fine and dandy if it where for the fact that max_lod
+ * is set on the map state instead of the sampler state. That is
+ * the max_lod we submit on map is:
+ * max_lod = MIN2(last_level, max_lod);
+ *
+ * So we need to update the map state when we change samplers and
+ * we need to be change the sampler state when map state is changed.
+ * The first part is done by calling i915_update_texture in
+ * i915_update_samplers and the second part is done else where in
+ * code tracking the state changes.
+ */
+
+static void
+i915_update_texture(struct i915_context *i915,
+ uint unit,
+ const struct i915_texture *tex,
+ const struct i915_sampler_state *sampler,
+ uint state[6]);
+/**
+ * Compute i915 texture sampling state.
+ *
+ * Recalculate all state from scratch. Perhaps not the most
+ * efficient, but this has gotten complex enough that we need
+ * something which is understandable and reliable.
+ * \param state returns the 3 words of compute state
+ */
+static void update_sampler(struct i915_context *i915,
+ uint unit,
+ const struct i915_sampler_state *sampler,
+ const struct i915_texture *tex,
+ unsigned state[3] )
+{
+ const struct pipe_texture *pt = &tex->base;
+ unsigned minlod, lastlod;
+
+ /* Need to do this after updating the maps, which call the
+ * intel_finalize_mipmap_tree and hence can update firstLevel:
+ */
+ state[0] = sampler->state[0];
+ state[1] = sampler->state[1];
+ state[2] = sampler->state[2];
+
+ if (pt->format == PIPE_FORMAT_YCBCR ||
+ pt->format == PIPE_FORMAT_YCBCR_REV)
+ state[0] |= SS2_COLORSPACE_CONVERSION;
+
+ /* 3D textures don't seem to respect the border color.
+ * Fallback if there's ever a danger that they might refer to
+ * it.
+ *
+ * Effectively this means fallback on 3D clamp or
+ * clamp_to_border.
+ *
+ * XXX: Check if this is true on i945.
+ * XXX: Check if this bug got fixed in release silicon.
+ */
+#if 0
+ {
+ const unsigned ws = sampler->templ->wrap_s;
+ const unsigned wt = sampler->templ->wrap_t;
+ const unsigned wr = sampler->templ->wrap_r;
+ if (pt->target == PIPE_TEXTURE_3D &&
+ (sampler->templ->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
+ sampler->templ->mag_img_filter != PIPE_TEX_FILTER_NEAREST) &&
+ (ws == PIPE_TEX_WRAP_CLAMP ||
+ wt == PIPE_TEX_WRAP_CLAMP ||
+ wr == PIPE_TEX_WRAP_CLAMP ||
+ ws == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
+ wt == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
+ wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) {
+ if (i915->strict_conformance) {
+ assert(0);
+ /* sampler->fallback = true; */
+ /* TODO */
+ }
+ }
+ }
+#endif
+
+ /* See note at the top of file */
+ minlod = sampler->minlod;
+ lastlod = pt->last_level << 4;
+
+ if (lastlod < minlod) {
+ minlod = lastlod;
+ }
+
+ state[1] |= (sampler->minlod << SS3_MIN_LOD_SHIFT);
+ state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT);
+}
+
+
+void i915_update_samplers( struct i915_context *i915 )
+{
+ uint unit;
+
+ i915->current.sampler_enable_nr = 0;
+ i915->current.sampler_enable_flags = 0x0;
+
+ for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers;
+ unit++) {
+ /* determine unit enable/disable by looking for a bound texture */
+ /* could also examine the fragment program? */
+ if (i915->texture[unit]) {
+ update_sampler( i915,
+ unit,
+ i915->sampler[unit], /* sampler state */
+ i915->texture[unit], /* texture */
+ i915->current.sampler[unit] /* the result */
+ );
+ i915_update_texture( i915,
+ unit,
+ i915->texture[unit], /* texture */
+ i915->sampler[unit], /* sampler state */
+ i915->current.texbuffer[unit] );
+
+ i915->current.sampler_enable_nr++;
+ i915->current.sampler_enable_flags |= (1 << unit);
+ }
+ }
+
+ i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP;
+}
+
+
+static uint
+translate_texture_format(enum pipe_format pipeFormat)
+{
+ switch (pipeFormat) {
+ case PIPE_FORMAT_L8_UNORM:
+ return MAPSURF_8BIT | MT_8BIT_L8;
+ case PIPE_FORMAT_I8_UNORM:
+ return MAPSURF_8BIT | MT_8BIT_I8;
+ case PIPE_FORMAT_A8_UNORM:
+ return MAPSURF_8BIT | MT_8BIT_A8;
+ case PIPE_FORMAT_A8L8_UNORM:
+ return MAPSURF_16BIT | MT_16BIT_AY88;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ return MAPSURF_16BIT | MT_16BIT_RGB565;
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ return MAPSURF_16BIT | MT_16BIT_ARGB1555;
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ return MAPSURF_16BIT | MT_16BIT_ARGB4444;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return MAPSURF_32BIT | MT_32BIT_ARGB8888;
+ case PIPE_FORMAT_YCBCR_REV:
+ return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
+ case PIPE_FORMAT_YCBCR:
+ return (MAPSURF_422 | MT_422_YCRCB_SWAPY);
+#if 0
+ case PIPE_FORMAT_RGB_FXT1:
+ case PIPE_FORMAT_RGBA_FXT1:
+ return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1);
+#endif
+ case PIPE_FORMAT_Z16_UNORM:
+ return (MAPSURF_16BIT | MT_16BIT_L16);
+#if 0
+ case PIPE_FORMAT_RGBA_DXT1:
+ case PIPE_FORMAT_RGB_DXT1:
+ return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1);
+ case PIPE_FORMAT_RGBA_DXT3:
+ return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3);
+ case PIPE_FORMAT_RGBA_DXT5:
+ return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
+#endif
+ case PIPE_FORMAT_S8Z24_UNORM:
+ return (MAPSURF_32BIT | MT_32BIT_xI824);
+ default:
+ debug_printf("i915: translate_texture_format() bad image format %x\n",
+ pipeFormat);
+ assert(0);
+ return 0;
+ }
+}
+
+
+static void
+i915_update_texture(struct i915_context *i915,
+ uint unit,
+ const struct i915_texture *tex,
+ const struct i915_sampler_state *sampler,
+ uint state[6])
+{
+ const struct pipe_texture *pt = &tex->base;
+ uint format, pitch;
+ const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+ const uint num_levels = pt->last_level;
+ unsigned max_lod = num_levels * 4;
+ unsigned tiled = MS3_USE_FENCE_REGS;
+
+ assert(tex);
+ assert(width);
+ assert(height);
+ assert(depth);
+
+ format = translate_texture_format(pt->format);
+ pitch = tex->stride;
+
+ assert(format);
+ assert(pitch);
+
+ if (tex->tiled) {
+ assert(!((pitch - 1) & pitch));
+ tiled = MS3_TILED_SURFACE;
+ }
+
+ /* MS3 state */
+ state[0] =
+ (((height - 1) << MS3_HEIGHT_SHIFT)
+ | ((width - 1) << MS3_WIDTH_SHIFT)
+ | format
+ | tiled);
+
+ /*
+ * XXX When min_filter != mag_filter and there's just one mipmap level,
+ * set max_lod = 1 to make sure i915 chooses between min/mag filtering.
+ */
+
+ /* See note at the top of file */
+ if (max_lod > (sampler->maxlod >> 2))
+ max_lod = sampler->maxlod >> 2;
+
+ /* MS4 state */
+ state[1] =
+ ((((pitch / 4) - 1) << MS4_PITCH_SHIFT)
+ | MS4_CUBE_FACE_ENA_MASK
+ | ((max_lod) << MS4_MAX_LOD_SHIFT)
+ | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT));
+}
+
+
+void
+i915_update_textures(struct i915_context *i915)
+{
+ uint unit;
+
+ for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers;
+ unit++) {
+ /* determine unit enable/disable by looking for a bound texture */
+ /* could also examine the fragment program? */
+ if (i915->texture[unit]) {
+ i915_update_texture( i915,
+ unit,
+ i915->texture[unit], /* texture */
+ i915->sampler[unit], /* sampler state */
+ i915->current.texbuffer[unit] );
+ }
+ }
+
+ i915->hardware_dirty |= I915_HW_MAP;
+}
diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c
new file mode 100644
index 0000000000..62f1926644
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_surface.c
@@ -0,0 +1,123 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "i915_context.h"
+#include "i915_blit.h"
+#include "i915_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_tile.h"
+#include "util/u_rect.h"
+
+
+/* Assumes all values are within bounds -- no checking at this level -
+ * do it higher up if required.
+ */
+static void
+i915_surface_copy(struct pipe_context *pipe,
+ boolean do_flip,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ struct pipe_surface *src,
+ unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+ assert( dst != src );
+ assert( dst->block.size == src->block.size );
+ assert( dst->block.width == src->block.height );
+ assert( dst->block.height == src->block.height );
+
+ if (0) {
+ void *dst_map = pipe->screen->surface_map( pipe->screen,
+ dst,
+ PIPE_BUFFER_USAGE_CPU_WRITE );
+
+ const void *src_map = pipe->screen->surface_map( pipe->screen,
+ src,
+ PIPE_BUFFER_USAGE_CPU_READ );
+
+ pipe_copy_rect(dst_map,
+ &dst->block,
+ dst->stride,
+ dstx, dsty,
+ width, height,
+ src_map,
+ do_flip ? -(int) src->stride : src->stride,
+ srcx, do_flip ? height - 1 - srcy : srcy);
+
+ pipe->screen->surface_unmap(pipe->screen, src);
+ pipe->screen->surface_unmap(pipe->screen, dst);
+ }
+ else {
+ assert(dst->block.width == 1);
+ assert(dst->block.height == 1);
+ i915_copy_blit( i915_context(pipe),
+ do_flip,
+ dst->block.size,
+ (short) src->stride, src->buffer, src->offset,
+ (short) dst->stride, dst->buffer, dst->offset,
+ (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height );
+ }
+}
+
+
+static void
+i915_surface_fill(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height, unsigned value)
+{
+ if (0) {
+ void *dst_map = pipe->screen->surface_map( pipe->screen,
+ dst,
+ PIPE_BUFFER_USAGE_CPU_WRITE );
+
+ pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value);
+
+ pipe->screen->surface_unmap(pipe->screen, dst);
+ }
+ else {
+ assert(dst->block.width == 1);
+ assert(dst->block.height == 1);
+ i915_fill_blit( i915_context(pipe),
+ dst->block.size,
+ (short) dst->stride,
+ dst->buffer, dst->offset,
+ (short) dstx, (short) dsty,
+ (short) width, (short) height,
+ value );
+ }
+}
+
+
+void
+i915_init_surface_functions(struct i915_context *i915)
+{
+ i915->pipe.surface_copy = i915_surface_copy;
+ i915->pipe.surface_fill = i915_surface_fill;
+}
diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c
new file mode 100644
index 0000000000..af823f2d3c
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_texture.c
@@ -0,0 +1,774 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Michel Dänzer <michel@tungstengraphics.com>
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "i915_context.h"
+#include "i915_texture.h"
+#include "i915_debug.h"
+#include "i915_screen.h"
+
+/*
+ * Helper function and arrays
+ */
+
+/**
+ * Initial offset for Cube map.
+ */
+static const int initial_offsets[6][2] = {
+ {0, 0},
+ {0, 2},
+ {1, 0},
+ {1, 2},
+ {1, 1},
+ {1, 3}
+};
+
+/**
+ * Step offsets for Cube map.
+ */
+static const int step_offsets[6][2] = {
+ {0, 2},
+ {0, 2},
+ {-1, 2},
+ {-1, 2},
+ {-1, 1},
+ {-1, 1}
+};
+
+static unsigned minify( unsigned d )
+{
+ return MAX2(1, d>>1);
+}
+
+static unsigned
+power_of_two(unsigned x)
+{
+ unsigned value = 1;
+ while (value < x)
+ value = value << 1;
+ return value;
+}
+
+static unsigned
+round_up(unsigned n, unsigned multiple)
+{
+ return (n + multiple - 1) & ~(multiple - 1);
+}
+
+
+/*
+ * More advanced helper funcs
+ */
+
+
+static void
+i915_miptree_set_level_info(struct i915_texture *tex,
+ unsigned level,
+ unsigned nr_images,
+ unsigned w, unsigned h, unsigned d)
+{
+ struct pipe_texture *pt = &tex->base;
+
+ assert(level < PIPE_MAX_TEXTURE_LEVELS);
+
+ pt->width[level] = w;
+ pt->height[level] = h;
+ pt->depth[level] = d;
+
+ pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w);
+ pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h);
+
+ tex->nr_images[level] = nr_images;
+
+ /*
+ DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
+ level, w, h, d, x, y, tex->level_offset[level]);
+ */
+
+ /* Not sure when this would happen, but anyway:
+ */
+ if (tex->image_offset[level]) {
+ FREE(tex->image_offset[level]);
+ tex->image_offset[level] = NULL;
+ }
+
+ assert(nr_images);
+ assert(!tex->image_offset[level]);
+
+ tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned));
+ tex->image_offset[level][0] = 0;
+}
+
+static void
+i915_miptree_set_image_offset(struct i915_texture *tex,
+ unsigned level, unsigned img, unsigned x, unsigned y)
+{
+ if (img == 0 && level == 0)
+ assert(x == 0 && y == 0);
+
+ assert(img < tex->nr_images[level]);
+
+ tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size;
+
+ /*
+ printf("%s level %d img %d pos %d,%d image_offset %x\n",
+ __FUNCTION__, level, img, x, y, tex->image_offset[level][img]);
+ */
+}
+
+
+/*
+ * Layout functions
+ */
+
+
+/**
+ * Special case to deal with display targets.
+ */
+static boolean
+i915_displaytarget_layout(struct i915_texture *tex)
+{
+ struct pipe_texture *pt = &tex->base;
+
+ if (pt->last_level > 0 || pt->block.size != 4)
+ return 0;
+
+ i915_miptree_set_level_info( tex, 0, 1,
+ tex->base.width[0],
+ tex->base.height[0],
+ 1 );
+ i915_miptree_set_image_offset( tex, 0, 0, 0, 0 );
+
+ if (tex->base.width[0] >= 128) {
+ tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
+ tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
+#if 0 /* used for tiled display targets */
+ tex->tiled = 1;
+#endif
+ } else {
+ tex->stride = round_up(tex->base.nblocksx[0] * pt->block.size, 64);
+ tex->total_nblocksy = tex->base.nblocksy[0];
+ }
+
+ /*
+ printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
+ tex->base.width[0], tex->base.height[0], pt->block.size,
+ tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
+ */
+
+ return 1;
+}
+
+static void
+i945_miptree_layout_2d( struct i915_texture *tex )
+{
+ struct pipe_texture *pt = &tex->base;
+ const int align_x = 2, align_y = 4;
+ unsigned level;
+ unsigned x = 0;
+ unsigned y = 0;
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+ unsigned nblocksx = pt->nblocksx[0];
+ unsigned nblocksy = pt->nblocksy[0];
+
+ /* used for tiled display targets */
+ if (0)
+ if (i915_displaytarget_layout(tex))
+ return;
+
+ tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+
+ /* May need to adjust pitch to accomodate the placement of
+ * the 2nd mipmap level. This occurs when the alignment
+ * constraints of mipmap placement push the right edge of the
+ * 2nd mipmap level out past the width of its parent.
+ */
+ if (pt->last_level > 0) {
+ unsigned mip1_nblocksx
+ = align(pf_get_nblocksx(&pt->block, minify(width)), align_x)
+ + pf_get_nblocksx(&pt->block, minify(minify(width)));
+
+ if (mip1_nblocksx > nblocksx)
+ tex->stride = mip1_nblocksx * pt->block.size;
+ }
+
+ /* Pitch must be a whole number of dwords
+ */
+ tex->stride = align(tex->stride, 64);
+ tex->total_nblocksy = 0;
+
+ for (level = 0; level <= pt->last_level; level++) {
+ i915_miptree_set_level_info(tex, level, 1, width, height, 1);
+ i915_miptree_set_image_offset(tex, level, 0, x, y);
+
+ nblocksy = align(nblocksy, align_y);
+
+ /* Because the images are packed better, the final offset
+ * might not be the maximal one:
+ */
+ tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy);
+
+ /* Layout_below: step right after second mipmap level.
+ */
+ if (level == 1) {
+ x += align(nblocksx, align_x);
+ }
+ else {
+ y += nblocksy;
+ }
+
+ width = minify(width);
+ height = minify(height);
+ nblocksx = pf_get_nblocksx(&pt->block, width);
+ nblocksy = pf_get_nblocksy(&pt->block, height);
+ }
+}
+
+static void
+i945_miptree_layout_cube(struct i915_texture *tex)
+{
+ struct pipe_texture *pt = &tex->base;
+ unsigned level;
+
+ const unsigned nblocks = pt->nblocksx[0];
+ unsigned face;
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+
+ /*
+ printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]);
+ */
+
+ assert(width == height); /* cubemap images are square */
+
+ /*
+ * XXX Should only be used for compressed formats. But lets
+ * keep this code active just in case.
+ *
+ * Depending on the size of the largest images, pitch can be
+ * determined either by the old-style packing of cubemap faces,
+ * or the final row of 4x4, 2x2 and 1x1 faces below this.
+ */
+ if (nblocks > 32)
+ tex->stride = round_up(nblocks * pt->block.size * 2, 4);
+ else
+ tex->stride = 14 * 8 * pt->block.size;
+
+ tex->total_nblocksy = nblocks * 4;
+
+ /* Set all the levels to effectively occupy the whole rectangular region.
+ */
+ for (level = 0; level <= pt->last_level; level++) {
+ i915_miptree_set_level_info(tex, level, 6, width, height, 1);
+ width /= 2;
+ height /= 2;
+ }
+
+ for (face = 0; face < 6; face++) {
+ unsigned x = initial_offsets[face][0] * nblocks;
+ unsigned y = initial_offsets[face][1] * nblocks;
+ unsigned d = nblocks;
+
+#if 0 /* Fix and enable this code for compressed formats */
+ if (nblocks == 4 && face >= 4) {
+ y = tex->total_height - 4;
+ x = (face - 4) * 8;
+ }
+ else if (nblocks < 4 && (face > 0)) {
+ y = tex->total_height - 4;
+ x = face * 8;
+ }
+#endif
+
+ for (level = 0; level <= pt->last_level; level++) {
+ i915_miptree_set_image_offset(tex, level, face, x, y);
+
+ d >>= 1;
+
+#if 0 /* Fix and enable this code for compressed formats */
+ switch (d) {
+ case 4:
+ switch (face) {
+ case PIPE_TEX_FACE_POS_X:
+ case PIPE_TEX_FACE_NEG_X:
+ x += step_offsets[face][0] * d;
+ y += step_offsets[face][1] * d;
+ break;
+ case PIPE_TEX_FACE_POS_Y:
+ case PIPE_TEX_FACE_NEG_Y:
+ y += 12;
+ x -= 8;
+ break;
+ case PIPE_TEX_FACE_POS_Z:
+ case PIPE_TEX_FACE_NEG_Z:
+ y = tex->total_height - 4;
+ x = (face - 4) * 8;
+ break;
+ }
+ case 2:
+ y = tex->total_height - 4;
+ x = 16 + face * 8;
+ break;
+
+ case 1:
+ x += 48;
+ break;
+ default:
+#endif
+ x += step_offsets[face][0] * d;
+ y += step_offsets[face][1] * d;
+#if 0
+ break;
+ }
+#endif
+ }
+ }
+}
+
+static boolean
+i915_miptree_layout(struct i915_texture * tex)
+{
+ struct pipe_texture *pt = &tex->base;
+ unsigned level;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE: {
+ const unsigned nblocks = pt->nblocksx[0];
+ unsigned face;
+ unsigned width = pt->width[0], height = pt->height[0];
+
+ assert(width == height); /* cubemap images are square */
+
+ /* double pitch for cube layouts */
+ tex->stride = round_up(nblocks * pt->block.size * 2, 4);
+ tex->total_nblocksy = nblocks * 4;
+
+ for (level = 0; level <= pt->last_level; level++) {
+ i915_miptree_set_level_info(tex, level, 6,
+ width, height,
+ 1);
+ width /= 2;
+ height /= 2;
+ }
+
+ for (face = 0; face < 6; face++) {
+ unsigned x = initial_offsets[face][0] * nblocks;
+ unsigned y = initial_offsets[face][1] * nblocks;
+ unsigned d = nblocks;
+
+ for (level = 0; level <= pt->last_level; level++) {
+ i915_miptree_set_image_offset(tex, level, face, x, y);
+ d >>= 1;
+ x += step_offsets[face][0] * d;
+ y += step_offsets[face][1] * d;
+ }
+ }
+ break;
+ }
+ case PIPE_TEXTURE_3D:{
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+ unsigned depth = pt->depth[0];
+ unsigned nblocksx = pt->nblocksx[0];
+ unsigned nblocksy = pt->nblocksy[0];
+ unsigned stack_nblocksy = 0;
+
+ /* Calculate the size of a single slice.
+ */
+ tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+
+ /* XXX: hardware expects/requires 9 levels at minimum.
+ */
+ for (level = 0; level <= MAX2(8, pt->last_level);
+ level++) {
+ i915_miptree_set_level_info(tex, level, depth,
+ width, height, depth);
+
+
+ stack_nblocksy += MAX2(2, nblocksy);
+
+ width = minify(width);
+ height = minify(height);
+ depth = minify(depth);
+ nblocksx = pf_get_nblocksx(&pt->block, width);
+ nblocksy = pf_get_nblocksy(&pt->block, height);
+ }
+
+ /* Fixup depth image_offsets:
+ */
+ depth = pt->depth[0];
+ for (level = 0; level <= pt->last_level; level++) {
+ unsigned i;
+ for (i = 0; i < depth; i++)
+ i915_miptree_set_image_offset(tex, level, i,
+ 0, i * stack_nblocksy);
+
+ depth = minify(depth);
+ }
+
+
+ /* Multiply slice size by texture depth for total size. It's
+ * remarkable how wasteful of memory the i915 texture layouts
+ * are. They are largely fixed in the i945.
+ */
+ tex->total_nblocksy = stack_nblocksy * pt->depth[0];
+ break;
+ }
+
+ default:{
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+ unsigned nblocksx = pt->nblocksx[0];
+ unsigned nblocksy = pt->nblocksy[0];
+
+ tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+ tex->total_nblocksy = 0;
+
+ for (level = 0; level <= pt->last_level; level++) {
+ i915_miptree_set_level_info(tex, level, 1,
+ width, height, 1);
+ i915_miptree_set_image_offset(tex, level, 0,
+ 0, tex->total_nblocksy);
+
+ nblocksy = round_up(MAX2(2, nblocksy), 2);
+
+ tex->total_nblocksy += nblocksy;
+
+ width = minify(width);
+ height = minify(height);
+ nblocksx = pf_get_nblocksx(&pt->block, width);
+ nblocksy = pf_get_nblocksy(&pt->block, height);
+ }
+ break;
+ }
+ }
+ /*
+ DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+ tex->pitch,
+ tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy);
+ */
+
+ return TRUE;
+}
+
+
+static boolean
+i945_miptree_layout(struct i915_texture * tex)
+{
+ struct pipe_texture *pt = &tex->base;
+ unsigned level;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ i945_miptree_layout_cube(tex);
+ break;
+ case PIPE_TEXTURE_3D:{
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+ unsigned depth = pt->depth[0];
+ unsigned nblocksx = pt->nblocksx[0];
+ unsigned nblocksy = pt->nblocksy[0];
+ unsigned pack_x_pitch, pack_x_nr;
+ unsigned pack_y_pitch;
+
+ tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+ tex->total_nblocksy = 0;
+
+ pack_y_pitch = MAX2(pt->nblocksy[0], 2);
+ pack_x_pitch = tex->stride / pt->block.size;
+ pack_x_nr = 1;
+
+ for (level = 0; level <= pt->last_level; level++) {
+ unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6;
+ int x = 0;
+ int y = 0;
+ unsigned q, j;
+
+ i915_miptree_set_level_info(tex, level, nr_images,
+ width, height, depth);
+
+ for (q = 0; q < nr_images;) {
+ for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+ i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_nblocksy);
+ x += pack_x_pitch;
+ }
+
+ x = 0;
+ y += pack_y_pitch;
+ }
+
+
+ tex->total_nblocksy += y;
+
+ if (pack_x_pitch > 4) {
+ pack_x_pitch >>= 1;
+ pack_x_nr <<= 1;
+ assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride);
+ }
+
+ if (pack_y_pitch > 2) {
+ pack_y_pitch >>= 1;
+ }
+
+ width = minify(width);
+ height = minify(height);
+ depth = minify(depth);
+ nblocksx = pf_get_nblocksx(&pt->block, width);
+ nblocksy = pf_get_nblocksy(&pt->block, height);
+ }
+ break;
+ }
+
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_2D:
+// case PIPE_TEXTURE_RECTANGLE:
+ i945_miptree_layout_2d(tex);
+ break;
+ default:
+ assert(0);
+ return FALSE;
+ }
+
+ /*
+ DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+ tex->pitch,
+ tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy);
+ */
+
+ return TRUE;
+}
+
+
+static struct pipe_texture *
+i915_texture_create(struct pipe_screen *screen,
+ const struct pipe_texture *templat)
+{
+ struct i915_screen *i915screen = i915_screen(screen);
+ struct pipe_winsys *ws = screen->winsys;
+ struct i915_texture *tex = CALLOC_STRUCT(i915_texture);
+ size_t tex_size;
+
+ if (!tex)
+ return NULL;
+
+ tex->base = *templat;
+ tex->base.refcount = 1;
+ tex->base.screen = screen;
+
+ tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]);
+ tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]);
+
+ if (i915screen->is_i945) {
+ if (!i945_miptree_layout(tex))
+ goto fail;
+ } else {
+ if (!i915_miptree_layout(tex))
+ goto fail;
+ }
+
+ tex_size = tex->stride * tex->total_nblocksy;
+
+ tex->buffer = ws->buffer_create(ws, 64,
+ PIPE_BUFFER_USAGE_PIXEL,
+ tex_size);
+
+ if (!tex->buffer)
+ goto fail;
+
+#if 0
+ void *ptr = ws->buffer_map(ws, tex->buffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ memset(ptr, 0x80, tex_size);
+ ws->buffer_unmap(ws, tex->buffer);
+#endif
+
+ return &tex->base;
+
+fail:
+ FREE(tex);
+ return NULL;
+}
+
+
+static void
+i915_texture_release(struct pipe_screen *screen,
+ struct pipe_texture **pt)
+{
+ if (!*pt)
+ return;
+
+ /*
+ DBG("%s %p refcount will be %d\n",
+ __FUNCTION__, (void *) *pt, (*pt)->refcount - 1);
+ */
+ if (--(*pt)->refcount <= 0) {
+ struct i915_texture *tex = (struct i915_texture *)*pt;
+ uint i;
+
+ /*
+ DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
+ */
+
+ pipe_buffer_reference(screen, &tex->buffer, NULL);
+
+ for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
+ if (tex->image_offset[i])
+ FREE(tex->image_offset[i]);
+
+ FREE(tex);
+ }
+ *pt = NULL;
+}
+
+static struct pipe_surface *
+i915_get_tex_surface(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct i915_texture *tex = (struct i915_texture *)pt;
+ struct pipe_winsys *ws = screen->winsys;
+ struct pipe_surface *ps;
+ unsigned offset; /* in bytes */
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ offset = tex->image_offset[level][face];
+ }
+ else if (pt->target == PIPE_TEXTURE_3D) {
+ offset = tex->image_offset[level][zslice];
+ }
+ else {
+ offset = tex->image_offset[level][0];
+ assert(face == 0);
+ assert(zslice == 0);
+ }
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (ps) {
+ ps->refcount = 1;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(screen, &ps->buffer, tex->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = tex->stride;
+ ps->offset = offset;
+ ps->usage = flags;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+ }
+ return ps;
+}
+
+static struct pipe_texture *
+i915_texture_blanket(struct pipe_screen * screen,
+ const struct pipe_texture *base,
+ const unsigned *stride,
+ struct pipe_buffer *buffer)
+{
+ struct i915_texture *tex;
+ assert(screen);
+
+ /* Only supports one type */
+ if (base->target != PIPE_TEXTURE_2D ||
+ base->last_level != 0 ||
+ base->depth[0] != 1) {
+ return NULL;
+ }
+
+ tex = CALLOC_STRUCT(i915_texture);
+ if (!tex)
+ return NULL;
+
+ tex->base = *base;
+ tex->base.refcount = 1;
+ tex->base.screen = screen;
+
+ tex->stride = stride[0];
+
+ i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1);
+ i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
+
+ pipe_buffer_reference(screen, &tex->buffer, buffer);
+
+ return &tex->base;
+}
+
+void
+i915_init_texture_functions(struct i915_context *i915)
+{
+// i915->pipe.texture_update = i915_texture_update;
+}
+
+static void
+i915_tex_surface_release(struct pipe_screen *screen,
+ struct pipe_surface **surface)
+{
+ struct pipe_surface *surf = *surface;
+
+ if (--surf->refcount == 0) {
+
+ /* This really should not be possible, but it's actually
+ * happening quite a bit... Will fix.
+ */
+ if (surf->status == PIPE_SURFACE_STATUS_CLEAR) {
+ debug_printf("XXX destroying a surface with pending clears...\n");
+ assert(0);
+ }
+
+ pipe_texture_reference(&surf->texture, NULL);
+ pipe_buffer_reference(screen, &surf->buffer, NULL);
+ FREE(surf);
+ }
+
+ *surface = NULL;
+}
+
+void
+i915_init_screen_texture_functions(struct pipe_screen *screen)
+{
+ screen->texture_create = i915_texture_create;
+ screen->texture_release = i915_texture_release;
+ screen->get_tex_surface = i915_get_tex_surface;
+ screen->texture_blanket = i915_texture_blanket;
+ screen->tex_surface_release = i915_tex_surface_release;
+}
diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h
new file mode 100644
index 0000000000..7225016a9f
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_texture.h
@@ -0,0 +1,43 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef I915_TEXTURE_H
+#define I915_TEXTURE_H
+
+struct i915_context;
+struct pipe_screen;
+
+
+extern void
+i915_init_texture_functions(struct i915_context *i915);
+
+
+extern void
+i915_init_screen_texture_functions(struct pipe_screen *screen);
+
+
+#endif /* I915_TEXTURE_H */
diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h
new file mode 100644
index 0000000000..81904c2a74
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_winsys.h
@@ -0,0 +1,121 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * This is the interface that i915simple requires any window system
+ * hosting it to implement. This is the only include file in i915simple
+ * which is public.
+ *
+ */
+
+#ifndef I915_WINSYS_H
+#define I915_WINSYS_H
+
+
+#include "pipe/p_defines.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Pipe drivers are (meant to be!) independent of both GL and the
+ * window system. The window system provides a buffer manager and a
+ * set of additional hooks for things like command buffer submission,
+ * etc.
+ *
+ * There clearly has to be some agreement between the window system
+ * driver and the hardware driver about the format of command buffers,
+ * etc.
+ */
+
+struct i915_batchbuffer;
+struct pipe_buffer;
+struct pipe_fence_handle;
+struct pipe_winsys;
+struct pipe_screen;
+
+
+/**
+ * Additional winsys interface for i915simple.
+ *
+ * It is an over-simple batchbuffer mechanism. Will want to improve the
+ * performance of this, perhaps based on the cmdstream stuff. It
+ * would be pretty impossible to implement swz on top of this
+ * interface.
+ *
+ * Will also need additions/changes to implement static/dynamic
+ * indirect state.
+ */
+struct i915_winsys {
+
+ void (*destroy)( struct i915_winsys *sws );
+
+ /**
+ * Get the current batch buffer from the winsys.
+ */
+ struct i915_batchbuffer *(*batch_get)( struct i915_winsys *sws );
+
+ /**
+ * Emit a relocation to a buffer.
+ *
+ * Used not only when the buffer addresses are not pinned, but also to
+ * ensure refered buffers will not be destroyed until the current batch
+ * buffer execution is finished.
+ *
+ * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and
+ * I915_BUFFER_ACCESS_READ macros.
+ */
+ void (*batch_reloc)( struct i915_winsys *sws,
+ struct pipe_buffer *buf,
+ unsigned access_flags,
+ unsigned delta );
+
+ /**
+ * Flush the batch.
+ */
+ void (*batch_flush)( struct i915_winsys *sws,
+ struct pipe_fence_handle **fence );
+};
+
+#define I915_BUFFER_ACCESS_WRITE 0x1
+#define I915_BUFFER_ACCESS_READ 0x2
+
+#define I915_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0)
+
+
+struct pipe_context *i915_create_context( struct pipe_screen *,
+ struct pipe_winsys *,
+ struct i915_winsys * );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile
new file mode 100644
index 0000000000..e97146e57c
--- /dev/null
+++ b/src/gallium/drivers/i965simple/Makefile
@@ -0,0 +1,54 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i965simple
+
+C_SOURCES = \
+ brw_blit.c \
+ brw_flush.c \
+ brw_screen.c \
+ brw_surface.c \
+ brw_cc.c \
+ brw_clip.c \
+ brw_clip_line.c \
+ brw_clip_point.c \
+ brw_clip_state.c \
+ brw_clip_tri.c \
+ brw_clip_util.c \
+ brw_context.c \
+ brw_curbe.c \
+ brw_draw.c \
+ brw_draw_upload.c \
+ brw_eu.c \
+ brw_eu_debug.c \
+ brw_eu_emit.c \
+ brw_eu_util.c \
+ brw_gs.c \
+ brw_gs_emit.c \
+ brw_gs_state.c \
+ brw_misc_state.c \
+ brw_sf.c \
+ brw_sf_emit.c \
+ brw_sf_state.c \
+ brw_state.c \
+ brw_state_batch.c \
+ brw_state_cache.c \
+ brw_state_pool.c \
+ brw_state_upload.c \
+ brw_tex_layout.c \
+ brw_urb.c \
+ brw_util.c \
+ brw_vs.c \
+ brw_vs_emit.c \
+ brw_vs_state.c \
+ brw_wm.c \
+ brw_wm_iz.c \
+ brw_wm_decl.c \
+ brw_wm_glsl.c \
+ brw_wm_sampler_state.c \
+ brw_wm_state.c \
+ brw_wm_surface_state.c
+
+include ../../Makefile.template
+
+symlinks:
diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript
new file mode 100644
index 0000000000..43fc2a4005
--- /dev/null
+++ b/src/gallium/drivers/i965simple/SConscript
@@ -0,0 +1,54 @@
+Import('*')
+
+env = env.Clone()
+
+i965simple = env.ConvenienceLibrary(
+ target = 'i965simple',
+ source = [
+ 'brw_blit.c',
+ 'brw_cc.c',
+ 'brw_clip.c',
+ 'brw_clip_line.c',
+ 'brw_clip_point.c',
+ 'brw_clip_state.c',
+ 'brw_clip_tri.c',
+ 'brw_clip_util.c',
+ 'brw_context.c',
+ 'brw_curbe.c',
+ 'brw_draw.c',
+ 'brw_draw_upload.c',
+ 'brw_eu.c',
+ 'brw_eu_debug.c',
+ 'brw_eu_emit.c',
+ 'brw_eu_util.c',
+ 'brw_flush.c',
+ 'brw_gs.c',
+ 'brw_gs_emit.c',
+ 'brw_gs_state.c',
+ 'brw_misc_state.c',
+ 'brw_screen.c',
+ 'brw_sf.c',
+ 'brw_sf_emit.c',
+ 'brw_sf_state.c',
+ 'brw_state.c',
+ 'brw_state_batch.c',
+ 'brw_state_cache.c',
+ 'brw_state_pool.c',
+ 'brw_state_upload.c',
+ 'brw_surface.c',
+ 'brw_tex_layout.c',
+ 'brw_urb.c',
+ 'brw_util.c',
+ 'brw_vs.c',
+ 'brw_vs_emit.c',
+ 'brw_vs_state.c',
+ 'brw_wm.c',
+ 'brw_wm_decl.c',
+ 'brw_wm_glsl.c',
+ 'brw_wm_iz.c',
+ 'brw_wm_sampler_state.c',
+ 'brw_wm_state.c',
+ 'brw_wm_surface_state.c',
+ ])
+
+Export('i965simple')
diff --git a/src/gallium/drivers/i965simple/brw_batch.h b/src/gallium/drivers/i965simple/brw_batch.h
new file mode 100644
index 0000000000..5f5932a488
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_batch.h
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_BATCH_H
+#define BRW_BATCH_H
+
+#include "brw_winsys.h"
+
+#define BATCH_LOCALS
+
+#define INTEL_BATCH_NO_CLIPRECTS 0x1
+#define INTEL_BATCH_CLIPRECTS 0x2
+
+#define BEGIN_BATCH( dwords, relocs ) \
+ brw->winsys->batch_start(brw->winsys, dwords, relocs)
+
+#define OUT_BATCH( dword ) \
+ brw->winsys->batch_dword(brw->winsys, dword)
+
+#define OUT_RELOC( buf, flags, delta ) \
+ brw->winsys->batch_reloc(brw->winsys, buf, flags, delta)
+
+#define ADVANCE_BATCH() \
+ brw->winsys->batch_end( brw->winsys )
+
+/* XXX: this is bogus - need proper handling for out-of-memory in batchbuffer.
+ */
+#define FLUSH_BATCH(fence) do { \
+ brw->winsys->batch_flush(brw->winsys, fence); \
+ brw->hardware_dirty = ~0; \
+} while (0)
+
+#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->winsys, (s), sizeof(*(s)))
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c
new file mode 100644
index 0000000000..8494f70493
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_blit.c
@@ -0,0 +1,218 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <errno.h>
+
+#include "brw_batch.h"
+#include "brw_blit.h"
+#include "brw_context.h"
+#include "brw_reg.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_winsys.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BLIT
+
+void brw_fill_blit(struct brw_context *brw,
+ unsigned cpp,
+ short dst_pitch,
+ struct pipe_buffer *dst_buffer,
+ unsigned dst_offset,
+ boolean dst_tiled,
+ short x, short y,
+ short w, short h,
+ unsigned color)
+{
+ unsigned BR13, CMD;
+ BATCH_LOCALS;
+
+ dst_pitch *= cpp;
+
+ switch(cpp) {
+ case 1:
+ case 2:
+ case 3:
+ BR13 = (0xF0 << 16) | (1<<24);
+ CMD = XY_COLOR_BLT_CMD;
+ break;
+ case 4:
+ BR13 = (0xF0 << 16) | (1<<24) | (1<<25);
+ CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+ break;
+ default:
+ return;
+ }
+
+ if (dst_tiled) {
+ CMD |= XY_DST_TILED;
+ dst_pitch /= 4;
+ }
+
+ BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH( CMD );
+ OUT_BATCH( dst_pitch | BR13 );
+ OUT_BATCH( (y << 16) | x );
+ OUT_BATCH( ((y+h) << 16) | (x+w) );
+ OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset );
+ OUT_BATCH( color );
+ ADVANCE_BATCH();
+}
+
+static unsigned translate_raster_op(unsigned logicop)
+{
+ switch(logicop) {
+ case PIPE_LOGICOP_CLEAR: return 0x00;
+ case PIPE_LOGICOP_AND: return 0x88;
+ case PIPE_LOGICOP_AND_REVERSE: return 0x44;
+ case PIPE_LOGICOP_COPY: return 0xCC;
+ case PIPE_LOGICOP_AND_INVERTED: return 0x22;
+ case PIPE_LOGICOP_NOOP: return 0xAA;
+ case PIPE_LOGICOP_XOR: return 0x66;
+ case PIPE_LOGICOP_OR: return 0xEE;
+ case PIPE_LOGICOP_NOR: return 0x11;
+ case PIPE_LOGICOP_EQUIV: return 0x99;
+ case PIPE_LOGICOP_INVERT: return 0x55;
+ case PIPE_LOGICOP_OR_REVERSE: return 0xDD;
+ case PIPE_LOGICOP_COPY_INVERTED: return 0x33;
+ case PIPE_LOGICOP_OR_INVERTED: return 0xBB;
+ case PIPE_LOGICOP_NAND: return 0x77;
+ case PIPE_LOGICOP_SET: return 0xFF;
+ default: return 0;
+ }
+}
+
+
+/* Copy BitBlt
+ */
+void brw_copy_blit(struct brw_context *brw,
+ unsigned do_flip,
+ unsigned cpp,
+ short src_pitch,
+ struct pipe_buffer *src_buffer,
+ unsigned src_offset,
+ boolean src_tiled,
+ short dst_pitch,
+ struct pipe_buffer *dst_buffer,
+ unsigned dst_offset,
+ boolean dst_tiled,
+ short src_x, short src_y,
+ short dst_x, short dst_y,
+ short w, short h,
+ unsigned logic_op)
+{
+ unsigned CMD, BR13;
+ int dst_y2 = dst_y + h;
+ int dst_x2 = dst_x + w;
+ BATCH_LOCALS;
+
+
+ DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n",
+ __FUNCTION__,
+ src_buffer, src_pitch, src_x, src_y,
+ dst_buffer, dst_pitch, dst_x, dst_y,
+ w,h,logic_op);
+
+ assert( logic_op - PIPE_LOGICOP_CLEAR >= 0 );
+ assert( logic_op - PIPE_LOGICOP_CLEAR < 0x10 );
+
+ src_pitch *= cpp;
+ dst_pitch *= cpp;
+
+ switch(cpp) {
+ case 1:
+ case 2:
+ case 3:
+ BR13 = (translate_raster_op(logic_op) << 16) | (1<<24);
+ CMD = XY_SRC_COPY_BLT_CMD;
+ break;
+ case 4:
+ BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) |
+ (1<<25);
+ CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+ break;
+ default:
+ return;
+ }
+
+ if (src_tiled) {
+ CMD |= XY_SRC_TILED;
+ src_pitch /= 4;
+ }
+
+ if (dst_tiled) {
+ CMD |= XY_DST_TILED;
+ dst_pitch /= 4;
+ }
+
+ if (dst_y2 < dst_y ||
+ dst_x2 < dst_x) {
+ return;
+ }
+
+ dst_pitch &= 0xffff;
+ src_pitch &= 0xffff;
+
+ /* Initial y values don't seem to work with negative pitches. If
+ * we adjust the offsets manually (below), it seems to work fine.
+ *
+ * On the other hand, if we always adjust, the hardware doesn't
+ * know which blit directions to use, so overlapping copypixels get
+ * the wrong result.
+ */
+ if (dst_pitch > 0 && src_pitch > 0) {
+ BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH( CMD );
+ OUT_BATCH( dst_pitch | BR13 );
+ OUT_BATCH( (dst_y << 16) | dst_x );
+ OUT_BATCH( (dst_y2 << 16) | dst_x2 );
+ OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE,
+ dst_offset );
+ OUT_BATCH( (src_y << 16) | src_x );
+ OUT_BATCH( src_pitch );
+ OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ,
+ src_offset );
+ ADVANCE_BATCH();
+ }
+ else {
+ BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH( CMD );
+ OUT_BATCH( (dst_pitch & 0xffff) | BR13 );
+ OUT_BATCH( (0 << 16) | dst_x );
+ OUT_BATCH( (h << 16) | dst_x2 );
+ OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE,
+ dst_offset + dst_y * dst_pitch );
+ OUT_BATCH( (src_pitch & 0xffff) );
+ OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ,
+ src_offset + src_y * src_pitch );
+ ADVANCE_BATCH();
+ }
+}
+
+
+
diff --git a/src/gallium/drivers/i965simple/brw_blit.h b/src/gallium/drivers/i965simple/brw_blit.h
new file mode 100644
index 0000000000..111c5d91d3
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_blit.h
@@ -0,0 +1,33 @@
+#ifndef BRW_BLIT_H
+#define BRW_BLIT_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_buffer;
+struct brw_context;
+
+void brw_fill_blit(struct brw_context *intel,
+ unsigned cpp,
+ short dst_pitch,
+ struct pipe_buffer *dst_buffer,
+ unsigned dst_offset,
+ boolean dst_tiled,
+ short x, short y,
+ short w, short h,
+ unsigned color);
+void brw_copy_blit(struct brw_context *intel,
+ unsigned do_flip,
+ unsigned cpp,
+ short src_pitch,
+ struct pipe_buffer *src_buffer,
+ unsigned src_offset,
+ boolean src_tiled,
+ short dst_pitch,
+ struct pipe_buffer *dst_buffer,
+ unsigned dst_offset,
+ boolean dst_tiled,
+ short src_x, short src_y,
+ short dst_x, short dst_y,
+ short w, short h,
+ unsigned logic_op);
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c
new file mode 100644
index 0000000000..79d4150383
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_cc.c
@@ -0,0 +1,269 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+
+
+static int brw_translate_compare_func(int func)
+{
+ switch(func) {
+ case PIPE_FUNC_NEVER:
+ return BRW_COMPAREFUNCTION_NEVER;
+ case PIPE_FUNC_LESS:
+ return BRW_COMPAREFUNCTION_LESS;
+ case PIPE_FUNC_LEQUAL:
+ return BRW_COMPAREFUNCTION_LEQUAL;
+ case PIPE_FUNC_GREATER:
+ return BRW_COMPAREFUNCTION_GREATER;
+ case PIPE_FUNC_GEQUAL:
+ return BRW_COMPAREFUNCTION_GEQUAL;
+ case PIPE_FUNC_NOTEQUAL:
+ return BRW_COMPAREFUNCTION_NOTEQUAL;
+ case PIPE_FUNC_EQUAL:
+ return BRW_COMPAREFUNCTION_EQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return BRW_COMPAREFUNCTION_ALWAYS;
+ }
+
+ debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func);
+ return BRW_COMPAREFUNCTION_ALWAYS;
+}
+
+static int brw_translate_stencil_op(int op)
+{
+ switch(op) {
+ case PIPE_STENCIL_OP_KEEP:
+ return BRW_STENCILOP_KEEP;
+ case PIPE_STENCIL_OP_ZERO:
+ return BRW_STENCILOP_ZERO;
+ case PIPE_STENCIL_OP_REPLACE:
+ return BRW_STENCILOP_REPLACE;
+ case PIPE_STENCIL_OP_INCR:
+ return BRW_STENCILOP_INCRSAT;
+ case PIPE_STENCIL_OP_DECR:
+ return BRW_STENCILOP_DECRSAT;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ return BRW_STENCILOP_INCR;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ return BRW_STENCILOP_DECR;
+ case PIPE_STENCIL_OP_INVERT:
+ return BRW_STENCILOP_INVERT;
+ default:
+ return BRW_STENCILOP_ZERO;
+ }
+}
+
+
+static int brw_translate_logic_op(int opcode)
+{
+ switch(opcode) {
+ case PIPE_LOGICOP_CLEAR:
+ return BRW_LOGICOPFUNCTION_CLEAR;
+ case PIPE_LOGICOP_AND:
+ return BRW_LOGICOPFUNCTION_AND;
+ case PIPE_LOGICOP_AND_REVERSE:
+ return BRW_LOGICOPFUNCTION_AND_REVERSE;
+ case PIPE_LOGICOP_COPY:
+ return BRW_LOGICOPFUNCTION_COPY;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ return BRW_LOGICOPFUNCTION_COPY_INVERTED;
+ case PIPE_LOGICOP_AND_INVERTED:
+ return BRW_LOGICOPFUNCTION_AND_INVERTED;
+ case PIPE_LOGICOP_NOOP:
+ return BRW_LOGICOPFUNCTION_NOOP;
+ case PIPE_LOGICOP_XOR:
+ return BRW_LOGICOPFUNCTION_XOR;
+ case PIPE_LOGICOP_OR:
+ return BRW_LOGICOPFUNCTION_OR;
+ case PIPE_LOGICOP_OR_INVERTED:
+ return BRW_LOGICOPFUNCTION_OR_INVERTED;
+ case PIPE_LOGICOP_NOR:
+ return BRW_LOGICOPFUNCTION_NOR;
+ case PIPE_LOGICOP_EQUIV:
+ return BRW_LOGICOPFUNCTION_EQUIV;
+ case PIPE_LOGICOP_INVERT:
+ return BRW_LOGICOPFUNCTION_INVERT;
+ case PIPE_LOGICOP_OR_REVERSE:
+ return BRW_LOGICOPFUNCTION_OR_REVERSE;
+ case PIPE_LOGICOP_NAND:
+ return BRW_LOGICOPFUNCTION_NAND;
+ case PIPE_LOGICOP_SET:
+ return BRW_LOGICOPFUNCTION_SET;
+ default:
+ return BRW_LOGICOPFUNCTION_SET;
+ }
+}
+
+
+static void upload_cc_vp( struct brw_context *brw )
+{
+ struct brw_cc_viewport ccv;
+
+ memset(&ccv, 0, sizeof(ccv));
+
+ ccv.min_depth = 0.0;
+ ccv.max_depth = 1.0;
+
+ brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv );
+}
+
+const struct brw_tracked_state brw_cc_vp = {
+ .dirty = {
+ .brw = BRW_NEW_SCENE,
+ .cache = 0
+ },
+ .update = upload_cc_vp
+};
+
+
+static void upload_cc_unit( struct brw_context *brw )
+{
+ struct brw_cc_unit_state cc;
+
+ memset(&cc, 0, sizeof(cc));
+
+ /* BRW_NEW_DEPTH_STENCIL */
+ if (brw->attribs.DepthStencil->stencil[0].enabled) {
+ cc.cc0.stencil_enable = brw->attribs.DepthStencil->stencil[0].enabled;
+ cc.cc0.stencil_func = brw_translate_compare_func(brw->attribs.DepthStencil->stencil[0].func);
+ cc.cc0.stencil_fail_op = brw_translate_stencil_op(brw->attribs.DepthStencil->stencil[0].fail_op);
+ cc.cc0.stencil_pass_depth_fail_op = brw_translate_stencil_op(
+ brw->attribs.DepthStencil->stencil[0].zfail_op);
+ cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op(
+ brw->attribs.DepthStencil->stencil[0].zpass_op);
+ cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil[0].ref_value;
+ cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].write_mask;
+ cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].value_mask;
+
+ if (brw->attribs.DepthStencil->stencil[1].enabled) {
+ cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil[1].enabled;
+ cc.cc0.bf_stencil_func = brw_translate_compare_func(
+ brw->attribs.DepthStencil->stencil[1].func);
+ cc.cc0.bf_stencil_fail_op = brw_translate_stencil_op(
+ brw->attribs.DepthStencil->stencil[1].fail_op);
+ cc.cc0.bf_stencil_pass_depth_fail_op = brw_translate_stencil_op(
+ brw->attribs.DepthStencil->stencil[1].zfail_op);
+ cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op(
+ brw->attribs.DepthStencil->stencil[1].zpass_op);
+ cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil[1].ref_value;
+ cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].write_mask;
+ cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].value_mask;
+ }
+
+ /* Not really sure about this:
+ */
+ if (brw->attribs.DepthStencil->stencil[0].write_mask ||
+ brw->attribs.DepthStencil->stencil[1].write_mask)
+ cc.cc0.stencil_write_enable = 1;
+ }
+
+ /* BRW_NEW_BLEND */
+ if (brw->attribs.Blend->logicop_enable) {
+ cc.cc2.logicop_enable = 1;
+ cc.cc5.logicop_func = brw_translate_logic_op( brw->attribs.Blend->logicop_func );
+ }
+ else if (brw->attribs.Blend->blend_enable) {
+ int eqRGB = brw->attribs.Blend->rgb_func;
+ int eqA = brw->attribs.Blend->alpha_func;
+ int srcRGB = brw->attribs.Blend->rgb_src_factor;
+ int dstRGB = brw->attribs.Blend->rgb_dst_factor;
+ int srcA = brw->attribs.Blend->alpha_src_factor;
+ int dstA = brw->attribs.Blend->alpha_dst_factor;
+
+ if (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX) {
+ srcRGB = dstRGB = PIPE_BLENDFACTOR_ONE;
+ }
+
+ if (eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX) {
+ srcA = dstA = PIPE_BLENDFACTOR_ONE;
+ }
+
+ cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+ cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
+ cc.cc6.blend_function = brw_translate_blend_equation( eqRGB );
+
+ cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+ cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
+ cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA );
+
+ cc.cc3.blend_enable = 1;
+ cc.cc3.ia_blend_enable = (srcA != srcRGB ||
+ dstA != dstRGB ||
+ eqA != eqRGB);
+ }
+
+ /* BRW_NEW_ALPHATEST
+ */
+ if (brw->attribs.DepthStencil->alpha.enabled) {
+ cc.cc3.alpha_test = 1;
+ cc.cc3.alpha_test_func =
+ brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func);
+
+ cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref);
+
+ cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+ }
+
+ if (brw->attribs.Blend->dither) {
+ cc.cc5.dither_enable = 1;
+ cc.cc6.y_dither_offset = 0;
+ cc.cc6.x_dither_offset = 0;
+ }
+
+ if (brw->attribs.DepthStencil->depth.enabled) {
+ cc.cc2.depth_test = brw->attribs.DepthStencil->depth.enabled;
+ cc.cc2.depth_test_function = brw_translate_compare_func(brw->attribs.DepthStencil->depth.func);
+ cc.cc2.depth_write_enable = brw->attribs.DepthStencil->depth.writemask;
+ }
+
+ /* CACHE_NEW_CC_VP */
+ cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ cc.cc5.statistics_enable = 1;
+
+ brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc );
+}
+
+const struct brw_tracked_state brw_cc_unit = {
+ .dirty = {
+ .brw = BRW_NEW_DEPTH_STENCIL | BRW_NEW_BLEND | BRW_NEW_ALPHA_TEST,
+ .cache = CACHE_NEW_CC_VP
+ },
+ .update = upload_cc_unit
+};
+
diff --git a/src/gallium/drivers/i965simple/brw_clip.c b/src/gallium/drivers/i965simple/brw_clip.c
new file mode 100644
index 0000000000..268124cc53
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_clip.c
@@ -0,0 +1,206 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_clip.h"
+
+#define FRONT_UNFILLED_BIT 0x1
+#define BACK_UNFILLED_BIT 0x2
+
+
+static void compile_clip_prog( struct brw_context *brw,
+ struct brw_clip_prog_key *key )
+{
+ struct brw_clip_compile c;
+ const unsigned *program;
+ unsigned program_size;
+ unsigned delta;
+ unsigned i;
+
+ memset(&c, 0, sizeof(c));
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(&c.func);
+
+ c.func.single_program_flow = 1;
+
+ c.key = *key;
+
+
+ /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded:
+ */
+ c.header_position_offset = ATTR_SIZE;
+
+ for (i = 0, delta = REG_SIZE; i < PIPE_MAX_SHADER_OUTPUTS; i++)
+ if (c.key.attrs & (1<<i)) {
+ c.offset[i] = delta;
+ delta += ATTR_SIZE;
+ }
+
+ c.nr_attrs = brw_count_bits(c.key.attrs);
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+ c.nr_bytes = c.nr_regs * REG_SIZE;
+
+ c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
+
+ /* For some reason the thread is spawned with only 4 channels
+ * unmasked.
+ */
+ brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+ /* Would ideally have the option of producing a program which could
+ * do all three:
+ */
+ switch (key->primitive) {
+ case PIPE_PRIM_TRIANGLES:
+#if 0
+ if (key->do_unfilled)
+ brw_emit_unfilled_clip( &c );
+ else
+#endif
+ brw_emit_tri_clip( &c );
+ break;
+ case PIPE_PRIM_LINES:
+ brw_emit_line_clip( &c );
+ break;
+ case PIPE_PRIM_POINTS:
+ brw_emit_point_clip( &c );
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /* Upload
+ */
+ brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG],
+ &c.key,
+ sizeof(c.key),
+ program,
+ program_size,
+ &c.prog_data,
+ &brw->clip.prog_data );
+}
+
+
+static boolean search_cache( struct brw_context *brw,
+ struct brw_clip_prog_key *key )
+{
+ return brw_search_cache(&brw->cache[BRW_CLIP_PROG],
+ key, sizeof(*key),
+ &brw->clip.prog_data,
+ &brw->clip.prog_gs_offset);
+}
+
+
+
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void upload_clip_prog(struct brw_context *brw)
+{
+ struct brw_clip_prog_key key;
+
+ memset(&key, 0, sizeof(key));
+
+ /* Populate the key:
+ */
+ /* BRW_NEW_REDUCED_PRIMITIVE */
+ key.primitive = brw->reduced_primitive;
+ /* CACHE_NEW_VS_PROG */
+ key.attrs = brw->vs.prog_data->outputs_written;
+ /* BRW_NEW_RASTER */
+ key.do_flat_shading = (brw->attribs.Raster->flatshade);
+ /* BRW_NEW_CLIP */
+ key.nr_userclip = brw->attribs.Clip.nr; /* XXX */
+
+#if 0
+ key.clip_mode = BRW_CLIPMODE_NORMAL;
+
+ if (key.primitive == PIPE_PRIM_TRIANGLES) {
+ if (brw->attribs.Raster->cull_mode == PIPE_WINDING_BOTH)
+ key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
+ else {
+ if (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL)
+ key.do_unfilled = 1;
+
+ /* Most cases the fixed function units will handle. Cases where
+ * one or more polygon faces are unfilled will require help:
+ */
+ if (key.do_unfilled) {
+ key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+
+ if (brw->attribs.Raster->offset_cw ||
+ brw->attribs.Raster->offset_ccw) {
+ key.offset_units = brw->attribs.Raster->offset_units;
+ key.offset_factor = brw->attribs.Raster->offset_scale;
+ }
+ key.fill_ccw = brw->attribs.Raster->fill_ccw;
+ key.fill_cw = brw->attribs.Raster->fill_cw;
+ key.offset_ccw = brw->attribs.Raster->offset_ccw;
+ key.offset_cw = brw->attribs.Raster->offset_cw;
+ if (brw->attribs.Raster->light_twoside &&
+ key.fill_cw != CLIP_CULL)
+ key.copy_bfc_cw = 1;
+ }
+ }
+ }
+#else
+ key.clip_mode = BRW_CLIPMODE_ACCEPT_ALL;
+#endif
+
+ if (!search_cache(brw, &key))
+ compile_clip_prog( brw, &key );
+}
+
+const struct brw_tracked_state brw_clip_prog = {
+ .dirty = {
+ .brw = (BRW_NEW_RASTERIZER |
+ BRW_NEW_CLIP |
+ BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .update = upload_clip_prog
+};
diff --git a/src/gallium/drivers/i965simple/brw_clip.h b/src/gallium/drivers/i965simple/brw_clip.h
new file mode 100644
index 0000000000..d70fc094ff
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_clip.h
@@ -0,0 +1,170 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef BRW_CLIP_H
+#define BRW_CLIP_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_VERTS (3+6+6)
+
+/* Note that if unfilled primitives are being emitted, we have to fix
+ * up polygon offset and flatshading at this point:
+ */
+struct brw_clip_prog_key {
+ unsigned attrs:32;
+ unsigned primitive:4;
+ unsigned nr_userclip:3;
+ unsigned do_flat_shading:1;
+ unsigned do_unfilled:1;
+ unsigned fill_cw:2; /* includes cull information */
+ unsigned fill_ccw:2; /* includes cull information */
+ unsigned offset_cw:1;
+ unsigned offset_ccw:1;
+ unsigned pad0:17;
+
+ unsigned copy_bfc_cw:1;
+ unsigned copy_bfc_ccw:1;
+ unsigned clip_mode:3;
+ unsigned pad1:27;
+
+ float offset_factor;
+ float offset_units;
+};
+
+
+#define CLIP_LINE 0
+#define CLIP_POINT 1
+#define CLIP_FILL 2
+#define CLIP_CULL 3
+
+
+#define PRIM_MASK (0x1f)
+
+struct brw_clip_compile {
+ struct brw_compile func;
+ struct brw_clip_prog_key key;
+ struct brw_clip_prog_data prog_data;
+
+ struct {
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_VERTS];
+
+ struct brw_reg t;
+ struct brw_reg t0, t1;
+ struct brw_reg dp0, dp1;
+
+ struct brw_reg dpPrev;
+ struct brw_reg dp;
+ struct brw_reg loopcount;
+ struct brw_reg nr_verts;
+ struct brw_reg planemask;
+
+ struct brw_reg inlist;
+ struct brw_reg outlist;
+ struct brw_reg freelist;
+
+ struct brw_reg dir;
+ struct brw_reg tmp0, tmp1;
+ struct brw_reg offset;
+
+ struct brw_reg fixed_planes;
+ struct brw_reg plane_equation;
+ } reg;
+
+ /* 3 different ways of expressing vertex size:
+ */
+ unsigned nr_attrs;
+ unsigned nr_regs;
+ unsigned nr_bytes;
+
+ unsigned first_tmp;
+ unsigned last_tmp;
+
+ boolean need_direction;
+
+ unsigned last_mrf;
+
+ unsigned header_position_offset;
+ unsigned offset[PIPE_MAX_ATTRIBS];
+};
+
+#define ATTR_SIZE (4*4)
+
+/* Points are only culled, so no need for a clip routine, however it
+ * works out easier to have a dummy one.
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c );
+void brw_emit_tri_clip( struct brw_clip_compile *c );
+void brw_emit_line_clip( struct brw_clip_compile *c );
+void brw_emit_point_clip( struct brw_clip_compile *c );
+
+/* brw_clip_tri.c, for use by the unfilled clip routine:
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
+void brw_clip_tri( struct brw_clip_compile *c );
+void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ unsigned nr_verts );
+
+
+/* Utils:
+ */
+
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ boolean force_edgeflag );
+
+void brw_clip_init_planes( struct brw_clip_compile *c );
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ boolean allocate,
+ boolean eot,
+ unsigned header);
+
+void brw_clip_kill_thread(struct brw_clip_compile *c);
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
+
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+ unsigned to, unsigned from );
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_clip_line.c b/src/gallium/drivers/i965simple/brw_clip_line.c
new file mode 100644
index 0000000000..75d9e5fcda
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_clip_line.c
@@ -0,0 +1,245 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+ unsigned i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2;
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+ }
+ else
+ c->prog_data.curb_read_length = 0;
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < 4; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ c->reg.t = brw_vec1_grf(i, 0);
+ c->reg.t0 = brw_vec1_grf(i, 1);
+ c->reg.t1 = brw_vec1_grf(i, 2);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp1 = brw_vec1_grf(i, 4);
+ i++;
+
+ if (!c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+
+ c->first_tmp = i;
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+
+
+/* Line clipping, more or less following the following algorithm:
+ *
+ * for (p=0;p<MAX_PLANES;p++) {
+ * if (clipmask & (1 << p)) {
+ * float dp0 = DOTPROD( vtx0, plane[p] );
+ * float dp1 = DOTPROD( vtx1, plane[p] );
+ *
+ * if (IS_NEGATIVE(dp1)) {
+ * float t = dp1 / (dp1 - dp0);
+ * if (t > t1) t1 = t;
+ * } else {
+ * float t = dp0 / (dp0 - dp1);
+ * if (t > t0) t0 = t;
+ * }
+ *
+ * if (t0 + t1 >= 1.0)
+ * return;
+ * }
+ * }
+ *
+ * interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ * interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ *
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx0 = brw_indirect(0, 0);
+ struct brw_indirect vtx1 = brw_indirect(1, 0);
+ struct brw_indirect newvtx0 = brw_indirect(2, 0);
+ struct brw_indirect newvtx1 = brw_indirect(3, 0);
+ struct brw_indirect plane_ptr = brw_indirect(4, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *is_negative;
+ struct brw_instruction *is_neg2;
+ struct brw_instruction *not_culled;
+ struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+
+ brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3]));
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+ /* Note: init t0, t1 together:
+ */
+ brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+ brw_clip_init_planes(c);
+ brw_clip_init_clipmask(c);
+
+ /* -ve rhw workaround */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+#if 0
+ /* dp = DP4(vtx->position, plane)
+ */
+ brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+
+ /* if (IS_NEGATIVE(dp1))
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+#else
+ #warning "disabled"
+#endif
+ is_negative = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+ brw_MOV(p, c->reg.t1, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ is_negative = brw_ELSE(p, is_negative);
+ {
+ /* Coming back in. We know that both cannot be negative
+ * because the line would have been culled in that case.
+ */
+
+ /* If both are positive, do nothing */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+ brw_MOV(p, c->reg.t0, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ brw_ENDIF(p, is_neg2);
+ }
+ brw_ENDIF(p, is_negative);
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* while (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+
+ brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+ not_culled = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE);
+ brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE);
+
+ brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, not_culled);
+ brw_clip_kill_thread(c);
+}
+
+
+
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{
+ brw_clip_line_alloc_regs(c);
+
+ if (c->key.do_flat_shading)
+ brw_clip_copy_colors(c, 0, 1);
+
+ clip_and_emit_line(c);
+}
diff --git a/src/gallium/drivers/i965simple/brw_clip_point.c b/src/gallium/drivers/i965simple/brw_clip_point.c
new file mode 100644
index 0000000000..6fce7210d1
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_clip_point.c
@@ -0,0 +1,47 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+/* Point clipping, nothing to do?
+ */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_tri_alloc_regs(c, 0);
+ brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c
new file mode 100644
index 0000000000..8e78dd51be
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_clip_state.c
@@ -0,0 +1,93 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+
+static void upload_clip_unit( struct brw_context *brw )
+{
+ struct brw_clip_unit_state clip;
+
+ memset(&clip, 0, sizeof(clip));
+
+ /* CACHE_NEW_CLIP_PROG */
+ clip.thread0.grf_reg_count =
+ align(brw->clip.prog_data->total_grf, 16) / 16 - 1;
+ clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6;
+ clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+ clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length;
+ clip.clip5.clip_mode = brw->clip.prog_data->clip_mode;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+
+ /* BRW_NEW_URB_FENCE */
+ clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries;
+ clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+ clip.thread4.max_threads = 1; /* 2 threads */
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ clip.thread4.stats_enable = 1;
+
+ /* CONSTANT */
+ clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ clip.thread1.single_program_flow = 1;
+ clip.thread3.dispatch_grf_start_reg = 1;
+ clip.thread3.urb_entry_read_offset = 0;
+ clip.clip5.userclip_enable_flags = 0x7f;
+ clip.clip5.userclip_must_clip = 1;
+ clip.clip5.guard_band_enable = 0;
+ clip.clip5.viewport_z_clip_enable = 1;
+ clip.clip5.viewport_xy_clip_enable = 1;
+ clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+ clip.clip5.api_mode = BRW_CLIP_API_OGL;
+ clip.clip6.clipper_viewport_state_ptr = 0;
+ clip.viewport_xmin = -1;
+ clip.viewport_xmax = 1;
+ clip.viewport_ymin = -1;
+ clip.viewport_ymax = 1;
+
+ brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip );
+}
+
+
+const struct brw_tracked_state brw_clip_unit = {
+ .dirty = {
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_CLIP_PROG
+ },
+ .update = upload_clip_unit
+};
diff --git a/src/gallium/drivers/i965simple/brw_clip_tri.c b/src/gallium/drivers/i965simple/brw_clip_tri.c
new file mode 100644
index 0000000000..c5da7b825e
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_clip_tri.c
@@ -0,0 +1,566 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+static struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+ struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
+
+static void release_tmps( struct brw_clip_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ unsigned nr_verts )
+{
+ unsigned i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2;
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+ }
+ else
+ c->prog_data.curb_read_length = 0;
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < nr_verts; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ if (c->nr_attrs & 1) {
+ for (j = 0; j < 3; j++) {
+ unsigned delta = c->nr_attrs*16 + 32;
+ brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
+ }
+ }
+
+ c->reg.t = brw_vec1_grf(i, 0);
+ c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD);
+ c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp = brw_vec1_grf(i, 4);
+ i++;
+
+ c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ if (!c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+ if (c->key.do_unfilled) {
+ c->reg.dir = brw_vec4_grf(i, 0);
+ c->reg.offset = brw_vec4_grf(i, 4);
+ i++;
+ c->reg.tmp0 = brw_vec4_grf(i, 0);
+ c->reg.tmp1 = brw_vec4_grf(i, 4);
+ i++;
+ }
+
+ c->first_tmp = i;
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+
+
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+ struct brw_instruction *is_rev;
+
+ /* Initial list of indices for incoming vertexes:
+ */
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+ /* XXX: Is there an easier way to do this? Need to reverse every
+ * second tristrip element: Can ignore sometimes?
+ */
+ is_rev = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(-1));
+ }
+ is_rev = brw_ELSE(p, is_rev);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(1));
+ }
+ brw_ENDIF(p, is_rev);
+
+ brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) );
+ brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+
+
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_copy_colors(c, 1, 0);
+ brw_clip_copy_colors(c, 2, 0);
+ }
+ is_poly = brw_ELSE(p, is_poly);
+ {
+ brw_clip_copy_colors(c, 0, 2);
+ brw_clip_copy_colors(c, 1, 2);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+
+/* Use mesa's clipping algorithms, translated to GEN4 assembly.
+ */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+#if 0
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx = brw_indirect(0, 0);
+ struct brw_indirect vtxPrev = brw_indirect(1, 0);
+ struct brw_indirect vtxOut = brw_indirect(2, 0);
+ struct brw_indirect plane_ptr = brw_indirect(3, 0);
+ struct brw_indirect inlist_ptr = brw_indirect(4, 0);
+ struct brw_indirect outlist_ptr = brw_indirect(5, 0);
+ struct brw_indirect freelist_ptr = brw_indirect(6, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *vertex_loop;
+ struct brw_instruction *next_test;
+ struct brw_instruction *prev_test;
+
+ brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) );
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+ brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* vtxOut = freelist_ptr++
+ */
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) );
+ brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
+
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
+
+ vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* vtx = *input_ptr;
+ */
+ brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+ /* IS_NEGATIVE(prev) */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+ prev_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* IS_POSITIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+
+ /* Coming back in.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
+
+ /* If (vtxOut == 0) vtxOut = vtxPrev
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, FALSE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+
+ }
+ prev_test = brw_ELSE(p, prev_test);
+ {
+ /* *outlist_ptr++ = vtxPrev;
+ * nr_verts++;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+ /* IS_NEGATIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* Going out of bounds. Avoid division by zero as we
+ * know dp != dpPrev from DIFFERENT_SIGNS, above.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
+
+ /* If (vtxOut == 0) vtxOut = vtx
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, TRUE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+ }
+ brw_ENDIF(p, prev_test);
+
+ /* vtxPrev = vtx;
+ * inlist_ptr++;
+ */
+ brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+ brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+ /* while (--loopcount != 0)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, vertex_loop);
+
+ /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1]
+ * inlist = outlist
+ * inlist_ptr = &inlist[0]
+ * outlist_ptr = &outlist[0]
+ */
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
+ brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+ brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* nr_verts >= 3
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ c->reg.nr_verts,
+ brw_imm_ud(3));
+
+ /* && (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+#else
+ #warning "disabled"
+#endif
+}
+
+
+
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop, *if_insn;
+
+ /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p,
+ c->reg.loopcount,
+ c->reg.nr_verts,
+ brw_imm_d(-2));
+
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect vptr = brw_indirect(1, 0);
+
+ brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2));
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+
+ brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
+ }
+ brw_ENDIF(p, if_insn);
+}
+
+static void do_clip_tri( struct brw_clip_compile *c )
+{
+ brw_clip_init_planes(c);
+
+ brw_clip_tri(c);
+}
+
+
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ do_clip_tri(c);
+ }
+ brw_ENDIF(p, do_clip);
+}
+
+static void brw_clip_test( struct brw_clip_compile *c )
+{
+#if 0
+ struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+ struct brw_reg v0 = get_tmp(c);
+ struct brw_reg v1 = get_tmp(c);
+ struct brw_reg v2 = get_tmp(c);
+
+ struct brw_indirect vt0 = brw_indirect(0, 0);
+ struct brw_indirect vt1 = brw_indirect(1, 0);
+ struct brw_indirect vt2 = brw_indirect(2, 0);
+
+ struct brw_compile *p = &c->func;
+
+ brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
+ brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
+ brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
+
+ /* test nearz, xmin, ymin plane */
+ brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* test farz, xmax, ymax plane */
+ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ release_tmps(c);
+#else
+ #warning "disabled"
+#endif
+}
+
+
+void brw_emit_tri_clip( struct brw_clip_compile *c )
+{
+ struct brw_instruction *neg_rhw;
+ struct brw_compile *p = &c->func;
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+ brw_clip_tri_init_vertices(c);
+ brw_clip_init_clipmask(c);
+
+ /* if -ve rhw workaround bit is set,
+ do cliptest */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20));
+ neg_rhw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_test(c);
+ }
+ brw_ENDIF(p, neg_rhw);
+
+ /* Can't push into do_clip_tri because with polygon (or quad)
+ * flatshading, need to apply the flatshade here because we don't
+ * respect the PV when converting to trifan for emit:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ if (c->key.clip_mode == BRW_CLIPMODE_NORMAL)
+ do_clip_tri(c);
+ else
+ maybe_do_clip_tri(c);
+
+ brw_clip_tri_emit_polygon(c);
+
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965simple/brw_clip_unfilled.c b/src/gallium/drivers/i965simple/brw_clip_unfilled.c
new file mode 100644
index 0000000000..b774a76dd6
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_clip_unfilled.c
@@ -0,0 +1,477 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+/* This is performed against the original triangles, so no indirection
+ * required:
+BZZZT!
+ */
+static void compute_tri_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg e = c->reg.tmp0;
+ struct brw_reg f = c->reg.tmp1;
+ struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]);
+ struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]);
+ struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]);
+
+
+ /* Calculate the vectors of two edges of the triangle:
+ */
+ brw_ADD(p, e, v0, negate(v2));
+ brw_ADD(p, f, v1, negate(v2));
+
+ /* Take their crossproduct:
+ */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3));
+ brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
+}
+
+
+static void cull_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+ unsigned conditional;
+
+ assert (!(c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL));
+
+ if (c->key.fill_ccw == CLIP_CULL)
+ conditional = BRW_CONDITIONAL_GE;
+ else
+ conditional = BRW_CONDITIONAL_L;
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ conditional,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, ccw);
+}
+
+
+
+static void copy_bfc( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+ unsigned conditional;
+
+ /* Do we have any colors to copy?
+ */
+ if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) &&
+ !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]))
+ return;
+
+ /* In some wierd degnerate cases we can end up testing the
+ * direction twice, once for culling and once for bfc copying. Oh
+ * well, that's what you get for setting wierd GL state.
+ */
+ if (c->key.copy_bfc_ccw)
+ conditional = BRW_CONDITIONAL_GE;
+ else
+ conditional = BRW_CONDITIONAL_L;
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ conditional,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ unsigned i;
+
+ for (i = 0; i < 3; i++) {
+ if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]),
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0]));
+
+ if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]),
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
+ }
+ }
+ brw_ENDIF(p, ccw);
+}
+
+
+
+
+/*
+ float iz = 1.0 / dir.z;
+ float ac = dir.x * iz;
+ float bc = dir.y * iz;
+ offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
+ offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+ offset *= MRD;
+*/
+static void compute_offset( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg off = c->reg.offset;
+ struct brw_reg dir = c->reg.dir;
+
+ brw_math_invert(p, get_element(off, 2), get_element(dir, 2));
+ brw_MUL(p, vec2(off), dir, get_element(off, 2));
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ brw_abs(get_element(off, 0)),
+ brw_abs(get_element(off, 1)));
+
+ brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
+ brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
+}
+
+
+static void merge_edgeflags( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ /* Get away with using reg.vertex because we know that this is not
+ * a _3DPRIM_TRISTRIP_REVERSE:
+ */
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
+ brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
+ brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+
+static void apply_one_offset( struct brw_clip_compile *c,
+ struct brw_indirect vert )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]);
+ struct brw_reg z = get_element(pos, 2);
+
+ brw_ADD(p, z, z, vec1(c->reg.offset));
+}
+
+
+
+/***********************************************************************
+ * Output clipped polygon as an unfilled primitive:
+ */
+static void emit_lines(struct brw_clip_compile *c,
+ boolean do_offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_edge;
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v1 = brw_indirect(1, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+ struct brw_indirect v1ptr = brw_indirect(3, 0);
+
+ /* Need a seperate loop for offset:
+ */
+ if (do_offset) {
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ apply_one_offset(c, v0);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+ }
+
+ /* v1ptr = &inlist[nr_verts]
+ * *v1ptr = v0
+ */
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw edge if edgeflag != 0 */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+ brw_imm_f(0));
+ draw_edge = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_edge);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+static void emit_points(struct brw_clip_compile *c,
+ boolean do_offset )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_point;
+
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw if edgeflag != 0
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+ brw_imm_f(0));
+ draw_point = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (do_offset)
+ apply_one_offset(c, v0);
+
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_point);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+
+
+
+
+static void emit_primitives( struct brw_clip_compile *c,
+ unsigned mode,
+ boolean do_offset )
+{
+ switch (mode) {
+ case CLIP_FILL:
+ brw_clip_tri_emit_polygon(c);
+ break;
+
+ case CLIP_LINE:
+ emit_lines(c, do_offset);
+ break;
+
+ case CLIP_POINT:
+ emit_points(c, do_offset);
+ break;
+
+ case CLIP_CULL:
+ assert(0);
+ break;
+ }
+}
+
+
+
+static void emit_unfilled_primitives( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+
+ /* Direction culling has already been done.
+ */
+ if (c->key.fill_ccw != c->key.fill_cw &&
+ c->key.fill_ccw != CLIP_CULL &&
+ c->key.fill_cw != CLIP_CULL)
+ {
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+ }
+ ccw = brw_ELSE(p, ccw);
+ {
+ emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+ }
+ brw_ENDIF(p, ccw);
+ }
+ else if (c->key.fill_cw != CLIP_CULL) {
+ emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+ }
+ else if (c->key.fill_ccw != CLIP_CULL) {
+ emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+ }
+}
+
+
+
+
+static void check_nr_verts( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, if_insn);
+}
+
+
+void brw_emit_unfilled_clip( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+
+ c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
+ (c->key.fill_ccw != c->key.fill_cw) ||
+ c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL ||
+ c->key.copy_bfc_cw ||
+ c->key.copy_bfc_ccw);
+
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+ brw_clip_tri_init_vertices(c);
+
+ assert(c->offset[VERT_RESULT_EDGE]);
+
+ if (c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL) {
+ brw_clip_kill_thread(c);
+ return;
+ }
+
+ merge_edgeflags(c);
+
+ /* Need to use the inlist indirection here:
+ */
+ if (c->need_direction)
+ compute_tri_direction(c);
+
+ if (c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL)
+ cull_direction(c);
+
+ if (c->key.offset_ccw ||
+ c->key.offset_cw)
+ compute_offset(c);
+
+ if (c->key.copy_bfc_ccw ||
+ c->key.copy_bfc_cw)
+ copy_bfc(c);
+
+ /* Need to do this whether we clip or not:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ brw_clip_init_clipmask(c);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_init_planes(c);
+ brw_clip_tri(c);
+ check_nr_verts(c);
+ }
+ brw_ENDIF(p, do_clip);
+
+ emit_unfilled_primitives(c);
+ brw_clip_kill_thread(c);
+}
+
+
+
diff --git a/src/gallium/drivers/i965simple/brw_clip_util.c b/src/gallium/drivers/i965simple/brw_clip_util.c
new file mode 100644
index 0000000000..6d58ceafff
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_clip_util.c
@@ -0,0 +1,351 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+
+
+static struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+ struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
+
+static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+
+static struct brw_reg make_plane_ud(unsigned x, unsigned y, unsigned z, unsigned w)
+{
+ return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x);
+}
+
+
+void brw_clip_init_planes( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+
+ if (!c->key.nr_userclip) {
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1));
+ }
+}
+
+
+
+#define W 3
+
+/* Project 'pos' to screen space (or back again), overwrite with results:
+ */
+static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+{
+ struct brw_compile *p = &c->func;
+
+ /* calc rhw
+ */
+ brw_math_invert(p, get_element(pos, W), get_element(pos, W));
+
+ /* value.xyz *= value.rhw
+ */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MUL(p, brw_writemask(pos, TGSI_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+}
+
+
+static void brw_clip_project_vertex( struct brw_clip_compile *c,
+ struct brw_indirect vert_addr )
+{
+#if 0
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+
+ /* Fixup position. Extract from the original vertex and re-project
+ * to screen space:
+ */
+ brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS]));
+ brw_clip_project_position(c, tmp);
+ brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp);
+
+ release_tmp(c, tmp);
+#else
+ #warning "disabled"
+#endif
+}
+
+
+
+
+/* Interpolate between two vertices and put the result into a0.0.
+ * Increment a0.0 accordingly.
+ */
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ boolean force_edgeflag)
+{
+#if 0
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+ unsigned i;
+
+ /* Just copy the vertex header:
+ */
+ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
+
+ /* Iterate over each attribute (could be done in pairs?)
+ */
+ for (i = 0; i < c->nr_attrs; i++) {
+ unsigned delta = i*16 + 32;
+
+ if (delta == c->offset[VERT_RESULT_EDGE]) {
+ if (force_edgeflag)
+ brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
+ else
+ brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
+ }
+ else {
+ /* Interpolate:
+ *
+ * New = attr0 + t*attr1 - t*attr0
+ */
+ brw_MUL(p,
+ vec4(brw_null_reg()),
+ deref_4f(v1_ptr, delta),
+ t0);
+
+ brw_MAC(p,
+ tmp,
+ negate(deref_4f(v0_ptr, delta)),
+ t0);
+
+ brw_ADD(p,
+ deref_4f(dest_ptr, delta),
+ deref_4f(v0_ptr, delta),
+ tmp);
+ }
+ }
+
+ if (i & 1) {
+ unsigned delta = i*16 + 32;
+ brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
+ }
+
+ release_tmp(c, tmp);
+
+ /* Recreate the projected (NDC) coordinate in the new vertex
+ * header:
+ */
+ brw_clip_project_vertex(c, dest_ptr );
+#else
+ #warning "disabled"
+#endif
+}
+
+
+
+
+#define MAX_MRF 16
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ boolean allocate,
+ boolean eot,
+ unsigned header)
+{
+ struct brw_compile *p = &c->func;
+ unsigned start = c->last_mrf;
+
+ assert(!(allocate && eot));
+
+ /* Cycle through mrf regs - probably futile as we have to wait for
+ * the allocation response anyway. Also, the order this function
+ * is invoked doesn't correspond to the order the instructions will
+ * be executed, so it won't have any effect in many cases.
+ */
+#if 0
+ if (start + c->nr_regs + 1 >= MAX_MRF)
+ start = 0;
+
+ c->last_mrf = start + c->nr_regs + 1;
+#endif
+
+ /* Copy the vertex from vertn into m1..mN+1:
+ */
+ brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+
+ /* Overwrite PrimType and PrimStart in the message header, for
+ * each vertex in turn:
+ */
+ brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+
+ /* Send each vertex as a seperate write to the urb. This
+ * is different to the concept in brw_sf_emit.c, where
+ * subsequent writes are used to build up a single urb
+ * entry. Each of these writes instantiates a seperate
+ * urb entry - (I think... what about 'allocate'?)
+ */
+ brw_urb_WRITE(p,
+ allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ start,
+ c->reg.R0,
+ allocate,
+ 1, /* used */
+ c->nr_regs + 1, /* msg length */
+ allocate ? 1 : 0, /* response_length */
+ eot, /* eot */
+ 1, /* writes_complete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+void brw_clip_kill_thread(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+
+ /* Send an empty message to kill the thread and release any
+ * allocated urb entry:
+ */
+ brw_urb_WRITE(p,
+ retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ 0,
+ c->reg.R0,
+ 0, /* allocate */
+ 0, /* used */
+ 0, /* msg len */
+ 0, /* response len */
+ 1, /* eot */
+ 1, /* writes complete */
+ 0,
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
+{
+ return brw_address(c->reg.fixed_planes);
+}
+
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
+{
+ if (c->key.nr_userclip) {
+ return brw_imm_uw(16);
+ }
+ else {
+ return brw_imm_uw(4);
+ }
+}
+
+
+/* If flatshading, distribute color from provoking vertex prior to
+ * clipping.
+ */
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+ unsigned to, unsigned from )
+{
+#if 0
+ struct brw_compile *p = &c->func;
+
+ if (c->offset[VERT_RESULT_COL0])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0]));
+
+ if (c->offset[VERT_RESULT_COL1])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1]));
+
+ if (c->offset[VERT_RESULT_BFC0])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0]));
+
+ if (c->offset[VERT_RESULT_BFC1])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1]));
+#else
+ #warning "disabled"
+#endif
+}
+
+
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
+
+ /* Shift so that lowest outcode bit is rightmost:
+ */
+ brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
+
+ if (c->key.nr_userclip) {
+ struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
+
+ /* Rearrange userclip outcodes so that they come directly after
+ * the fixed plane bits.
+ */
+ brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
+ brw_SHR(p, tmp, tmp, brw_imm_ud(8));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
+
+ release_tmp(c, tmp);
+ }
+}
+
diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c
new file mode 100644
index 0000000000..96920df008
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_context.c
@@ -0,0 +1,114 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_vs.h"
+#include "brw_tex_layout.h"
+#include "brw_winsys.h"
+
+#include "pipe/p_winsys.h"
+#include "pipe/p_context.h"
+#include "util/u_memory.h"
+#include "pipe/p_screen.h"
+
+
+#ifndef BRW_DEBUG
+int BRW_DEBUG = (0);
+#endif
+
+
+static void brw_destroy(struct pipe_context *pipe)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ if(brw->winsys->destroy)
+ brw->winsys->destroy(brw->winsys);
+
+ FREE(brw);
+}
+
+
+static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ int x, y, w, h;
+ /* FIXME: corny... */
+
+ x = 0;
+ y = 0;
+ w = ps->width;
+ h = ps->height;
+
+ pipe->surface_fill(pipe, ps, x, y, w, h, clearValue);
+}
+
+
+struct pipe_context *brw_create(struct pipe_screen *screen,
+ struct brw_winsys *brw_winsys,
+ unsigned pci_id)
+{
+ struct brw_context *brw;
+
+ debug_printf("%s: creating brw_context with pci id 0x%x\n",
+ __FUNCTION__, pci_id);
+
+ brw = CALLOC_STRUCT(brw_context);
+ if (brw == NULL)
+ return NULL;
+
+ brw->winsys = brw_winsys;
+ brw->pipe.winsys = screen->winsys;
+ brw->pipe.screen = screen;
+
+ brw->pipe.destroy = brw_destroy;
+ brw->pipe.clear = brw_clear;
+
+ brw_init_surface_functions(brw);
+ brw_init_texture_functions(brw);
+ brw_init_state_functions(brw);
+ brw_init_flush_functions(brw);
+ brw_init_draw_functions( brw );
+
+
+ brw_init_state( brw );
+
+ brw->pci_id = pci_id;
+ brw->dirty = ~0;
+ brw->hardware_dirty = ~0;
+
+ memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind));
+
+ return &brw->pipe;
+}
+
diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h
new file mode 100644
index 0000000000..3079485180
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_context.h
@@ -0,0 +1,684 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRWCONTEXT_INC
+#define BRWCONTEXT_INC
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "tgsi/tgsi_scan.h"
+
+#include "brw_structs.h"
+#include "brw_winsys.h"
+
+
+/* Glossary:
+ *
+ * URB - uniform resource buffer. A mid-sized buffer which is
+ * partitioned between the fixed function units and used for passing
+ * values (vertices, primitives, constants) between them.
+ *
+ * CURBE - constant URB entry. An urb region (entry) used to hold
+ * constant values which the fixed function units can be instructed to
+ * preload into the GRF when spawining a thread.
+ *
+ * VUE - vertex URB entry. An urb entry holding a vertex and usually
+ * a vertex header. The header contains control information and
+ * things like primitive type, Begin/end flags and clip codes.
+ *
+ * PUE - primitive URB entry. An urb entry produced by the setup (SF)
+ * unit holding rasterization and interpolation parameters.
+ *
+ * GRF - general register file. One of several register files
+ * addressable by programmed threads. The inputs (r0, payload, curbe,
+ * urb) of the thread are preloaded to this area before the thread is
+ * spawned. The registers are individually 8 dwords wide and suitable
+ * for general usage. Registers holding thread input values are not
+ * special and may be overwritten.
+ *
+ * MRF - message register file. Threads communicate (and terminate)
+ * by sending messages. Message parameters are placed in contigous
+ * MRF registers. All program output is via these messages. URB
+ * entries are populated by sending a message to the shared URB
+ * function containing the new data, together with a control word,
+ * often an unmodified copy of R0.
+ *
+ * R0 - GRF register 0. Typically holds control information used when
+ * sending messages to other threads.
+ *
+ * EU or GEN4 EU: The name of the programmable subsystem of the
+ * i965 hardware. Threads are executed by the EU, the registers
+ * described above are part of the EU architecture.
+ *
+ * Fixed function units:
+ *
+ * CS - Command streamer. Notional first unit, little software
+ * interaction. Holds the URB entries used for constant data, ie the
+ * CURBEs.
+ *
+ * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
+ * this unit is responsible for pulling vertices out of vertex buffers
+ * in vram and injecting them into the processing pipe as VUEs. If
+ * enabled, it first passes them to a VS thread which is a good place
+ * for the driver to implement any active vertex shader.
+ *
+ * GS - Geometry Shader. This corresponds to a new DX10 concept. If
+ * enabled, incoming strips etc are passed to GS threads in individual
+ * line/triangle/point units. The GS thread may perform arbitary
+ * computation and emit whatever primtives with whatever vertices it
+ * chooses. This makes GS an excellent place to implement GL's
+ * unfilled polygon modes, though of course it is capable of much
+ * more. Additionally, GS is used to translate away primitives not
+ * handled by latter units, including Quads and Lineloops.
+ *
+ * CS - Clipper. Mesa's clipping algorithms are imported to run on
+ * this unit. The fixed function part performs cliptesting against
+ * the 6 fixed clipplanes and makes descisions on whether or not the
+ * incoming primitive needs to be passed to a thread for clipping.
+ * User clip planes are handled via cooperation with the VS thread.
+ *
+ * SF - Strips Fans or Setup: Triangles are prepared for
+ * rasterization. Interpolation coefficients are calculated.
+ * Flatshading and two-side lighting usually performed here.
+ *
+ * WM - Windower. Interpolation of vertex attributes performed here.
+ * Fragment shader implemented here. SIMD aspects of EU taken full
+ * advantage of, as pixels are processed in blocks of 16.
+ *
+ * CC - Color Calculator. No EU threads associated with this unit.
+ * Handles blending and (presumably) depth and stencil testing.
+ */
+
+#define BRW_MAX_CURBE (32*16)
+
+struct brw_context;
+struct brw_winsys;
+
+
+/* Raised when we receive new state across the pipe interface:
+ */
+#define BRW_NEW_VIEWPORT 0x1
+#define BRW_NEW_RASTERIZER 0x2
+#define BRW_NEW_FS 0x4
+#define BRW_NEW_BLEND 0x8
+#define BRW_NEW_CLIP 0x10
+#define BRW_NEW_SCISSOR 0x20
+#define BRW_NEW_STIPPLE 0x40
+#define BRW_NEW_FRAMEBUFFER 0x80
+#define BRW_NEW_ALPHA_TEST 0x100
+#define BRW_NEW_DEPTH_STENCIL 0x200
+#define BRW_NEW_SAMPLER 0x400
+#define BRW_NEW_TEXTURE 0x800
+#define BRW_NEW_CONSTANTS 0x1000
+#define BRW_NEW_VBO 0x2000
+#define BRW_NEW_VS 0x4000
+
+/* Raised for other internal events:
+ */
+#define BRW_NEW_URB_FENCE 0x10000
+#define BRW_NEW_PSP 0x20000
+#define BRW_NEW_CURBE_OFFSETS 0x40000
+#define BRW_NEW_REDUCED_PRIMITIVE 0x80000
+#define BRW_NEW_PRIMITIVE 0x100000
+#define BRW_NEW_SCENE 0x200000
+#define BRW_NEW_SF_LINKAGE 0x400000
+
+extern int BRW_DEBUG;
+
+#define DEBUG_TEXTURE 0x1
+#define DEBUG_STATE 0x2
+#define DEBUG_IOCTL 0x4
+#define DEBUG_PRIMS 0x8
+#define DEBUG_VERTS 0x10
+#define DEBUG_FALLBACKS 0x20
+#define DEBUG_VERBOSE 0x40
+#define DEBUG_DRI 0x80
+#define DEBUG_DMA 0x100
+#define DEBUG_SANITY 0x200
+#define DEBUG_SYNC 0x400
+#define DEBUG_SLEEP 0x800
+#define DEBUG_PIXEL 0x1000
+#define DEBUG_STATS 0x2000
+#define DEBUG_TILE 0x4000
+#define DEBUG_SINGLE_THREAD 0x8000
+#define DEBUG_WM 0x10000
+#define DEBUG_URB 0x20000
+#define DEBUG_VS 0x40000
+#define DEBUG_BATCH 0x80000
+#define DEBUG_BUFMGR 0x100000
+#define DEBUG_BLIT 0x200000
+#define DEBUG_REGION 0x400000
+#define DEBUG_MIPTREE 0x800000
+
+#define DBG(...) do { \
+ if (BRW_DEBUG & FILE_DEBUG_FLAG) \
+ debug_printf(__VA_ARGS__); \
+} while(0)
+
+#define PRINT(...) do { \
+ debug_printf(__VA_ARGS__); \
+} while(0)
+
+struct brw_state_flags {
+ unsigned cache;
+ unsigned brw;
+};
+
+
+struct brw_vertex_program {
+ struct pipe_shader_state program;
+ struct tgsi_shader_info info;
+ int id;
+};
+
+
+struct brw_fragment_program {
+ struct pipe_shader_state program;
+ struct tgsi_shader_info info;
+
+ boolean UsesDepth; /* XXX add this to tgsi_shader_info? */
+ int id;
+};
+
+
+struct pipe_setup_linkage {
+ struct {
+ unsigned vp_output:5;
+ unsigned interp_mode:4;
+ unsigned bf_vp_output:5;
+ } fp_input[PIPE_MAX_SHADER_INPUTS];
+
+ unsigned fp_input_count:5;
+ unsigned max_vp_output:5;
+};
+
+
+
+struct brw_texture {
+ struct pipe_texture base;
+
+ /* Derived from the above:
+ */
+ unsigned stride;
+ unsigned depth_pitch; /* per-image on i945? */
+ unsigned total_nblocksy;
+
+ unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* Explicitly store the offset of each image for each cube face or
+ * depth value. Pretty much have to accept that hardware formats
+ * are going to be so diverse that there is no unified way to
+ * compute the offsets of depth/cube images within a mipmap level,
+ * so have to store them as a lookup table:
+ */
+ unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */
+
+ /* Includes image offset tables:
+ */
+ unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* The data is held here:
+ */
+ struct pipe_buffer *buffer;
+};
+
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+
+struct brw_wm_prog_data {
+ unsigned curb_read_length;
+ unsigned urb_read_length;
+
+ unsigned first_curbe_grf;
+ unsigned total_grf;
+ unsigned total_scratch;
+
+ /* Internally generated constants for the CURBE. These are loaded
+ * ahead of the data from the constant buffer.
+ */
+ const float internal_const[8];
+ unsigned nr_internal_consts;
+ unsigned max_const;
+
+ boolean error;
+};
+
+struct brw_sf_prog_data {
+ unsigned urb_read_length;
+ unsigned total_grf;
+
+ /* Each vertex may have upto 12 attributes, 4 components each,
+ * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11
+ * rows.
+ *
+ * Actually we use 4 for each, so call it 12 rows.
+ */
+ unsigned urb_entry_size;
+};
+
+struct brw_clip_prog_data {
+ unsigned curb_read_length; /* user planes? */
+ unsigned clip_mode;
+ unsigned urb_read_length;
+ unsigned total_grf;
+};
+
+struct brw_gs_prog_data {
+ unsigned urb_read_length;
+ unsigned total_grf;
+};
+
+struct brw_vs_prog_data {
+ unsigned curb_read_length;
+ unsigned urb_read_length;
+ unsigned total_grf;
+ unsigned outputs_written;
+
+ unsigned inputs_read;
+
+ unsigned max_const;
+
+ float imm_buf[PIPE_MAX_CONSTANT][4];
+ unsigned num_imm;
+ unsigned num_consts;
+
+ /* Used for calculating urb partitions:
+ */
+ unsigned urb_entry_size;
+};
+
+
+#define BRW_MAX_TEX_UNIT 8
+#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1
+
+/* Create a fixed sized struct for caching binding tables:
+ */
+struct brw_surface_binding_table {
+ unsigned surf_ss_offset[BRW_WM_MAX_SURF];
+};
+
+
+struct brw_cache;
+
+struct brw_mem_pool {
+ struct pipe_buffer *buffer;
+
+ unsigned size;
+ unsigned offset; /* offset of first free byte */
+
+ struct brw_context *brw;
+};
+
+struct brw_cache_item {
+ unsigned hash;
+ unsigned key_size; /* for variable-sized keys */
+ const void *key;
+
+ unsigned offset; /* offset within pool's buffer */
+ unsigned data_size;
+
+ struct brw_cache_item *next;
+};
+
+
+
+struct brw_cache {
+ unsigned id;
+
+ const char *name;
+
+ struct brw_context *brw;
+ struct brw_mem_pool *pool;
+
+ struct brw_cache_item **items;
+ unsigned size, n_items;
+
+ unsigned key_size; /* for fixed-size keys */
+ unsigned aux_size;
+
+ unsigned last_addr; /* offset of active item */
+};
+
+
+
+
+/* Considered adding a member to this struct to document which flags
+ * an update might raise so that ordering of the state atoms can be
+ * checked or derived at runtime. Dropped the idea in favor of having
+ * a debug mode where the state is monitored for flags which are
+ * raised that have already been tested against.
+ */
+struct brw_tracked_state {
+ struct brw_state_flags dirty;
+ void (*update)( struct brw_context *brw );
+};
+
+
+/* Flags for brw->state.cache.
+ */
+#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
+#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
+
+
+
+
+enum brw_mempool_id {
+ BRW_GS_POOL,
+ BRW_SS_POOL,
+ BRW_MAX_POOL
+};
+
+
+struct brw_cached_batch_item {
+ struct header *header;
+ unsigned sz;
+ struct brw_cached_batch_item *next;
+};
+
+
+
+/* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life
+ * be easier if C allowed arrays of packed elements?
+ */
+#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32)
+
+
+
+
+struct brw_vertex_info {
+ unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */
+ unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */
+};
+
+
+
+
+
+struct brw_context
+{
+ struct pipe_context pipe;
+ struct brw_winsys *winsys;
+
+ unsigned primitive;
+ unsigned reduced_primitive;
+
+ boolean emit_state_always;
+
+ struct {
+ struct brw_state_flags dirty;
+ } state;
+
+
+ struct {
+ const struct pipe_blend_state *Blend;
+ const struct pipe_depth_stencil_alpha_state *DepthStencil;
+ const struct pipe_poly_stipple *PolygonStipple;
+ const struct pipe_rasterizer_state *Raster;
+ const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS];
+ const struct brw_vertex_program *VertexProgram;
+ const struct brw_fragment_program *FragmentProgram;
+
+ struct pipe_clip_state Clip;
+ struct pipe_blend_color BlendColor;
+ struct pipe_scissor_state Scissor;
+ struct pipe_viewport_state Viewport;
+ struct pipe_framebuffer_state FrameBuffer;
+
+ const struct pipe_constant_buffer *Constants[2];
+ const struct brw_texture *Texture[PIPE_MAX_SAMPLERS];
+ } attribs;
+
+ unsigned num_samplers;
+ unsigned num_textures;
+
+ struct brw_mem_pool pool[BRW_MAX_POOL];
+ struct brw_cache cache[BRW_MAX_CACHE];
+ struct brw_cached_batch_item *cached_batch_items;
+
+ struct {
+
+ /* Arrays with buffer objects to copy non-bufferobj arrays into
+ * for upload:
+ */
+ const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS];
+
+ struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS];
+
+#define BRW_NR_UPLOAD_BUFS 17
+#define BRW_UPLOAD_INIT_SIZE (128*1024)
+
+ /* Summary of size and varying of active arrays, so we can check
+ * for changes to this state:
+ */
+ struct brw_vertex_info info;
+ } vb;
+
+
+ unsigned hardware_dirty;
+ unsigned dirty;
+ unsigned pci_id;
+ /* BRW_NEW_URB_ALLOCATIONS:
+ */
+ struct {
+ unsigned vsize; /* vertex size plus header in urb registers */
+ unsigned csize; /* constant buffer size in urb registers */
+ unsigned sfsize; /* setup data size in urb registers */
+
+ boolean constrained;
+
+ unsigned nr_vs_entries;
+ unsigned nr_gs_entries;
+ unsigned nr_clip_entries;
+ unsigned nr_sf_entries;
+ unsigned nr_cs_entries;
+
+/* unsigned vs_size; */
+/* unsigned gs_size; */
+/* unsigned clip_size; */
+/* unsigned sf_size; */
+/* unsigned cs_size; */
+
+ unsigned vs_start;
+ unsigned gs_start;
+ unsigned clip_start;
+ unsigned sf_start;
+ unsigned cs_start;
+ } urb;
+
+
+ /* BRW_NEW_CURBE_OFFSETS:
+ */
+ struct {
+ unsigned wm_start;
+ unsigned wm_size;
+ unsigned clip_start;
+ unsigned clip_size;
+ unsigned vs_start;
+ unsigned vs_size;
+ unsigned total_size;
+
+ unsigned gs_offset;
+
+ float *last_buf;
+ unsigned last_bufsz;
+ } curbe;
+
+ struct {
+ struct brw_vs_prog_data *prog_data;
+
+ unsigned prog_gs_offset;
+ unsigned state_gs_offset;
+ } vs;
+
+ struct {
+ struct brw_gs_prog_data *prog_data;
+
+ boolean prog_active;
+ unsigned prog_gs_offset;
+ unsigned state_gs_offset;
+ } gs;
+
+ struct {
+ struct brw_clip_prog_data *prog_data;
+
+ unsigned prog_gs_offset;
+ unsigned vp_gs_offset;
+ unsigned state_gs_offset;
+ } clip;
+
+
+ struct {
+ struct brw_sf_prog_data *prog_data;
+
+ struct pipe_setup_linkage linkage;
+
+ unsigned prog_gs_offset;
+ unsigned vp_gs_offset;
+ unsigned state_gs_offset;
+ } sf;
+
+ struct {
+ struct brw_wm_prog_data *prog_data;
+
+// struct brw_wm_compiler *compile_data;
+
+
+ /**
+ * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER
+ * cache
+ */
+ struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+
+ unsigned render_surf;
+ unsigned nr_surfaces;
+
+ unsigned max_threads;
+ struct pipe_buffer *scratch_buffer;
+ unsigned scratch_buffer_size;
+
+ unsigned sampler_count;
+ unsigned sampler_gs_offset;
+
+ struct brw_surface_binding_table bind;
+ unsigned bind_ss_offset;
+
+ unsigned prog_gs_offset;
+ unsigned state_gs_offset;
+ } wm;
+
+
+ struct {
+ unsigned vp_gs_offset;
+ unsigned state_gs_offset;
+ } cc;
+
+
+ /* Used to give every program string a unique id
+ */
+ unsigned program_id;
+};
+
+
+#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a)
+
+
+/*======================================================================
+ * brw_vtbl.c
+ */
+void brw_do_flush( struct brw_context *brw,
+ unsigned flags );
+
+
+/*======================================================================
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+
+/*======================================================================
+ * brw_tex.c
+ */
+void brwUpdateTextureState( struct brw_context *brw );
+
+
+/* brw_urb.c
+ */
+void brw_upload_urb_fence(struct brw_context *brw);
+
+void brw_upload_constant_buffer_state(struct brw_context *brw);
+
+void brw_init_surface_functions(struct brw_context *brw);
+void brw_init_state_functions(struct brw_context *brw);
+void brw_init_flush_functions(struct brw_context *brw);
+void brw_init_string_functions(struct brw_context *brw);
+
+/*======================================================================
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+static inline struct brw_context *
+brw_context( struct pipe_context *ctx )
+{
+ return (struct brw_context *)ctx;
+}
+
+#endif
+
diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c
new file mode 100644
index 0000000000..824ee7fd6d
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_curbe.c
@@ -0,0 +1,369 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_batch.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+#include "pipe/p_state.h"
+#include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#define FILE_DEBUG_FLAG DEBUG_FALLBACKS
+
+/* Partition the CURBE between the various users of constant values:
+ */
+static void calculate_curbe_offsets( struct brw_context *brw )
+{
+ /* CACHE_NEW_WM_PROG */
+ unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16);
+
+ /* BRW_NEW_VERTEX_PROGRAM */
+ unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16);
+ unsigned nr_clip_regs = 0;
+ unsigned total_regs;
+
+#if 0
+ /* BRW_NEW_CLIP ? */
+ if (brw->attribs.Transform->ClipPlanesEnabled) {
+ unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
+ nr_clip_regs = align(nr_planes * 4, 16);
+ }
+#endif
+
+
+ total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
+
+ /* This can happen - what to do? Probably rather than falling
+ * back, the best thing to do is emit programs which code the
+ * constants as immediate values. Could do this either as a static
+ * cap on WM and VS, or adaptively.
+ *
+ * Unfortunately, this is currently dependent on the results of the
+ * program generation process (in the case of wm), so this would
+ * introduce the need to re-generate programs in the event of a
+ * curbe allocation failure.
+ */
+ /* Max size is 32 - just large enough to
+ * hold the 128 parameters allowed by
+ * the fragment and vertex program
+ * api's. It's not clear what happens
+ * when both VP and FP want to use 128
+ * parameters, though.
+ */
+ assert(total_regs <= 32);
+
+ /* Lazy resize:
+ */
+ if (nr_fp_regs > brw->curbe.wm_size ||
+ nr_vp_regs > brw->curbe.vs_size ||
+ nr_clip_regs != brw->curbe.clip_size ||
+ (total_regs < brw->curbe.total_size / 4 &&
+ brw->curbe.total_size > 16)) {
+
+ unsigned reg = 0;
+
+ /* Calculate a new layout:
+ */
+ reg = 0;
+ brw->curbe.wm_start = reg;
+ brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
+ brw->curbe.clip_start = reg;
+ brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
+ brw->curbe.vs_start = reg;
+ brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
+ brw->curbe.total_size = reg;
+
+#if 0
+ if (0)
+ DBG("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+ brw->curbe.wm_start,
+ brw->curbe.wm_size,
+ brw->curbe.clip_start,
+ brw->curbe.clip_size,
+ brw->curbe.vs_start,
+ brw->curbe.vs_size );
+#endif
+
+ brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
+ }
+}
+
+
+const struct brw_tracked_state brw_curbe_offsets = {
+ .dirty = {
+ .brw = (BRW_NEW_CLIP |
+ BRW_NEW_VS),
+ .cache = CACHE_NEW_WM_PROG
+ },
+ .update = calculate_curbe_offsets
+};
+
+
+
+/* Define the number of curbes within CS's urb allocation. Multiple
+ * urb entries -> multiple curbes. These will be used by
+ * fixed-function hardware in a double-buffering scheme to avoid a
+ * pipeline stall each time the contents of the curbe is changed.
+ */
+void brw_upload_constant_buffer_state(struct brw_context *brw)
+{
+ struct brw_constant_buffer_state cbs;
+ memset(&cbs, 0, sizeof(cbs));
+
+ /* It appears that this is the state packet for the CS unit, ie. the
+ * urb entries detailed here are housed in the CS range from the
+ * URB_FENCE command.
+ */
+ cbs.header.opcode = CMD_CONST_BUFFER_STATE;
+ cbs.header.length = sizeof(cbs)/4 - 2;
+
+ /* BRW_NEW_URB_FENCE */
+ cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+ cbs.bits0.urb_entry_size = brw->urb.csize - 1;
+
+ assert(brw->urb.nr_cs_entries);
+ BRW_CACHED_BATCH_STRUCT(brw, &cbs);
+}
+
+
+static float fixed_plane[6][4] = {
+ { 0, 0, -1, 1 },
+ { 0, 0, 1, 1 },
+ { 0, -1, 0, 1 },
+ { 0, 1, 0, 1 },
+ {-1, 0, 0, 1 },
+ { 1, 0, 0, 1 }
+};
+
+/* Upload a new set of constants. Too much variability to go into the
+ * cache mechanism, but maybe would benefit from a comparison against
+ * the current uploaded set of constants.
+ */
+static void upload_constant_buffer(struct brw_context *brw)
+{
+ struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
+ unsigned sz = brw->curbe.total_size;
+ unsigned bufsz = sz * sizeof(float);
+ float *buf;
+ unsigned i;
+
+
+ if (sz == 0) {
+ struct brw_constant_buffer cb;
+ cb.header.opcode = CMD_CONST_BUFFER;
+ cb.header.length = sizeof(cb)/4 - 2;
+ cb.header.valid = 0;
+ cb.bits0.buffer_length = 0;
+ cb.bits0.buffer_address = 0;
+ BRW_BATCH_STRUCT(brw, &cb);
+
+ if (brw->curbe.last_buf) {
+ free(brw->curbe.last_buf);
+ brw->curbe.last_buf = NULL;
+ brw->curbe.last_bufsz = 0;
+ }
+
+ return;
+ }
+
+ buf = (float *)malloc(bufsz);
+
+ memset(buf, 0, bufsz);
+
+ if (brw->curbe.wm_size) {
+ unsigned offset = brw->curbe.wm_start * 16;
+
+ /* First the constant buffer constants:
+ */
+
+ /* Then any internally generated constants:
+ */
+ for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++)
+ buf[offset + i] = brw->wm.prog_data->internal_const[i];
+
+ assert(brw->wm.prog_data->max_const ==
+ brw->wm.prog_data->nr_internal_consts);
+ }
+
+
+ /* The clipplanes are actually delivered to both CLIP and VS units.
+ * VS uses them to calculate the outcode bitmasks.
+ */
+ if (brw->curbe.clip_size) {
+ unsigned offset = brw->curbe.clip_start * 16;
+ unsigned j;
+
+ /* If any planes are going this way, send them all this way:
+ */
+ for (i = 0; i < 6; i++) {
+ buf[offset + i * 4 + 0] = fixed_plane[i][0];
+ buf[offset + i * 4 + 1] = fixed_plane[i][1];
+ buf[offset + i * 4 + 2] = fixed_plane[i][2];
+ buf[offset + i * 4 + 3] = fixed_plane[i][3];
+ }
+
+ /* Clip planes: BRW_NEW_CLIP:
+ */
+ for (j = 0; j < brw->attribs.Clip.nr; j++) {
+ buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0];
+ buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1];
+ buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2];
+ buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3];
+ i++;
+ }
+ }
+
+
+ if (brw->curbe.vs_size) {
+ unsigned offset = brw->curbe.vs_start * 16;
+ /*unsigned nr = vp->max_const;*/
+ const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0];
+ struct pipe_winsys *ws = brw->pipe.winsys;
+ /* FIXME: buffer size is num_consts + num_immediates */
+ if (brw->vs.prog_data->num_consts) {
+ /* map the vertex constant buffer and copy to curbe: */
+ void *data = ws->buffer_map(ws, cbuffer->buffer, 0);
+ /* FIXME: this is wrong. the cbuffer->size currently
+ * represents size of consts + immediates. so if we'll
+ * have both we'll copy over the end of the buffer
+ * with the subsequent memcpy */
+ memcpy(&buf[offset], data, cbuffer->size);
+ ws->buffer_unmap(ws, cbuffer->buffer);
+ offset += cbuffer->size;
+ }
+ /*immediates*/
+ if (brw->vs.prog_data->num_imm) {
+ memcpy(&buf[offset], brw->vs.prog_data->imm_buf,
+ brw->vs.prog_data->num_imm * 4 * sizeof(float));
+ }
+ }
+
+ if (1) {
+ for (i = 0; i < sz; i+=4)
+ debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+ buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+
+ debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+ brw->curbe.last_buf, buf,
+ bufsz, brw->curbe.last_bufsz,
+ brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+ }
+
+ if (brw->curbe.last_buf &&
+ bufsz == brw->curbe.last_bufsz &&
+ memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
+ free(buf);
+/* return; */
+ }
+ else {
+ if (brw->curbe.last_buf)
+ free(brw->curbe.last_buf);
+ brw->curbe.last_buf = buf;
+ brw->curbe.last_bufsz = bufsz;
+
+
+ if (!brw_pool_alloc(pool,
+ bufsz,
+ 1 << 6,
+ &brw->curbe.gs_offset)) {
+ debug_printf("out of GS memory for curbe\n");
+ assert(0);
+ return;
+ }
+
+
+ /* Copy data to the buffer:
+ */
+ brw->winsys->buffer_subdata_typed(brw->winsys,
+ pool->buffer,
+ brw->curbe.gs_offset,
+ bufsz,
+ buf,
+ BRW_CONSTANT_BUFFER );
+ }
+
+ /* TODO: only emit the constant_buffer packet when necessary, ie:
+ - contents have changed
+ - offset has changed
+ - hw requirements due to other packets emitted.
+ */
+ {
+ struct brw_constant_buffer cb;
+
+ memset(&cb, 0, sizeof(cb));
+
+ cb.header.opcode = CMD_CONST_BUFFER;
+ cb.header.length = sizeof(cb)/4 - 2;
+ cb.header.valid = 1;
+ cb.bits0.buffer_length = sz - 1;
+ cb.bits0.buffer_address = brw->curbe.gs_offset >> 6;
+
+ /* Because this provokes an action (ie copy the constants into the
+ * URB), it shouldn't be shortcircuited if identical to the
+ * previous time - because eg. the urb destination may have
+ * changed, or the urb contents different to last time.
+ *
+ * Note that the data referred to is actually copied internally,
+ * not just used in place according to passed pointer.
+ *
+ * It appears that the CS unit takes care of using each available
+ * URB entry (Const URB Entry == CURBE) in turn, and issuing
+ * flushes as necessary when doublebuffering of CURBEs isn't
+ * possible.
+ */
+ BRW_BATCH_STRUCT(brw, &cb);
+ }
+}
+
+/* This tracked state is unique in that the state it monitors varies
+ * dynamically depending on the parameters tracked by the fragment and
+ * vertex programs. This is the template used as a starting point,
+ * each context will maintain a copy of this internally and update as
+ * required.
+ */
+const struct brw_tracked_state brw_constant_buffer = {
+ .dirty = {
+ .brw = (BRW_NEW_CLIP |
+ BRW_NEW_CONSTANTS |
+ BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_CURBE_OFFSETS),
+ .cache = (CACHE_NEW_WM_PROG)
+ },
+ .update = upload_constant_buffer
+};
+
diff --git a/src/gallium/drivers/i965simple/brw_defines.h b/src/gallium/drivers/i965simple/brw_defines.h
new file mode 100644
index 0000000000..9379a397f6
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_defines.h
@@ -0,0 +1,852 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/*
+ */
+#define MI_NOOP 0x00
+#define MI_USER_INTERRUPT 0x02
+#define MI_WAIT_FOR_EVENT 0x03
+#define MI_FLUSH 0x04
+#define MI_REPORT_HEAD 0x07
+#define MI_ARB_ON_OFF 0x08
+#define MI_BATCH_BUFFER_END 0x0A
+#define MI_OVERLAY_FLIP 0x11
+#define MI_LOAD_SCAN_LINES_INCL 0x12
+#define MI_LOAD_SCAN_LINES_EXCL 0x13
+#define MI_DISPLAY_BUFFER_INFO 0x14
+#define MI_SET_CONTEXT 0x18
+#define MI_STORE_DATA_IMM 0x20
+#define MI_STORE_DATA_INDEX 0x21
+#define MI_LOAD_REGISTER_IMM 0x22
+#define MI_STORE_REGISTER_MEM 0x24
+#define MI_BATCH_BUFFER_START 0x31
+
+#define MI_SYNCHRONOUS_FLIP 0x0
+#define MI_ASYNCHRONOUS_FLIP 0x1
+
+#define MI_BUFFER_SECURE 0x0
+#define MI_BUFFER_NONSECURE 0x1
+
+#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0
+#define MI_ARBITRATE_BETWEEN_INSTS 0x1
+#define MI_NO_ARBITRATION 0x3
+
+#define MI_CONDITION_CODE_WAIT_DISABLED 0x0
+#define MI_CONDITION_CODE_WAIT_0 0x1
+#define MI_CONDITION_CODE_WAIT_1 0x2
+#define MI_CONDITION_CODE_WAIT_2 0x3
+#define MI_CONDITION_CODE_WAIT_3 0x4
+#define MI_CONDITION_CODE_WAIT_4 0x5
+
+#define MI_DISPLAY_PIPE_A 0x0
+#define MI_DISPLAY_PIPE_B 0x1
+
+#define MI_DISPLAY_PLANE_A 0x0
+#define MI_DISPLAY_PLANE_B 0x1
+#define MI_DISPLAY_PLANE_C 0x2
+
+#define MI_STANDARD_FLIP 0x0
+#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1
+#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2
+#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3
+
+#define MI_PHYSICAL_ADDRESS 0x0
+#define MI_VIRTUAL_ADDRESS 0x1
+
+#define MI_BUFFER_MEMORY_MAIN 0x0
+#define MI_BUFFER_MEMORY_GTT 0x2
+#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3
+
+#define MI_FLIP_CONTINUE 0x0
+#define MI_FLIP_ON 0x1
+#define MI_FLIP_OFF 0x2
+
+#define MI_UNTRUSTED_REGISTER_SPACE 0x0
+#define MI_TRUSTED_REGISTER_SPACE 0x1
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+#define _3DPRIMITIVE 0x00
+
+#define PIPE_CONTROL_NOWRITE 0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
+#define PIPE_CONTROL_WRITEDEPTH 0x02
+#define PIPE_CONTROL_WRITETIMESTAMP 0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define BRW_ANISORATIO_2 0
+#define BRW_ANISORATIO_4 1
+#define BRW_ANISORATIO_6 2
+#define BRW_ANISORATIO_8 3
+#define BRW_ANISORATIO_10 4
+#define BRW_ANISORATIO_12 5
+#define BRW_ANISORATIO_14 6
+#define BRW_ANISORATIO_16 7
+
+#define BRW_BLENDFACTOR_ONE 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR 0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
+#define BRW_BLENDFACTOR_DST_ALPHA 0x4
+#define BRW_BLENDFACTOR_DST_COLOR 0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define BRW_BLENDFACTOR_CONST_COLOR 0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define BRW_BLENDFACTOR_ZERO 0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define BRW_BLENDFUNCTION_ADD 0
+#define BRW_BLENDFUNCTION_SUBTRACT 1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define BRW_BLENDFUNCTION_MIN 3
+#define BRW_BLENDFUNCTION_MAX 4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8 0
+#define BRW_ALPHATEST_FORMAT_FLOAT32 1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define BRW_CHROMAKEY_REPLACE_BLACK 1
+
+#define BRW_CLIP_API_OGL 0
+#define BRW_CLIP_API_DX 1
+
+#define BRW_CLIPMODE_NORMAL 0
+#define BRW_CLIPMODE_CLIP_ALL 1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
+#define BRW_CLIPMODE_REJECT_ALL 3
+#define BRW_CLIPMODE_ACCEPT_ALL 4
+
+#define BRW_CLIP_NDCSPACE 0
+#define BRW_CLIP_SCREENSPACE 1
+
+#define BRW_COMPAREFUNCTION_ALWAYS 0
+#define BRW_COMPAREFUNCTION_NEVER 1
+#define BRW_COMPAREFUNCTION_LESS 2
+#define BRW_COMPAREFUNCTION_EQUAL 3
+#define BRW_COMPAREFUNCTION_LEQUAL 4
+#define BRW_COMPAREFUNCTION_GREATER 5
+#define BRW_COMPAREFUNCTION_NOTEQUAL 6
+#define BRW_COMPAREFUNCTION_GEQUAL 7
+
+#define BRW_COVERAGE_PIXELS_HALF 0
+#define BRW_COVERAGE_PIXELS_1 1
+#define BRW_COVERAGE_PIXELS_2 2
+#define BRW_COVERAGE_PIXELS_4 3
+
+#define BRW_CULLMODE_BOTH 0
+#define BRW_CULLMODE_NONE 1
+#define BRW_CULLMODE_FRONT 2
+#define BRW_CULLMODE_BACK 3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define BRW_DEPTHFORMAT_D32_FLOAT 1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define BRW_DEPTHFORMAT_D16_UNORM 5
+
+#define BRW_FLOATING_POINT_IEEE_754 0
+#define BRW_FLOATING_POINT_NON_IEEE_754 1
+
+#define BRW_FRONTWINDING_CW 0
+#define BRW_FRONTWINDING_CCW 1
+
+#define BRW_SPRITE_POINT_ENABLE 16
+
+#define BRW_INDEX_BYTE 0
+#define BRW_INDEX_WORD 1
+#define BRW_INDEX_DWORD 2
+
+#define BRW_LOGICOPFUNCTION_CLEAR 0
+#define BRW_LOGICOPFUNCTION_NOR 1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
+#define BRW_LOGICOPFUNCTION_INVERT 5
+#define BRW_LOGICOPFUNCTION_XOR 6
+#define BRW_LOGICOPFUNCTION_NAND 7
+#define BRW_LOGICOPFUNCTION_AND 8
+#define BRW_LOGICOPFUNCTION_EQUIV 9
+#define BRW_LOGICOPFUNCTION_NOOP 10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
+#define BRW_LOGICOPFUNCTION_COPY 12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
+#define BRW_LOGICOPFUNCTION_OR 14
+#define BRW_LOGICOPFUNCTION_SET 15
+
+#define BRW_MAPFILTER_NEAREST 0x0
+#define BRW_MAPFILTER_LINEAR 0x1
+#define BRW_MAPFILTER_ANISOTROPIC 0x2
+
+#define BRW_MIPFILTER_NONE 0
+#define BRW_MIPFILTER_NEAREST 1
+#define BRW_MIPFILTER_LINEAR 3
+
+#define BRW_POLYGON_FRONT_FACING 0
+#define BRW_POLYGON_BACK_FACING 1
+
+#define BRW_PREFILTER_ALWAYS 0x0
+#define BRW_PREFILTER_NEVER 0x1
+#define BRW_PREFILTER_LESS 0x2
+#define BRW_PREFILTER_EQUAL 0x3
+#define BRW_PREFILTER_LEQUAL 0x4
+#define BRW_PREFILTER_GREATER 0x5
+#define BRW_PREFILTER_NOTEQUAL 0x6
+#define BRW_PREFILTER_GEQUAL 0x7
+
+#define BRW_PROVOKING_VERTEX_0 0
+#define BRW_PROVOKING_VERTEX_1 1
+#define BRW_PROVOKING_VERTEX_2 2
+
+#define BRW_RASTRULE_UPPER_LEFT 0
+#define BRW_RASTRULE_UPPER_RIGHT 1
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define BRW_STENCILOP_KEEP 0
+#define BRW_STENCILOP_ZERO 1
+#define BRW_STENCILOP_REPLACE 2
+#define BRW_STENCILOP_INCRSAT 3
+#define BRW_STENCILOP_DECRSAT 4
+#define BRW_STENCILOP_INCR 5
+#define BRW_STENCILOP_DECR 6
+#define BRW_STENCILOP_INVERT 7
+
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
+#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
+#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
+#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
+#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
+#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
+#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
+#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
+#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
+#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
+#define BRW_SURFACEFORMAT_R16_SINT 0x10C
+#define BRW_SURFACEFORMAT_R16_UINT 0x10D
+#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
+#define BRW_SURFACEFORMAT_I16_UNORM 0x111
+#define BRW_SURFACEFORMAT_L16_UNORM 0x112
+#define BRW_SURFACEFORMAT_A16_UNORM 0x113
+#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
+#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM 0x140
+#define BRW_SURFACEFORMAT_R8_SNORM 0x141
+#define BRW_SURFACEFORMAT_R8_SINT 0x142
+#define BRW_SURFACEFORMAT_R8_UINT 0x143
+#define BRW_SURFACEFORMAT_A8_UNORM 0x144
+#define BRW_SURFACEFORMAT_I8_UNORM 0x145
+#define BRW_SURFACEFORMAT_L8_UNORM 0x146
+#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
+#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
+#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
+#define BRW_SURFACEFORMAT_R1_UINT 0x181
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
+#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
+#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
+#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
+#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define BRW_SURFACEFORMAT_MONO8 0x18E
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
+#define BRW_SURFACEFORMAT_FXT1 0x192
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
+#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32 0
+#define BRW_SURFACERETURNFORMAT_S1 1
+
+#define BRW_SURFACE_1D 0
+#define BRW_SURFACE_2D 1
+#define BRW_SURFACE_3D 2
+#define BRW_SURFACE_CUBE 3
+#define BRW_SURFACE_BUFFER 4
+#define BRW_SURFACE_NULL 7
+
+#define BRW_TEXCOORDMODE_WRAP 0
+#define BRW_TEXCOORDMODE_MIRROR 1
+#define BRW_TEXCOORDMODE_CLAMP 2
+#define BRW_TEXCOORDMODE_CUBE 3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define BRW_THREAD_PRIORITY_NORMAL 0
+#define BRW_THREAD_PRIORITY_HIGH 1
+
+#define BRW_TILEWALK_XMAJOR 0
+#define BRW_TILEWALK_YMAJOR 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0
+#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
+
+#define BRW_VFCOMPONENT_NOSTORE 0
+#define BRW_VFCOMPONENT_STORE_SRC 1
+#define BRW_VFCOMPONENT_STORE_0 2
+#define BRW_VFCOMPONENT_STORE_1_FLT 3
+#define BRW_VFCOMPONENT_STORE_1_INT 4
+#define BRW_VFCOMPONENT_STORE_VID 5
+#define BRW_VFCOMPONENT_STORE_IID 6
+#define BRW_VFCOMPONENT_STORE_PID 7
+
+
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1 0
+#define BRW_ALIGN_16 1
+
+#define BRW_ADDRESS_DIRECT 0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define BRW_CHANNEL_X 0
+#define BRW_CHANNEL_Y 1
+#define BRW_CHANNEL_Z 2
+#define BRW_CHANNEL_W 3
+
+#define BRW_COMPRESSION_NONE 0
+#define BRW_COMPRESSION_2NDHALF 1
+#define BRW_COMPRESSION_COMPRESSED 2
+
+#define BRW_CONDITIONAL_NONE 0
+#define BRW_CONDITIONAL_Z 1
+#define BRW_CONDITIONAL_NZ 2
+#define BRW_CONDITIONAL_EQ 1 /* Z */
+#define BRW_CONDITIONAL_NEQ 2 /* NZ */
+#define BRW_CONDITIONAL_G 3
+#define BRW_CONDITIONAL_GE 4
+#define BRW_CONDITIONAL_L 5
+#define BRW_CONDITIONAL_LE 6
+#define BRW_CONDITIONAL_C 7
+#define BRW_CONDITIONAL_O 8
+
+#define BRW_DEBUG_NONE 0
+#define BRW_DEBUG_BREAKPOINT 1
+
+#define BRW_DEPENDENCY_NORMAL 0
+#define BRW_DEPENDENCY_NOTCLEARED 1
+#define BRW_DEPENDENCY_NOTCHECKED 2
+#define BRW_DEPENDENCY_DISABLE 3
+
+#define BRW_EXECUTE_1 0
+#define BRW_EXECUTE_2 1
+#define BRW_EXECUTE_4 2
+#define BRW_EXECUTE_8 3
+#define BRW_EXECUTE_16 4
+#define BRW_EXECUTE_32 5
+
+#define BRW_HORIZONTAL_STRIDE_0 0
+#define BRW_HORIZONTAL_STRIDE_1 1
+#define BRW_HORIZONTAL_STRIDE_2 2
+#define BRW_HORIZONTAL_STRIDE_4 3
+
+#define BRW_INSTRUCTION_NORMAL 0
+#define BRW_INSTRUCTION_SATURATE 1
+
+#define BRW_MASK_ENABLE 0
+#define BRW_MASK_DISABLE 1
+
+#define BRW_OPCODE_MOV 1
+#define BRW_OPCODE_SEL 2
+#define BRW_OPCODE_NOT 4
+#define BRW_OPCODE_AND 5
+#define BRW_OPCODE_OR 6
+#define BRW_OPCODE_XOR 7
+#define BRW_OPCODE_SHR 8
+#define BRW_OPCODE_SHL 9
+#define BRW_OPCODE_RSR 10
+#define BRW_OPCODE_RSL 11
+#define BRW_OPCODE_ASR 12
+#define BRW_OPCODE_CMP 16
+#define BRW_OPCODE_JMPI 32
+#define BRW_OPCODE_IF 34
+#define BRW_OPCODE_IFF 35
+#define BRW_OPCODE_ELSE 36
+#define BRW_OPCODE_ENDIF 37
+#define BRW_OPCODE_DO 38
+#define BRW_OPCODE_WHILE 39
+#define BRW_OPCODE_BREAK 40
+#define BRW_OPCODE_CONTINUE 41
+#define BRW_OPCODE_HALT 42
+#define BRW_OPCODE_MSAVE 44
+#define BRW_OPCODE_MRESTORE 45
+#define BRW_OPCODE_PUSH 46
+#define BRW_OPCODE_POP 47
+#define BRW_OPCODE_WAIT 48
+#define BRW_OPCODE_SEND 49
+#define BRW_OPCODE_ADD 64
+#define BRW_OPCODE_MUL 65
+#define BRW_OPCODE_AVG 66
+#define BRW_OPCODE_FRC 67
+#define BRW_OPCODE_RNDU 68
+#define BRW_OPCODE_RNDD 69
+#define BRW_OPCODE_RNDE 70
+#define BRW_OPCODE_RNDZ 71
+#define BRW_OPCODE_MAC 72
+#define BRW_OPCODE_MACH 73
+#define BRW_OPCODE_LZD 74
+#define BRW_OPCODE_SAD2 80
+#define BRW_OPCODE_SADA2 81
+#define BRW_OPCODE_DP4 84
+#define BRW_OPCODE_DPH 85
+#define BRW_OPCODE_DP3 86
+#define BRW_OPCODE_DP2 87
+#define BRW_OPCODE_DPA2 88
+#define BRW_OPCODE_LINE 89
+#define BRW_OPCODE_NOP 126
+
+#define BRW_PREDICATE_NONE 0
+#define BRW_PREDICATE_NORMAL 1
+#define BRW_PREDICATE_ALIGN1_ANYV 2
+#define BRW_PREDICATE_ALIGN1_ALLV 3
+#define BRW_PREDICATE_ALIGN1_ANY2H 4
+#define BRW_PREDICATE_ALIGN1_ALL2H 5
+#define BRW_PREDICATE_ALIGN1_ANY4H 6
+#define BRW_PREDICATE_ALIGN1_ALL4H 7
+#define BRW_PREDICATE_ALIGN1_ANY8H 8
+#define BRW_PREDICATE_ALIGN1_ALL8H 9
+#define BRW_PREDICATE_ALIGN1_ANY16H 10
+#define BRW_PREDICATE_ALIGN1_ALL16H 11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
+#define BRW_PREDICATE_ALIGN16_ANY4H 6
+#define BRW_PREDICATE_ALIGN16_ALL4H 7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE 0
+#define BRW_GENERAL_REGISTER_FILE 1
+#define BRW_MESSAGE_REGISTER_FILE 2
+#define BRW_IMMEDIATE_VALUE 3
+
+#define BRW_REGISTER_TYPE_UD 0
+#define BRW_REGISTER_TYPE_D 1
+#define BRW_REGISTER_TYPE_UW 2
+#define BRW_REGISTER_TYPE_W 3
+#define BRW_REGISTER_TYPE_UB 4
+#define BRW_REGISTER_TYPE_B 5
+#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF 6
+#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F 7
+
+#define BRW_ARF_NULL 0x00
+#define BRW_ARF_ADDRESS 0x10
+#define BRW_ARF_ACCUMULATOR 0x20
+#define BRW_ARF_FLAG 0x30
+#define BRW_ARF_MASK 0x40
+#define BRW_ARF_MASK_STACK 0x50
+#define BRW_ARF_MASK_STACK_DEPTH 0x60
+#define BRW_ARF_STATE 0x70
+#define BRW_ARF_CONTROL 0x80
+#define BRW_ARF_NOTIFICATION_COUNT 0x90
+#define BRW_ARF_IP 0xA0
+
+#define BRW_AMASK 0
+#define BRW_IMASK 1
+#define BRW_LMASK 2
+#define BRW_CMASK 3
+
+
+
+#define BRW_THREAD_NORMAL 0
+#define BRW_THREAD_ATOMIC 1
+#define BRW_THREAD_SWITCH 2
+
+#define BRW_VERTICAL_STRIDE_0 0
+#define BRW_VERTICAL_STRIDE_1 1
+#define BRW_VERTICAL_STRIDE_2 2
+#define BRW_VERTICAL_STRIDE_4 3
+#define BRW_VERTICAL_STRIDE_8 4
+#define BRW_VERTICAL_STRIDE_16 5
+#define BRW_VERTICAL_STRIDE_32 6
+#define BRW_VERTICAL_STRIDE_64 7
+#define BRW_VERTICAL_STRIDE_128 8
+#define BRW_VERTICAL_STRIDE_256 9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define BRW_WIDTH_1 0
+#define BRW_WIDTH_2 1
+#define BRW_WIDTH_4 2
+#define BRW_WIDTH_8 3
+#define BRW_WIDTH_16 4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define BRW_POLYGON_FACING_FRONT 0
+#define BRW_POLYGON_FACING_BACK 1
+
+#define BRW_MESSAGE_TARGET_NULL 0
+#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_SAMPLER 2
+#define BRW_MESSAGE_TARGET_GATEWAY 3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_URB 6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define BRW_MATH_FUNCTION_INV 1
+#define BRW_MATH_FUNCTION_LOG 2
+#define BRW_MATH_FUNCTION_EXP 3
+#define BRW_MATH_FUNCTION_SQRT 4
+#define BRW_MATH_FUNCTION_RSQ 5
+#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN 9
+#define BRW_MATH_FUNCTION_POW 10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define BRW_MATH_INTEGER_UNSIGNED 0
+#define BRW_MATH_INTEGER_SIGNED 1
+
+#define BRW_MATH_PRECISION_FULL 0
+#define BRW_MATH_PRECISION_PARTIAL 1
+
+#define BRW_MATH_SATURATE_NONE 0
+#define BRW_MATH_SATURATE_SATURATE 1
+
+#define BRW_MATH_DATA_VECTOR 0
+#define BRW_MATH_DATA_SCALAR 1
+
+#define BRW_URB_OPCODE_WRITE 0
+
+#define BRW_URB_SWIZZLE_NONE 0
+#define BRW_URB_SWIZZLE_INTERLEAVE 1
+#define BRW_URB_SWIZZLE_TRANSPOSE 2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K 0
+#define BRW_SCRATCH_SPACE_SIZE_2K 1
+#define BRW_SCRATCH_SPACE_SIZE_4K 2
+#define BRW_SCRATCH_SPACE_SIZE_8K 3
+#define BRW_SCRATCH_SPACE_SIZE_16K 4
+#define BRW_SCRATCH_SPACE_SIZE_32K 5
+#define BRW_SCRATCH_SPACE_SIZE_64K 6
+#define BRW_SCRATCH_SPACE_SIZE_128K 7
+#define BRW_SCRATCH_SPACE_SIZE_256K 8
+#define BRW_SCRATCH_SPACE_SIZE_512K 9
+#define BRW_SCRATCH_SPACE_SIZE_1M 10
+#define BRW_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CONST_BUFFER_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT 0x6104
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+#define CMD_VERTEX_BUFFER 0x7808
+#define CMD_VERTEX_ELEMENT 0x7809
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS 0x780b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_draw.c b/src/gallium/drivers/i965simple/brw_draw.c
new file mode 100644
index 0000000000..7598e3dc8a
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_draw.c
@@ -0,0 +1,239 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdlib.h>
+
+#include "brw_batch.h"
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_winsys.h"
+
+static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = {
+ _3DPRIM_POINTLIST,
+ _3DPRIM_LINELIST,
+ _3DPRIM_LINELOOP,
+ _3DPRIM_LINESTRIP,
+ _3DPRIM_TRILIST,
+ _3DPRIM_TRISTRIP,
+ _3DPRIM_TRIFAN,
+ _3DPRIM_QUADLIST,
+ _3DPRIM_QUADSTRIP,
+ _3DPRIM_POLYGON
+};
+
+
+static const int reduced_prim[PIPE_PRIM_POLYGON+1] = {
+ PIPE_PRIM_POINTS,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES
+};
+
+
+/* When the primitive changes, set a state bit and re-validate. Not
+ * the nicest and would rather deal with this by having all the
+ * programs be immune to the active primitive (ie. cope with all
+ * possibilities). That may not be realistic however.
+ */
+static void brw_set_prim(struct brw_context *brw, int prim)
+{
+ PRINT("PRIM: %d\n", prim);
+
+ /* Slight optimization to avoid the GS program when not needed:
+ */
+ if (prim == PIPE_PRIM_QUAD_STRIP &&
+ brw->attribs.Raster->flatshade &&
+ brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_FILL &&
+ brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_FILL)
+ prim = PIPE_PRIM_TRIANGLE_STRIP;
+
+ if (prim != brw->primitive) {
+ brw->primitive = prim;
+ brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
+
+ if (reduced_prim[prim] != brw->reduced_primitive) {
+ brw->reduced_primitive = reduced_prim[prim];
+ brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
+ }
+
+ brw_validate_state(brw);
+ }
+
+}
+
+
+static unsigned trim(int prim, unsigned length)
+{
+ if (prim == PIPE_PRIM_QUAD_STRIP)
+ return length > 3 ? (length - length % 2) : 0;
+ else if (prim == PIPE_PRIM_QUADS)
+ return length - length % 4;
+ else
+ return length;
+}
+
+
+
+static boolean brw_emit_prim( struct brw_context *brw,
+ boolean indexed,
+ unsigned start,
+ unsigned count )
+
+{
+ struct brw_3d_primitive prim_packet;
+
+ if (BRW_DEBUG & DEBUG_PRIMS)
+ PRINT("PRIM: %d %d %d\n", brw->primitive, start, count);
+
+ prim_packet.header.opcode = CMD_3D_PRIM;
+ prim_packet.header.length = sizeof(prim_packet)/4 - 2;
+ prim_packet.header.pad = 0;
+ prim_packet.header.topology = hw_prim[brw->primitive];
+ prim_packet.header.indexed = indexed;
+
+ prim_packet.verts_per_instance = trim(brw->primitive, count);
+ prim_packet.start_vert_location = start;
+ prim_packet.instance_count = 1;
+ prim_packet.start_instance_location = 0;
+ prim_packet.base_vert_location = 0;
+
+ if (prim_packet.verts_per_instance == 0)
+ return TRUE;
+
+ return brw_batchbuffer_data( brw->winsys,
+ &prim_packet,
+ sizeof(prim_packet) );
+}
+
+
+/* May fail if out of video memory for texture or vbo upload, or on
+ * fallback conditions.
+ */
+static boolean brw_try_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *index_buffer,
+ unsigned index_size,
+ unsigned mode,
+ unsigned start,
+ unsigned count )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ /* Set the first primitive ahead of validate_state:
+ */
+ brw_set_prim(brw, mode);
+
+ /* Upload index, vertex data:
+ */
+ if (index_buffer &&
+ !brw_upload_indices( brw, index_buffer, index_size, start, count ))
+ return FALSE;
+
+ if (!brw_upload_vertex_buffers(brw))
+ return FALSE;
+
+ if (!brw_upload_vertex_elements( brw ))
+ return FALSE;
+
+ /* XXX: Need to separate validate and upload of state.
+ */
+ if (brw->state.dirty.brw)
+ brw_validate_state( brw );
+
+ if (!brw_emit_prim(brw, index_buffer != NULL,
+ start, count))
+ return FALSE;
+
+ return TRUE;
+}
+
+
+
+static boolean brw_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode,
+ unsigned start,
+ unsigned count )
+{
+ if (!brw_try_draw_elements( pipe,
+ indexBuffer,
+ indexSize,
+ mode, start, count ))
+ {
+ /* flush ? */
+
+ if (!brw_try_draw_elements( pipe,
+ indexBuffer,
+ indexSize,
+ mode, start,
+ count )) {
+ assert(0);
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+
+
+static boolean brw_draw_arrays( struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count )
+{
+ if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) {
+ /* flush ? */
+
+ if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) {
+ assert(0);
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+
+
+void brw_init_draw_functions( struct brw_context *brw )
+{
+ brw->pipe.draw_arrays = brw_draw_arrays;
+ brw->pipe.draw_elements = brw_draw_elements;
+}
+
+
diff --git a/src/gallium/drivers/i965simple/brw_draw.h b/src/gallium/drivers/i965simple/brw_draw.h
new file mode 100644
index 0000000000..62fe0d5d0e
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_draw.h
@@ -0,0 +1,55 @@
+ /**************************************************************************
+ *
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_DRAW_H
+#define BRW_DRAW_H
+
+#include "pipe/p_context.h"
+
+struct brw_context;
+
+
+
+void brw_init_draw_functions( struct brw_context *brw );
+
+
+boolean brw_upload_vertices( struct brw_context *brw,
+ unsigned min_index,
+ unsigned max_index );
+
+boolean brw_upload_indices(struct brw_context *brw,
+ const struct pipe_buffer *index_buffer,
+ int ib_size, int start, int count);
+
+boolean brw_upload_vertex_buffers( struct brw_context *brw );
+boolean brw_upload_vertex_elements( struct brw_context *brw );
+
+unsigned brw_translate_surface_format( unsigned id );
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c
new file mode 100644
index 0000000000..7c20ea52af
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_draw_upload.c
@@ -0,0 +1,300 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdlib.h>
+
+#include "brw_batch.h"
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+
+
+struct brw_array_state {
+ union header_union header;
+
+ struct {
+ union {
+ struct {
+ unsigned pitch:11;
+ unsigned pad:15;
+ unsigned access_type:1;
+ unsigned vb_index:5;
+ } bits;
+ unsigned dword;
+ } vb0;
+
+ struct pipe_buffer *buffer;
+ unsigned offset;
+
+ unsigned max_index;
+ unsigned instance_data_step_rate;
+
+ } vb[BRW_VBP_MAX];
+};
+
+
+
+unsigned brw_translate_surface_format( unsigned id )
+{
+ switch (id) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return BRW_SURFACEFORMAT_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return BRW_SURFACEFORMAT_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return BRW_SURFACEFORMAT_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return BRW_SURFACEFORMAT_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return BRW_SURFACEFORMAT_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return BRW_SURFACEFORMAT_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return BRW_SURFACEFORMAT_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return BRW_SURFACEFORMAT_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return BRW_SURFACEFORMAT_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return BRW_SURFACEFORMAT_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return BRW_SURFACEFORMAT_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return BRW_SURFACEFORMAT_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return BRW_SURFACEFORMAT_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return BRW_SURFACEFORMAT_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static unsigned get_index_type(int type)
+{
+ switch (type) {
+ case 1: return BRW_INDEX_BYTE;
+ case 2: return BRW_INDEX_WORD;
+ case 4: return BRW_INDEX_DWORD;
+ default: assert(0); return 0;
+ }
+}
+
+
+boolean brw_upload_vertex_buffers( struct brw_context *brw )
+{
+ struct brw_array_state vbp;
+ unsigned nr_enabled = 0;
+ unsigned i;
+
+ memset(&vbp, 0, sizeof(vbp));
+
+ /* This is a hardware limit:
+ */
+
+ for (i = 0; i < BRW_VEP_MAX; i++)
+ {
+ if (brw->vb.vbo_array[i] == NULL) {
+ nr_enabled = i;
+ break;
+ }
+
+ vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->pitch;
+ vbp.vb[i].vb0.bits.pad = 0;
+ vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
+ vbp.vb[i].vb0.bits.vb_index = i;
+ vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset;
+ vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer;
+ vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index;
+ }
+
+
+ vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
+ vbp.header.bits.opcode = CMD_VERTEX_BUFFER;
+
+ BEGIN_BATCH(vbp.header.bits.length+2, 0);
+ OUT_BATCH( vbp.header.dword );
+
+ for (i = 0; i < nr_enabled; i++) {
+ OUT_BATCH( vbp.vb[i].vb0.dword );
+ OUT_RELOC( vbp.vb[i].buffer, PIPE_BUFFER_USAGE_GPU_READ,
+ vbp.vb[i].offset);
+ OUT_BATCH( vbp.vb[i].max_index );
+ OUT_BATCH( vbp.vb[i].instance_data_step_rate );
+ }
+ ADVANCE_BATCH();
+ return TRUE;
+}
+
+
+
+boolean brw_upload_vertex_elements( struct brw_context *brw )
+{
+ struct brw_vertex_element_packet vep;
+
+ unsigned i;
+ unsigned nr_enabled = brw->attribs.VertexProgram->info.num_inputs;
+
+ memset(&vep, 0, sizeof(vep));
+
+ for (i = 0; i < nr_enabled; i++)
+ vep.ve[i] = brw->vb.inputs[i];
+
+
+ vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2;
+ vep.header.opcode = CMD_VERTEX_ELEMENT;
+ brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0]));
+
+ return TRUE;
+}
+
+boolean brw_upload_indices( struct brw_context *brw,
+ const struct pipe_buffer *index_buffer,
+ int ib_size, int start, int count)
+{
+ /* Emit the indexbuffer packet:
+ */
+ {
+ struct brw_indexbuffer ib;
+
+ memset(&ib, 0, sizeof(ib));
+
+ ib.header.bits.opcode = CMD_INDEX_BUFFER;
+ ib.header.bits.length = sizeof(ib)/4 - 2;
+ ib.header.bits.index_format = get_index_type(ib_size);
+ ib.header.bits.cut_index_enable = 0;
+
+
+ BEGIN_BATCH(4, 0);
+ OUT_BATCH( ib.header.dword );
+ OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start);
+ OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start + count);
+ OUT_BATCH( 0 );
+ ADVANCE_BATCH();
+ }
+ return TRUE;
+}
diff --git a/src/gallium/drivers/i965simple/brw_eu.c b/src/gallium/drivers/i965simple/brw_eu.c
new file mode 100644
index 0000000000..e2002d1821
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_eu.c
@@ -0,0 +1,130 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+/* How does predicate control work when execution_size != 8? Do I
+ * need to test/set for 0xffff when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value )
+{
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ if (value != 0xff) {
+ if (value != p->flag_value) {
+ brw_push_insn_state(p);
+ brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+ p->flag_value = value;
+ brw_pop_insn_state(p);
+ }
+
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ }
+}
+
+void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
+{
+ p->current->header.predicate_control = pc;
+}
+
+void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
+{
+ p->current->header.destreg__conditonalmod = conditional;
+}
+
+void brw_set_access_mode( struct brw_compile *p, unsigned access_mode )
+{
+ p->current->header.access_mode = access_mode;
+}
+
+void brw_set_compression_control( struct brw_compile *p, boolean compression_control )
+{
+ p->current->header.compression_control = compression_control;
+}
+
+void brw_set_mask_control( struct brw_compile *p, unsigned value )
+{
+ p->current->header.mask_control = value;
+}
+
+void brw_set_saturate( struct brw_compile *p, unsigned value )
+{
+ p->current->header.saturate = value;
+}
+
+void brw_push_insn_state( struct brw_compile *p )
+{
+ assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+ memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
+ p->current++;
+}
+
+void brw_pop_insn_state( struct brw_compile *p )
+{
+ assert(p->current != p->stack);
+ p->current--;
+}
+
+
+/***********************************************************************
+ */
+void brw_init_compile( struct brw_compile *p )
+{
+ p->nr_insn = 0;
+ p->current = p->stack;
+ memset(p->current, 0, sizeof(p->current[0]));
+
+ /* Some defaults?
+ */
+ brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+ brw_set_saturate(p, 0);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+
+const unsigned *brw_get_program( struct brw_compile *p,
+ unsigned *sz )
+{
+ unsigned i;
+
+ for (i = 0; i < 8; i++)
+ brw_NOP(p);
+
+ *sz = p->nr_insn * sizeof(struct brw_instruction);
+ return (const unsigned *)p->store;
+}
+
diff --git a/src/gallium/drivers/i965simple/brw_eu.h b/src/gallium/drivers/i965simple/brw_eu.h
new file mode 100644
index 0000000000..23151ae9ed
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_eu.h
@@ -0,0 +1,888 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include "brw_structs.h"
+#include "brw_defines.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+
+
+#define REG_SIZE (8*4)
+
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges. Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ */
+struct brw_reg
+{
+ unsigned type:4;
+ unsigned file:2;
+ unsigned nr:8;
+ unsigned subnr:5; /* :1 in align16 */
+ unsigned negate:1; /* source only */
+ unsigned abs:1; /* source only */
+ unsigned vstride:4; /* source only */
+ unsigned width:3; /* src only, align1 only */
+ unsigned hstride:2; /* src only, align1 only */
+ unsigned address_mode:1; /* relative addressing, hopefully! */
+ unsigned pad0:1;
+
+ union {
+ struct {
+ unsigned swizzle:8; /* src only, align16 only */
+ unsigned writemask:4; /* dest only, align16 only */
+ int indirect_offset:10; /* relative addressing offset */
+ unsigned pad1:10; /* two dwords total */
+ } bits;
+
+ float f;
+ int d;
+ unsigned ud;
+ } dw1;
+};
+
+
+struct brw_indirect {
+ unsigned addr_subnr:4;
+ int addr_offset:10;
+ unsigned pad:18;
+};
+
+
+#define BRW_EU_MAX_INSN_STACK 5
+#define BRW_EU_MAX_INSN 1200
+
+struct brw_compile {
+ struct brw_instruction store[BRW_EU_MAX_INSN];
+ unsigned nr_insn;
+
+ /* Allow clients to push/pop instruction state:
+ */
+ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ struct brw_instruction *current;
+
+ unsigned flag_value;
+ boolean single_program_flow;
+};
+
+
+
+static __inline int type_sz( unsigned type )
+{
+ switch( type ) {
+ case BRW_REGISTER_TYPE_UD:
+ case BRW_REGISTER_TYPE_D:
+ case BRW_REGISTER_TYPE_F:
+ return 4;
+ case BRW_REGISTER_TYPE_HF:
+ case BRW_REGISTER_TYPE_UW:
+ case BRW_REGISTER_TYPE_W:
+ return 2;
+ case BRW_REGISTER_TYPE_UB:
+ case BRW_REGISTER_TYPE_B:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static __inline struct brw_reg brw_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr,
+ unsigned type,
+ unsigned vstride,
+ unsigned width,
+ unsigned hstride,
+ unsigned swizzle,
+ unsigned writemask)
+{
+
+ struct brw_reg reg;
+ reg.type = type;
+ reg.file = file;
+ reg.nr = nr;
+ reg.subnr = subnr * type_sz(type);
+ reg.negate = 0;
+ reg.abs = 0;
+ reg.vstride = vstride;
+ reg.width = width;
+ reg.hstride = hstride;
+ reg.address_mode = BRW_ADDRESS_DIRECT;
+ reg.pad0 = 0;
+
+ /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+ * set swizzle and writemask to W, as the lower bits of subnr will
+ * be lost when converted to align16. This is probably too much to
+ * keep track of as you'd want it adjusted by suboffset(), etc.
+ * Perhaps fix up when converting to align16?
+ */
+ reg.dw1.bits.swizzle = swizzle;
+ reg.dw1.bits.writemask = writemask;
+ reg.dw1.bits.indirect_offset = 0;
+ reg.dw1.bits.pad1 = 0;
+ return reg;
+}
+
+static __inline struct brw_reg brw_vec16_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_16,
+ BRW_WIDTH_16,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ TGSI_WRITEMASK_XYZW);
+}
+
+static __inline struct brw_reg brw_vec8_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ TGSI_WRITEMASK_XYZW);
+}
+
+
+static __inline struct brw_reg brw_vec4_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ TGSI_WRITEMASK_XYZW);
+}
+
+
+static __inline struct brw_reg brw_vec2_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYXY,
+ TGSI_WRITEMASK_XY);
+}
+
+static __inline struct brw_reg brw_vec1_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ TGSI_WRITEMASK_X);
+}
+
+
+static __inline struct brw_reg retype( struct brw_reg reg,
+ unsigned type )
+{
+ reg.type = type;
+ return reg;
+}
+
+static __inline struct brw_reg suboffset( struct brw_reg reg,
+ unsigned delta )
+{
+ reg.subnr += delta * type_sz(reg.type);
+ return reg;
+}
+
+
+static __inline struct brw_reg offset( struct brw_reg reg,
+ unsigned delta )
+{
+ reg.nr += delta;
+ return reg;
+}
+
+
+static __inline struct brw_reg byte_offset( struct brw_reg reg,
+ unsigned bytes )
+{
+ unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
+ reg.nr = newoffset / REG_SIZE;
+ reg.subnr = newoffset % REG_SIZE;
+ return reg;
+}
+
+
+static __inline struct brw_reg brw_uw16_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+static __inline struct brw_reg brw_uw8_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+static __inline struct brw_reg brw_uw1_reg( unsigned file,
+ unsigned nr,
+ unsigned subnr )
+{
+ return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+static __inline struct brw_reg brw_imm_reg( unsigned type )
+{
+ return brw_reg( BRW_IMMEDIATE_VALUE,
+ 0,
+ 0,
+ type,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ 0,
+ 0);
+}
+
+static __inline struct brw_reg brw_imm_f( float f )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
+ imm.dw1.f = f;
+ return imm;
+}
+
+static __inline struct brw_reg brw_imm_d( int d )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
+ imm.dw1.d = d;
+ return imm;
+}
+
+static __inline struct brw_reg brw_imm_ud( unsigned ud )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+ imm.dw1.ud = ud;
+ return imm;
+}
+
+static __inline struct brw_reg brw_imm_uw( ushort uw )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+ imm.dw1.ud = uw;
+ return imm;
+}
+
+static __inline struct brw_reg brw_imm_w( short w )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+ imm.dw1.d = w;
+ return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/* Vector of eight signed half-byte values:
+ */
+static __inline struct brw_reg brw_imm_v( unsigned v )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_8;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = v;
+ return imm;
+}
+
+/* Vector of four 8-bit float values:
+ */
+static __inline struct brw_reg brw_imm_vf( unsigned v )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = v;
+ return imm;
+}
+
+#define VF_ZERO 0x0
+#define VF_ONE 0x30
+#define VF_NEG (1<<7)
+
+static __inline struct brw_reg brw_imm_vf4( unsigned v0,
+ unsigned v1,
+ unsigned v2,
+ unsigned v3)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = ((v0 << 0) |
+ (v1 << 8) |
+ (v2 << 16) |
+ (v3 << 24));
+ return imm;
+}
+
+
+static __inline struct brw_reg brw_address( struct brw_reg reg )
+{
+ return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
+}
+
+
+static __inline struct brw_reg brw_vec1_grf( unsigned nr,
+ unsigned subnr )
+{
+ return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static __inline struct brw_reg brw_vec8_grf( unsigned nr,
+ unsigned subnr )
+{
+ return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static __inline struct brw_reg brw_vec4_grf( unsigned nr,
+ unsigned subnr )
+{
+ return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+static __inline struct brw_reg brw_vec2_grf( unsigned nr,
+ unsigned subnr )
+{
+ return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static __inline struct brw_reg brw_uw8_grf( unsigned nr,
+ unsigned subnr )
+{
+ return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static __inline struct brw_reg brw_null_reg( void )
+{
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_NULL,
+ 0);
+}
+
+static __inline struct brw_reg brw_address_reg( unsigned subnr )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ADDRESS,
+ subnr);
+}
+
+/* If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw. This goes against the convention for other scalar
+ * regs:
+ */
+static __inline struct brw_reg brw_ip_reg( void )
+{
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_IP,
+ 0,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_4, /* ? */
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, /* NOTE! */
+ TGSI_WRITEMASK_XYZW); /* NOTE! */
+}
+
+static __inline struct brw_reg brw_acc_reg( void )
+{
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ACCUMULATOR,
+ 0);
+}
+
+
+static __inline struct brw_reg brw_flag_reg( void )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_FLAG,
+ 0);
+}
+
+
+static __inline struct brw_reg brw_mask_reg( unsigned subnr )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_MASK,
+ subnr);
+}
+
+static __inline struct brw_reg brw_message_reg( unsigned nr )
+{
+ return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
+ nr,
+ 0);
+}
+
+
+
+
+/* This is almost always called with a numeric constant argument, so
+ * make things easy to evaluate at compile time:
+ */
+static __inline unsigned cvt( unsigned val )
+{
+ switch (val) {
+ case 0: return 0;
+ case 1: return 1;
+ case 2: return 2;
+ case 4: return 3;
+ case 8: return 4;
+ case 16: return 5;
+ case 32: return 6;
+ }
+ return 0;
+}
+
+static __inline struct brw_reg stride( struct brw_reg reg,
+ unsigned vstride,
+ unsigned width,
+ unsigned hstride )
+{
+
+ reg.vstride = cvt(vstride);
+ reg.width = cvt(width) - 1;
+ reg.hstride = cvt(hstride);
+ return reg;
+}
+
+static __inline struct brw_reg vec16( struct brw_reg reg )
+{
+ return stride(reg, 16,16,1);
+}
+
+static __inline struct brw_reg vec8( struct brw_reg reg )
+{
+ return stride(reg, 8,8,1);
+}
+
+static __inline struct brw_reg vec4( struct brw_reg reg )
+{
+ return stride(reg, 4,4,1);
+}
+
+static __inline struct brw_reg vec2( struct brw_reg reg )
+{
+ return stride(reg, 2,2,1);
+}
+
+static __inline struct brw_reg vec1( struct brw_reg reg )
+{
+ return stride(reg, 0,1,0);
+}
+
+static __inline struct brw_reg get_element( struct brw_reg reg, unsigned elt )
+{
+ return vec1(suboffset(reg, elt));
+}
+
+static __inline struct brw_reg get_element_ud( struct brw_reg reg, unsigned elt )
+{
+ return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
+}
+
+
+static __inline struct brw_reg brw_swizzle( struct brw_reg reg,
+ unsigned x,
+ unsigned y,
+ unsigned z,
+ unsigned w)
+{
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
+ return reg;
+}
+
+
+static __inline struct brw_reg brw_swizzle1( struct brw_reg reg,
+ unsigned x )
+{
+ return brw_swizzle(reg, x, x, x, x);
+}
+
+static __inline struct brw_reg brw_writemask( struct brw_reg reg,
+ unsigned mask )
+{
+ reg.dw1.bits.writemask &= mask;
+ return reg;
+}
+
+static __inline struct brw_reg brw_set_writemask( struct brw_reg reg,
+ unsigned mask )
+{
+ reg.dw1.bits.writemask = mask;
+ return reg;
+}
+
+static __inline struct brw_reg negate( struct brw_reg reg )
+{
+ reg.negate ^= 1;
+ return reg;
+}
+
+static __inline struct brw_reg brw_abs( struct brw_reg reg )
+{
+ reg.abs = 1;
+ return reg;
+}
+
+/***********************************************************************
+ */
+static __inline struct brw_reg brw_vec4_indirect( unsigned subnr,
+ int offset )
+{
+ struct brw_reg reg = brw_vec4_grf(0, 0);
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.dw1.bits.indirect_offset = offset;
+ return reg;
+}
+
+static __inline struct brw_reg brw_vec1_indirect( unsigned subnr,
+ int offset )
+{
+ struct brw_reg reg = brw_vec1_grf(0, 0);
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.dw1.bits.indirect_offset = offset;
+ return reg;
+}
+
+static __inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
+{
+ return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static __inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
+{
+ return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static __inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
+{
+ return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
+}
+
+static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
+}
+
+static __inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
+}
+
+static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
+{
+ return brw_address_reg(ptr.addr_subnr);
+}
+
+static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, int offset )
+{
+ ptr.addr_offset += offset;
+ return ptr;
+}
+
+static __inline struct brw_indirect brw_indirect( unsigned addr_subnr, int offset )
+{
+ struct brw_indirect ptr;
+ ptr.addr_subnr = addr_subnr;
+ ptr.addr_offset = offset;
+ ptr.pad = 0;
+ return ptr;
+}
+
+static __inline struct brw_instruction *current_insn( struct brw_compile *p)
+{
+ return &p->store[p->nr_insn];
+}
+
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+void brw_set_mask_control( struct brw_compile *p, unsigned value );
+void brw_set_saturate( struct brw_compile *p, unsigned value );
+void brw_set_access_mode( struct brw_compile *p, unsigned access_mode );
+void brw_set_compression_control( struct brw_compile *p, boolean control );
+void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );
+void brw_set_predicate_control( struct brw_compile *p, unsigned pc );
+void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional );
+
+void brw_init_compile( struct brw_compile *p );
+const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz );
+
+
+struct brw_instruction *brw_alu1( struct brw_compile *p,
+ unsigned opcode,
+ struct brw_reg dest,
+ struct brw_reg src );
+
+struct brw_instruction *brw_alu2(struct brw_compile *p,
+ unsigned opcode,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1 );
+
+/* Helpers for regular instructions:
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0);
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+#undef ALU1
+#undef ALU2
+
+
+
+/* Helpers for SEND instruction:
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ boolean allocate,
+ boolean used,
+ unsigned msg_length,
+ unsigned response_length,
+ boolean eot,
+ boolean writes_complete,
+ unsigned offset,
+ unsigned swizzle);
+
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ unsigned msg_length,
+ unsigned response_length,
+ boolean eot);
+
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ unsigned sampler,
+ unsigned writemask,
+ unsigned msg_type,
+ unsigned response_length,
+ unsigned msg_length,
+ boolean eot);
+
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ unsigned saturate,
+ unsigned msg_reg_nr,
+ struct brw_reg src,
+ unsigned precision );
+
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ unsigned saturate,
+ unsigned msg_reg_nr,
+ struct brw_reg src,
+ unsigned data_type,
+ unsigned precision );
+
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ unsigned scratch_offset );
+
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ unsigned msg_reg_nr,
+ unsigned scratch_offset );
+
+/* If/else/endif. Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p,
+ unsigned execute_size);
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_insn);
+
+void brw_ENDIF(struct brw_compile *p,
+ struct brw_instruction *if_or_else_insn);
+
+
+/* DO/WHILE loops:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+ unsigned execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *patch_insn);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned conditional,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+void brw_print_reg( struct brw_reg reg );
+
+
+/***********************************************************************
+ * brw_eu_util.c:
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ unsigned count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ unsigned count);
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned count);
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned count);
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src);
+
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg );
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_eu_debug.c b/src/gallium/drivers/i965simple/brw_eu_debug.c
new file mode 100644
index 0000000000..4a94ddefa6
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_eu_debug.c
@@ -0,0 +1,90 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_debug.h"
+
+#include "brw_eu.h"
+
+void brw_print_reg( struct brw_reg hwreg )
+{
+ static const char *file[] = {
+ "arf",
+ "grf",
+ "msg",
+ "imm"
+ };
+
+ static const char *type[] = {
+ "ud",
+ "d",
+ "uw",
+ "w",
+ "ub",
+ "vf",
+ "hf",
+ "f"
+ };
+
+ debug_printf("%s%s",
+ hwreg.abs ? "abs/" : "",
+ hwreg.negate ? "-" : "");
+
+ if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+ hwreg.nr % 2 == 0 &&
+ hwreg.subnr == 0 &&
+ hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
+ hwreg.width == BRW_WIDTH_8 &&
+ hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ hwreg.type == BRW_REGISTER_TYPE_F) {
+ debug_printf("vec%d", hwreg.nr);
+ }
+ else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+ hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
+ hwreg.width == BRW_WIDTH_1 &&
+ hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
+ hwreg.type == BRW_REGISTER_TYPE_F) {
+ debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+ }
+ else {
+ debug_printf("%s%d.%d<%d;%d,%d>:%s",
+ file[hwreg.file],
+ hwreg.nr,
+ hwreg.subnr / type_sz(hwreg.type),
+ hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
+ 1<<hwreg.width,
+ hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
+ type[hwreg.type]);
+ }
+}
+
+
+
diff --git a/src/gallium/drivers/i965simple/brw_eu_emit.c b/src/gallium/drivers/i965simple/brw_eu_emit.c
new file mode 100644
index 0000000000..400a80b6fb
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_eu_emit.c
@@ -0,0 +1,1080 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+
+static void guess_execution_size( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ if (reg.width == BRW_WIDTH_8 &&
+ insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
+ insn->header.execution_size = BRW_EXECUTE_16;
+ else
+ insn->header.execution_size = reg.width; /* note - definitions are compatible */
+}
+
+
+static void brw_set_dest( struct brw_instruction *insn,
+ struct brw_reg dest )
+{
+ insn->bits1.da1.dest_reg_file = dest.file;
+ insn->bits1.da1.dest_reg_type = dest.type;
+ insn->bits1.da1.dest_address_mode = dest.address_mode;
+
+ if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+ insn->bits1.da1.dest_reg_nr = dest.nr;
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits1.da1.dest_subreg_nr = dest.subnr;
+ insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+ }
+ else {
+ insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+ insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+ }
+ }
+ else {
+ insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+
+ /* These are different sizes in align1 vs align16:
+ */
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+ }
+ else {
+ insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ }
+ }
+
+ /* NEW: Set the execution size based on dest.width and
+ * insn->compression_control:
+ */
+ guess_execution_size(insn, dest);
+}
+
+static void brw_set_src0( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+ insn->bits1.da1.src0_reg_file = reg.file;
+ insn->bits1.da1.src0_reg_type = reg.type;
+ insn->bits2.da1.src0_abs = reg.abs;
+ insn->bits2.da1.src0_negate = reg.negate;
+ insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ insn->bits3.ud = reg.dw1.ud;
+
+ /* Required to set some fields in src1 as well:
+ */
+ insn->bits1.da1.src1_reg_file = 0; /* arf */
+ insn->bits1.da1.src1_reg_type = reg.type;
+ }
+ else
+ {
+ if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits2.da1.src0_subreg_nr = reg.subnr;
+ insn->bits2.da1.src0_reg_nr = reg.nr;
+ }
+ else {
+ insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+ insn->bits2.da16.src0_reg_nr = reg.nr;
+ }
+ }
+ else {
+ insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+ }
+ else {
+ insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
+ }
+ }
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits2.da1.src0_width = BRW_WIDTH_1;
+ insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits2.da1.src0_horiz_stride = reg.hstride;
+ insn->bits2.da1.src0_width = reg.width;
+ insn->bits2.da1.src0_vert_stride = reg.vstride;
+ }
+ }
+ else {
+ insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits2.da16.src0_vert_stride = reg.vstride;
+ }
+ }
+}
+
+
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+ insn->bits1.da1.src1_reg_file = reg.file;
+ insn->bits1.da1.src1_reg_type = reg.type;
+ insn->bits3.da1.src1_abs = reg.abs;
+ insn->bits3.da1.src1_negate = reg.negate;
+
+ /* Only src1 can be immediate in two-argument instructions.
+ */
+ assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ insn->bits3.ud = reg.dw1.ud;
+ }
+ else {
+ /* This is a hardware restriction, which may or may not be lifted
+ * in the future:
+ */
+ assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+ //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits3.da1.src1_subreg_nr = reg.subnr;
+ insn->bits3.da1.src1_reg_nr = reg.nr;
+ }
+ else {
+ insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+ insn->bits3.da16.src1_reg_nr = reg.nr;
+ }
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits3.da1.src1_width = BRW_WIDTH_1;
+ insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits3.da1.src1_horiz_stride = reg.hstride;
+ insn->bits3.da1.src1_width = reg.width;
+ insn->bits3.da1.src1_vert_stride = reg.vstride;
+ }
+ }
+ else {
+ insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits3.da16.src1_vert_stride = reg.vstride;
+ }
+ }
+}
+
+
+
+static void brw_set_math_message( struct brw_instruction *insn,
+ unsigned msg_length,
+ unsigned response_length,
+ unsigned function,
+ unsigned integer_type,
+ boolean low_precision,
+ boolean saturate,
+ unsigned dataType )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.math.function = function;
+ insn->bits3.math.int_type = integer_type;
+ insn->bits3.math.precision = low_precision;
+ insn->bits3.math.saturate = saturate;
+ insn->bits3.math.data_type = dataType;
+ insn->bits3.math.response_length = response_length;
+ insn->bits3.math.msg_length = msg_length;
+ insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+ insn->bits3.math.end_of_thread = 0;
+}
+
+static void brw_set_urb_message( struct brw_instruction *insn,
+ boolean allocate,
+ boolean used,
+ unsigned msg_length,
+ unsigned response_length,
+ boolean end_of_thread,
+ boolean complete,
+ unsigned offset,
+ unsigned swizzle_control )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.urb.opcode = 0; /* ? */
+ insn->bits3.urb.offset = offset;
+ insn->bits3.urb.swizzle_control = swizzle_control;
+ insn->bits3.urb.allocate = allocate;
+ insn->bits3.urb.used = used; /* ? */
+ insn->bits3.urb.complete = complete;
+ insn->bits3.urb.response_length = response_length;
+ insn->bits3.urb.msg_length = msg_length;
+ insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+ insn->bits3.urb.end_of_thread = end_of_thread;
+}
+
+static void brw_set_dp_write_message( struct brw_instruction *insn,
+ unsigned binding_table_index,
+ unsigned msg_control,
+ unsigned msg_type,
+ unsigned msg_length,
+ unsigned pixel_scoreboard_clear,
+ unsigned response_length,
+ unsigned end_of_thread )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.dp_write.binding_table_index = binding_table_index;
+ insn->bits3.dp_write.msg_control = msg_control;
+ insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write.msg_type = msg_type;
+ insn->bits3.dp_write.send_commit_msg = 0;
+ insn->bits3.dp_write.response_length = response_length;
+ insn->bits3.dp_write.msg_length = msg_length;
+ insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits3.urb.end_of_thread = end_of_thread;
+}
+
+static void brw_set_dp_read_message( struct brw_instruction *insn,
+ unsigned binding_table_index,
+ unsigned msg_control,
+ unsigned msg_type,
+ unsigned target_cache,
+ unsigned msg_length,
+ unsigned response_length,
+ unsigned end_of_thread )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.dp_read.binding_table_index = binding_table_index;
+ insn->bits3.dp_read.msg_control = msg_control;
+ insn->bits3.dp_read.msg_type = msg_type;
+ insn->bits3.dp_read.target_cache = target_cache;
+ insn->bits3.dp_read.response_length = response_length;
+ insn->bits3.dp_read.msg_length = msg_length;
+ insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->bits3.dp_read.end_of_thread = end_of_thread;
+}
+
+static void brw_set_sampler_message( struct brw_instruction *insn,
+ unsigned binding_table_index,
+ unsigned sampler,
+ unsigned msg_type,
+ unsigned response_length,
+ unsigned msg_length,
+ boolean eot)
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.sampler.binding_table_index = binding_table_index;
+ insn->bits3.sampler.sampler = sampler;
+ insn->bits3.sampler.msg_type = msg_type;
+ insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+ insn->bits3.sampler.response_length = response_length;
+ insn->bits3.sampler.msg_length = msg_length;
+ insn->bits3.sampler.end_of_thread = eot;
+ insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+}
+
+
+
+static struct brw_instruction *next_insn( struct brw_compile *p,
+ unsigned opcode )
+{
+ struct brw_instruction *insn;
+
+ assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
+
+ insn = &p->store[p->nr_insn++];
+ memcpy(insn, p->current, sizeof(*insn));
+
+ /* Reset this one-shot flag:
+ */
+
+ if (p->current->header.destreg__conditonalmod) {
+ p->current->header.destreg__conditonalmod = 0;
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ }
+
+ insn->header.opcode = opcode;
+ return insn;
+}
+
+
+struct brw_instruction *brw_alu1( struct brw_compile *p,
+ unsigned opcode,
+ struct brw_reg dest,
+ struct brw_reg src )
+{
+ struct brw_instruction *insn = next_insn(p, opcode);
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ return insn;
+}
+
+struct brw_instruction *brw_alu2(struct brw_compile *p,
+ unsigned opcode,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1 )
+{
+ struct brw_instruction *insn = next_insn(p, opcode);
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, src1);
+ return insn;
+}
+
+
+/***********************************************************************
+ * Convenience routines.
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0) \
+{ \
+ return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1) \
+{ \
+ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+
+
+
+void brw_NOP(struct brw_compile *p)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
+ brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ return insn;
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack). Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, eg. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevent flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off. If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size)
+{
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow) {
+ assert(execute_size == BRW_EXECUTE_1);
+
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ insn->header.predicate_inverse = 1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_IF);
+ }
+
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.execution_size = execute_size;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ return insn;
+}
+
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_insn)
+{
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ } else {
+ insn = next_insn(p, BRW_OPCODE_ELSE);
+ }
+
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = if_insn->header.execution_size;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+
+ /* Patch the if instruction to point at this instruction.
+ */
+ if (p->single_program_flow) {
+ assert(if_insn->header.opcode == BRW_OPCODE_ADD);
+
+ if_insn->bits3.ud = (insn - if_insn + 1) * 16;
+ } else {
+ assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+ if_insn->bits3.if_else.jump_count = insn - if_insn;
+ if_insn->bits3.if_else.pop_count = 1;
+ if_insn->bits3.if_else.pad0 = 0;
+ }
+
+ return insn;
+}
+
+void brw_ENDIF(struct brw_compile *p,
+ struct brw_instruction *patch_insn)
+{
+ if (p->single_program_flow) {
+ /* In single program flow mode, there's no need to execute an ENDIF,
+ * since we don't need to do any stack operations, and if we're executing
+ * currently, we want to just continue executing.
+ */
+ struct brw_instruction *next = &p->store[p->nr_insn];
+
+ assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
+
+ patch_insn->bits3.ud = (next - patch_insn) * 16;
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+ brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = patch_insn->header.execution_size;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+
+ assert(patch_insn->bits3.if_else.jump_count == 0);
+
+ /* Patch the if or else instructions to point at this or the next
+ * instruction respectively.
+ */
+ if (patch_insn->header.opcode == BRW_OPCODE_IF) {
+ /* Automagically turn it into an IFF:
+ */
+ patch_insn->header.opcode = BRW_OPCODE_IFF;
+ patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+ patch_insn->bits3.if_else.pop_count = 0;
+ patch_insn->bits3.if_else.pad0 = 0;
+ } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
+ patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+ patch_insn->bits3.if_else.pop_count = 1;
+ patch_insn->bits3.if_else.pad0 = 0;
+ } else {
+ assert(0);
+ }
+
+ /* Also pop item off the stack in the endif instruction:
+ */
+ insn->bits3.if_else.jump_count = 0;
+ insn->bits3.if_else.pop_count = 1;
+ insn->bits3.if_else.pad0 = 0;
+ }
+}
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+ struct brw_instruction *insn;
+ insn = next_insn(p, BRW_OPCODE_BREAK);
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ insn->bits3.if_else.pad0 = 0;
+ return insn;
+}
+
+struct brw_instruction *brw_CONT(struct brw_compile *p)
+{
+ struct brw_instruction *insn;
+ insn = next_insn(p, BRW_OPCODE_CONTINUE);
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ insn->bits3.if_else.pad0 = 0;
+ return insn;
+}
+
+/* DO/WHILE loop:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
+{
+ if (p->single_program_flow) {
+ return &p->store[p->nr_insn];
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
+
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(insn, brw_null_reg());
+ brw_set_src0(insn, brw_null_reg());
+ brw_set_src1(insn, brw_null_reg());
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = execute_size;
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ /* insn->header.mask_control = BRW_MASK_ENABLE; */
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
+ return insn;
+ }
+}
+
+
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *do_insn)
+{
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow)
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ else
+ insn = next_insn(p, BRW_OPCODE_WHILE);
+
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+ if (p->single_program_flow) {
+ insn->header.execution_size = BRW_EXECUTE_1;
+
+ insn->bits3.d = (do_insn - insn) * 16;
+ } else {
+ insn->header.execution_size = do_insn->header.execution_size;
+
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
+ insn->bits3.if_else.jump_count = do_insn - insn;
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
+
+/* insn->header.mask_control = BRW_MASK_ENABLE; */
+
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+ return insn;
+}
+
+
+/* FORWARD JUMPS:
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn)
+{
+ struct brw_instruction *landing = &p->store[p->nr_insn];
+
+ assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+ assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
+
+ jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
+}
+
+
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register. It might be simpler
+ * just to use the flag reg for most WM tasks?
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned conditional,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
+
+ insn->header.destreg__conditonalmod = conditional;
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, src1);
+
+/* guess_execution_size(insn, src0); */
+
+
+ /* Make it so that future instructions will use the computed flag
+ * value until brw_set_predicate_control_flag_value() is called
+ * again.
+ */
+ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.nr == 0) {
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ p->flag_value = 0xff;
+ }
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/* Invert 8 values
+ */
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ unsigned saturate,
+ unsigned msg_reg_nr,
+ struct brw_reg src,
+ unsigned data_type,
+ unsigned precision )
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* Example code doesn't set predicate_control for send
+ * instructions.
+ */
+ insn->header.predicate_control = 0;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ data_type);
+}
+
+/* Use 2 send instructions to invert 16 elements
+ */
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ unsigned saturate,
+ unsigned msg_reg_nr,
+ struct brw_reg src,
+ unsigned precision )
+{
+ struct brw_instruction *insn;
+ unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* First instruction:
+ */
+ brw_push_insn_state(p);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ /* Second instruction:
+ */
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
+ insn->header.destreg__conditonalmod = msg_reg_nr+1;
+
+ brw_set_dest(insn, offset(dest,1));
+ brw_set_src0(insn, src);
+ brw_set_math_message(insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ brw_pop_insn_state(p);
+}
+
+
+
+
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ unsigned msg_reg_nr,
+ unsigned scratch_offset )
+{
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+ brw_imm_d(scratch_offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ unsigned msg_length = 3;
+ struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+
+ brw_set_dp_write_message(insn,
+ 255, /* bti */
+ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+ msg_length,
+ 0, /* pixel scoreboard */
+ 0, /* response_length */
+ 0); /* eot */
+ }
+
+}
+
+
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ unsigned scratch_offset )
+{
+ {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+ brw_imm_d(scratch_offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest); /* UW? */
+ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+ brw_set_dp_read_message(insn,
+ 255, /* bti */
+ 3, /* msg_control */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 1, /* target cache */
+ 1, /* msg_length */
+ 2, /* response_length */
+ 0); /* eot */
+ }
+}
+
+
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ unsigned msg_length,
+ unsigned response_length,
+ boolean eot)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_dp_write_message(insn,
+ binding_table_index,
+ BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
+ msg_length,
+ 1, /* pixel scoreboard */
+ response_length,
+ eot);
+}
+
+
+
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ unsigned sampler,
+ unsigned writemask,
+ unsigned msg_type,
+ unsigned response_length,
+ unsigned msg_length,
+ boolean eot)
+{
+ boolean need_stall = 0;
+
+ if(writemask == 0) {
+/* debug_printf("%s: zero writemask??\n", __FUNCTION__); */
+ return;
+ }
+
+ /* Hardware doesn't do destination dependency checking on send
+ * instructions properly. Add a workaround which generates the
+ * dependency by other means. In practice it seems like this bug
+ * only crops up for texture samples, and only where registers are
+ * written by the send and then written again later without being
+ * read in between. Luckily for us, we already track that
+ * information and use it to modify the writemask for the
+ * instruction, so that is a guide for whether a workaround is
+ * needed.
+ */
+ if (writemask != TGSI_WRITEMASK_XYZW) {
+ unsigned dst_offset = 0;
+ unsigned i, newmask = 0, len = 0;
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i))
+ break;
+ dst_offset += 2;
+ }
+ for (; i < 4; i++) {
+ if (!(writemask & (1<<i)))
+ break;
+ newmask |= 1<<i;
+ len++;
+ }
+
+ if (newmask != writemask) {
+ need_stall = 1;
+/* debug_printf("need stall %x %x\n", newmask , writemask); */
+ }
+ else {
+ struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+
+ newmask = ~newmask & TGSI_WRITEMASK_XYZW;
+
+ brw_push_insn_state(p);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, m1, brw_vec8_grf(0,0));
+ brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
+
+ brw_pop_insn_state(p);
+
+ src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ dest = offset(dest, dst_offset);
+ response_length = len * 2;
+ }
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_sampler_message(insn,
+ binding_table_index,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ eot);
+ }
+
+ if (need_stall)
+ {
+ struct brw_reg reg = vec8(offset(dest, response_length-1));
+
+ /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, FALSE);
+ brw_MOV(p, reg, reg);
+ brw_pop_insn_state(p);
+ }
+
+}
+
+/* All these variables are pretty confusing - we might be better off
+ * using bitmasks and macros for this, in the old style. Or perhaps
+ * just having the caller instantiate the fields in dword3 itself.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ boolean allocate,
+ boolean used,
+ unsigned msg_length,
+ unsigned response_length,
+ boolean eot,
+ boolean writes_complete,
+ unsigned offset,
+ unsigned swizzle)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < 16);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ brw_set_urb_message(insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
+
diff --git a/src/gallium/drivers/i965simple/brw_eu_util.c b/src/gallium/drivers/i965simple/brw_eu_util.c
new file mode 100644
index 0000000000..3a65b141f0
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ brw_math( p,
+ dst,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ src,
+ BRW_MATH_PRECISION_FULL,
+ BRW_MATH_DATA_VECTOR );
+}
+
+
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned count)
+{
+ unsigned i;
+
+ dst = vec4(dst);
+ src = vec4(src);
+
+ for (i = 0; i < count; i++)
+ {
+ unsigned delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+ brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
+ }
+}
+
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned count)
+{
+ unsigned i;
+
+ dst = vec8(dst);
+ src = vec8(src);
+
+ for (i = 0; i < count; i++)
+ {
+ unsigned delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+ }
+}
+
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ unsigned count)
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ {
+ unsigned delta = i*32;
+ brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta));
+ brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16));
+ }
+}
+
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ unsigned count)
+{
+ unsigned i;
+
+ dst = vec4(dst);
+
+ for (i = 0; i < count; i++)
+ {
+ unsigned delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta));
+ brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
+ }
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965simple/brw_flush.c b/src/gallium/drivers/i965simple/brw_flush.c
new file mode 100644
index 0000000000..e6001c30d9
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_flush.c
@@ -0,0 +1,73 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_defines.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_batch.h"
+
+
+static void brw_flush( struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ /* Do we need to emit an MI_FLUSH command to flush the hardware
+ * caches?
+ */
+ if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) {
+ struct brw_mi_flush flush;
+
+ memset(&flush, 0, sizeof(flush));
+ flush.opcode = CMD_MI_FLUSH;
+
+ if (!(flags & PIPE_FLUSH_RENDER_CACHE))
+ flush.flags |= BRW_INHIBIT_FLUSH_RENDER_CACHE;
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE)
+ flush.flags |= BRW_FLUSH_READ_CACHE;
+
+ BRW_BATCH_STRUCT(brw, &flush);
+ }
+
+ /* If there are no flags, just flush pending commands to hardware:
+ */
+ FLUSH_BATCH( fence );
+}
+
+
+
+void brw_init_flush_functions( struct brw_context *brw )
+{
+ brw->pipe.flush = brw_flush;
+}
diff --git a/src/gallium/drivers/i965simple/brw_gs.c b/src/gallium/drivers/i965simple/brw_gs.c
new file mode 100644
index 0000000000..de60868ccc
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_gs.c
@@ -0,0 +1,196 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_gs.h"
+
+
+
+static void compile_gs_prog( struct brw_context *brw,
+ struct brw_gs_prog_key *key )
+{
+ struct brw_gs_compile c;
+ const unsigned *program;
+ unsigned program_size;
+
+ memset(&c, 0, sizeof(c));
+
+ c.key = *key;
+
+ /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded:
+ */
+ c.nr_attrs = brw_count_bits(c.key.attrs);
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+ c.nr_bytes = c.nr_regs * REG_SIZE;
+
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(&c.func);
+
+ c.func.single_program_flow = 1;
+
+ /* For some reason the thread is spawned with only 4 channels
+ * unmasked.
+ */
+ brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+ /* Note that primitives which don't require a GS program have
+ * already been weeded out by this stage:
+ */
+ switch (key->primitive) {
+ case PIPE_PRIM_QUADS:
+ brw_gs_quads( &c );
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ brw_gs_quad_strip( &c );
+ break;
+ case PIPE_PRIM_LINE_LOOP:
+ brw_gs_lines( &c );
+ break;
+ case PIPE_PRIM_LINES:
+ if (key->hint_gs_always)
+ brw_gs_lines( &c );
+ else {
+ return;
+ }
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ if (key->hint_gs_always)
+ brw_gs_tris( &c );
+ else {
+ return;
+ }
+ break;
+ case PIPE_PRIM_POINTS:
+ if (key->hint_gs_always)
+ brw_gs_points( &c );
+ else {
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /* Upload
+ */
+ brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG],
+ &c.key,
+ sizeof(c.key),
+ program,
+ program_size,
+ &c.prog_data,
+ &brw->gs.prog_data );
+}
+
+
+static boolean search_cache( struct brw_context *brw,
+ struct brw_gs_prog_key *key )
+{
+ return brw_search_cache(&brw->cache[BRW_GS_PROG],
+ key, sizeof(*key),
+ &brw->gs.prog_data,
+ &brw->gs.prog_gs_offset);
+}
+
+
+static const int gs_prim[PIPE_PRIM_POLYGON+1] = {
+ PIPE_PRIM_POINTS,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINE_LOOP,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_QUADS,
+ PIPE_PRIM_QUAD_STRIP,
+ PIPE_PRIM_TRIANGLES
+};
+
+static void populate_key( struct brw_context *brw,
+ struct brw_gs_prog_key *key )
+{
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_VS_PROG */
+ key->attrs = brw->vs.prog_data->outputs_written;
+
+ /* BRW_NEW_PRIMITIVE */
+ key->primitive = gs_prim[brw->primitive];
+
+ key->hint_gs_always = 0; /* debug code? */
+
+ key->need_gs_prog = (key->hint_gs_always ||
+ brw->primitive == PIPE_PRIM_QUADS ||
+ brw->primitive == PIPE_PRIM_QUAD_STRIP ||
+ brw->primitive == PIPE_PRIM_LINE_LOOP);
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void upload_gs_prog( struct brw_context *brw )
+{
+ struct brw_gs_prog_key key;
+
+ /* Populate the key:
+ */
+ populate_key(brw, &key);
+
+ if (brw->gs.prog_active != key.need_gs_prog) {
+ brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
+ brw->gs.prog_active = key.need_gs_prog;
+ }
+
+ if (brw->gs.prog_active) {
+ if (!search_cache(brw, &key))
+ compile_gs_prog( brw, &key );
+ }
+}
+
+
+const struct brw_tracked_state brw_gs_prog = {
+ .dirty = {
+ .brw = BRW_NEW_PRIMITIVE,
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .update = upload_gs_prog
+};
diff --git a/src/gallium/drivers/i965simple/brw_gs.h b/src/gallium/drivers/i965simple/brw_gs.h
new file mode 100644
index 0000000000..f09141c6aa
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_gs.h
@@ -0,0 +1,75 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_GS_H
+#define BRW_GS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_GS_VERTS (4)
+
+struct brw_gs_prog_key {
+ unsigned attrs:32;
+ unsigned primitive:4;
+ unsigned hint_gs_always:1;
+ unsigned need_gs_prog:1;
+ unsigned pad:26;
+};
+
+struct brw_gs_compile {
+ struct brw_compile func;
+ struct brw_gs_prog_key key;
+ struct brw_gs_prog_data prog_data;
+
+ struct {
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_GS_VERTS];
+ } reg;
+
+ /* 3 different ways of expressing vertex size:
+ */
+ unsigned nr_attrs;
+ unsigned nr_regs;
+ unsigned nr_bytes;
+};
+
+#define ATTR_SIZE (4*4)
+
+void brw_gs_quads( struct brw_gs_compile *c );
+void brw_gs_quad_strip( struct brw_gs_compile *c );
+void brw_gs_tris( struct brw_gs_compile *c );
+void brw_gs_lines( struct brw_gs_compile *c );
+void brw_gs_points( struct brw_gs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_gs_emit.c b/src/gallium/drivers/i965simple/brw_gs_emit.c
new file mode 100644
index 0000000000..c3cc90b10f
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_gs_emit.c
@@ -0,0 +1,148 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_gs.h"
+
+static void brw_gs_alloc_regs( struct brw_gs_compile *c,
+ unsigned nr_verts )
+{
+ unsigned i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < nr_verts; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ c->prog_data.urb_read_length = c->nr_regs;
+ c->prog_data.total_grf = i;
+}
+
+
+static void brw_gs_emit_vue(struct brw_gs_compile *c,
+ struct brw_reg vert,
+ boolean last,
+ unsigned header)
+{
+ struct brw_compile *p = &c->func;
+ boolean allocate = !last;
+
+ /* Overwrite PrimType and PrimStart in the message header, for
+ * each vertex in turn:
+ */
+ brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+ /* Copy the vertex from vertn into m1..mN+1:
+ */
+ brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
+
+ /* Send each vertex as a seperate write to the urb. This is
+ * different to the concept in brw_sf_emit.c, where subsequent
+ * writes are used to build up a single urb entry. Each of these
+ * writes instantiates a seperate urb entry, and a new one must be
+ * allocated each time.
+ */
+ brw_urb_WRITE(p,
+ allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ 0,
+ c->reg.R0,
+ allocate,
+ 1, /* used */
+ c->nr_regs + 1, /* msg length */
+ allocate ? 1 : 0, /* response length */
+ allocate ? 0 : 1, /* eot */
+ 1, /* writes_complete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+void brw_gs_quads( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 4);
+
+ /* Use polygons for correct edgeflag behaviour. Note that vertex 3
+ * is the PV for quads, but vertex 0 for polygons:
+ */
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+}
+
+void brw_gs_quad_strip( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 4);
+
+ brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+}
+
+void brw_gs_tris( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 3);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END));
+}
+
+void brw_gs_lines( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 2);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
+}
+
+void brw_gs_points( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
+}
+
+
+
+
+
+
+
+
diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c
new file mode 100644
index 0000000000..5b8016b2e9
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_gs_state.c
@@ -0,0 +1,90 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+
+
+static void upload_gs_unit( struct brw_context *brw )
+{
+ struct brw_gs_unit_state gs;
+
+ memset(&gs, 0, sizeof(gs));
+
+ /* CACHE_NEW_GS_PROG */
+ if (brw->gs.prog_active) {
+ gs.thread0.grf_reg_count =
+ align(brw->gs.prog_data->total_grf, 16) / 16 - 1;
+ gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6;
+ gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
+ }
+ else {
+ gs.thread0.grf_reg_count = 0;
+ gs.thread0.kernel_start_pointer = 0;
+ gs.thread3.urb_entry_read_length = 1;
+ }
+
+ /* BRW_NEW_URB_FENCE */
+ gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries;
+ gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+
+ gs.thread4.max_threads = 0; /* Hardware requirement */
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ gs.thread4.stats_enable = 1;
+
+ /* CONSTANT */
+ gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ gs.thread1.single_program_flow = 1;
+ gs.thread3.dispatch_grf_start_reg = 1;
+ gs.thread3.const_urb_entry_read_offset = 0;
+ gs.thread3.const_urb_entry_read_length = 0;
+ gs.thread3.urb_entry_read_offset = 0;
+
+
+ brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs );
+}
+
+
+const struct brw_tracked_state brw_gs_unit = {
+ .dirty = {
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_GS_PROG
+ },
+ .update = upload_gs_unit
+};
diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c
new file mode 100644
index 0000000000..be812c5da9
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_misc_state.c
@@ -0,0 +1,488 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_batch.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+
+
+
+/***********************************************************************
+ * Blend color
+ */
+
+static void upload_blend_constant_color(struct brw_context *brw)
+{
+ struct brw_blend_constant_color bcc;
+
+ memset(&bcc, 0, sizeof(bcc));
+ bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
+ bcc.header.length = sizeof(bcc)/4-2;
+ bcc.blend_constant_color[0] = brw->attribs.BlendColor.color[0];
+ bcc.blend_constant_color[1] = brw->attribs.BlendColor.color[1];
+ bcc.blend_constant_color[2] = brw->attribs.BlendColor.color[2];
+ bcc.blend_constant_color[3] = brw->attribs.BlendColor.color[3];
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+}
+
+
+const struct brw_tracked_state brw_blend_constant_color = {
+ .dirty = {
+ .brw = BRW_NEW_BLEND,
+ .cache = 0
+ },
+ .update = upload_blend_constant_color
+};
+
+
+/***********************************************************************
+ * Drawing rectangle
+ */
+static void upload_drawing_rect(struct brw_context *brw)
+{
+ struct brw_drawrect bdr;
+
+ memset(&bdr, 0, sizeof(bdr));
+ bdr.header.opcode = CMD_DRAW_RECT;
+ bdr.header.length = sizeof(bdr)/4 - 2;
+ bdr.xmin = 0;
+ bdr.ymin = 0;
+ bdr.xmax = brw->attribs.FrameBuffer.cbufs[0]->width;
+ bdr.ymax = brw->attribs.FrameBuffer.cbufs[0]->height;
+ bdr.xorg = 0;
+ bdr.yorg = 0;
+
+ /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted
+ * uncached in brw_draw.c:
+ */
+ BRW_BATCH_STRUCT(brw, &bdr);
+}
+
+const struct brw_tracked_state brw_drawing_rect = {
+ .dirty = {
+ .brw = BRW_NEW_SCENE,
+ .cache = 0
+ },
+ .update = upload_drawing_rect
+};
+
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is the BRW_SS_POOL cache buffer.
+ */
+static void upload_binding_table_pointers(struct brw_context *brw)
+{
+ struct brw_binding_table_pointers btp;
+ memset(&btp, 0, sizeof(btp));
+
+ btp.header.opcode = CMD_BINDING_TABLE_PTRS;
+ btp.header.length = sizeof(btp)/4 - 2;
+ btp.vs = 0;
+ btp.gs = 0;
+ btp.clp = 0;
+ btp.sf = 0;
+ btp.wm = brw->wm.bind_ss_offset;
+
+ BRW_CACHED_BATCH_STRUCT(brw, &btp);
+}
+
+const struct brw_tracked_state brw_binding_table_pointers = {
+ .dirty = {
+ .brw = 0,
+ .cache = CACHE_NEW_SURF_BIND
+ },
+ .update = upload_binding_table_pointers,
+};
+
+
+/**
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer.
+ */
+static void upload_pipelined_state_pointers(struct brw_context *brw )
+{
+ struct brw_pipelined_state_pointers psp;
+ memset(&psp, 0, sizeof(psp));
+
+ psp.header.opcode = CMD_PIPELINED_STATE_POINTERS;
+ psp.header.length = sizeof(psp)/4 - 2;
+
+ psp.vs.offset = brw->vs.state_gs_offset >> 5;
+ psp.sf.offset = brw->sf.state_gs_offset >> 5;
+ psp.wm.offset = brw->wm.state_gs_offset >> 5;
+ psp.cc.offset = brw->cc.state_gs_offset >> 5;
+
+ /* GS gets turned on and off regularly. Need to re-emit URB fence
+ * after this occurs.
+ */
+ if (brw->gs.prog_active) {
+ psp.gs.offset = brw->gs.state_gs_offset >> 5;
+ psp.gs.enable = 1;
+ }
+
+ if (0) {
+ psp.clp.offset = brw->clip.state_gs_offset >> 5;
+ psp.clp.enable = 1;
+ }
+
+
+ if (BRW_CACHED_BATCH_STRUCT(brw, &psp))
+ brw->state.dirty.brw |= BRW_NEW_PSP;
+}
+
+const struct brw_tracked_state brw_pipelined_state_pointers = {
+ .dirty = {
+ .brw = 0,
+ .cache = (CACHE_NEW_VS_UNIT |
+ CACHE_NEW_GS_UNIT |
+ CACHE_NEW_GS_PROG |
+ CACHE_NEW_CLIP_UNIT |
+ CACHE_NEW_SF_UNIT |
+ CACHE_NEW_WM_UNIT |
+ CACHE_NEW_CC_UNIT)
+ },
+ .update = upload_pipelined_state_pointers
+};
+
+static void upload_psp_urb_cbs(struct brw_context *brw )
+{
+ upload_pipelined_state_pointers(brw);
+ brw_upload_urb_fence(brw);
+ brw_upload_constant_buffer_state(brw);
+}
+
+
+const struct brw_tracked_state brw_psp_urb_cbs = {
+ .dirty = {
+ .brw = BRW_NEW_URB_FENCE,
+ .cache = (CACHE_NEW_VS_UNIT |
+ CACHE_NEW_GS_UNIT |
+ CACHE_NEW_GS_PROG |
+ CACHE_NEW_CLIP_UNIT |
+ CACHE_NEW_SF_UNIT |
+ CACHE_NEW_WM_UNIT |
+ CACHE_NEW_CC_UNIT)
+ },
+ .update = upload_psp_urb_cbs
+};
+
+/**
+ * Upload the depthbuffer offset and format.
+ *
+ * We have to do this per state validation as we need to emit the relocation
+ * in the batch buffer.
+ */
+static void upload_depthbuffer(struct brw_context *brw)
+{
+ struct pipe_surface *depth_surface = brw->attribs.FrameBuffer.zsbuf;
+
+ BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2));
+ if (depth_surface == NULL) {
+ OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+ (BRW_SURFACE_NULL << 29));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else {
+ unsigned int format;
+
+ assert(depth_surface->block.width == 1);
+ assert(depth_surface->block.height == 1);
+ switch (depth_surface->block.size) {
+ case 2:
+ format = BRW_DEPTHFORMAT_D16_UNORM;
+ break;
+ case 4:
+ if (depth_surface->format == PIPE_FORMAT_Z32_FLOAT)
+ format = BRW_DEPTHFORMAT_D32_FLOAT;
+ else
+ format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ OUT_BATCH((depth_surface->stride - 1) |
+ (format << 18) |
+ (BRW_TILEWALK_YMAJOR << 26) |
+// (depth_surface->region->tiled << 27) |
+ (BRW_SURFACE_2D << 29));
+ OUT_RELOC(depth_surface->buffer,
+ PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0);
+ OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+ ((depth_surface->stride/depth_surface->block.size - 1) << 6) |
+ ((depth_surface->height - 1) << 19));
+ OUT_BATCH(0);
+ }
+ ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_depthbuffer = {
+ .dirty = {
+ .brw = BRW_NEW_SCENE,
+ .cache = 0
+ },
+ .update = upload_depthbuffer,
+};
+
+
+
+
+/***********************************************************************
+ * Polygon stipple packet
+ */
+
+static void upload_polygon_stipple(struct brw_context *brw)
+{
+ struct brw_polygon_stipple bps;
+ unsigned i;
+
+ memset(&bps, 0, sizeof(bps));
+ bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
+ bps.header.length = sizeof(bps)/4-2;
+
+ /* XXX: state tracker should send *all* state down initially!
+ */
+ if (brw->attribs.PolygonStipple)
+ for (i = 0; i < 32; i++)
+ bps.stipple[i] = brw->attribs.PolygonStipple->stipple[31 - i]; /* invert */
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bps);
+}
+
+const struct brw_tracked_state brw_polygon_stipple = {
+ .dirty = {
+ .brw = BRW_NEW_STIPPLE,
+ .cache = 0
+ },
+ .update = upload_polygon_stipple
+};
+
+
+/***********************************************************************
+ * Line stipple packet
+ */
+
+static void upload_line_stipple(struct brw_context *brw)
+{
+ struct brw_line_stipple bls;
+ float tmp;
+ int tmpi;
+
+ memset(&bls, 0, sizeof(bls));
+ bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
+ bls.header.length = sizeof(bls)/4 - 2;
+
+ bls.bits0.pattern = brw->attribs.Raster->line_stipple_pattern;
+ bls.bits1.repeat_count = brw->attribs.Raster->line_stipple_factor;
+
+ tmp = 1.0 / (float) brw->attribs.Raster->line_stipple_factor;
+ tmpi = tmp * (1<<13);
+
+
+ bls.bits1.inverse_repeat_count = tmpi;
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bls);
+}
+
+const struct brw_tracked_state brw_line_stipple = {
+ .dirty = {
+ .brw = BRW_NEW_STIPPLE,
+ .cache = 0
+ },
+ .update = upload_line_stipple
+};
+
+
+/***********************************************************************
+ * Misc constant state packets
+ */
+
+static void upload_pipe_control(struct brw_context *brw)
+{
+ struct brw_pipe_control pc;
+
+ return;
+
+ memset(&pc, 0, sizeof(pc));
+
+ pc.header.opcode = CMD_PIPE_CONTROL;
+ pc.header.length = sizeof(pc)/4 - 2;
+ pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE;
+
+ pc.header.instruction_state_cache_flush_enable = 1;
+
+ pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL;
+
+ BRW_BATCH_STRUCT(brw, &pc);
+}
+
+const struct brw_tracked_state brw_pipe_control = {
+ .dirty = {
+ .brw = BRW_NEW_SCENE,
+ .cache = 0
+ },
+ .update = upload_pipe_control
+};
+
+
+/***********************************************************************
+ * Misc invarient state packets
+ */
+
+static void upload_invarient_state( struct brw_context *brw )
+{
+ {
+ struct brw_mi_flush flush;
+
+ memset(&flush, 0, sizeof(flush));
+ flush.opcode = CMD_MI_FLUSH;
+ flush.flags = BRW_FLUSH_STATE_CACHE | BRW_FLUSH_READ_CACHE;
+ BRW_BATCH_STRUCT(brw, &flush);
+ }
+
+ {
+ /* 0x61040000 Pipeline Select */
+ /* PipelineSelect : 0 */
+ struct brw_pipeline_select ps;
+
+ memset(&ps, 0, sizeof(ps));
+ ps.header.opcode = CMD_PIPELINE_SELECT;
+ ps.header.pipeline_select = 0;
+ BRW_BATCH_STRUCT(brw, &ps);
+ }
+
+ {
+ struct brw_global_depth_offset_clamp gdo;
+ memset(&gdo, 0, sizeof(gdo));
+
+ /* Disable depth offset clamping.
+ */
+ gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+ gdo.header.length = sizeof(gdo)/4 - 2;
+ gdo.depth_offset_clamp = 0.0;
+
+ BRW_BATCH_STRUCT(brw, &gdo);
+ }
+
+
+ /* 0x61020000 State Instruction Pointer */
+ {
+ struct brw_system_instruction_pointer sip;
+ memset(&sip, 0, sizeof(sip));
+
+ sip.header.opcode = CMD_STATE_INSN_POINTER;
+ sip.header.length = 0;
+ sip.bits0.pad = 0;
+ sip.bits0.system_instruction_pointer = 0;
+ BRW_BATCH_STRUCT(brw, &sip);
+ }
+
+
+ {
+ struct brw_vf_statistics vfs;
+ memset(&vfs, 0, sizeof(vfs));
+
+ vfs.opcode = CMD_VF_STATISTICS;
+ if (BRW_DEBUG & DEBUG_STATS)
+ vfs.statistics_enable = 1;
+
+ BRW_BATCH_STRUCT(brw, &vfs);
+ }
+
+
+ {
+ struct brw_polygon_stipple_offset bpso;
+
+ memset(&bpso, 0, sizeof(bpso));
+ bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+ bpso.header.length = sizeof(bpso)/4-2;
+ bpso.bits0.x_offset = 0;
+ bpso.bits0.y_offset = 0;
+
+ BRW_BATCH_STRUCT(brw, &bpso);
+ }
+}
+
+const struct brw_tracked_state brw_invarient_state = {
+ .dirty = {
+ .brw = BRW_NEW_SCENE,
+ .cache = 0
+ },
+ .update = upload_invarient_state
+};
+
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools. This comes at the expense of memory, and more expensive cache
+ * misses.
+ */
+static void upload_state_base_address( struct brw_context *brw )
+{
+ /* Output the structure (brw_state_base_address) directly to the
+ * batchbuffer, so we can emit relocations inline.
+ */
+ BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+ OUT_RELOC(brw->pool[BRW_GS_POOL].buffer,
+ PIPE_BUFFER_USAGE_GPU_READ,
+ 1); /* General state base address */
+ OUT_RELOC(brw->pool[BRW_SS_POOL].buffer,
+ PIPE_BUFFER_USAGE_GPU_READ,
+ 1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ ADVANCE_BATCH();
+}
+
+
+const struct brw_tracked_state brw_state_base_address = {
+ .dirty = {
+ .brw = BRW_NEW_SCENE,
+ .cache = 0
+ },
+ .update = upload_state_base_address
+};
diff --git a/src/gallium/drivers/i965simple/brw_reg.h b/src/gallium/drivers/i965simple/brw_reg.h
new file mode 100644
index 0000000000..9e885c3b3b
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_reg.h
@@ -0,0 +1,76 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#define CMD_MI (0x0 << 29)
+#define CMD_2D (0x2 << 29)
+#define CMD_3D (0x3 << 29)
+
+#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
+
+/* Stalls command execution waiting for the given events to have occurred. */
+#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23))
+#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
+#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
+
+/* Primitive dispatch on 830-945 */
+#define _3DPRIMITIVE (CMD_3D | (0x1f << 24))
+#define PRIM_INDIRECT (1<<23)
+#define PRIM_INLINE (0<<23)
+#define PRIM_INDIRECT_SEQUENTIAL (0<<17)
+#define PRIM_INDIRECT_ELTS (1<<17)
+
+#define PRIM3D_TRILIST (0x0<<18)
+#define PRIM3D_TRISTRIP (0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE (0x2<<18)
+#define PRIM3D_TRIFAN (0x3<<18)
+#define PRIM3D_POLY (0x4<<18)
+#define PRIM3D_LINELIST (0x5<<18)
+#define PRIM3D_LINESTRIP (0x6<<18)
+#define PRIM3D_RECTLIST (0x7<<18)
+#define PRIM3D_POINTLIST (0x8<<18)
+#define PRIM3D_DIB (0x9<<18)
+#define PRIM3D_MASK (0x1f<<18)
+
+#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6)
+
+#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4)
+
+#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6)
+
+/* BR00 */
+#define XY_BLT_WRITE_ALPHA (1 << 21)
+#define XY_BLT_WRITE_RGB (1 << 20)
+#define XY_SRC_TILED (1 << 15)
+#define XY_DST_TILED (1 << 11)
+
+/* BR13 */
+#define BR13_565 (0x1 << 24)
+#define BR13_8888 (0x3 << 24)
+
+#define FENCE_LINEAR 0
+#define FENCE_XMAJOR 1
+#define FENCE_YMAJOR 2
diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c
new file mode 100644
index 0000000000..ab7cd624b2
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_screen.c
@@ -0,0 +1,244 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "pipe/p_winsys.h"
+#include "util/u_string.h"
+
+#include "brw_context.h"
+#include "brw_screen.h"
+#include "brw_tex_layout.h"
+
+
+static const char *
+brw_get_vendor( struct pipe_screen *screen )
+{
+ return "Tungsten Graphics, Inc.";
+}
+
+
+static const char *
+brw_get_name( struct pipe_screen *screen )
+{
+ static char buffer[128];
+ const char *chipset;
+
+ switch (brw_screen(screen)->pci_id) {
+ case PCI_CHIP_I965_Q:
+ chipset = "Intel(R) 965Q";
+ break;
+ case PCI_CHIP_I965_G:
+ case PCI_CHIP_I965_G_1:
+ chipset = "Intel(R) 965G";
+ break;
+ case PCI_CHIP_I965_GM:
+ chipset = "Intel(R) 965GM";
+ break;
+ case PCI_CHIP_I965_GME:
+ chipset = "Intel(R) 965GME/GLE";
+ break;
+ default:
+ chipset = "unknown";
+ break;
+ }
+
+ util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset);
+ return buffer;
+}
+
+
+static int
+brw_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 8;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 11; /* max 1024x1024 */
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 8; /* max 128x128x128 */
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 11; /* max 1024x1024 */
+ default:
+ return 0;
+ }
+}
+
+
+static float
+brw_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 7.5;
+
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 255.0;
+
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 4.0;
+
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0;
+
+ default:
+ return 0;
+ }
+}
+
+
+static boolean
+brw_is_format_supported( struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage,
+ unsigned geom_flags )
+{
+#if 0
+ /* XXX: This is broken -- rewrite if still needed. */
+ static const unsigned tex_supported[] = {
+ PIPE_FORMAT_R8G8B8A8_UNORM,
+ PIPE_FORMAT_A8R8G8B8_UNORM,
+ PIPE_FORMAT_R5G6B5_UNORM,
+ PIPE_FORMAT_L8_UNORM,
+ PIPE_FORMAT_A8_UNORM,
+ PIPE_FORMAT_I8_UNORM,
+ PIPE_FORMAT_L8A8_UNORM,
+ PIPE_FORMAT_YCBCR,
+ PIPE_FORMAT_YCBCR_REV,
+ PIPE_FORMAT_S8_Z24,
+ };
+
+
+ /* Actually a lot more than this - add later:
+ */
+ static const unsigned render_supported[] = {
+ PIPE_FORMAT_A8R8G8B8_UNORM,
+ PIPE_FORMAT_R5G6B5_UNORM,
+ };
+
+ /*
+ */
+ static const unsigned z_stencil_supported[] = {
+ PIPE_FORMAT_Z16_UNORM,
+ PIPE_FORMAT_Z32_UNORM,
+ PIPE_FORMAT_S8Z24_UNORM,
+ };
+
+ switch (type) {
+ case PIPE_RENDER_FORMAT:
+ *numFormats = Elements(render_supported);
+ return render_supported;
+
+ case PIPE_TEX_FORMAT:
+ *numFormats = Elements(tex_supported);
+ return render_supported;
+
+ case PIPE_Z_STENCIL_FORMAT:
+ *numFormats = Elements(render_supported);
+ return render_supported;
+
+ default:
+ *numFormats = 0;
+ return NULL;
+ }
+#else
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ return TRUE;
+ default:
+ return FALSE;
+ };
+ return FALSE;
+#endif
+}
+
+
+static void
+brw_destroy_screen( struct pipe_screen *screen )
+{
+ struct pipe_winsys *winsys = screen->winsys;
+
+ if(winsys->destroy)
+ winsys->destroy(winsys);
+
+ FREE(screen);
+}
+
+
+/**
+ * Create a new brw_screen object
+ */
+struct pipe_screen *
+brw_create_screen(struct pipe_winsys *winsys, uint pci_id)
+{
+ struct brw_screen *brwscreen = CALLOC_STRUCT(brw_screen);
+
+ if (!brwscreen)
+ return NULL;
+
+ brwscreen->pci_id = pci_id;
+
+ brwscreen->screen.winsys = winsys;
+
+ brwscreen->screen.destroy = brw_destroy_screen;
+
+ brwscreen->screen.get_name = brw_get_name;
+ brwscreen->screen.get_vendor = brw_get_vendor;
+ brwscreen->screen.get_param = brw_get_param;
+ brwscreen->screen.get_paramf = brw_get_paramf;
+ brwscreen->screen.is_format_supported = brw_is_format_supported;
+
+ brw_init_screen_texture_funcs(&brwscreen->screen);
+
+ return &brwscreen->screen;
+}
diff --git a/src/gallium/drivers/i965simple/brw_screen.h b/src/gallium/drivers/i965simple/brw_screen.h
new file mode 100644
index 0000000000..d3c70387e6
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_screen.h
@@ -0,0 +1,68 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef BRW_SCREEN_H
+#define BRW_SCREEN_H
+
+
+#include "pipe/p_screen.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Subclass of pipe_screen
+ */
+struct brw_screen
+{
+ struct pipe_screen screen;
+
+ uint pci_id;
+};
+
+
+/** cast wrapper */
+static INLINE struct brw_screen *
+brw_screen(struct pipe_screen *pscreen)
+{
+ return (struct brw_screen *) pscreen;
+}
+
+
+extern struct pipe_screen *
+brw_create_screen(struct pipe_winsys *winsys, uint pci_id);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c
new file mode 100644
index 0000000000..b82a2e143b
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_sf.c
@@ -0,0 +1,351 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+#include "tgsi/tgsi_parse.h"
+
+
+static void compile_sf_prog( struct brw_context *brw,
+ struct brw_sf_prog_key *key )
+{
+ struct brw_sf_compile c;
+ const unsigned *program;
+ unsigned program_size;
+
+ memset(&c, 0, sizeof(c));
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(&c.func);
+
+ c.key = *key;
+
+
+ c.nr_attrs = c.key.vp_output_count;
+ c.nr_attr_regs = (c.nr_attrs+1)/2;
+
+ c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */
+ c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
+
+ c.prog_data.urb_read_length = c.nr_attr_regs;
+ c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+
+ /* Which primitive? Or all three?
+ */
+ switch (key->primitive) {
+ case SF_TRIANGLES:
+ c.nr_verts = 3;
+ brw_emit_tri_setup( &c );
+ break;
+ case SF_LINES:
+ c.nr_verts = 2;
+ brw_emit_line_setup( &c );
+ break;
+ case SF_POINTS:
+ c.nr_verts = 1;
+ brw_emit_point_setup( &c );
+ break;
+
+ case SF_UNFILLED_TRIS:
+ default:
+ assert(0);
+ return;
+ }
+
+
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /* Upload
+ */
+ brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG],
+ &c.key,
+ sizeof(c.key),
+ program,
+ program_size,
+ &c.prog_data,
+ &brw->sf.prog_data );
+}
+
+
+static boolean search_cache( struct brw_context *brw,
+ struct brw_sf_prog_key *key )
+{
+ return brw_search_cache(&brw->cache[BRW_SF_PROG],
+ key, sizeof(*key),
+ &brw->sf.prog_data,
+ &brw->sf.prog_gs_offset);
+}
+
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void upload_sf_prog( struct brw_context *brw )
+{
+ const struct brw_fragment_program *fs = brw->attribs.FragmentProgram;
+ struct brw_sf_prog_key key;
+ struct tgsi_parse_context parse;
+ int i, done = 0;
+
+
+ memset(&key, 0, sizeof(key));
+
+ /* Populate the key, noting state dependencies:
+ */
+ /* CACHE_NEW_VS_PROG */
+ key.vp_output_count = brw->vs.prog_data->outputs_written;
+
+ /* BRW_NEW_FS */
+ key.fp_input_count = brw->attribs.FragmentProgram->info.file_max[TGSI_FILE_INPUT] + 1;
+
+
+ /* BRW_NEW_REDUCED_PRIMITIVE */
+ switch (brw->reduced_primitive) {
+ case PIPE_PRIM_TRIANGLES:
+// if (key.attrs & (1<<VERT_RESULT_EDGE))
+// key.primitive = SF_UNFILLED_TRIS;
+// else
+ key.primitive = SF_TRIANGLES;
+ break;
+ case PIPE_PRIM_LINES:
+ key.primitive = SF_LINES;
+ break;
+ case PIPE_PRIM_POINTS:
+ key.primitive = SF_POINTS;
+ break;
+ }
+
+
+
+ /* Scan fp inputs to figure out what interpolation modes are
+ * required for each incoming vp output. There is an assumption
+ * that the state tracker makes sure there is a 1:1 linkage between
+ * these sets of attributes (XXX: position??)
+ */
+ tgsi_parse_init( &parse, fs->program.tokens );
+ while( !done &&
+ !tgsi_parse_end_of_tokens( &parse ) )
+ {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT)
+ {
+ int first = parse.FullToken.FullDeclaration.DeclarationRange.First;
+ int last = parse.FullToken.FullDeclaration.DeclarationRange.Last;
+ int interp_mode = parse.FullToken.FullDeclaration.Declaration.Interpolate;
+ //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName;
+ //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
+
+ debug_printf("fs input %d..%d interp mode %d\n", first, last, interp_mode);
+
+ switch (interp_mode) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ for (i = first; i <= last; i++)
+ key.const_mask |= (1 << i);
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ for (i = first; i <= last; i++)
+ key.linear_mask |= (1 << i);
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ for (i = first; i <= last; i++)
+ key.persp_mask |= (1 << i);
+ break;
+ default:
+ break;
+ }
+
+ /* Also need stuff for flat shading, twosided color.
+ */
+
+ }
+ break;
+ default:
+ done = 1;
+ break;
+ }
+ }
+
+ /* Hack: Adjust for position. Optimize away when not required (ie
+ * for perspective interpolation).
+ */
+ key.persp_mask <<= 1;
+ key.linear_mask <<= 1;
+ key.linear_mask |= 1;
+ key.const_mask <<= 1;
+
+ debug_printf("key.persp_mask: %x\n", key.persp_mask);
+ debug_printf("key.linear_mask: %x\n", key.linear_mask);
+ debug_printf("key.const_mask: %x\n", key.const_mask);
+
+
+// key.do_point_sprite = brw->attribs.Point->PointSprite;
+// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin;
+
+// key.do_flat_shading = (brw->attribs.Raster->flatshade);
+// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
+
+// if (key.do_twoside_color)
+// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
+
+
+ if (!search_cache(brw, &key))
+ compile_sf_prog( brw, &key );
+}
+
+
+const struct brw_tracked_state brw_sf_prog = {
+ .dirty = {
+ .brw = (BRW_NEW_RASTERIZER |
+ BRW_NEW_REDUCED_PRIMITIVE |
+ BRW_NEW_VS |
+ BRW_NEW_FS),
+ .cache = 0,
+ },
+ .update = upload_sf_prog
+};
+
+
+
+#if 0
+/* Build a struct like the one we'd like the state tracker to pass to
+ * us.
+ */
+static void update_sf_linkage( struct brw_context *brw )
+{
+ const struct brw_vertex_program *vs = brw->attribs.VertexProgram;
+ const struct brw_fragment_program *fs = brw->attribs.FragmentProgram;
+ struct pipe_setup_linkage state;
+ struct tgsi_parse_context parse;
+
+ int i, j;
+ int nr_vp_outputs = 0;
+ int done = 0;
+
+ struct {
+ unsigned semantic:8;
+ unsigned semantic_index:16;
+ } fp_semantic[32], vp_semantic[32];
+
+ memset(&state, 0, sizeof(state));
+
+ state.fp_input_count = 0;
+
+
+
+
+
+
+ assert(state.fp_input_count == fs->program.num_inputs);
+
+
+ /* Then scan vp outputs
+ */
+ done = 0;
+ tgsi_parse_init( &parse, vs->program.tokens );
+ while( !done &&
+ !tgsi_parse_end_of_tokens( &parse ) )
+ {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT)
+ {
+ int first = parse.FullToken.FullDeclaration.DeclarationRange.First;
+ int last = parse.FullToken.FullDeclaration.DeclarationRange.Last;
+
+ for (i = first; i < last; i++) {
+ vp_semantic[i].semantic =
+ parse.FullToken.FullDeclaration.Semantic.SemanticName;
+ vp_semantic[i].semantic_index =
+ parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
+ }
+
+ assert(last > nr_vp_outputs);
+ nr_vp_outputs = last;
+ }
+ break;
+ default:
+ done = 1;
+ break;
+ }
+ }
+
+
+ /* Now match based on semantic information.
+ */
+ for (i = 0; i< state.fp_input_count; i++) {
+ for (j = 0; j < nr_vp_outputs; j++) {
+ if (fp_semantic[i].semantic == vp_semantic[j].semantic &&
+ fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) {
+ state.fp_input[i].vp_output = j;
+ }
+ }
+ if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) {
+ for (j = 0; j < nr_vp_outputs; j++) {
+ if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic &&
+ fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) {
+ state.fp_input[i].bf_vp_output = j;
+ }
+ }
+ }
+ }
+
+ if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) {
+ brw->sf.linkage = state;
+ brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE;
+ }
+}
+
+
+const struct brw_tracked_state brw_sf_linkage = {
+ .dirty = {
+ .brw = (BRW_NEW_VS |
+ BRW_NEW_FS),
+ .cache = 0,
+ },
+ .update = update_sf_linkage
+};
+
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_sf.h b/src/gallium/drivers/i965simple/brw_sf.h
new file mode 100644
index 0000000000..b7ada47560
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_sf.h
@@ -0,0 +1,122 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+#define SF_POINTS 0
+#define SF_LINES 1
+#define SF_TRIANGLES 2
+#define SF_UNFILLED_TRIS 3
+
+
+
+struct brw_sf_prog_key {
+ unsigned vp_output_count:5;
+ unsigned fp_input_count:5;
+
+ unsigned primitive:2;
+ unsigned do_twoside_color:1;
+ unsigned do_flat_shading:1;
+ unsigned frontface_ccw:1;
+ unsigned do_point_sprite:1;
+
+ /* Interpolation masks;
+ */
+ unsigned linear_mask;
+ unsigned persp_mask;
+ unsigned const_mask;
+
+
+// int SpriteOrigin;
+};
+
+struct brw_sf_point_tex {
+ boolean CoordReplace;
+};
+
+struct brw_sf_compile {
+ struct brw_compile func;
+ struct brw_sf_prog_key key;
+ struct brw_sf_prog_data prog_data;
+
+ struct brw_reg pv;
+ struct brw_reg det;
+ struct brw_reg dx0;
+ struct brw_reg dx2;
+ struct brw_reg dy0;
+ struct brw_reg dy2;
+
+ /* z and 1/w passed in seperately:
+ */
+ struct brw_reg z[3];
+ struct brw_reg inv_w[3];
+
+ /* The vertices:
+ */
+ struct brw_reg vert[3];
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ struct brw_reg inv_det;
+ struct brw_reg a1_sub_a0;
+ struct brw_reg a2_sub_a0;
+ struct brw_reg tmp;
+
+ struct brw_reg m1Cx;
+ struct brw_reg m2Cy;
+ struct brw_reg m3C0;
+
+ unsigned nr_verts;
+ unsigned nr_attrs;
+ unsigned nr_attr_regs;
+ unsigned nr_setup_attrs;
+ unsigned nr_setup_regs;
+#if 0
+ ubyte attr_to_idx[VERT_RESULT_MAX];
+ ubyte idx_to_attr[VERT_RESULT_MAX];
+ struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
+#endif
+};
+
+
+void brw_emit_tri_setup( struct brw_sf_compile *c );
+void brw_emit_line_setup( struct brw_sf_compile *c );
+void brw_emit_point_setup( struct brw_sf_compile *c );
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c );
+void brw_emit_anyprim_setup( struct brw_sf_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_sf_emit.c b/src/gallium/drivers/i965simple/brw_sf_emit.c
new file mode 100644
index 0000000000..78d6fa5e9e
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_sf_emit.c
@@ -0,0 +1,382 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+
+
+/***********************************************************************
+ * Triangle setup.
+ */
+
+
+static void alloc_regs( struct brw_sf_compile *c )
+{
+ unsigned reg, i;
+
+ /* Values computed by fixed function unit:
+ */
+ c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD);
+ c->det = brw_vec1_grf(1, 2);
+ c->dx0 = brw_vec1_grf(1, 3);
+ c->dx2 = brw_vec1_grf(1, 4);
+ c->dy0 = brw_vec1_grf(1, 5);
+ c->dy2 = brw_vec1_grf(1, 6);
+
+ /* z and 1/w passed in seperately:
+ */
+ c->z[0] = brw_vec1_grf(2, 0);
+ c->inv_w[0] = brw_vec1_grf(2, 1);
+ c->z[1] = brw_vec1_grf(2, 2);
+ c->inv_w[1] = brw_vec1_grf(2, 3);
+ c->z[2] = brw_vec1_grf(2, 4);
+ c->inv_w[2] = brw_vec1_grf(2, 5);
+
+ /* The vertices:
+ */
+ reg = 3;
+ for (i = 0; i < c->nr_verts; i++) {
+ c->vert[i] = brw_vec8_grf(reg, 0);
+ reg += c->nr_attr_regs;
+ }
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ c->inv_det = brw_vec1_grf(reg, 0); reg++;
+ c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
+ c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
+ c->tmp = brw_vec8_grf(reg, 0); reg++;
+
+ /* Note grf allocation:
+ */
+ c->prog_data.total_grf = reg;
+
+
+ /* Outputs of this program - interpolation coefficients for
+ * rasterization:
+ */
+ c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
+ c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
+ c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
+}
+
+
+static void copy_z_inv_w( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+ brw_push_insn_state(p);
+
+ /* Copy both scalars with a single MOV:
+ */
+ for (i = 0; i < c->nr_verts; i++)
+ brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
+
+ brw_pop_insn_state(p);
+}
+
+
+static void invert_det( struct brw_sf_compile *c)
+{
+ brw_math(&c->func,
+ c->inv_det,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ c->det,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+
+}
+
+#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \
+ FRAG_BIT_COL0 | \
+ FRAG_BIT_COL1)
+
+static boolean calculate_masks( struct brw_sf_compile *c,
+ unsigned reg,
+ ushort *pc,
+ ushort *pc_persp,
+ ushort *pc_linear)
+{
+ boolean is_last_attr = (reg == c->nr_setup_regs - 1);
+ unsigned persp_mask = c->key.persp_mask;
+ unsigned linear_mask = c->key.linear_mask;
+
+ debug_printf("persp_mask: %x\n", persp_mask);
+ debug_printf("linear_mask: %x\n", linear_mask);
+
+ *pc_persp = 0;
+ *pc_linear = 0;
+ *pc = 0xf;
+
+ if (persp_mask & (1 << (reg*2)))
+ *pc_persp = 0xf;
+
+ if (linear_mask & (1 << (reg*2)))
+ *pc_linear = 0xf;
+
+ /* Maybe only processs one attribute on the final round:
+ */
+ if (reg*2+1 < c->nr_setup_attrs) {
+ *pc |= 0xf0;
+
+ if (persp_mask & (1 << (reg*2+1)))
+ *pc_persp |= 0xf0;
+
+ if (linear_mask & (1 << (reg*2+1)))
+ *pc_linear |= 0xf0;
+ }
+
+ debug_printf("pc: %x\n", *pc);
+ debug_printf("pc_persp: %x\n", *pc_persp);
+ debug_printf("pc_linear: %x\n", *pc_linear);
+
+
+ return is_last_attr;
+}
+
+
+
+void brw_emit_tri_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+ debug_printf("%s START ==============\n", __FUNCTION__);
+
+ c->nr_verts = 3;
+ alloc_regs(c);
+ invert_det(c);
+ copy_z_inv_w(c);
+
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ struct brw_reg a2 = offset(c->vert[2], i);
+ ushort pc = 0, pc_persp = 0, pc_linear = 0;
+ boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ brw_MUL(p, a2, a2, c->inv_w[2]);
+ }
+
+
+ /* Calculate coefficients for interpolated values:
+ */
+ if (pc_linear)
+ {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+ brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
+
+ /* calculate dA/dx
+ */
+ brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
+ brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ /* calculate dA/dy
+ */
+ brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
+ brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
+ * the send instruction:
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* offset */
+ BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+ }
+ }
+
+ debug_printf("%s DONE ==============\n", __FUNCTION__);
+
+}
+
+
+
+void brw_emit_line_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+
+ c->nr_verts = 2;
+ alloc_regs(c);
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ ushort pc, pc_persp, pc_linear;
+ boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ }
+
+ /* Calculate coefficients for position, color:
+ */
+ if (pc_linear) {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+
+/* Points setup - several simplifications as all attributes are
+ * constant across the face of the point (point sprites excluded!)
+ */
+void brw_emit_point_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ unsigned i;
+
+ c->nr_verts = 1;
+ alloc_regs(c);
+ copy_z_inv_w(c);
+
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ struct brw_reg a0 = offset(c->vert[0], i);
+ ushort pc, pc_persp, pc_linear;
+ boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ /* This seems odd as the values are all constant, but the
+ * fragment shader will be expecting it:
+ */
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ }
+
+
+ /* The delta values are always zero, just send the starting
+ * coordinate. Again, this is to fit in with the interpolation
+ * code in the fragment shader.
+ */
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c
new file mode 100644
index 0000000000..2a5de61c21
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_sf_state.c
@@ -0,0 +1,181 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+
+static void upload_sf_vp(struct brw_context *brw)
+{
+ struct brw_sf_viewport sfv;
+
+ memset(&sfv, 0, sizeof(sfv));
+
+
+ /* BRW_NEW_VIEWPORT */
+ {
+ const float *scale = brw->attribs.Viewport.scale;
+ const float *trans = brw->attribs.Viewport.translate;
+
+ sfv.viewport.m00 = scale[0];
+ sfv.viewport.m11 = scale[1];
+ sfv.viewport.m22 = scale[2];
+ sfv.viewport.m30 = trans[0];
+ sfv.viewport.m31 = trans[1];
+ sfv.viewport.m32 = trans[2];
+ }
+
+ /* _NEW_SCISSOR */
+ sfv.scissor.xmin = brw->attribs.Scissor.minx;
+ sfv.scissor.xmax = brw->attribs.Scissor.maxx - 1;
+ sfv.scissor.ymin = brw->attribs.Scissor.miny;
+ sfv.scissor.ymax = brw->attribs.Scissor.maxy - 1;
+
+ brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv );
+}
+
+const struct brw_tracked_state brw_sf_vp = {
+ .dirty = {
+ .brw = (BRW_NEW_SCISSOR |
+ BRW_NEW_VIEWPORT),
+ .cache = 0
+ },
+ .update = upload_sf_vp
+};
+
+static void upload_sf_unit( struct brw_context *brw )
+{
+ struct brw_sf_unit_state sf;
+ memset(&sf, 0, sizeof(sf));
+
+ /* CACHE_NEW_SF_PROG */
+ sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1;
+ sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6;
+ sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+ sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ sf.thread3.dispatch_grf_start_reg = 3;
+ sf.thread3.urb_entry_read_offset = 1;
+
+ /* BRW_NEW_URB_FENCE */
+ sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries;
+ sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1;
+ sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1;
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ sf.thread4.max_threads = 0;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ sf.thread4.stats_enable = 1;
+
+ /* CACHE_NEW_SF_VP */
+ sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5;
+ sf.sf5.viewport_transform = 1;
+
+ /* BRW_NEW_RASTER */
+ if (brw->attribs.Raster->scissor)
+ sf.sf6.scissor = 1;
+
+#if 0
+ if (brw->attribs.Polygon->FrontFace == GL_CCW)
+ sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+ else
+ sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
+
+ if (brw->attribs.Polygon->CullFlag) {
+ switch (brw->attribs.Polygon->CullFaceMode) {
+ case GL_FRONT:
+ sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
+ break;
+ case GL_BACK:
+ sf.sf6.cull_mode = BRW_CULLMODE_BACK;
+ break;
+ case GL_FRONT_AND_BACK:
+ sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+ else
+ sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+#else
+ sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+ sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+#endif
+
+ sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1);
+
+ sf.sf6.line_endcap_aa_region_width = 1;
+ if (brw->attribs.Raster->line_smooth)
+ sf.sf6.aa_enable = 1;
+ else if (sf.sf6.line_width <= 0x2)
+ sf.sf6.line_width = 0;
+
+ sf.sf6.point_rast_rule = 1; /* opengl conventions */
+
+ sf.sf7.sprite_point = brw->attribs.Raster->point_sprite;
+ sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3);
+ sf.sf7.use_point_size_state = !brw->attribs.Raster->point_size_per_vertex;
+
+ /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+ */
+ sf.sf7.trifan_pv = 2;
+ sf.sf7.linestrip_pv = 1;
+ sf.sf7.tristrip_pv = 2;
+ sf.sf7.line_last_pixel_enable = 0;
+
+ /* Set bias for OpenGL rasterization rules:
+ */
+ sf.sf6.dest_org_vbias = 0x8;
+ sf.sf6.dest_org_hbias = 0x8;
+
+ brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf );
+}
+
+
+const struct brw_tracked_state brw_sf_unit = {
+ .dirty = {
+ .brw = (BRW_NEW_RASTERIZER |
+ BRW_NEW_URB_FENCE),
+ .cache = (CACHE_NEW_SF_VP |
+ CACHE_NEW_SF_PROG)
+ },
+ .update = upload_sf_unit
+};
+
+
diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c
new file mode 100644
index 0000000000..86d877d7ef
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_shader_info.c
@@ -0,0 +1,48 @@
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+
+
+/**
+ * XXX this obsolete new and no longer compiled.
+ */
+void brw_shader_info(const struct tgsi_token *tokens,
+ struct brw_shader_info *info )
+{
+ struct tgsi_parse_context parse;
+ int done = 0;
+
+ tgsi_parse_init( &parse, tokens );
+
+ while( !done &&
+ !tgsi_parse_end_of_tokens( &parse ) )
+ {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
+ unsigned last = decl->DeclarationRange.Last;
+
+ // Broken by crazy wpos init:
+ //assert( info->nr_regs[decl->Declaration.File] <= last);
+
+ info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File],
+ last+1);
+ break;
+ }
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ default:
+ done = 1;
+ break;
+ }
+ }
+
+ tgsi_parse_free (&parse);
+
+}
diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c
new file mode 100644
index 0000000000..af46cb546f
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_state.c
@@ -0,0 +1,469 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Zack Rusin <zack@tungstengraphics.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_winsys.h"
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_draw.h"
+
+
+#define DUP( TYPE, VAL ) \
+do { \
+ struct TYPE *x = malloc(sizeof(*x)); \
+ memcpy(x, VAL, sizeof(*x) ); \
+ return x; \
+} while (0)
+
+/************************************************************************
+ * Blend
+ */
+static void *
+brw_create_blend_state(struct pipe_context *pipe,
+ const struct pipe_blend_state *blend)
+{
+ DUP( pipe_blend_state, blend );
+}
+
+static void brw_bind_blend_state(struct pipe_context *pipe,
+ void *blend)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Blend = (struct pipe_blend_state*)blend;
+ brw->state.dirty.brw |= BRW_NEW_BLEND;
+}
+
+
+static void brw_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+ free(blend);
+}
+
+static void brw_set_blend_color( struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.BlendColor = *blend_color;
+
+ brw->state.dirty.brw |= BRW_NEW_BLEND;
+}
+
+/************************************************************************
+ * Sampler
+ */
+
+static void *
+brw_create_sampler_state(struct pipe_context *pipe,
+ const struct pipe_sampler_state *sampler)
+{
+ DUP( pipe_sampler_state, sampler );
+}
+
+static void brw_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == brw->num_samplers &&
+ !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *)))
+ return;
+
+ memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *));
+ memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) *
+ sizeof(void *));
+
+ brw->num_samplers = num;
+
+ brw->state.dirty.brw |= BRW_NEW_SAMPLER;
+}
+
+static void brw_delete_sampler_state(struct pipe_context *pipe,
+ void *sampler)
+{
+ free(sampler);
+}
+
+
+/************************************************************************
+ * Depth stencil
+ */
+
+static void *
+brw_create_depth_stencil_state(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *depth_stencil)
+{
+ DUP( pipe_depth_stencil_alpha_state, depth_stencil );
+}
+
+static void brw_bind_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil;
+
+ brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL;
+}
+
+static void brw_delete_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ free(depth_stencil);
+}
+
+/************************************************************************
+ * Scissor
+ */
+static void brw_set_scissor_state( struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) );
+ brw->state.dirty.brw |= BRW_NEW_SCISSOR;
+}
+
+
+/************************************************************************
+ * Stipple
+ */
+
+static void brw_set_polygon_stipple( struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple )
+{
+}
+
+
+/************************************************************************
+ * Fragment shader
+ */
+
+static void * brw_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *shader)
+{
+ struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program);
+
+ brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens);
+ brw_fp->id = brw_context(pipe)->program_id++;
+
+ tgsi_scan_shader(shader->tokens, &brw_fp->info);
+
+#if 0
+ brw_shader_info(shader->tokens,
+ &brw_fp->info2);
+#endif
+
+ tgsi_dump(shader->tokens, 0);
+
+
+ return (void *)brw_fp;
+}
+
+static void brw_bind_fs_state(struct pipe_context *pipe, void *shader)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader;
+ brw->state.dirty.brw |= BRW_NEW_FS;
+}
+
+static void brw_delete_fs_state(struct pipe_context *pipe, void *shader)
+{
+ struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader;
+
+ FREE((void *) brw_fp->program.tokens);
+ FREE(brw_fp);
+}
+
+
+/************************************************************************
+ * Vertex shader and other TNL state
+ */
+
+static void *brw_create_vs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *shader)
+{
+ struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program);
+
+ brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens);
+ brw_vp->id = brw_context(pipe)->program_id++;
+
+ tgsi_scan_shader(shader->tokens, &brw_vp->info);
+
+#if 0
+ brw_shader_info(shader->tokens,
+ &brw_vp->info2);
+#endif
+ tgsi_dump(shader->tokens, 0);
+
+ return (void *)brw_vp;
+}
+
+static void brw_bind_vs_state(struct pipe_context *pipe, void *vs)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.VertexProgram = (struct brw_vertex_program *)vs;
+ brw->state.dirty.brw |= BRW_NEW_VS;
+
+ debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n");
+}
+
+static void brw_delete_vs_state(struct pipe_context *pipe, void *shader)
+{
+ struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader;
+
+ FREE((void *) brw_vp->program.tokens);
+ FREE(brw_vp);
+}
+
+
+static void brw_set_clip_state( struct pipe_context *pipe,
+ const struct pipe_clip_state *clip )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Clip = *clip;
+}
+
+
+static void brw_set_viewport_state( struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Viewport = *viewport; /* struct copy */
+ brw->state.dirty.brw |= BRW_NEW_VIEWPORT;
+
+ /* pass the viewport info to the draw module */
+ //draw_set_viewport_state(brw->draw, viewport);
+}
+
+
+static void brw_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct brw_context *brw = brw_context(pipe);
+ memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0]));
+}
+
+static void brw_set_vertex_elements(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *elements)
+{
+ /* flush ? */
+ struct brw_context *brw = brw_context(pipe);
+ uint i;
+
+ assert(count <= PIPE_MAX_ATTRIBS);
+
+ for (i = 0; i < count; i++) {
+ struct brw_vertex_element_state el;
+ memset(&el, 0, sizeof(el));
+
+ el.ve0.src_offset = elements[i].src_offset;
+ el.ve0.src_format = brw_translate_surface_format(elements[i].src_format);
+ el.ve0.valid = 1;
+ el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index;
+
+ el.ve1.dst_offset = i * 4;
+
+ el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC;
+ el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC;
+ el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC;
+ el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC;
+
+ switch (elements[i].nr_components) {
+ case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;
+ case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;
+ case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT;
+ break;
+ }
+
+ brw->vb.inputs[i] = el;
+ }
+}
+
+
+
+/************************************************************************
+ * Constant buffers
+ */
+
+static void brw_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ assert(buf == 0 || index == 0);
+
+ brw->attribs.Constants[shader] = buf;
+ brw->state.dirty.brw |= BRW_NEW_CONSTANTS;
+}
+
+
+/************************************************************************
+ * Texture surfaces
+ */
+
+
+static void brw_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_texture **texture)
+{
+ struct brw_context *brw = brw_context(pipe);
+ uint i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == brw->num_textures &&
+ !memcmp(brw->attribs.Texture, texture, num *
+ sizeof(struct pipe_texture *)))
+ return;
+
+ for (i = 0; i < num; i++)
+ pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i],
+ texture[i]);
+
+ for (i = num; i < brw->num_textures; i++)
+ pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i],
+ NULL);
+
+ brw->num_textures = num;
+
+ brw->state.dirty.brw |= BRW_NEW_TEXTURE;
+}
+
+
+/************************************************************************
+ * Render targets, etc
+ */
+
+static void brw_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.FrameBuffer = *fb; /* struct copy */
+
+ brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER;
+}
+
+
+
+/************************************************************************
+ * Rasterizer state
+ */
+
+static void *
+brw_create_rasterizer_state(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *rasterizer)
+{
+ DUP(pipe_rasterizer_state, rasterizer);
+}
+
+static void brw_bind_rasterizer_state( struct pipe_context *pipe,
+ void *setup )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->attribs.Raster = (struct pipe_rasterizer_state *)setup;
+
+ /* Also pass-through to draw module:
+ */
+ //draw_set_rasterizer_state(brw->draw, setup);
+
+ brw->state.dirty.brw |= BRW_NEW_RASTERIZER;
+}
+
+static void brw_delete_rasterizer_state(struct pipe_context *pipe,
+ void *setup)
+{
+ free(setup);
+}
+
+
+
+void
+brw_init_state_functions( struct brw_context *brw )
+{
+ brw->pipe.create_blend_state = brw_create_blend_state;
+ brw->pipe.bind_blend_state = brw_bind_blend_state;
+ brw->pipe.delete_blend_state = brw_delete_blend_state;
+
+ brw->pipe.create_sampler_state = brw_create_sampler_state;
+ brw->pipe.bind_sampler_states = brw_bind_sampler_states;
+ brw->pipe.delete_sampler_state = brw_delete_sampler_state;
+
+ brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state;
+ brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state;
+ brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state;
+
+ brw->pipe.create_rasterizer_state = brw_create_rasterizer_state;
+ brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state;
+ brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state;
+ brw->pipe.create_fs_state = brw_create_fs_state;
+ brw->pipe.bind_fs_state = brw_bind_fs_state;
+ brw->pipe.delete_fs_state = brw_delete_fs_state;
+ brw->pipe.create_vs_state = brw_create_vs_state;
+ brw->pipe.bind_vs_state = brw_bind_vs_state;
+ brw->pipe.delete_vs_state = brw_delete_vs_state;
+
+ brw->pipe.set_blend_color = brw_set_blend_color;
+ brw->pipe.set_clip_state = brw_set_clip_state;
+ brw->pipe.set_constant_buffer = brw_set_constant_buffer;
+ brw->pipe.set_framebuffer_state = brw_set_framebuffer_state;
+
+// brw->pipe.set_feedback_state = brw_set_feedback_state;
+// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer;
+
+ brw->pipe.set_polygon_stipple = brw_set_polygon_stipple;
+ brw->pipe.set_scissor_state = brw_set_scissor_state;
+ brw->pipe.set_sampler_textures = brw_set_sampler_textures;
+ brw->pipe.set_viewport_state = brw_set_viewport_state;
+ brw->pipe.set_vertex_buffers = brw_set_vertex_buffers;
+ brw->pipe.set_vertex_elements = brw_set_vertex_elements;
+}
diff --git a/src/gallium/drivers/i965simple/brw_state.h b/src/gallium/drivers/i965simple/brw_state.h
new file mode 100644
index 0000000000..de0a6371b8
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_state.h
@@ -0,0 +1,151 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "brw_context.h"
+#include "brw_winsys.h"
+
+
+const struct brw_tracked_state brw_blend_constant_color;
+const struct brw_tracked_state brw_cc_unit;
+const struct brw_tracked_state brw_cc_vp;
+const struct brw_tracked_state brw_clip_prog;
+const struct brw_tracked_state brw_clip_unit;
+const struct brw_tracked_state brw_constant_buffer_state;
+const struct brw_tracked_state brw_constant_buffer;
+const struct brw_tracked_state brw_curbe_offsets;
+const struct brw_tracked_state brw_invarient_state;
+const struct brw_tracked_state brw_gs_prog;
+const struct brw_tracked_state brw_gs_unit;
+const struct brw_tracked_state brw_drawing_rect;
+const struct brw_tracked_state brw_line_stipple;
+const struct brw_tracked_state brw_pipelined_state_pointers;
+const struct brw_tracked_state brw_binding_table_pointers;
+const struct brw_tracked_state brw_depthbuffer;
+const struct brw_tracked_state brw_polygon_stipple_offset;
+const struct brw_tracked_state brw_polygon_stipple;
+const struct brw_tracked_state brw_program_parameters;
+const struct brw_tracked_state brw_recalculate_urb_fence;
+const struct brw_tracked_state brw_sf_prog;
+const struct brw_tracked_state brw_sf_unit;
+const struct brw_tracked_state brw_sf_vp;
+const struct brw_tracked_state brw_state_base_address;
+const struct brw_tracked_state brw_urb_fence;
+const struct brw_tracked_state brw_vertex_state;
+const struct brw_tracked_state brw_vs_prog;
+const struct brw_tracked_state brw_vs_unit;
+const struct brw_tracked_state brw_wm_prog;
+const struct brw_tracked_state brw_wm_samplers;
+const struct brw_tracked_state brw_wm_surfaces;
+const struct brw_tracked_state brw_wm_unit;
+
+const struct brw_tracked_state brw_psp_urb_cbs;
+
+const struct brw_tracked_state brw_active_vertprog;
+const struct brw_tracked_state brw_tnl_vertprog;
+const struct brw_tracked_state brw_pipe_control;
+
+const struct brw_tracked_state brw_clear_surface_cache;
+const struct brw_tracked_state brw_clear_batch_cache;
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+unsigned brw_cache_data(struct brw_cache *cache,
+ const void *data );
+
+unsigned brw_cache_data_sz(struct brw_cache *cache,
+ const void *data,
+ unsigned data_sz);
+
+unsigned brw_upload_cache( struct brw_cache *cache,
+ const void *key,
+ unsigned key_sz,
+ const void *data,
+ unsigned data_sz,
+ const void *aux,
+ void *aux_return );
+
+boolean brw_search_cache( struct brw_cache *cache,
+ const void *key,
+ unsigned key_size,
+ void *aux_return,
+ unsigned *offset_return);
+
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+
+static inline struct pipe_buffer *brw_cache_buffer(struct brw_context *brw,
+ enum brw_cache_id id)
+{
+ return brw->cache[id].pool->buffer;
+}
+
+/***********************************************************************
+ * brw_state_batch.c
+ */
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
+
+boolean brw_cached_batch_struct( struct brw_context *brw,
+ const void *data,
+ unsigned sz );
+
+void brw_destroy_batch_cache( struct brw_context *brw );
+
+
+/***********************************************************************
+ * brw_state_pool.c
+ */
+void brw_init_pools( struct brw_context *brw );
+void brw_destroy_pools( struct brw_context *brw );
+
+boolean brw_pool_alloc( struct brw_mem_pool *pool,
+ unsigned size,
+ unsigned alignment,
+ unsigned *offset_return);
+
+void brw_pool_fence( struct brw_context *brw,
+ struct brw_mem_pool *pool,
+ unsigned fence );
+
+
+void brw_pool_check_wrap( struct brw_context *brw,
+ struct brw_mem_pool *pool );
+
+void brw_clear_all_caches( struct brw_context *brw );
+void brw_invalidate_pools( struct brw_context *brw );
+void brw_clear_batch_cache_flush( struct brw_context *brw );
+
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c
new file mode 100644
index 0000000000..43a1c89fc4
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_state_batch.c
@@ -0,0 +1,113 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_state.h"
+#include "brw_winsys.h"
+
+#include "util/u_memory.h"
+
+/* A facility similar to the data caching code above, which aims to
+ * prevent identical commands being issued repeatedly.
+ */
+boolean brw_cached_batch_struct( struct brw_context *brw,
+ const void *data,
+ unsigned sz )
+{
+ struct brw_cached_batch_item *item = brw->cached_batch_items;
+ struct header *newheader = (struct header *)data;
+
+ if (brw->emit_state_always) {
+ brw_batchbuffer_data(brw->winsys, data, sz);
+ return TRUE;
+ }
+
+ while (item) {
+ if (item->header->opcode == newheader->opcode) {
+ if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+ return FALSE;
+ if (item->sz != sz) {
+ FREE(item->header);
+ item->header = MALLOC(sz);
+ item->sz = sz;
+ }
+ goto emit;
+ }
+ item = item->next;
+ }
+
+ assert(!item);
+ item = CALLOC_STRUCT(brw_cached_batch_item);
+ item->header = MALLOC(sz);
+ item->sz = sz;
+ item->next = brw->cached_batch_items;
+ brw->cached_batch_items = item;
+
+emit:
+ memcpy(item->header, newheader, sz);
+ brw_batchbuffer_data(brw->winsys, data, sz);
+ return TRUE;
+}
+
+static void clear_batch_cache( struct brw_context *brw )
+{
+ struct brw_cached_batch_item *item = brw->cached_batch_items;
+
+ while (item) {
+ struct brw_cached_batch_item *next = item->next;
+ free((void *)item->header);
+ free(item);
+ item = next;
+ }
+
+ brw->cached_batch_items = NULL;
+
+
+ brw_clear_all_caches(brw);
+
+ brw_invalidate_pools(brw);
+}
+
+void brw_clear_batch_cache_flush( struct brw_context *brw )
+{
+ clear_batch_cache(brw);
+
+/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
+
+ brw->state.dirty.brw |= ~0;
+ brw->state.dirty.cache |= ~0;
+}
+
+
+
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+ clear_batch_cache(brw);
+}
diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c
new file mode 100644
index 0000000000..094248fa69
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_state_cache.c
@@ -0,0 +1,443 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_state.h"
+
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_clip.h"
+#include "brw_sf.h"
+#include "brw_gs.h"
+
+#include "util/u_memory.h"
+
+
+
+/***********************************************************************
+ * Check cache for uploaded version of struct, else upload new one.
+ * Fail when memory is exhausted.
+ *
+ * XXX: FIXME: Currently search is so slow it would be quicker to
+ * regenerate the data every time...
+ */
+
+static unsigned hash_key( const void *key, unsigned key_size )
+{
+ unsigned *ikey = (unsigned *)key;
+ unsigned hash = 0, i;
+
+ assert(key_size % 4 == 0);
+
+ /* I'm sure this can be improved on:
+ */
+ for (i = 0; i < key_size/4; i++)
+ hash ^= ikey[i];
+
+ return hash;
+}
+
+static struct brw_cache_item *search_cache( struct brw_cache *cache,
+ unsigned hash,
+ const void *key,
+ unsigned key_size)
+{
+ struct brw_cache_item *c;
+
+ for (c = cache->items[hash % cache->size]; c; c = c->next) {
+ if (c->hash == hash &&
+ c->key_size == key_size &&
+ memcmp(c->key, key, key_size) == 0)
+ return c;
+ }
+
+ return NULL;
+}
+
+
+static void rehash( struct brw_cache *cache )
+{
+ struct brw_cache_item **items;
+ struct brw_cache_item *c, *next;
+ unsigned size, i;
+
+ size = cache->size * 3;
+ items = (struct brw_cache_item**) MALLOC(size * sizeof(*items));
+ memset(items, 0, size * sizeof(*items));
+
+ for (i = 0; i < cache->size; i++)
+ for (c = cache->items[i]; c; c = next) {
+ next = c->next;
+ c->next = items[c->hash % size];
+ items[c->hash % size] = c;
+ }
+
+ FREE(cache->items);
+ cache->items = items;
+ cache->size = size;
+}
+
+
+boolean brw_search_cache( struct brw_cache *cache,
+ const void *key,
+ unsigned key_size,
+ void *aux_return,
+ unsigned *offset_return)
+{
+ struct brw_cache_item *item;
+ unsigned addr = 0;
+ unsigned hash = hash_key(key, key_size);
+
+ item = search_cache(cache, hash, key, key_size);
+
+ if (item) {
+ if (aux_return)
+ *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+
+ *offset_return = addr = item->offset;
+ }
+
+ if (item == NULL || addr != cache->last_addr) {
+ cache->brw->state.dirty.cache |= 1<<cache->id;
+ cache->last_addr = addr;
+ }
+
+ return item != NULL;
+}
+
+unsigned brw_upload_cache( struct brw_cache *cache,
+ const void *key,
+ unsigned key_size,
+ const void *data,
+ unsigned data_size,
+ const void *aux,
+ void *aux_return )
+{
+ unsigned offset;
+ struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+ unsigned hash = hash_key(key, key_size);
+ void *tmp = MALLOC(key_size + cache->aux_size);
+
+ if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) {
+ /* Should not be possible:
+ */
+ debug_printf("brw_pool_alloc failed\n");
+ exit(1);
+ }
+
+ memcpy(tmp, key, key_size);
+
+ if (cache->aux_size)
+ memcpy(tmp+key_size, aux, cache->aux_size);
+
+ item->key = tmp;
+ item->hash = hash;
+ item->key_size = key_size;
+ item->offset = offset;
+ item->data_size = data_size;
+
+ if (++cache->n_items > cache->size * 1.5)
+ rehash(cache);
+
+ hash %= cache->size;
+ item->next = cache->items[hash];
+ cache->items[hash] = item;
+
+ if (aux_return) {
+ assert(cache->aux_size);
+ *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+ }
+
+ if (BRW_DEBUG & DEBUG_STATE)
+ debug_printf("upload %s: %d bytes to pool buffer %p offset %x\n",
+ cache->name,
+ data_size,
+ (void*)cache->pool->buffer,
+ offset);
+
+ /* Copy data to the buffer:
+ */
+ cache->brw->winsys->buffer_subdata_typed(cache->brw->winsys,
+ cache->pool->buffer,
+ offset,
+ data_size,
+ data,
+ cache->id);
+
+ cache->brw->state.dirty.cache |= 1<<cache->id;
+ cache->last_addr = offset;
+
+ return offset;
+}
+
+/* This doesn't really work with aux data. Use search/upload instead
+ */
+unsigned brw_cache_data_sz(struct brw_cache *cache,
+ const void *data,
+ unsigned data_size)
+{
+ unsigned addr;
+
+ if (!brw_search_cache(cache, data, data_size, NULL, &addr)) {
+ addr = brw_upload_cache(cache,
+ data, data_size,
+ data, data_size,
+ NULL, NULL);
+ }
+
+ return addr;
+}
+
+unsigned brw_cache_data(struct brw_cache *cache,
+ const void *data)
+{
+ return brw_cache_data_sz(cache, data, cache->key_size);
+}
+
+enum pool_type {
+ DW_SURFACE_STATE,
+ DW_GENERAL_STATE
+};
+
+static void brw_init_cache( struct brw_context *brw,
+ const char *name,
+ unsigned id,
+ unsigned key_size,
+ unsigned aux_size,
+ enum pool_type pool_type)
+{
+ struct brw_cache *cache = &brw->cache[id];
+ cache->brw = brw;
+ cache->id = id;
+ cache->name = name;
+ cache->items = NULL;
+
+ cache->size = 7;
+ cache->n_items = 0;
+ cache->items = (struct brw_cache_item **)
+ CALLOC(cache->size, sizeof(struct brw_cache_item));
+
+
+ cache->key_size = key_size;
+ cache->aux_size = aux_size;
+ switch (pool_type) {
+ case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break;
+ case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break;
+ default: assert(0); break;
+ }
+}
+
+void brw_init_caches( struct brw_context *brw )
+{
+
+ brw_init_cache(brw,
+ "CC_VP",
+ BRW_CC_VP,
+ sizeof(struct brw_cc_viewport),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "CC_UNIT",
+ BRW_CC_UNIT,
+ sizeof(struct brw_cc_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "WM_PROG",
+ BRW_WM_PROG,
+ sizeof(struct brw_wm_prog_key),
+ sizeof(struct brw_wm_prog_data),
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SAMPLER_DEFAULT_COLOR",
+ BRW_SAMPLER_DEFAULT_COLOR,
+ sizeof(struct brw_sampler_default_color),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SAMPLER",
+ BRW_SAMPLER,
+ 0, /* variable key/data size */
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "WM_UNIT",
+ BRW_WM_UNIT,
+ sizeof(struct brw_wm_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SF_PROG",
+ BRW_SF_PROG,
+ sizeof(struct brw_sf_prog_key),
+ sizeof(struct brw_sf_prog_data),
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SF_VP",
+ BRW_SF_VP,
+ sizeof(struct brw_sf_viewport),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SF_UNIT",
+ BRW_SF_UNIT,
+ sizeof(struct brw_sf_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "VS_UNIT",
+ BRW_VS_UNIT,
+ sizeof(struct brw_vs_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "VS_PROG",
+ BRW_VS_PROG,
+ sizeof(struct brw_vs_prog_key),
+ sizeof(struct brw_vs_prog_data),
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "CLIP_UNIT",
+ BRW_CLIP_UNIT,
+ sizeof(struct brw_clip_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "CLIP_PROG",
+ BRW_CLIP_PROG,
+ sizeof(struct brw_clip_prog_key),
+ sizeof(struct brw_clip_prog_data),
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "GS_UNIT",
+ BRW_GS_UNIT,
+ sizeof(struct brw_gs_unit_state),
+ 0,
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "GS_PROG",
+ BRW_GS_PROG,
+ sizeof(struct brw_gs_prog_key),
+ sizeof(struct brw_gs_prog_data),
+ DW_GENERAL_STATE);
+
+ brw_init_cache(brw,
+ "SS_SURFACE",
+ BRW_SS_SURFACE,
+ sizeof(struct brw_surface_state),
+ 0,
+ DW_SURFACE_STATE);
+
+ brw_init_cache(brw,
+ "SS_SURF_BIND",
+ BRW_SS_SURF_BIND,
+ sizeof(struct brw_surface_binding_table),
+ 0,
+ DW_SURFACE_STATE);
+}
+
+
+/* When we lose hardware context, need to invalidate the surface cache
+ * as these structs must be explicitly re-uploaded. They are subject
+ * to fixup by the memory manager as they contain absolute agp
+ * offsets, so we need to ensure there is a fresh version of the
+ * struct available to receive the fixup.
+ *
+ * XXX: Need to ensure that there aren't two versions of a surface or
+ * bufferobj with different backing data active in the same buffer at
+ * once? Otherwise the cache could confuse them. Maybe better not to
+ * cache at all?
+ *
+ * --> Isn't this the same as saying need to ensure batch is flushed
+ * before new data is uploaded to an existing buffer? We
+ * already try to make sure of that.
+ */
+static void clear_cache( struct brw_cache *cache )
+{
+ struct brw_cache_item *c, *next;
+ unsigned i;
+
+ for (i = 0; i < cache->size; i++) {
+ for (c = cache->items[i]; c; c = next) {
+ next = c->next;
+ free((void *)c->key);
+ free(c);
+ }
+ cache->items[i] = NULL;
+ }
+
+ cache->n_items = 0;
+}
+
+void brw_clear_all_caches( struct brw_context *brw )
+{
+ int i;
+
+ if (BRW_DEBUG & DEBUG_STATE)
+ debug_printf("%s\n", __FUNCTION__);
+
+ for (i = 0; i < BRW_MAX_CACHE; i++)
+ clear_cache(&brw->cache[i]);
+
+ if (brw->curbe.last_buf) {
+ FREE(brw->curbe.last_buf);
+ brw->curbe.last_buf = NULL;
+ }
+
+ brw->state.dirty.brw |= ~0;
+ brw->state.dirty.cache |= ~0;
+}
+
+
+
+
+
+void brw_destroy_caches( struct brw_context *brw )
+{
+ unsigned i;
+
+ for (i = 0; i < BRW_MAX_CACHE; i++)
+ clear_cache(&brw->cache[i]);
+}
diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c
new file mode 100644
index 0000000000..007dc8f9de
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_state_pool.c
@@ -0,0 +1,138 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/** @file brw_state_pool.c
+ * Implements the state pool allocator.
+ *
+ * For the 965, we create two state pools for state cache entries. Objects
+ * will be allocated into the pools depending on which state base address
+ * their pointer is relative to in other 965 state.
+ *
+ * The state pools are relatively simple: As objects are allocated, increment
+ * the offset to allocate space. When the pool is "full" (rather, close to
+ * full), we reset the pool and reset the state cache entries that point into
+ * the pool.
+ */
+
+#include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+
+boolean brw_pool_alloc( struct brw_mem_pool *pool,
+ unsigned size,
+ unsigned alignment,
+ unsigned *offset_return)
+{
+ unsigned fixup = align(pool->offset, alignment) - pool->offset;
+
+ size = align(size, 4);
+
+ if (pool->offset + fixup + size >= pool->size) {
+ debug_printf("%s failed\n", __FUNCTION__);
+ assert(0);
+ exit(0);
+ }
+
+ pool->offset += fixup;
+ *offset_return = pool->offset;
+ pool->offset += size;
+
+ return TRUE;
+}
+
+static
+void brw_invalidate_pool( struct brw_mem_pool *pool )
+{
+ if (BRW_DEBUG & DEBUG_STATE)
+ debug_printf("\n\n\n %s \n\n\n", __FUNCTION__);
+
+ pool->offset = 0;
+
+ brw_clear_all_caches(pool->brw);
+}
+
+
+static void brw_init_pool( struct brw_context *brw,
+ unsigned pool_id,
+ unsigned size )
+{
+ struct brw_mem_pool *pool = &brw->pool[pool_id];
+
+ pool->size = size;
+ pool->brw = brw;
+
+ pool->buffer = pipe_buffer_create(brw->pipe.screen,
+ 4096,
+ 0 /* DRM_BO_FLAG_MEM_TT */,
+ size);
+}
+
+static void brw_destroy_pool( struct brw_context *brw,
+ unsigned pool_id )
+{
+ struct brw_mem_pool *pool = &brw->pool[pool_id];
+
+ pipe_buffer_reference( pool->brw->pipe.screen,
+ &pool->buffer,
+ NULL );
+}
+
+
+void brw_pool_check_wrap( struct brw_context *brw,
+ struct brw_mem_pool *pool )
+{
+ if (pool->offset > (pool->size * 3) / 4) {
+ brw->state.dirty.brw |= BRW_NEW_SCENE;
+ }
+
+}
+
+void brw_init_pools( struct brw_context *brw )
+{
+ brw_init_pool(brw, BRW_GS_POOL, 0x80000);
+ brw_init_pool(brw, BRW_SS_POOL, 0x80000);
+}
+
+void brw_destroy_pools( struct brw_context *brw )
+{
+ brw_destroy_pool(brw, BRW_GS_POOL);
+ brw_destroy_pool(brw, BRW_SS_POOL);
+}
+
+
+void brw_invalidate_pools( struct brw_context *brw )
+{
+ brw_invalidate_pool(&brw->pool[BRW_GS_POOL]);
+ brw_invalidate_pool(&brw->pool[BRW_SS_POOL]);
+}
diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c
new file mode 100644
index 0000000000..bac9161b5f
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_state_upload.c
@@ -0,0 +1,202 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+#include "util/u_memory.h"
+
+/* This is used to initialize brw->state.atoms[]. We could use this
+ * list directly except for a single atom, brw_constant_buffer, which
+ * has a .dirty value which changes according to the parameters of the
+ * current fragment and vertex programs, and so cannot be a static
+ * value.
+ */
+const struct brw_tracked_state *atoms[] =
+{
+ &brw_vs_prog,
+ &brw_gs_prog,
+ &brw_clip_prog,
+ &brw_sf_prog,
+ &brw_wm_prog,
+
+ /* Once all the programs are done, we know how large urb entry
+ * sizes need to be and can decide if we need to change the urb
+ * layout.
+ */
+ &brw_curbe_offsets,
+ &brw_recalculate_urb_fence,
+
+
+ &brw_cc_vp,
+ &brw_cc_unit,
+
+ &brw_wm_surfaces, /* must do before samplers */
+ &brw_wm_samplers,
+
+ &brw_wm_unit,
+ &brw_sf_vp,
+ &brw_sf_unit,
+ &brw_vs_unit, /* always required, enabled or not */
+ &brw_clip_unit,
+ &brw_gs_unit,
+
+ /* Command packets:
+ */
+ &brw_invarient_state,
+ &brw_state_base_address,
+ &brw_pipe_control,
+
+ &brw_binding_table_pointers,
+ &brw_blend_constant_color,
+
+ &brw_drawing_rect,
+ &brw_depthbuffer,
+
+ &brw_polygon_stipple,
+ &brw_line_stipple,
+
+ &brw_psp_urb_cbs,
+
+ &brw_constant_buffer
+};
+
+
+void brw_init_state( struct brw_context *brw )
+{
+ brw_init_pools(brw);
+ brw_init_caches(brw);
+
+ brw->state.dirty.brw = ~0;
+ brw->emit_state_always = 0;
+}
+
+
+void brw_destroy_state( struct brw_context *brw )
+{
+ brw_destroy_caches(brw);
+ brw_destroy_batch_cache(brw);
+ brw_destroy_pools(brw);
+}
+
+/***********************************************************************
+ */
+
+static boolean check_state( const struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ return ((a->brw & b->brw) ||
+ (a->cache & b->cache));
+}
+
+static void accumulate_state( struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ a->brw |= b->brw;
+ a->cache |= b->cache;
+}
+
+
+static void xor_states( struct brw_state_flags *result,
+ const struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ result->brw = a->brw ^ b->brw;
+ result->cache = a->cache ^ b->cache;
+}
+
+
+/***********************************************************************
+ * Emit all state:
+ */
+void brw_validate_state( struct brw_context *brw )
+{
+ struct brw_state_flags *state = &brw->state.dirty;
+ unsigned i;
+
+ if (brw->emit_state_always)
+ state->brw |= ~0;
+
+ if (state->cache == 0 &&
+ state->brw == 0)
+ return;
+
+ if (brw->state.dirty.brw & BRW_NEW_SCENE)
+ brw_clear_batch_cache_flush(brw);
+
+ if (BRW_DEBUG) {
+ /* Debug version which enforces various sanity checks on the
+ * state flags which are generated and checked to help ensure
+ * state atoms are ordered correctly in the list.
+ */
+ struct brw_state_flags examined, prev;
+ memset(&examined, 0, sizeof(examined));
+ prev = *state;
+
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
+ struct brw_state_flags generated;
+
+ assert(atom->dirty.brw ||
+ atom->dirty.cache);
+ assert(atom->update);
+
+ if (check_state(state, &atom->dirty)) {
+ atom->update( brw );
+ }
+
+ accumulate_state(&examined, &atom->dirty);
+
+ /* generated = (prev ^ state)
+ * if (examined & generated)
+ * fail;
+ */
+ xor_states(&generated, &prev, state);
+ assert(!check_state(&examined, &generated));
+ prev = *state;
+ }
+ }
+ else {
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
+
+ assert(atom->dirty.brw ||
+ atom->dirty.cache);
+ assert(atom->update);
+
+ if (check_state(state, &atom->dirty))
+ atom->update( brw );
+ }
+ }
+
+ memset(state, 0, sizeof(*state));
+}
diff --git a/src/gallium/drivers/i965simple/brw_structs.h b/src/gallium/drivers/i965simple/brw_structs.h
new file mode 100644
index 0000000000..bbb087e95d
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_structs.h
@@ -0,0 +1,1348 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+#include "pipe/p_compiler.h"
+
+/* Command packets:
+ */
+struct header
+{
+ unsigned length:16;
+ unsigned opcode:16;
+};
+
+
+union header_union
+{
+ struct header bits;
+ unsigned dword;
+};
+
+struct brw_3d_control
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned notify_enable:1;
+ unsigned pad:3;
+ unsigned wc_flush_enable:1;
+ unsigned depth_stall_enable:1;
+ unsigned operation:2;
+ unsigned opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned pad:2;
+ unsigned dest_addr_type:1;
+ unsigned dest_addr:29;
+ } dest;
+
+ unsigned dword2;
+ unsigned dword3;
+};
+
+
+struct brw_3d_primitive
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned pad:2;
+ unsigned topology:5;
+ unsigned indexed:1;
+ unsigned opcode:16;
+ } header;
+
+ unsigned verts_per_instance;
+ unsigned start_vert_location;
+ unsigned instance_count;
+ unsigned start_instance_location;
+ unsigned base_vert_location;
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define BRW_FLUSH_READ_CACHE 0x1
+#define BRW_FLUSH_STATE_CACHE 0x2
+#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8
+
+struct brw_mi_flush
+{
+ unsigned flags:4;
+ unsigned pad:12;
+ unsigned opcode:16;
+};
+
+struct brw_vf_statistics
+{
+ unsigned statistics_enable:1;
+ unsigned pad:15;
+ unsigned opcode:16;
+};
+
+
+
+struct brw_binding_table_pointers
+{
+ struct header header;
+ unsigned vs;
+ unsigned gs;
+ unsigned clp;
+ unsigned sf;
+ unsigned wm;
+};
+
+
+struct brw_blend_constant_color
+{
+ struct header header;
+ float blend_constant_color[4];
+};
+
+
+struct brw_depthbuffer
+{
+ union header_union header;
+
+ union {
+ struct {
+ unsigned pitch:18;
+ unsigned format:3;
+ unsigned pad:4;
+ unsigned depth_offset_disable:1;
+ unsigned tile_walk:1;
+ unsigned tiled_surface:1;
+ unsigned pad2:1;
+ unsigned surface_type:3;
+ } bits;
+ unsigned dword;
+ } dword1;
+
+ unsigned dword2_base_addr;
+
+ union {
+ struct {
+ unsigned pad:1;
+ unsigned mipmap_layout:1;
+ unsigned lod:4;
+ unsigned width:13;
+ unsigned height:13;
+ } bits;
+ unsigned dword;
+ } dword3;
+
+ union {
+ struct {
+ unsigned pad:12;
+ unsigned min_array_element:9;
+ unsigned depth:11;
+ } bits;
+ unsigned dword;
+ } dword4;
+};
+
+struct brw_drawrect
+{
+ struct header header;
+ unsigned xmin:16;
+ unsigned ymin:16;
+ unsigned xmax:16;
+ unsigned ymax:16;
+ unsigned xorg:16;
+ unsigned yorg:16;
+};
+
+
+
+
+struct brw_global_depth_offset_clamp
+{
+ struct header header;
+ float depth_offset_clamp;
+};
+
+struct brw_indexbuffer
+{
+ union {
+ struct
+ {
+ unsigned length:8;
+ unsigned index_format:2;
+ unsigned cut_index_enable:1;
+ unsigned pad:5;
+ unsigned opcode:16;
+ } bits;
+ unsigned dword;
+
+ } header;
+
+ unsigned buffer_start;
+ unsigned buffer_end;
+};
+
+
+struct brw_line_stipple
+{
+ struct header header;
+
+ struct
+ {
+ unsigned pattern:16;
+ unsigned pad:16;
+ } bits0;
+
+ struct
+ {
+ unsigned repeat_count:9;
+ unsigned pad:7;
+ unsigned inverse_repeat_count:16;
+ } bits1;
+};
+
+
+struct brw_pipelined_state_pointers
+{
+ struct header header;
+
+ struct {
+ unsigned pad:5;
+ unsigned offset:27;
+ } vs;
+
+ struct
+ {
+ unsigned enable:1;
+ unsigned pad:4;
+ unsigned offset:27;
+ } gs;
+
+ struct
+ {
+ unsigned enable:1;
+ unsigned pad:4;
+ unsigned offset:27;
+ } clp;
+
+ struct
+ {
+ unsigned pad:5;
+ unsigned offset:27;
+ } sf;
+
+ struct
+ {
+ unsigned pad:5;
+ unsigned offset:27;
+ } wm;
+
+ struct
+ {
+ unsigned pad:5;
+ unsigned offset:27; /* KW: check me! */
+ } cc;
+};
+
+
+struct brw_polygon_stipple_offset
+{
+ struct header header;
+
+ struct {
+ unsigned y_offset:5;
+ unsigned pad:3;
+ unsigned x_offset:5;
+ unsigned pad0:19;
+ } bits0;
+};
+
+
+
+struct brw_polygon_stipple
+{
+ struct header header;
+ unsigned stipple[32];
+};
+
+
+
+struct brw_pipeline_select
+{
+ struct
+ {
+ unsigned pipeline_select:1;
+ unsigned pad:15;
+ unsigned opcode:16;
+ } header;
+};
+
+
+struct brw_pipe_control
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned notify_enable:1;
+ unsigned pad:2;
+ unsigned instruction_state_cache_flush_enable:1;
+ unsigned write_cache_flush_enable:1;
+ unsigned depth_stall_enable:1;
+ unsigned post_sync_operation:2;
+
+ unsigned opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned pad:2;
+ unsigned dest_addr_type:1;
+ unsigned dest_addr:29;
+ } bits1;
+
+ unsigned data0;
+ unsigned data1;
+};
+
+
+struct brw_urb_fence
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned vs_realloc:1;
+ unsigned gs_realloc:1;
+ unsigned clp_realloc:1;
+ unsigned sf_realloc:1;
+ unsigned vfe_realloc:1;
+ unsigned cs_realloc:1;
+ unsigned pad:2;
+ unsigned opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned vs_fence:10;
+ unsigned gs_fence:10;
+ unsigned clp_fence:10;
+ unsigned pad:2;
+ } bits0;
+
+ struct
+ {
+ unsigned sf_fence:10;
+ unsigned vf_fence:10;
+ unsigned cs_fence:10;
+ unsigned pad:2;
+ } bits1;
+};
+
+struct brw_constant_buffer_state /* previously brw_command_streamer */
+{
+ struct header header;
+
+ struct
+ {
+ unsigned nr_urb_entries:3;
+ unsigned pad:1;
+ unsigned urb_entry_size:5;
+ unsigned pad0:23;
+ } bits0;
+};
+
+struct brw_constant_buffer
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned valid:1;
+ unsigned pad:7;
+ unsigned opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned buffer_length:6;
+ unsigned buffer_address:26;
+ } bits0;
+};
+
+struct brw_state_base_address
+{
+ struct header header;
+
+ struct
+ {
+ unsigned modify_enable:1;
+ unsigned pad:4;
+ unsigned general_state_address:27;
+ } bits0;
+
+ struct
+ {
+ unsigned modify_enable:1;
+ unsigned pad:4;
+ unsigned surface_state_address:27;
+ } bits1;
+
+ struct
+ {
+ unsigned modify_enable:1;
+ unsigned pad:4;
+ unsigned indirect_object_state_address:27;
+ } bits2;
+
+ struct
+ {
+ unsigned modify_enable:1;
+ unsigned pad:11;
+ unsigned general_state_upper_bound:20;
+ } bits3;
+
+ struct
+ {
+ unsigned modify_enable:1;
+ unsigned pad:11;
+ unsigned indirect_object_state_upper_bound:20;
+ } bits4;
+};
+
+struct brw_state_prefetch
+{
+ struct header header;
+
+ struct
+ {
+ unsigned prefetch_count:3;
+ unsigned pad:3;
+ unsigned prefetch_pointer:26;
+ } bits0;
+};
+
+struct brw_system_instruction_pointer
+{
+ struct header header;
+
+ struct
+ {
+ unsigned pad:4;
+ unsigned system_instruction_pointer:28;
+ } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0
+{
+ unsigned pad0:1;
+ unsigned grf_reg_count:3;
+ unsigned pad1:2;
+ unsigned kernel_start_pointer:26;
+};
+
+struct thread1
+{
+ unsigned ext_halt_exception_enable:1;
+ unsigned sw_exception_enable:1;
+ unsigned mask_stack_exception_enable:1;
+ unsigned timeout_exception_enable:1;
+ unsigned illegal_op_exception_enable:1;
+ unsigned pad0:3;
+ unsigned depth_coef_urb_read_offset:6; /* WM only */
+ unsigned pad1:2;
+ unsigned floating_point_mode:1;
+ unsigned thread_priority:1;
+ unsigned binding_table_entry_count:8;
+ unsigned pad3:5;
+ unsigned single_program_flow:1;
+};
+
+struct thread2
+{
+ unsigned per_thread_scratch_space:4;
+ unsigned pad0:6;
+ unsigned scratch_space_base_pointer:22;
+};
+
+
+struct thread3
+{
+ unsigned dispatch_grf_start_reg:4;
+ unsigned urb_entry_read_offset:6;
+ unsigned pad0:1;
+ unsigned urb_entry_read_length:6;
+ unsigned pad1:1;
+ unsigned const_urb_entry_read_offset:6;
+ unsigned pad2:1;
+ unsigned const_urb_entry_read_length:6;
+ unsigned pad3:1;
+};
+
+
+
+struct brw_clip_unit_state
+{
+ struct thread0 thread0;
+ struct
+ {
+ unsigned pad0:7;
+ unsigned sw_exception_enable:1;
+ unsigned pad1:3;
+ unsigned mask_stack_exception_enable:1;
+ unsigned pad2:1;
+ unsigned illegal_op_exception_enable:1;
+ unsigned pad3:2;
+ unsigned floating_point_mode:1;
+ unsigned thread_priority:1;
+ unsigned binding_table_entry_count:8;
+ unsigned pad4:5;
+ unsigned single_program_flow:1;
+ } thread1;
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:9;
+ unsigned gs_output_stats:1; /* not always */
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:1; /* may be less */
+ unsigned pad3:6;
+ } thread4;
+
+ struct
+ {
+ unsigned pad0:13;
+ unsigned clip_mode:3;
+ unsigned userclip_enable_flags:8;
+ unsigned userclip_must_clip:1;
+ unsigned pad1:1;
+ unsigned guard_band_enable:1;
+ unsigned viewport_z_clip_enable:1;
+ unsigned viewport_xy_clip_enable:1;
+ unsigned vertex_position_space:1;
+ unsigned api_mode:1;
+ unsigned pad2:1;
+ } clip5;
+
+ struct
+ {
+ unsigned pad0:5;
+ unsigned clipper_viewport_state_ptr:27;
+ } clip6;
+
+
+ float viewport_xmin;
+ float viewport_xmax;
+ float viewport_ymin;
+ float viewport_ymax;
+};
+
+
+
+struct brw_cc_unit_state
+{
+ struct
+ {
+ unsigned pad0:3;
+ unsigned bf_stencil_pass_depth_pass_op:3;
+ unsigned bf_stencil_pass_depth_fail_op:3;
+ unsigned bf_stencil_fail_op:3;
+ unsigned bf_stencil_func:3;
+ unsigned bf_stencil_enable:1;
+ unsigned pad1:2;
+ unsigned stencil_write_enable:1;
+ unsigned stencil_pass_depth_pass_op:3;
+ unsigned stencil_pass_depth_fail_op:3;
+ unsigned stencil_fail_op:3;
+ unsigned stencil_func:3;
+ unsigned stencil_enable:1;
+ } cc0;
+
+
+ struct
+ {
+ unsigned bf_stencil_ref:8;
+ unsigned stencil_write_mask:8;
+ unsigned stencil_test_mask:8;
+ unsigned stencil_ref:8;
+ } cc1;
+
+
+ struct
+ {
+ unsigned logicop_enable:1;
+ unsigned pad0:10;
+ unsigned depth_write_enable:1;
+ unsigned depth_test_function:3;
+ unsigned depth_test:1;
+ unsigned bf_stencil_write_mask:8;
+ unsigned bf_stencil_test_mask:8;
+ } cc2;
+
+
+ struct
+ {
+ unsigned pad0:8;
+ unsigned alpha_test_func:3;
+ unsigned alpha_test:1;
+ unsigned blend_enable:1;
+ unsigned ia_blend_enable:1;
+ unsigned pad1:1;
+ unsigned alpha_test_format:1;
+ unsigned pad2:16;
+ } cc3;
+
+ struct
+ {
+ unsigned pad0:5;
+ unsigned cc_viewport_state_offset:27;
+ } cc4;
+
+ struct
+ {
+ unsigned pad0:2;
+ unsigned ia_dest_blend_factor:5;
+ unsigned ia_src_blend_factor:5;
+ unsigned ia_blend_function:3;
+ unsigned statistics_enable:1;
+ unsigned logicop_func:4;
+ unsigned pad1:11;
+ unsigned dither_enable:1;
+ } cc5;
+
+ struct
+ {
+ unsigned clamp_post_alpha_blend:1;
+ unsigned clamp_pre_alpha_blend:1;
+ unsigned clamp_range:2;
+ unsigned pad0:11;
+ unsigned y_dither_offset:2;
+ unsigned x_dither_offset:2;
+ unsigned dest_blend_factor:5;
+ unsigned src_blend_factor:5;
+ unsigned blend_function:3;
+ } cc6;
+
+ struct {
+ union {
+ float f;
+ ubyte ub[4];
+ } alpha_ref;
+ } cc7;
+};
+
+
+
+struct brw_sf_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:10;
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:6;
+ unsigned pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned front_winding:1;
+ unsigned viewport_transform:1;
+ unsigned pad0:3;
+ unsigned sf_viewport_state_offset:27;
+ } sf5;
+
+ struct
+ {
+ unsigned pad0:9;
+ unsigned dest_org_vbias:4;
+ unsigned dest_org_hbias:4;
+ unsigned scissor:1;
+ unsigned disable_2x2_trifilter:1;
+ unsigned disable_zero_pix_trifilter:1;
+ unsigned point_rast_rule:2;
+ unsigned line_endcap_aa_region_width:2;
+ unsigned line_width:4;
+ unsigned fast_scissor_disable:1;
+ unsigned cull_mode:2;
+ unsigned aa_enable:1;
+ } sf6;
+
+ struct
+ {
+ unsigned point_size:11;
+ unsigned use_point_size_state:1;
+ unsigned subpixel_precision:1;
+ unsigned sprite_point:1;
+ unsigned pad0:11;
+ unsigned trifan_pv:2;
+ unsigned linestrip_pv:2;
+ unsigned tristrip_pv:2;
+ unsigned line_last_pixel_enable:1;
+ } sf7;
+
+};
+
+
+struct brw_gs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:10;
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:1;
+ unsigned pad3:6;
+ } thread4;
+
+ struct
+ {
+ unsigned sampler_count:3;
+ unsigned pad0:2;
+ unsigned sampler_state_pointer:27;
+ } gs5;
+
+
+ struct
+ {
+ unsigned max_vp_index:4;
+ unsigned pad0:26;
+ unsigned reorder_enable:1;
+ unsigned pad1:1;
+ } gs6;
+};
+
+
+struct brw_vs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:10;
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:4;
+ unsigned pad3:3;
+ } thread4;
+
+ struct
+ {
+ unsigned sampler_count:3;
+ unsigned pad0:2;
+ unsigned sampler_state_pointer:27;
+ } vs5;
+
+ struct
+ {
+ unsigned vs_enable:1;
+ unsigned vert_cache_disable:1;
+ unsigned pad0:30;
+ } vs6;
+};
+
+
+struct brw_wm_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ unsigned stats_enable:1;
+ unsigned pad0:1;
+ unsigned sampler_count:3;
+ unsigned sampler_state_pointer:27;
+ } wm4;
+
+ struct
+ {
+ unsigned enable_8_pix:1;
+ unsigned enable_16_pix:1;
+ unsigned enable_32_pix:1;
+ unsigned pad0:7;
+ unsigned legacy_global_depth_bias:1;
+ unsigned line_stipple:1;
+ unsigned depth_offset:1;
+ unsigned polygon_stipple:1;
+ unsigned line_aa_region_width:2;
+ unsigned line_endcap_aa_region_width:2;
+ unsigned early_depth_test:1;
+ unsigned thread_dispatch_enable:1;
+ unsigned program_uses_depth:1;
+ unsigned program_computes_depth:1;
+ unsigned program_uses_killpixel:1;
+ unsigned legacy_line_rast: 1;
+ unsigned pad1:1;
+ unsigned max_threads:6;
+ unsigned pad2:1;
+ } wm5;
+
+ float global_depth_offset_constant;
+ float global_depth_offset_scale;
+};
+
+struct brw_sampler_default_color {
+ float color[4];
+};
+
+struct brw_sampler_state
+{
+
+ struct
+ {
+ unsigned shadow_function:3;
+ unsigned lod_bias:11;
+ unsigned min_filter:3;
+ unsigned mag_filter:3;
+ unsigned mip_filter:2;
+ unsigned base_level:5;
+ unsigned pad:1;
+ unsigned lod_preclamp:1;
+ unsigned default_color_mode:1;
+ unsigned pad0:1;
+ unsigned disable:1;
+ } ss0;
+
+ struct
+ {
+ unsigned r_wrap_mode:3;
+ unsigned t_wrap_mode:3;
+ unsigned s_wrap_mode:3;
+ unsigned pad:3;
+ unsigned max_lod:10;
+ unsigned min_lod:10;
+ } ss1;
+
+
+ struct
+ {
+ unsigned pad:5;
+ unsigned default_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ unsigned pad:19;
+ unsigned max_aniso:3;
+ unsigned chroma_key_mode:1;
+ unsigned chroma_key_index:2;
+ unsigned chroma_key_enable:1;
+ unsigned monochrome_filter_width:3;
+ unsigned monochrome_filter_height:3;
+ } ss3;
+};
+
+
+struct brw_clipper_viewport
+{
+ float xmin;
+ float xmax;
+ float ymin;
+ float ymax;
+};
+
+struct brw_cc_viewport
+{
+ float min_depth;
+ float max_depth;
+};
+
+struct brw_sf_viewport
+{
+ struct {
+ float m00;
+ float m11;
+ float m22;
+ float m30;
+ float m31;
+ float m32;
+ } viewport;
+
+ struct {
+ short xmin;
+ short ymin;
+ short xmax;
+ short ymax;
+ } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ */
+struct brw_surface_state
+{
+ struct {
+ unsigned cube_pos_z:1;
+ unsigned cube_neg_z:1;
+ unsigned cube_pos_y:1;
+ unsigned cube_neg_y:1;
+ unsigned cube_pos_x:1;
+ unsigned cube_neg_x:1;
+ unsigned pad:4;
+ unsigned mipmap_layout_mode:1;
+ unsigned vert_line_stride_ofs:1;
+ unsigned vert_line_stride:1;
+ unsigned color_blend:1;
+ unsigned writedisable_blue:1;
+ unsigned writedisable_green:1;
+ unsigned writedisable_red:1;
+ unsigned writedisable_alpha:1;
+ unsigned surface_format:9;
+ unsigned data_return_format:1;
+ unsigned pad0:1;
+ unsigned surface_type:3;
+ } ss0;
+
+ struct {
+ unsigned base_addr;
+ } ss1;
+
+ struct {
+ unsigned pad:2;
+ unsigned mip_count:4;
+ unsigned width:13;
+ unsigned height:13;
+ } ss2;
+
+ struct {
+ unsigned tile_walk:1;
+ unsigned tiled_surface:1;
+ unsigned pad:1;
+ unsigned pitch:18;
+ unsigned depth:11;
+ } ss3;
+
+ struct {
+ unsigned pad:19;
+ unsigned min_array_elt:9;
+ unsigned min_lod:4;
+ } ss4;
+};
+
+
+
+struct brw_vertex_buffer_state
+{
+ struct {
+ unsigned pitch:11;
+ unsigned pad:15;
+ unsigned access_type:1;
+ unsigned vb_index:5;
+ } vb0;
+
+ unsigned start_addr;
+ unsigned max_index;
+#if 1
+ unsigned instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define BRW_VBP_MAX 17
+
+struct brw_vb_array_state {
+ struct header header;
+ struct brw_vertex_buffer_state vb[BRW_VBP_MAX];
+};
+
+
+struct brw_vertex_element_state
+{
+ struct
+ {
+ unsigned src_offset:11;
+ unsigned pad:5;
+ unsigned src_format:9;
+ unsigned pad0:1;
+ unsigned valid:1;
+ unsigned vertex_buffer_index:5;
+ } ve0;
+
+ struct
+ {
+ unsigned dst_offset:8;
+ unsigned pad:8;
+ unsigned vfcomponent3:4;
+ unsigned vfcomponent2:4;
+ unsigned vfcomponent1:4;
+ unsigned vfcomponent0:4;
+ } ve1;
+};
+
+#define BRW_VEP_MAX 18
+
+struct brw_vertex_element_packet {
+ struct header header;
+ struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct brw_urb_immediate {
+ unsigned opcode:4;
+ unsigned offset:6;
+ unsigned swizzle_control:2;
+ unsigned pad:1;
+ unsigned allocate:1;
+ unsigned used:1;
+ unsigned complete:1;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+
+struct brw_instruction
+{
+ struct
+ {
+ unsigned opcode:7;
+ unsigned pad:1;
+ unsigned access_mode:1;
+ unsigned mask_control:1;
+ unsigned dependency_control:2;
+ unsigned compression_control:2;
+ unsigned thread_control:2;
+ unsigned predicate_control:4;
+ unsigned predicate_inverse:1;
+ unsigned execution_size:3;
+ unsigned destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+ unsigned pad0:2;
+ unsigned debug_control:1;
+ unsigned saturate:1;
+ } header;
+
+ union {
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned src1_reg_file:2;
+ unsigned src1_reg_type:3;
+ unsigned pad:1;
+ unsigned dest_subreg_nr:5;
+ unsigned dest_reg_nr:8;
+ unsigned dest_horiz_stride:2;
+ unsigned dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned pad:6;
+ int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ unsigned dest_horiz_stride:2;
+ unsigned dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned src1_reg_file:2;
+ unsigned src1_reg_type:3;
+ unsigned pad0:1;
+ unsigned dest_writemask:4;
+ unsigned dest_subreg_nr:1;
+ unsigned dest_reg_nr:8;
+ unsigned pad1:2;
+ unsigned dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned pad0:6;
+ unsigned dest_writemask:4;
+ int dest_indirect_offset:6;
+ unsigned dest_subreg_nr:3;
+ unsigned pad1:2;
+ unsigned dest_address_mode:1;
+ } ia16;
+ } bits1;
+
+
+ union {
+ struct
+ {
+ unsigned src0_subreg_nr:5;
+ unsigned src0_reg_nr:8;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_horiz_stride:2;
+ unsigned src0_width:3;
+ unsigned src0_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad:6;
+ } da1;
+
+ struct
+ {
+ int src0_indirect_offset:10;
+ unsigned src0_subreg_nr:3;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_horiz_stride:2;
+ unsigned src0_width:3;
+ unsigned src0_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad:6;
+ } ia1;
+
+ struct
+ {
+ unsigned src0_swz_x:2;
+ unsigned src0_swz_y:2;
+ unsigned src0_subreg_nr:1;
+ unsigned src0_reg_nr:8;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_swz_z:2;
+ unsigned src0_swz_w:2;
+ unsigned pad0:1;
+ unsigned src0_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad1:6;
+ } da16;
+
+ struct
+ {
+ unsigned src0_swz_x:2;
+ unsigned src0_swz_y:2;
+ int src0_indirect_offset:6;
+ unsigned src0_subreg_nr:3;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_swz_z:2;
+ unsigned src0_swz_w:2;
+ unsigned pad0:1;
+ unsigned src0_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad1:6;
+ } ia16;
+
+ } bits2;
+
+ union
+ {
+ struct
+ {
+ unsigned src1_subreg_nr:5;
+ unsigned src1_reg_nr:8;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned pad:1;
+ unsigned src1_horiz_stride:2;
+ unsigned src1_width:3;
+ unsigned src1_vert_stride:4;
+ unsigned pad0:7;
+ } da1;
+
+ struct
+ {
+ unsigned src1_swz_x:2;
+ unsigned src1_swz_y:2;
+ unsigned src1_subreg_nr:1;
+ unsigned src1_reg_nr:8;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned pad0:1;
+ unsigned src1_swz_z:2;
+ unsigned src1_swz_w:2;
+ unsigned pad1:1;
+ unsigned src1_vert_stride:4;
+ unsigned pad2:7;
+ } da16;
+
+ struct
+ {
+ int src1_indirect_offset:10;
+ unsigned src1_subreg_nr:3;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned pad0:1;
+ unsigned src1_horiz_stride:2;
+ unsigned src1_width:3;
+ unsigned src1_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad1:6;
+ } ia1;
+
+ struct
+ {
+ unsigned src1_swz_x:2;
+ unsigned src1_swz_y:2;
+ int src1_indirect_offset:6;
+ unsigned src1_subreg_nr:3;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned pad0:1;
+ unsigned src1_swz_z:2;
+ unsigned src1_swz_w:2;
+ unsigned pad1:1;
+ unsigned src1_vert_stride:4;
+ unsigned flag_reg_nr:1;
+ unsigned pad2:6;
+ } ia16;
+
+
+ struct
+ {
+ int jump_count:16; /* note: signed */
+ unsigned pop_count:4;
+ unsigned pad0:12;
+ } if_else;
+
+ struct {
+ unsigned function:4;
+ unsigned int_type:1;
+ unsigned precision:1;
+ unsigned saturate:1;
+ unsigned data_type:1;
+ unsigned pad0:8;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } math;
+
+ struct {
+ unsigned binding_table_index:8;
+ unsigned sampler:4;
+ unsigned return_format:2;
+ unsigned msg_type:2;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } sampler;
+
+ struct brw_urb_immediate urb;
+
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:4;
+ unsigned msg_type:2;
+ unsigned target_cache:2;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } dp_read;
+
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:3;
+ unsigned pixel_scoreboard_clear:1;
+ unsigned msg_type:3;
+ unsigned send_commit_msg:1;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } dp_write;
+
+ struct {
+ unsigned pad:16;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } generic;
+
+ int d;
+ unsigned ud;
+ } bits3;
+};
+
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c
new file mode 100644
index 0000000000..b89756c47b
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_surface.c
@@ -0,0 +1,124 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "brw_blit.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_tile.h"
+#include "util/u_rect.h"
+
+
+
+/* Assumes all values are within bounds -- no checking at this level -
+ * do it higher up if required.
+ */
+static void
+brw_surface_copy(struct pipe_context *pipe,
+ boolean do_flip,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ struct pipe_surface *src,
+ unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+ assert( dst != src );
+ assert( dst->block.size == src->block.size );
+ assert( dst->block.width == src->block.height );
+ assert( dst->block.height == src->block.height );
+
+ if (0) {
+ void *dst_map = pipe->screen->surface_map( pipe->screen,
+ dst,
+ PIPE_BUFFER_USAGE_CPU_WRITE );
+
+ const void *src_map = pipe->screen->surface_map( pipe->screen,
+ src,
+ PIPE_BUFFER_USAGE_CPU_READ );
+
+ pipe_copy_rect(dst_map,
+ &dst->block,
+ dst->stride,
+ dstx, dsty,
+ width, height,
+ src_map,
+ do_flip ? -(int) src->stride : src->stride,
+ srcx, do_flip ? height - 1 - srcy : srcy);
+
+ pipe->screen->surface_unmap(pipe->screen, src);
+ pipe->screen->surface_unmap(pipe->screen, dst);
+ }
+ else {
+ assert(dst->block.width == 1);
+ assert(dst->block.height == 1);
+ brw_copy_blit(brw_context(pipe),
+ do_flip,
+ dst->block.size,
+ (short) src->stride/src->block.size, src->buffer, src->offset, FALSE,
+ (short) dst->stride/dst->block.size, dst->buffer, dst->offset, FALSE,
+ (short) srcx, (short) srcy, (short) dstx, (short) dsty,
+ (short) width, (short) height, PIPE_LOGICOP_COPY);
+ }
+}
+
+
+static void
+brw_surface_fill(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height, unsigned value)
+{
+ if (0) {
+ void *dst_map = pipe->screen->surface_map( pipe->screen,
+ dst,
+ PIPE_BUFFER_USAGE_CPU_WRITE );
+
+ pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value);
+
+ pipe->screen->surface_unmap(pipe->screen, dst);
+ }
+ else {
+ assert(dst->block.width == 1);
+ assert(dst->block.height == 1);
+ brw_fill_blit(brw_context(pipe),
+ dst->block.size,
+ (short) dst->stride/dst->block.size,
+ dst->buffer, dst->offset, FALSE,
+ (short) dstx, (short) dsty,
+ (short) width, (short) height,
+ value);
+ }
+}
+
+
+void
+brw_init_surface_functions(struct brw_context *brw)
+{
+ brw->pipe.surface_copy = brw_surface_copy;
+ brw->pipe.surface_fill = brw_surface_fill;
+}
diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c
new file mode 100644
index 0000000000..12e2e02cfd
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_tex_layout.c
@@ -0,0 +1,401 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "brw_context.h"
+#include "brw_tex_layout.h"
+
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+#if 0
+unsigned intel_compressed_alignment(unsigned internalFormat)
+{
+ unsigned alignment = 4;
+
+ switch (internalFormat) {
+ case GL_COMPRESSED_RGB_FXT1_3DFX:
+ case GL_COMPRESSED_RGBA_FXT1_3DFX:
+ alignment = 8;
+ break;
+
+ default:
+ break;
+ }
+
+ return alignment;
+}
+#endif
+
+static unsigned minify( unsigned d )
+{
+ return MAX2(1, d>>1);
+}
+
+
+static void intel_miptree_set_image_offset(struct brw_texture *tex,
+ unsigned level,
+ unsigned img,
+ unsigned x, unsigned y)
+{
+ struct pipe_texture *pt = &tex->base;
+ if (img == 0 && level == 0)
+ assert(x == 0 && y == 0);
+ assert(img < tex->nr_images[level]);
+
+ tex->image_offset[level][img] = y * tex->stride + x * pt->block.size;
+}
+
+static void intel_miptree_set_level_info(struct brw_texture *tex,
+ unsigned level,
+ unsigned nr_images,
+ unsigned x, unsigned y,
+ unsigned w, unsigned h, unsigned d)
+{
+ struct pipe_texture *pt = &tex->base;
+
+ assert(level < PIPE_MAX_TEXTURE_LEVELS);
+
+ pt->width[level] = w;
+ pt->height[level] = h;
+ pt->depth[level] = d;
+
+ pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w);
+ pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h);
+
+ tex->level_offset[level] = y * tex->stride + x * tex->base.block.size;
+ tex->nr_images[level] = nr_images;
+
+ /*
+ DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
+ level, w, h, d, x, y, tex->level_offset[level]);
+ */
+
+ /* Not sure when this would happen, but anyway:
+ */
+ if (tex->image_offset[level]) {
+ FREE(tex->image_offset[level]);
+ tex->image_offset[level] = NULL;
+ }
+
+ assert(nr_images);
+ assert(!tex->image_offset[level]);
+
+ tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned));
+ tex->image_offset[level][0] = 0;
+}
+
+static void i945_miptree_layout_2d(struct brw_texture *tex)
+{
+ struct pipe_texture *pt = &tex->base;
+ const int align_x = 2, align_y = 4;
+ unsigned level;
+ unsigned x = 0;
+ unsigned y = 0;
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+ unsigned nblocksx = pt->nblocksx[0];
+ unsigned nblocksy = pt->nblocksy[0];
+
+ tex->stride = align(pt->nblocksx[0] * pt->block.size, 4);
+
+ /* May need to adjust pitch to accomodate the placement of
+ * the 2nd mipmap level. This occurs when the alignment
+ * constraints of mipmap placement push the right edge of the
+ * 2nd mipmap level out past the width of its parent.
+ */
+ if (pt->last_level > 0) {
+ unsigned mip1_nblocksx
+ = align(pf_get_nblocksx(&pt->block, minify(width)), align_x)
+ + pf_get_nblocksx(&pt->block, minify(minify(width)));
+
+ if (mip1_nblocksx > nblocksx)
+ tex->stride = mip1_nblocksx * pt->block.size;
+ }
+
+ /* Pitch must be a whole number of dwords
+ */
+ tex->stride = align(tex->stride, 64);
+ tex->total_nblocksy = 0;
+
+ for (level = 0; level <= pt->last_level; level++) {
+ intel_miptree_set_level_info(tex, level, 1, x, y, width,
+ height, 1);
+
+ nblocksy = align(nblocksy, align_y);
+
+ /* Because the images are packed better, the final offset
+ * might not be the maximal one:
+ */
+ tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy);
+
+ /* Layout_below: step right after second mipmap level.
+ */
+ if (level == 1) {
+ x += align(nblocksx, align_x);
+ }
+ else {
+ y += nblocksy;
+ }
+
+ width = minify(width);
+ height = minify(height);
+ nblocksx = pf_get_nblocksx(&pt->block, width);
+ nblocksy = pf_get_nblocksy(&pt->block, height);
+ }
+}
+
+static boolean brw_miptree_layout(struct brw_texture *tex)
+{
+ struct pipe_texture *pt = &tex->base;
+ /* XXX: these vary depending on image format:
+ */
+/* int align_w = 4; */
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_3D: {
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+ unsigned depth = pt->depth[0];
+ unsigned nblocksx = pt->nblocksx[0];
+ unsigned nblocksy = pt->nblocksy[0];
+ unsigned pack_x_pitch, pack_x_nr;
+ unsigned pack_y_pitch;
+ unsigned level;
+ unsigned align_h = 2;
+ unsigned align_w = 4;
+
+ tex->total_nblocksy = 0;
+
+ tex->stride = align(pt->nblocksx[0], 4);
+ pack_y_pitch = align(pt->nblocksy[0], align_h);
+
+ pack_x_pitch = tex->stride / pt->block.size;
+ pack_x_nr = 1;
+
+ for (level = 0; level <= pt->last_level; level++) {
+ unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6;
+ int x = 0;
+ int y = 0;
+ uint q, j;
+
+ intel_miptree_set_level_info(tex, level, nr_images,
+ 0, tex->total_nblocksy,
+ width, height, depth);
+
+ for (q = 0; q < nr_images;) {
+ for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+ intel_miptree_set_image_offset(tex, level, q, x, y);
+ x += pack_x_pitch;
+ }
+
+ x = 0;
+ y += pack_y_pitch;
+ }
+
+
+ tex->total_nblocksy += y;
+ width = minify(width);
+ height = minify(height);
+ depth = minify(depth);
+ nblocksx = pf_get_nblocksx(&pt->block, width);
+ nblocksy = pf_get_nblocksy(&pt->block, height);
+
+ if (pt->compressed) {
+ pack_y_pitch = (height + 3) / 4;
+
+ if (pack_x_pitch > align(width, align_w)) {
+ pack_x_pitch = align(width, align_w);
+ pack_x_nr <<= 1;
+ }
+ } else {
+ if (pack_x_pitch > 4) {
+ pack_x_pitch >>= 1;
+ pack_x_nr <<= 1;
+ assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride);
+ }
+
+ if (pack_y_pitch > 2) {
+ pack_y_pitch >>= 1;
+ pack_y_pitch = align(pack_y_pitch, align_h);
+ }
+ }
+
+ }
+ break;
+ }
+
+ default:
+ i945_miptree_layout_2d(tex);
+ break;
+ }
+#if 0
+ PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+ pt->pitch,
+ pt->total_nblocksy,
+ pt->block.size,
+ pt->stride * pt->total_nblocksy );
+#endif
+
+ return TRUE;
+}
+
+
+static struct pipe_texture *
+brw_texture_create_screen(struct pipe_screen *screen,
+ const struct pipe_texture *templat)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct brw_texture *tex = CALLOC_STRUCT(brw_texture);
+
+ if (tex) {
+ tex->base = *templat;
+ tex->base.refcount = 1;
+
+ tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]);
+ tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]);
+
+ if (brw_miptree_layout(tex))
+ tex->buffer = ws->buffer_create(ws, 64,
+ PIPE_BUFFER_USAGE_PIXEL,
+ tex->stride *
+ tex->total_nblocksy);
+
+ if (!tex->buffer) {
+ FREE(tex);
+ return NULL;
+ }
+ }
+
+ return &tex->base;
+}
+
+
+static void
+brw_texture_release_screen(struct pipe_screen *screen,
+ struct pipe_texture **pt)
+{
+ if (!*pt)
+ return;
+
+ /*
+ DBG("%s %p refcount will be %d\n",
+ __FUNCTION__, (void *) *pt, (*pt)->refcount - 1);
+ */
+ if (--(*pt)->refcount <= 0) {
+ struct pipe_winsys *ws = screen->winsys;
+ struct brw_texture *tex = (struct brw_texture *)*pt;
+ uint i;
+
+ /*
+ DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
+ */
+
+ winsys_buffer_reference(ws, &tex->buffer, NULL);
+
+ for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
+ if (tex->image_offset[i])
+ free(tex->image_offset[i]);
+
+ free(tex);
+ }
+ *pt = NULL;
+}
+
+
+static struct pipe_surface *
+brw_get_tex_surface_screen(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct brw_texture *tex = (struct brw_texture *)pt;
+ struct pipe_surface *ps;
+ unsigned offset; /* in bytes */
+
+ offset = tex->level_offset[level];
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ offset += tex->image_offset[level][face];
+ }
+ else if (pt->target == PIPE_TEXTURE_3D) {
+ offset += tex->image_offset[level][zslice];
+ }
+ else {
+ assert(face == 0);
+ assert(zslice == 0);
+ }
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (ps) {
+ ps->refcount = 1;
+ pipe_texture_reference(&ps->texture, pt);
+ winsys_buffer_reference(ws, &ps->buffer, tex->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = tex->stride;
+ ps->offset = offset;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+ }
+ return ps;
+}
+
+
+void
+brw_init_texture_functions(struct brw_context *brw)
+{
+// brw->pipe.texture_update = brw_texture_update;
+}
+
+
+void
+brw_init_screen_texture_funcs(struct pipe_screen *screen)
+{
+ screen->texture_create = brw_texture_create_screen;
+ screen->texture_release = brw_texture_release_screen;
+ screen->get_tex_surface = brw_get_tex_surface_screen;
+}
+
diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h
new file mode 100644
index 0000000000..a6b6ba8146
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_tex_layout.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+
+
+#ifndef BRW_TEX_LAYOUT_H
+#define BRW_TEX_LAYOUT_H
+
+
+struct brw_context;
+struct pipe_screen;
+
+
+extern void
+brw_init_texture_functions(struct brw_context *brw);
+
+extern void
+brw_init_screen_texture_funcs(struct pipe_screen *screen);
+
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_urb.c b/src/gallium/drivers/i965simple/brw_urb.c
new file mode 100644
index 0000000000..101a4367b9
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_urb.c
@@ -0,0 +1,186 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+//#include "brw_state.h"
+#include "brw_batch.h"
+#include "brw_defines.h"
+
+#define VS 0
+#define GS 1
+#define CLP 2
+#define SF 3
+#define CS 4
+
+/* XXX: Are the min_entry_size numbers useful?
+ * XXX: Verify min_nr_entries, esp for VS.
+ * XXX: Verify SF min_entry_size.
+ */
+static const struct {
+ unsigned min_nr_entries;
+ unsigned preferred_nr_entries;
+ unsigned min_entry_size;
+ unsigned max_entry_size;
+} limits[CS+1] = {
+ { 8, 32, 1, 5 }, /* vs */
+ { 4, 8, 1, 5 }, /* gs */
+ { 6, 8, 1, 5 }, /* clp */
+ { 1, 8, 1, 12 }, /* sf */
+ { 1, 4, 1, 32 } /* cs */
+};
+
+
+static boolean check_urb_layout( struct brw_context *brw )
+{
+ brw->urb.vs_start = 0;
+ brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
+ brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
+ brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
+ brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
+
+ return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256;
+}
+
+/* Most minimal update, forces re-emit of URB fence packet after GS
+ * unit turned on/off.
+ */
+static void recalculate_urb_fence( struct brw_context *brw )
+{
+ unsigned csize = brw->curbe.total_size;
+ unsigned vsize = brw->vs.prog_data->urb_entry_size;
+ unsigned sfsize = brw->sf.prog_data->urb_entry_size;
+
+ if (csize < limits[CS].min_entry_size)
+ csize = limits[CS].min_entry_size;
+
+ if (vsize < limits[VS].min_entry_size)
+ vsize = limits[VS].min_entry_size;
+
+ if (sfsize < limits[SF].min_entry_size)
+ sfsize = limits[SF].min_entry_size;
+
+ if (brw->urb.vsize < vsize ||
+ brw->urb.sfsize < sfsize ||
+ brw->urb.csize < csize ||
+ (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize ||
+ brw->urb.sfsize > brw->urb.sfsize ||
+ brw->urb.csize > brw->urb.csize))) {
+
+
+ brw->urb.csize = csize;
+ brw->urb.sfsize = sfsize;
+ brw->urb.vsize = vsize;
+
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
+
+ if (!check_urb_layout(brw)) {
+ brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+
+ brw->urb.constrained = 1;
+
+ if (!check_urb_layout(brw)) {
+ /* This is impossible, given the maximal sizes of urb
+ * entries and the values for minimum nr of entries
+ * provided above.
+ */
+ debug_printf("couldn't calculate URB layout!\n");
+ exit(1);
+ }
+
+ if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
+ debug_printf("URB CONSTRAINED\n");
+ }
+ else
+ brw->urb.constrained = 0;
+
+ if (BRW_DEBUG & DEBUG_URB)
+ debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+ brw->urb.vs_start,
+ brw->urb.gs_start,
+ brw->urb.clip_start,
+ brw->urb.sf_start,
+ brw->urb.cs_start,
+ 256);
+
+ brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
+ }
+}
+
+
+const struct brw_tracked_state brw_recalculate_urb_fence = {
+ .dirty = {
+ .brw = BRW_NEW_CURBE_OFFSETS,
+ .cache = (CACHE_NEW_VS_PROG |
+ CACHE_NEW_SF_PROG)
+ },
+ .update = recalculate_urb_fence
+};
+
+
+
+
+
+void brw_upload_urb_fence(struct brw_context *brw)
+{
+ struct brw_urb_fence uf;
+ memset(&uf, 0, sizeof(uf));
+
+ uf.header.opcode = CMD_URB_FENCE;
+ uf.header.length = sizeof(uf)/4-2;
+ uf.header.vs_realloc = 1;
+ uf.header.gs_realloc = 1;
+ uf.header.clp_realloc = 1;
+ uf.header.sf_realloc = 1;
+ uf.header.vfe_realloc = 1;
+ uf.header.cs_realloc = 1;
+
+ /* The ordering below is correct, not the layout in the
+ * instruction.
+ *
+ * There are 256 urb reg pairs in total.
+ */
+ uf.bits0.vs_fence = brw->urb.gs_start;
+ uf.bits0.gs_fence = brw->urb.clip_start;
+ uf.bits0.clp_fence = brw->urb.sf_start;
+ uf.bits1.sf_fence = brw->urb.cs_start;
+ uf.bits1.cs_fence = 256;
+
+ BRW_BATCH_STRUCT(brw, &uf);
+}
diff --git a/src/gallium/drivers/i965simple/brw_util.c b/src/gallium/drivers/i965simple/brw_util.c
new file mode 100644
index 0000000000..42391d7c8c
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_util.c
@@ -0,0 +1,104 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_util.h"
+#include "brw_defines.h"
+
+#include "pipe/p_defines.h"
+
+unsigned brw_count_bits( unsigned val )
+{
+ unsigned i;
+ for (i = 0; val ; val >>= 1)
+ if (val & 1)
+ i++;
+ return i;
+}
+
+
+unsigned brw_translate_blend_equation( int mode )
+{
+ switch (mode) {
+ case PIPE_BLEND_ADD:
+ return BRW_BLENDFUNCTION_ADD;
+ case PIPE_BLEND_MIN:
+ return BRW_BLENDFUNCTION_MIN;
+ case PIPE_BLEND_MAX:
+ return BRW_BLENDFUNCTION_MAX;
+ case PIPE_BLEND_SUBTRACT:
+ return BRW_BLENDFUNCTION_SUBTRACT;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
+ default:
+ assert(0);
+ return BRW_BLENDFUNCTION_ADD;
+ }
+}
+
+unsigned brw_translate_blend_factor( int factor )
+{
+ switch(factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ return BRW_BLENDFACTOR_ZERO;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return BRW_BLENDFACTOR_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_ONE:
+ return BRW_BLENDFACTOR_ONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return BRW_BLENDFACTOR_SRC_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return BRW_BLENDFACTOR_INV_SRC_COLOR;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return BRW_BLENDFACTOR_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return BRW_BLENDFACTOR_INV_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return BRW_BLENDFACTOR_INV_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return BRW_BLENDFACTOR_DST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return BRW_BLENDFACTOR_INV_DST_ALPHA;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return BRW_BLENDFACTOR_CONST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return BRW_BLENDFACTOR_INV_CONST_COLOR;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return BRW_BLENDFACTOR_CONST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return BRW_BLENDFACTOR_INV_CONST_ALPHA;
+ default:
+ assert(0);
+ return BRW_BLENDFACTOR_ZERO;
+ }
+}
diff --git a/src/gallium/drivers/i965simple/brw_util.h b/src/gallium/drivers/i965simple/brw_util.h
new file mode 100644
index 0000000000..d60e5934db
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_util.h
@@ -0,0 +1,43 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_UTIL_H
+#define BRW_UTIL_H
+
+#include "pipe/p_state.h"
+
+extern unsigned brw_count_bits( unsigned val );
+extern unsigned brw_translate_blend_factor( int factor );
+extern unsigned brw_translate_blend_equation( int mode );
+
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_vs.c b/src/gallium/drivers/i965simple/brw_vs.c
new file mode 100644
index 0000000000..92327e896d
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_vs.c
@@ -0,0 +1,120 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_util.h"
+#include "brw_state.h"
+
+
+static void do_vs_prog( struct brw_context *brw,
+ const struct brw_vertex_program *vp,
+ struct brw_vs_prog_key *key )
+{
+ unsigned program_size;
+ const unsigned *program;
+ struct brw_vs_compile c;
+
+ memset(&c, 0, sizeof(c));
+ memcpy(&c.key, key, sizeof(*key));
+
+ brw_init_compile(&c.func);
+ c.vp = vp;
+
+ c.prog_data.outputs_written = vp->info.num_outputs;
+ c.prog_data.inputs_read = vp->info.num_inputs;
+
+#if 0
+ if (c.key.copy_edgeflag) {
+ c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE;
+ c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
+ }
+#endif
+
+ /* Emit GEN4 code.
+ */
+ brw_vs_emit(&c);
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /*
+ */
+ brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG],
+ &c.key,
+ sizeof(c.key),
+ program,
+ program_size,
+ &c.prog_data,
+ &brw->vs.prog_data);
+}
+
+
+static void brw_upload_vs_prog( struct brw_context *brw )
+{
+ struct brw_vs_prog_key key;
+ const struct brw_vertex_program *vp = brw->attribs.VertexProgram;
+
+ assert(vp);
+
+ memset(&key, 0, sizeof(key));
+
+ /* Just upload the program verbatim for now. Always send it all
+ * the inputs it asks for, whether they are varying or not.
+ */
+ key.program_string_id = vp->id;
+ key.nr_userclip = brw->attribs.Clip.nr;
+ key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL);
+
+ /* Make an early check for the key.
+ */
+ if (brw_search_cache(&brw->cache[BRW_VS_PROG],
+ &key, sizeof(key),
+ &brw->vs.prog_data,
+ &brw->vs.prog_gs_offset))
+ return;
+
+ do_vs_prog(brw, vp, &key);
+}
+
+
+/* See brw_vs.c:
+ */
+const struct brw_tracked_state brw_vs_prog = {
+ .dirty = {
+ .brw = BRW_NEW_VS,
+ .cache = 0
+ },
+ .update = brw_upload_vs_prog
+};
diff --git a/src/gallium/drivers/i965simple/brw_vs.h b/src/gallium/drivers/i965simple/brw_vs.h
new file mode 100644
index 0000000000..070f9dfcae
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_vs.h
@@ -0,0 +1,82 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_VS_H
+#define BRW_VS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+struct brw_vs_prog_key {
+ unsigned program_string_id;
+ unsigned nr_userclip:4;
+ unsigned copy_edgeflag:1;
+ unsigned know_w_is_one:1;
+ unsigned pad:26;
+};
+
+
+struct brw_vs_compile {
+ struct brw_compile func;
+ struct brw_vs_prog_key key;
+ struct brw_vs_prog_data prog_data;
+
+ const struct brw_vertex_program *vp;
+
+ unsigned nr_inputs;
+
+ unsigned first_output;
+ unsigned nr_outputs;
+
+ unsigned first_tmp;
+ unsigned last_tmp;
+
+ struct brw_reg r0;
+ struct brw_reg r1;
+ struct brw_reg regs[12][128];
+ struct brw_reg tmp;
+ struct brw_reg stack;
+
+ struct {
+ boolean used_in_src;
+ struct brw_reg reg;
+ } output_regs[128];
+
+ struct brw_reg userplane[6];
+
+};
+
+void brw_vs_emit( struct brw_vs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c
new file mode 100644
index 0000000000..34dbc0624d
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_vs_emit.c
@@ -0,0 +1,1330 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_context.h"
+#include "brw_vs.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+
+struct brw_prog_info {
+ unsigned num_temps;
+ unsigned num_addrs;
+ unsigned num_consts;
+
+ unsigned writes_psize;
+
+ unsigned pos_idx;
+ unsigned result_edge_idx;
+ unsigned edge_flag_idx;
+ unsigned psize_idx;
+};
+
+/* Do things as simply as possible. Allocate and populate all regs
+ * ahead of time.
+ */
+static void brw_vs_alloc_regs( struct brw_vs_compile *c,
+ struct brw_prog_info *info )
+{
+ unsigned i, reg = 0, mrf;
+ unsigned nr_params;
+
+ /* r0 -- reserved as usual
+ */
+ c->r0 = brw_vec8_grf(reg, 0); reg++;
+
+ /* User clip planes from curbe:
+ */
+ if (c->key.nr_userclip) {
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+ }
+
+ /* Deal with curbe alignment:
+ */
+ reg += ((6+c->key.nr_userclip+3)/4)*2;
+ }
+
+ /* Vertex program parameters from curbe:
+ */
+ nr_params = c->prog_data.max_const;
+ for (i = 0; i < nr_params; i++) {
+ c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+ }
+ reg += (nr_params+1)/2;
+ c->prog_data.curb_read_length = reg - 1;
+
+
+
+ /* Allocate input regs:
+ */
+ c->nr_inputs = c->vp->info.num_inputs;
+ for (i = 0; i < c->nr_inputs; i++) {
+ c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+
+
+ /* Allocate outputs: TODO: could organize the non-position outputs
+ * to go straight into message regs.
+ */
+ c->nr_outputs = 0;
+ c->first_output = reg;
+ mrf = 4;
+ for (i = 0; i < c->vp->info.num_outputs; i++) {
+ c->nr_outputs++;
+#if 0
+ if (i == VERT_RESULT_HPOS) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ else if (i == VERT_RESULT_PSIZ) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ mrf++; /* just a placeholder? XXX fix later stages & remove this */
+ }
+ else {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
+ mrf++;
+ }
+#else
+ /*treat pos differently for now */
+ if (i == info->pos_idx) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ } else {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
+ mrf++;
+ }
+#endif
+ }
+
+ /* Allocate program temporaries:
+ */
+ for (i = 0; i < info->num_temps; i++) {
+ c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+
+ /* Address reg(s). Don't try to use the internal address reg until
+ * deref time.
+ */
+ for (i = 0; i < info->num_addrs; i++) {
+ c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
+ reg,
+ 0,
+ BRW_REGISTER_TYPE_D,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XXXX,
+ TGSI_WRITEMASK_X);
+ reg++;
+ }
+
+ for (i = 0; i < 128; i++) {
+ if (c->output_regs[i].used_in_src) {
+ c->output_regs[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+ reg += 2;
+
+
+ /* Some opcodes need an internal temporary:
+ */
+ c->first_tmp = reg;
+ c->last_tmp = reg; /* for allocation purposes */
+
+ /* Each input reg holds data from two vertices. The
+ * urb_read_length is the number of registers read from *each*
+ * vertex urb, so is half the amount:
+ */
+ c->prog_data.urb_read_length = (c->nr_inputs+1)/2;
+
+ c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4;
+ c->prog_data.total_grf = reg;
+}
+
+
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+ struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
+
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+static void release_tmps( struct brw_vs_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+static void unalias1( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if (dst.file == arg0.file && dst.nr == arg0.nr) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0);
+ brw_MOV(p, dst, tmp);
+ }
+ else {
+ func(c, dst, arg0);
+ }
+}
+
+static void unalias2( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+ (dst.file == arg1.file && dst.nr == arg1.nr)) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0, arg1);
+ brw_MOV(p, dst, tmp);
+ }
+ else {
+ func(c, dst, arg0, arg1);
+ }
+}
+
+static void emit_sop( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ unsigned cond)
+{
+ brw_push_insn_state(p);
+ brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, dst, brw_imm_f(1.0f));
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, brw_imm_f(0.0f));
+ brw_pop_insn_state(p);
+}
+
+static void emit_seq( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
+}
+
+static void emit_sne( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
+}
+static void emit_slt( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
+}
+
+static void emit_sle( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
+}
+
+static void emit_sgt( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
+}
+
+static void emit_sge( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
+}
+
+static void emit_max( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
+ brw_SEL(p, dst, arg1, arg0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+static void emit_min( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+static void emit_math1( struct brw_vs_compile *c,
+ unsigned function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ unsigned precision)
+{
+ /* There are various odd behaviours with SEND on the simulator. In
+ * addition there are documented issues with the fact that the GEN4
+ * processor doesn't do dependency control properly on SEND
+ * results. So, on balance, this kludge to get around failures
+ * with writemasked math results looks like it might be necessary
+ * whether that turns out to be a simulator bug or not:
+ */
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_math(p,
+ tmp,
+ function,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ arg0,
+ BRW_MATH_DATA_SCALAR,
+ precision);
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+static void emit_math2( struct brw_vs_compile *c,
+ unsigned function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ unsigned precision)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_MOV(p, brw_message_reg(3), arg1);
+
+ brw_math(p,
+ tmp,
+ function,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ arg0,
+ BRW_MATH_DATA_SCALAR,
+ precision);
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+
+static void emit_exp_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) {
+ struct brw_reg tmp = get_tmp(c);
+ struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
+
+ /* tmp_d = floor(arg0.x) */
+ brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
+
+ /* result[0] = 2.0 ^ tmp */
+
+ /* Adjust exponent for floating point:
+ * exp += 127
+ */
+ brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127));
+
+ /* Install exponent and sign.
+ * Excess drops off the edge:
+ */
+ brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X),
+ tmp_d, brw_imm_d(23));
+
+ release_tmp(c, tmp);
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) {
+ /* result[1] = arg0.x - floor(arg0.x) */
+ brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0));
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
+ /* As with the LOG instruction, we might be better off just
+ * doing a taylor expansion here, seeing as we have to do all
+ * the prep work.
+ *
+ * If mathbox partial precision is too low, consider also:
+ * result[3] = result[0] * EXP(result[1])
+ */
+ emit_math1(c,
+ BRW_MATH_FUNCTION_EXP,
+ brw_writemask(dst, TGSI_WRITEMASK_Z),
+ brw_swizzle1(arg0, 0),
+ BRW_MATH_PRECISION_PARTIAL);
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
+ /* result[3] = 1.0; */
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1));
+ }
+}
+
+
+static void emit_log_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+ struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+ boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp) {
+ tmp = get_tmp(c);
+ tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+ }
+
+ /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
+ * according to spec:
+ *
+ * These almost look likey they could be joined up, but not really
+ * practical:
+ *
+ * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
+ * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
+ */
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) {
+ brw_AND(p,
+ brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
+ brw_swizzle1(arg0_ud, 0),
+ brw_imm_ud((1U<<31)-1));
+
+ brw_SHR(p,
+ brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
+ tmp_ud,
+ brw_imm_ud(23));
+
+ brw_ADD(p,
+ brw_writemask(tmp, TGSI_WRITEMASK_X),
+ retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */
+ brw_imm_d(-127));
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) {
+ brw_AND(p,
+ brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
+ brw_swizzle1(arg0_ud, 0),
+ brw_imm_ud((1<<23)-1));
+
+ brw_OR(p,
+ brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
+ tmp_ud,
+ brw_imm_ud(127<<23));
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
+ /* result[2] = result[0] + LOG2(result[1]); */
+
+ /* Why bother? The above is just a hint how to do this with a
+ * taylor series. Maybe we *should* use a taylor series as by
+ * the time all the above has been done it's almost certainly
+ * quicker than calling the mathbox, even with low precision.
+ *
+ * Options are:
+ * - result[0] + mathbox.LOG2(result[1])
+ * - mathbox.LOG2(arg0.x)
+ * - result[0] + inline_taylor_approx(result[1])
+ */
+ emit_math1(c,
+ BRW_MATH_FUNCTION_LOG,
+ brw_writemask(tmp, TGSI_WRITEMASK_Z),
+ brw_swizzle1(tmp, 1),
+ BRW_MATH_PRECISION_FULL);
+
+ brw_ADD(p,
+ brw_writemask(tmp, TGSI_WRITEMASK_Z),
+ brw_swizzle1(tmp, 2),
+ brw_swizzle1(tmp, 0));
+ }
+
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
+ /* result[3] = 1.0; */
+ brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1));
+ }
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+
+
+/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
+ */
+static void emit_dst_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1)
+{
+ struct brw_compile *p = &c->func;
+
+ /* There must be a better way to do this:
+ */
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X)
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0));
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y)
+ brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1);
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z)
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0);
+ if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W)
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1);
+}
+
+static void emit_xpd( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg t,
+ struct brw_reg u)
+{
+ brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3));
+ brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));
+}
+
+
+
+static void emit_lit_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+ struct brw_reg tmp = dst;
+ boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0));
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1));
+
+ /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
+ * to get all channels active inside the IF. In the clipping code
+ * we run with NoMask, so it's not an option and we can use
+ * BRW_EXECUTE_1 for all comparisions.
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
+ if_insn = brw_IF(p, BRW_EXECUTE_8);
+ {
+ brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0));
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
+ brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ emit_math2(c,
+ BRW_MATH_FUNCTION_POW,
+ brw_writemask(dst, TGSI_WRITEMASK_Z),
+ brw_swizzle1(tmp, 2),
+ brw_swizzle1(arg0, 3),
+ BRW_MATH_PRECISION_PARTIAL);
+ }
+
+ brw_ENDIF(p, if_insn);
+}
+
+
+
+
+
+/* TODO: relative addressing!
+ */
+static struct brw_reg get_reg( struct brw_vs_compile *c,
+ unsigned file,
+ unsigned index )
+{
+ switch (file) {
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ case TGSI_FILE_CONSTANT:
+ assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0);
+ return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm];
+ case TGSI_FILE_IMMEDIATE:
+ assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
+ return c->regs[TGSI_FILE_CONSTANT][index];
+ case TGSI_FILE_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case TGSI_FILE_NULL: /* undef values */
+ return brw_null_reg();
+
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
+
+static struct brw_reg deref( struct brw_vs_compile *c,
+ struct brw_reg arg,
+ int offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = vec4(get_tmp(c));
+ struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
+ unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+ struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+ {
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ /* This is pretty clunky - load the address register twice and
+ * fetch each 4-dword value in turn. There must be a way to do
+ * this in a single pass, but I couldn't get it to work.
+ */
+ brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+ brw_MOV(p, tmp, indirect);
+
+ brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+ brw_MOV(p, suboffset(tmp, 4), indirect);
+
+ brw_pop_insn_state(p);
+ }
+
+ return vec8(tmp);
+}
+
+
+static void emit_arl( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_RNDD(p, tmp, arg0);
+ brw_MUL(p, dst, tmp, brw_imm_d(16));
+
+ if (need_tmp)
+ release_tmp(c, tmp);
+}
+
+
+/* Will return mangled results for SWZ op. The emit_swz() function
+ * ignores this result and recalculates taking extended swizzles into
+ * account.
+ */
+static struct brw_reg get_arg( struct brw_vs_compile *c,
+ struct tgsi_src_register *src )
+{
+ struct brw_reg reg;
+
+ if (src->File == TGSI_FILE_NULL)
+ return brw_null_reg();
+
+#if 0
+ if (src->RelAddr)
+ reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
+ else
+#endif
+ reg = get_reg(c, src->File, src->Index);
+
+ /* Convert 3-bit swizzle to 2-bit.
+ */
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX,
+ src->SwizzleY,
+ src->SwizzleZ,
+ src->SwizzleW);
+
+ /* Note this is ok for non-swizzle instructions:
+ */
+ reg.negate = src->Negate ? 1 : 0;
+
+ return reg;
+}
+
+
+static struct brw_reg get_dst( struct brw_vs_compile *c,
+ const struct tgsi_dst_register *dst )
+{
+ struct brw_reg reg = get_reg(c, dst->File, dst->Index);
+
+ reg.dw1.bits.writemask = dst->WriteMask;
+
+ return reg;
+}
+
+
+
+
+static void emit_swz( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct tgsi_src_register src )
+{
+ struct brw_compile *p = &c->func;
+ unsigned zeros_mask = 0;
+ unsigned ones_mask = 0;
+ unsigned src_mask = 0;
+ ubyte src_swz[4];
+ boolean need_tmp = (src.Negate &&
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+ struct brw_reg tmp = dst;
+ unsigned i;
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ for (i = 0; i < 4; i++) {
+ if (dst.dw1.bits.writemask & (1<<i)) {
+ ubyte s = 0;
+ switch(i) {
+ case 0:
+ s = src.SwizzleX;
+ break;
+ s = src.SwizzleY;
+ case 1:
+ break;
+ s = src.SwizzleZ;
+ case 2:
+ break;
+ s = src.SwizzleW;
+ case 3:
+ break;
+ }
+ switch (s) {
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
+ src_mask |= 1<<i;
+ src_swz[i] = s;
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zeros_mask |= 1<<i;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ ones_mask |= 1<<i;
+ break;
+ }
+ }
+ }
+
+ /* Do src first, in case dst aliases src:
+ */
+ if (src_mask) {
+ struct brw_reg arg0;
+
+#if 0
+ if (src.RelAddr)
+ arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
+ else
+#endif
+ arg0 = get_reg(c, src.File, src.Index);
+
+ arg0 = brw_swizzle(arg0,
+ src_swz[0], src_swz[1],
+ src_swz[2], src_swz[3]);
+
+ brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
+ }
+
+ if (zeros_mask)
+ brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
+
+ if (ones_mask)
+ brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
+
+ if (src.Negate)
+ brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+
+/* Post-vertex-program processing. Send the results to the URB.
+ */
+static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg m0 = brw_message_reg(0);
+ struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx];
+ struct brw_reg ndc;
+
+ if (c->key.copy_edgeflag) {
+ brw_MOV(p,
+ get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx),
+ get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx));
+ }
+
+
+ /* Build ndc coords? TODO: Shortcircuit when w is known to be one.
+ */
+ if (!c->key.know_w_is_one) {
+ ndc = get_tmp(c);
+ emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+ brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc);
+ }
+ else {
+ ndc = pos;
+ }
+
+ /* This includes the workaround for -ve rhw, so is no longer an
+ * optional step:
+ */
+ if (info->writes_psize ||
+ c->key.nr_userclip ||
+ !c->key.know_w_is_one)
+ {
+ struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ unsigned i;
+
+ brw_MOV(p, header1, brw_imm_ud(0));
+
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ if (info->writes_psize) {
+ struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx];
+ brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W),
+ brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+ brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1,
+ brw_imm_ud(0x7ff<<8));
+ }
+
+
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
+ brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+
+ /* i965 clipping workaround:
+ * 1) Test for -ve rhw
+ * 2) If set,
+ * set ndc = (0,0,0,0)
+ * set ucp[6] = 1
+ *
+ * Later, clipping will detect ucp[6] and ensure the primitive is
+ * clipped against all fixed planes.
+ */
+ if (!c->key.know_w_is_one) {
+ brw_CMP(p,
+ vec8(brw_null_reg()),
+ BRW_CONDITIONAL_L,
+ brw_swizzle1(ndc, 3),
+ brw_imm_f(0));
+
+ brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6));
+ brw_MOV(p, ndc, brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ brw_set_access_mode(p, BRW_ALIGN_1); /* why? */
+ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ release_tmp(c, header1);
+ }
+ else {
+ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
+ }
+
+
+ /* Emit the (interleaved) headers for the two vertices - an 8-reg
+ * of zeros followed by two sets of NDC coordinates:
+ */
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, offset(m0, 2), ndc);
+ brw_MOV(p, offset(m0, 3), pos);
+
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ 0, /* starting mrf reg nr */
+ c->r0, /* src */
+ 0, /* allocate */
+ 1, /* used */
+ c->nr_outputs + 3, /* msg len */
+ 0, /* response len */
+ 1, /* eot */
+ 1, /* writes complete */
+ 0, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+
+}
+
+static void
+post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
+{
+ struct tgsi_parse_context parse;
+ const struct tgsi_token *tokens = c->vp->program.tokens;
+ tgsi_parse_init(&parse, tokens);
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+ if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+#if 0
+ struct brw_instruction *brw_inst1, *brw_inst2;
+ const struct tgsi_full_instruction *inst1, *inst2;
+ int offset;
+ inst1 = &parse.FullToken.FullInstruction;
+ brw_inst1 = inst1->Data;
+ switch (inst1->Opcode) {
+ case TGSI_OPCODE_CAL:
+ case TGSI_OPCODE_BRA:
+ target_insn = inst1->BranchTarget;
+ inst2 = &c->vp->program.Base.Instructions[target_insn];
+ brw_inst2 = inst2->Data;
+ offset = brw_inst2 - brw_inst1;
+ brw_set_src1(brw_inst1, brw_imm_d(offset*16));
+ break;
+ case TGSI_OPCODE_END:
+ offset = end_inst - brw_inst1;
+ brw_set_src1(brw_inst1, brw_imm_d(offset*16));
+ break;
+ default:
+ break;
+ }
+#endif
+ }
+ }
+ tgsi_parse_free(&parse);
+}
+
+static void process_declaration(const struct tgsi_full_declaration *decl,
+ struct brw_prog_info *info)
+{
+ int first = decl->DeclarationRange.First;
+ int last = decl->DeclarationRange.Last;
+
+ switch(decl->Declaration.File) {
+ case TGSI_FILE_CONSTANT:
+ info->num_consts += last - first + 1;
+ break;
+ case TGSI_FILE_INPUT: {
+ }
+ break;
+ case TGSI_FILE_OUTPUT: {
+ assert(last == first); /* for now */
+ if (decl->Declaration.Semantic) {
+ switch (decl->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION: {
+ info->pos_idx = first;
+ }
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ break;
+ case TGSI_SEMANTIC_FOG:
+ break;
+ case TGSI_SEMANTIC_PSIZE: {
+ info->writes_psize = TRUE;
+ info->psize_idx = first;
+ }
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ break;
+ }
+ }
+ }
+ break;
+ case TGSI_FILE_TEMPORARY: {
+ info->num_temps += (last - first) + 1;
+ }
+ break;
+ case TGSI_FILE_SAMPLER: {
+ }
+ break;
+ case TGSI_FILE_ADDRESS: {
+ info->num_addrs += (last - first) + 1;
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE: {
+ }
+ break;
+ case TGSI_FILE_NULL: {
+ }
+ break;
+ }
+}
+
+static void process_instruction(struct brw_vs_compile *c,
+ struct tgsi_full_instruction *inst,
+ struct brw_prog_info *info)
+{
+ struct brw_reg args[3], dst;
+ struct brw_compile *p = &c->func;
+ /*struct brw_indirect stack_index = brw_indirect(0, 0);*/
+ unsigned i;
+ unsigned index;
+ unsigned file;
+ /*FIXME: might not be the only one*/
+ const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister;
+ /*
+ struct brw_instruction *if_inst[MAX_IFSN];
+ unsigned insn, if_insn = 0;
+ */
+
+ for (i = 0; i < 3; i++) {
+ struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+ index = src->SrcRegister.Index;
+ file = src->SrcRegister.File;
+ if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
+ args[i] = c->output_regs[index].reg;
+ else
+ args[i] = get_arg(c, &src->SrcRegister);
+ }
+
+ /* Get dest regs. Note that it is possible for a reg to be both
+ * dst and arg, given the static allocation of registers. So
+ * care needs to be taken emitting multi-operation instructions.
+ */
+ index = dst_reg->Index;
+ file = dst_reg->File;
+ if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
+ dst = c->output_regs[index].reg;
+ else
+ dst = get_dst(c, dst_reg);
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ brw_MOV(p, dst, brw_abs(args[0]));
+ break;
+ case TGSI_OPCODE_ADD:
+ brw_ADD(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DP3:
+ brw_DP3(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DP4:
+ brw_DP4(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DPH:
+ brw_DPH(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DST:
+ unalias2(c, dst, args[0], args[1], emit_dst_noalias);
+ break;
+ case TGSI_OPCODE_EXP:
+ unalias1(c, dst, args[0], emit_exp_noalias);
+ break;
+ case TGSI_OPCODE_EX2:
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_ARL:
+ emit_arl(c, dst, args[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ brw_RNDD(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_FRC:
+ brw_FRC(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ unalias1(c, dst, args[0], emit_log_noalias);
+ break;
+ case TGSI_OPCODE_LG2:
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_LIT:
+ unalias1(c, dst, args[0], emit_lit_noalias);
+ break;
+ case TGSI_OPCODE_MAD:
+ brw_MOV(p, brw_acc_reg(), args[2]);
+ brw_MAC(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MAX:
+ emit_max(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MIN:
+ emit_min(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_SWZ:
+#if 0
+ /* The args[0] value can't be used here as it won't have
+ * correctly encoded the full swizzle:
+ */
+ emit_swz(c, dst, inst->SrcReg[0] );
+#endif
+ brw_MOV(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_MUL:
+ brw_MUL(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_POW:
+ emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_RCP:
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_RSQ:
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+
+ case TGSI_OPCODE_SEQ:
+ emit_seq(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SNE:
+ emit_sne(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGE:
+ emit_sge(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGT:
+ emit_sgt(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SLT:
+ emit_slt(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SLE:
+ emit_sle(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SUB:
+ brw_ADD(p, dst, args[0], negate(args[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ emit_xpd(p, dst, args[0], args[1]);
+ break;
+#if 0
+ case TGSI_OPCODE_IF:
+ assert(if_insn < MAX_IFSN);
+ if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ break;
+ case TGSI_OPCODE_ELSE:
+ if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+ break;
+ case TGSI_OPCODE_ENDIF:
+ assert(if_insn > 0);
+ brw_ENDIF(p, if_inst[--if_insn]);
+ break;
+ case TGSI_OPCODE_BRA:
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_set_predicate_control_flag_value(p, 0xff);
+ break;
+ case TGSI_OPCODE_CAL:
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(4));
+ inst->Data = &p->store[p->nr_insn];
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+#endif
+ case TGSI_OPCODE_RET:
+#if 0
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+#else
+ /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/
+#endif
+ break;
+ case TGSI_OPCODE_END:
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case TGSI_OPCODE_BGNSUB:
+ case TGSI_OPCODE_ENDSUB:
+ break;
+ default:
+ debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode);
+ break;
+ }
+
+ if (dst_reg->File == TGSI_FILE_OUTPUT
+ && dst_reg->Index != info->pos_idx
+ && c->output_regs[dst_reg->Index].used_in_src)
+ brw_MOV(p, get_dst(c, dst_reg), dst);
+
+ release_tmps(c);
+}
+
+/* Emit the fragment program instructions here.
+ */
+void brw_vs_emit(struct brw_vs_compile *c)
+{
+#define MAX_IFSN 32
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *end_inst;
+ struct tgsi_parse_context parse;
+ struct brw_indirect stack_index = brw_indirect(0, 0);
+ const struct tgsi_token *tokens = c->vp->program.tokens;
+ struct brw_prog_info prog_info;
+ unsigned allocated_registers = 0;
+ memset(&prog_info, 0, sizeof(struct brw_prog_info));
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ tgsi_parse_init(&parse, tokens);
+ /* Message registers can't be read, so copy the output into GRF register
+ if they are used in source registers */
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+ unsigned i;
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION: {
+ const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
+ for (i = 0; i < 3; ++i) {
+ const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister;
+ unsigned index = src->Index;
+ unsigned file = src->File;
+ if (file == TGSI_FILE_OUTPUT)
+ c->output_regs[index].used_in_src = TRUE;
+ }
+ }
+ break;
+ default:
+ /* nothing */
+ break;
+ }
+ }
+ tgsi_parse_free(&parse);
+
+ tgsi_parse_init(&parse, tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION: {
+ struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
+ process_declaration(decl, &prog_info);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE: {
+ struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate;
+ /*assert(imm->Immediate.Size == 4);*/
+ c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float;
+ c->prog_data.num_imm++;
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION: {
+ struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
+ if (!allocated_registers) {
+ /* first instruction (declerations finished).
+ * now that we know what vars are being used allocate
+ * registers for them.*/
+ c->prog_data.num_consts = prog_info.num_consts;
+ c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm;
+ brw_vs_alloc_regs(c, &prog_info);
+
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ allocated_registers = 1;
+ }
+ process_instruction(c, inst, &prog_info);
+ }
+ break;
+ }
+ }
+
+ end_inst = &p->store[p->nr_insn];
+ emit_vertex_write(c, &prog_info);
+ post_vs_emit(c, end_inst);
+ tgsi_parse_free(&parse);
+
+}
diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c
new file mode 100644
index 0000000000..1eaff87892
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_vs_state.c
@@ -0,0 +1,103 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+static void upload_vs_unit( struct brw_context *brw )
+{
+ struct brw_vs_unit_state vs;
+
+ memset(&vs, 0, sizeof(vs));
+
+ /* CACHE_NEW_VS_PROG */
+ vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6;
+ vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1;
+ vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+ vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
+ vs.thread3.dispatch_grf_start_reg = 1;
+
+
+ /* BRW_NEW_URB_FENCE */
+ vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries;
+ vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+ vs.thread4.max_threads = MIN2(
+ MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1),
+ 15);
+
+
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ vs.thread4.max_threads = 0;
+
+ /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
+ if (0 /*brw->attribs.Clip->ClipPlanesEnabled*/) {
+ /* Note that we read in the userclip planes as well, hence
+ * clip_start:
+ */
+ vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+ }
+ else {
+ vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
+ }
+
+ vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ vs.thread3.urb_entry_read_offset = 0;
+
+ /* No samplers for ARB_vp programs:
+ */
+ vs.vs5.sampler_count = 0;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ vs.thread4.stats_enable = 1;
+
+ /* Vertex program always enabled:
+ */
+ vs.vs6.vs_enable = 1;
+
+ brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs );
+}
+
+
+const struct brw_tracked_state brw_vs_unit = {
+ .dirty = {
+ .brw = (BRW_NEW_CLIP |
+ BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .update = upload_vs_unit
+};
diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h
new file mode 100644
index 0000000000..ec1e400418
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_winsys.h
@@ -0,0 +1,209 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * This is the interface that i965simple requires any window system
+ * hosting it to implement. This is the only include file in i965simple
+ * which is public.
+ *
+ */
+
+#ifndef BRW_WINSYS_H
+#define BRW_WINSYS_H
+
+
+#include "pipe/p_defines.h"
+
+
+/* Pipe drivers are (meant to be!) independent of both GL and the
+ * window system. The window system provides a buffer manager and a
+ * set of additional hooks for things like command buffer submission,
+ * etc.
+ *
+ * There clearly has to be some agreement between the window system
+ * driver and the hardware driver about the format of command buffers,
+ * etc.
+ */
+
+struct pipe_buffer;
+struct pipe_fence_handle;
+struct pipe_winsys;
+struct pipe_screen;
+
+
+/* The pipe driver currently understands the following chipsets:
+ */
+#define PCI_CHIP_I965_G 0x29A2
+#define PCI_CHIP_I965_Q 0x2992
+#define PCI_CHIP_I965_G_1 0x2982
+#define PCI_CHIP_I965_GM 0x2A02
+#define PCI_CHIP_I965_GME 0x2A12
+
+
+/* These are the names of all the state caches managed by the driver.
+ *
+ * When data is uploaded to a buffer with buffer_subdata, we use the
+ * special version of that function below so that information about
+ * what type of data this is can be passed to the winsys backend.
+ * That in turn allows the correct flags to be set in the aub file
+ * dump to allow human-readable file dumps later on.
+ */
+
+enum brw_cache_id {
+ BRW_CC_VP,
+ BRW_CC_UNIT,
+ BRW_WM_PROG,
+ BRW_SAMPLER_DEFAULT_COLOR,
+ BRW_SAMPLER,
+ BRW_WM_UNIT,
+ BRW_SF_PROG,
+ BRW_SF_VP,
+ BRW_SF_UNIT,
+ BRW_VS_UNIT,
+ BRW_VS_PROG,
+ BRW_GS_UNIT,
+ BRW_GS_PROG,
+ BRW_CLIP_VP,
+ BRW_CLIP_UNIT,
+ BRW_CLIP_PROG,
+ BRW_SS_SURFACE,
+ BRW_SS_SURF_BIND,
+
+ BRW_MAX_CACHE
+};
+
+#define BRW_CONSTANT_BUFFER BRW_MAX_CACHE
+
+/**
+ * Additional winsys interface for i965simple.
+ *
+ * It is an over-simple batchbuffer mechanism. Will want to improve the
+ * performance of this, perhaps based on the cmdstream stuff. It
+ * would be pretty impossible to implement swz on top of this
+ * interface.
+ *
+ * Will also need additions/changes to implement static/dynamic
+ * indirect state.
+ */
+struct brw_winsys {
+
+ void (*destroy)(struct brw_winsys *);
+
+ /**
+ * Reserve space on batch buffer.
+ *
+ * Returns a null pointer if there is insufficient space in the batch buffer
+ * to hold the requested number of dwords and relocations.
+ *
+ * The number of dwords should also include the number of relocations.
+ */
+ unsigned *(*batch_start)(struct brw_winsys *sws,
+ unsigned dwords,
+ unsigned relocs);
+
+ void (*batch_dword)(struct brw_winsys *sws,
+ unsigned dword);
+
+ /**
+ * Emit a relocation to a buffer.
+ *
+ * Used not only when the buffer addresses are not pinned, but also to
+ * ensure refered buffers will not be destroyed until the current batch
+ * buffer execution is finished.
+ *
+ * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and
+ * I915_BUFFER_ACCESS_READ macros.
+ */
+ void (*batch_reloc)(struct brw_winsys *sws,
+ struct pipe_buffer *buf,
+ unsigned access_flags,
+ unsigned delta);
+
+
+ /* Not used yet, but really want this:
+ */
+ void (*batch_end)( struct brw_winsys *sws );
+
+ /**
+ * Flush the batch buffer.
+ *
+ * Fence argument must point to NULL or to a previous fence, and the caller
+ * must call fence_reference when done with the fence.
+ */
+ void (*batch_flush)(struct brw_winsys *sws,
+ struct pipe_fence_handle **fence);
+
+
+ /* A version of buffer_subdata that includes information for the
+ * simulator:
+ */
+ void (*buffer_subdata_typed)(struct brw_winsys *sws,
+ struct pipe_buffer *buf,
+ unsigned long offset,
+ unsigned long size,
+ const void *data,
+ unsigned data_type);
+
+
+ /* A cheat so we don't have to think about relocations in a couple
+ * of places yet:
+ */
+ unsigned (*get_buffer_offset)( struct brw_winsys *sws,
+ struct pipe_buffer *buf,
+ unsigned flags );
+
+};
+
+#define BRW_BUFFER_ACCESS_WRITE 0x1
+#define BRW_BUFFER_ACCESS_READ 0x2
+
+#define BRW_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0)
+
+
+struct pipe_context *brw_create(struct pipe_screen *,
+ struct brw_winsys *,
+ unsigned pci_id);
+
+static inline boolean brw_batchbuffer_data(struct brw_winsys *winsys,
+ const void *data,
+ unsigned bytes)
+{
+ static const unsigned incr = sizeof(unsigned);
+ uint i;
+ const unsigned *udata = (const unsigned*)(data);
+ unsigned size = bytes/incr;
+
+ winsys->batch_start(winsys, size, 0);
+ for (i = 0; i < size; ++i) {
+ winsys->batch_dword(winsys, udata[i]);
+ }
+ winsys->batch_end(winsys);
+
+ return (i == size);
+}
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c
new file mode 100644
index 0000000000..8de565b96c
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm.c
@@ -0,0 +1,209 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+#include "brw_eu.h"
+#include "brw_state.h"
+#include "util/u_memory.h"
+
+
+
+static void do_wm_prog( struct brw_context *brw,
+ struct brw_fragment_program *fp,
+ struct brw_wm_prog_key *key)
+{
+ struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile);
+ const unsigned *program;
+ unsigned program_size;
+
+ c->key = *key;
+ c->fp = fp;
+
+ c->delta_xy[0] = brw_null_reg();
+ c->delta_xy[1] = brw_null_reg();
+ c->pixel_xy[0] = brw_null_reg();
+ c->pixel_xy[1] = brw_null_reg();
+ c->pixel_w = brw_null_reg();
+
+
+ debug_printf("XXXXXXXX FP\n");
+
+ brw_wm_glsl_emit(c);
+
+ /* get the program
+ */
+ program = brw_get_program(&c->func, &program_size);
+
+ /*
+ */
+ brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG],
+ &c->key,
+ sizeof(c->key),
+ program,
+ program_size,
+ &c->prog_data,
+ &brw->wm.prog_data );
+
+ FREE(c);
+}
+
+
+
+static void brw_wm_populate_key( struct brw_context *brw,
+ struct brw_wm_prog_key *key )
+{
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *)brw->attribs.FragmentProgram;
+ unsigned lookup = 0;
+ unsigned line_aa;
+
+ memset(key, 0, sizeof(*key));
+
+ /* Build the index for table lookup
+ */
+ /* BRW_NEW_DEPTH_STENCIL */
+ if (fp->info.uses_kill ||
+ brw->attribs.DepthStencil->alpha.enabled)
+ lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (fp->info.writes_z)
+ lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+ if (brw->attribs.DepthStencil->depth.enabled)
+ lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+ if (brw->attribs.DepthStencil->depth.enabled &&
+ brw->attribs.DepthStencil->depth.writemask) /* ?? */
+ lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+ if (brw->attribs.DepthStencil->stencil[0].enabled) {
+ lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+ if (brw->attribs.DepthStencil->stencil[0].write_mask ||
+ brw->attribs.DepthStencil->stencil[1].write_mask)
+ lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+ }
+
+ /* XXX: when should this be disabled?
+ */
+ if (1)
+ lookup |= IZ_EARLY_DEPTH_TEST_BIT;
+
+
+ line_aa = AA_NEVER;
+
+ /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
+ if (brw->attribs.Raster->line_smooth) {
+ if (brw->reduced_primitive == PIPE_PRIM_LINES) {
+ line_aa = AA_ALWAYS;
+ }
+ else if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) {
+ if (brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_LINE) {
+ line_aa = AA_SOMETIMES;
+
+ if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE ||
+ (brw->attribs.Raster->cull_mode == PIPE_WINDING_CW))
+ line_aa = AA_ALWAYS;
+ }
+ else if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE) {
+ line_aa = AA_SOMETIMES;
+
+ if (brw->attribs.Raster->cull_mode == PIPE_WINDING_CCW)
+ line_aa = AA_ALWAYS;
+ }
+ }
+ }
+
+ brw_wm_lookup_iz(line_aa,
+ lookup,
+ key);
+
+
+#if 0
+ /* BRW_NEW_SAMPLER
+ *
+ * Not doing any of this at the moment:
+ */
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ const struct pipe_sampler_state *unit = brw->attribs.Samplers[i];
+
+ if (unit) {
+
+ if (unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ key->shadowtex_mask |= 1<<i;
+ }
+ if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA)
+ key->yuvtex_mask |= 1<<i;
+ }
+ }
+#endif
+
+
+ /* Extra info:
+ */
+ key->program_string_id = fp->id;
+
+}
+
+
+static void brw_upload_wm_prog( struct brw_context *brw )
+{
+ struct brw_wm_prog_key key;
+ struct brw_fragment_program *fp = (struct brw_fragment_program *)
+ brw->attribs.FragmentProgram;
+
+ brw_wm_populate_key(brw, &key);
+
+ /* Make an early check for the key.
+ */
+ if (brw_search_cache(&brw->cache[BRW_WM_PROG],
+ &key, sizeof(key),
+ &brw->wm.prog_data,
+ &brw->wm.prog_gs_offset))
+ return;
+
+ do_wm_prog(brw, fp, &key);
+}
+
+
+const struct brw_tracked_state brw_wm_prog = {
+ .dirty = {
+ .brw = (BRW_NEW_FS |
+ BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = 0
+ },
+ .update = brw_upload_wm_prog
+};
+
diff --git a/src/gallium/drivers/i965simple/brw_wm.h b/src/gallium/drivers/i965simple/brw_wm.h
new file mode 100644
index 0000000000..b29c4393f0
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm.h
@@ -0,0 +1,142 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_WM_H
+#define BRW_WM_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+/* A big lookup table is used to figure out which and how many
+ * additional regs will inserted before the main payload in the WM
+ * program execution. These mainly relate to depth and stencil
+ * processing and the early-depth-test optimization.
+ */
+#define IZ_PS_KILL_ALPHATEST_BIT 0x1
+#define IZ_PS_COMPUTES_DEPTH_BIT 0x2
+#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4
+#define IZ_DEPTH_TEST_ENABLE_BIT 0x8
+#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10
+#define IZ_STENCIL_TEST_ENABLE_BIT 0x20
+#define IZ_EARLY_DEPTH_TEST_BIT 0x40
+#define IZ_BIT_MAX 0x80
+
+#define AA_NEVER 0
+#define AA_SOMETIMES 1
+#define AA_ALWAYS 2
+
+struct brw_wm_prog_key {
+ unsigned source_depth_reg:3;
+ unsigned aa_dest_stencil_reg:3;
+ unsigned dest_depth_reg:3;
+ unsigned nr_depth_regs:3;
+ unsigned shadowtex_mask:8;
+ unsigned computes_depth:1; /* could be derived from program string */
+ unsigned source_depth_to_render_target:1;
+ unsigned runtime_check_aads_emit:1;
+
+ unsigned yuvtex_mask:8;
+
+ unsigned program_string_id;
+};
+
+
+
+
+
+#define PROGRAM_INTERNAL_PARAM
+#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */
+#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_MAX_ATTRIBS + 3)
+#define BRW_WM_MAX_GRF 128 /* hardware limit */
+#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4)
+#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12)
+#define BRW_WM_MAX_PARAM 256
+#define BRW_WM_MAX_CONST 256
+#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
+
+#define PAYLOAD_DEPTH (PIPE_MAX_ATTRIBS)
+
+#define MAX_IFSN 32
+#define MAX_LOOP_DEPTH 32
+
+struct brw_wm_compile {
+ struct brw_compile func;
+ struct brw_wm_prog_key key;
+ struct brw_wm_prog_data prog_data; /* result */
+
+ struct brw_fragment_program *fp;
+
+ unsigned grf_limit;
+ unsigned max_wm_grf;
+
+
+ struct brw_reg pixel_xy[2];
+ struct brw_reg delta_xy[2];
+ struct brw_reg pixel_w;
+
+
+ struct brw_reg wm_regs[8][32][4];
+
+ struct brw_reg payload_depth[4];
+ struct brw_reg payload_coef[16];
+
+ struct brw_reg emit_mask_reg;
+
+ struct brw_instruction *if_inst[MAX_IFSN];
+ int if_insn;
+
+ struct brw_instruction *loop_inst[MAX_LOOP_DEPTH];
+ int loop_insn;
+
+ struct brw_instruction *inst0;
+ struct brw_instruction *inst1;
+
+ struct brw_reg stack;
+ struct brw_indirect stack_index;
+
+ unsigned reg_index;
+
+ unsigned tmp_start;
+ unsigned tmp_index;
+};
+
+
+
+void brw_wm_lookup_iz( unsigned line_aa,
+ unsigned lookup,
+ struct brw_wm_prog_key *key );
+
+void brw_wm_glsl_emit(struct brw_wm_compile *c);
+void brw_wm_emit_decls(struct brw_wm_compile *c);
+
+#endif
diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c
new file mode 100644
index 0000000000..d50e66f613
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm_decl.c
@@ -0,0 +1,392 @@
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+ c->tmp_index++;
+ c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index);
+ return brw_vec8_grf(c->tmp_start + c->tmp_index, 0);
+}
+
+static void release_tmps(struct brw_wm_compile *c)
+{
+ c->tmp_index = 0;
+}
+
+
+
+static int is_null( struct brw_reg reg )
+{
+ return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ reg.nr == BRW_ARF_NULL);
+}
+
+static void emit_pixel_xy( struct brw_wm_compile *c )
+{
+ if (is_null(c->pixel_xy[0])) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+
+ c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
+ c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
+
+ /* Calculate pixel centers by adding 1 or 0 to each of the
+ * micro-tile coordinates passed in r1.
+ */
+ brw_ADD(p,
+ c->pixel_xy[0],
+ stride(suboffset(r1_uw, 4), 2, 4, 0),
+ brw_imm_v(0x10101010));
+
+ brw_ADD(p,
+ c->pixel_xy[1],
+ stride(suboffset(r1_uw, 5), 2, 4, 0),
+ brw_imm_v(0x11001100));
+ }
+}
+
+
+
+
+
+
+static void emit_delta_xy( struct brw_wm_compile *c )
+{
+ if (is_null(c->delta_xy[0])) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+
+ emit_pixel_xy(c);
+
+ c->delta_xy[0] = alloc_tmp(c);
+ c->delta_xy[1] = alloc_tmp(c);
+
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ brw_ADD(p,
+ c->delta_xy[0],
+ retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW),
+ negate(r1));
+
+ brw_ADD(p,
+ c->delta_xy[1],
+ retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
+ }
+}
+
+
+
+#if 0
+static void emit_pixel_w( struct brw_wm_compile *c )
+{
+ if (is_null(c->pixel_w)) {
+ struct brw_compile *p = &c->func;
+
+ struct brw_reg interp_wpos = c->coef_wpos;
+
+ c->pixel_w = alloc_tmp(c);
+
+ emit_delta_xy(c);
+
+ /* Calc 1/w - just linterp wpos[3] optimized by putting the
+ * result straight into a message reg.
+ */
+ struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4);
+ brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]);
+
+ /* Calc w */
+ brw_math_16( p,
+ c->pixel_w,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ }
+}
+#endif
+
+
+static void emit_cinterp(struct brw_wm_compile *c,
+ int idx,
+ int mask )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg interp[4];
+ struct brw_reg coef = c->payload_coef[idx];
+ int i;
+
+ interp[0] = brw_vec1_grf(coef.nr, 0);
+ interp[1] = brw_vec1_grf(coef.nr, 4);
+ interp[2] = brw_vec1_grf(coef.nr+1, 0);
+ interp[3] = brw_vec1_grf(coef.nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i];
+ brw_MOV(p, dst, suboffset(interp[i],3));
+ }
+ }
+}
+
+static void emit_linterp(struct brw_wm_compile *c,
+ int idx,
+ int mask )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg interp[4];
+ struct brw_reg coef = c->payload_coef[idx];
+ int i;
+
+ emit_delta_xy(c);
+
+ interp[0] = brw_vec1_grf(coef.nr, 0);
+ interp[1] = brw_vec1_grf(coef.nr, 4);
+ interp[2] = brw_vec1_grf(coef.nr+1, 0);
+ interp[3] = brw_vec1_grf(coef.nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i];
+ brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]);
+ brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]);
+ }
+ }
+}
+
+#if 0
+static void emit_pinterp(struct brw_wm_compile *c,
+ int idx,
+ int mask )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg interp[4];
+ struct brw_reg coef = c->payload_coef[idx];
+ int i;
+
+ get_delta_xy(c);
+ get_pixel_w(c);
+
+ interp[0] = brw_vec1_grf(coef.nr, 0);
+ interp[1] = brw_vec1_grf(coef.nr, 4);
+ interp[2] = brw_vec1_grf(coef.nr+1, 0);
+ interp[3] = brw_vec1_grf(coef.nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i);
+ brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]);
+ brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]);
+ brw_MUL(p, dst, dst, c->pixel_w);
+ }
+ }
+}
+#endif
+
+
+
+#if 0
+static void emit_wpos( )
+{
+ struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
+ struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+ struct tgsi_full_src_register deltas = get_delta_xy(c);
+ struct tgsi_full_src_register arg2;
+ unsigned opcode;
+
+ opcode = WM_LINTERP;
+ arg2 = src_undef();
+
+ /* Have to treat wpos.xy specially:
+ */
+ emit_op(c,
+ WM_WPOSXY,
+ dst_mask(dst, WRITEMASK_XY),
+ 0, 0, 0,
+ get_pixel_xy(c),
+ src_undef(),
+ src_undef());
+
+ dst = dst_mask(dst, WRITEMASK_ZW);
+
+ /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
+ */
+ emit_op(c,
+ WM_LINTERP,
+ dst,
+ 0, 0, 0,
+ interp,
+ deltas,
+ arg2);
+}
+#endif
+
+
+
+
+/* Perform register allocation:
+ *
+ * -- r0???
+ * -- passthrough depth regs (and stencil/aa??)
+ * -- curbe ??
+ * -- inputs (coefficients)
+ *
+ * Use a totally static register allocation. This will perform poorly
+ * but is an easy way to get started (again).
+ */
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+ int i, j;
+ int nr_curbe_regs = 0;
+
+ /* R0, then some depth related regs:
+ */
+ for (i = 0; i < c->key.nr_depth_regs; i++) {
+ c->payload_depth[i] = brw_vec8_grf(i*2, 0);
+ c->reg_index += 2;
+ }
+
+
+ /* Then a copy of our part of the CURBE entry:
+ */
+ {
+ int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ int index = 0;
+
+ /* XXX number of constants, or highest numbered constant? */
+ assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]);
+
+ c->prog_data.max_const = 4*nr_constants;
+ for (i = 0; i < nr_constants; i++) {
+ for (j = 0; j < 4; j++, index++)
+ c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8,
+ index%8);
+ }
+
+ nr_curbe_regs = 2*((4*nr_constants+15)/16);
+ c->reg_index += nr_curbe_regs;
+ }
+
+ /* Adjust for parameter coefficients for position, which are
+ * currently always provided.
+ */
+// c->position_coef[i] = brw_vec8_grf(c->reg_index, 0);
+ c->reg_index += 2;
+
+ /* Next we receive the plane coefficients for parameter
+ * interpolation:
+ */
+ assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs);
+ for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
+ c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0);
+ c->reg_index += 2;
+ }
+
+ c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2;
+ c->prog_data.curb_read_length = nr_curbe_regs;
+
+ /* That's the end of the payload, now we can start allocating registers.
+ */
+ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+ c->reg_index++;
+
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+ c->reg_index += 2;
+
+ /* Now allocate room for the interpolated inputs and staging
+ * registers for the outputs:
+ */
+ /* XXX do we want to loop over the _number_ of inputs/outputs or loop
+ * to the highest input/output index that's used?
+ * Probably the same, actually.
+ */
+ assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs);
+ assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs);
+ for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++)
+ for (j = 0; j < 4; j++)
+ c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );
+
+ for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++)
+ for (j = 0; j < 4; j++)
+ c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );
+
+ /* Beyond this we should only need registers for internal temporaries:
+ */
+ c->tmp_start = c->reg_index;
+}
+
+
+
+
+
+/* Need to interpolate fragment program inputs in as a preamble to the
+ * shader. A more sophisticated compiler would do this on demand, but
+ * we'll do it up front:
+ */
+void brw_wm_emit_decls(struct brw_wm_compile *c)
+{
+ struct tgsi_parse_context parse;
+ int done = 0;
+
+ prealloc_reg(c);
+
+ tgsi_parse_init( &parse, c->fp->program.tokens );
+
+ while( !done &&
+ !tgsi_parse_end_of_tokens( &parse ) )
+ {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
+ unsigned first = decl->DeclarationRange.First;
+ unsigned last = decl->DeclarationRange.Last;
+ unsigned mask = decl->Declaration.UsageMask; /* ? */
+ unsigned i;
+
+ if (decl->Declaration.File != TGSI_FILE_INPUT)
+ break;
+
+ for( i = first; i <= last; i++ ) {
+ switch (decl->Declaration.Interpolate) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ emit_cinterp(c, i, mask);
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ emit_linterp(c, i, mask);
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ //emit_pinterp(c, i, mask);
+ emit_linterp(c, i, mask);
+ break;
+ }
+ }
+ break;
+ }
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ default:
+ done = 1;
+ break;
+ }
+ }
+
+ tgsi_parse_free (&parse);
+
+ release_tmps(c);
+}
diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c
new file mode 100644
index 0000000000..ab6410aa60
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c
@@ -0,0 +1,1076 @@
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+
+
+
+static int get_scalar_dst_index(struct tgsi_full_instruction *inst)
+{
+ struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister;
+ int i;
+ for (i = 0; i < 4; i++)
+ if (dst.WriteMask & (1<<i))
+ break;
+ return i;
+}
+
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+ c->tmp_index++;
+ c->reg_index = MAX2(c->reg_index, c->tmp_index);
+ return brw_vec8_grf(c->tmp_start + c->tmp_index, 0);
+}
+
+static void release_tmps(struct brw_wm_compile *c)
+{
+ c->tmp_index = 0;
+}
+
+
+static struct brw_reg
+get_reg(struct brw_wm_compile *c, int file, int index, int component )
+{
+ switch (file) {
+ case TGSI_FILE_NULL:
+ return brw_null_reg();
+
+ case TGSI_FILE_SAMPLER:
+ /* Should never get here:
+ */
+ assert (0);
+ return brw_null_reg();
+
+ case TGSI_FILE_IMMEDIATE:
+ /* These need a different path:
+ */
+ assert(0);
+ return brw_null_reg();
+
+
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_ADDRESS:
+ return c->wm_regs[file][index][component];
+
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst,
+ int component)
+{
+ return get_reg(c,
+ inst->FullDstRegisters[0].DstRegister.File,
+ inst->FullDstRegisters[0].DstRegister.Index,
+ component);
+}
+
+static int get_swz( struct tgsi_src_register src, int index )
+{
+ switch (index & 3) {
+ case 0: return src.SwizzleX;
+ case 1: return src.SwizzleY;
+ case 2: return src.SwizzleZ;
+ case 3: return src.SwizzleW;
+ default: return 0;
+ }
+}
+
+static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index )
+{
+ switch (index & 3) {
+ case 0: return src.ExtSwizzleX;
+ case 1: return src.ExtSwizzleY;
+ case 2: return src.ExtSwizzleZ;
+ case 3: return src.ExtSwizzleW;
+ default: return 0;
+ }
+}
+
+static struct brw_reg get_src_reg(struct brw_wm_compile *c,
+ struct tgsi_full_src_register *src,
+ int index)
+{
+ struct brw_reg reg;
+ int component = index;
+ int neg = 0;
+ int abs = 0;
+
+ if (src->SrcRegister.Negate)
+ neg = 1;
+
+ component = get_swz(src->SrcRegister, component);
+
+ /* Yes, there are multiple negates:
+ */
+ switch (component & 3) {
+ case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break;
+ case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break;
+ case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break;
+ case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break;
+ }
+
+ /* And multiple swizzles, fun isn't it:
+ */
+ component = get_ext_swz(src->SrcRegisterExtSwz, component);
+
+ /* Not handling indirect lookups yet:
+ */
+ assert(src->SrcRegister.Indirect == 0);
+
+ /* Don't know what dimension means:
+ */
+ assert(src->SrcRegister.Dimension == 0);
+
+ /* Will never handle any of this stuff:
+ */
+ assert(src->SrcRegisterExtMod.Complement == 0);
+ assert(src->SrcRegisterExtMod.Bias == 0);
+ assert(src->SrcRegisterExtMod.Scale2X == 0);
+
+ if (src->SrcRegisterExtMod.Absolute)
+ abs = 1;
+
+ /* Another negate! This is a post-absolute negate, which we
+ * can't do. Need to clean the crap out of tgsi somehow.
+ */
+ assert(src->SrcRegisterExtMod.Negate == 0);
+
+ switch( component ) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ reg = get_reg(c,
+ src->SrcRegister.File,
+ src->SrcRegister.Index,
+ component );
+
+ if (neg)
+ reg = negate(reg);
+
+ if (abs)
+ reg = brw_abs(reg);
+
+ break;
+
+ /* XXX: this won't really work in the general case, but we know
+ * that the extended swizzle is only allowed in the SWZ
+ * instruction (right??), in which case using an immediate
+ * directly will work.
+ */
+ case TGSI_EXTSWIZZLE_ZERO:
+ reg = brw_imm_f(0);
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ if (neg && !abs)
+ reg = brw_imm_f(-1.0);
+ else
+ reg = brw_imm_f(1.0);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+
+ return reg;
+}
+
+static void emit_abs( struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+
+ int i;
+ struct brw_compile *p = &c->func;
+ brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ struct brw_reg src, dst;
+ dst = get_dst_reg(c, inst, i);
+ src = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ brw_MOV(p, dst, brw_abs(src)); /* NOTE */
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_xpd(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ for (i = 0; i < 4; i++) {
+ unsigned i2 = (i+2)%3;
+ unsigned i1 = (i+1)%3;
+ if (mask & (1<<i)) {
+ struct brw_reg src0, src1, dst;
+ dst = get_dst_reg(c, inst, i);
+ src0 = negate(get_src_reg(c, &inst->FullSrcRegisters[0], i2));
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1);
+ brw_MUL(p, brw_null_reg(), src0, src1);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2);
+ brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE);
+ brw_MAC(p, dst, src0, src1);
+ brw_set_saturate(p, 0);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dp3(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_reg src0[3], src1[3], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ for (i = 0; i < 3; i++) {
+ src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ }
+
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_MAC(p, dst, src0[2], src1[2]);
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dp4(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_reg src0[4], src1[4], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ for (i = 0; i < 4; i++) {
+ src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ }
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_MAC(p, dst, src0[3], src1[3]);
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dph(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_reg src0[4], src1[4], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ for (i = 0; i < 4; i++) {
+ src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ }
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_MAC(p, dst, src0[2], src1[2]);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_ADD(p, dst, src0[3], src1[3]);
+ brw_set_saturate(p, 0);
+}
+
+static void emit_math1(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst, unsigned func)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ brw_MOV(p, brw_message_reg(2), src0);
+ brw_math(p,
+ dst,
+ func,
+ ((inst->Instruction.Saturate != TGSI_SAT_NONE)
+ ? BRW_MATH_SATURATE_SATURATE
+ : BRW_MATH_SATURATE_NONE),
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+
+static void emit_alu2(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst,
+ unsigned opcode)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, dst;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ brw_alu2(p, opcode, dst, src0, src1);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_alu1(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst,
+ unsigned opcode)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ brw_alu1(p, opcode, dst, src0);
+ }
+ }
+ if (inst->Instruction.Saturate != TGSI_SAT_NONE)
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_max(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg src0, src1, dst;
+ int i;
+ brw_push_insn_state(p);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_MOV(p, dst, src0);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, src1);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+ brw_pop_insn_state(p);
+}
+
+static void emit_min(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg src0, src1, dst;
+ int i;
+ brw_push_insn_state(p);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_MOV(p, dst, src0);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, src1);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+ brw_pop_insn_state(p);
+}
+
+static void emit_pow(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst, src0, src1;
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0);
+
+ brw_MOV(p, brw_message_reg(2), src0);
+ brw_MOV(p, brw_message_reg(3), src1);
+
+ brw_math(p,
+ dst,
+ BRW_MATH_FUNCTION_POW,
+ (inst->Instruction.Saturate != TGSI_SAT_NONE
+ ? BRW_MATH_SATURATE_SATURATE
+ : BRW_MATH_SATURATE_NONE),
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+static void emit_lrp(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
+ int i;
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+
+ if (src1.nr == dst.nr) {
+ tmp1 = alloc_tmp(c);
+ brw_MOV(p, tmp1, src1);
+ } else
+ tmp1 = src1;
+
+ src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i);
+ if (src2.nr == dst.nr) {
+ tmp2 = alloc_tmp(c);
+ brw_MOV(p, tmp2, src2);
+ } else
+ tmp2 = src2;
+
+ brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst, tmp2);
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_MAC(p, dst, src0, tmp1);
+ brw_set_saturate(p, 0);
+ }
+ release_tmps(c);
+ }
+}
+
+static void emit_kil(struct brw_wm_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, depth, c->emit_mask_reg, depth);
+ brw_pop_insn_state(p);
+}
+
+static void emit_mad(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg dst, src0, src1, src2;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i);
+ brw_MUL(p, dst, src0, src1);
+
+ brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+ brw_ADD(p, dst, dst, src2);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+static void emit_sop(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst, unsigned cond)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg dst, src0, src1;
+ int i;
+
+ brw_push_insn_state(p);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+ src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+ brw_CMP(p, brw_null_reg(), cond, src0, src1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, dst, brw_imm_f(0.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, brw_imm_f(1.0));
+ }
+ }
+ brw_pop_insn_state(p);
+}
+
+
+static void emit_ddx(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg interp[4];
+ struct brw_reg dst;
+ struct brw_reg src0, w;
+ unsigned nr, i;
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+ w = get_src_reg(c, &inst->FullSrcRegisters[1], 3);
+ nr = src0.nr;
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+ brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE);
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, interp[i]);
+ brw_MUL(p, dst, dst, w);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_ddy(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ struct brw_reg interp[4];
+ struct brw_reg dst;
+ struct brw_reg src0, w;
+ unsigned nr, i;
+
+ src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+ nr = src0.nr;
+ w = get_src_reg(c, &inst->FullSrcRegisters[1], 3);
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+ brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE);
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, suboffset(interp[i], 1));
+ brw_MUL(p, dst, dst, w);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+/* TODO
+ BIAS on SIMD8 not workind yet...
+*/
+static void emit_txb(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+#if 0
+ struct brw_compile *p = &c->func;
+ struct brw_reg payload_reg = c->payload_depth[0];
+ struct brw_reg dst[4], src[4];
+ unsigned i;
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+
+#if 0
+ switch (inst->TexSrcTarget) {
+ case TEXTURE_1D_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ break;
+ default:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), src[2]);
+ break;
+ }
+#else
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+#endif
+
+ brw_MOV(p, brw_message_reg(5), src[3]);
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(payload_reg, BRW_REGISTER_TYPE_UW),
+ inst->TexSrcUnit + 1, /* surface */
+ inst->TexSrcUnit, /* sampler */
+ inst->FullDstRegisters[0].DstRegister.WriteMask,
+ BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
+ 4,
+ 4,
+ 0);
+#endif
+}
+
+static void emit_tex(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+#if 0
+ struct brw_compile *p = &c->func;
+ struct brw_reg payload_reg = c->payload_depth[0];
+ struct brw_reg dst[4], src[4];
+ unsigned msg_len;
+ unsigned i, nr;
+ unsigned emit;
+ boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0;
+
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+
+#if 0
+ switch (inst->TexSrcTarget) {
+ case TEXTURE_1D_INDEX:
+ emit = WRITEMASK_X;
+ nr = 1;
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ emit = WRITEMASK_XY;
+ nr = 2;
+ break;
+ default:
+ emit = WRITEMASK_XYZ;
+ nr = 3;
+ break;
+ }
+#else
+ emit = WRITEMASK_XY;
+ nr = 2;
+#endif
+
+ msg_len = 1;
+
+ for (i = 0; i < nr; i++) {
+ static const unsigned swz[4] = {0,1,2,2};
+ if (emit & (1<<i))
+ brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
+ else
+ brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
+ msg_len += 1;
+ }
+
+ if (shadow) {
+ brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(6), src[2]);
+ }
+
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(payload_reg, BRW_REGISTER_TYPE_UW),
+ inst->TexSrcUnit + 1, /* surface */
+ inst->TexSrcUnit, /* sampler */
+ inst->FullDstRegisters[0].DstRegister.WriteMask,
+ BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
+ 4,
+ shadow ? 6 : 4,
+ 0);
+
+ if (shadow)
+ brw_MOV(p, dst[3], brw_imm_f(1.0));
+#endif
+}
+
+
+
+
+
+
+
+
+static void emit_fb_write(struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ int nr = 2;
+ int channel;
+ int base_reg = 0;
+
+ // src0 = output color
+ // src1 = payload_depth[0]
+ // src2 = output depth
+ // dst = ???
+
+
+
+ /* Reserve a space for AA - may not be needed:
+ */
+ if (c->key.aa_dest_stencil_reg)
+ nr += 1;
+
+ {
+ brw_push_insn_state(p);
+ for (channel = 0; channel < 4; channel++) {
+ struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel];
+
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+ brw_MOV(p, brw_message_reg(nr + channel), src0);
+ }
+ /* skip over the regs populated above: */
+ nr += 8;
+ brw_pop_insn_state(p);
+ }
+
+
+ /* Pass through control information:
+ */
+ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+ brw_MOV(p,
+ brw_message_reg(base_reg + 1),
+ brw_vec8_grf(1, 0));
+ brw_pop_insn_state(p);
+ }
+
+ /* Send framebuffer write message: */
+ brw_fb_WRITE(p,
+ retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ base_reg,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ 0, /* render surface always 0 */
+ nr,
+ 0,
+ 1);
+
+}
+
+
+static void brw_wm_emit_instruction( struct brw_wm_compile *c,
+ struct tgsi_full_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+
+#if 0
+ if (inst->CondUpdate)
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ else
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+#else
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+#endif
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ emit_abs(c, inst);
+ break;
+ case TGSI_OPCODE_ADD:
+ emit_alu2(c, inst, BRW_OPCODE_ADD);
+ break;
+ case TGSI_OPCODE_SUB:
+ assert(0);
+// emit_alu2(c, inst, BRW_OPCODE_SUB);
+ break;
+ case TGSI_OPCODE_FRC:
+ emit_alu1(c, inst, BRW_OPCODE_FRC);
+ break;
+ case TGSI_OPCODE_FLR:
+ assert(0);
+// emit_alu1(c, inst, BRW_OPCODE_FLR);
+ break;
+ case TGSI_OPCODE_LRP:
+ emit_lrp(c, inst);
+ break;
+ case TGSI_OPCODE_INT:
+ emit_alu1(c, inst, BRW_OPCODE_RNDD);
+ break;
+ case TGSI_OPCODE_MOV:
+ emit_alu1(c, inst, BRW_OPCODE_MOV);
+ break;
+ case TGSI_OPCODE_DP3:
+ emit_dp3(c, inst);
+ break;
+ case TGSI_OPCODE_DP4:
+ emit_dp4(c, inst);
+ break;
+ case TGSI_OPCODE_XPD:
+ emit_xpd(c, inst);
+ break;
+ case TGSI_OPCODE_DPH:
+ emit_dph(c, inst);
+ break;
+ case TGSI_OPCODE_RCP:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
+ break;
+ case TGSI_OPCODE_RSQ:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+ break;
+ case TGSI_OPCODE_SIN:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
+ break;
+ case TGSI_OPCODE_COS:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+ break;
+ case TGSI_OPCODE_EX2:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
+ break;
+ case TGSI_OPCODE_LG2:
+ emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
+ break;
+ case TGSI_OPCODE_MAX:
+ emit_max(c, inst);
+ break;
+ case TGSI_OPCODE_MIN:
+ emit_min(c, inst);
+ break;
+ case TGSI_OPCODE_DDX:
+ emit_ddx(c, inst);
+ break;
+ case TGSI_OPCODE_DDY:
+ emit_ddy(c, inst);
+ break;
+ case TGSI_OPCODE_SLT:
+ emit_sop(c, inst, BRW_CONDITIONAL_L);
+ break;
+ case TGSI_OPCODE_SLE:
+ emit_sop(c, inst, BRW_CONDITIONAL_LE);
+ break;
+ case TGSI_OPCODE_SGT:
+ emit_sop(c, inst, BRW_CONDITIONAL_G);
+ break;
+ case TGSI_OPCODE_SGE:
+ emit_sop(c, inst, BRW_CONDITIONAL_GE);
+ break;
+ case TGSI_OPCODE_SEQ:
+ emit_sop(c, inst, BRW_CONDITIONAL_EQ);
+ break;
+ case TGSI_OPCODE_SNE:
+ emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+ break;
+ case TGSI_OPCODE_MUL:
+ emit_alu2(c, inst, BRW_OPCODE_MUL);
+ break;
+ case TGSI_OPCODE_POW:
+ emit_pow(c, inst);
+ break;
+ case TGSI_OPCODE_MAD:
+ emit_mad(c, inst);
+ break;
+ case TGSI_OPCODE_TEX:
+ emit_tex(c, inst);
+ break;
+ case TGSI_OPCODE_TXB:
+ emit_txb(c, inst);
+ break;
+ case TGSI_OPCODE_TEXKILL:
+ emit_kil(c);
+ break;
+ case TGSI_OPCODE_IF:
+ assert(c->if_insn < MAX_IFSN);
+ c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ break;
+ case TGSI_OPCODE_ELSE:
+ c->if_inst[c->if_insn-1] = brw_ELSE(p, c->if_inst[c->if_insn-1]);
+ break;
+ case TGSI_OPCODE_ENDIF:
+ assert(c->if_insn > 0);
+ brw_ENDIF(p, c->if_inst[--c->if_insn]);
+ break;
+ case TGSI_OPCODE_BGNSUB:
+ case TGSI_OPCODE_ENDSUB:
+ break;
+ case TGSI_OPCODE_CAL:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p,
+ get_addr_reg(c->stack_index),
+ get_addr_reg(c->stack_index), brw_imm_d(4));
+// orig_inst = inst->Data;
+// orig_inst->Data = &p->store[p->nr_insn];
+ assert(0);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_pop_insn_state(p);
+ break;
+
+ case TGSI_OPCODE_RET:
+#if 0
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD(p,
+ get_addr_reg(c->stack_index),
+ get_addr_reg(c->stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_pop_insn_state(p);
+#else
+ emit_fb_write(c, inst);
+#endif
+
+ break;
+ case TGSI_OPCODE_LOOP:
+ c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case TGSI_OPCODE_BRK:
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_CONT:
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ c->loop_insn--;
+ c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]);
+ /* patch all the BREAK instructions from
+ last BEGINLOOP */
+ while (c->inst0 > c->loop_inst[c->loop_insn]) {
+ c->inst0--;
+ if (c->inst0->header.opcode == BRW_OPCODE_BREAK) {
+ c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1;
+ c->inst0->bits3.if_else.pop_count = 0;
+ } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0;
+ c->inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ break;
+ case TGSI_OPCODE_END:
+ emit_fb_write(c, inst);
+ break;
+
+ default:
+ debug_printf("unsupported IR in fragment shader %d\n",
+ inst->Instruction.Opcode);
+ }
+#if 0
+ if (inst->CondUpdate)
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ else
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+#endif
+}
+
+
+
+
+
+
+void brw_wm_glsl_emit(struct brw_wm_compile *c)
+{
+ struct tgsi_parse_context parse;
+ struct brw_compile *p = &c->func;
+
+ brw_init_compile(&c->func);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ c->reg_index = 0;
+ c->if_insn = 0;
+ c->loop_insn = 0;
+ c->stack_index = brw_indirect(0,0);
+
+ /* Do static register allocation and parameter interpolation:
+ */
+ brw_wm_emit_decls( c );
+
+ /* Emit the actual program. All done with very direct translation,
+ * hopefully we can improve on this shortly...
+ */
+ brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
+
+ tgsi_parse_init( &parse, c->fp->program.tokens );
+
+ while( !tgsi_parse_end_of_tokens( &parse ) )
+ {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ /* already done */
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* not handled yet */
+ assert(0);
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction);
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+
+ tgsi_parse_free (&parse);
+
+ /* Fix up call targets:
+ */
+#if 0
+ {
+ unsigned nr_insns = c->fp->program.Base.NumInstructions;
+ unsigned insn, target_insn;
+ struct tgsi_full_instruction *inst1, *inst2;
+ struct brw_instruction *brw_inst1, *brw_inst2;
+ int offset;
+ for (insn = 0; insn < nr_insns; insn++) {
+ inst1 = &c->fp->program.Base.Instructions[insn];
+ brw_inst1 = inst1->Data;
+ switch (inst1->Opcode) {
+ case TGSI_OPCODE_CAL:
+ target_insn = inst1->BranchTarget;
+ inst2 = &c->fp->program.Base.Instructions[target_insn];
+ brw_inst2 = inst2->Data;
+ offset = brw_inst2 - brw_inst1;
+ brw_set_src1(brw_inst1, brw_imm_d(offset*16));
+ break;
+ default:
+ break;
+ }
+ }
+ }
+#endif
+
+ c->prog_data.total_grf = c->reg_index;
+ c->prog_data.total_scratch = 0;
+}
diff --git a/src/gallium/drivers/i965simple/brw_wm_iz.c b/src/gallium/drivers/i965simple/brw_wm_iz.c
new file mode 100644
index 0000000000..6c5f25bf39
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm_iz.c
@@ -0,0 +1,214 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_wm.h"
+
+
+#undef P /* prompted depth */
+#undef C /* computed */
+#undef N /* non-promoted? */
+
+#define P 0
+#define C 1
+#define N 2
+
+const struct {
+ unsigned mode:2;
+ unsigned sd_present:1;
+ unsigned sd_to_rt:1;
+ unsigned dd_present:1;
+ unsigned ds_present:1;
+} wm_iz_table[IZ_BIT_MAX] =
+{
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 1, 1, 0, 0 },
+ { C, 1, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 1, 1, 0, 0 },
+ { C, 1, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 0, 1, 0, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 1, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 0, 0, 1 },
+ { C, 0, 0, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 1, 1, 0, 1 },
+ { C, 1, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 0, 0, 1 },
+ { C, 0, 0, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 1, 1, 0, 1 },
+ { C, 1, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 1 },
+ { N, 0, 1, 0, 1 },
+ { N, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 0, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 }
+};
+
+void brw_wm_lookup_iz( unsigned line_aa,
+ unsigned lookup,
+ struct brw_wm_prog_key *key )
+{
+ unsigned reg = 2;
+
+ assert (lookup < IZ_BIT_MAX);
+
+ if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
+ key->computes_depth = 1;
+
+ if (wm_iz_table[lookup].sd_present) {
+ key->source_depth_reg = reg;
+ reg += 2;
+ }
+
+ if (wm_iz_table[lookup].sd_to_rt)
+ key->source_depth_to_render_target = 1;
+
+ if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
+ key->aa_dest_stencil_reg = reg;
+ key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
+ line_aa == AA_SOMETIMES);
+ reg++;
+ }
+
+ if (wm_iz_table[lookup].dd_present) {
+ key->dest_depth_reg = reg;
+ reg+=2;
+ }
+
+ key->nr_depth_regs = (reg+1)/2;
+}
+
diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c
new file mode 100644
index 0000000000..52b2909a65
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c
@@ -0,0 +1,275 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+
+#define COMPAREFUNC_ALWAYS 0
+#define COMPAREFUNC_NEVER 0x1
+#define COMPAREFUNC_LESS 0x2
+#define COMPAREFUNC_EQUAL 0x3
+#define COMPAREFUNC_LEQUAL 0x4
+#define COMPAREFUNC_GREATER 0x5
+#define COMPAREFUNC_NOTEQUAL 0x6
+#define COMPAREFUNC_GEQUAL 0x7
+
+/* Samplers aren't strictly wm state from the hardware's perspective,
+ * but that is the only situation in which we use them in this driver.
+ */
+
+static int intel_translate_shadow_compare_func(unsigned func)
+{
+ switch(func) {
+ case PIPE_FUNC_NEVER:
+ return COMPAREFUNC_ALWAYS;
+ case PIPE_FUNC_LESS:
+ return COMPAREFUNC_LEQUAL;
+ case PIPE_FUNC_LEQUAL:
+ return COMPAREFUNC_LESS;
+ case PIPE_FUNC_GREATER:
+ return COMPAREFUNC_GEQUAL;
+ case PIPE_FUNC_GEQUAL:
+ return COMPAREFUNC_GREATER;
+ case PIPE_FUNC_NOTEQUAL:
+ return COMPAREFUNC_EQUAL;
+ case PIPE_FUNC_EQUAL:
+ return COMPAREFUNC_NOTEQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return COMPAREFUNC_NEVER;
+ }
+
+ debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func);
+ return COMPAREFUNC_NEVER;
+}
+
+/* The brw (and related graphics cores) do not support GL_CLAMP. The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ */
+static unsigned translate_wrap_mode( int wrap )
+{
+ switch( wrap ) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return BRW_TEXCOORDMODE_WRAP;
+ case PIPE_TEX_WRAP_CLAMP:
+ return BRW_TEXCOORDMODE_CLAMP;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return BRW_TEXCOORDMODE_CLAMP_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return BRW_TEXCOORDMODE_MIRROR;
+ default:
+ return BRW_TEXCOORDMODE_WRAP;
+ }
+}
+
+
+static unsigned U_FIXED(float value, unsigned frac_bits)
+{
+ value *= (1<<frac_bits);
+ return value < 0 ? 0 : value;
+}
+
+static int S_FIXED(float value, unsigned frac_bits)
+{
+ return value * (1<<frac_bits);
+}
+
+
+static unsigned upload_default_color( struct brw_context *brw,
+ const float *color )
+{
+ struct brw_sampler_default_color sdc;
+
+ COPY_4V(sdc.color, color);
+
+ return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc );
+}
+
+
+/*
+ */
+static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_sampler,
+ unsigned sdc_gs_offset,
+ struct brw_sampler_state *sampler)
+{
+ memset(sampler, 0, sizeof(*sampler));
+
+ switch (pipe_sampler->min_mip_filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_ANISO:
+ sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
+ break;
+ default:
+ break;
+ }
+
+ switch (pipe_sampler->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+ break;
+ default:
+ break;
+ }
+ /* Set Anisotropy:
+ */
+ switch (pipe_sampler->mag_img_filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_ANISO:
+ sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+ break;
+ default:
+ break;
+ }
+
+ if (pipe_sampler->max_anisotropy > 2.0) {
+ sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2,
+ BRW_ANISORATIO_16);
+ }
+
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s);
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_r);
+ sampler->ss1.t_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_t);
+
+ /* Fulsim complains if I don't do this. Hardware doesn't mind:
+ */
+#if 0
+ if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) {
+ sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ }
+#endif
+
+ /* Set shadow function:
+ */
+ if (pipe_sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ /* Shadowing is "enabled" by emitting a particular sampler
+ * message (sample_c). So need to recompile WM program when
+ * shadow comparison is enabled on each/any texture unit.
+ */
+ sampler->ss0.shadow_function = intel_translate_shadow_compare_func(pipe_sampler->compare_func);
+ }
+
+ /* Set LOD bias:
+ */
+ sampler->ss0.lod_bias = S_FIXED(CLAMP(pipe_sampler->lod_bias, -16, 15), 6);
+
+ sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+ /* Set BaseMipLevel, MaxLOD, MinLOD:
+ *
+ * XXX: I don't think that using firstLevel, lastLevel works,
+ * because we always setup the surface state as if firstLevel ==
+ * level zero. Probably have to subtract firstLevel from each of
+ * these:
+ */
+ sampler->ss0.base_level = U_FIXED(0, 1);
+
+ sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(pipe_sampler->max_lod, 0), 13), 6);
+ sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(pipe_sampler->min_lod, 0), 13), 6);
+
+ sampler->ss2.default_color_pointer = sdc_gs_offset >> 5;
+}
+
+
+
+/* All samplers must be uploaded in a single contiguous array, which
+ * complicates various things. However, this is still too confusing -
+ * FIXME: simplify all the different new texture state flags.
+ */
+static void upload_wm_samplers(struct brw_context *brw)
+{
+ unsigned unit;
+ unsigned sampler_count = 0;
+
+ /* BRW_NEW_SAMPLER */
+ for (unit = 0; unit < brw->num_textures && unit < brw->num_samplers;
+ unit++) {
+ /* determine unit enable/disable by looking for a bound texture */
+ if (brw->attribs.Texture[unit]) {
+ const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit];
+ unsigned sdc_gs_offset = upload_default_color(brw, sampler->border_color);
+
+ brw_update_sampler_state(sampler,
+ sdc_gs_offset,
+ &brw->wm.sampler[unit]);
+
+ sampler_count = unit + 1;
+ }
+ }
+
+ if (brw->wm.sampler_count != sampler_count) {
+ brw->wm.sampler_count = sampler_count;
+ brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+ }
+
+ brw->wm.sampler_gs_offset = 0;
+
+ if (brw->wm.sampler_count)
+ brw->wm.sampler_gs_offset =
+ brw_cache_data_sz(&brw->cache[BRW_SAMPLER],
+ brw->wm.sampler,
+ sizeof(struct brw_sampler_state) * brw->wm.sampler_count);
+}
+
+const struct brw_tracked_state brw_wm_samplers = {
+ .dirty = {
+ .brw = BRW_NEW_SAMPLER,
+ .cache = 0
+ },
+ .update = upload_wm_samplers
+};
+
diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c
new file mode 100644
index 0000000000..37a9bf919c
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm_state.c
@@ -0,0 +1,195 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_wm.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+/***********************************************************************
+ * WM unit - fragment programs and rasterization
+ */
+static void upload_wm_unit(struct brw_context *brw )
+{
+ struct brw_wm_unit_state wm;
+ unsigned max_threads;
+ unsigned per_thread;
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ max_threads = 0;
+ else
+ max_threads = 31;
+
+
+ memset(&wm, 0, sizeof(wm));
+
+ /* CACHE_NEW_WM_PROG */
+ wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1;
+ wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6;
+ wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+ wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+ wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length;
+
+ wm.wm5.max_threads = max_threads;
+
+ per_thread = align(brw->wm.prog_data->total_scratch, 1024);
+ assert(per_thread <= 12 * 1024);
+
+#if 0
+ if (brw->wm.prog_data->total_scratch) {
+ unsigned total = per_thread * (max_threads + 1);
+
+ /* Scratch space -- just have to make sure there is sufficient
+ * allocated for the active program and current number of threads.
+ */
+ brw->wm.scratch_buffer_size = total;
+ if (brw->wm.scratch_buffer &&
+ brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) {
+ dri_bo_unreference(brw->wm.scratch_buffer);
+ brw->wm.scratch_buffer = NULL;
+ }
+ if (!brw->wm.scratch_buffer) {
+ brw->wm.scratch_buffer = dri_bo_alloc(intel->intelScreen->bufmgr,
+ "wm scratch",
+ brw->wm.scratch_buffer_size,
+ 4096, DRM_BO_FLAG_MEM_TT);
+ }
+ }
+ /* XXX: Scratch buffers are not implemented correectly.
+ *
+ * The scratch offset to be programmed into wm is relative to the general
+ * state base address. However, using dri_bo_alloc/dri_bo_emit_reloc (or
+ * the previous bmGenBuffers scheme), we get an offset relative to the
+ * start of framebuffer. Even before then, it was broken in other ways,
+ * so just fail for now if we hit that path.
+ */
+ assert(brw->wm.prog_data->total_scratch == 0);
+#endif
+
+ /* CACHE_NEW_SURFACE */
+ wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
+
+ wm.thread3.urb_entry_read_offset = 0;
+ wm.thread1.depth_coef_urb_read_offset = 1;
+ wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+ /* CACHE_NEW_SAMPLER */
+ wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
+ wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ {
+ const struct brw_fragment_program *fp = brw->attribs.FragmentProgram;
+
+ if (fp->UsesDepth)
+ wm.wm5.program_uses_depth = 1; /* as far as we can tell */
+
+ if (fp->info.writes_z)
+ wm.wm5.program_computes_depth = 1;
+
+ /* BRW_NEW_ALPHA_TEST */
+ if (fp->info.uses_kill ||
+ brw->attribs.DepthStencil->alpha.enabled)
+ wm.wm5.program_uses_killpixel = 1;
+
+ wm.wm5.enable_8_pix = 1;
+ }
+
+ wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
+ wm.wm5.legacy_line_rast = 0;
+ wm.wm5.legacy_global_depth_bias = 0;
+ wm.wm5.early_depth_test = 1; /* never need to disable */
+ wm.wm5.line_aa_region_width = 0;
+ wm.wm5.line_endcap_aa_region_width = 1;
+
+ /* BRW_NEW_RASTERIZER */
+ if (brw->attribs.Raster->poly_stipple_enable)
+ wm.wm5.polygon_stipple = 1;
+
+#if 0
+ if (brw->attribs.Polygon->OffsetFill) {
+ wm.wm5.depth_offset = 1;
+ /* Something wierd going on with legacy_global_depth_bias,
+ * offset_constant, scaling and MRD. This value passes glean
+ * but gives some odd results elsewere (eg. the
+ * quad-offset-units test).
+ */
+ wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2;
+
+ /* This is the only value that passes glean:
+ */
+ wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor;
+ }
+#endif
+
+ if (brw->attribs.Raster->line_stipple_enable) {
+ wm.wm5.line_stipple = 1;
+ }
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ wm.wm4.stats_enable = 1;
+
+ brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm );
+
+ if (brw->wm.prog_data->total_scratch) {
+ /*
+ dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer,
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
+ (per_thread / 1024) - 1,
+ brw->wm.state_gs_offset +
+ ((char *)&wm.thread2 - (char *)&wm),
+ brw->wm.scratch_buffer);
+ */
+ } else {
+ wm.thread2.scratch_space_base_pointer = 0;
+ }
+}
+
+const struct brw_tracked_state brw_wm_unit = {
+ .dirty = {
+ .brw = (BRW_NEW_RASTERIZER |
+ BRW_NEW_ALPHA_TEST |
+ BRW_NEW_FS |
+ BRW_NEW_CURBE_OFFSETS),
+
+ .cache = (CACHE_NEW_SURFACE |
+ CACHE_NEW_WM_PROG |
+ CACHE_NEW_SAMPLER)
+ },
+ .update = upload_wm_unit
+};
+
diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c
new file mode 100644
index 0000000000..1a326f9918
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c
@@ -0,0 +1,304 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+static unsigned translate_tex_target( enum pipe_texture_target target )
+{
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ return BRW_SURFACE_1D;
+
+ case PIPE_TEXTURE_2D:
+ return BRW_SURFACE_2D;
+
+ case PIPE_TEXTURE_3D:
+ return BRW_SURFACE_3D;
+
+ case PIPE_TEXTURE_CUBE:
+ return BRW_SURFACE_CUBE;
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static unsigned translate_tex_format( enum pipe_format pipe_format )
+{
+ switch( pipe_format ) {
+ case PIPE_FORMAT_L8_UNORM:
+ return BRW_SURFACEFORMAT_L8_UNORM;
+
+ case PIPE_FORMAT_I8_UNORM:
+ return BRW_SURFACEFORMAT_I8_UNORM;
+
+ case PIPE_FORMAT_A8_UNORM:
+ return BRW_SURFACEFORMAT_A8_UNORM;
+
+ case PIPE_FORMAT_A8L8_UNORM:
+ return BRW_SURFACEFORMAT_L8A8_UNORM;
+
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ assert(0); /* not supported for sampling */
+ return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ return BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+
+ case PIPE_FORMAT_YCBCR_REV:
+ return BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+ case PIPE_FORMAT_YCBCR:
+ return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
+#if 0
+ case PIPE_FORMAT_RGB_FXT1:
+ case PIPE_FORMAT_RGBA_FXT1:
+ return BRW_SURFACEFORMAT_FXT1;
+#endif
+
+ case PIPE_FORMAT_Z16_UNORM:
+ return BRW_SURFACEFORMAT_I16_UNORM;
+#if 0
+ case PIPE_FORMAT_RGB_DXT1:
+ return BRW_SURFACEFORMAT_DXT1_RGB;
+
+ case PIPE_FORMAT_RGBA_DXT1:
+ return BRW_SURFACEFORMAT_BC1_UNORM;
+
+ case PIPE_FORMAT_RGBA_DXT3:
+ return BRW_SURFACEFORMAT_BC2_UNORM;
+
+ case PIPE_FORMAT_RGBA_DXT5:
+ return BRW_SURFACEFORMAT_BC3_UNORM;
+
+ case PIPE_FORMAT_SRGBA8:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB;
+ case PIPE_FORMAT_SRGB_DXT1:
+ return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+#endif
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static unsigned brw_buffer_offset(struct brw_context *brw,
+ struct pipe_buffer *buffer)
+{
+ return brw->winsys->get_buffer_offset(brw->winsys,
+ buffer,
+ 0);
+}
+
+static
+void brw_update_texture_surface( struct brw_context *brw,
+ unsigned unit )
+{
+ const struct brw_texture *tObj = brw->attribs.Texture[unit];
+ struct brw_surface_state surf;
+
+ memset(&surf, 0, sizeof(surf));
+
+ surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ surf.ss0.surface_type = translate_tex_target(tObj->base.target);
+ surf.ss0.surface_format = translate_tex_format(tObj->base.format);
+
+ /* This is ok for all textures with channel width 8bit or less:
+ */
+/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+
+ /* Updated in emit_reloc */
+ surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer );
+
+ surf.ss2.mip_count = tObj->base.last_level;
+ surf.ss2.width = tObj->base.width[0] - 1;
+ surf.ss2.height = tObj->base.height[0] - 1;
+
+ surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+ surf.ss3.tiled_surface = 0; /* always zero */
+ surf.ss3.pitch = tObj->stride - 1;
+ surf.ss3.depth = tObj->base.depth[0] - 1;
+
+ surf.ss4.min_lod = 0;
+
+ if (tObj->base.target == PIPE_TEXTURE_CUBE) {
+ surf.ss0.cube_pos_x = 1;
+ surf.ss0.cube_pos_y = 1;
+ surf.ss0.cube_pos_z = 1;
+ surf.ss0.cube_neg_x = 1;
+ surf.ss0.cube_neg_y = 1;
+ surf.ss0.cube_neg_z = 1;
+ }
+
+ brw->wm.bind.surf_ss_offset[unit + 1] =
+ brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
+}
+
+
+
+#define OFFSET(TYPE, FIELD) ( (unsigned)&(((TYPE *)0)->FIELD) )
+
+
+static void upload_wm_surfaces(struct brw_context *brw )
+{
+ unsigned i;
+
+ {
+ struct brw_surface_state surf;
+
+ /* BRW_NEW_FRAMEBUFFER
+ */
+ struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/
+
+ memset(&surf, 0, sizeof(surf));
+
+ if (pipe_surface != NULL) {
+ if (pipe_surface->block.size == 4)
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ else
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+ surf.ss0.surface_type = BRW_SURFACE_2D;
+
+ surf.ss1.base_addr = brw_buffer_offset( brw, pipe_surface->buffer );
+
+ surf.ss2.width = pipe_surface->width - 1;
+ surf.ss2.height = pipe_surface->height - 1;
+ surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+ surf.ss3.tiled_surface = 0;
+ surf.ss3.pitch = pipe_surface->stride - 1;
+ } else {
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ surf.ss0.surface_type = BRW_SURFACE_NULL;
+ }
+
+ /* BRW_NEW_BLEND */
+ surf.ss0.color_blend = (!brw->attribs.Blend->logicop_enable &&
+ brw->attribs.Blend->blend_enable);
+
+
+ surf.ss0.writedisable_red = !(brw->attribs.Blend->colormask & PIPE_MASK_R);
+ surf.ss0.writedisable_green = !(brw->attribs.Blend->colormask & PIPE_MASK_G);
+ surf.ss0.writedisable_blue = !(brw->attribs.Blend->colormask & PIPE_MASK_B);
+ surf.ss0.writedisable_alpha = !(brw->attribs.Blend->colormask & PIPE_MASK_A);
+
+
+
+
+ brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
+
+ brw->wm.nr_surfaces = 1;
+ }
+
+
+ /* BRW_NEW_TEXTURE
+ */
+ for (i = 0; i < brw->num_textures && i < brw->num_samplers; i++) {
+ const struct brw_texture *texUnit = brw->attribs.Texture[i];
+
+ if (texUnit &&
+ texUnit->base.refcount/*(texUnit->refcount > 0) == really used */) {
+
+ brw_update_texture_surface(brw, i);
+
+ brw->wm.nr_surfaces = i+2;
+ }
+ else {
+ brw->wm.bind.surf_ss_offset[i+1] = 0;
+ }
+ }
+
+ brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND],
+ &brw->wm.bind );
+}
+
+
+/* KW: Will find a different way to acheive this, see for example the
+ * state caches with relocs in the i915 swz driver.
+ */
+#if 0
+static void emit_reloc_wm_surfaces(struct brw_context *brw)
+{
+ int unit;
+
+ if (brw->state.draw_region != NULL) {
+ /* Emit framebuffer relocation */
+ dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE),
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
+ 0,
+ brw->wm.bind.surf_ss_offset[0] +
+ offsetof(struct brw_surface_state, ss1),
+ brw->state.draw_region->buffer);
+ }
+
+ /* Emit relocations for texture buffers */
+ for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+ struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit];
+ struct gl_texture_object *tObj = texUnit->_Current;
+ struct intel_texture_object *intelObj = intel_texture_object(tObj);
+
+ if (texUnit->_ReallyEnabled && intelObj->mt != NULL) {
+ dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE),
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+ 0,
+ brw->wm.bind.surf_ss_offset[unit + 1] +
+ offsetof(struct brw_surface_state, ss1),
+ intelObj->mt->region->buffer);
+ }
+ }
+}
+#endif
+
+const struct brw_tracked_state brw_wm_surfaces = {
+ .dirty = {
+ .brw = (BRW_NEW_FRAMEBUFFER |
+ BRW_NEW_BLEND |
+ BRW_NEW_TEXTURE),
+ .cache = 0
+ },
+ .update = upload_wm_surfaces,
+};
diff --git a/src/gallium/drivers/nouveau/nouveau_bo.h b/src/gallium/drivers/nouveau/nouveau_bo.h
new file mode 100644
index 0000000000..65b138283c
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_bo.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_BO_H__
+#define __NOUVEAU_BO_H__
+
+/* Relocation/Buffer type flags */
+#define NOUVEAU_BO_VRAM (1 << 0)
+#define NOUVEAU_BO_GART (1 << 1)
+#define NOUVEAU_BO_RD (1 << 2)
+#define NOUVEAU_BO_WR (1 << 3)
+#define NOUVEAU_BO_RDWR (NOUVEAU_BO_RD | NOUVEAU_BO_WR)
+#define NOUVEAU_BO_MAP (1 << 4)
+#define NOUVEAU_BO_PIN (1 << 5)
+#define NOUVEAU_BO_LOW (1 << 6)
+#define NOUVEAU_BO_HIGH (1 << 7)
+#define NOUVEAU_BO_OR (1 << 8)
+#define NOUVEAU_BO_LOCAL (1 << 9)
+#define NOUVEAU_BO_TILED (1 << 10)
+#define NOUVEAU_BO_ZTILE (1 << 11)
+#define NOUVEAU_BO_DUMMY (1 << 31)
+
+struct nouveau_bo {
+ struct nouveau_device *device;
+ uint64_t handle;
+
+ uint64_t size;
+ void *map;
+
+ uint32_t flags;
+ uint64_t offset;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_channel.h b/src/gallium/drivers/nouveau/nouveau_channel.h
new file mode 100644
index 0000000000..cd99a676bd
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_channel.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_CHANNEL_H__
+#define __NOUVEAU_CHANNEL_H__
+
+struct nouveau_channel {
+ struct nouveau_device *device;
+ int id;
+
+ struct nouveau_pushbuf *pushbuf;
+
+ struct nouveau_grobj *nullobj;
+ struct nouveau_grobj *vram;
+ struct nouveau_grobj *gart;
+
+ void *user_private;
+ void (*hang_notify)(struct nouveau_channel *);
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h
new file mode 100644
index 0000000000..3df3d7b2b8
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_class.h
@@ -0,0 +1,8006 @@
+/*************************************************************************
+
+ Autogenerated file, do not edit !
+
+**************************************************************************
+
+ Copyright (C) 2006-2008 :
+ Dmitry Baryshkov,
+ Laurent Carlier,
+ Matthieu Castet,
+ Dawid Gajownik,
+ Jeremy Kolb,
+ Stephane Loeuillet,
+ Patrice Mandin,
+ Stephane Marchesin,
+ Serge Martin,
+ Sylvain Munaut,
+ Simon Raffeiner,
+ Ben Skeggs,
+ Erik Waling,
+ koala_br,
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*************************************************************************/
+
+
+#ifndef NOUVEAU_REG_H
+#define NOUVEAU_REG_H 1
+
+
+#define NV01_ROOT 0x00000001
+
+
+
+#define NV01_CONTEXT_DMA 0x00000002
+
+
+
+#define NV01_DEVICE 0x00000003
+
+
+
+#define NV01_TIMER 0x00000004
+
+#define NV01_TIMER_SYNCHRONIZE 0x00000100
+#define NV01_TIMER_STOP_ALARM 0x00000104
+#define NV01_TIMER_DMA_NOTIFY 0x00000180
+#define NV01_TIMER_TIME(x) (0x00000300+((x)*4))
+#define NV01_TIMER_TIME__SIZE 0x00000002
+#define NV01_TIMER_ALARM_NOTIFY 0x00000308
+
+
+#define NV_IMAGE_STENCIL 0x00000010
+
+#define NV_IMAGE_STENCIL_NOTIFY 0x00000104
+#define NV_IMAGE_STENCIL_DMA_NOTIFY 0x00000180
+#define NV_IMAGE_STENCIL_IMAGE_OUTPUT 0x00000200
+#define NV_IMAGE_STENCIL_IMAGE_INPUT(x) (0x00000204+((x)*4))
+#define NV_IMAGE_STENCIL_IMAGE_INPUT__SIZE 0x00000002
+
+
+#define NV_IMAGE_BLEND_AND 0x00000011
+
+#define NV_IMAGE_BLEND_AND_NOP 0x00000100
+#define NV_IMAGE_BLEND_AND_NOTIFY 0x00000104
+#define NV_IMAGE_BLEND_AND_DMA_NOTIFY 0x00000180
+#define NV_IMAGE_BLEND_AND_IMAGE_OUTPUT 0x00000200
+#define NV_IMAGE_BLEND_AND_BETA_INPUT 0x00000204
+#define NV_IMAGE_BLEND_AND_IMAGE_INPUT 0x00000208
+
+
+#define NV01_CONTEXT_BETA1 0x00000012
+
+#define NV01_CONTEXT_BETA1_NOP 0x00000100
+#define NV01_CONTEXT_BETA1_NOTIFY 0x00000104
+#define NV01_CONTEXT_BETA1_DMA_NOTIFY 0x00000180
+#define NV01_CONTEXT_BETA1_BETA_1D31 0x00000300
+
+
+#define NV_IMAGE_ROP_AND 0x00000013
+
+#define NV_IMAGE_ROP_AND_NOTIFY 0x00000104
+#define NV_IMAGE_ROP_AND_DMA_NOTIFY 0x00000180
+#define NV_IMAGE_ROP_AND_IMAGE_OUTPUT 0x00000200
+#define NV_IMAGE_ROP_AND_ROP_INPUT 0x00000204
+#define NV_IMAGE_ROP_AND_IMAGE_INPUT(x) (0x00000208+((x)*4))
+#define NV_IMAGE_ROP_AND_IMAGE_INPUT__SIZE 0x00000002
+
+
+#define NV_IMAGE_COLOR_KEY 0x00000015
+
+
+
+#define NV01_CONTEXT_COLOR_KEY 0x00000017
+
+#define NV01_CONTEXT_COLOR_KEY_NOP 0x00000100
+#define NV01_CONTEXT_COLOR_KEY_NOTIFY 0x00000104
+#define NV01_CONTEXT_COLOR_KEY_DMA_NOTIFY 0x00000180
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT 0x00000300
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A8Y8 0x00000001
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X24Y8 0x00000002
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A1R5G5B5 0x00000003
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X17R5G5B5 0x00000004
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A8R8G8B8 0x00000005
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X8R8G8B8 0x00000006
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A16Y16 0x00000007
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16Y16 0x00000008
+#define NV01_CONTEXT_COLOR_KEY_COLOR 0x00000304
+
+
+#define NV01_CONTEXT_PATTERN 0x00000018
+
+#define NV01_CONTEXT_PATTERN_NOP 0x00000100
+#define NV01_CONTEXT_PATTERN_NOTIFY 0x00000104
+#define NV01_CONTEXT_PATTERN_DMA_NOTIFY 0x00000180
+#define NV01_CONTEXT_PATTERN_COLOR_FORMAT 0x00000300
+#define NV01_CONTEXT_PATTERN_MONOCHROME_FORMAT 0x00000304
+#define NV01_CONTEXT_PATTERN_SHAPE 0x00000308
+#define NV01_CONTEXT_PATTERN_COLOR(x) (0x00000310+((x)*4))
+#define NV01_CONTEXT_PATTERN_COLOR__SIZE 0x00000002
+#define NV01_CONTEXT_PATTERN_PATTERN(x) (0x00000318+((x)*4))
+#define NV01_CONTEXT_PATTERN_PATTERN__SIZE 0x00000002
+
+
+#define NV01_CONTEXT_CLIP_RECTANGLE 0x00000019
+
+#define NV01_CONTEXT_CLIP_RECTANGLE_NOP 0x00000100
+#define NV01_CONTEXT_CLIP_RECTANGLE_NOTIFY 0x00000104
+#define NV01_CONTEXT_CLIP_RECTANGLE_DMA_NOTIFY 0x00000180
+#define NV01_CONTEXT_CLIP_RECTANGLE_POINT 0x00000300
+#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X_SHIFT 0
+#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X_MASK 0x0000ffff
+#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y_SHIFT 16
+#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y_MASK 0xffff0000
+#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE 0x00000304
+#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W_SHIFT 0
+#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W_MASK 0x0000ffff
+#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H_SHIFT 16
+#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H_MASK 0xffff0000
+
+
+#define NV01_RENDER_SOLID_LINE 0x0000001c
+
+#define NV01_RENDER_SOLID_LINE_NOP 0x00000100
+#define NV01_RENDER_SOLID_LINE_NOTIFY 0x00000104
+#define NV01_RENDER_SOLID_LINE_PATCH 0x0000010c
+#define NV01_RENDER_SOLID_LINE_DMA_NOTIFY 0x00000180
+#define NV01_RENDER_SOLID_LINE_CLIP_RECTANGLE 0x00000184
+#define NV01_RENDER_SOLID_LINE_PATTERN 0x00000188
+#define NV01_RENDER_SOLID_LINE_ROP 0x0000018c
+#define NV01_RENDER_SOLID_LINE_BETA1 0x00000190
+#define NV01_RENDER_SOLID_LINE_SURFACE 0x00000194
+#define NV01_RENDER_SOLID_LINE_OPERATION 0x000002fc
+#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_AND 0x00000000
+#define NV01_RENDER_SOLID_LINE_OPERATION_ROP_AND 0x00000001
+#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_AND 0x00000002
+#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY 0x00000003
+#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_PREMULT 0x00000005
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT 0x00000300
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A8Y8 0x00000001
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X24Y8 0x00000002
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A1R5G5B5 0x00000003
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X17R5G5B5 0x00000004
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A8R8G8B8 0x00000005
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X8R8G8B8 0x00000006
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A16Y16 0x00000007
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16Y16 0x00000008
+#define NV01_RENDER_SOLID_LINE_COLOR 0x00000304
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0(x) (0x00000400+((x)*8))
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X_SHIFT 0
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y_SHIFT 16
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1(x) (0x00000404+((x)*8))
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X_SHIFT 0
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y_SHIFT 16
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X(x) (0x00000480+((x)*16))
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y(x) (0x00000484+((x)*16))
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X(x) (0x00000488+((x)*16))
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y(x) (0x0000048c+((x)*16))
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_POLYLINE(x) (0x00000500+((x)*4))
+#define NV01_RENDER_SOLID_LINE_POLYLINE__SIZE 0x00000020
+#define NV01_RENDER_SOLID_LINE_POLYLINE_X_SHIFT 0
+#define NV01_RENDER_SOLID_LINE_POLYLINE_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_LINE_POLYLINE_Y_SHIFT 16
+#define NV01_RENDER_SOLID_LINE_POLYLINE_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X(x) (0x00000580+((x)*8))
+#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y(x) (0x00000584+((x)*8))
+#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR(x) (0x00000600+((x)*8))
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT(x) (0x00000604+((x)*8))
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X_SHIFT 0
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y_SHIFT 16
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y_MASK 0xffff0000
+
+
+#define NV01_RENDER_SOLID_TRIANGLE 0x0000001d
+
+#define NV01_RENDER_SOLID_TRIANGLE_NOP 0x00000100
+#define NV01_RENDER_SOLID_TRIANGLE_NOTIFY 0x00000104
+#define NV01_RENDER_SOLID_TRIANGLE_PATCH 0x0000010c
+#define NV01_RENDER_SOLID_TRIANGLE_DMA_NOTIFY 0x00000180
+#define NV01_RENDER_SOLID_TRIANGLE_CLIP_RECTANGLE 0x00000184
+#define NV01_RENDER_SOLID_TRIANGLE_PATTERN 0x00000188
+#define NV01_RENDER_SOLID_TRIANGLE_ROP 0x0000018c
+#define NV01_RENDER_SOLID_TRIANGLE_BETA1 0x00000190
+#define NV01_RENDER_SOLID_TRIANGLE_SURFACE 0x00000194
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION 0x000002fc
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_AND 0x00000000
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_ROP_AND 0x00000001
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_AND 0x00000002
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY 0x00000003
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_PREMULT 0x00000005
+#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT 0x00000300
+#define NV01_RENDER_SOLID_TRIANGLE_COLOR 0x00000304
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0 0x00000310
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1 0x00000314
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2 0x00000318
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_X 0x00000320
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_Y 0x00000324
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_X 0x00000328
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_Y 0x0000032c
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_X 0x00000330
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_Y 0x00000334
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH(x) (0x00000400+((x)*4))
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH__SIZE 0x00000020
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X(x) (0x00000480+((x)*8))
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X__SIZE 0x00000010
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y(x) (0x00000484+((x)*8))
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y__SIZE 0x00000010
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR(x) (0x00000500+((x)*16))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR__SIZE 0x00000008
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0(x) (0x00000504+((x)*16))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0__SIZE 0x00000008
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1(x) (0x00000508+((x)*16))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1__SIZE 0x00000008
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2(x) (0x0000050c+((x)*16))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2__SIZE 0x00000008
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR(x) (0x00000580+((x)*8))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR__SIZE 0x00000010
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT(x) (0x00000584+((x)*8))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT__SIZE 0x00000010
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y_MASK 0xffff0000
+
+
+#define NV01_RENDER_SOLID_RECTANGLE 0x0000001e
+
+#define NV01_RENDER_SOLID_RECTANGLE_NOP 0x00000100
+#define NV01_RENDER_SOLID_RECTANGLE_NOTIFY 0x00000104
+#define NV01_RENDER_SOLID_RECTANGLE_PATCH 0x0000010c
+#define NV01_RENDER_SOLID_RECTANGLE_DMA_NOTIFY 0x00000180
+#define NV01_RENDER_SOLID_RECTANGLE_CLIP_RECTANGLE 0x00000184
+#define NV01_RENDER_SOLID_RECTANGLE_PATTERN 0x00000188
+#define NV01_RENDER_SOLID_RECTANGLE_ROP 0x0000018c
+#define NV01_RENDER_SOLID_RECTANGLE_BETA1 0x00000190
+#define NV01_RENDER_SOLID_RECTANGLE_SURFACE 0x00000194
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION 0x000002fc
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_AND 0x00000000
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_ROP_AND 0x00000001
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_AND 0x00000002
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY 0x00000003
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_PREMULT 0x00000005
+#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT 0x00000300
+#define NV01_RENDER_SOLID_RECTANGLE_COLOR 0x00000304
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT(x) (0x00000400+((x)*8))
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT__SIZE 0x00000010
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X_SHIFT 0
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y_SHIFT 16
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE(x) (0x00000404+((x)*8))
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE__SIZE 0x00000010
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W_SHIFT 0
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H_SHIFT 16
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H_MASK 0xffff0000
+
+
+#define NV01_IMAGE_BLIT 0x0000001f
+
+#define NV01_IMAGE_BLIT_NOP 0x00000100
+#define NV01_IMAGE_BLIT_NOTIFY 0x00000104
+#define NV01_IMAGE_BLIT_PATCH 0x0000010c
+#define NV01_IMAGE_BLIT_DMA_NOTIFY 0x00000180
+#define NV01_IMAGE_BLIT_COLOR_KEY 0x00000184
+#define NV01_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188
+#define NV01_IMAGE_BLIT_PATTERN 0x0000018c
+#define NV01_IMAGE_BLIT_ROP 0x00000190
+#define NV01_IMAGE_BLIT_BETA1 0x00000194
+#define NV01_IMAGE_BLIT_SURFACE 0x0000019c
+#define NV01_IMAGE_BLIT_OPERATION 0x000002fc
+#define NV01_IMAGE_BLIT_IMAGE_INPUT 0x00000204
+#define NV01_IMAGE_BLIT_POINT_IN 0x00000300
+#define NV01_IMAGE_BLIT_POINT_IN_X_SHIFT 0
+#define NV01_IMAGE_BLIT_POINT_IN_X_MASK 0x0000ffff
+#define NV01_IMAGE_BLIT_POINT_IN_Y_SHIFT 16
+#define NV01_IMAGE_BLIT_POINT_IN_Y_MASK 0xffff0000
+#define NV01_IMAGE_BLIT_POINT_OUT 0x00000304
+#define NV01_IMAGE_BLIT_POINT_OUT_X_SHIFT 0
+#define NV01_IMAGE_BLIT_POINT_OUT_X_MASK 0x0000ffff
+#define NV01_IMAGE_BLIT_POINT_OUT_Y_SHIFT 16
+#define NV01_IMAGE_BLIT_POINT_OUT_Y_MASK 0xffff0000
+#define NV01_IMAGE_BLIT_SIZE 0x00000308
+#define NV01_IMAGE_BLIT_SIZE_W_SHIFT 0
+#define NV01_IMAGE_BLIT_SIZE_W_MASK 0x0000ffff
+#define NV01_IMAGE_BLIT_SIZE_H_SHIFT 16
+#define NV01_IMAGE_BLIT_SIZE_H_MASK 0xffff0000
+
+
+#define NV01_IMAGE_FROM_CPU 0x00000021
+
+#define NV01_IMAGE_FROM_CPU_NOP 0x00000100
+#define NV01_IMAGE_FROM_CPU_NOTIFY 0x00000104
+#define NV01_IMAGE_FROM_CPU_PATCH 0x0000010c
+#define NV01_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180
+#define NV01_IMAGE_FROM_CPU_COLOR_KEY 0x00000184
+#define NV01_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x00000188
+#define NV01_IMAGE_FROM_CPU_PATTERN 0x0000018c
+#define NV01_IMAGE_FROM_CPU_ROP 0x00000190
+#define NV01_IMAGE_FROM_CPU_BETA1 0x00000194
+#define NV01_IMAGE_FROM_CPU_SURFACE 0x00000198
+#define NV01_IMAGE_FROM_CPU_OPERATION 0x000002fc
+#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_AND 0x00000000
+#define NV01_IMAGE_FROM_CPU_OPERATION_ROP_AND 0x00000001
+#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_AND 0x00000002
+#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY 0x00000003
+#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_PREMULT 0x00000005
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_Y8 0x00000001
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A1R5G5B5 0x00000002
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X1R5G5B5 0x00000003
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A8R8G8B8 0x00000004
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X8R8G8B8 0x00000005
+#define NV01_IMAGE_FROM_CPU_POINT 0x00000304
+#define NV01_IMAGE_FROM_CPU_POINT_X_SHIFT 0
+#define NV01_IMAGE_FROM_CPU_POINT_X_MASK 0x0000ffff
+#define NV01_IMAGE_FROM_CPU_POINT_Y_SHIFT 16
+#define NV01_IMAGE_FROM_CPU_POINT_Y_MASK 0xffff0000
+#define NV01_IMAGE_FROM_CPU_SIZE_OUT 0x00000308
+#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W_SHIFT 0
+#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W_MASK 0x0000ffff
+#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H_SHIFT 16
+#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H_MASK 0xffff0000
+#define NV01_IMAGE_FROM_CPU_SIZE_IN 0x0000030c
+#define NV01_IMAGE_FROM_CPU_SIZE_IN_W_SHIFT 0
+#define NV01_IMAGE_FROM_CPU_SIZE_IN_W_MASK 0x0000ffff
+#define NV01_IMAGE_FROM_CPU_SIZE_IN_H_SHIFT 16
+#define NV01_IMAGE_FROM_CPU_SIZE_IN_H_MASK 0xffff0000
+#define NV01_IMAGE_FROM_CPU_COLOR(x) (0x00000400+((x)*4))
+#define NV01_IMAGE_FROM_CPU_COLOR__SIZE 0x00000020
+
+
+#define NV01_NULL 0x00000030
+
+
+
+#define NV03_STRETCHED_IMAGE_FROM_CPU 0x00000036
+
+#define NV03_STRETCHED_IMAGE_FROM_CPU_NOP 0x00000100
+#define NV03_STRETCHED_IMAGE_FROM_CPU_NOTIFY 0x00000104
+#define NV03_STRETCHED_IMAGE_FROM_CPU_PATCH 0x0000010c
+#define NV03_STRETCHED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180
+#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_KEY 0x00000184
+#define NV03_STRETCHED_IMAGE_FROM_CPU_PATTERN 0x00000188
+#define NV03_STRETCHED_IMAGE_FROM_CPU_ROP 0x0000018c
+#define NV03_STRETCHED_IMAGE_FROM_CPU_BETA1 0x00000190
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000194
+#define NV03_STRETCHED_IMAGE_FROM_CPU_OPERATION 0x000002fc
+#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN 0x00000304
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W_SHIFT 0
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W_MASK 0x0000ffff
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H_SHIFT 16
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H_MASK 0xffff0000
+#define NV03_STRETCHED_IMAGE_FROM_CPU_DX_DU 0x00000308
+#define NV03_STRETCHED_IMAGE_FROM_CPU_DY_DV 0x0000030c
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT 0x00000310
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X_SHIFT 0
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X_MASK 0x0000ffff
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y_SHIFT 16
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y_MASK 0xffff0000
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE 0x00000314
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W_SHIFT 0
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W_MASK 0x0000ffff
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H_SHIFT 16
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H_MASK 0xffff0000
+#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4 0x00000318
+#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X_SHIFT 0
+#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X_MASK 0x0000ffff
+#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y_SHIFT 16
+#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y_MASK 0xffff0000
+#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR(x) (0x00000400+((x)*4))
+#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR__SIZE 0x00000020
+
+
+#define NV03_SCALED_IMAGE_FROM_MEMORY 0x00000037
+
+#define NV03_SCALED_IMAGE_FROM_MEMORY_NOP 0x00000100
+#define NV03_SCALED_IMAGE_FROM_MEMORY_NOTIFY 0x00000104
+#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180
+#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184
+#define NV03_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188
+#define NV03_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c
+#define NV03_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190
+#define NV03_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000194
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_MASK 0xffff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_MASK 0xffff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT 0x00000310
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_X_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_X_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_Y_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_Y_MASK 0xffff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE 0x00000314
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_W_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_W_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_H_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_H_MASK 0xffff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_DELTA_DU_DX 0x00000318
+#define NV03_SCALED_IMAGE_FROM_MEMORY_DELTA_DV_DY 0x0000031c
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE 0x00000400
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_W_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_W_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_H_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_H_MASK 0xffff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT 0x00000404
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_PITCH_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_PITCH_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_MASK 0x00ff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_CENTER 0x00010000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_CORNER 0x00020000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_INTERPOLATOR_SHIFT 24
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_INTERPOLATOR_MASK 0xff000000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_OFFSET 0x00000408
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT 0x0000040c
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_U_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_U_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_V_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_V_MASK 0xffff0000
+
+
+#define NV04_DVD_SUBPICTURE 0x00000038
+
+#define NV04_DVD_SUBPICTURE_NOP 0x00000100
+#define NV04_DVD_SUBPICTURE_NOTIFY 0x00000104
+#define NV04_DVD_SUBPICTURE_WAIT_FOR_IDLE 0x00000108
+#define NV04_DVD_SUBPICTURE_DMA_NOTIFY 0x00000180
+#define NV04_DVD_SUBPICTURE_DMA_OVERLAY 0x00000184
+#define NV04_DVD_SUBPICTURE_DMA_IMAGEIN 0x00000188
+#define NV04_DVD_SUBPICTURE_DMA_IMAGEOUT 0x0000018c
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT 0x00000300
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_X_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_X_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_Y_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_Y_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE 0x00000304
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_W_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_W_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_H_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_H_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT 0x00000308
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_PITCH_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_PITCH_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_COLOR_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_COLOR_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_OFFSET 0x0000030c
+#define NV04_DVD_SUBPICTURE_IMAGEIN_DELTA_DU_DX 0x00000310
+#define NV04_DVD_SUBPICTURE_IMAGEIN_DELTA_DV_DY 0x00000314
+#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE 0x00000318
+#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_W_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_W_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_H_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_H_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT 0x0000031c
+#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_PITCH_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_PITCH_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_COLOR_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_COLOR_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_IMAGEIN_OFFSET 0x00000320
+#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT 0x00000324
+#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_U_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_U_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_V_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_V_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_OVERLAY_DELTA_DU_DX 0x00000328
+#define NV04_DVD_SUBPICTURE_OVERLAY_DELTA_DV_DY 0x0000032c
+#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE 0x00000330
+#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_W_SHIFT 0
+#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_W_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_H_SHIFT 16
+#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_H_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT 0x00000334
+#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_PITCH_SHIFT 0
+#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_PITCH_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_COLOR_SHIFT 16
+#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_COLOR_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_OVERLAY_OFFSET 0x00000338
+#define NV04_DVD_SUBPICTURE_OVERLAY_POINT 0x0000033c
+#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_U_SHIFT 0
+#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_U_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_V_SHIFT 16
+#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_V_MASK 0xffff0000
+
+
+#define NV04_MEMORY_TO_MEMORY_FORMAT 0x00000039
+
+#define NV04_MEMORY_TO_MEMORY_FORMAT_NOP 0x00000100
+#define NV04_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104
+#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY 0x00000180
+#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN 0x00000184
+#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_OUT 0x00000188
+#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c
+#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT 0x00000310
+#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN 0x00000314
+#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT 0x00000318
+#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN 0x0000031c
+#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT 0x00000320
+#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT 0x00000324
+#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_SHIFT 0
+#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_MASK 0x0000000f
+#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_SHIFT 8
+#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_MASK 0x00000f00
+#define NV04_MEMORY_TO_MEMORY_FORMAT_BUF_NOTIFY 0x00000328
+
+
+#define NV01_MEMORY_LOCAL_BANKED 0x0000003d
+
+
+
+#define NV01_MAPPING_SYSTEM 0x0000003e
+
+
+
+#define NV03_MEMORY_LOCAL_CURSOR 0x0000003f
+
+
+
+#define NV01_MEMORY_LOCAL_LINEAR 0x00000040
+
+
+
+#define NV01_MAPPING_LOCAL 0x00000041
+
+
+
+#define NV04_CONTEXT_SURFACES_2D 0x00000042
+
+#define NV04_CONTEXT_SURFACES_2D_NOP 0x00000100
+#define NV04_CONTEXT_SURFACES_2D_NOTIFY 0x00000104
+#define NV04_CONTEXT_SURFACES_2D_PM_TRIGGER 0x00000140
+#define NV04_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180
+#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE 0x00000184
+#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_DESTIN 0x00000188
+#define NV04_CONTEXT_SURFACES_2D_FORMAT 0x00000300
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y8 0x00000001
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_Z1R5G5B5 0x00000002
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_X1R5G5B5 0x00000003
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5 0x00000004
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y16 0x00000005
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_Z8R8G8B8 0x00000006
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_X8R8G8B8 0x00000007
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_Z1A7R8G8B8 0x00000008
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_X1A7R8G8B8 0x00000009
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8 0x0000000a
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y32 0x0000000b
+#define NV04_CONTEXT_SURFACES_2D_PITCH 0x00000304
+#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE_SHIFT 0
+#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE_MASK 0x0000ffff
+#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN_SHIFT 16
+#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN_MASK 0xffff0000
+#define NV04_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308
+#define NV04_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c
+
+
+#define NV03_CONTEXT_ROP 0x00000043
+
+#define NV03_CONTEXT_ROP_NOP 0x00000100
+#define NV03_CONTEXT_ROP_NOTIFY 0x00000104
+#define NV03_CONTEXT_ROP_DMA_NOTIFY 0x00000180
+#define NV03_CONTEXT_ROP_ROP 0x00000300
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_SHIFT 0
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_MASK 0x0000000f
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_CLEAR 0x00000000
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NOR 0x00000001
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND_INVERTED 0x00000002
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_COPY_INVERTED 0x00000003
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND_REVERSE 0x00000004
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_INVERT 0x00000005
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_XOR 0x00000006
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NAND 0x00000007
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND 0x00000008
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_EQUI 0x00000009
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NOOP 0x0000000a
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR_INVERTED 0x0000000b
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_COPY 0x0000000c
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR_REVERSE 0x0000000d
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR 0x0000000e
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_SET 0x0000000f
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_SHIFT 4
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_MASK 0x000000f0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_CLEAR 0x00000000
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NOR 0x00000010
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND_INVERTED 0x00000020
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_COPY_INVERTED 0x00000030
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND_REVERSE 0x00000040
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_INVERT 0x00000050
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_XOR 0x00000060
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NAND 0x00000070
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND 0x00000080
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_EQUI 0x00000090
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NOOP 0x000000a0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR_INVERTED 0x000000b0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_COPY 0x000000c0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR_REVERSE 0x000000d0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR 0x000000e0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_SET 0x000000f0
+
+
+#define NV04_IMAGE_PATTERN 0x00000044
+
+#define NV04_IMAGE_PATTERN_NOP 0x00000100
+#define NV04_IMAGE_PATTERN_NOTIFY 0x00000104
+#define NV04_IMAGE_PATTERN_DMA_NOTIFY 0x00000180
+#define NV04_IMAGE_PATTERN_COLOR_FORMAT 0x00000300
+#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A16R5G6B5 0x00000001
+#define NV04_IMAGE_PATTERN_COLOR_FORMAT_X16A1R5G5B5 0x00000002
+#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A8R8G8B8 0x00000003
+#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT 0x00000304
+#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_CGA6 0x00000001
+#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_LE 0x00000002
+#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE 0x00000308
+#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_8X8 0x00000000
+#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_64X1 0x00000001
+#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_1X64 0x00000002
+#define NV04_IMAGE_PATTERN_PATTERN_SELECT 0x0000030c
+#define NV04_IMAGE_PATTERN_PATTERN_SELECT_MONO 0x00000001
+#define NV04_IMAGE_PATTERN_PATTERN_SELECT_COLOR 0x00000002
+#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR0 0x00000310
+#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR1 0x00000314
+#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN0 0x00000318
+#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN1 0x0000031c
+#define NV04_IMAGE_PATTERN_PATTERN_Y8(x) (0x00000400+((x)*4))
+#define NV04_IMAGE_PATTERN_PATTERN_Y8__SIZE 0x00000010
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0_SHIFT 0
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0_MASK 0x000000ff
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1_SHIFT 8
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1_MASK 0x0000ff00
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2_SHIFT 16
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2_MASK 0x00ff0000
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3_SHIFT 24
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3_MASK 0xff000000
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5(x) (0x00000500+((x)*4))
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5__SIZE 0x00000020
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0_SHIFT 0
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0_MASK 0x0000001f
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0_SHIFT 5
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0_MASK 0x000007e0
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0_SHIFT 11
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0_MASK 0x0000f800
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1_SHIFT 16
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1_MASK 0x001f0000
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1_SHIFT 21
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1_MASK 0x07e00000
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1_SHIFT 27
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1_MASK 0xf8000000
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5(x) (0x00000600+((x)*4))
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5__SIZE 0x00000020
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0_SHIFT 0
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0_MASK 0x0000001f
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0_SHIFT 5
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0_MASK 0x000003e0
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0_SHIFT 10
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0_MASK 0x00007c00
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1_SHIFT 16
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1_MASK 0x001f0000
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1_SHIFT 21
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1_MASK 0x03e00000
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1_SHIFT 26
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1_MASK 0x7c000000
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8(x) (0x00000700+((x)*4))
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8__SIZE 0x00000040
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B_SHIFT 0
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B_MASK 0x000000ff
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G_SHIFT 8
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G_MASK 0x0000ff00
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R_SHIFT 16
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R_MASK 0x00ff0000
+
+
+#define NV03_VIDEO_LUT_CURSOR_DAC 0x00000046
+
+#define NV03_VIDEO_LUT_CURSOR_DAC_SYNCHRONIZE 0x00000100
+#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_IMAGE 0x00000104
+#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_CURSOR 0x00000108
+#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_DAC 0x0000010c
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_NOTIFY 0x00000180
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_IMAGE(x) (0x00000184+((x)*4))
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_IMAGE__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_LUT(x) (0x0000018c+((x)*4))
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_LUT__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_CURSOR(x) (0x00000194+((x)*4))
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_CURSOR__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_GET 0x000002fc
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_OFFSET(x) (0x00000300+((x)*8))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_OFFSET__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT(x) (0x00000304+((x)*8))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_PITCH_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_PITCH_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_COLOR_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_COLOR_MASK 0x0fff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_NOTIFY_SHIFT 28
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_NOTIFY_MASK 0xf0000000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_OFFSET(x) (0x00000340+((x)*12))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_OFFSET__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT(x) (0x00000344+((x)*12))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_X_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_X_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_Y_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_Y_MASK 0xffff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_FORMAT(x) (0x00000348+((x)*12))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_FORMAT__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A 0x00000358
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_X_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_X_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_Y_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_Y_MASK 0xffff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE(x) (0x00000380+((x)*16))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_W_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_W_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_H_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_H_MASK 0xffff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC(x) (0x00000384+((x)*16))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_START_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_START_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_WIDTH_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_WIDTH_MASK 0x0fff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_POLARITY_SHIFT 28
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_POLARITY_MASK 0xf0000000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC(x) (0x00000388+((x)*16))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_START_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_START_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_WIDTH_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_WIDTH_MASK 0x0fff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_POLARITY_SHIFT 28
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_POLARITY_MASK 0xf0000000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE(x) (0x0000038c+((x)*16))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_WIDTH_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_WIDTH_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_HEIGHT_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_HEIGHT_MASK 0x0fff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_NOTIFY_SHIFT 28
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_NOTIFY_MASK 0xf0000000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_PIXEL_CLOCK 0x000003a0
+
+
+#define NV03_DX3_TEXTURED_TRIANGLE 0x00000048
+
+#define NV03_DX3_TEXTURED_TRIANGLE_NOP 0x00000100
+#define NV03_DX3_TEXTURED_TRIANGLE_NOTIFY 0x00000104
+#define NV03_DX3_TEXTURED_TRIANGLE_PATCH 0x0000010c
+#define NV03_DX3_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180
+#define NV03_DX3_TEXTURED_TRIANGLE_DMA_TEXTURE 0x00000184
+#define NV03_DX3_TEXTURED_TRIANGLE_CLIP_RECTANGLE 0x00000188
+#define NV03_DX3_TEXTURED_TRIANGLE_SURFACE 0x0000018c
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_OFFSET 0x00000304
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT 0x00000308
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_MASK_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_MASK_MASK 0x0000ffff
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_ENABLE_SHIFT 16
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_ENABLE_MASK 0x000f0000
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_SHIFT 20
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_MASK 0x00f00000
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MIN_SHIFT 24
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MIN_MASK 0x0f000000
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MAX_SHIFT 28
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MAX_MASK 0xf0000000
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER 0x0000030c
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_X_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_X_MASK 0x0000001f
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_Y_SHIFT 8
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_Y_MASK 0x00001f00
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SIZE_ADJUST_SHIFT 16
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SIZE_ADJUST_MASK 0x00ff0000
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR 0x00000310
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_B_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_B_MASK 0x000000ff
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_G_SHIFT 8
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_G_MASK 0x0000ff00
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_R_SHIFT 16
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_R_MASK 0x00ff0000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT 0x00000314
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_INTERPOLATOR_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_INTERPOLATOR_MASK 0x0000000f
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_U_SHIFT 4
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_U_MASK 0x00000030
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_V_SHIFT 6
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_V_MASK 0x000000c0
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SOURCE_COLOR_SHIFT 8
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SOURCE_COLOR_MASK 0x00000f00
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_CULLING_SHIFT 12
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_CULLING_MASK 0x00007000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_PERSPECTIVE_ENABLE (1 << 15)
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_FUNC_SHIFT 16
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_FUNC_MASK 0x000f0000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_WRITE_ENABLE_SHIFT 20
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_WRITE_ENABLE_MASK 0x00f00000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_COLOR_WRITE_ENABLE_SHIFT 24
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_COLOR_WRITE_ENABLE_MASK 0x07000000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_ROP_SHIFT 27
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_ROP_MASK 0x18000000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_BETA (1 << 29)
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_DST_BLEND (1 << 30)
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SRC_BLEND (1 << 31)
+#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL 0x00000318
+#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_REF_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_REF_MASK 0x000000ff
+#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_FUNC_SHIFT 8
+#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_FUNC_MASK 0xffffff00
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR(x) (0x00001000+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I0_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I0_MASK 0x0000000f
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I1_SHIFT 4
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I1_MASK 0x000000f0
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I2_SHIFT 8
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I2_MASK 0x00000f00
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I3_SHIFT 12
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I3_MASK 0x0000f000
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I4_SHIFT 16
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I4_MASK 0x000f0000
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I5_SHIFT 20
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I5_MASK 0x00f00000
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_FOG_SHIFT 24
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_FOG_MASK 0xff000000
+#define NV03_DX3_TEXTURED_TRIANGLE_COLOR(x) (0x00001004+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_COLOR__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_X(x) (0x00001008+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_X__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_Y(x) (0x0000100c+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_Y__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_Z(x) (0x00001010+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_Z__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_M(x) (0x00001014+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_M__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_U(x) (0x00001018+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_U__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_V(x) (0x0000101c+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_V__SIZE 0x00000040
+
+
+#define NV04_GDI_RECTANGLE_TEXT 0x0000004a
+
+#define NV04_GDI_RECTANGLE_TEXT_NOP 0x00000100
+#define NV04_GDI_RECTANGLE_TEXT_NOTIFY 0x00000104
+#define NV04_GDI_RECTANGLE_TEXT_PATCH 0x0000010c
+#define NV04_GDI_RECTANGLE_TEXT_PM_TRIGGER 0x00000140
+#define NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180
+#define NV04_GDI_RECTANGLE_TEXT_DMA_FONTS 0x00000184
+#define NV04_GDI_RECTANGLE_TEXT_PATTERN 0x00000188
+#define NV04_GDI_RECTANGLE_TEXT_ROP 0x0000018c
+#define NV04_GDI_RECTANGLE_TEXT_BETA1 0x00000190
+#define NV04_GDI_RECTANGLE_TEXT_BETA4 0x00000194
+#define NV04_GDI_RECTANGLE_TEXT_SURFACE 0x00000198
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_AND 0x00000000
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_ROP_AND 0x00000001
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_AND 0x00000002
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY 0x00000003
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_PREMULT 0x00000005
+#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300
+#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5 0x00000001
+#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_X16A1R5G5B5 0x00000002
+#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8 0x00000003
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_CGA6 0x00000001
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE 0x00000002
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(x) (0x00000400+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT__SIZE 0x00000020
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE(x) (0x00000404+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE__SIZE 0x00000020
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0 0x000005f4
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1 0x000005f8
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_B 0x000005fc
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0(x) (0x00000600+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0__SIZE 0x00000020
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1(x) (0x00000604+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1__SIZE 0x00000020
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x000007ec
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x000007f0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_C 0x000007f4
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_C 0x000007f8
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_POINT_C 0x000007fc
+#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(x) (0x00000800+((x)*4))
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__SIZE 0x00000080
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x00000be4
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x00000be8
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_COLOR0_E 0x00000bec
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_E 0x00000bf0
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x00000bf4
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x00000bf8
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E 0x00000bfc
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(x) (0x00000c00+((x)*4))
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__SIZE 0x00000080
+#define NV04_GDI_RECTANGLE_TEXT_FONT_F 0x00000ff0
+#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET_MASK 0x0fffffff
+#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH_SHIFT 28
+#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH_MASK 0xf0000000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0 0x00000ff4
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1 0x00000ff8
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_F 0x00000ffc
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F(x) (0x00001000+((x)*4))
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F__SIZE 0x00000100
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX_MASK 0x000000ff
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X_SHIFT 8
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X_MASK 0x000fff00
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y_SHIFT 20
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y_MASK 0xfff00000
+#define NV04_GDI_RECTANGLE_TEXT_FONT_G 0x000017f0
+#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET_MASK 0x0fffffff
+#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH_SHIFT 28
+#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH_MASK 0xf0000000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0 0x000017f4
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1 0x000017f8
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_G 0x000017fc
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT(x) (0x00001800+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT__SIZE 0x00000100
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX(x) (0x00001804+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX__SIZE 0x00000100
+
+
+#define NV03_GDI_RECTANGLE_TEXT 0x0000004b
+
+#define NV03_GDI_RECTANGLE_TEXT_NOP 0x00000100
+#define NV03_GDI_RECTANGLE_TEXT_NOTIFY 0x00000104
+#define NV03_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180
+#define NV03_GDI_RECTANGLE_TEXT_PATTERN 0x00000184
+#define NV03_GDI_RECTANGLE_TEXT_ROP 0x00000188
+#define NV03_GDI_RECTANGLE_TEXT_BETA1 0x0000018c
+#define NV03_GDI_RECTANGLE_TEXT_SURFACE 0x00000190
+#define NV03_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc
+#define NV03_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304
+#define NV03_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT 0x00000400
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE 0x00000404
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B 0x000007f4
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B 0x000007f8
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_COLOR1_B 0x000007fc
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0 0x00000800
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1 0x00000804
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x00000bec
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x00000bf0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_COLOR1_C 0x00000bf4
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_C 0x00000bf8
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_POINT_C 0x00000bfc
+#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(x) (0x00000c00+((x)*4))
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__SIZE 0x00000020
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0 0x00000fe8
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1 0x00000fec
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_COLOR1_D 0x00000ff0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D 0x00000ff4
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D 0x00000ff8
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_POINT_D 0x00000ffc
+#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D(x) (0x00001000+((x)*4))
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D__SIZE 0x00000020
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x000013e4
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x000013e8
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_COLOR0_E 0x000013ec
+#define NV03_GDI_RECTANGLE_TEXT_COLOR1_E 0x000013f0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x000013f4
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x000013f8
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_POINT_E 0x000013fc
+#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(x) (0x00001400+((x)*4))
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__SIZE 0x00000020
+
+
+#define NV04_SWIZZLED_SURFACE 0x00000052
+
+#define NV04_SWIZZLED_SURFACE_NOP 0x00000100
+#define NV04_SWIZZLED_SURFACE_NOTIFY 0x00000104
+#define NV04_SWIZZLED_SURFACE_DMA_NOTIFY 0x00000180
+#define NV04_SWIZZLED_SURFACE_DMA_IMAGE 0x00000184
+#define NV04_SWIZZLED_SURFACE_FORMAT 0x00000300
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_SHIFT 0
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_MASK 0x000000ff
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y8 0x00000001
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000002
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000003
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_R5G6B5 0x00000004
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y16 0x00000005
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000006
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000007
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000008
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000009
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_A8R8G8B8 0x0000000a
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y32 0x0000000b
+#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT 16
+#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_MASK 0x00ff0000
+#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT 24
+#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_MASK 0xff000000
+#define NV04_SWIZZLED_SURFACE_OFFSET 0x00000304
+
+
+#define NV04_CONTEXT_SURFACES_3D 0x00000053
+
+#define NV04_CONTEXT_SURFACES_3D_NOP 0x00000100
+#define NV04_CONTEXT_SURFACES_3D_NOTIFY 0x00000104
+#define NV04_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180
+#define NV04_CONTEXT_SURFACES_3D_DMA_COLOR 0x00000184
+#define NV04_CONTEXT_SURFACES_3D_DMA_ZETA 0x00000188
+#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL 0x000002f8
+#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_SHIFT 0
+#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_MASK 0x0000ffff
+#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_SHIFT 16
+#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_MASK 0xffff0000
+#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL 0x000002fc
+#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_SHIFT 0
+#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_MASK 0x0000ffff
+#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_SHIFT 16
+#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_MASK 0xffff0000
+#define NV04_CONTEXT_SURFACES_3D_FORMAT 0x00000300
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_SHIFT 0
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_MASK 0x000000ff
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000001
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000002
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_R5G6B5 0x00000003
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000004
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000005
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000006
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000007
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_A8R8G8B8 0x00000008
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SHIFT 8
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_MASK 0x0000ff00
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_PITCH 0x00000100
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SWIZZLE 0x00000200
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U_SHIFT 16
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U_MASK 0x00ff0000
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V_SHIFT 24
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V_MASK 0xff000000
+#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE 0x00000304
+#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W_SHIFT 0
+#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W_MASK 0x0000ffff
+#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H_SHIFT 16
+#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H_MASK 0xffff0000
+#define NV04_CONTEXT_SURFACES_3D_PITCH 0x00000308
+#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR_SHIFT 0
+#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR_MASK 0x0000ffff
+#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA_SHIFT 16
+#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA_MASK 0xffff0000
+#define NV04_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x0000030c
+#define NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000310
+
+
+#define NV04_DX5_TEXTURED_TRIANGLE 0x00000054
+
+#define NV04_DX5_TEXTURED_TRIANGLE_NOP 0x00000100
+#define NV04_DX5_TEXTURED_TRIANGLE_NOTIFY 0x00000104
+#define NV04_DX5_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180
+#define NV04_DX5_TEXTURED_TRIANGLE_DMA_A 0x00000184
+#define NV04_DX5_TEXTURED_TRIANGLE_DMA_B 0x00000188
+#define NV04_DX5_TEXTURED_TRIANGLE_SURFACE 0x0000018c
+#define NV04_DX5_TEXTURED_TRIANGLE_COLORKEY 0x00000300
+#define NV04_DX5_TEXTURED_TRIANGLE_OFFSET 0x00000304
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT 0x00000308
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_DMA_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_DMA_MASK 0x00000003
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_MATCH_SHIFT 2
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_MATCH_MASK 0x0000000c
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_SHIFT 4
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_MASK 0x00000030
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CENTER 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER 0x00000020
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_SHIFT 6
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_MASK 0x000000c0
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CENTER 0x00000040
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER 0x00000080
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_MASK 0x00000f00
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_Y8 0x00000100
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A1R5G5B5 0x00000200
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_X1R5G5B5 0x00000300
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A4R4G4B4 0x00000400
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_R5G6B5 0x00000500
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A8R8G8B8 0x00000600
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_X8R8G8B8 0x00000700
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT 12
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_MASK 0x000f0000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT 20
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_MASK 0x00f00000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MASK 0x07000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT 0x01000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT 0x02000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE 0x03000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER 0x04000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP 0x05000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_WRAPU (1 << 27)
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT 28
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MASK 0x70000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_REPEAT 0x10000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MIRRORED_REPEAT 0x20000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE 0x30000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_BORDER 0x40000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP 0x50000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_WRAPV (1 << 31)
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER 0x0000030c
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X_MASK 0x000000ff
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y_MASK 0x00007f00
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE (1 << 15)
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS_MASK 0x00ff0000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_MASK 0x07000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST 0x01000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR 0x02000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE (1 << 27)
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_SHIFT 28
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_MASK 0x70000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST 0x10000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR 0x20000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE (1 << 31)
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND 0x00000310
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_MASK 0x0000000f
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_MASK_BIT_SHIFT 4
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_MASK_BIT_MASK 0x00000030
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_SHIFT 6
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_MASK 0x000000c0
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT 0x00000040
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD 0x00000080
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_PHONG 0x000000c0
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_MASK 0x00000f00
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE_SHIFT 12
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE_MASK 0x0000f000
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE_MASK 0x000f0000
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_ALPHA_ENABLE_SHIFT 20
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_ALPHA_ENABLE_MASK 0x00f00000
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SRC_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SRC_MASK 0x0f000000
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_DST_SHIFT 28
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_DST_MASK 0xf0000000
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL 0x00000314
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF_MASK 0x000000ff
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_MASK 0x00000f00
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE (1 << 12)
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN (1 << 13)
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT 14
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_MASK 0x0000c000
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_MASK 0x000f0000
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT 20
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_MASK 0x00300000
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE (1 << 22)
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE (1 << 23)
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_MASK 0x3f000000
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT 30
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_MASK 0xc0000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR 0x00000318
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_B_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_B_MASK 0x000000ff
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_G_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_G_MASK 0x0000ff00
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_R_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_R_MASK 0x00ff0000
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_A_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_A_MASK 0xff000000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(x) (0x00000400+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SY(x) (0x00000404+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SY__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SZ(x) (0x00000408+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SZ__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_RHW(x) (0x0000040c+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_RHW__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR(x) (0x00000410+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B_MASK 0x000000ff
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G_MASK 0x0000ff00
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R_MASK 0x00ff0000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A_MASK 0xff000000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR(x) (0x00000414+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B_MASK 0x000000ff
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G_MASK 0x0000ff00
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R_MASK 0x00ff0000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG_MASK 0xff000000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TU(x) (0x00000418+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TU__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TV(x) (0x0000041c+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TV__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(x) (0x00000600+((x)*4))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE__SIZE 0x00000040
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I0_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I0_MASK 0x0000000f
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I1_SHIFT 4
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I1_MASK 0x000000f0
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I2_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I2_MASK 0x00000f00
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I3_SHIFT 12
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I3_MASK 0x0000f000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I4_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I4_MASK 0x000f0000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I5_SHIFT 20
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000
+
+
+#define NV04_DX6_MULTITEX_TRIANGLE 0x00000055
+
+#define NV04_DX6_MULTITEX_TRIANGLE_NOP 0x00000100
+#define NV04_DX6_MULTITEX_TRIANGLE_NOTIFY 0x00000104
+#define NV04_DX6_MULTITEX_TRIANGLE_DMA_NOTIFY 0x00000180
+#define NV04_DX6_MULTITEX_TRIANGLE_DMA_A 0x00000184
+#define NV04_DX6_MULTITEX_TRIANGLE_DMA_B 0x00000188
+#define NV04_DX6_MULTITEX_TRIANGLE_SURFACE 0x0000018c
+#define NV04_DX6_MULTITEX_TRIANGLE_OFFSET(x) (0x00000308+((x)*4))
+#define NV04_DX6_MULTITEX_TRIANGLE_OFFSET__SIZE 0x00000002
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT(x) (0x00000310+((x)*4))
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT__SIZE 0x00000002
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_DMA_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_DMA_MASK 0x0000000f
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_SHIFT 4
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_MASK 0x00000030
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_SHIFT 6
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_MASK 0x000000c0
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_COLOR_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_COLOR_MASK 0x00000f00
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT 12
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U_MASK 0x000f0000
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT 20
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V_MASK 0x00f00000
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_MASK 0x07000000
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_WRAPU (1 << 27)
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_SHIFT 28
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_MASK 0x70000000
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_WRAPV (1 << 31)
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER(x) (0x00000318+((x)*4))
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER__SIZE 0x00000002
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y_MASK 0x00007f00
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE (1 << 15)
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MINIFY_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MINIFY_MASK 0x07000000
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE (1 << 27)
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MAGNIFY_SHIFT 28
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MAGNIFY_MASK 0x70000000
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE (1 << 31)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA 0x00000320
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE0 (1 << 0)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA0 (1 << 1)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_SHIFT 2
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_MASK 0x000000fc
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE1 (1 << 8)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA1 (1 << 9)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_SHIFT 10
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_MASK 0x0000fc00
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE2 (1 << 16)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA2 (1 << 17)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT2_SHIFT 18
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT2_MASK 0x00fc0000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE3 (1 << 24)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA3 (1 << 25)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT3_SHIFT 26
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT3_MASK 0x1c000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_SHIFT 29
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_MASK 0xe0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR 0x00000324
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE0 (1 << 0)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA0 (1 << 1)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT0_SHIFT 2
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT0_MASK 0x000000fc
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE1 (1 << 8)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA1 (1 << 9)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT1_SHIFT 10
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT1_MASK 0x0000fc00
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE2 (1 << 16)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA2 (1 << 17)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT2_SHIFT 18
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT2_MASK 0x00fc0000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE3 (1 << 24)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA3 (1 << 25)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT3_SHIFT 26
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT3_MASK 0x1c000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_OPERATION_SHIFT 29
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_OPERATION_MASK 0xe0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA 0x0000032c
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE0 (1 << 0)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA0 (1 << 1)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT0_SHIFT 2
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT0_MASK 0x000000fc
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE1 (1 << 8)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA1 (1 << 9)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT1_SHIFT 10
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT1_MASK 0x0000fc00
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE2 (1 << 16)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA2 (1 << 17)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT2_SHIFT 18
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT2_MASK 0x00fc0000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE3 (1 << 24)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA3 (1 << 25)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT3_SHIFT 26
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT3_MASK 0x1c000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_OPERATION_SHIFT 29
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_OPERATION_MASK 0xe0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR 0x00000330
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE0 (1 << 0)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA0 (1 << 1)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT0_SHIFT 2
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT0_MASK 0x000000fc
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE1 (1 << 8)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA1 (1 << 9)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT1_SHIFT 10
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT1_MASK 0x0000fc00
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE2 (1 << 16)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA2 (1 << 17)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT2_SHIFT 18
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT2_MASK 0x00fc0000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE3 (1 << 24)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA3 (1 << 25)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT3_SHIFT 26
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT3_MASK 0x1c000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_OPERATION_SHIFT 29
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_OPERATION_MASK 0xe0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR 0x00000334
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_B_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_B_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_G_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_G_MASK 0x0000ff00
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_R_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_R_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_A_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_A_MASK 0xff000000
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND 0x00000338
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_MASK_BIT_SHIFT 4
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_MASK_BIT_MASK 0x00000030
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_SHIFT 6
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_MASK 0x000000c0
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_MASK 0x00000f00
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE_SHIFT 12
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE_MASK 0x0000f000
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE_MASK 0x000f0000
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_ALPHA_ENABLE_SHIFT 20
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_ALPHA_ENABLE_MASK 0x00f00000
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SRC_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SRC_MASK 0x0f000000
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_DST_SHIFT 28
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_DST_MASK 0xf0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0 0x0000033c
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_MASK 0x00000f00
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_TEST_ENABLE (1 << 12)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ORIGIN (1 << 13)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE_SHIFT 14
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE_MASK 0x0000c000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_MASK 0x000f0000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_SHIFT 20
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_MASK 0x00300000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_DITHER_ENABLE (1 << 22)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_PERSPECTIVE_ENABLE (1 << 23)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_WRITE_ENABLE (1 << 24)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_STENCIL_WRITE_ENABLE (1 << 25)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_WRITE_ENABLE (1 << 26)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_RED_WRITE_ENABLE (1 << 27)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_GREEN_WRITE_ENABLE (1 << 28)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_BLUE_WRITE_ENABLE (1 << 29)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_SHIFT 30
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_MASK 0xc0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1 0x00000340
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_TEST_ENABLE_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_TEST_ENABLE_MASK 0x0000000f
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC_SHIFT 4
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC_MASK 0x000000f0
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF_MASK 0x0000ff00
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE_MASK 0xff000000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2 0x00000344
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL_MASK 0x0000000f
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL_SHIFT 4
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL_MASK 0x000000f0
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS_MASK 0x00000f00
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR 0x00000348
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_B_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_B_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_G_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_G_MASK 0x0000ff00
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_R_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_R_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_A_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_A_MASK 0xff000000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SX(x) (0x00000400+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SX__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SY(x) (0x00000404+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SY__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SZ(x) (0x00000408+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SZ__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_RHW(x) (0x0000040c+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_RHW__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR(x) (0x00000410+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G_MASK 0x0000ff00
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A_MASK 0xff000000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR(x) (0x00000414+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G_MASK 0x0000ff00
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG_MASK 0xff000000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU0(x) (0x00000418+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU0__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV0(x) (0x0000041c+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV0__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU1(x) (0x00000420+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU1__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV1(x) (0x00000424+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV1__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE(x) (0x00000540+((x)*4))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE__SIZE 0x00000030
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I0_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I0_MASK 0x0000000f
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I1_SHIFT 4
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I1_MASK 0x000000f0
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I2_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I2_MASK 0x00000f00
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I3_SHIFT 12
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I3_MASK 0x0000f000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I4_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I4_MASK 0x000f0000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_SHIFT 20
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000
+
+
+#define NV10_DX5_TEXTURED_TRIANGLE 0x00000094
+
+
+
+#define NV10TCL 0x00000056
+
+#define NV10TCL_NOP 0x00000100
+#define NV10TCL_NOTIFY 0x00000104
+#define NV10TCL_DMA_NOTIFY 0x00000180
+#define NV10TCL_DMA_IN_MEMORY0 0x00000184
+#define NV10TCL_DMA_IN_MEMORY1 0x00000188
+#define NV10TCL_DMA_VTXBUF0 0x0000018c
+#define NV10TCL_DMA_IN_MEMORY2 0x00000194
+#define NV10TCL_DMA_IN_MEMORY3 0x00000198
+#define NV10TCL_RT_HORIZ 0x00000200
+#define NV10TCL_RT_HORIZ_X_SHIFT 0
+#define NV10TCL_RT_HORIZ_X_MASK 0x0000ffff
+#define NV10TCL_RT_HORIZ_W_SHIFT 16
+#define NV10TCL_RT_HORIZ_W_MASK 0xffff0000
+#define NV10TCL_RT_VERT 0x00000204
+#define NV10TCL_RT_VERT_Y_SHIFT 0
+#define NV10TCL_RT_VERT_Y_MASK 0x0000ffff
+#define NV10TCL_RT_VERT_H_SHIFT 16
+#define NV10TCL_RT_VERT_H_MASK 0xffff0000
+#define NV10TCL_RT_FORMAT 0x00000208
+#define NV10TCL_RT_FORMAT_TYPE_SHIFT 8
+#define NV10TCL_RT_FORMAT_TYPE_MASK 0x00000f00
+#define NV10TCL_RT_FORMAT_TYPE_LINEAR 0x00000100
+#define NV10TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200
+#define NV10TCL_RT_FORMAT_COLOR_SHIFT 0
+#define NV10TCL_RT_FORMAT_COLOR_MASK 0x0000001f
+#define NV10TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003
+#define NV10TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005
+#define NV10TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008
+#define NV10TCL_RT_FORMAT_COLOR_B8 0x00000009
+#define NV10TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d
+#define NV10TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f
+#define NV10TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010
+#define NV10TCL_RT_PITCH 0x0000020c
+#define NV10TCL_RT_PITCH_COLOR_PITCH_SHIFT 0
+#define NV10TCL_RT_PITCH_COLOR_PITCH_MASK 0x0000ffff
+#define NV10TCL_RT_PITCH_ZETA_PITCH_SHIFT 16
+#define NV10TCL_RT_PITCH_ZETA_PITCH_MASK 0xffff0000
+#define NV10TCL_COLOR_OFFSET 0x00000210
+#define NV10TCL_ZETA_OFFSET 0x00000214
+#define NV10TCL_TX_OFFSET(x) (0x00000218+((x)*4))
+#define NV10TCL_TX_OFFSET__SIZE 0x00000002
+#define NV10TCL_TX_FORMAT(x) (0x00000220+((x)*4))
+#define NV10TCL_TX_FORMAT__SIZE 0x00000002
+#define NV10TCL_TX_FORMAT_DMA0 (1 << 0)
+#define NV10TCL_TX_FORMAT_DMA1 (1 << 1)
+#define NV10TCL_TX_FORMAT_CUBE_MAP (1 << 2)
+#define NV10TCL_TX_FORMAT_FORMAT_SHIFT 7
+#define NV10TCL_TX_FORMAT_FORMAT_MASK 0x00000780
+#define NV10TCL_TX_FORMAT_FORMAT_L8 0x00000000
+#define NV10TCL_TX_FORMAT_FORMAT_A8 0x00000080
+#define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000100
+#define NV10TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000180
+#define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000200
+#define NV10TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000280
+#define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000300
+#define NV10TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000380
+#define NV10TCL_TX_FORMAT_FORMAT_INDEX8 0x00000580
+#define NV10TCL_TX_FORMAT_FORMAT_DXT1 0x00000600
+#define NV10TCL_TX_FORMAT_FORMAT_DXT3 0x00000700
+#define NV10TCL_TX_FORMAT_FORMAT_DXT5 0x00000780
+#define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00000800
+#define NV10TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00000880
+#define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00000900
+#define NV10TCL_TX_FORMAT_FORMAT_L8_RECT 0x00000980
+#define NV10TCL_TX_FORMAT_FORMAT_A8L8 0x00000d00
+#define NV10TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00000d80
+#define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00000e80
+#define NV10TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00000f00
+#define NV10TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00001000
+#define NV10TCL_TX_FORMAT_FORMAT_DSDT 0x00001400
+#define NV10TCL_TX_FORMAT_FORMAT_A16 0x00001900
+#define NV10TCL_TX_FORMAT_FORMAT_HILO16 0x00001980
+#define NV10TCL_TX_FORMAT_FORMAT_A16_RECT 0x00001a80
+#define NV10TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00001b00
+#define NV10TCL_TX_FORMAT_FORMAT_HILO8 0x00002200
+#define NV10TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00002280
+#define NV10TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00002300
+#define NV10TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00002380
+#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00002500
+#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00002580
+#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00002600
+#define NV10TCL_TX_FORMAT_NPOT (1 << 11)
+#define NV10TCL_TX_FORMAT_MIPMAP (1 << 15)
+#define NV10TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 16
+#define NV10TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x000f0000
+#define NV10TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 20
+#define NV10TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x00f00000
+#define NV10TCL_TX_FORMAT_WRAP_S_SHIFT 24
+#define NV10TCL_TX_FORMAT_WRAP_S_MASK 0x0f000000
+#define NV10TCL_TX_FORMAT_WRAP_S_REPEAT 0x01000000
+#define NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT 0x02000000
+#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE 0x03000000
+#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER 0x04000000
+#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP 0x05000000
+#define NV10TCL_TX_FORMAT_WRAP_T_SHIFT 28
+#define NV10TCL_TX_FORMAT_WRAP_T_MASK 0xf0000000
+#define NV10TCL_TX_FORMAT_WRAP_T_REPEAT 0x10000000
+#define NV10TCL_TX_FORMAT_WRAP_T_MIRRORED_REPEAT 0x20000000
+#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE 0x30000000
+#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_BORDER 0x40000000
+#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP 0x50000000
+#define NV10TCL_TX_ENABLE(x) (0x00000228+((x)*4))
+#define NV10TCL_TX_ENABLE__SIZE 0x00000002
+#define NV10TCL_TX_ENABLE_ANISOTROPY_SHIFT 4
+#define NV10TCL_TX_ENABLE_ANISOTROPY_MASK 0x00000030
+#define NV10TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT 14
+#define NV10TCL_TX_ENABLE_MIPMAP_MAX_LOD_MASK 0x0003c000
+#define NV10TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT 26
+#define NV10TCL_TX_ENABLE_MIPMAP_MIN_LOD_MASK 0x3c000000
+#define NV10TCL_TX_ENABLE_ENABLE (1 << 30)
+#define NV10TCL_TX_NPOT_PITCH(x) (0x00000230+((x)*4))
+#define NV10TCL_TX_NPOT_PITCH__SIZE 0x00000002
+#define NV10TCL_TX_NPOT_PITCH_PITCH_SHIFT 16
+#define NV10TCL_TX_NPOT_PITCH_PITCH_MASK 0xffff0000
+#define NV10TCL_TX_NPOT_SIZE(x) (0x00000240+((x)*4))
+#define NV10TCL_TX_NPOT_SIZE__SIZE 0x00000002
+#define NV10TCL_TX_NPOT_SIZE_H_SHIFT 0
+#define NV10TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff
+#define NV10TCL_TX_NPOT_SIZE_W_SHIFT 16
+#define NV10TCL_TX_NPOT_SIZE_W_MASK 0xffff0000
+#define NV10TCL_TX_FILTER(x) (0x00000248+((x)*4))
+#define NV10TCL_TX_FILTER__SIZE 0x00000002
+#define NV10TCL_TX_FILTER_LOD_BIAS_SHIFT 8
+#define NV10TCL_TX_FILTER_LOD_BIAS_MASK 0x00000f00
+#define NV10TCL_TX_FILTER_MINIFY_SHIFT 24
+#define NV10TCL_TX_FILTER_MINIFY_MASK 0x0f000000
+#define NV10TCL_TX_FILTER_MINIFY_NEAREST 0x01000000
+#define NV10TCL_TX_FILTER_MINIFY_LINEAR 0x02000000
+#define NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000
+#define NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000
+#define NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000
+#define NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000
+#define NV10TCL_TX_FILTER_MAGNIFY_SHIFT 28
+#define NV10TCL_TX_FILTER_MAGNIFY_MASK 0xf0000000
+#define NV10TCL_TX_FILTER_MAGNIFY_NEAREST 0x10000000
+#define NV10TCL_TX_FILTER_MAGNIFY_LINEAR 0x20000000
+#define NV10TCL_TX_PALETTE_OFFSET(x) (0x00000250+((x)*4))
+#define NV10TCL_TX_PALETTE_OFFSET__SIZE 0x00000002
+#define NV10TCL_RC_IN_ALPHA(x) (0x00000260+((x)*4))
+#define NV10TCL_RC_IN_ALPHA__SIZE 0x00000002
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4)
+#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000
+#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12)
+#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000
+#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20)
+#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000
+#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28)
+#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000
+#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV10TCL_RC_IN_RGB(x) (0x00000268+((x)*4))
+#define NV10TCL_RC_IN_RGB__SIZE 0x00000002
+#define NV10TCL_RC_IN_RGB_D_INPUT_SHIFT 0
+#define NV10TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f
+#define NV10TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV10TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV10TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003
+#define NV10TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV10TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV10TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV10TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c
+#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d
+#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV10TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4)
+#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV10TCL_RC_IN_RGB_D_MAPPING_SHIFT 5
+#define NV10TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0
+#define NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV10TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV10TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV10TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV10TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV10TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV10TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV10TCL_RC_IN_RGB_C_INPUT_SHIFT 8
+#define NV10TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00
+#define NV10TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300
+#define NV10TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12)
+#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_SHIFT 13
+#define NV10TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV10TCL_RC_IN_RGB_B_INPUT_SHIFT 16
+#define NV10TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000
+#define NV10TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV10TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV10TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000
+#define NV10TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV10TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV10TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV10TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000
+#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000
+#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV10TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20)
+#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_SHIFT 21
+#define NV10TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV10TCL_RC_IN_RGB_A_INPUT_SHIFT 24
+#define NV10TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28)
+#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_SHIFT 29
+#define NV10TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV10TCL_RC_COLOR(x) (0x00000270+((x)*4))
+#define NV10TCL_RC_COLOR__SIZE 0x00000002
+#define NV10TCL_RC_COLOR_B_SHIFT 0
+#define NV10TCL_RC_COLOR_B_MASK 0x000000ff
+#define NV10TCL_RC_COLOR_G_SHIFT 8
+#define NV10TCL_RC_COLOR_G_MASK 0x0000ff00
+#define NV10TCL_RC_COLOR_R_SHIFT 16
+#define NV10TCL_RC_COLOR_R_MASK 0x00ff0000
+#define NV10TCL_RC_COLOR_A_SHIFT 24
+#define NV10TCL_RC_COLOR_A_MASK 0xff000000
+#define NV10TCL_RC_OUT_ALPHA(x) (0x00000278+((x)*4))
+#define NV10TCL_RC_OUT_ALPHA__SIZE 0x00000002
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12)
+#define NV10TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13)
+#define NV10TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14)
+#define NV10TCL_RC_OUT_ALPHA_BIAS (1 << 15)
+#define NV10TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV10TCL_RC_OUT_ALPHA_SCALE_SHIFT 17
+#define NV10TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV10TCL_RC_OUT_RGB(x) (0x00000280+((x)*4))
+#define NV10TCL_RC_OUT_RGB__SIZE 0x00000002
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 12)
+#define NV10TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13)
+#define NV10TCL_RC_OUT_RGB_MUX_SUM (1 << 14)
+#define NV10TCL_RC_OUT_RGB_BIAS (1 << 15)
+#define NV10TCL_RC_OUT_RGB_BIAS_NONE 0x00000000
+#define NV10TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV10TCL_RC_OUT_RGB_SCALE_SHIFT 17
+#define NV10TCL_RC_OUT_RGB_SCALE_MASK 0x00000000
+#define NV10TCL_RC_OUT_RGB_SCALE_NONE 0x00000000
+#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV10TCL_RC_OUT_RGB_OPERATION_SHIFT 27
+#define NV10TCL_RC_OUT_RGB_OPERATION_MASK 0x38000000
+#define NV10TCL_RC_FINAL0 0x00000288
+#define NV10TCL_RC_FINAL0_D_INPUT_SHIFT 0
+#define NV10TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f
+#define NV10TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV10TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV10TCL_RC_FINAL0_D_INPUT_FOG 0x00000003
+#define NV10TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV10TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV10TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV10TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV10TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c
+#define NV10TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d
+#define NV10TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV10TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE (1 << 4)
+#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV10TCL_RC_FINAL0_D_MAPPING_SHIFT 5
+#define NV10TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0
+#define NV10TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV10TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV10TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV10TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV10TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV10TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV10TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV10TCL_RC_FINAL0_C_INPUT_SHIFT 8
+#define NV10TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00
+#define NV10TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_FINAL0_C_INPUT_FOG 0x00000300
+#define NV10TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12)
+#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV10TCL_RC_FINAL0_C_MAPPING_SHIFT 13
+#define NV10TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000
+#define NV10TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV10TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV10TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV10TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV10TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV10TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV10TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV10TCL_RC_FINAL0_B_INPUT_SHIFT 16
+#define NV10TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000
+#define NV10TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV10TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV10TCL_RC_FINAL0_B_INPUT_FOG 0x00030000
+#define NV10TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV10TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV10TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV10TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV10TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000
+#define NV10TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000
+#define NV10TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV10TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20)
+#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV10TCL_RC_FINAL0_B_MAPPING_SHIFT 21
+#define NV10TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000
+#define NV10TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV10TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV10TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV10TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV10TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV10TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV10TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV10TCL_RC_FINAL0_A_INPUT_SHIFT 24
+#define NV10TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000
+#define NV10TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV10TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV10TCL_RC_FINAL0_A_INPUT_FOG 0x03000000
+#define NV10TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV10TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV10TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV10TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV10TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000
+#define NV10TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000
+#define NV10TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV10TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28)
+#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_SHIFT 29
+#define NV10TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV10TCL_RC_FINAL1 0x0000028c
+#define NV10TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7)
+#define NV10TCL_RC_FINAL1_G_INPUT_SHIFT 8
+#define NV10TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00
+#define NV10TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_FINAL1_G_INPUT_FOG 0x00000300
+#define NV10TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12)
+#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV10TCL_RC_FINAL1_G_MAPPING_SHIFT 13
+#define NV10TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000
+#define NV10TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV10TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV10TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV10TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV10TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV10TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV10TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV10TCL_RC_FINAL1_F_INPUT_SHIFT 16
+#define NV10TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000
+#define NV10TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV10TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV10TCL_RC_FINAL1_F_INPUT_FOG 0x00030000
+#define NV10TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV10TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV10TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000
+#define NV10TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000
+#define NV10TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000
+#define NV10TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000
+#define NV10TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV10TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20)
+#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV10TCL_RC_FINAL1_F_MAPPING_SHIFT 21
+#define NV10TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000
+#define NV10TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV10TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV10TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV10TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV10TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV10TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV10TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV10TCL_RC_FINAL1_E_INPUT_SHIFT 24
+#define NV10TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000
+#define NV10TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV10TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV10TCL_RC_FINAL1_E_INPUT_FOG 0x03000000
+#define NV10TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV10TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV10TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000
+#define NV10TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000
+#define NV10TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000
+#define NV10TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000
+#define NV10TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV10TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28)
+#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_SHIFT 29
+#define NV10TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV10TCL_LIGHT_MODEL 0x00000294
+#define NV10TCL_LIGHT_MODEL_COLOR_CONTROL (1 << 1)
+#define NV10TCL_LIGHT_MODEL_LOCAL_VIEWER (1 << 16)
+#define NV10TCL_COLOR_MATERIAL_ENABLE 0x00000298
+#define NV10TCL_COLOR_MATERIAL_ENABLE_SPECULAR (1 << 0)
+#define NV10TCL_COLOR_MATERIAL_ENABLE_DIFFUSE (1 << 1)
+#define NV10TCL_COLOR_MATERIAL_ENABLE_AMBIENT (1 << 2)
+#define NV10TCL_COLOR_MATERIAL_ENABLE_EMISSION (1 << 3)
+#define NV10TCL_FOG_MODE 0x0000029c
+#define NV10TCL_FOG_MODE_EXP 0x00000800
+#define NV10TCL_FOG_MODE_EXP_2 0x00000802
+#define NV10TCL_FOG_MODE_EXP2 0x00000803
+#define NV10TCL_FOG_MODE_LINEAR 0x00000804
+#define NV10TCL_FOG_MODE_LINEAR_2 0x00002601
+#define NV10TCL_FOG_COORD_DIST 0x000002a0
+#define NV10TCL_FOG_COORD_DIST_COORD_FALSE 0x00000000
+#define NV10TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_RADIAL_NV 0x00000001
+#define NV10TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_PLANE_ABSOLUTE_NV 0x00000002
+#define NV10TCL_FOG_COORD_DIST_COORD_FOG 0x00000003
+#define NV10TCL_FOG_ENABLE 0x000002a4
+#define NV10TCL_FOG_COLOR 0x000002a8
+#define NV10TCL_FOG_COLOR_R_SHIFT 0
+#define NV10TCL_FOG_COLOR_R_MASK 0x000000ff
+#define NV10TCL_FOG_COLOR_G_SHIFT 8
+#define NV10TCL_FOG_COLOR_G_MASK 0x0000ff00
+#define NV10TCL_FOG_COLOR_B_SHIFT 16
+#define NV10TCL_FOG_COLOR_B_MASK 0x00ff0000
+#define NV10TCL_FOG_COLOR_A_SHIFT 24
+#define NV10TCL_FOG_COLOR_A_MASK 0xff000000
+#define NV10TCL_VIEWPORT_CLIP_MODE 0x000002b4
+#define NV10TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*4))
+#define NV10TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_SHIFT 0
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_MASK 0x000007ff
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_LEFT_ENABLE (1 << 11)
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_SHIFT 16
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_MASK 0x07ff0000
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_RIGHT_ENABLE (1 << 27)
+#define NV10TCL_VIEWPORT_CLIP_VERT(x) (0x000002e0+((x)*4))
+#define NV10TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_T_SHIFT 0
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_T_MASK 0x000007ff
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_TOP_ENABLE (1 << 11)
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_B_SHIFT 16
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_B_MASK 0x07ff0000
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_BOTTOM_ENABLE (1 << 27)
+#define NV10TCL_ALPHA_FUNC_ENABLE 0x00000300
+#define NV10TCL_BLEND_FUNC_ENABLE 0x00000304
+#define NV10TCL_CULL_FACE_ENABLE 0x00000308
+#define NV10TCL_DEPTH_TEST_ENABLE 0x0000030c
+#define NV10TCL_DITHER_ENABLE 0x00000310
+#define NV10TCL_LIGHTING_ENABLE 0x00000314
+#define NV10TCL_POINT_PARAMETERS_ENABLE 0x00000318
+#define NV10TCL_POINT_SMOOTH_ENABLE 0x0000031c
+#define NV10TCL_LINE_SMOOTH_ENABLE 0x00000320
+#define NV10TCL_POLYGON_SMOOTH_ENABLE 0x00000324
+#define NV10TCL_VERTEX_WEIGHT_ENABLE 0x00000328
+#define NV10TCL_STENCIL_ENABLE 0x0000032c
+#define NV10TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000330
+#define NV10TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000334
+#define NV10TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000338
+#define NV10TCL_ALPHA_FUNC_FUNC 0x0000033c
+#define NV10TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200
+#define NV10TCL_ALPHA_FUNC_FUNC_LESS 0x00000201
+#define NV10TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202
+#define NV10TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203
+#define NV10TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV10TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV10TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV10TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206
+#define NV10TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207
+#define NV10TCL_ALPHA_FUNC_REF 0x00000340
+#define NV10TCL_BLEND_FUNC_SRC 0x00000344
+#define NV10TCL_BLEND_FUNC_SRC_ZERO 0x00000000
+#define NV10TCL_BLEND_FUNC_SRC_ONE 0x00000001
+#define NV10TCL_BLEND_FUNC_SRC_SRC_COLOR 0x00000300
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV10TCL_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV10TCL_BLEND_FUNC_SRC_DST_ALPHA 0x00000304
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV10TCL_BLEND_FUNC_SRC_DST_COLOR 0x00000306
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307
+#define NV10TCL_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308
+#define NV10TCL_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV10TCL_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV10TCL_BLEND_FUNC_DST 0x00000348
+#define NV10TCL_BLEND_FUNC_DST_ZERO 0x00000000
+#define NV10TCL_BLEND_FUNC_DST_ONE 0x00000001
+#define NV10TCL_BLEND_FUNC_DST_SRC_COLOR 0x00000300
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV10TCL_BLEND_FUNC_DST_SRC_ALPHA 0x00000302
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV10TCL_BLEND_FUNC_DST_DST_ALPHA 0x00000304
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV10TCL_BLEND_FUNC_DST_DST_COLOR 0x00000306
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307
+#define NV10TCL_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308
+#define NV10TCL_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV10TCL_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV10TCL_BLEND_COLOR 0x0000034c
+#define NV10TCL_BLEND_COLOR_B_SHIFT 0
+#define NV10TCL_BLEND_COLOR_B_MASK 0x000000ff
+#define NV10TCL_BLEND_COLOR_G_SHIFT 8
+#define NV10TCL_BLEND_COLOR_G_MASK 0x0000ff00
+#define NV10TCL_BLEND_COLOR_R_SHIFT 16
+#define NV10TCL_BLEND_COLOR_R_MASK 0x00ff0000
+#define NV10TCL_BLEND_COLOR_A_SHIFT 24
+#define NV10TCL_BLEND_COLOR_A_MASK 0xff000000
+#define NV10TCL_BLEND_EQUATION 0x00000350
+#define NV10TCL_BLEND_EQUATION_FUNC_ADD 0x00008006
+#define NV10TCL_BLEND_EQUATION_MIN 0x00008007
+#define NV10TCL_BLEND_EQUATION_MAX 0x00008008
+#define NV10TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a
+#define NV10TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV10TCL_DEPTH_FUNC 0x00000354
+#define NV10TCL_DEPTH_FUNC_NEVER 0x00000200
+#define NV10TCL_DEPTH_FUNC_LESS 0x00000201
+#define NV10TCL_DEPTH_FUNC_EQUAL 0x00000202
+#define NV10TCL_DEPTH_FUNC_LEQUAL 0x00000203
+#define NV10TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV10TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV10TCL_DEPTH_FUNC_NOTEQUAL 0x00000205
+#define NV10TCL_DEPTH_FUNC_GEQUAL 0x00000206
+#define NV10TCL_DEPTH_FUNC_ALWAYS 0x00000207
+#define NV10TCL_COLOR_MASK 0x00000358
+#define NV10TCL_COLOR_MASK_B (1 << 0)
+#define NV10TCL_COLOR_MASK_G (1 << 8)
+#define NV10TCL_COLOR_MASK_R (1 << 16)
+#define NV10TCL_COLOR_MASK_A (1 << 24)
+#define NV10TCL_DEPTH_WRITE_ENABLE 0x0000035c
+#define NV10TCL_STENCIL_MASK 0x00000360
+#define NV10TCL_STENCIL_FUNC_FUNC 0x00000364
+#define NV10TCL_STENCIL_FUNC_FUNC_NEVER 0x00000200
+#define NV10TCL_STENCIL_FUNC_FUNC_LESS 0x00000201
+#define NV10TCL_STENCIL_FUNC_FUNC_EQUAL 0x00000202
+#define NV10TCL_STENCIL_FUNC_FUNC_LEQUAL 0x00000203
+#define NV10TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204
+#define NV10TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204
+#define NV10TCL_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV10TCL_STENCIL_FUNC_FUNC_GEQUAL 0x00000206
+#define NV10TCL_STENCIL_FUNC_FUNC_ALWAYS 0x00000207
+#define NV10TCL_STENCIL_FUNC_REF 0x00000368
+#define NV10TCL_STENCIL_FUNC_MASK 0x0000036c
+#define NV10TCL_STENCIL_OP_FAIL 0x00000370
+#define NV10TCL_STENCIL_OP_FAIL_ZERO 0x00000000
+#define NV10TCL_STENCIL_OP_FAIL_INVERT 0x0000150a
+#define NV10TCL_STENCIL_OP_FAIL_KEEP 0x00001e00
+#define NV10TCL_STENCIL_OP_FAIL_REPLACE 0x00001e01
+#define NV10TCL_STENCIL_OP_FAIL_INCR 0x00001e02
+#define NV10TCL_STENCIL_OP_FAIL_DECR 0x00001e03
+#define NV10TCL_STENCIL_OP_FAIL_INCR_WRAP 0x00008507
+#define NV10TCL_STENCIL_OP_FAIL_DECR_WRAP 0x00008508
+#define NV10TCL_STENCIL_OP_ZFAIL 0x00000374
+#define NV10TCL_STENCIL_OP_ZFAIL_ZERO 0x00000000
+#define NV10TCL_STENCIL_OP_ZFAIL_INVERT 0x0000150a
+#define NV10TCL_STENCIL_OP_ZFAIL_KEEP 0x00001e00
+#define NV10TCL_STENCIL_OP_ZFAIL_REPLACE 0x00001e01
+#define NV10TCL_STENCIL_OP_ZFAIL_INCR 0x00001e02
+#define NV10TCL_STENCIL_OP_ZFAIL_DECR 0x00001e03
+#define NV10TCL_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV10TCL_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV10TCL_STENCIL_OP_ZPASS 0x00000378
+#define NV10TCL_STENCIL_OP_ZPASS_ZERO 0x00000000
+#define NV10TCL_STENCIL_OP_ZPASS_INVERT 0x0000150a
+#define NV10TCL_STENCIL_OP_ZPASS_KEEP 0x00001e00
+#define NV10TCL_STENCIL_OP_ZPASS_REPLACE 0x00001e01
+#define NV10TCL_STENCIL_OP_ZPASS_INCR 0x00001e02
+#define NV10TCL_STENCIL_OP_ZPASS_DECR 0x00001e03
+#define NV10TCL_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV10TCL_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV10TCL_SHADE_MODEL 0x0000037c
+#define NV10TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NV10TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NV10TCL_LINE_WIDTH 0x00000380
+#define NV10TCL_POLYGON_OFFSET_FACTOR 0x00000384
+#define NV10TCL_POLYGON_OFFSET_UNITS 0x00000388
+#define NV10TCL_POLYGON_MODE_FRONT 0x0000038c
+#define NV10TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV10TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV10TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NV10TCL_POLYGON_MODE_BACK 0x00000390
+#define NV10TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV10TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV10TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NV10TCL_DEPTH_RANGE_NEAR 0x00000394
+#define NV10TCL_DEPTH_RANGE_FAR 0x00000398
+#define NV10TCL_CULL_FACE 0x0000039c
+#define NV10TCL_CULL_FACE_FRONT 0x00000404
+#define NV10TCL_CULL_FACE_BACK 0x00000405
+#define NV10TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NV10TCL_FRONT_FACE 0x000003a0
+#define NV10TCL_FRONT_FACE_CW 0x00000900
+#define NV10TCL_FRONT_FACE_CCW 0x00000901
+#define NV10TCL_NORMALIZE_ENABLE 0x000003a4
+#define NV10TCL_COLOR_MATERIAL_R 0x000003a8
+#define NV10TCL_COLOR_MATERIAL_G 0x000003ac
+#define NV10TCL_COLOR_MATERIAL_B 0x000003b0
+#define NV10TCL_COLOR_MATERIAL_A 0x000003b4
+#define NV10TCL_COLOR_CONTROL 0x000003b8
+#define NV10TCL_ENABLED_LIGHTS 0x000003bc
+#define NV10TCL_ENABLED_LIGHTS_LIGHT0 (1 << 0)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT1 (1 << 2)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT2 (1 << 4)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT3 (1 << 6)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT4 (1 << 8)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT5 (1 << 10)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT6 (1 << 12)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT7 (1 << 14)
+#define NV10TCL_TX_GEN_S(x) (0x000003c0+((x)*16))
+#define NV10TCL_TX_GEN_S__SIZE 0x00000002
+#define NV10TCL_TX_GEN_S_FALSE 0x00000000
+#define NV10TCL_TX_GEN_S_EYE_LINEAR 0x00002400
+#define NV10TCL_TX_GEN_S_OBJECT_LINEAR 0x00002401
+#define NV10TCL_TX_GEN_S_SPHERE_MAP 0x00002402
+#define NV10TCL_TX_GEN_S_NORMAL_MAP 0x00008511
+#define NV10TCL_TX_GEN_S_REFLECTION_MAP 0x00008512
+#define NV10TCL_TX_GEN_T(x) (0x000003c4+((x)*16))
+#define NV10TCL_TX_GEN_T__SIZE 0x00000002
+#define NV10TCL_TX_GEN_T_FALSE 0x00000000
+#define NV10TCL_TX_GEN_T_EYE_LINEAR 0x00002400
+#define NV10TCL_TX_GEN_T_OBJECT_LINEAR 0x00002401
+#define NV10TCL_TX_GEN_T_SPHERE_MAP 0x00002402
+#define NV10TCL_TX_GEN_T_NORMAL_MAP 0x00008511
+#define NV10TCL_TX_GEN_T_REFLECTION_MAP 0x00008512
+#define NV10TCL_TX_GEN_R(x) (0x000003c8+((x)*16))
+#define NV10TCL_TX_GEN_R__SIZE 0x00000002
+#define NV10TCL_TX_GEN_R_FALSE 0x00000000
+#define NV10TCL_TX_GEN_R_EYE_LINEAR 0x00002400
+#define NV10TCL_TX_GEN_R_OBJECT_LINEAR 0x00002401
+#define NV10TCL_TX_GEN_R_SPHERE_MAP 0x00002402
+#define NV10TCL_TX_GEN_R_NORMAL_MAP 0x00008511
+#define NV10TCL_TX_GEN_R_REFLECTION_MAP 0x00008512
+#define NV10TCL_TX_GEN_Q(x) (0x000003cc+((x)*16))
+#define NV10TCL_TX_GEN_Q__SIZE 0x00000002
+#define NV10TCL_TX_GEN_Q_FALSE 0x00000000
+#define NV10TCL_TX_GEN_Q_EYE_LINEAR 0x00002400
+#define NV10TCL_TX_GEN_Q_OBJECT_LINEAR 0x00002401
+#define NV10TCL_TX_GEN_Q_SPHERE_MAP 0x00002402
+#define NV10TCL_TX_GEN_Q_NORMAL_MAP 0x00008511
+#define NV10TCL_TX_GEN_Q_REFLECTION_MAP 0x00008512
+#define NV10TCL_TX_MATRIX_ENABLE(x) (0x000003e0+((x)*4))
+#define NV10TCL_TX_MATRIX_ENABLE__SIZE 0x00000002
+#define NV10TCL_VIEW_MATRIX_ENABLE 0x000003e8
+#define NV10TCL_VIEW_MATRIX_ENABLE_MODELVIEW1 (1 << 0)
+#define NV10TCL_VIEW_MATRIX_ENABLE_MODELVIEW0 (1 << 1)
+#define NV10TCL_VIEW_MATRIX_ENABLE_PROJECTION (1 << 2)
+#define NV10TCL_POINT_SIZE 0x000003ec
+#define NV10TCL_MODELVIEW0_MATRIX(x) (0x00000400+((x)*4))
+#define NV10TCL_MODELVIEW0_MATRIX__SIZE 0x00000010
+#define NV10TCL_MODELVIEW1_MATRIX(x) (0x00000440+((x)*4))
+#define NV10TCL_MODELVIEW1_MATRIX__SIZE 0x00000010
+#define NV10TCL_INVERSE_MODELVIEW0_MATRIX(x) (0x00000480+((x)*4))
+#define NV10TCL_INVERSE_MODELVIEW0_MATRIX__SIZE 0x00000010
+#define NV10TCL_INVERSE_MODELVIEW1_MATRIX(x) (0x000004c0+((x)*4))
+#define NV10TCL_INVERSE_MODELVIEW1_MATRIX__SIZE 0x00000010
+#define NV10TCL_PROJECTION_MATRIX(x) (0x00000500+((x)*4))
+#define NV10TCL_PROJECTION_MATRIX__SIZE 0x00000010
+#define NV10TCL_TX0_MATRIX(x) (0x00000540+((x)*4))
+#define NV10TCL_TX0_MATRIX__SIZE 0x00000010
+#define NV10TCL_TX1_MATRIX(x) (0x00000580+((x)*4))
+#define NV10TCL_TX1_MATRIX__SIZE 0x00000010
+#define NV10TCL_CLIP_PLANE_A(x) (0x00000600+((x)*16))
+#define NV10TCL_CLIP_PLANE_A__SIZE 0x00000008
+#define NV10TCL_CLIP_PLANE_B(x) (0x00000604+((x)*16))
+#define NV10TCL_CLIP_PLANE_B__SIZE 0x00000008
+#define NV10TCL_CLIP_PLANE_C(x) (0x00000608+((x)*16))
+#define NV10TCL_CLIP_PLANE_C__SIZE 0x00000008
+#define NV10TCL_CLIP_PLANE_D(x) (0x0000060c+((x)*16))
+#define NV10TCL_CLIP_PLANE_D__SIZE 0x00000008
+#define NV10TCL_FOG_EQUATION_CONSTANT 0x00000680
+#define NV10TCL_FOG_EQUATION_LINEAR 0x00000684
+#define NV10TCL_FOG_EQUATION_QUADRATIC 0x00000688
+#define NV10TCL_FRONT_MATERIAL_SHININESS(x) (0x000006a0+((x)*4))
+#define NV10TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006
+#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000006c4
+#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000006c8
+#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000006cc
+#define NV10TCL_VIEWPORT_SCALE_X 0x000006e8
+#define NV10TCL_VIEWPORT_SCALE_Y 0x000006ec
+#define NV10TCL_VIEWPORT_SCALE_Z 0x000006f0
+#define NV10TCL_VIEWPORT_SCALE_W 0x000006f4
+#define NV10TCL_POINT_PARAMETER(x) (0x000006f8+((x)*4))
+#define NV10TCL_POINT_PARAMETER__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00000800+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00000804+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00000808+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000080c+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00000810+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00000814+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00000818+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000081c+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00000820+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008
+#define NV10TCL_LIGHT_HALF_VECTOR_X(x) (0x00000828+((x)*128))
+#define NV10TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008
+#define NV10TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000082c+((x)*128))
+#define NV10TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008
+#define NV10TCL_LIGHT_HALF_VECTOR_Z(x) (0x00000830+((x)*128))
+#define NV10TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008
+#define NV10TCL_LIGHT_DIRECTION_X(x) (0x00000834+((x)*128))
+#define NV10TCL_LIGHT_DIRECTION_X__SIZE 0x00000008
+#define NV10TCL_LIGHT_DIRECTION_Y(x) (0x00000838+((x)*128))
+#define NV10TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008
+#define NV10TCL_LIGHT_DIRECTION_Z(x) (0x0000083c+((x)*128))
+#define NV10TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_CUTOFF_A(x) (0x00000840+((x)*128))
+#define NV10TCL_LIGHT_SPOT_CUTOFF_A__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_CUTOFF_B(x) (0x00000844+((x)*128))
+#define NV10TCL_LIGHT_SPOT_CUTOFF_B__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_CUTOFF_C(x) (0x00000848+((x)*128))
+#define NV10TCL_LIGHT_SPOT_CUTOFF_C__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_DIR_X(x) (0x0000084c+((x)*128))
+#define NV10TCL_LIGHT_SPOT_DIR_X__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_DIR_Y(x) (0x00000850+((x)*128))
+#define NV10TCL_LIGHT_SPOT_DIR_Y__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_DIR_Z(x) (0x00000854+((x)*128))
+#define NV10TCL_LIGHT_SPOT_DIR_Z__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_CUTOFF_D(x) (0x00000858+((x)*128))
+#define NV10TCL_LIGHT_SPOT_CUTOFF_D__SIZE 0x00000008
+#define NV10TCL_LIGHT_POSITION_X(x) (0x0000085c+((x)*128))
+#define NV10TCL_LIGHT_POSITION_X__SIZE 0x00000008
+#define NV10TCL_LIGHT_POSITION_Y(x) (0x00000860+((x)*128))
+#define NV10TCL_LIGHT_POSITION_Y__SIZE 0x00000008
+#define NV10TCL_LIGHT_POSITION_Z(x) (0x00000864+((x)*128))
+#define NV10TCL_LIGHT_POSITION_Z__SIZE 0x00000008
+#define NV10TCL_LIGHT_ATTENUATION_CONSTANT(x) (0x00000868+((x)*128))
+#define NV10TCL_LIGHT_ATTENUATION_CONSTANT__SIZE 0x00000008
+#define NV10TCL_LIGHT_ATTENUATION_LINEAR(x) (0x0000086c+((x)*128))
+#define NV10TCL_LIGHT_ATTENUATION_LINEAR__SIZE 0x00000008
+#define NV10TCL_LIGHT_ATTENUATION_QUADRATIC(x) (0x00000870+((x)*128))
+#define NV10TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008
+#define NV10TCL_VERTEX_POS_3F_X 0x00000c00
+#define NV10TCL_VERTEX_POS_3F_Y 0x00000c04
+#define NV10TCL_VERTEX_POS_3F_Z 0x00000c08
+#define NV10TCL_VERTEX_POS_4F_X 0x00000c18
+#define NV10TCL_VERTEX_POS_4F_Y 0x00000c1c
+#define NV10TCL_VERTEX_POS_4F_Z 0x00000c20
+#define NV10TCL_VERTEX_POS_4F_W 0x00000c24
+#define NV10TCL_VERTEX_NOR_3F_X 0x00000c30
+#define NV10TCL_VERTEX_NOR_3F_Y 0x00000c34
+#define NV10TCL_VERTEX_NOR_3F_Z 0x00000c38
+#define NV10TCL_VERTEX_NOR_3I_XY 0x00000c40
+#define NV10TCL_VERTEX_NOR_3I_XY_X_SHIFT 0
+#define NV10TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff
+#define NV10TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16
+#define NV10TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000
+#define NV10TCL_VERTEX_NOR_3I_Z 0x00000c44
+#define NV10TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0
+#define NV10TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff
+#define NV10TCL_VERTEX_COL_4F_R 0x00000c50
+#define NV10TCL_VERTEX_COL_4F_G 0x00000c54
+#define NV10TCL_VERTEX_COL_4F_B 0x00000c58
+#define NV10TCL_VERTEX_COL_4F_A 0x00000c5c
+#define NV10TCL_VERTEX_COL_3F_R 0x00000c60
+#define NV10TCL_VERTEX_COL_3F_G 0x00000c64
+#define NV10TCL_VERTEX_COL_3F_B 0x00000c68
+#define NV10TCL_VERTEX_COL_4I 0x00000c6c
+#define NV10TCL_VERTEX_COL_4I_R_SHIFT 0
+#define NV10TCL_VERTEX_COL_4I_R_MASK 0x000000ff
+#define NV10TCL_VERTEX_COL_4I_G_SHIFT 8
+#define NV10TCL_VERTEX_COL_4I_G_MASK 0x0000ff00
+#define NV10TCL_VERTEX_COL_4I_B_SHIFT 16
+#define NV10TCL_VERTEX_COL_4I_B_MASK 0x00ff0000
+#define NV10TCL_VERTEX_COL_4I_A_SHIFT 24
+#define NV10TCL_VERTEX_COL_4I_A_MASK 0xff000000
+#define NV10TCL_VERTEX_COL2_3F_R 0x00000c80
+#define NV10TCL_VERTEX_COL2_3F_G 0x00000c84
+#define NV10TCL_VERTEX_COL2_3F_B 0x00000c88
+#define NV10TCL_VERTEX_COL2_3I 0x00000c8c
+#define NV10TCL_VERTEX_COL2_3I_R_SHIFT 0
+#define NV10TCL_VERTEX_COL2_3I_R_MASK 0x000000ff
+#define NV10TCL_VERTEX_COL2_3I_G_SHIFT 8
+#define NV10TCL_VERTEX_COL2_3I_G_MASK 0x0000ff00
+#define NV10TCL_VERTEX_COL2_3I_B_SHIFT 16
+#define NV10TCL_VERTEX_COL2_3I_B_MASK 0x00ff0000
+#define NV10TCL_VERTEX_TX0_2F_S 0x00000c90
+#define NV10TCL_VERTEX_TX0_2F_T 0x00000c94
+#define NV10TCL_VERTEX_TX0_2I 0x00000c98
+#define NV10TCL_VERTEX_TX0_2I_S_SHIFT 0
+#define NV10TCL_VERTEX_TX0_2I_S_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX0_2I_T_SHIFT 16
+#define NV10TCL_VERTEX_TX0_2I_T_MASK 0xffff0000
+#define NV10TCL_VERTEX_TX0_4F_S 0x00000ca0
+#define NV10TCL_VERTEX_TX0_4F_T 0x00000ca4
+#define NV10TCL_VERTEX_TX0_4F_R 0x00000ca8
+#define NV10TCL_VERTEX_TX0_4F_Q 0x00000cac
+#define NV10TCL_VERTEX_TX0_4I_ST 0x00000cb0
+#define NV10TCL_VERTEX_TX0_4I_ST_S_SHIFT 0
+#define NV10TCL_VERTEX_TX0_4I_ST_S_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX0_4I_ST_T_SHIFT 16
+#define NV10TCL_VERTEX_TX0_4I_ST_T_MASK 0xffff0000
+#define NV10TCL_VERTEX_TX0_4I_RQ 0x00000cb4
+#define NV10TCL_VERTEX_TX0_4I_RQ_R_SHIFT 0
+#define NV10TCL_VERTEX_TX0_4I_RQ_R_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX0_4I_RQ_Q_SHIFT 16
+#define NV10TCL_VERTEX_TX0_4I_RQ_Q_MASK 0xffff0000
+#define NV10TCL_VERTEX_TX1_2F_S 0x00000cb8
+#define NV10TCL_VERTEX_TX1_2F_T 0x00000cbc
+#define NV10TCL_VERTEX_TX1_2I 0x00000cc0
+#define NV10TCL_VERTEX_TX1_2I_S_SHIFT 0
+#define NV10TCL_VERTEX_TX1_2I_S_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX1_2I_T_SHIFT 16
+#define NV10TCL_VERTEX_TX1_2I_T_MASK 0xffff0000
+#define NV10TCL_VERTEX_TX1_4F_S 0x00000cc8
+#define NV10TCL_VERTEX_TX1_4F_T 0x00000ccc
+#define NV10TCL_VERTEX_TX1_4F_R 0x00000cd0
+#define NV10TCL_VERTEX_TX1_4F_Q 0x00000cd4
+#define NV10TCL_VERTEX_TX1_4I_ST 0x00000cd8
+#define NV10TCL_VERTEX_TX1_4I_ST_S_SHIFT 0
+#define NV10TCL_VERTEX_TX1_4I_ST_S_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX1_4I_ST_T_SHIFT 16
+#define NV10TCL_VERTEX_TX1_4I_ST_T_MASK 0xffff0000
+#define NV10TCL_VERTEX_TX1_4I_RQ 0x00000cdc
+#define NV10TCL_VERTEX_TX1_4I_RQ_R_SHIFT 0
+#define NV10TCL_VERTEX_TX1_4I_RQ_R_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX1_4I_RQ_Q_SHIFT 16
+#define NV10TCL_VERTEX_TX1_4I_RQ_Q_MASK 0xffff0000
+#define NV10TCL_VERTEX_FOG_1F 0x00000ce0
+#define NV10TCL_VERTEX_WGH_1F 0x00000ce4
+#define NV10TCL_EDGEFLAG_ENABLE 0x00000cec
+#define NV10TCL_VERTEX_ARRAY_VALIDATE 0x00000cf0
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(x) (0x00000d00+((x)*8))
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET__SIZE 0x00000008
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(x) (0x00000d04+((x)*8))
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT__SIZE 0x00000008
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_POS 0x00000d00
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS 0x00000d04
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_COL 0x00000d08
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL 0x00000d0c
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_COL2 0x00000d10
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2 0x00000d14
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_TX0 0x00000d18
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0 0x00000d1c
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_TX1 0x00000d20
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1 0x00000d24
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_NOR 0x00000d28
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR 0x00000d2c
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_WGH 0x00000d30
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH 0x00000d34
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_FOG 0x00000d38
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG 0x00000d3c
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_BEGIN_END 0x00000dfc
+#define NV10TCL_VERTEX_BEGIN_END_STOP 0x00000000
+#define NV10TCL_VERTEX_BEGIN_END_POINTS 0x00000001
+#define NV10TCL_VERTEX_BEGIN_END_LINES 0x00000002
+#define NV10TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003
+#define NV10TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004
+#define NV10TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005
+#define NV10TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006
+#define NV10TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007
+#define NV10TCL_VERTEX_BEGIN_END_QUADS 0x00000008
+#define NV10TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009
+#define NV10TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a
+#define NV10TCL_VB_ELEMENT_U16 0x00000e00
+#define NV10TCL_VB_ELEMENT_U16_I0_SHIFT 0
+#define NV10TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
+#define NV10TCL_VB_ELEMENT_U16_I1_SHIFT 16
+#define NV10TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
+#define NV10TCL_VB_ELEMENT_U32 0x00001100
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END 0x000013fc
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_STOP 0x00000000
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_POINTS 0x00000001
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINES 0x00000002
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINE_LOOP 0x00000003
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINE_STRIP 0x00000004
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLES 0x00000005
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLE_STRIP 0x00000006
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLE_FAN 0x00000007
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_QUADS 0x00000008
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_QUAD_STRIP 0x00000009
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_POLYGON 0x0000000a
+#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS 0x00001400
+#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_FIRST_SHIFT 0
+#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_FIRST_MASK 0x0000ffff
+#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_LAST_SHIFT 24
+#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_LAST_MASK 0xff000000
+#define NV10TCL_VERTEX_ARRAY_DATA 0x00001800
+
+
+#define NV04_CONTEXT_COLOR_KEY 0x00000057
+
+
+
+#define NV03_CONTEXT_SURFACES_2D 0x00000058
+
+#define NV03_CONTEXT_SURFACES_2D_SYNCHRONIZE 0x00000100
+#define NV03_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180
+#define NV03_CONTEXT_SURFACES_2D_DMA_SOURCE 0x00000184
+#define NV03_CONTEXT_SURFACES_2D_DMA_DESTIN 0x00000188
+#define NV03_CONTEXT_SURFACES_2D_COLOR_FORMAT 0x00000300
+#define NV03_CONTEXT_SURFACES_2D_PITCH 0x00000304
+#define NV03_CONTEXT_SURFACES_2D_PITCH_SOURCE_SHIFT 0
+#define NV03_CONTEXT_SURFACES_2D_PITCH_SOURCE_MASK 0x0000ffff
+#define NV03_CONTEXT_SURFACES_2D_PITCH_DESTIN_SHIFT 16
+#define NV03_CONTEXT_SURFACES_2D_PITCH_DESTIN_MASK 0xffff0000
+#define NV03_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308
+#define NV03_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c
+
+
+#define NV03_CONTEXT_SURFACES_3D 0x0000005a
+
+#define NV03_CONTEXT_SURFACES_3D_SYNCHRONIZE 0x00000100
+#define NV03_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180
+#define NV03_CONTEXT_SURFACES_3D_DMA_SURFACE 0x00000184
+#define NV03_CONTEXT_SURFACES_3D_PITCH 0x00000300
+#define NV03_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x00000304
+#define NV03_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000308
+
+
+#define NV04_RENDER_SOLID_LINE 0x0000005c
+
+#define NV04_RENDER_SOLID_LINE_SURFACE 0x00000198
+
+
+#define NV04_RENDER_SOLID_TRIANGLE 0x0000005d
+
+
+
+#define NV04_RENDER_SOLID_RECTANGLE 0x0000005e
+
+#define NV04_RENDER_SOLID_RECTANGLE_SURFACE 0x00000198
+
+
+#define NV04_IMAGE_BLIT 0x0000005f
+
+#define NV04_IMAGE_BLIT_NOP 0x00000100
+#define NV04_IMAGE_BLIT_NOTIFY 0x00000104
+#define NV04_IMAGE_BLIT_DMA_NOTIFY 0x00000180
+#define NV04_IMAGE_BLIT_COLOR_KEY 0x00000184
+#define NV04_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188
+#define NV04_IMAGE_BLIT_PATTERN 0x0000018c
+#define NV04_IMAGE_BLIT_ROP 0x00000190
+#define NV04_IMAGE_BLIT_BETA4 0x00000198
+#define NV04_IMAGE_BLIT_SURFACE 0x0000019c
+#define NV04_IMAGE_BLIT_OPERATION 0x000002fc
+#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY_AND 0x00000000
+#define NV04_IMAGE_BLIT_OPERATION_ROP_AND 0x00000001
+#define NV04_IMAGE_BLIT_OPERATION_BLEND_AND 0x00000002
+#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY 0x00000003
+#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV04_IMAGE_BLIT_OPERATION_BLEND_PREMULT 0x00000005
+
+
+#define NV04_INDEXED_IMAGE_FROM_CPU 0x00000060
+
+#define NV04_INDEXED_IMAGE_FROM_CPU_NOP 0x00000100
+#define NV04_INDEXED_IMAGE_FROM_CPU_NOTIFY 0x00000104
+#define NV04_INDEXED_IMAGE_FROM_CPU_PATCH 0x0000010c
+#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180
+#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_LUT 0x00000184
+#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR_FORMAT 0x000003e8
+#define NV04_INDEXED_IMAGE_FROM_CPU_INDEX_FORMAT 0x000003ec
+#define NV04_INDEXED_IMAGE_FROM_CPU_LUT_OFFSET 0x000003f0
+#define NV04_INDEXED_IMAGE_FROM_CPU_POINT 0x000003f4
+#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_OUT 0x000003f8
+#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_IN 0x000003fc
+#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR 0x00000400
+
+
+#define NV04_IMAGE_FROM_CPU 0x00000061
+
+#define NV04_IMAGE_FROM_CPU_BETA4 0x00000198
+#define NV04_IMAGE_FROM_CPU_SURFACE 0x0000019c
+
+
+#define NV10_CONTEXT_SURFACES_2D 0x00000062
+
+
+
+#define NV05_SCALED_IMAGE_FROM_MEMORY 0x00000063
+
+#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc
+#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000
+#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001
+#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002
+
+
+#define NV01_IMAGE_SRCCOPY_AND 0x00000064
+
+#define NV01_IMAGE_SRCCOPY_AND_NOTIFY 0x00000104
+#define NV01_IMAGE_SRCCOPY_AND_DMA_NOTIFY 0x00000180
+#define NV01_IMAGE_SRCCOPY_AND_IMAGE_OUTPUT 0x00000200
+#define NV01_IMAGE_SRCCOPY_AND_IMAGE_INPUT 0x00000204
+
+
+#define NV05_INDEXED_IMAGE_FROM_CPU 0x00000064
+
+#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_KEY 0x00000188
+#define NV05_INDEXED_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x0000018c
+#define NV05_INDEXED_IMAGE_FROM_CPU_PATTERN 0x00000190
+#define NV05_INDEXED_IMAGE_FROM_CPU_ROP 0x00000194
+#define NV05_INDEXED_IMAGE_FROM_CPU_BETA1 0x00000198
+#define NV05_INDEXED_IMAGE_FROM_CPU_BETA4 0x0000019c
+#define NV05_INDEXED_IMAGE_FROM_CPU_SURFACE 0x000001a0
+#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000003e0
+#define NV05_INDEXED_IMAGE_FROM_CPU_OPERATION 0x000003e4
+#define NV05_INDEXED_IMAGE_FROM_CPU_INDICES 0x00000400
+
+
+#define NV05_IMAGE_FROM_CPU 0x00000065
+
+#define NV05_IMAGE_FROM_CPU_BETA4 0x00000198
+#define NV05_IMAGE_FROM_CPU_SURFACE 0x0000019c
+
+
+#define NV05_STRETCHED_IMAGE_FROM_CPU 0x00000066
+
+#define NV05_STRETCHED_IMAGE_FROM_CPU_BETA4 0x00000194
+#define NV05_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000198
+#define NV05_STRETCHED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8
+
+
+#define NV04_IMAGE_BLEND_PREMULT 0x00000067
+
+#define NV04_IMAGE_BLEND_PREMULT_NOP 0x00000100
+#define NV04_IMAGE_BLEND_PREMULT_NOTIFY 0x00000104
+#define NV04_IMAGE_BLEND_PREMULT_DMA_NOTIFY 0x00000180
+#define NV04_IMAGE_BLEND_PREMULT_IMAGE_OUTPUT 0x00000200
+#define NV04_IMAGE_BLEND_PREMULT_BETA_INPUT 0x00000204
+#define NV04_IMAGE_BLEND_PREMULT_IMAGE_INPUT 0x00000208
+
+
+#define NV03_CHANNEL_PIO 0x0000006a
+
+
+
+#define NV03_CHANNEL_DMA 0x0000006b
+
+
+
+#define NV04_BETA_SOLID 0x00000072
+
+#define NV04_BETA_SOLID_NOP 0x00000100
+#define NV04_BETA_SOLID_NOTIFY 0x00000104
+#define NV04_BETA_SOLID_DMA_NOTIFY 0x00000180
+#define NV04_BETA_SOLID_BETA_OUTPUT 0x00000200
+#define NV04_BETA_SOLID_BETA_FACTOR 0x00000300
+
+
+#define NV04_STRETCHED_IMAGE_FROM_CPU 0x00000076
+
+
+
+#define NV04_SCALED_IMAGE_FROM_MEMORY 0x00000077
+
+#define NV04_SCALED_IMAGE_FROM_MEMORY_NOP 0x00000100
+#define NV04_SCALED_IMAGE_FROM_MEMORY_NOTIFY 0x00000104
+#define NV04_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180
+#define NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184
+#define NV04_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188
+#define NV04_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c
+#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190
+#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA4 0x00000194
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000198
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_MASK 0xffff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_MASK 0xffff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT 0x00000310
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_MASK 0xffff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE 0x00000314
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_MASK 0xffff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_DU_DX 0x00000318
+#define NV04_SCALED_IMAGE_FROM_MEMORY_DV_DY 0x0000031c
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE 0x00000400
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_W_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_W_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_MASK 0xffff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT 0x00000404
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_MASK 0x00ff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER 0x00010000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CORNER 0x00020000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_SHIFT 24
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_MASK 0xff000000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE 0x00000000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_BILINEAR 0x01000000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_ADDRESS 0x00000408
+#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT 0x0000040c
+#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_X_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_X_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_Y_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_Y_MASK 0xffff0000
+
+
+#define NV10_TEXTURE_FROM_CPU 0x0000007b
+
+#define NV10_TEXTURE_FROM_CPU_NOP 0x00000100
+#define NV10_TEXTURE_FROM_CPU_NOTIFY 0x00000104
+#define NV10_TEXTURE_FROM_CPU_WAIT_FOR_IDLE 0x00000108
+#define NV10_TEXTURE_FROM_CPU_PM_TRIGGER 0x00000140
+#define NV10_TEXTURE_FROM_CPU_DMA_NOTIFY 0x00000180
+#define NV10_TEXTURE_FROM_CPU_SURFACE 0x00000184
+#define NV10_TEXTURE_FROM_CPU_COLOR_FORMAT 0x00000300
+#define NV10_TEXTURE_FROM_CPU_POINT 0x00000304
+#define NV10_TEXTURE_FROM_CPU_POINT_X_SHIFT 0
+#define NV10_TEXTURE_FROM_CPU_POINT_X_MASK 0x0000ffff
+#define NV10_TEXTURE_FROM_CPU_POINT_Y_SHIFT 16
+#define NV10_TEXTURE_FROM_CPU_POINT_Y_MASK 0xffff0000
+#define NV10_TEXTURE_FROM_CPU_SIZE 0x00000308
+#define NV10_TEXTURE_FROM_CPU_SIZE_W_SHIFT 0
+#define NV10_TEXTURE_FROM_CPU_SIZE_W_MASK 0x0000ffff
+#define NV10_TEXTURE_FROM_CPU_SIZE_H_SHIFT 16
+#define NV10_TEXTURE_FROM_CPU_SIZE_H_MASK 0xffff0000
+#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL 0x0000030c
+#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X_SHIFT 0
+#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X_MASK 0x0000ffff
+#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W_SHIFT 16
+#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W_MASK 0xffff0000
+#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL 0x00000310
+#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y_SHIFT 0
+#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y_MASK 0x0000ffff
+#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H_SHIFT 16
+#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H_MASK 0xffff0000
+#define NV10_TEXTURE_FROM_CPU_COLOR(x) (0x00000400+((x)*4))
+#define NV10_TEXTURE_FROM_CPU_COLOR__SIZE 0x00000700
+
+
+#define NV10_VIDEO_DISPLAY 0x0000007c
+
+
+
+#define NV10_DVD_SUBPICTURE 0x00000088
+
+
+
+#define NV10_SCALED_IMAGE_FROM_MEMORY 0x00000089
+
+#define NV10_SCALED_IMAGE_FROM_MEMORY_WAIT_FOR_IDLE 0x00000108
+
+
+#define NV10_IMAGE_FROM_CPU 0x0000008a
+
+#define NV10_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8
+
+
+#define NV10_CONTEXT_SURFACES_3D 0x00000093
+
+
+
+#define NV10_DX5_TEXTURE_TRIANGLE 0x00000094
+
+
+
+#define NV10_DX6_MULTI_TEXTURE_TRIANGLE 0x00000095
+
+
+
+#define NV11TCL 0x00000096
+
+#define NV11TCL_COLOR_LOGIC_OP_ENABLE 0x00000d40
+#define NV11TCL_COLOR_LOGIC_OP_OP 0x00000d44
+#define NV11TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500
+#define NV11TCL_COLOR_LOGIC_OP_OP_AND 0x00001501
+#define NV11TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502
+#define NV11TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503
+#define NV11TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504
+#define NV11TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505
+#define NV11TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506
+#define NV11TCL_COLOR_LOGIC_OP_OP_OR 0x00001507
+#define NV11TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508
+#define NV11TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509
+#define NV11TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a
+#define NV11TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b
+#define NV11TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c
+#define NV11TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d
+#define NV11TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e
+#define NV11TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f
+
+
+#define NV20TCL 0x00000097
+
+#define NV20TCL_NOP 0x00000100
+#define NV20TCL_NOTIFY 0x00000104
+#define NV20TCL_DMA_NOTIFY 0x00000180
+#define NV20TCL_DMA_TEXTURE0 0x00000184
+#define NV20TCL_DMA_TEXTURE1 0x00000188
+#define NV20TCL_DMA_COLOR 0x00000194
+#define NV20TCL_DMA_ZETA 0x00000198
+#define NV20TCL_DMA_VTXBUF0 0x0000019c
+#define NV20TCL_DMA_VTXBUF1 0x000001a0
+#define NV20TCL_DMA_FENCE 0x000001a4
+#define NV20TCL_DMA_QUERY 0x000001a8
+#define NV20TCL_RT_HORIZ 0x00000200
+#define NV20TCL_RT_HORIZ_X_SHIFT 0
+#define NV20TCL_RT_HORIZ_X_MASK 0x0000ffff
+#define NV20TCL_RT_HORIZ_W_SHIFT 16
+#define NV20TCL_RT_HORIZ_W_MASK 0xffff0000
+#define NV20TCL_RT_VERT 0x00000204
+#define NV20TCL_RT_VERT_Y_SHIFT 0
+#define NV20TCL_RT_VERT_Y_MASK 0x0000ffff
+#define NV20TCL_RT_VERT_H_SHIFT 16
+#define NV20TCL_RT_VERT_H_MASK 0xffff0000
+#define NV20TCL_RT_FORMAT 0x00000208
+#define NV20TCL_RT_FORMAT_TYPE_SHIFT 8
+#define NV20TCL_RT_FORMAT_TYPE_MASK 0x00000f00
+#define NV20TCL_RT_FORMAT_TYPE_LINEAR 0x00000100
+#define NV20TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200
+#define NV20TCL_RT_FORMAT_COLOR_SHIFT 0
+#define NV20TCL_RT_FORMAT_COLOR_MASK 0x0000001f
+#define NV20TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003
+#define NV20TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005
+#define NV20TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008
+#define NV20TCL_RT_FORMAT_COLOR_B8 0x00000009
+#define NV20TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d
+#define NV20TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f
+#define NV20TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010
+#define NV20TCL_RT_PITCH 0x0000020c
+#define NV20TCL_RT_PITCH_COLOR_PITCH_SHIFT 0
+#define NV20TCL_RT_PITCH_COLOR_PITCH_MASK 0x0000ffff
+#define NV20TCL_RT_PITCH_ZETA_PITCH_SHIFT 16
+#define NV20TCL_RT_PITCH_ZETA_PITCH_MASK 0xffff0000
+#define NV20TCL_COLOR_OFFSET 0x00000210
+#define NV20TCL_ZETA_OFFSET 0x00000214
+#define NV20TCL_RC_IN_ALPHA(x) (0x00000260+((x)*4))
+#define NV20TCL_RC_IN_ALPHA__SIZE 0x00000008
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV20TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4)
+#define NV20TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000
+#define NV20TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12)
+#define NV20TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000
+#define NV20TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV20TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20)
+#define NV20TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000
+#define NV20TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV20TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28)
+#define NV20TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000
+#define NV20TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV20TCL_RC_FINAL0 0x00000288
+#define NV20TCL_RC_FINAL0_D_INPUT_SHIFT 0
+#define NV20TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f
+#define NV20TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV20TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV20TCL_RC_FINAL0_D_INPUT_FOG 0x00000003
+#define NV20TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV20TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV20TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV20TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV20TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c
+#define NV20TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d
+#define NV20TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV20TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV20TCL_RC_FINAL0_D_COMPONENT_USAGE (1 << 4)
+#define NV20TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV20TCL_RC_FINAL0_D_MAPPING_SHIFT 5
+#define NV20TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0
+#define NV20TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV20TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV20TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV20TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV20TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV20TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV20TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV20TCL_RC_FINAL0_C_INPUT_SHIFT 8
+#define NV20TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00
+#define NV20TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_FINAL0_C_INPUT_FOG 0x00000300
+#define NV20TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12)
+#define NV20TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV20TCL_RC_FINAL0_C_MAPPING_SHIFT 13
+#define NV20TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000
+#define NV20TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV20TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV20TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV20TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV20TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV20TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV20TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV20TCL_RC_FINAL0_B_INPUT_SHIFT 16
+#define NV20TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000
+#define NV20TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV20TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV20TCL_RC_FINAL0_B_INPUT_FOG 0x00030000
+#define NV20TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV20TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV20TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV20TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV20TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000
+#define NV20TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000
+#define NV20TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV20TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV20TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20)
+#define NV20TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV20TCL_RC_FINAL0_B_MAPPING_SHIFT 21
+#define NV20TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000
+#define NV20TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV20TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV20TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV20TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV20TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV20TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV20TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV20TCL_RC_FINAL0_A_INPUT_SHIFT 24
+#define NV20TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000
+#define NV20TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV20TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV20TCL_RC_FINAL0_A_INPUT_FOG 0x03000000
+#define NV20TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV20TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV20TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV20TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV20TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000
+#define NV20TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000
+#define NV20TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV20TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV20TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28)
+#define NV20TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_SHIFT 29
+#define NV20TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV20TCL_RC_FINAL1 0x0000028c
+#define NV20TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7)
+#define NV20TCL_RC_FINAL1_G_INPUT_SHIFT 8
+#define NV20TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00
+#define NV20TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_FINAL1_G_INPUT_FOG 0x00000300
+#define NV20TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12)
+#define NV20TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV20TCL_RC_FINAL1_G_MAPPING_SHIFT 13
+#define NV20TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000
+#define NV20TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV20TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV20TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV20TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV20TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV20TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV20TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV20TCL_RC_FINAL1_F_INPUT_SHIFT 16
+#define NV20TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000
+#define NV20TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV20TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV20TCL_RC_FINAL1_F_INPUT_FOG 0x00030000
+#define NV20TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV20TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV20TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000
+#define NV20TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000
+#define NV20TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000
+#define NV20TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000
+#define NV20TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV20TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV20TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20)
+#define NV20TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV20TCL_RC_FINAL1_F_MAPPING_SHIFT 21
+#define NV20TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000
+#define NV20TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV20TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV20TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV20TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV20TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV20TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV20TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV20TCL_RC_FINAL1_E_INPUT_SHIFT 24
+#define NV20TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000
+#define NV20TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV20TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV20TCL_RC_FINAL1_E_INPUT_FOG 0x03000000
+#define NV20TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV20TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV20TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000
+#define NV20TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000
+#define NV20TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000
+#define NV20TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000
+#define NV20TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV20TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV20TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28)
+#define NV20TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_SHIFT 29
+#define NV20TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV20TCL_LIGHT_CONTROL 0x00000294
+#define NV20TCL_FOG_MODE 0x0000029c
+#define NV20TCL_FOG_MODE_EXP 0x00000800
+#define NV20TCL_FOG_MODE_EXP_2 0x00000802
+#define NV20TCL_FOG_MODE_EXP2 0x00000803
+#define NV20TCL_FOG_MODE_LINEAR 0x00000804
+#define NV20TCL_FOG_MODE_LINEAR_2 0x00002601
+#define NV20TCL_FOG_COORD_DIST 0x000002a0
+#define NV20TCL_FOG_COORD_DIST_COORD_FALSE 0x00000000
+#define NV20TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_RADIAL_NV 0x00000001
+#define NV20TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_PLANE_ABSOLUTE_NV 0x00000002
+#define NV20TCL_FOG_COORD_DIST_COORD_FOG 0x00000003
+#define NV20TCL_FOG_ENABLE 0x000002a4
+#define NV20TCL_FOG_COLOR 0x000002a8
+#define NV20TCL_FOG_COLOR_R_SHIFT 0
+#define NV20TCL_FOG_COLOR_R_MASK 0x000000ff
+#define NV20TCL_FOG_COLOR_G_SHIFT 8
+#define NV20TCL_FOG_COLOR_G_MASK 0x0000ff00
+#define NV20TCL_FOG_COLOR_B_SHIFT 16
+#define NV20TCL_FOG_COLOR_B_MASK 0x00ff0000
+#define NV20TCL_FOG_COLOR_A_SHIFT 24
+#define NV20TCL_FOG_COLOR_A_MASK 0xff000000
+#define NV20TCL_VIEWPORT_CLIP_MODE 0x000002b4
+#define NV20TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*4))
+#define NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV20TCL_VIEWPORT_CLIP_VERT(x) (0x000002e0+((x)*4))
+#define NV20TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV20TCL_ALPHA_FUNC_ENABLE 0x00000300
+#define NV20TCL_BLEND_FUNC_ENABLE 0x00000304
+#define NV20TCL_CULL_FACE_ENABLE 0x00000308
+#define NV20TCL_DEPTH_TEST_ENABLE 0x0000030c
+#define NV20TCL_DITHER_ENABLE 0x00000310
+#define NV20TCL_LIGHTING_ENABLE 0x00000314
+#define NV20TCL_POINT_PARAMETERS_ENABLE 0x00000318
+#define NV20TCL_POINT_SMOOTH_ENABLE 0x0000031c
+#define NV20TCL_LINE_SMOOTH_ENABLE 0x00000320
+#define NV20TCL_POLYGON_SMOOTH_ENABLE 0x00000324
+#define NV20TCL_STENCIL_ENABLE 0x0000032c
+#define NV20TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000330
+#define NV20TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000334
+#define NV20TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000338
+#define NV20TCL_ALPHA_FUNC_FUNC 0x0000033c
+#define NV20TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200
+#define NV20TCL_ALPHA_FUNC_FUNC_LESS 0x00000201
+#define NV20TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202
+#define NV20TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203
+#define NV20TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV20TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV20TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV20TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206
+#define NV20TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207
+#define NV20TCL_ALPHA_FUNC_REF 0x00000340
+#define NV20TCL_BLEND_FUNC_SRC 0x00000344
+#define NV20TCL_BLEND_FUNC_SRC_ZERO 0x00000000
+#define NV20TCL_BLEND_FUNC_SRC_ONE 0x00000001
+#define NV20TCL_BLEND_FUNC_SRC_SRC_COLOR 0x00000300
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV20TCL_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV20TCL_BLEND_FUNC_SRC_DST_ALPHA 0x00000304
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV20TCL_BLEND_FUNC_SRC_DST_COLOR 0x00000306
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307
+#define NV20TCL_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308
+#define NV20TCL_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV20TCL_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV20TCL_BLEND_FUNC_DST 0x00000348
+#define NV20TCL_BLEND_FUNC_DST_ZERO 0x00000000
+#define NV20TCL_BLEND_FUNC_DST_ONE 0x00000001
+#define NV20TCL_BLEND_FUNC_DST_SRC_COLOR 0x00000300
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV20TCL_BLEND_FUNC_DST_SRC_ALPHA 0x00000302
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV20TCL_BLEND_FUNC_DST_DST_ALPHA 0x00000304
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV20TCL_BLEND_FUNC_DST_DST_COLOR 0x00000306
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307
+#define NV20TCL_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308
+#define NV20TCL_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV20TCL_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV20TCL_BLEND_COLOR 0x0000034c
+#define NV20TCL_BLEND_COLOR_B_SHIFT 0
+#define NV20TCL_BLEND_COLOR_B_MASK 0x000000ff
+#define NV20TCL_BLEND_COLOR_G_SHIFT 8
+#define NV20TCL_BLEND_COLOR_G_MASK 0x0000ff00
+#define NV20TCL_BLEND_COLOR_R_SHIFT 16
+#define NV20TCL_BLEND_COLOR_R_MASK 0x00ff0000
+#define NV20TCL_BLEND_COLOR_A_SHIFT 24
+#define NV20TCL_BLEND_COLOR_A_MASK 0xff000000
+#define NV20TCL_BLEND_EQUATION 0x00000350
+#define NV20TCL_BLEND_EQUATION_FUNC_ADD 0x00008006
+#define NV20TCL_BLEND_EQUATION_MIN 0x00008007
+#define NV20TCL_BLEND_EQUATION_MAX 0x00008008
+#define NV20TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a
+#define NV20TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV20TCL_DEPTH_FUNC 0x00000354
+#define NV20TCL_DEPTH_FUNC_NEVER 0x00000200
+#define NV20TCL_DEPTH_FUNC_LESS 0x00000201
+#define NV20TCL_DEPTH_FUNC_EQUAL 0x00000202
+#define NV20TCL_DEPTH_FUNC_LEQUAL 0x00000203
+#define NV20TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV20TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV20TCL_DEPTH_FUNC_NOTEQUAL 0x00000205
+#define NV20TCL_DEPTH_FUNC_GEQUAL 0x00000206
+#define NV20TCL_DEPTH_FUNC_ALWAYS 0x00000207
+#define NV20TCL_COLOR_MASK 0x00000358
+#define NV20TCL_COLOR_MASK_B (1 << 0)
+#define NV20TCL_COLOR_MASK_G (1 << 8)
+#define NV20TCL_COLOR_MASK_R (1 << 16)
+#define NV20TCL_COLOR_MASK_A (1 << 24)
+#define NV20TCL_DEPTH_WRITE_ENABLE 0x0000035c
+#define NV20TCL_STENCIL_MASK 0x00000360
+#define NV20TCL_STENCIL_FUNC_FUNC 0x00000364
+#define NV20TCL_STENCIL_FUNC_FUNC_NEVER 0x00000200
+#define NV20TCL_STENCIL_FUNC_FUNC_LESS 0x00000201
+#define NV20TCL_STENCIL_FUNC_FUNC_EQUAL 0x00000202
+#define NV20TCL_STENCIL_FUNC_FUNC_LEQUAL 0x00000203
+#define NV20TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204
+#define NV20TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204
+#define NV20TCL_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV20TCL_STENCIL_FUNC_FUNC_GEQUAL 0x00000206
+#define NV20TCL_STENCIL_FUNC_FUNC_ALWAYS 0x00000207
+#define NV20TCL_STENCIL_FUNC_REF 0x00000368
+#define NV20TCL_STENCIL_FUNC_MASK 0x0000036c
+#define NV20TCL_STENCIL_OP_FAIL 0x00000370
+#define NV20TCL_STENCIL_OP_FAIL_ZERO 0x00000000
+#define NV20TCL_STENCIL_OP_FAIL_INVERT 0x0000150a
+#define NV20TCL_STENCIL_OP_FAIL_KEEP 0x00001e00
+#define NV20TCL_STENCIL_OP_FAIL_REPLACE 0x00001e01
+#define NV20TCL_STENCIL_OP_FAIL_INCR 0x00001e02
+#define NV20TCL_STENCIL_OP_FAIL_DECR 0x00001e03
+#define NV20TCL_STENCIL_OP_FAIL_INCR_WRAP 0x00008507
+#define NV20TCL_STENCIL_OP_FAIL_DECR_WRAP 0x00008508
+#define NV20TCL_STENCIL_OP_ZFAIL 0x00000374
+#define NV20TCL_STENCIL_OP_ZFAIL_ZERO 0x00000000
+#define NV20TCL_STENCIL_OP_ZFAIL_INVERT 0x0000150a
+#define NV20TCL_STENCIL_OP_ZFAIL_KEEP 0x00001e00
+#define NV20TCL_STENCIL_OP_ZFAIL_REPLACE 0x00001e01
+#define NV20TCL_STENCIL_OP_ZFAIL_INCR 0x00001e02
+#define NV20TCL_STENCIL_OP_ZFAIL_DECR 0x00001e03
+#define NV20TCL_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV20TCL_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV20TCL_STENCIL_OP_ZPASS 0x00000378
+#define NV20TCL_STENCIL_OP_ZPASS_ZERO 0x00000000
+#define NV20TCL_STENCIL_OP_ZPASS_INVERT 0x0000150a
+#define NV20TCL_STENCIL_OP_ZPASS_KEEP 0x00001e00
+#define NV20TCL_STENCIL_OP_ZPASS_REPLACE 0x00001e01
+#define NV20TCL_STENCIL_OP_ZPASS_INCR 0x00001e02
+#define NV20TCL_STENCIL_OP_ZPASS_DECR 0x00001e03
+#define NV20TCL_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV20TCL_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV20TCL_SHADE_MODEL 0x0000037c
+#define NV20TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NV20TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NV20TCL_LINE_WIDTH 0x00000380
+#define NV20TCL_POLYGON_OFFSET_FACTOR 0x00000384
+#define NV20TCL_POLYGON_OFFSET_UNITS 0x00000388
+#define NV20TCL_POLYGON_MODE_FRONT 0x0000038c
+#define NV20TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV20TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV20TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NV20TCL_POLYGON_MODE_BACK 0x00000390
+#define NV20TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV20TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV20TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NV20TCL_DEPTH_RANGE_NEAR 0x00000394
+#define NV20TCL_DEPTH_RANGE_FAR 0x00000398
+#define NV20TCL_CULL_FACE 0x0000039c
+#define NV20TCL_CULL_FACE_FRONT 0x00000404
+#define NV20TCL_CULL_FACE_BACK 0x00000405
+#define NV20TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NV20TCL_FRONT_FACE 0x000003a0
+#define NV20TCL_FRONT_FACE_CW 0x00000900
+#define NV20TCL_FRONT_FACE_CCW 0x00000901
+#define NV20TCL_NORMALIZE_ENABLE 0x000003a4
+#define NV20TCL_COLOR_MATERIAL_FRONT_R 0x000003a8
+#define NV20TCL_COLOR_MATERIAL_FRONT_G 0x000003ac
+#define NV20TCL_COLOR_MATERIAL_FRONT_B 0x000003b0
+#define NV20TCL_COLOR_MATERIAL_FRONT_A 0x000003b4
+#define NV20TCL_SEPARATE_SPECULAR_ENABLE 0x000003b8
+#define NV20TCL_ENABLED_LIGHTS 0x000003bc
+#define NV20TCL_TX_GEN_S(x) (0x000003c0+((x)*16))
+#define NV20TCL_TX_GEN_S__SIZE 0x00000004
+#define NV20TCL_TX_GEN_S_FALSE 0x00000000
+#define NV20TCL_TX_GEN_S_EYE_LINEAR 0x00002400
+#define NV20TCL_TX_GEN_S_OBJECT_LINEAR 0x00002401
+#define NV20TCL_TX_GEN_S_SPHERE_MAP 0x00002402
+#define NV20TCL_TX_GEN_S_NORMAL_MAP 0x00008511
+#define NV20TCL_TX_GEN_S_REFLECTION_MAP 0x00008512
+#define NV20TCL_TX_GEN_T(x) (0x000003c4+((x)*16))
+#define NV20TCL_TX_GEN_T__SIZE 0x00000004
+#define NV20TCL_TX_GEN_T_FALSE 0x00000000
+#define NV20TCL_TX_GEN_T_EYE_LINEAR 0x00002400
+#define NV20TCL_TX_GEN_T_OBJECT_LINEAR 0x00002401
+#define NV20TCL_TX_GEN_T_SPHERE_MAP 0x00002402
+#define NV20TCL_TX_GEN_T_NORMAL_MAP 0x00008511
+#define NV20TCL_TX_GEN_T_REFLECTION_MAP 0x00008512
+#define NV20TCL_TX_GEN_R(x) (0x000003c8+((x)*16))
+#define NV20TCL_TX_GEN_R__SIZE 0x00000004
+#define NV20TCL_TX_GEN_R_FALSE 0x00000000
+#define NV20TCL_TX_GEN_R_EYE_LINEAR 0x00002400
+#define NV20TCL_TX_GEN_R_OBJECT_LINEAR 0x00002401
+#define NV20TCL_TX_GEN_R_SPHERE_MAP 0x00002402
+#define NV20TCL_TX_GEN_R_NORMAL_MAP 0x00008511
+#define NV20TCL_TX_GEN_R_REFLECTION_MAP 0x00008512
+#define NV20TCL_TX_GEN_Q(x) (0x000003cc+((x)*16))
+#define NV20TCL_TX_GEN_Q__SIZE 0x00000004
+#define NV20TCL_TX_GEN_Q_FALSE 0x00000000
+#define NV20TCL_TX_GEN_Q_EYE_LINEAR 0x00002400
+#define NV20TCL_TX_GEN_Q_OBJECT_LINEAR 0x00002401
+#define NV20TCL_TX_GEN_Q_SPHERE_MAP 0x00002402
+#define NV20TCL_TX_GEN_Q_NORMAL_MAP 0x00008511
+#define NV20TCL_TX_GEN_Q_REFLECTION_MAP 0x00008512
+#define NV20TCL_TX_MATRIX_ENABLE(x) (0x00000420+((x)*4))
+#define NV20TCL_TX_MATRIX_ENABLE__SIZE 0x00000004
+#define NV20TCL_POINT_SIZE 0x0000043c
+#define NV20TCL_MODELVIEW0_MATRIX(x) (0x00000480+((x)*4))
+#define NV20TCL_MODELVIEW0_MATRIX__SIZE 0x00000010
+#define NV20TCL_MODELVIEW1_MATRIX(x) (0x000004c0+((x)*4))
+#define NV20TCL_MODELVIEW1_MATRIX__SIZE 0x00000010
+#define NV20TCL_MODELVIEW2_MATRIX(x) (0x00000500+((x)*4))
+#define NV20TCL_MODELVIEW2_MATRIX__SIZE 0x00000010
+#define NV20TCL_MODELVIEW3_MATRIX(x) (0x00000540+((x)*4))
+#define NV20TCL_MODELVIEW3_MATRIX__SIZE 0x00000010
+#define NV20TCL_INVERSE_MODELVIEW0_MATRIX(x) (0x00000580+((x)*4))
+#define NV20TCL_INVERSE_MODELVIEW0_MATRIX__SIZE 0x00000010
+#define NV20TCL_INVERSE_MODELVIEW1_MATRIX(x) (0x000005c0+((x)*4))
+#define NV20TCL_INVERSE_MODELVIEW1_MATRIX__SIZE 0x00000010
+#define NV20TCL_INVERSE_MODELVIEW2_MATRIX(x) (0x00000600+((x)*4))
+#define NV20TCL_INVERSE_MODELVIEW2_MATRIX__SIZE 0x00000010
+#define NV20TCL_INVERSE_MODELVIEW3_MATRIX(x) (0x00000640+((x)*4))
+#define NV20TCL_INVERSE_MODELVIEW3_MATRIX__SIZE 0x00000010
+#define NV20TCL_PROJECTION_MATRIX(x) (0x00000680+((x)*4))
+#define NV20TCL_PROJECTION_MATRIX__SIZE 0x00000010
+#define NV20TCL_TX0_MATRIX(x) (0x000006c0+((x)*4))
+#define NV20TCL_TX0_MATRIX__SIZE 0x00000010
+#define NV20TCL_TX1_MATRIX(x) (0x00000700+((x)*4))
+#define NV20TCL_TX1_MATRIX__SIZE 0x00000010
+#define NV20TCL_TX2_MATRIX(x) (0x00000740+((x)*4))
+#define NV20TCL_TX2_MATRIX__SIZE 0x00000010
+#define NV20TCL_TX3_MATRIX(x) (0x00000780+((x)*4))
+#define NV20TCL_TX3_MATRIX__SIZE 0x00000010
+#define NV20TCL_TX0_CLIP_PLANE_A(x) (0x00000840+((x)*16))
+#define NV20TCL_TX0_CLIP_PLANE_A__SIZE 0x00000004
+#define NV20TCL_TX0_CLIP_PLANE_B(x) (0x00000844+((x)*16))
+#define NV20TCL_TX0_CLIP_PLANE_B__SIZE 0x00000004
+#define NV20TCL_TX0_CLIP_PLANE_C(x) (0x00000848+((x)*16))
+#define NV20TCL_TX0_CLIP_PLANE_C__SIZE 0x00000004
+#define NV20TCL_TX0_CLIP_PLANE_D(x) (0x0000084c+((x)*16))
+#define NV20TCL_TX0_CLIP_PLANE_D__SIZE 0x00000004
+#define NV20TCL_TX1_CLIP_PLANE_A(x) (0x00000880+((x)*16))
+#define NV20TCL_TX1_CLIP_PLANE_A__SIZE 0x00000004
+#define NV20TCL_TX1_CLIP_PLANE_B(x) (0x00000884+((x)*16))
+#define NV20TCL_TX1_CLIP_PLANE_B__SIZE 0x00000004
+#define NV20TCL_TX1_CLIP_PLANE_C(x) (0x00000888+((x)*16))
+#define NV20TCL_TX1_CLIP_PLANE_C__SIZE 0x00000004
+#define NV20TCL_TX1_CLIP_PLANE_D(x) (0x0000088c+((x)*16))
+#define NV20TCL_TX1_CLIP_PLANE_D__SIZE 0x00000004
+#define NV20TCL_TX2_CLIP_PLANE_A(x) (0x000008c0+((x)*16))
+#define NV20TCL_TX2_CLIP_PLANE_A__SIZE 0x00000004
+#define NV20TCL_TX2_CLIP_PLANE_B(x) (0x000008c4+((x)*16))
+#define NV20TCL_TX2_CLIP_PLANE_B__SIZE 0x00000004
+#define NV20TCL_TX2_CLIP_PLANE_C(x) (0x000008c8+((x)*16))
+#define NV20TCL_TX2_CLIP_PLANE_C__SIZE 0x00000004
+#define NV20TCL_TX2_CLIP_PLANE_D(x) (0x000008cc+((x)*16))
+#define NV20TCL_TX2_CLIP_PLANE_D__SIZE 0x00000004
+#define NV20TCL_TX3_CLIP_PLANE_A(x) (0x00000900+((x)*16))
+#define NV20TCL_TX3_CLIP_PLANE_A__SIZE 0x00000004
+#define NV20TCL_TX3_CLIP_PLANE_B(x) (0x00000904+((x)*16))
+#define NV20TCL_TX3_CLIP_PLANE_B__SIZE 0x00000004
+#define NV20TCL_TX3_CLIP_PLANE_C(x) (0x00000908+((x)*16))
+#define NV20TCL_TX3_CLIP_PLANE_C__SIZE 0x00000004
+#define NV20TCL_TX3_CLIP_PLANE_D(x) (0x0000090c+((x)*16))
+#define NV20TCL_TX3_CLIP_PLANE_D__SIZE 0x00000004
+#define NV20TCL_FOG_EQUATION_CONSTANT 0x000009c0
+#define NV20TCL_FOG_EQUATION_LINEAR 0x000009c4
+#define NV20TCL_FOG_EQUATION_QUADRATIC 0x000009c8
+#define NV20TCL_FRONT_MATERIAL_SHININESS(x) (0x000009e0+((x)*4))
+#define NV20TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006
+#define NV20TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x00000a10
+#define NV20TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x00000a14
+#define NV20TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x00000a18
+#define NV20TCL_VIEWPORT_SCALE0_X 0x00000a20
+#define NV20TCL_VIEWPORT_SCALE0_Y 0x00000a24
+#define NV20TCL_VIEWPORT_SCALE0_Z 0x00000a28
+#define NV20TCL_VIEWPORT_SCALE0_W 0x00000a2c
+#define NV20TCL_POINT_PARAMETER(x) (0x00000a30+((x)*4))
+#define NV20TCL_POINT_PARAMETER__SIZE 0x00000008
+#define NV20TCL_RC_CONSTANT_COLOR0(x) (0x00000a60+((x)*4))
+#define NV20TCL_RC_CONSTANT_COLOR0__SIZE 0x00000008
+#define NV20TCL_RC_CONSTANT_COLOR0_B_SHIFT 0
+#define NV20TCL_RC_CONSTANT_COLOR0_B_MASK 0x000000ff
+#define NV20TCL_RC_CONSTANT_COLOR0_G_SHIFT 8
+#define NV20TCL_RC_CONSTANT_COLOR0_G_MASK 0x0000ff00
+#define NV20TCL_RC_CONSTANT_COLOR0_R_SHIFT 16
+#define NV20TCL_RC_CONSTANT_COLOR0_R_MASK 0x00ff0000
+#define NV20TCL_RC_CONSTANT_COLOR0_A_SHIFT 24
+#define NV20TCL_RC_CONSTANT_COLOR0_A_MASK 0xff000000
+#define NV20TCL_RC_CONSTANT_COLOR1(x) (0x00000a80+((x)*4))
+#define NV20TCL_RC_CONSTANT_COLOR1__SIZE 0x00000008
+#define NV20TCL_RC_CONSTANT_COLOR1_B_SHIFT 0
+#define NV20TCL_RC_CONSTANT_COLOR1_B_MASK 0x000000ff
+#define NV20TCL_RC_CONSTANT_COLOR1_G_SHIFT 8
+#define NV20TCL_RC_CONSTANT_COLOR1_G_MASK 0x0000ff00
+#define NV20TCL_RC_CONSTANT_COLOR1_R_SHIFT 16
+#define NV20TCL_RC_CONSTANT_COLOR1_R_MASK 0x00ff0000
+#define NV20TCL_RC_CONSTANT_COLOR1_A_SHIFT 24
+#define NV20TCL_RC_CONSTANT_COLOR1_A_MASK 0xff000000
+#define NV20TCL_RC_OUT_ALPHA(x) (0x00000aa0+((x)*4))
+#define NV20TCL_RC_OUT_ALPHA__SIZE 0x00000008
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12)
+#define NV20TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13)
+#define NV20TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14)
+#define NV20TCL_RC_OUT_ALPHA_BIAS (1 << 15)
+#define NV20TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV20TCL_RC_OUT_ALPHA_SCALE_SHIFT 17
+#define NV20TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV20TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV20TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV20TCL_RC_IN_RGB(x) (0x00000ac0+((x)*4))
+#define NV20TCL_RC_IN_RGB__SIZE 0x00000008
+#define NV20TCL_RC_IN_RGB_D_INPUT_SHIFT 0
+#define NV20TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f
+#define NV20TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV20TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV20TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003
+#define NV20TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV20TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV20TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV20TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV20TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c
+#define NV20TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d
+#define NV20TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV20TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV20TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4)
+#define NV20TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV20TCL_RC_IN_RGB_D_MAPPING_SHIFT 5
+#define NV20TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0
+#define NV20TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV20TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV20TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV20TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV20TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV20TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV20TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV20TCL_RC_IN_RGB_C_INPUT_SHIFT 8
+#define NV20TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00
+#define NV20TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300
+#define NV20TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12)
+#define NV20TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_SHIFT 13
+#define NV20TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV20TCL_RC_IN_RGB_B_INPUT_SHIFT 16
+#define NV20TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000
+#define NV20TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV20TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV20TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000
+#define NV20TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV20TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV20TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV20TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV20TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000
+#define NV20TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000
+#define NV20TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV20TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV20TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20)
+#define NV20TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_SHIFT 21
+#define NV20TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV20TCL_RC_IN_RGB_A_INPUT_SHIFT 24
+#define NV20TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV20TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28)
+#define NV20TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_SHIFT 29
+#define NV20TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV20TCL_VIEWPORT_SCALE1_X 0x00000af0
+#define NV20TCL_VIEWPORT_SCALE1_Y 0x00000af4
+#define NV20TCL_VIEWPORT_SCALE1_Z 0x00000af8
+#define NV20TCL_VIEWPORT_SCALE1_W 0x00000afc
+#define NV20TCL_VP_UPLOAD_INST(x) (0x00000b00+((x)*4))
+#define NV20TCL_VP_UPLOAD_INST__SIZE 0x00000004
+#define NV20TCL_VP_UPLOAD_CONST(x) (0x00000b80+((x)*4))
+#define NV20TCL_VP_UPLOAD_CONST__SIZE 0x00000004
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_R(x) (0x00000c00+((x)*64))
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_G(x) (0x00000c04+((x)*64))
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_B(x) (0x00000c08+((x)*64))
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00001000+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00001004+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00001008+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000100c+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00001010+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00001014+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00001018+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000101c+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00001020+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008
+#define NV20TCL_LIGHT_HALF_VECTOR_X(x) (0x00001028+((x)*128))
+#define NV20TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008
+#define NV20TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000102c+((x)*128))
+#define NV20TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008
+#define NV20TCL_LIGHT_HALF_VECTOR_Z(x) (0x00001030+((x)*128))
+#define NV20TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008
+#define NV20TCL_LIGHT_DIRECTION_X(x) (0x00001034+((x)*128))
+#define NV20TCL_LIGHT_DIRECTION_X__SIZE 0x00000008
+#define NV20TCL_LIGHT_DIRECTION_Y(x) (0x00001038+((x)*128))
+#define NV20TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008
+#define NV20TCL_LIGHT_DIRECTION_Z(x) (0x0000103c+((x)*128))
+#define NV20TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008
+#define NV20TCL_LIGHT_POSITION_X(x) (0x0000105c+((x)*128))
+#define NV20TCL_LIGHT_POSITION_X__SIZE 0x00000008
+#define NV20TCL_LIGHT_POSITION_Y(x) (0x00001060+((x)*128))
+#define NV20TCL_LIGHT_POSITION_Y__SIZE 0x00000008
+#define NV20TCL_LIGHT_POSITION_Z(x) (0x00001064+((x)*128))
+#define NV20TCL_LIGHT_POSITION_Z__SIZE 0x00000008
+#define NV20TCL_LIGHT_CONSTANT_ATTENUATION(x) (0x00001068+((x)*128))
+#define NV20TCL_LIGHT_CONSTANT_ATTENUATION__SIZE 0x00000008
+#define NV20TCL_LIGHT_LINEAR_ATTENUATION(x) (0x0000106c+((x)*128))
+#define NV20TCL_LIGHT_LINEAR_ATTENUATION__SIZE 0x00000008
+#define NV20TCL_LIGHT_QUADRATIC_ATTENUATION(x) (0x00001070+((x)*128))
+#define NV20TCL_LIGHT_QUADRATIC_ATTENUATION__SIZE 0x00000008
+#define NV20TCL_POLYGON_STIPPLE_ENABLE 0x0000147c
+#define NV20TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4))
+#define NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020
+#define NV20TCL_VERTEX_POS_3F_X 0x00001500
+#define NV20TCL_VERTEX_POS_3F_Y 0x00001504
+#define NV20TCL_VERTEX_POS_3F_Z 0x00001508
+#define NV20TCL_VERTEX_POS_4F_X 0x00001518
+#define NV20TCL_VERTEX_POS_4F_Y 0x0000151c
+#define NV20TCL_VERTEX_POS_4F_Z 0x00001520
+#define NV20TCL_VERTEX_POS_3I_XY 0x00001528
+#define NV20TCL_VERTEX_POS_3I_XY_X_SHIFT 0
+#define NV20TCL_VERTEX_POS_3I_XY_X_MASK 0x0000ffff
+#define NV20TCL_VERTEX_POS_3I_XY_Y_SHIFT 16
+#define NV20TCL_VERTEX_POS_3I_XY_Y_MASK 0xffff0000
+#define NV20TCL_VERTEX_POS_3I_Z 0x0000152c
+#define NV20TCL_VERTEX_POS_3I_Z_Z_SHIFT 0
+#define NV20TCL_VERTEX_POS_3I_Z_Z_MASK 0x0000ffff
+#define NV20TCL_VERTEX_NOR_3F_X 0x00001530
+#define NV20TCL_VERTEX_NOR_3F_Y 0x00001534
+#define NV20TCL_VERTEX_NOR_3F_Z 0x00001538
+#define NV20TCL_VERTEX_NOR_3I_XY 0x00001540
+#define NV20TCL_VERTEX_NOR_3I_XY_X_SHIFT 0
+#define NV20TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff
+#define NV20TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16
+#define NV20TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000
+#define NV20TCL_VERTEX_NOR_3I_Z 0x00001544
+#define NV20TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0
+#define NV20TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff
+#define NV20TCL_VERTEX_COL_4F_X 0x00001550
+#define NV20TCL_VERTEX_COL_4F_Y 0x00001554
+#define NV20TCL_VERTEX_COL_4F_Z 0x00001558
+#define NV20TCL_VERTEX_COL_4F_W 0x0000155c
+#define NV20TCL_VERTEX_COL_3F_X 0x00001560
+#define NV20TCL_VERTEX_COL_3F_Y 0x00001564
+#define NV20TCL_VERTEX_COL_3F_Z 0x00001568
+#define NV20TCL_VERTEX_COL_4I 0x0000156c
+#define NV20TCL_VERTEX_COL_4I_R_SHIFT 0
+#define NV20TCL_VERTEX_COL_4I_R_MASK 0x000000ff
+#define NV20TCL_VERTEX_COL_4I_G_SHIFT 8
+#define NV20TCL_VERTEX_COL_4I_G_MASK 0x0000ff00
+#define NV20TCL_VERTEX_COL_4I_B_SHIFT 16
+#define NV20TCL_VERTEX_COL_4I_B_MASK 0x00ff0000
+#define NV20TCL_VERTEX_COL_4I_A_SHIFT 24
+#define NV20TCL_VERTEX_COL_4I_A_MASK 0xff000000
+#define NV20TCL_VERTEX_COL2_3F_X 0x00001580
+#define NV20TCL_VERTEX_COL2_3F_Y 0x00001584
+#define NV20TCL_VERTEX_COL2_3F_Z 0x00001588
+#define NV20TCL_VERTEX_COL2_4I 0x0000158c
+#define NV20TCL_VERTEX_COL2_4I_R_SHIFT 0
+#define NV20TCL_VERTEX_COL2_4I_R_MASK 0x000000ff
+#define NV20TCL_VERTEX_COL2_4I_G_SHIFT 8
+#define NV20TCL_VERTEX_COL2_4I_G_MASK 0x0000ff00
+#define NV20TCL_VERTEX_COL2_4I_B_SHIFT 16
+#define NV20TCL_VERTEX_COL2_4I_B_MASK 0x00ff0000
+#define NV20TCL_VERTEX_COL2_4I_A_SHIFT 24
+#define NV20TCL_VERTEX_COL2_4I_A_MASK 0xff000000
+#define NV20TCL_VERTEX_TX0_2F_S 0x00001590
+#define NV20TCL_VERTEX_TX0_2F_T 0x00001594
+#define NV20TCL_VERTEX_TX0_2I 0x00001598
+#define NV20TCL_VERTEX_TX0_2I_S_SHIFT 0
+#define NV20TCL_VERTEX_TX0_2I_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX0_2I_T_SHIFT 16
+#define NV20TCL_VERTEX_TX0_2I_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX0_4F_S 0x000015a0
+#define NV20TCL_VERTEX_TX0_4F_T 0x000015a4
+#define NV20TCL_VERTEX_TX0_4F_R 0x000015a8
+#define NV20TCL_VERTEX_TX0_4F_Q 0x000015ac
+#define NV20TCL_VERTEX_TX0_4I_ST 0x000015b0
+#define NV20TCL_VERTEX_TX0_4I_ST_S_SHIFT 0
+#define NV20TCL_VERTEX_TX0_4I_ST_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX0_4I_ST_T_SHIFT 16
+#define NV20TCL_VERTEX_TX0_4I_ST_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX0_4I_RQ 0x000015b4
+#define NV20TCL_VERTEX_TX0_4I_RQ_R_SHIFT 0
+#define NV20TCL_VERTEX_TX0_4I_RQ_R_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX0_4I_RQ_Q_SHIFT 16
+#define NV20TCL_VERTEX_TX0_4I_RQ_Q_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX1_2F_S 0x000015b8
+#define NV20TCL_VERTEX_TX1_2F_T 0x000015bc
+#define NV20TCL_VERTEX_TX1_2I 0x000015c0
+#define NV20TCL_VERTEX_TX1_2I_S_SHIFT 0
+#define NV20TCL_VERTEX_TX1_2I_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX1_2I_T_SHIFT 16
+#define NV20TCL_VERTEX_TX1_2I_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX1_4F_S 0x000015c8
+#define NV20TCL_VERTEX_TX1_4F_T 0x000015cc
+#define NV20TCL_VERTEX_TX1_4F_R 0x000015d0
+#define NV20TCL_VERTEX_TX1_4F_Q 0x000015d4
+#define NV20TCL_VERTEX_TX1_4I_ST 0x000015d8
+#define NV20TCL_VERTEX_TX1_4I_ST_S_SHIFT 0
+#define NV20TCL_VERTEX_TX1_4I_ST_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX1_4I_ST_T_SHIFT 16
+#define NV20TCL_VERTEX_TX1_4I_ST_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX1_4I_RQ 0x000015dc
+#define NV20TCL_VERTEX_TX1_4I_RQ_R_SHIFT 0
+#define NV20TCL_VERTEX_TX1_4I_RQ_R_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX1_4I_RQ_Q_SHIFT 16
+#define NV20TCL_VERTEX_TX1_4I_RQ_Q_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX2_2F_S 0x000015e0
+#define NV20TCL_VERTEX_TX2_2F_T 0x000015e4
+#define NV20TCL_VERTEX_TX2_2I 0x000015e8
+#define NV20TCL_VERTEX_TX2_2I_S_SHIFT 0
+#define NV20TCL_VERTEX_TX2_2I_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX2_2I_T_SHIFT 16
+#define NV20TCL_VERTEX_TX2_2I_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX2_4F_S 0x000015f0
+#define NV20TCL_VERTEX_TX2_4F_T 0x000015f4
+#define NV20TCL_VERTEX_TX2_4F_R 0x000015f8
+#define NV20TCL_VERTEX_TX2_4F_Q 0x000015fc
+#define NV20TCL_VERTEX_TX2_4I_ST 0x00001600
+#define NV20TCL_VERTEX_TX2_4I_ST_S_SHIFT 0
+#define NV20TCL_VERTEX_TX2_4I_ST_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX2_4I_ST_T_SHIFT 16
+#define NV20TCL_VERTEX_TX2_4I_ST_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX2_4I_RQ 0x00001604
+#define NV20TCL_VERTEX_TX2_4I_RQ_R_SHIFT 0
+#define NV20TCL_VERTEX_TX2_4I_RQ_R_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX2_4I_RQ_Q_SHIFT 16
+#define NV20TCL_VERTEX_TX2_4I_RQ_Q_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX3_2F_S 0x00001608
+#define NV20TCL_VERTEX_TX3_2F_T 0x0000160c
+#define NV20TCL_VERTEX_TX3_2I 0x00001610
+#define NV20TCL_VERTEX_TX3_2I_S_SHIFT 0
+#define NV20TCL_VERTEX_TX3_2I_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX3_2I_T_SHIFT 16
+#define NV20TCL_VERTEX_TX3_2I_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX3_4F_S 0x00001620
+#define NV20TCL_VERTEX_TX3_4F_T 0x00001624
+#define NV20TCL_VERTEX_TX3_4F_R 0x00001628
+#define NV20TCL_VERTEX_TX3_4F_Q 0x0000162c
+#define NV20TCL_VERTEX_TX3_4I_ST 0x00001630
+#define NV20TCL_VERTEX_TX3_4I_ST_S_SHIFT 0
+#define NV20TCL_VERTEX_TX3_4I_ST_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX3_4I_ST_T_SHIFT 16
+#define NV20TCL_VERTEX_TX3_4I_ST_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX3_4I_RQ 0x00001634
+#define NV20TCL_VERTEX_TX3_4I_RQ_R_SHIFT 0
+#define NV20TCL_VERTEX_TX3_4I_RQ_R_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX3_4I_RQ_Q_SHIFT 16
+#define NV20TCL_VERTEX_TX3_4I_RQ_Q_MASK 0xffff0000
+#define NV20TCL_VERTEX_FOG_1F 0x00001698
+#define NV20TCL_EDGEFLAG_ENABLE 0x000016bc
+#define NV20TCL_VTXBUF_ADDRESS(x) (0x00001720+((x)*4))
+#define NV20TCL_VTXBUF_ADDRESS__SIZE 0x00000010
+#define NV20TCL_VTXBUF_ADDRESS_DMA1 (1 << 31)
+#define NV20TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0
+#define NV20TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff
+#define NV20TCL_VTXFMT(x) (0x00001760+((x)*4))
+#define NV20TCL_VTXFMT__SIZE 0x00000010
+#define NV20TCL_VTXFMT_TYPE_SHIFT 0
+#define NV20TCL_VTXFMT_TYPE_MASK 0x0000000f
+#define NV20TCL_VTXFMT_TYPE_FLOAT 0x00000002
+#define NV20TCL_VTXFMT_TYPE_UBYTE 0x00000004
+#define NV20TCL_VTXFMT_TYPE_USHORT 0x00000005
+#define NV20TCL_VTXFMT_SIZE_SHIFT 4
+#define NV20TCL_VTXFMT_SIZE_MASK 0x000000f0
+#define NV20TCL_VTXFMT_STRIDE_SHIFT 8
+#define NV20TCL_VTXFMT_STRIDE_MASK 0x0000ff00
+#define NV20TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0
+#define NV20TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4
+#define NV20TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8
+#define NV20TCL_COLOR_MATERIAL_BACK_A 0x000017ac
+#define NV20TCL_COLOR_MATERIAL_BACK_R 0x000017b0
+#define NV20TCL_COLOR_MATERIAL_BACK_G 0x000017b4
+#define NV20TCL_COLOR_MATERIAL_BACK_B 0x000017b8
+#define NV20TCL_COLOR_LOGIC_OP_ENABLE 0x000017bc
+#define NV20TCL_COLOR_LOGIC_OP_OP 0x000017c0
+#define NV20TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500
+#define NV20TCL_COLOR_LOGIC_OP_OP_AND 0x00001501
+#define NV20TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502
+#define NV20TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503
+#define NV20TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504
+#define NV20TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505
+#define NV20TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506
+#define NV20TCL_COLOR_LOGIC_OP_OP_OR 0x00001507
+#define NV20TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508
+#define NV20TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509
+#define NV20TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a
+#define NV20TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b
+#define NV20TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c
+#define NV20TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d
+#define NV20TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e
+#define NV20TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f
+#define NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE 0x000017c4
+#define NV20TCL_TX_SHADER_CULL_MODE 0x000017f8
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_S (1 << 0)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_S_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_S_LESS 0x00000001
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_T (1 << 1)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_T_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_T_LESS 0x00000002
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_R (1 << 2)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_R_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_R_LESS 0x00000004
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_Q (1 << 3)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_Q_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_Q_LESS 0x00000008
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_S (1 << 4)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_S_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_S_LESS 0x00000010
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_T (1 << 5)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_T_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_T_LESS 0x00000020
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_R (1 << 6)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_R_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_R_LESS 0x00000040
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_Q (1 << 7)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_Q_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_Q_LESS 0x00000080
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_S (1 << 8)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_S_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_S_LESS 0x00000100
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_T (1 << 9)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_T_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_T_LESS 0x00000200
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_R (1 << 10)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_R_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_R_LESS 0x00000400
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_Q (1 << 11)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_Q_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_Q_LESS 0x00000800
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_S (1 << 12)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_S_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_S_LESS 0x00001000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_T (1 << 13)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_T_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_T_LESS 0x00002000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_R (1 << 14)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_R_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_R_LESS 0x00004000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_Q (1 << 15)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_Q_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_Q_LESS 0x00008000
+#define NV20TCL_VERTEX_BEGIN_END 0x000017fc
+#define NV20TCL_VERTEX_BEGIN_END_STOP 0x00000000
+#define NV20TCL_VERTEX_BEGIN_END_POINTS 0x00000001
+#define NV20TCL_VERTEX_BEGIN_END_LINES 0x00000002
+#define NV20TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003
+#define NV20TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004
+#define NV20TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005
+#define NV20TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006
+#define NV20TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007
+#define NV20TCL_VERTEX_BEGIN_END_QUADS 0x00000008
+#define NV20TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009
+#define NV20TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a
+#define NV20TCL_VB_ELEMENT_U16 0x00001800
+#define NV20TCL_VB_ELEMENT_U16_I0_SHIFT 0
+#define NV20TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
+#define NV20TCL_VB_ELEMENT_U16_I1_SHIFT 16
+#define NV20TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
+#define NV20TCL_VB_VERTEX_BATCH 0x00001810
+#define NV20TCL_VB_VERTEX_BATCH_OFFSET_SHIFT 0
+#define NV20TCL_VB_VERTEX_BATCH_OFFSET_MASK 0x00ffffff
+#define NV20TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24
+#define NV20TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000
+#define NV20TCL_VERTEX_DATA 0x00001818
+#define NV20TCL_TX_SHADER_CONST_EYE_X 0x0000181c
+#define NV20TCL_TX_SHADER_CONST_EYE_Y 0x00001820
+#define NV20TCL_TX_SHADER_CONST_EYE_Z 0x00001824
+#define NV20TCL_VTX_ATTR_4F_X(x) (0x00001a00+((x)*16))
+#define NV20TCL_VTX_ATTR_4F_X__SIZE 0x00000010
+#define NV20TCL_VTX_ATTR_4F_Y(x) (0x00001a04+((x)*16))
+#define NV20TCL_VTX_ATTR_4F_Y__SIZE 0x00000010
+#define NV20TCL_VTX_ATTR_4F_Z(x) (0x00001a08+((x)*16))
+#define NV20TCL_VTX_ATTR_4F_Z__SIZE 0x00000010
+#define NV20TCL_VTX_ATTR_4F_W(x) (0x00001a0c+((x)*16))
+#define NV20TCL_VTX_ATTR_4F_W__SIZE 0x00000010
+#define NV20TCL_TX_OFFSET(x) (0x00001b00+((x)*64))
+#define NV20TCL_TX_OFFSET__SIZE 0x00000004
+#define NV20TCL_TX_FORMAT(x) (0x00001b04+((x)*64))
+#define NV20TCL_TX_FORMAT__SIZE 0x00000004
+#define NV20TCL_TX_FORMAT_DMA0 (1 << 0)
+#define NV20TCL_TX_FORMAT_DMA1 (1 << 1)
+#define NV20TCL_TX_FORMAT_CUBIC (1 << 2)
+#define NV20TCL_TX_FORMAT_NO_BORDER (1 << 3)
+#define NV20TCL_TX_FORMAT_DIMS_SHIFT 4
+#define NV20TCL_TX_FORMAT_DIMS_MASK 0x000000f0
+#define NV20TCL_TX_FORMAT_DIMS_1D 0x00000010
+#define NV20TCL_TX_FORMAT_DIMS_2D 0x00000020
+#define NV20TCL_TX_FORMAT_DIMS_3D 0x00000030
+#define NV20TCL_TX_FORMAT_FORMAT_SHIFT 8
+#define NV20TCL_TX_FORMAT_FORMAT_MASK 0x0000ff00
+#define NV20TCL_TX_FORMAT_FORMAT_L8 0x00000000
+#define NV20TCL_TX_FORMAT_FORMAT_A8 0x00000100
+#define NV20TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000200
+#define NV20TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000300
+#define NV20TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000400
+#define NV20TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000500
+#define NV20TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000600
+#define NV20TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000700
+#define NV20TCL_TX_FORMAT_FORMAT_INDEX8 0x00000b00
+#define NV20TCL_TX_FORMAT_FORMAT_DXT1 0x00000c00
+#define NV20TCL_TX_FORMAT_FORMAT_DXT3 0x00000e00
+#define NV20TCL_TX_FORMAT_FORMAT_DXT5 0x00000f00
+#define NV20TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000
+#define NV20TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00001100
+#define NV20TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200
+#define NV20TCL_TX_FORMAT_FORMAT_L8_RECT 0x00001300
+#define NV20TCL_TX_FORMAT_FORMAT_A8L8 0x00001a00
+#define NV20TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00001b00
+#define NV20TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00
+#define NV20TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00
+#define NV20TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00002000
+#define NV20TCL_TX_FORMAT_FORMAT_DSDT 0x00002800
+#define NV20TCL_TX_FORMAT_FORMAT_A16 0x00003200
+#define NV20TCL_TX_FORMAT_FORMAT_HILO16 0x00003300
+#define NV20TCL_TX_FORMAT_FORMAT_A16_RECT 0x00003500
+#define NV20TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00003600
+#define NV20TCL_TX_FORMAT_FORMAT_HILO8 0x00004400
+#define NV20TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00004500
+#define NV20TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00004600
+#define NV20TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00004700
+#define NV20TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00
+#define NV20TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00
+#define NV20TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00
+#define NV20TCL_TX_FORMAT_MIPMAP (1 << 19)
+#define NV20TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 20
+#define NV20TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x00f00000
+#define NV20TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 24
+#define NV20TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x0f000000
+#define NV20TCL_TX_FORMAT_BASE_SIZE_W_SHIFT 28
+#define NV20TCL_TX_FORMAT_BASE_SIZE_W_MASK 0xf0000000
+#define NV20TCL_TX_WRAP(x) (0x00001b08+((x)*64))
+#define NV20TCL_TX_WRAP__SIZE 0x00000004
+#define NV20TCL_TX_WRAP_S_SHIFT 0
+#define NV20TCL_TX_WRAP_S_MASK 0x000000ff
+#define NV20TCL_TX_WRAP_S_REPEAT 0x00000001
+#define NV20TCL_TX_WRAP_S_MIRRORED_REPEAT 0x00000002
+#define NV20TCL_TX_WRAP_S_CLAMP_TO_EDGE 0x00000003
+#define NV20TCL_TX_WRAP_S_CLAMP_TO_BORDER 0x00000004
+#define NV20TCL_TX_WRAP_S_CLAMP 0x00000005
+#define NV20TCL_TX_WRAP_T_SHIFT 8
+#define NV20TCL_TX_WRAP_T_MASK 0x00000f00
+#define NV20TCL_TX_WRAP_T_REPEAT 0x00000100
+#define NV20TCL_TX_WRAP_T_MIRRORED_REPEAT 0x00000200
+#define NV20TCL_TX_WRAP_T_CLAMP_TO_EDGE 0x00000300
+#define NV20TCL_TX_WRAP_T_CLAMP_TO_BORDER 0x00000400
+#define NV20TCL_TX_WRAP_T_CLAMP 0x00000500
+#define NV20TCL_TX_WRAP_R_SHIFT 16
+#define NV20TCL_TX_WRAP_R_MASK 0x000f0000
+#define NV20TCL_TX_WRAP_R_REPEAT 0x00010000
+#define NV20TCL_TX_WRAP_R_MIRRORED_REPEAT 0x00020000
+#define NV20TCL_TX_WRAP_R_CLAMP_TO_EDGE 0x00030000
+#define NV20TCL_TX_WRAP_R_CLAMP_TO_BORDER 0x00040000
+#define NV20TCL_TX_WRAP_R_CLAMP 0x00050000
+#define NV20TCL_TX_ENABLE(x) (0x00001b0c+((x)*64))
+#define NV20TCL_TX_ENABLE__SIZE 0x00000004
+#define NV20TCL_TX_ENABLE_ANISO_SHIFT 4
+#define NV20TCL_TX_ENABLE_ANISO_MASK 0x00000030
+#define NV20TCL_TX_ENABLE_ANISO_NONE 0x00000000
+#define NV20TCL_TX_ENABLE_ANISO_2X 0x00000010
+#define NV20TCL_TX_ENABLE_ANISO_4X 0x00000020
+#define NV20TCL_TX_ENABLE_ANISO_8X 0x00000030
+#define NV20TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT 14
+#define NV20TCL_TX_ENABLE_MIPMAP_MAX_LOD_MASK 0x0003c000
+#define NV20TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT 26
+#define NV20TCL_TX_ENABLE_MIPMAP_MIN_LOD_MASK 0x3c000000
+#define NV20TCL_TX_ENABLE_ENABLE (1 << 30)
+#define NV20TCL_TX_SWIZZLE(x) (0x00001b10+((x)*64))
+#define NV20TCL_TX_SWIZZLE__SIZE 0x00000004
+#define NV20TCL_TX_SWIZZLE_RECT_PITCH_SHIFT 16
+#define NV20TCL_TX_SWIZZLE_RECT_PITCH_MASK 0xffff0000
+#define NV20TCL_TX_FILTER(x) (0x00001b14+((x)*64))
+#define NV20TCL_TX_FILTER__SIZE 0x00000004
+#define NV20TCL_TX_FILTER_LOD_BIAS_SHIFT 8
+#define NV20TCL_TX_FILTER_LOD_BIAS_MASK 0x00000f00
+#define NV20TCL_TX_FILTER_MINIFY_SHIFT 16
+#define NV20TCL_TX_FILTER_MINIFY_MASK 0x000f0000
+#define NV20TCL_TX_FILTER_MINIFY_NEAREST 0x00010000
+#define NV20TCL_TX_FILTER_MINIFY_LINEAR 0x00020000
+#define NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x00030000
+#define NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x00040000
+#define NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x00050000
+#define NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x00060000
+#define NV20TCL_TX_FILTER_MAGNIFY_SHIFT 24
+#define NV20TCL_TX_FILTER_MAGNIFY_MASK 0x0f000000
+#define NV20TCL_TX_FILTER_MAGNIFY_NEAREST 0x01000000
+#define NV20TCL_TX_FILTER_MAGNIFY_LINEAR 0x02000000
+#define NV20TCL_TX_NPOT_SIZE(x) (0x00001b1c+((x)*64))
+#define NV20TCL_TX_NPOT_SIZE__SIZE 0x00000004
+#define NV20TCL_TX_NPOT_SIZE_H_SHIFT 0
+#define NV20TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff
+#define NV20TCL_TX_NPOT_SIZE_W_SHIFT 16
+#define NV20TCL_TX_NPOT_SIZE_W_MASK 0xffff0000
+#define NV20TCL_TX_PALETTE_OFFSET(x) (0x00001b20+((x)*64))
+#define NV20TCL_TX_PALETTE_OFFSET__SIZE 0x00000004
+#define NV20TCL_TX_BORDER_COLOR(x) (0x00001b24+((x)*64))
+#define NV20TCL_TX_BORDER_COLOR__SIZE 0x00000004
+#define NV20TCL_TX_BORDER_COLOR_B_SHIFT 0
+#define NV20TCL_TX_BORDER_COLOR_B_MASK 0x000000ff
+#define NV20TCL_TX_BORDER_COLOR_G_SHIFT 8
+#define NV20TCL_TX_BORDER_COLOR_G_MASK 0x0000ff00
+#define NV20TCL_TX_BORDER_COLOR_R_SHIFT 16
+#define NV20TCL_TX_BORDER_COLOR_R_MASK 0x00ff0000
+#define NV20TCL_TX_BORDER_COLOR_A_SHIFT 24
+#define NV20TCL_TX_BORDER_COLOR_A_MASK 0xff000000
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX00(x) (0x00001b28+((x)*64))
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX00__SIZE 0x00000004
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX01(x) (0x00001b2c+((x)*64))
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX01__SIZE 0x00000004
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX11(x) (0x00001b30+((x)*64))
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX11__SIZE 0x00000004
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX10(x) (0x00001b34+((x)*64))
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX10__SIZE 0x00000004
+#define NV20TCL_DEPTH_UNK17D8 0x00001d78
+#define NV20TCL_DEPTH_UNK17D8_CLAMP_SHIFT 4
+#define NV20TCL_DEPTH_UNK17D8_CLAMP_MASK 0x000000f0
+#define NV20TCL_MULTISAMPLE_CONTROL 0x00001d7c
+#define NV20TCL_CLEAR_DEPTH_VALUE 0x00001d8c
+#define NV20TCL_CLEAR_VALUE 0x00001d90
+#define NV20TCL_CLEAR_BUFFERS 0x00001d94
+#define NV20TCL_CLEAR_BUFFERS_COLOR_A (1 << 7)
+#define NV20TCL_CLEAR_BUFFERS_COLOR_B (1 << 6)
+#define NV20TCL_CLEAR_BUFFERS_COLOR_G (1 << 5)
+#define NV20TCL_CLEAR_BUFFERS_COLOR_R (1 << 4)
+#define NV20TCL_CLEAR_BUFFERS_STENCIL (1 << 1)
+#define NV20TCL_CLEAR_BUFFERS_DEPTH (1 << 0)
+#define NV20TCL_RC_COLOR0 0x00001e20
+#define NV20TCL_RC_COLOR0_B_SHIFT 0
+#define NV20TCL_RC_COLOR0_B_MASK 0x000000ff
+#define NV20TCL_RC_COLOR0_G_SHIFT 8
+#define NV20TCL_RC_COLOR0_G_MASK 0x0000ff00
+#define NV20TCL_RC_COLOR0_R_SHIFT 16
+#define NV20TCL_RC_COLOR0_R_MASK 0x00ff0000
+#define NV20TCL_RC_COLOR0_A_SHIFT 24
+#define NV20TCL_RC_COLOR0_A_MASK 0xff000000
+#define NV20TCL_RC_COLOR1 0x00001e24
+#define NV20TCL_RC_COLOR1_B_SHIFT 0
+#define NV20TCL_RC_COLOR1_B_MASK 0x000000ff
+#define NV20TCL_RC_COLOR1_G_SHIFT 8
+#define NV20TCL_RC_COLOR1_G_MASK 0x0000ff00
+#define NV20TCL_RC_COLOR1_R_SHIFT 16
+#define NV20TCL_RC_COLOR1_R_MASK 0x00ff0000
+#define NV20TCL_RC_COLOR1_A_SHIFT 24
+#define NV20TCL_RC_COLOR1_A_MASK 0xff000000
+#define NV20TCL_BACK_MATERIAL_SHININESS(x) (0x00001e28+((x)*4))
+#define NV20TCL_BACK_MATERIAL_SHININESS__SIZE 0x00000006
+#define NV20TCL_RC_OUT_RGB(x) (0x00001e40+((x)*4))
+#define NV20TCL_RC_OUT_RGB__SIZE 0x00000008
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 12)
+#define NV20TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13)
+#define NV20TCL_RC_OUT_RGB_MUX_SUM (1 << 14)
+#define NV20TCL_RC_OUT_RGB_BIAS (1 << 15)
+#define NV20TCL_RC_OUT_RGB_BIAS_NONE 0x00000000
+#define NV20TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV20TCL_RC_OUT_RGB_SCALE_SHIFT 17
+#define NV20TCL_RC_OUT_RGB_SCALE_MASK 0x00000000
+#define NV20TCL_RC_OUT_RGB_SCALE_NONE 0x00000000
+#define NV20TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV20TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV20TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV20TCL_RC_ENABLE 0x00001e60
+#define NV20TCL_RC_ENABLE_NUM_COMBINERS_SHIFT 0
+#define NV20TCL_RC_ENABLE_NUM_COMBINERS_MASK 0x0000000f
+#define NV20TCL_TX_RCOMP 0x00001e6c
+#define NV20TCL_TX_RCOMP_NEVER 0x00000000
+#define NV20TCL_TX_RCOMP_GREATER 0x00000001
+#define NV20TCL_TX_RCOMP_EQUAL 0x00000002
+#define NV20TCL_TX_RCOMP_GEQUAL 0x00000003
+#define NV20TCL_TX_RCOMP_LESS 0x00000004
+#define NV20TCL_TX_RCOMP_NOTEQUAL 0x00000005
+#define NV20TCL_TX_RCOMP_LEQUAL 0x00000006
+#define NV20TCL_TX_RCOMP_ALWAYS 0x00000007
+#define NV20TCL_TX_SHADER_OP 0x00001e70
+#define NV20TCL_TX_SHADER_OP_TX0_SHIFT 0
+#define NV20TCL_TX_SHADER_OP_TX0_MASK 0x0000001f
+#define NV20TCL_TX_SHADER_OP_TX0_NONE 0x00000000
+#define NV20TCL_TX_SHADER_OP_TX0_TEXTURE_2D 0x00000001
+#define NV20TCL_TX_SHADER_OP_TX0_PASS_THROUGH 0x00000004
+#define NV20TCL_TX_SHADER_OP_TX0_CULL_FRAGMENT 0x00000005
+#define NV20TCL_TX_SHADER_OP_TX0_OFFSET_TEXTURE_2D 0x00000006
+#define NV20TCL_TX_SHADER_OP_TX0_DOT_PRODUCT_TEXTURE_2D 0x00000009
+#define NV20TCL_TX_SHADER_OP_TX0_DOT_PRODUCT_DEPTH_REPLACE 0x0000000a
+#define NV20TCL_TX_SHADER_OP_TX0_DEPENDANT_AR_TEXTURE_2D 0x0000000f
+#define NV20TCL_TX_SHADER_OP_TX0_DEPENDANT_GB_TEXTURE_2D 0x00000010
+#define NV20TCL_TX_SHADER_OP_TX0_DOT_PRODUCT 0x00000011
+#define NV20TCL_TX_SHADER_OP_TX1_SHIFT 5
+#define NV20TCL_TX_SHADER_OP_TX1_MASK 0x000003e0
+#define NV20TCL_TX_SHADER_OP_TX1_NONE 0x00000000
+#define NV20TCL_TX_SHADER_OP_TX1_TEXTURE_2D 0x00000020
+#define NV20TCL_TX_SHADER_OP_TX1_PASS_THROUGH 0x00000080
+#define NV20TCL_TX_SHADER_OP_TX1_CULL_FRAGMENT 0x000000a0
+#define NV20TCL_TX_SHADER_OP_TX1_OFFSET_TEXTURE_2D 0x000000c0
+#define NV20TCL_TX_SHADER_OP_TX1_DOT_PRODUCT_TEXTURE_2D 0x00000120
+#define NV20TCL_TX_SHADER_OP_TX1_DOT_PRODUCT_DEPTH_REPLACE 0x00000140
+#define NV20TCL_TX_SHADER_OP_TX1_DEPENDANT_AR_TEXTURE_2D 0x000001e0
+#define NV20TCL_TX_SHADER_OP_TX1_DEPENDANT_GB_TEXTURE_2D 0x00000200
+#define NV20TCL_TX_SHADER_OP_TX1_DOT_PRODUCT 0x00000220
+#define NV20TCL_TX_SHADER_OP_TX2_SHIFT 10
+#define NV20TCL_TX_SHADER_OP_TX2_MASK 0x00007c00
+#define NV20TCL_TX_SHADER_OP_TX2_NONE 0x00000000
+#define NV20TCL_TX_SHADER_OP_TX2_TEXTURE_2D 0x00000400
+#define NV20TCL_TX_SHADER_OP_TX2_PASS_THROUGH 0x00001000
+#define NV20TCL_TX_SHADER_OP_TX2_CULL_FRAGMENT 0x00001400
+#define NV20TCL_TX_SHADER_OP_TX2_OFFSET_TEXTURE_2D 0x00001800
+#define NV20TCL_TX_SHADER_OP_TX2_DOT_PRODUCT_TEXTURE_2D 0x00002400
+#define NV20TCL_TX_SHADER_OP_TX2_DOT_PRODUCT_DEPTH_REPLACE 0x00002800
+#define NV20TCL_TX_SHADER_OP_TX2_DEPENDANT_AR_TEXTURE_2D 0x00003c00
+#define NV20TCL_TX_SHADER_OP_TX2_DEPENDANT_GB_TEXTURE_2D 0x00004000
+#define NV20TCL_TX_SHADER_OP_TX2_DOT_PRODUCT 0x00004400
+#define NV20TCL_TX_SHADER_OP_TX3_SHIFT 15
+#define NV20TCL_TX_SHADER_OP_TX3_MASK 0x000f8000
+#define NV20TCL_TX_SHADER_OP_TX3_NONE 0x00000000
+#define NV20TCL_TX_SHADER_OP_TX3_TEXTURE_2D 0x00008000
+#define NV20TCL_TX_SHADER_OP_TX3_PASS_THROUGH 0x00020000
+#define NV20TCL_TX_SHADER_OP_TX3_CULL_FRAGMENT 0x00028000
+#define NV20TCL_TX_SHADER_OP_TX3_OFFSET_TEXTURE_2D 0x00030000
+#define NV20TCL_TX_SHADER_OP_TX3_DOT_PRODUCT_TEXTURE_2D 0x00048000
+#define NV20TCL_TX_SHADER_OP_TX3_DOT_PRODUCT_DEPTH_REPLACE 0x00050000
+#define NV20TCL_TX_SHADER_OP_TX3_DEPENDANT_AR_TEXTURE_2D 0x00078000
+#define NV20TCL_TX_SHADER_OP_TX3_DEPENDANT_GB_TEXTURE_2D 0x00080000
+#define NV20TCL_TX_SHADER_OP_TX3_DOT_PRODUCT 0x00088000
+#define NV20TCL_TX_SHADER_DOTMAPPING 0x00001e74
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX0_SHIFT 0
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX0_MASK 0x0000000f
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX1_SHIFT 4
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX1_MASK 0x000000f0
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX2_SHIFT 8
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX2_MASK 0x00000f00
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX3_SHIFT 12
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX3_MASK 0x0000f000
+#define NV20TCL_TX_SHADER_PREVIOUS 0x00001e78
+#define NV20TCL_TX_SHADER_PREVIOUS_TX0_SHIFT 8
+#define NV20TCL_TX_SHADER_PREVIOUS_TX0_MASK 0x00000f00
+#define NV20TCL_TX_SHADER_PREVIOUS_TX1_SHIFT 12
+#define NV20TCL_TX_SHADER_PREVIOUS_TX1_MASK 0x0000f000
+#define NV20TCL_TX_SHADER_PREVIOUS_TX2_SHIFT 16
+#define NV20TCL_TX_SHADER_PREVIOUS_TX2_MASK 0x00030000
+#define NV20TCL_TX_SHADER_PREVIOUS_TX3_SHIFT 20
+#define NV20TCL_TX_SHADER_PREVIOUS_TX3_MASK 0x00300000
+#define NV20TCL_ENGINE 0x00001e94
+#define NV20TCL_ENGINE_VP (1 << 1)
+#define NV20TCL_ENGINE_FIXED (1 << 2)
+#define NV20TCL_VP_UPLOAD_FROM_ID 0x00001e9c
+#define NV20TCL_VP_START_FROM_ID 0x00001ea0
+#define NV20TCL_VP_UPLOAD_CONST_ID 0x00001ea4
+#define NV20TCL_VIEWPORT_TRANSLATE_X 0x00001f00
+#define NV20TCL_VIEWPORT_TRANSLATE_Y 0x00001f04
+#define NV20TCL_VIEWPORT_TRANSLATE_Z 0x00001f08
+#define NV20TCL_VIEWPORT_TRANSLATE_W 0x00001f0c
+
+
+#define NV17TCL 0x00000099
+
+#define NV17TCL_DMA_IN_MEMORY4 0x000001ac
+#define NV17TCL_DMA_IN_MEMORY5 0x000001b0
+#define NV17TCL_COLOR_MASK_ENABLE 0x000002bc
+#define NV17TCL_LMA_DEPTH_BUFFER_PITCH 0x00000d5c
+#define NV17TCL_LMA_DEPTH_BUFFER_OFFSET 0x00000d60
+#define NV17TCL_LMA_DEPTH_FILL_VALUE 0x00000d68
+#define NV17TCL_LMA_DEPTH_BUFFER_CLEAR 0x00000d6c
+#define NV17TCL_LMA_DEPTH_ENABLE 0x00001658
+
+
+#define NV20_SWIZZLED_SURFACE 0x0000009e
+
+
+
+#define NV12_IMAGE_BLIT 0x0000009f
+
+
+
+#define NV30_CONTEXT_SURFACES_2D 0x00000362
+
+
+
+#define NV30_STRETCHED_IMAGE_FROM_CPU 0x00000366
+
+
+
+#define NV30_TEXTURE_FROM_CPU 0x0000037b
+
+
+
+#define NV30_SCALED_IMAGE_FROM_MEMORY 0x00000389
+
+
+
+#define NV30_IMAGE_FROM_CPU 0x0000038a
+
+
+
+#define NV30TCL 0x00000397
+
+
+
+#define NV30_SWIZZLED_SURFACE 0x0000039e
+
+
+
+#define NV35TCL 0x00000497
+
+
+
+#define NV25TCL 0x00000597
+
+#define NV25TCL_DMA_IN_MEMORY4 0x0000019c
+#define NV25TCL_DMA_IN_MEMORY5 0x000001a0
+#define NV25TCL_DMA_IN_MEMORY8 0x000001ac
+#define NV25TCL_DMA_IN_MEMORY9 0x000001b0
+
+
+#define NV34TCL 0x00000697
+
+#define NV34TCL_NOP 0x00000100
+#define NV34TCL_NOTIFY 0x00000104
+#define NV34TCL_DMA_NOTIFY 0x00000180
+#define NV34TCL_DMA_TEXTURE0 0x00000184
+#define NV34TCL_DMA_TEXTURE1 0x00000188
+#define NV34TCL_DMA_COLOR1 0x0000018c
+#define NV34TCL_DMA_COLOR0 0x00000194
+#define NV34TCL_DMA_ZETA 0x00000198
+#define NV34TCL_DMA_VTXBUF0 0x0000019c
+#define NV34TCL_DMA_VTXBUF1 0x000001a0
+#define NV34TCL_DMA_FENCE 0x000001a4
+#define NV34TCL_DMA_QUERY 0x000001a8
+#define NV34TCL_DMA_IN_MEMORY7 0x000001ac
+#define NV34TCL_DMA_IN_MEMORY8 0x000001b0
+#define NV34TCL_RT_HORIZ 0x00000200
+#define NV34TCL_RT_HORIZ_X_SHIFT 0
+#define NV34TCL_RT_HORIZ_X_MASK 0x0000ffff
+#define NV34TCL_RT_HORIZ_W_SHIFT 16
+#define NV34TCL_RT_HORIZ_W_MASK 0xffff0000
+#define NV34TCL_RT_VERT 0x00000204
+#define NV34TCL_RT_VERT_Y_SHIFT 0
+#define NV34TCL_RT_VERT_Y_MASK 0x0000ffff
+#define NV34TCL_RT_VERT_H_SHIFT 16
+#define NV34TCL_RT_VERT_H_MASK 0xffff0000
+#define NV34TCL_RT_FORMAT 0x00000208
+#define NV34TCL_RT_FORMAT_TYPE_SHIFT 8
+#define NV34TCL_RT_FORMAT_TYPE_MASK 0x00000f00
+#define NV34TCL_RT_FORMAT_TYPE_LINEAR 0x00000100
+#define NV34TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200
+#define NV34TCL_RT_FORMAT_ZETA_SHIFT 5
+#define NV34TCL_RT_FORMAT_ZETA_MASK 0x000000e0
+#define NV34TCL_RT_FORMAT_ZETA_Z16 0x00000020
+#define NV34TCL_RT_FORMAT_ZETA_Z24S8 0x00000040
+#define NV34TCL_RT_FORMAT_COLOR_SHIFT 0
+#define NV34TCL_RT_FORMAT_COLOR_MASK 0x0000001f
+#define NV34TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003
+#define NV34TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005
+#define NV34TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008
+#define NV34TCL_RT_FORMAT_COLOR_B8 0x00000009
+#define NV34TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d
+#define NV34TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f
+#define NV34TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010
+#define NV34TCL_COLOR0_PITCH 0x0000020c
+#define NV34TCL_COLOR0_PITCH_COLOR0_SHIFT 0
+#define NV34TCL_COLOR0_PITCH_COLOR0_MASK 0x0000ffff
+#define NV34TCL_COLOR0_PITCH_ZETA_SHIFT 16
+#define NV34TCL_COLOR0_PITCH_ZETA_MASK 0xffff0000
+#define NV34TCL_COLOR0_OFFSET 0x00000210
+#define NV34TCL_ZETA_OFFSET 0x00000214
+#define NV34TCL_COLOR1_OFFSET 0x00000218
+#define NV34TCL_COLOR1_PITCH 0x0000021c
+#define NV34TCL_RT_ENABLE 0x00000220
+#define NV34TCL_RT_ENABLE_MRT (1 << 4)
+#define NV34TCL_RT_ENABLE_COLOR1 (1 << 1)
+#define NV34TCL_RT_ENABLE_COLOR0 (1 << 0)
+#define NV34TCL_LMA_DEPTH_PITCH 0x0000022c
+#define NV34TCL_LMA_DEPTH_OFFSET 0x00000230
+#define NV34TCL_TX_UNITS_ENABLE 0x0000023c
+#define NV34TCL_TX_UNITS_ENABLE_TX0 (1 << 0)
+#define NV34TCL_TX_UNITS_ENABLE_TX1 (1 << 1)
+#define NV34TCL_TX_UNITS_ENABLE_TX2 (1 << 2)
+#define NV34TCL_TX_UNITS_ENABLE_TX3 (1 << 3)
+#define NV34TCL_TX_UNITS_ENABLE_TX4 (1 << 4)
+#define NV34TCL_TX_UNITS_ENABLE_TX5 (1 << 5)
+#define NV34TCL_TX_UNITS_ENABLE_TX6 (1 << 6)
+#define NV34TCL_TX_UNITS_ENABLE_TX7 (1 << 7)
+#define NV34TCL_TX_MATRIX_ENABLE(x) (0x00000240+((x)*4))
+#define NV34TCL_TX_MATRIX_ENABLE__SIZE 0x00000008
+#define NV34TCL_VIEWPORT_TX_ORIGIN 0x000002b8
+#define NV34TCL_VIEWPORT_TX_ORIGIN_X_SHIFT 0
+#define NV34TCL_VIEWPORT_TX_ORIGIN_X_MASK 0x0000ffff
+#define NV34TCL_VIEWPORT_TX_ORIGIN_Y_SHIFT 16
+#define NV34TCL_VIEWPORT_TX_ORIGIN_Y_MASK 0xffff0000
+#define NV34TCL_VIEWPORT_CLIP_MODE 0x000002bc
+#define NV34TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*8))
+#define NV34TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV34TCL_VIEWPORT_CLIP_HORIZ_L_SHIFT 0
+#define NV34TCL_VIEWPORT_CLIP_HORIZ_L_MASK 0x0000ffff
+#define NV34TCL_VIEWPORT_CLIP_HORIZ_R_SHIFT 16
+#define NV34TCL_VIEWPORT_CLIP_HORIZ_R_MASK 0xffff0000
+#define NV34TCL_VIEWPORT_CLIP_VERT(x) (0x000002c4+((x)*8))
+#define NV34TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV34TCL_VIEWPORT_CLIP_VERT_T_SHIFT 0
+#define NV34TCL_VIEWPORT_CLIP_VERT_T_MASK 0x0000ffff
+#define NV34TCL_VIEWPORT_CLIP_VERT_D_SHIFT 16
+#define NV34TCL_VIEWPORT_CLIP_VERT_D_MASK 0xffff0000
+#define NV34TCL_DITHER_ENABLE 0x00000300
+#define NV34TCL_ALPHA_FUNC_ENABLE 0x00000304
+#define NV34TCL_ALPHA_FUNC_FUNC 0x00000308
+#define NV34TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200
+#define NV34TCL_ALPHA_FUNC_FUNC_LESS 0x00000201
+#define NV34TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202
+#define NV34TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203
+#define NV34TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV34TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206
+#define NV34TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207
+#define NV34TCL_ALPHA_FUNC_REF 0x0000030c
+#define NV34TCL_BLEND_FUNC_ENABLE 0x00000310
+#define NV34TCL_BLEND_FUNC_SRC 0x00000314
+#define NV34TCL_BLEND_FUNC_SRC_RGB_SHIFT 0
+#define NV34TCL_BLEND_FUNC_SRC_RGB_MASK 0x0000ffff
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001
+#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV34TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV34TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV34TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV34TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SHIFT 16
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_MASK 0xffff0000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000
+#define NV34TCL_BLEND_FUNC_DST 0x00000318
+#define NV34TCL_BLEND_FUNC_DST_RGB_SHIFT 0
+#define NV34TCL_BLEND_FUNC_DST_RGB_MASK 0x0000ffff
+#define NV34TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001
+#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV34TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV34TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV34TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV34TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_SHIFT 16
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_MASK 0xffff0000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00010000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000
+#define NV34TCL_BLEND_COLOR 0x0000031c
+#define NV34TCL_BLEND_COLOR_B_SHIFT 0
+#define NV34TCL_BLEND_COLOR_B_MASK 0x000000ff
+#define NV34TCL_BLEND_COLOR_G_SHIFT 8
+#define NV34TCL_BLEND_COLOR_G_MASK 0x0000ff00
+#define NV34TCL_BLEND_COLOR_R_SHIFT 16
+#define NV34TCL_BLEND_COLOR_R_MASK 0x00ff0000
+#define NV34TCL_BLEND_COLOR_A_SHIFT 24
+#define NV34TCL_BLEND_COLOR_A_MASK 0xff000000
+#define NV34TCL_BLEND_EQUATION 0x00000320
+#define NV34TCL_BLEND_EQUATION_FUNC_ADD 0x00008006
+#define NV34TCL_BLEND_EQUATION_MIN 0x00008007
+#define NV34TCL_BLEND_EQUATION_MAX 0x00008008
+#define NV34TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a
+#define NV34TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV34TCL_COLOR_MASK 0x00000324
+#define NV34TCL_COLOR_MASK_B_SHIFT 0
+#define NV34TCL_COLOR_MASK_B_MASK 0x000000ff
+#define NV34TCL_COLOR_MASK_G_SHIFT 8
+#define NV34TCL_COLOR_MASK_G_MASK 0x0000ff00
+#define NV34TCL_COLOR_MASK_R_SHIFT 16
+#define NV34TCL_COLOR_MASK_R_MASK 0x00ff0000
+#define NV34TCL_COLOR_MASK_A_SHIFT 24
+#define NV34TCL_COLOR_MASK_A_MASK 0xff000000
+#define NV34TCL_STENCIL_BACK_ENABLE 0x00000328
+#define NV34TCL_STENCIL_BACK_MASK 0x0000032c
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC 0x00000330
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+#define NV34TCL_STENCIL_BACK_FUNC_REF 0x00000334
+#define NV34TCL_STENCIL_BACK_FUNC_MASK 0x00000338
+#define NV34TCL_STENCIL_BACK_OP_FAIL 0x0000033c
+#define NV34TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NV34TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NV34TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NV34TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NV34TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NV34TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL 0x00000340
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV34TCL_STENCIL_BACK_OP_ZPASS 0x00000344
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV34TCL_STENCIL_FRONT_ENABLE 0x00000348
+#define NV34TCL_STENCIL_FRONT_MASK 0x0000034c
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC 0x00000350
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+#define NV34TCL_STENCIL_FRONT_FUNC_REF 0x00000354
+#define NV34TCL_STENCIL_FRONT_FUNC_MASK 0x00000358
+#define NV34TCL_STENCIL_FRONT_OP_FAIL 0x0000035c
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL 0x00000360
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS 0x00000364
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV34TCL_SHADE_MODEL 0x00000368
+#define NV34TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NV34TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NV34TCL_FOG_ENABLE 0x0000036c
+#define NV34TCL_FOG_COLOR 0x00000370
+#define NV34TCL_FOG_COLOR_R_SHIFT 0
+#define NV34TCL_FOG_COLOR_R_MASK 0x000000ff
+#define NV34TCL_FOG_COLOR_G_SHIFT 8
+#define NV34TCL_FOG_COLOR_G_MASK 0x0000ff00
+#define NV34TCL_FOG_COLOR_B_SHIFT 16
+#define NV34TCL_FOG_COLOR_B_MASK 0x00ff0000
+#define NV34TCL_FOG_COLOR_A_SHIFT 24
+#define NV34TCL_FOG_COLOR_A_MASK 0xff000000
+#define NV34TCL_COLOR_LOGIC_OP_ENABLE 0x00000374
+#define NV34TCL_COLOR_LOGIC_OP_OP 0x00000378
+#define NV34TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500
+#define NV34TCL_COLOR_LOGIC_OP_OP_AND 0x00001501
+#define NV34TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502
+#define NV34TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503
+#define NV34TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504
+#define NV34TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505
+#define NV34TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506
+#define NV34TCL_COLOR_LOGIC_OP_OP_OR 0x00001507
+#define NV34TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508
+#define NV34TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509
+#define NV34TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a
+#define NV34TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b
+#define NV34TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c
+#define NV34TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d
+#define NV34TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e
+#define NV34TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f
+#define NV34TCL_NORMALIZE_ENABLE 0x0000037c
+#define NV34TCL_COLOR_MATERIAL 0x00000390
+#define NV34TCL_COLOR_MATERIAL_FRONT_EMISSION_ENABLE (1 << 0)
+#define NV34TCL_COLOR_MATERIAL_FRONT_AMBIENT_ENABLE (1 << 2)
+#define NV34TCL_COLOR_MATERIAL_FRONT_DIFFUSE_ENABLE (1 << 4)
+#define NV34TCL_COLOR_MATERIAL_FRONT_SPECULAR_ENABLE (1 << 6)
+#define NV34TCL_COLOR_MATERIAL_BACK_EMISSION_ENABLE (1 << 8)
+#define NV34TCL_COLOR_MATERIAL_BACK_AMBIENT_ENABLE (1 << 10)
+#define NV34TCL_COLOR_MATERIAL_BACK_DIFFUSE_ENABLE (1 << 12)
+#define NV34TCL_COLOR_MATERIAL_BACK_SPECULAR_ENABLE (1 << 14)
+#define NV34TCL_DEPTH_RANGE_NEAR 0x00000394
+#define NV34TCL_DEPTH_RANGE_FAR 0x00000398
+#define NV34TCL_COLOR_MATERIAL_FRONT_R 0x000003a0
+#define NV34TCL_COLOR_MATERIAL_FRONT_G 0x000003a4
+#define NV34TCL_COLOR_MATERIAL_FRONT_B 0x000003a8
+#define NV34TCL_COLOR_MATERIAL_FRONT_A 0x000003b4
+#define NV34TCL_LINE_WIDTH 0x000003b8
+#define NV34TCL_LINE_SMOOTH_ENABLE 0x000003bc
+#define NV34TCL_TX_GEN_S(x) (0x00000400+((x)*16))
+#define NV34TCL_TX_GEN_S__SIZE 0x00000008
+#define NV34TCL_TX_GEN_S_FALSE 0x00000000
+#define NV34TCL_TX_GEN_S_EYE_LINEAR 0x00002400
+#define NV34TCL_TX_GEN_S_OBJECT_LINEAR 0x00002401
+#define NV34TCL_TX_GEN_S_SPHERE_MAP 0x00002402
+#define NV34TCL_TX_GEN_S_NORMAL_MAP 0x00008511
+#define NV34TCL_TX_GEN_S_REFLECTION_MAP 0x00008512
+#define NV34TCL_TX_GEN_T(x) (0x00000404+((x)*16))
+#define NV34TCL_TX_GEN_T__SIZE 0x00000008
+#define NV34TCL_TX_GEN_T_FALSE 0x00000000
+#define NV34TCL_TX_GEN_T_EYE_LINEAR 0x00002400
+#define NV34TCL_TX_GEN_T_OBJECT_LINEAR 0x00002401
+#define NV34TCL_TX_GEN_T_SPHERE_MAP 0x00002402
+#define NV34TCL_TX_GEN_T_NORMAL_MAP 0x00008511
+#define NV34TCL_TX_GEN_T_REFLECTION_MAP 0x00008512
+#define NV34TCL_TX_GEN_R(x) (0x00000408+((x)*16))
+#define NV34TCL_TX_GEN_R__SIZE 0x00000008
+#define NV34TCL_TX_GEN_R_FALSE 0x00000000
+#define NV34TCL_TX_GEN_R_EYE_LINEAR 0x00002400
+#define NV34TCL_TX_GEN_R_OBJECT_LINEAR 0x00002401
+#define NV34TCL_TX_GEN_R_SPHERE_MAP 0x00002402
+#define NV34TCL_TX_GEN_R_NORMAL_MAP 0x00008511
+#define NV34TCL_TX_GEN_R_REFLECTION_MAP 0x00008512
+#define NV34TCL_TX_GEN_Q(x) (0x0000040c+((x)*16))
+#define NV34TCL_TX_GEN_Q__SIZE 0x00000008
+#define NV34TCL_TX_GEN_Q_FALSE 0x00000000
+#define NV34TCL_TX_GEN_Q_EYE_LINEAR 0x00002400
+#define NV34TCL_TX_GEN_Q_OBJECT_LINEAR 0x00002401
+#define NV34TCL_TX_GEN_Q_SPHERE_MAP 0x00002402
+#define NV34TCL_TX_GEN_Q_NORMAL_MAP 0x00008511
+#define NV34TCL_TX_GEN_Q_REFLECTION_MAP 0x00008512
+#define NV34TCL_MODELVIEW_MATRIX(x) (0x00000480+((x)*4))
+#define NV34TCL_MODELVIEW_MATRIX__SIZE 0x00000010
+#define NV34TCL_INVERSE_MODELVIEW_MATRIX(x) (0x00000580+((x)*4))
+#define NV34TCL_INVERSE_MODELVIEW_MATRIX__SIZE 0x0000000c
+#define NV34TCL_PROJECTION_MATRIX(x) (0x00000680+((x)*4))
+#define NV34TCL_PROJECTION_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX0_MATRIX(x) (0x000006c0+((x)*4))
+#define NV34TCL_TX0_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX1_MATRIX(x) (0x00000700+((x)*4))
+#define NV34TCL_TX1_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX2_MATRIX(x) (0x00000740+((x)*4))
+#define NV34TCL_TX2_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX3_MATRIX(x) (0x00000780+((x)*4))
+#define NV34TCL_TX3_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX4_MATRIX(x) (0x000007c0+((x)*4))
+#define NV34TCL_TX4_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX5_MATRIX(x) (0x00000800+((x)*4))
+#define NV34TCL_TX5_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX6_MATRIX(x) (0x00000840+((x)*4))
+#define NV34TCL_TX6_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX7_MATRIX(x) (0x00000880+((x)*4))
+#define NV34TCL_TX7_MATRIX__SIZE 0x00000010
+#define NV34TCL_SCISSOR_HORIZ 0x000008c0
+#define NV34TCL_SCISSOR_HORIZ_X_SHIFT 0
+#define NV34TCL_SCISSOR_HORIZ_X_MASK 0x0000ffff
+#define NV34TCL_SCISSOR_HORIZ_W_SHIFT 16
+#define NV34TCL_SCISSOR_HORIZ_W_MASK 0xffff0000
+#define NV34TCL_SCISSOR_VERT 0x000008c4
+#define NV34TCL_SCISSOR_VERT_Y_SHIFT 0
+#define NV34TCL_SCISSOR_VERT_Y_MASK 0x0000ffff
+#define NV34TCL_SCISSOR_VERT_H_SHIFT 16
+#define NV34TCL_SCISSOR_VERT_H_MASK 0xffff0000
+#define NV34TCL_FOG_COORD_DIST 0x000008c8
+#define NV34TCL_FOG_COORD_DIST_COORD_FALSE 0x00000000
+#define NV34TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_RADIAL_NV 0x00000001
+#define NV34TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_PLANE_ABSOLUTE_NV 0x00000002
+#define NV34TCL_FOG_COORD_DIST_COORD_FOG 0x00000003
+#define NV34TCL_FOG_MODE 0x000008cc
+#define NV34TCL_FOG_MODE_EXP 0x00000800
+#define NV34TCL_FOG_MODE_EXP_2 0x00000802
+#define NV34TCL_FOG_MODE_EXP2 0x00000803
+#define NV34TCL_FOG_MODE_LINEAR 0x00000804
+#define NV34TCL_FOG_MODE_LINEAR_2 0x00002601
+#define NV34TCL_FOG_EQUATION_CONSTANT 0x000008d0
+#define NV34TCL_FOG_EQUATION_LINEAR 0x000008d4
+#define NV34TCL_FOG_EQUATION_QUADRATIC 0x000008d8
+#define NV34TCL_FP_ACTIVE_PROGRAM 0x000008e4
+#define NV34TCL_FP_ACTIVE_PROGRAM_DMA0 (1 << 0)
+#define NV34TCL_FP_ACTIVE_PROGRAM_DMA1 (1 << 1)
+#define NV34TCL_FP_ACTIVE_PROGRAM_OFFSET_SHIFT 2
+#define NV34TCL_FP_ACTIVE_PROGRAM_OFFSET_MASK 0xfffffffc
+#define NV34TCL_RC_COLOR0 0x000008ec
+#define NV34TCL_RC_COLOR0_B_SHIFT 0
+#define NV34TCL_RC_COLOR0_B_MASK 0x000000ff
+#define NV34TCL_RC_COLOR0_G_SHIFT 8
+#define NV34TCL_RC_COLOR0_G_MASK 0x0000ff00
+#define NV34TCL_RC_COLOR0_R_SHIFT 16
+#define NV34TCL_RC_COLOR0_R_MASK 0x00ff0000
+#define NV34TCL_RC_COLOR0_A_SHIFT 24
+#define NV34TCL_RC_COLOR0_A_MASK 0xff000000
+#define NV34TCL_RC_COLOR1 0x000008f0
+#define NV34TCL_RC_COLOR1_B_SHIFT 0
+#define NV34TCL_RC_COLOR1_B_MASK 0x000000ff
+#define NV34TCL_RC_COLOR1_G_SHIFT 8
+#define NV34TCL_RC_COLOR1_G_MASK 0x0000ff00
+#define NV34TCL_RC_COLOR1_R_SHIFT 16
+#define NV34TCL_RC_COLOR1_R_MASK 0x00ff0000
+#define NV34TCL_RC_COLOR1_A_SHIFT 24
+#define NV34TCL_RC_COLOR1_A_MASK 0xff000000
+#define NV34TCL_RC_FINAL0 0x000008f4
+#define NV34TCL_RC_FINAL0_D_INPUT_SHIFT 0
+#define NV34TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f
+#define NV34TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV34TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV34TCL_RC_FINAL0_D_INPUT_FOG 0x00000003
+#define NV34TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV34TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV34TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV34TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV34TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c
+#define NV34TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d
+#define NV34TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV34TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV34TCL_RC_FINAL0_D_COMPONENT_USAGE (1 << 4)
+#define NV34TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV34TCL_RC_FINAL0_D_MAPPING_SHIFT 5
+#define NV34TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0
+#define NV34TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV34TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV34TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV34TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV34TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV34TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV34TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV34TCL_RC_FINAL0_C_INPUT_SHIFT 8
+#define NV34TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00
+#define NV34TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_FINAL0_C_INPUT_FOG 0x00000300
+#define NV34TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12)
+#define NV34TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV34TCL_RC_FINAL0_C_MAPPING_SHIFT 13
+#define NV34TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000
+#define NV34TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV34TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV34TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV34TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV34TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV34TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV34TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV34TCL_RC_FINAL0_B_INPUT_SHIFT 16
+#define NV34TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000
+#define NV34TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV34TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV34TCL_RC_FINAL0_B_INPUT_FOG 0x00030000
+#define NV34TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV34TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV34TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV34TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV34TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000
+#define NV34TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000
+#define NV34TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV34TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV34TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20)
+#define NV34TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV34TCL_RC_FINAL0_B_MAPPING_SHIFT 21
+#define NV34TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000
+#define NV34TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV34TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV34TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV34TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV34TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV34TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV34TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV34TCL_RC_FINAL0_A_INPUT_SHIFT 24
+#define NV34TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000
+#define NV34TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV34TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV34TCL_RC_FINAL0_A_INPUT_FOG 0x03000000
+#define NV34TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV34TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV34TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV34TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV34TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000
+#define NV34TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000
+#define NV34TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV34TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV34TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28)
+#define NV34TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_SHIFT 29
+#define NV34TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV34TCL_RC_FINAL1 0x000008f8
+#define NV34TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7)
+#define NV34TCL_RC_FINAL1_G_INPUT_SHIFT 8
+#define NV34TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00
+#define NV34TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_FINAL1_G_INPUT_FOG 0x00000300
+#define NV34TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12)
+#define NV34TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV34TCL_RC_FINAL1_G_MAPPING_SHIFT 13
+#define NV34TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000
+#define NV34TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV34TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV34TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV34TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV34TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV34TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV34TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV34TCL_RC_FINAL1_F_INPUT_SHIFT 16
+#define NV34TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000
+#define NV34TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV34TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV34TCL_RC_FINAL1_F_INPUT_FOG 0x00030000
+#define NV34TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV34TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV34TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000
+#define NV34TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000
+#define NV34TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000
+#define NV34TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000
+#define NV34TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV34TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV34TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20)
+#define NV34TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV34TCL_RC_FINAL1_F_MAPPING_SHIFT 21
+#define NV34TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000
+#define NV34TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV34TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV34TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV34TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV34TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV34TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV34TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV34TCL_RC_FINAL1_E_INPUT_SHIFT 24
+#define NV34TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000
+#define NV34TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV34TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV34TCL_RC_FINAL1_E_INPUT_FOG 0x03000000
+#define NV34TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV34TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV34TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000
+#define NV34TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000
+#define NV34TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000
+#define NV34TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000
+#define NV34TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV34TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV34TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28)
+#define NV34TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_SHIFT 29
+#define NV34TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV34TCL_RC_ENABLE 0x000008fc
+#define NV34TCL_RC_ENABLE_NUM_COMBINERS_SHIFT 0
+#define NV34TCL_RC_ENABLE_NUM_COMBINERS_MASK 0x0000000f
+#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR0_SHIFT 12
+#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR0_MASK 0x0000f000
+#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR1_SHIFT 16
+#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR1_MASK 0x000f0000
+#define NV34TCL_RC_IN_ALPHA(x) (0x00000900+((x)*32))
+#define NV34TCL_RC_IN_ALPHA__SIZE 0x00000008
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV34TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4)
+#define NV34TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000
+#define NV34TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12)
+#define NV34TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000
+#define NV34TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV34TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20)
+#define NV34TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000
+#define NV34TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV34TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28)
+#define NV34TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000
+#define NV34TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV34TCL_RC_IN_RGB(x) (0x00000904+((x)*32))
+#define NV34TCL_RC_IN_RGB__SIZE 0x00000008
+#define NV34TCL_RC_IN_RGB_D_INPUT_SHIFT 0
+#define NV34TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f
+#define NV34TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV34TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV34TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003
+#define NV34TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV34TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV34TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV34TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV34TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c
+#define NV34TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d
+#define NV34TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV34TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV34TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4)
+#define NV34TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV34TCL_RC_IN_RGB_D_MAPPING_SHIFT 5
+#define NV34TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0
+#define NV34TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV34TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV34TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV34TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV34TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV34TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV34TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV34TCL_RC_IN_RGB_C_INPUT_SHIFT 8
+#define NV34TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00
+#define NV34TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300
+#define NV34TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12)
+#define NV34TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_SHIFT 13
+#define NV34TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV34TCL_RC_IN_RGB_B_INPUT_SHIFT 16
+#define NV34TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000
+#define NV34TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV34TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV34TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000
+#define NV34TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV34TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV34TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV34TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV34TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000
+#define NV34TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000
+#define NV34TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV34TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV34TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20)
+#define NV34TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_SHIFT 21
+#define NV34TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV34TCL_RC_IN_RGB_A_INPUT_SHIFT 24
+#define NV34TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV34TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28)
+#define NV34TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_SHIFT 29
+#define NV34TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV34TCL_RC_CONSTANT_COLOR0(x) (0x00000908+((x)*32))
+#define NV34TCL_RC_CONSTANT_COLOR0__SIZE 0x00000008
+#define NV34TCL_RC_CONSTANT_COLOR0_B_SHIFT 0
+#define NV34TCL_RC_CONSTANT_COLOR0_B_MASK 0x000000ff
+#define NV34TCL_RC_CONSTANT_COLOR0_G_SHIFT 8
+#define NV34TCL_RC_CONSTANT_COLOR0_G_MASK 0x0000ff00
+#define NV34TCL_RC_CONSTANT_COLOR0_R_SHIFT 16
+#define NV34TCL_RC_CONSTANT_COLOR0_R_MASK 0x00ff0000
+#define NV34TCL_RC_CONSTANT_COLOR0_A_SHIFT 24
+#define NV34TCL_RC_CONSTANT_COLOR0_A_MASK 0xff000000
+#define NV34TCL_RC_CONSTANT_COLOR1(x) (0x0000090c+((x)*32))
+#define NV34TCL_RC_CONSTANT_COLOR1__SIZE 0x00000008
+#define NV34TCL_RC_CONSTANT_COLOR1_B_SHIFT 0
+#define NV34TCL_RC_CONSTANT_COLOR1_B_MASK 0x000000ff
+#define NV34TCL_RC_CONSTANT_COLOR1_G_SHIFT 8
+#define NV34TCL_RC_CONSTANT_COLOR1_G_MASK 0x0000ff00
+#define NV34TCL_RC_CONSTANT_COLOR1_R_SHIFT 16
+#define NV34TCL_RC_CONSTANT_COLOR1_R_MASK 0x00ff0000
+#define NV34TCL_RC_CONSTANT_COLOR1_A_SHIFT 24
+#define NV34TCL_RC_CONSTANT_COLOR1_A_MASK 0xff000000
+#define NV34TCL_RC_OUT_ALPHA(x) (0x00000910+((x)*32))
+#define NV34TCL_RC_OUT_ALPHA__SIZE 0x00000008
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12)
+#define NV34TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13)
+#define NV34TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14)
+#define NV34TCL_RC_OUT_ALPHA_BIAS (1 << 15)
+#define NV34TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV34TCL_RC_OUT_ALPHA_SCALE_SHIFT 17
+#define NV34TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV34TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV34TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV34TCL_RC_OUT_RGB(x) (0x00000914+((x)*32))
+#define NV34TCL_RC_OUT_RGB__SIZE 0x00000008
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 12)
+#define NV34TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13)
+#define NV34TCL_RC_OUT_RGB_MUX_SUM (1 << 14)
+#define NV34TCL_RC_OUT_RGB_BIAS (1 << 15)
+#define NV34TCL_RC_OUT_RGB_BIAS_NONE 0x00000000
+#define NV34TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV34TCL_RC_OUT_RGB_SCALE_SHIFT 17
+#define NV34TCL_RC_OUT_RGB_SCALE_MASK 0x00000000
+#define NV34TCL_RC_OUT_RGB_SCALE_NONE 0x00000000
+#define NV34TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV34TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV34TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV34TCL_VIEWPORT_HORIZ 0x00000a00
+#define NV34TCL_VIEWPORT_HORIZ_X_SHIFT 0
+#define NV34TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff
+#define NV34TCL_VIEWPORT_HORIZ_W_SHIFT 16
+#define NV34TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000
+#define NV34TCL_VIEWPORT_VERT 0x00000a04
+#define NV34TCL_VIEWPORT_VERT_Y_SHIFT 0
+#define NV34TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff
+#define NV34TCL_VIEWPORT_VERT_H_SHIFT 16
+#define NV34TCL_VIEWPORT_VERT_H_MASK 0xffff0000
+#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x00000a10
+#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x00000a14
+#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x00000a18
+#define NV34TCL_VIEWPORT_TRANSLATE_X 0x00000a20
+#define NV34TCL_VIEWPORT_TRANSLATE_Y 0x00000a24
+#define NV34TCL_VIEWPORT_TRANSLATE_Z 0x00000a28
+#define NV34TCL_VIEWPORT_TRANSLATE_W 0x00000a2c
+#define NV34TCL_VIEWPORT_SCALE_X 0x00000a30
+#define NV34TCL_VIEWPORT_SCALE_Y 0x00000a34
+#define NV34TCL_VIEWPORT_SCALE_Z 0x00000a38
+#define NV34TCL_VIEWPORT_SCALE_W 0x00000a3c
+#define NV34TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a60
+#define NV34TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64
+#define NV34TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a68
+#define NV34TCL_DEPTH_FUNC 0x00000a6c
+#define NV34TCL_DEPTH_FUNC_NEVER 0x00000200
+#define NV34TCL_DEPTH_FUNC_LESS 0x00000201
+#define NV34TCL_DEPTH_FUNC_EQUAL 0x00000202
+#define NV34TCL_DEPTH_FUNC_LEQUAL 0x00000203
+#define NV34TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV34TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV34TCL_DEPTH_FUNC_NOTEQUAL 0x00000205
+#define NV34TCL_DEPTH_FUNC_GEQUAL 0x00000206
+#define NV34TCL_DEPTH_FUNC_ALWAYS 0x00000207
+#define NV34TCL_DEPTH_WRITE_ENABLE 0x00000a70
+#define NV34TCL_DEPTH_TEST_ENABLE 0x00000a74
+#define NV34TCL_POLYGON_OFFSET_FACTOR 0x00000a78
+#define NV34TCL_POLYGON_OFFSET_UNITS 0x00000a7c
+#define NV34TCL_VTX_ATTR_3I_XY(x) (0x00000a80+((x)*8))
+#define NV34TCL_VTX_ATTR_3I_XY__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_3I_XY_X_SHIFT 0
+#define NV34TCL_VTX_ATTR_3I_XY_X_MASK 0x0000ffff
+#define NV34TCL_VTX_ATTR_3I_XY_Y_SHIFT 16
+#define NV34TCL_VTX_ATTR_3I_XY_Y_MASK 0xffff0000
+#define NV34TCL_VTX_ATTR_3I_Z(x) (0x00000a84+((x)*8))
+#define NV34TCL_VTX_ATTR_3I_Z__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_3I_Z_Z_SHIFT 0
+#define NV34TCL_VTX_ATTR_3I_Z_Z_MASK 0x0000ffff
+#define NV34TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4))
+#define NV34TCL_VP_UPLOAD_INST__SIZE 0x00000004
+#define NV34TCL_TX0_CLIP_PLANE_A(x) (0x00000e00+((x)*16))
+#define NV34TCL_TX0_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX0_CLIP_PLANE_B(x) (0x00000e04+((x)*16))
+#define NV34TCL_TX0_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX0_CLIP_PLANE_C(x) (0x00000e08+((x)*16))
+#define NV34TCL_TX0_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX0_CLIP_PLANE_D(x) (0x00000e0c+((x)*16))
+#define NV34TCL_TX0_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX1_CLIP_PLANE_A(x) (0x00000e40+((x)*16))
+#define NV34TCL_TX1_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX1_CLIP_PLANE_B(x) (0x00000e44+((x)*16))
+#define NV34TCL_TX1_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX1_CLIP_PLANE_C(x) (0x00000e48+((x)*16))
+#define NV34TCL_TX1_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX1_CLIP_PLANE_D(x) (0x00000e4c+((x)*16))
+#define NV34TCL_TX1_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX2_CLIP_PLANE_A(x) (0x00000e80+((x)*16))
+#define NV34TCL_TX2_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX2_CLIP_PLANE_B(x) (0x00000e84+((x)*16))
+#define NV34TCL_TX2_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX2_CLIP_PLANE_C(x) (0x00000e88+((x)*16))
+#define NV34TCL_TX2_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX2_CLIP_PLANE_D(x) (0x00000e8c+((x)*16))
+#define NV34TCL_TX2_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX3_CLIP_PLANE_A(x) (0x00000ec0+((x)*16))
+#define NV34TCL_TX3_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX3_CLIP_PLANE_B(x) (0x00000ec4+((x)*16))
+#define NV34TCL_TX3_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX3_CLIP_PLANE_C(x) (0x00000ec8+((x)*16))
+#define NV34TCL_TX3_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX3_CLIP_PLANE_D(x) (0x00000ecc+((x)*16))
+#define NV34TCL_TX3_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX4_CLIP_PLANE_A(x) (0x00000f00+((x)*16))
+#define NV34TCL_TX4_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX4_CLIP_PLANE_B(x) (0x00000f04+((x)*16))
+#define NV34TCL_TX4_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX4_CLIP_PLANE_C(x) (0x00000f08+((x)*16))
+#define NV34TCL_TX4_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX4_CLIP_PLANE_D(x) (0x00000f0c+((x)*16))
+#define NV34TCL_TX4_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX5_CLIP_PLANE_A(x) (0x00000f40+((x)*16))
+#define NV34TCL_TX5_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX5_CLIP_PLANE_B(x) (0x00000f44+((x)*16))
+#define NV34TCL_TX5_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX5_CLIP_PLANE_C(x) (0x00000f48+((x)*16))
+#define NV34TCL_TX5_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX5_CLIP_PLANE_D(x) (0x00000f4c+((x)*16))
+#define NV34TCL_TX5_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX6_CLIP_PLANE_A(x) (0x00000f80+((x)*16))
+#define NV34TCL_TX6_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX6_CLIP_PLANE_B(x) (0x00000f84+((x)*16))
+#define NV34TCL_TX6_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX6_CLIP_PLANE_C(x) (0x00000f88+((x)*16))
+#define NV34TCL_TX6_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX6_CLIP_PLANE_D(x) (0x00000f8c+((x)*16))
+#define NV34TCL_TX6_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX7_CLIP_PLANE_A(x) (0x00000fc0+((x)*16))
+#define NV34TCL_TX7_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX7_CLIP_PLANE_B(x) (0x00000fc4+((x)*16))
+#define NV34TCL_TX7_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX7_CLIP_PLANE_C(x) (0x00000fc8+((x)*16))
+#define NV34TCL_TX7_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX7_CLIP_PLANE_D(x) (0x00000fcc+((x)*16))
+#define NV34TCL_TX7_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00001000+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00001004+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00001008+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000100c+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00001010+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00001014+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00001018+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000101c+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00001020+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008
+#define NV34TCL_LIGHT_HALF_VECTOR_X(x) (0x00001028+((x)*64))
+#define NV34TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008
+#define NV34TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000102c+((x)*64))
+#define NV34TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008
+#define NV34TCL_LIGHT_HALF_VECTOR_Z(x) (0x00001030+((x)*64))
+#define NV34TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008
+#define NV34TCL_LIGHT_DIRECTION_X(x) (0x00001034+((x)*64))
+#define NV34TCL_LIGHT_DIRECTION_X__SIZE 0x00000008
+#define NV34TCL_LIGHT_DIRECTION_Y(x) (0x00001038+((x)*64))
+#define NV34TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008
+#define NV34TCL_LIGHT_DIRECTION_Z(x) (0x0000103c+((x)*64))
+#define NV34TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_CUTOFF_A(x) (0x00001200+((x)*64))
+#define NV34TCL_LIGHT_SPOT_CUTOFF_A__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_CUTOFF_B(x) (0x00001204+((x)*64))
+#define NV34TCL_LIGHT_SPOT_CUTOFF_B__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_CUTOFF_C(x) (0x00001208+((x)*64))
+#define NV34TCL_LIGHT_SPOT_CUTOFF_C__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_DIR_X(x) (0x0000120c+((x)*64))
+#define NV34TCL_LIGHT_SPOT_DIR_X__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_DIR_Y(x) (0x00001210+((x)*64))
+#define NV34TCL_LIGHT_SPOT_DIR_Y__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_DIR_Z(x) (0x00001214+((x)*64))
+#define NV34TCL_LIGHT_SPOT_DIR_Z__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_CUTOFF_D(x) (0x00001218+((x)*64))
+#define NV34TCL_LIGHT_SPOT_CUTOFF_D__SIZE 0x00000008
+#define NV34TCL_LIGHT_POSITION_X(x) (0x0000121c+((x)*64))
+#define NV34TCL_LIGHT_POSITION_X__SIZE 0x00000008
+#define NV34TCL_LIGHT_POSITION_Y(x) (0x00001220+((x)*64))
+#define NV34TCL_LIGHT_POSITION_Y__SIZE 0x00000008
+#define NV34TCL_LIGHT_POSITION_Z(x) (0x00001224+((x)*64))
+#define NV34TCL_LIGHT_POSITION_Z__SIZE 0x00000008
+#define NV34TCL_LIGHT_ATTENUATION_CONSTANT(x) (0x00001228+((x)*64))
+#define NV34TCL_LIGHT_ATTENUATION_CONSTANT__SIZE 0x00000008
+#define NV34TCL_LIGHT_ATTENUATION_LINEAR(x) (0x0000122c+((x)*64))
+#define NV34TCL_LIGHT_ATTENUATION_LINEAR__SIZE 0x00000008
+#define NV34TCL_LIGHT_ATTENUATION_QUADRATIC(x) (0x00001230+((x)*64))
+#define NV34TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008
+#define NV34TCL_FRONT_MATERIAL_SHININESS(x) (0x00001400+((x)*4))
+#define NV34TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006
+#define NV34TCL_ENABLED_LIGHTS 0x00001420
+#define NV34TCL_FP_REG_CONTROL 0x00001450
+#define NV34TCL_FP_REG_CONTROL_UNK1_SHIFT 16
+#define NV34TCL_FP_REG_CONTROL_UNK1_MASK 0xffff0000
+#define NV34TCL_FP_REG_CONTROL_UNK0_SHIFT 0
+#define NV34TCL_FP_REG_CONTROL_UNK0_MASK 0x0000ffff
+#define NV34TCL_VP_CLIP_PLANES_ENABLE 0x00001478
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 (1 << 1)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 (1 << 5)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 (1 << 9)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 (1 << 13)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4 (1 << 17)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5 (1 << 21)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE6 (1 << 25)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE7 (1 << 29)
+#define NV34TCL_POLYGON_STIPPLE_ENABLE 0x0000147c
+#define NV34TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4))
+#define NV34TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020
+#define NV34TCL_VTX_ATTR_3F_X(x) (0x00001500+((x)*16))
+#define NV34TCL_VTX_ATTR_3F_X__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_3F_Y(x) (0x00001504+((x)*16))
+#define NV34TCL_VTX_ATTR_3F_Y__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_3F_Z(x) (0x00001508+((x)*16))
+#define NV34TCL_VTX_ATTR_3F_Z__SIZE 0x00000010
+#define NV34TCL_VP_CLIP_PLANE_A(x) (0x00001600+((x)*16))
+#define NV34TCL_VP_CLIP_PLANE_A__SIZE 0x00000006
+#define NV34TCL_VP_CLIP_PLANE_B(x) (0x00001604+((x)*16))
+#define NV34TCL_VP_CLIP_PLANE_B__SIZE 0x00000006
+#define NV34TCL_VP_CLIP_PLANE_C(x) (0x00001608+((x)*16))
+#define NV34TCL_VP_CLIP_PLANE_C__SIZE 0x00000006
+#define NV34TCL_VP_CLIP_PLANE_D(x) (0x0000160c+((x)*16))
+#define NV34TCL_VP_CLIP_PLANE_D__SIZE 0x00000006
+#define NV34TCL_VTXBUF_ADDRESS(x) (0x00001680+((x)*4))
+#define NV34TCL_VTXBUF_ADDRESS__SIZE 0x00000010
+#define NV34TCL_VTXBUF_ADDRESS_DMA1 (1 << 31)
+#define NV34TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0
+#define NV34TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff
+#define NV34TCL_VTXFMT(x) (0x00001740+((x)*4))
+#define NV34TCL_VTXFMT__SIZE 0x00000010
+#define NV34TCL_VTXFMT_TYPE_SHIFT 0
+#define NV34TCL_VTXFMT_TYPE_MASK 0x0000000f
+#define NV34TCL_VTXFMT_TYPE_FLOAT 0x00000002
+#define NV34TCL_VTXFMT_TYPE_UBYTE 0x00000004
+#define NV34TCL_VTXFMT_TYPE_USHORT 0x00000005
+#define NV34TCL_VTXFMT_SIZE_SHIFT 4
+#define NV34TCL_VTXFMT_SIZE_MASK 0x000000f0
+#define NV34TCL_VTXFMT_STRIDE_SHIFT 8
+#define NV34TCL_VTXFMT_STRIDE_MASK 0x0000ff00
+#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0
+#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4
+#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8
+#define NV34TCL_COLOR_MATERIAL_BACK_R 0x000017b0
+#define NV34TCL_COLOR_MATERIAL_BACK_G 0x000017b4
+#define NV34TCL_COLOR_MATERIAL_BACK_B 0x000017b8
+#define NV34TCL_COLOR_MATERIAL_BACK_A 0x000017c0
+#define NV34TCL_QUERY_RESET 0x000017c8
+#define NV34TCL_QUERY_UNK17CC 0x000017cc
+#define NV34TCL_QUERY_GET 0x00001800
+#define NV34TCL_QUERY_GET_UNK24_SHIFT 24
+#define NV34TCL_QUERY_GET_UNK24_MASK 0xff000000
+#define NV34TCL_QUERY_GET_OFFSET_SHIFT 0
+#define NV34TCL_QUERY_GET_OFFSET_MASK 0x00ffffff
+#define NV34TCL_VERTEX_BEGIN_END 0x00001808
+#define NV34TCL_VERTEX_BEGIN_END_STOP 0x00000000
+#define NV34TCL_VERTEX_BEGIN_END_POINTS 0x00000001
+#define NV34TCL_VERTEX_BEGIN_END_LINES 0x00000002
+#define NV34TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003
+#define NV34TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004
+#define NV34TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005
+#define NV34TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006
+#define NV34TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007
+#define NV34TCL_VERTEX_BEGIN_END_QUADS 0x00000008
+#define NV34TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009
+#define NV34TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a
+#define NV34TCL_VB_ELEMENT_U16 0x0000180c
+#define NV34TCL_VB_ELEMENT_U16_I0_SHIFT 0
+#define NV34TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
+#define NV34TCL_VB_ELEMENT_U16_I1_SHIFT 16
+#define NV34TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
+#define NV34TCL_VB_ELEMENT_U32 0x00001810
+#define NV34TCL_VB_VERTEX_BATCH 0x00001814
+#define NV34TCL_VB_VERTEX_BATCH_OFFSET_SHIFT 0
+#define NV34TCL_VB_VERTEX_BATCH_OFFSET_MASK 0x00ffffff
+#define NV34TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24
+#define NV34TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000
+#define NV34TCL_VERTEX_DATA 0x00001818
+#define NV34TCL_IDXBUF_ADDRESS 0x0000181c
+#define NV34TCL_IDXBUF_FORMAT 0x00001820
+#define NV34TCL_IDXBUF_FORMAT_TYPE_SHIFT 4
+#define NV34TCL_IDXBUF_FORMAT_TYPE_MASK 0x000000f0
+#define NV34TCL_IDXBUF_FORMAT_TYPE_U32 0x00000000
+#define NV34TCL_IDXBUF_FORMAT_TYPE_U16 0x00000010
+#define NV34TCL_IDXBUF_FORMAT_DMA1 (1 << 0)
+#define NV34TCL_VB_INDEX_BATCH 0x00001824
+#define NV34TCL_VB_INDEX_BATCH_COUNT_SHIFT 24
+#define NV34TCL_VB_INDEX_BATCH_COUNT_MASK 0xff000000
+#define NV34TCL_VB_INDEX_BATCH_START_SHIFT 0
+#define NV34TCL_VB_INDEX_BATCH_START_MASK 0x00ffffff
+#define NV34TCL_POLYGON_MODE_FRONT 0x00001828
+#define NV34TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV34TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV34TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NV34TCL_POLYGON_MODE_BACK 0x0000182c
+#define NV34TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV34TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV34TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NV34TCL_CULL_FACE 0x00001830
+#define NV34TCL_CULL_FACE_FRONT 0x00000404
+#define NV34TCL_CULL_FACE_BACK 0x00000405
+#define NV34TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NV34TCL_FRONT_FACE 0x00001834
+#define NV34TCL_FRONT_FACE_CW 0x00000900
+#define NV34TCL_FRONT_FACE_CCW 0x00000901
+#define NV34TCL_POLYGON_SMOOTH_ENABLE 0x00001838
+#define NV34TCL_CULL_FACE_ENABLE 0x0000183c
+#define NV34TCL_TX_PALETTE_OFFSET(x) (0x00001840+((x)*4))
+#define NV34TCL_TX_PALETTE_OFFSET__SIZE 0x00000004
+#define NV34TCL_VTX_ATTR_2F_X(x) (0x00001880+((x)*8))
+#define NV34TCL_VTX_ATTR_2F_X__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_2F_Y(x) (0x00001884+((x)*8))
+#define NV34TCL_VTX_ATTR_2F_Y__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_2I(x) (0x00001900+((x)*4))
+#define NV34TCL_VTX_ATTR_2I__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_2I_X_SHIFT 0
+#define NV34TCL_VTX_ATTR_2I_X_MASK 0x0000ffff
+#define NV34TCL_VTX_ATTR_2I_Y_SHIFT 16
+#define NV34TCL_VTX_ATTR_2I_Y_MASK 0xffff0000
+#define NV34TCL_VTX_ATTR_4UB(x) (0x00001940+((x)*4))
+#define NV34TCL_VTX_ATTR_4UB__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4UB_X_SHIFT 0
+#define NV34TCL_VTX_ATTR_4UB_X_MASK 0x000000ff
+#define NV34TCL_VTX_ATTR_4UB_Y_SHIFT 8
+#define NV34TCL_VTX_ATTR_4UB_Y_MASK 0x0000ff00
+#define NV34TCL_VTX_ATTR_4UB_Z_SHIFT 16
+#define NV34TCL_VTX_ATTR_4UB_Z_MASK 0x00ff0000
+#define NV34TCL_VTX_ATTR_4UB_W_SHIFT 24
+#define NV34TCL_VTX_ATTR_4UB_W_MASK 0xff000000
+#define NV34TCL_VTX_ATTR_4I_XY(x) (0x00001980+((x)*8))
+#define NV34TCL_VTX_ATTR_4I_XY__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4I_XY_X_SHIFT 0
+#define NV34TCL_VTX_ATTR_4I_XY_X_MASK 0x0000ffff
+#define NV34TCL_VTX_ATTR_4I_XY_Y_SHIFT 16
+#define NV34TCL_VTX_ATTR_4I_XY_Y_MASK 0xffff0000
+#define NV34TCL_VTX_ATTR_4I_ZW(x) (0x00001984+((x)*8))
+#define NV34TCL_VTX_ATTR_4I_ZW__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4I_ZW_Z_SHIFT 0
+#define NV34TCL_VTX_ATTR_4I_ZW_Z_MASK 0x0000ffff
+#define NV34TCL_VTX_ATTR_4I_ZW_W_SHIFT 16
+#define NV34TCL_VTX_ATTR_4I_ZW_W_MASK 0xffff0000
+#define NV34TCL_TX_OFFSET(x) (0x00001a00+((x)*32))
+#define NV34TCL_TX_OFFSET__SIZE 0x00000004
+#define NV34TCL_TX_FORMAT(x) (0x00001a04+((x)*32))
+#define NV34TCL_TX_FORMAT__SIZE 0x00000004
+#define NV34TCL_TX_FORMAT_DMA0 (1 << 0)
+#define NV34TCL_TX_FORMAT_DMA1 (1 << 1)
+#define NV34TCL_TX_FORMAT_CUBIC (1 << 2)
+#define NV34TCL_TX_FORMAT_NO_BORDER (1 << 3)
+#define NV34TCL_TX_FORMAT_DIMS_SHIFT 4
+#define NV34TCL_TX_FORMAT_DIMS_MASK 0x000000f0
+#define NV34TCL_TX_FORMAT_DIMS_1D 0x00000010
+#define NV34TCL_TX_FORMAT_DIMS_2D 0x00000020
+#define NV34TCL_TX_FORMAT_DIMS_3D 0x00000030
+#define NV34TCL_TX_FORMAT_FORMAT_SHIFT 8
+#define NV34TCL_TX_FORMAT_FORMAT_MASK 0x0000ff00
+#define NV34TCL_TX_FORMAT_FORMAT_L8 0x00000000
+#define NV34TCL_TX_FORMAT_FORMAT_A8 0x00000100
+#define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000200
+#define NV34TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000300
+#define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000400
+#define NV34TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000500
+#define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000600
+#define NV34TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000700
+#define NV34TCL_TX_FORMAT_FORMAT_INDEX8 0x00000b00
+#define NV34TCL_TX_FORMAT_FORMAT_DXT1 0x00000c00
+#define NV34TCL_TX_FORMAT_FORMAT_DXT3 0x00000e00
+#define NV34TCL_TX_FORMAT_FORMAT_DXT5 0x00000f00
+#define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000
+#define NV34TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00001100
+#define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200
+#define NV34TCL_TX_FORMAT_FORMAT_L8_RECT 0x00001300
+#define NV34TCL_TX_FORMAT_FORMAT_A8L8 0x00001a00
+#define NV34TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00001b00
+#define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00
+#define NV34TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00
+#define NV34TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00002000
+#define NV34TCL_TX_FORMAT_FORMAT_DSDT 0x00002800
+#define NV34TCL_TX_FORMAT_FORMAT_A16 0x00003200
+#define NV34TCL_TX_FORMAT_FORMAT_HILO16 0x00003300
+#define NV34TCL_TX_FORMAT_FORMAT_A16_RECT 0x00003500
+#define NV34TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00003600
+#define NV34TCL_TX_FORMAT_FORMAT_HILO8 0x00004400
+#define NV34TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00004500
+#define NV34TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00004600
+#define NV34TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00004700
+#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00
+#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00
+#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00
+#define NV34TCL_TX_FORMAT_MIPMAP (1 << 19)
+#define NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 20
+#define NV34TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x00f00000
+#define NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 24
+#define NV34TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x0f000000
+#define NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT 28
+#define NV34TCL_TX_FORMAT_BASE_SIZE_W_MASK 0xf0000000
+#define NV34TCL_TX_WRAP(x) (0x00001a08+((x)*32))
+#define NV34TCL_TX_WRAP__SIZE 0x00000004
+#define NV34TCL_TX_WRAP_S_SHIFT 0
+#define NV34TCL_TX_WRAP_S_MASK 0x000000ff
+#define NV34TCL_TX_WRAP_S_REPEAT 0x00000001
+#define NV34TCL_TX_WRAP_S_MIRRORED_REPEAT 0x00000002
+#define NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE 0x00000003
+#define NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER 0x00000004
+#define NV34TCL_TX_WRAP_S_CLAMP 0x00000005
+#define NV34TCL_TX_WRAP_T_SHIFT 8
+#define NV34TCL_TX_WRAP_T_MASK 0x00000f00
+#define NV34TCL_TX_WRAP_T_REPEAT 0x00000100
+#define NV34TCL_TX_WRAP_T_MIRRORED_REPEAT 0x00000200
+#define NV34TCL_TX_WRAP_T_CLAMP_TO_EDGE 0x00000300
+#define NV34TCL_TX_WRAP_T_CLAMP_TO_BORDER 0x00000400
+#define NV34TCL_TX_WRAP_T_CLAMP 0x00000500
+#define NV34TCL_TX_WRAP_EXPAND_NORMAL_SHIFT 12
+#define NV34TCL_TX_WRAP_EXPAND_NORMAL_MASK 0x0000f000
+#define NV34TCL_TX_WRAP_R_SHIFT 16
+#define NV34TCL_TX_WRAP_R_MASK 0x000f0000
+#define NV34TCL_TX_WRAP_R_REPEAT 0x00010000
+#define NV34TCL_TX_WRAP_R_MIRRORED_REPEAT 0x00020000
+#define NV34TCL_TX_WRAP_R_CLAMP_TO_EDGE 0x00030000
+#define NV34TCL_TX_WRAP_R_CLAMP_TO_BORDER 0x00040000
+#define NV34TCL_TX_WRAP_R_CLAMP 0x00050000
+#define NV34TCL_TX_WRAP_RCOMP_SHIFT 28
+#define NV34TCL_TX_WRAP_RCOMP_MASK 0xf0000000
+#define NV34TCL_TX_WRAP_RCOMP_NEVER 0x00000000
+#define NV34TCL_TX_WRAP_RCOMP_GREATER 0x10000000
+#define NV34TCL_TX_WRAP_RCOMP_EQUAL 0x20000000
+#define NV34TCL_TX_WRAP_RCOMP_GEQUAL 0x30000000
+#define NV34TCL_TX_WRAP_RCOMP_LESS 0x40000000
+#define NV34TCL_TX_WRAP_RCOMP_NOTEQUAL 0x50000000
+#define NV34TCL_TX_WRAP_RCOMP_LEQUAL 0x60000000
+#define NV34TCL_TX_WRAP_RCOMP_ALWAYS 0x70000000
+#define NV34TCL_TX_ENABLE(x) (0x00001a0c+((x)*32))
+#define NV34TCL_TX_ENABLE__SIZE 0x00000004
+#define NV34TCL_TX_ENABLE_ANISO_SHIFT 4
+#define NV34TCL_TX_ENABLE_ANISO_MASK 0x00000030
+#define NV34TCL_TX_ENABLE_ANISO_NONE 0x00000000
+#define NV34TCL_TX_ENABLE_ANISO_2X 0x00000010
+#define NV34TCL_TX_ENABLE_ANISO_4X 0x00000020
+#define NV34TCL_TX_ENABLE_ANISO_8X 0x00000030
+#define NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT 14
+#define NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_MASK 0x0003c000
+#define NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT 26
+#define NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_MASK 0x3c000000
+#define NV34TCL_TX_ENABLE_ENABLE (1 << 30)
+#define NV34TCL_TX_SWIZZLE(x) (0x00001a10+((x)*32))
+#define NV34TCL_TX_SWIZZLE__SIZE 0x00000004
+#define NV34TCL_TX_SWIZZLE_S0_X_SHIFT 14
+#define NV34TCL_TX_SWIZZLE_S0_X_MASK 0x0000c000
+#define NV34TCL_TX_SWIZZLE_S0_X_ZERO 0x00000000
+#define NV34TCL_TX_SWIZZLE_S0_X_ONE 0x00004000
+#define NV34TCL_TX_SWIZZLE_S0_X_S1 0x00008000
+#define NV34TCL_TX_SWIZZLE_S0_Y_SHIFT 12
+#define NV34TCL_TX_SWIZZLE_S0_Y_MASK 0x00003000
+#define NV34TCL_TX_SWIZZLE_S0_Y_ZERO 0x00000000
+#define NV34TCL_TX_SWIZZLE_S0_Y_ONE 0x00001000
+#define NV34TCL_TX_SWIZZLE_S0_Y_S1 0x00002000
+#define NV34TCL_TX_SWIZZLE_S0_Z_SHIFT 10
+#define NV34TCL_TX_SWIZZLE_S0_Z_MASK 0x00000c00
+#define NV34TCL_TX_SWIZZLE_S0_Z_ZERO 0x00000000
+#define NV34TCL_TX_SWIZZLE_S0_Z_ONE 0x00000400
+#define NV34TCL_TX_SWIZZLE_S0_Z_S1 0x00000800
+#define NV34TCL_TX_SWIZZLE_S0_W_SHIFT 8
+#define NV34TCL_TX_SWIZZLE_S0_W_MASK 0x00000300
+#define NV34TCL_TX_SWIZZLE_S0_W_ZERO 0x00000000
+#define NV34TCL_TX_SWIZZLE_S0_W_ONE 0x00000100
+#define NV34TCL_TX_SWIZZLE_S0_W_S1 0x00000200
+#define NV34TCL_TX_SWIZZLE_S1_X_SHIFT 6
+#define NV34TCL_TX_SWIZZLE_S1_X_MASK 0x000000c0
+#define NV34TCL_TX_SWIZZLE_S1_X_W 0x00000000
+#define NV34TCL_TX_SWIZZLE_S1_X_Z 0x00000040
+#define NV34TCL_TX_SWIZZLE_S1_X_Y 0x00000080
+#define NV34TCL_TX_SWIZZLE_S1_X_X 0x000000c0
+#define NV34TCL_TX_SWIZZLE_S1_Y_SHIFT 4
+#define NV34TCL_TX_SWIZZLE_S1_Y_MASK 0x00000030
+#define NV34TCL_TX_SWIZZLE_S1_Y_W 0x00000000
+#define NV34TCL_TX_SWIZZLE_S1_Y_Z 0x00000010
+#define NV34TCL_TX_SWIZZLE_S1_Y_Y 0x00000020
+#define NV34TCL_TX_SWIZZLE_S1_Y_X 0x00000030
+#define NV34TCL_TX_SWIZZLE_S1_Z_SHIFT 2
+#define NV34TCL_TX_SWIZZLE_S1_Z_MASK 0x0000000c
+#define NV34TCL_TX_SWIZZLE_S1_Z_W 0x00000000
+#define NV34TCL_TX_SWIZZLE_S1_Z_Z 0x00000004
+#define NV34TCL_TX_SWIZZLE_S1_Z_Y 0x00000008
+#define NV34TCL_TX_SWIZZLE_S1_Z_X 0x0000000c
+#define NV34TCL_TX_SWIZZLE_S1_W_SHIFT 0
+#define NV34TCL_TX_SWIZZLE_S1_W_MASK 0x00000003
+#define NV34TCL_TX_SWIZZLE_S1_W_W 0x00000000
+#define NV34TCL_TX_SWIZZLE_S1_W_Z 0x00000001
+#define NV34TCL_TX_SWIZZLE_S1_W_Y 0x00000002
+#define NV34TCL_TX_SWIZZLE_S1_W_X 0x00000003
+#define NV34TCL_TX_SWIZZLE_RECT_PITCH_SHIFT 16
+#define NV34TCL_TX_SWIZZLE_RECT_PITCH_MASK 0xffff0000
+#define NV34TCL_TX_FILTER(x) (0x00001a14+((x)*32))
+#define NV34TCL_TX_FILTER__SIZE 0x00000004
+#define NV34TCL_TX_FILTER_LOD_BIAS_SHIFT 8
+#define NV34TCL_TX_FILTER_LOD_BIAS_MASK 0x00000f00
+#define NV34TCL_TX_FILTER_MINIFY_SHIFT 16
+#define NV34TCL_TX_FILTER_MINIFY_MASK 0x000f0000
+#define NV34TCL_TX_FILTER_MINIFY_NEAREST 0x00010000
+#define NV34TCL_TX_FILTER_MINIFY_LINEAR 0x00020000
+#define NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x00030000
+#define NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x00040000
+#define NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x00050000
+#define NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x00060000
+#define NV34TCL_TX_FILTER_MAGNIFY_SHIFT 24
+#define NV34TCL_TX_FILTER_MAGNIFY_MASK 0x0f000000
+#define NV34TCL_TX_FILTER_MAGNIFY_NEAREST 0x01000000
+#define NV34TCL_TX_FILTER_MAGNIFY_LINEAR 0x02000000
+#define NV34TCL_TX_FILTER_SIGNED_BLUE (1 << 28)
+#define NV34TCL_TX_FILTER_SIGNED_GREEN (1 << 29)
+#define NV34TCL_TX_FILTER_SIGNED_RED (1 << 30)
+#define NV34TCL_TX_FILTER_SIGNED_ALPHA (1 << 31)
+#define NV34TCL_TX_NPOT_SIZE(x) (0x00001a18+((x)*32))
+#define NV34TCL_TX_NPOT_SIZE__SIZE 0x00000004
+#define NV34TCL_TX_NPOT_SIZE_H_SHIFT 0
+#define NV34TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff
+#define NV34TCL_TX_NPOT_SIZE_W_SHIFT 16
+#define NV34TCL_TX_NPOT_SIZE_W_MASK 0xffff0000
+#define NV34TCL_TX_BORDER_COLOR(x) (0x00001a1c+((x)*32))
+#define NV34TCL_TX_BORDER_COLOR__SIZE 0x00000004
+#define NV34TCL_TX_BORDER_COLOR_B_SHIFT 0
+#define NV34TCL_TX_BORDER_COLOR_B_MASK 0x000000ff
+#define NV34TCL_TX_BORDER_COLOR_G_SHIFT 8
+#define NV34TCL_TX_BORDER_COLOR_G_MASK 0x0000ff00
+#define NV34TCL_TX_BORDER_COLOR_R_SHIFT 16
+#define NV34TCL_TX_BORDER_COLOR_R_MASK 0x00ff0000
+#define NV34TCL_TX_BORDER_COLOR_A_SHIFT 24
+#define NV34TCL_TX_BORDER_COLOR_A_MASK 0xff000000
+#define NV34TCL_VTX_ATTR_4F_X(x) (0x00001c00+((x)*16))
+#define NV34TCL_VTX_ATTR_4F_X__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4F_Y(x) (0x00001c04+((x)*16))
+#define NV34TCL_VTX_ATTR_4F_Y__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4F_Z(x) (0x00001c08+((x)*16))
+#define NV34TCL_VTX_ATTR_4F_Z__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4F_W(x) (0x00001c0c+((x)*16))
+#define NV34TCL_VTX_ATTR_4F_W__SIZE 0x00000010
+#define NV34TCL_FP_CONTROL 0x00001d60
+#define NV34TCL_FP_CONTROL_USES_KIL (1 << 7)
+#define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_SHIFT 0
+#define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_MASK 0x0000000f
+#define NV34TCL_DEPTH_UNK17D8 0x00001d78
+#define NV34TCL_DEPTH_UNK17D8_CLAMP_SHIFT 4
+#define NV34TCL_DEPTH_UNK17D8_CLAMP_MASK 0x000000f0
+#define NV34TCL_MULTISAMPLE_CONTROL 0x00001d7c
+#define NV34TCL_MULTISAMPLE_CONTROL_ENABLE (1 << 0)
+#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_COVERAGE (1 << 4)
+#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_ONE (1 << 8)
+#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE_SHIFT 16
+#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE_MASK 0xffff0000
+#define NV34TCL_CLEAR_DEPTH_VALUE 0x00001d8c
+#define NV34TCL_CLEAR_COLOR_VALUE 0x00001d90
+#define NV34TCL_CLEAR_COLOR_VALUE_B_SHIFT 0
+#define NV34TCL_CLEAR_COLOR_VALUE_B_MASK 0x000000ff
+#define NV34TCL_CLEAR_COLOR_VALUE_G_SHIFT 8
+#define NV34TCL_CLEAR_COLOR_VALUE_G_MASK 0x0000ff00
+#define NV34TCL_CLEAR_COLOR_VALUE_R_SHIFT 16
+#define NV34TCL_CLEAR_COLOR_VALUE_R_MASK 0x00ff0000
+#define NV34TCL_CLEAR_COLOR_VALUE_A_SHIFT 24
+#define NV34TCL_CLEAR_COLOR_VALUE_A_MASK 0xff000000
+#define NV34TCL_CLEAR_BUFFERS 0x00001d94
+#define NV34TCL_CLEAR_BUFFERS_COLOR_A (1 << 7)
+#define NV34TCL_CLEAR_BUFFERS_COLOR_B (1 << 6)
+#define NV34TCL_CLEAR_BUFFERS_COLOR_G (1 << 5)
+#define NV34TCL_CLEAR_BUFFERS_COLOR_R (1 << 4)
+#define NV34TCL_CLEAR_BUFFERS_STENCIL (1 << 1)
+#define NV34TCL_CLEAR_BUFFERS_DEPTH (1 << 0)
+#define NV34TCL_DO_VERTICES 0x00001dac
+#define NV34TCL_LINE_STIPPLE_ENABLE 0x00001db4
+#define NV34TCL_LINE_STIPPLE_PATTERN 0x00001db8
+#define NV34TCL_LINE_STIPPLE_PATTERN_FACTOR_SHIFT 0
+#define NV34TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff
+#define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16
+#define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000
+#define NV34TCL_BACK_MATERIAL_SHININESS(x) (0x00001e20+((x)*4))
+#define NV34TCL_BACK_MATERIAL_SHININESS__SIZE 0x00000006
+#define NV34TCL_VTX_ATTR_1F(x) (0x00001e40+((x)*4))
+#define NV34TCL_VTX_ATTR_1F__SIZE 0x00000010
+#define NV34TCL_ENGINE 0x00001e94
+#define NV34TCL_ENGINE_FP (1 << 0)
+#define NV34TCL_ENGINE_VP (1 << 1)
+#define NV34TCL_ENGINE_FIXED (1 << 2)
+#define NV34TCL_VP_UPLOAD_FROM_ID 0x00001e9c
+#define NV34TCL_VP_START_FROM_ID 0x00001ea0
+#define NV34TCL_POINT_PARAMETERS(x) (0x00001ec0+((x)*4))
+#define NV34TCL_POINT_PARAMETERS__SIZE 0x00000008
+#define NV34TCL_POINT_SIZE 0x00001ee0
+#define NV34TCL_POINT_PARAMETERS_ENABLE 0x00001ee4
+#define NV34TCL_POINT_SPRITE 0x00001ee8
+#define NV34TCL_POINT_SPRITE_ENABLE (1 << 0)
+#define NV34TCL_POINT_SPRITE_R_MODE_SHIFT 1
+#define NV34TCL_POINT_SPRITE_R_MODE_MASK 0x00000006
+#define NV34TCL_POINT_SPRITE_R_MODE_ZERO 0x00000000
+#define NV34TCL_POINT_SPRITE_R_MODE_R 0x00000002
+#define NV34TCL_POINT_SPRITE_R_MODE_S 0x00000004
+#define NV34TCL_POINT_SPRITE_COORD_REPLACE (1 << 11)
+#define NV34TCL_VP_UPLOAD_CONST_ID 0x00001efc
+#define NV34TCL_VP_UPLOAD_CONST_X(x) (0x00001f00+((x)*16))
+#define NV34TCL_VP_UPLOAD_CONST_X__SIZE 0x00000004
+#define NV34TCL_VP_UPLOAD_CONST_Y(x) (0x00001f04+((x)*16))
+#define NV34TCL_VP_UPLOAD_CONST_Y__SIZE 0x00000004
+#define NV34TCL_VP_UPLOAD_CONST_Z(x) (0x00001f08+((x)*16))
+#define NV34TCL_VP_UPLOAD_CONST_Z__SIZE 0x00000004
+#define NV34TCL_VP_UPLOAD_CONST_W(x) (0x00001f0c+((x)*16))
+#define NV34TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004
+#define NV34TCL_UNK1f80(x) (0x00001f80+((x)*4))
+#define NV34TCL_UNK1f80__SIZE 0x00000010
+
+
+#define NV40_CONTEXT_SURFACES_2D 0x00003062
+
+
+
+#define NV40_STRETCHED_IMAGE_FROM_CPU 0x00003066
+
+
+
+#define NV40_TEXTURE_FROM_CPU 0x0000307b
+
+
+
+#define NV40_SCALED_IMAGE_FROM_MEMORY 0x00003089
+
+
+
+#define NV40_IMAGE_FROM_CPU 0x0000308a
+
+
+
+#define NV40_SWIZZLED_SURFACE 0x0000309e
+
+
+
+#define NV40TCL 0x00004097
+
+#define NV40TCL_REF_CNT 0x00000050
+#define NV40TCL_NOP 0x00000100
+#define NV40TCL_NOTIFY 0x00000104
+#define NV40TCL_DMA_NOTIFY 0x00000180
+#define NV40TCL_DMA_TEXTURE0 0x00000184
+#define NV40TCL_DMA_TEXTURE1 0x00000188
+#define NV40TCL_DMA_COLOR1 0x0000018c
+#define NV40TCL_DMA_COLOR0 0x00000194
+#define NV40TCL_DMA_ZETA 0x00000198
+#define NV40TCL_DMA_VTXBUF0 0x0000019c
+#define NV40TCL_DMA_VTXBUF1 0x000001a0
+#define NV40TCL_DMA_FENCE 0x000001a4
+#define NV40TCL_DMA_QUERY 0x000001a8
+#define NV40TCL_DMA_UNK01AC 0x000001ac
+#define NV40TCL_DMA_UNK01B0 0x000001b0
+#define NV40TCL_DMA_COLOR2 0x000001b4
+#define NV40TCL_DMA_COLOR3 0x000001b8
+#define NV40TCL_RT_HORIZ 0x00000200
+#define NV40TCL_RT_HORIZ_W_SHIFT 16
+#define NV40TCL_RT_HORIZ_W_MASK 0xffff0000
+#define NV40TCL_RT_HORIZ_X_SHIFT 0
+#define NV40TCL_RT_HORIZ_X_MASK 0x0000ffff
+#define NV40TCL_RT_VERT 0x00000204
+#define NV40TCL_RT_VERT_H_SHIFT 16
+#define NV40TCL_RT_VERT_H_MASK 0xffff0000
+#define NV40TCL_RT_VERT_Y_SHIFT 0
+#define NV40TCL_RT_VERT_Y_MASK 0x0000ffff
+#define NV40TCL_RT_FORMAT 0x00000208
+#define NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT 24
+#define NV40TCL_RT_FORMAT_LOG2_HEIGHT_MASK 0xff000000
+#define NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT 16
+#define NV40TCL_RT_FORMAT_LOG2_WIDTH_MASK 0x00ff0000
+#define NV40TCL_RT_FORMAT_TYPE_SHIFT 8
+#define NV40TCL_RT_FORMAT_TYPE_MASK 0x00000f00
+#define NV40TCL_RT_FORMAT_TYPE_LINEAR 0x00000100
+#define NV40TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200
+#define NV40TCL_RT_FORMAT_ZETA_SHIFT 5
+#define NV40TCL_RT_FORMAT_ZETA_MASK 0x000000e0
+#define NV40TCL_RT_FORMAT_ZETA_Z16 0x00000020
+#define NV40TCL_RT_FORMAT_ZETA_Z24S8 0x00000040
+#define NV40TCL_RT_FORMAT_COLOR_SHIFT 0
+#define NV40TCL_RT_FORMAT_COLOR_MASK 0x0000001f
+#define NV40TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003
+#define NV40TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005
+#define NV40TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008
+#define NV40TCL_RT_FORMAT_COLOR_B8 0x00000009
+#define NV40TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d
+#define NV40TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f
+#define NV40TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010
+#define NV40TCL_COLOR0_PITCH 0x0000020c
+#define NV40TCL_COLOR0_OFFSET 0x00000210
+#define NV40TCL_ZETA_OFFSET 0x00000214
+#define NV40TCL_COLOR1_OFFSET 0x00000218
+#define NV40TCL_COLOR1_PITCH 0x0000021c
+#define NV40TCL_RT_ENABLE 0x00000220
+#define NV40TCL_RT_ENABLE_MRT (1 << 4)
+#define NV40TCL_RT_ENABLE_COLOR3 (1 << 3)
+#define NV40TCL_RT_ENABLE_COLOR2 (1 << 2)
+#define NV40TCL_RT_ENABLE_COLOR1 (1 << 1)
+#define NV40TCL_RT_ENABLE_COLOR0 (1 << 0)
+#define NV40TCL_ZETA_PITCH 0x0000022c
+#define NV40TCL_COLOR2_PITCH 0x00000280
+#define NV40TCL_COLOR3_PITCH 0x00000284
+#define NV40TCL_COLOR2_OFFSET 0x00000288
+#define NV40TCL_COLOR3_OFFSET 0x0000028c
+#define NV40TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*8))
+#define NV40TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV40TCL_VIEWPORT_CLIP_VERT(x) (0x000002c4+((x)*8))
+#define NV40TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV40TCL_DITHER_ENABLE 0x00000300
+#define NV40TCL_ALPHA_TEST_ENABLE 0x00000304
+#define NV40TCL_ALPHA_TEST_FUNC 0x00000308
+#define NV40TCL_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NV40TCL_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NV40TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NV40TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NV40TCL_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV40TCL_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV40TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV40TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NV40TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+#define NV40TCL_ALPHA_TEST_REF 0x0000030c
+#define NV40TCL_BLEND_ENABLE 0x00000310
+#define NV40TCL_BLEND_FUNC_SRC 0x00000314
+#define NV40TCL_BLEND_FUNC_SRC_RGB_SHIFT 0
+#define NV40TCL_BLEND_FUNC_SRC_RGB_MASK 0x0000ffff
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001
+#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV40TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV40TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV40TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV40TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SHIFT 16
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_MASK 0xffff0000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000
+#define NV40TCL_BLEND_FUNC_DST 0x00000318
+#define NV40TCL_BLEND_FUNC_DST_RGB_SHIFT 0
+#define NV40TCL_BLEND_FUNC_DST_RGB_MASK 0x0000ffff
+#define NV40TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001
+#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV40TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV40TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV40TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV40TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_SHIFT 16
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_MASK 0xffff0000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00010000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000
+#define NV40TCL_BLEND_COLOR 0x0000031c
+#define NV40TCL_BLEND_EQUATION 0x00000320
+#define NV40TCL_BLEND_EQUATION_RGB_SHIFT 0
+#define NV40TCL_BLEND_EQUATION_RGB_MASK 0x0000ffff
+#define NV40TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NV40TCL_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NV40TCL_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NV40TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NV40TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV40TCL_BLEND_EQUATION_ALPHA_SHIFT 16
+#define NV40TCL_BLEND_EQUATION_ALPHA_MASK 0xffff0000
+#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x80060000
+#define NV40TCL_BLEND_EQUATION_ALPHA_MIN 0x80070000
+#define NV40TCL_BLEND_EQUATION_ALPHA_MAX 0x80080000
+#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x800a0000
+#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x800b0000
+#define NV40TCL_COLOR_MASK 0x00000324
+#define NV40TCL_COLOR_MASK_BUFFER0_B_SHIFT 0
+#define NV40TCL_COLOR_MASK_BUFFER0_B_MASK 0x000000ff
+#define NV40TCL_COLOR_MASK_BUFFER0_G_SHIFT 8
+#define NV40TCL_COLOR_MASK_BUFFER0_G_MASK 0x0000ff00
+#define NV40TCL_COLOR_MASK_BUFFER0_R_SHIFT 16
+#define NV40TCL_COLOR_MASK_BUFFER0_R_MASK 0x00ff0000
+#define NV40TCL_COLOR_MASK_BUFFER0_A_SHIFT 24
+#define NV40TCL_COLOR_MASK_BUFFER0_A_MASK 0xff000000
+#define NV40TCL_STENCIL_FRONT_ENABLE 0x00000328
+#define NV40TCL_STENCIL_FRONT_MASK 0x0000032c
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC 0x00000330
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+#define NV40TCL_STENCIL_FRONT_FUNC_REF 0x00000334
+#define NV40TCL_STENCIL_FRONT_FUNC_MASK 0x00000338
+#define NV40TCL_STENCIL_FRONT_OP_FAIL 0x0000033c
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL 0x00000340
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS 0x00000344
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV40TCL_STENCIL_BACK_ENABLE 0x00000348
+#define NV40TCL_STENCIL_BACK_MASK 0x0000034c
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC 0x00000350
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+#define NV40TCL_STENCIL_BACK_FUNC_REF 0x00000354
+#define NV40TCL_STENCIL_BACK_FUNC_MASK 0x00000358
+#define NV40TCL_STENCIL_BACK_OP_FAIL 0x0000035c
+#define NV40TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NV40TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NV40TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NV40TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NV40TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NV40TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL 0x00000360
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV40TCL_STENCIL_BACK_OP_ZPASS 0x00000364
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV40TCL_SHADE_MODEL 0x00000368
+#define NV40TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NV40TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NV40TCL_MRT_COLOR_MASK 0x00000370
+#define NV40TCL_MRT_COLOR_MASK_BUFFER1_A (1 << 4)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER1_R (1 << 5)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER1_G (1 << 6)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER1_B (1 << 7)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER2_A (1 << 8)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER2_R (1 << 9)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER2_G (1 << 10)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER2_B (1 << 11)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER3_A (1 << 12)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER3_R (1 << 13)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER3_G (1 << 14)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER3_B (1 << 15)
+#define NV40TCL_COLOR_LOGIC_OP_ENABLE 0x00000374
+#define NV40TCL_COLOR_LOGIC_OP 0x00000378
+#define NV40TCL_COLOR_LOGIC_OP_CLEAR 0x00001500
+#define NV40TCL_COLOR_LOGIC_OP_AND 0x00001501
+#define NV40TCL_COLOR_LOGIC_OP_AND_REVERSE 0x00001502
+#define NV40TCL_COLOR_LOGIC_OP_COPY 0x00001503
+#define NV40TCL_COLOR_LOGIC_OP_AND_INVERTED 0x00001504
+#define NV40TCL_COLOR_LOGIC_OP_NOOP 0x00001505
+#define NV40TCL_COLOR_LOGIC_OP_XOR 0x00001506
+#define NV40TCL_COLOR_LOGIC_OP_OR 0x00001507
+#define NV40TCL_COLOR_LOGIC_OP_NOR 0x00001508
+#define NV40TCL_COLOR_LOGIC_OP_EQUIV 0x00001509
+#define NV40TCL_COLOR_LOGIC_OP_INVERT 0x0000150a
+#define NV40TCL_COLOR_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NV40TCL_COLOR_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NV40TCL_COLOR_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NV40TCL_COLOR_LOGIC_OP_NAND 0x0000150e
+#define NV40TCL_COLOR_LOGIC_OP_SET 0x0000150f
+#define NV40TCL_DEPTH_RANGE_NEAR 0x00000394
+#define NV40TCL_DEPTH_RANGE_FAR 0x00000398
+#define NV40TCL_LINE_WIDTH 0x000003b8
+#define NV40TCL_LINE_SMOOTH_ENABLE 0x000003bc
+#define NV40TCL_UNK03C0(x) (0x000003c0+((x)*4))
+#define NV40TCL_UNK03C0__SIZE 0x00000010
+#define NV40TCL_UNK0400(x) (0x00000400+((x)*4))
+#define NV40TCL_UNK0400__SIZE 0x00000010
+#define NV40TCL_UNK0440(x) (0x00000440+((x)*4))
+#define NV40TCL_UNK0440__SIZE 0x00000020
+#define NV40TCL_SCISSOR_HORIZ 0x000008c0
+#define NV40TCL_SCISSOR_HORIZ_X_SHIFT 0
+#define NV40TCL_SCISSOR_HORIZ_X_MASK 0x0000ffff
+#define NV40TCL_SCISSOR_HORIZ_W_SHIFT 16
+#define NV40TCL_SCISSOR_HORIZ_W_MASK 0xffff0000
+#define NV40TCL_SCISSOR_VERT 0x000008c4
+#define NV40TCL_SCISSOR_VERT_Y_SHIFT 0
+#define NV40TCL_SCISSOR_VERT_Y_MASK 0x0000ffff
+#define NV40TCL_SCISSOR_VERT_H_SHIFT 16
+#define NV40TCL_SCISSOR_VERT_H_MASK 0xffff0000
+#define NV40TCL_FOG_MODE 0x000008cc
+#define NV40TCL_FOG_EQUATION_CONSTANT 0x000008d0
+#define NV40TCL_FOG_EQUATION_LINEAR 0x000008d4
+#define NV40TCL_FOG_EQUATION_QUADRATIC 0x000008d8
+#define NV40TCL_FP_ADDRESS 0x000008e4
+#define NV40TCL_FP_ADDRESS_OFFSET_SHIFT 8
+#define NV40TCL_FP_ADDRESS_OFFSET_MASK 0xffffff00
+#define NV40TCL_FP_ADDRESS_DMA1 (1 << 1)
+#define NV40TCL_FP_ADDRESS_DMA0 (1 << 0)
+#define NV40TCL_VIEWPORT_HORIZ 0x00000a00
+#define NV40TCL_VIEWPORT_HORIZ_W_SHIFT 16
+#define NV40TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000
+#define NV40TCL_VIEWPORT_HORIZ_X_SHIFT 0
+#define NV40TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff
+#define NV40TCL_VIEWPORT_VERT 0x00000a04
+#define NV40TCL_VIEWPORT_VERT_H_SHIFT 16
+#define NV40TCL_VIEWPORT_VERT_H_MASK 0xffff0000
+#define NV40TCL_VIEWPORT_VERT_Y_SHIFT 0
+#define NV40TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff
+#define NV40TCL_VIEWPORT_TRANSLATE_X 0x00000a20
+#define NV40TCL_VIEWPORT_TRANSLATE_Y 0x00000a24
+#define NV40TCL_VIEWPORT_TRANSLATE_Z 0x00000a28
+#define NV40TCL_VIEWPORT_TRANSLATE_W 0x00000a2c
+#define NV40TCL_VIEWPORT_SCALE_X 0x00000a30
+#define NV40TCL_VIEWPORT_SCALE_Y 0x00000a34
+#define NV40TCL_VIEWPORT_SCALE_Z 0x00000a38
+#define NV40TCL_VIEWPORT_SCALE_W 0x00000a3c
+#define NV40TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a60
+#define NV40TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64
+#define NV40TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a68
+#define NV40TCL_DEPTH_FUNC 0x00000a6c
+#define NV40TCL_DEPTH_FUNC_NEVER 0x00000200
+#define NV40TCL_DEPTH_FUNC_LESS 0x00000201
+#define NV40TCL_DEPTH_FUNC_EQUAL 0x00000202
+#define NV40TCL_DEPTH_FUNC_LEQUAL 0x00000203
+#define NV40TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV40TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV40TCL_DEPTH_FUNC_NOTEQUAL 0x00000205
+#define NV40TCL_DEPTH_FUNC_GEQUAL 0x00000206
+#define NV40TCL_DEPTH_FUNC_ALWAYS 0x00000207
+#define NV40TCL_DEPTH_WRITE_ENABLE 0x00000a70
+#define NV40TCL_DEPTH_TEST_ENABLE 0x00000a74
+#define NV40TCL_POLYGON_OFFSET_FACTOR 0x00000a78
+#define NV40TCL_POLYGON_OFFSET_UNITS 0x00000a7c
+#define NV40TCL_VTX_ATTR_3I_XY(x) (0x00000a80+((x)*8))
+#define NV40TCL_VTX_ATTR_3I_XY__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_3I_XY_X_SHIFT 0
+#define NV40TCL_VTX_ATTR_3I_XY_X_MASK 0x0000ffff
+#define NV40TCL_VTX_ATTR_3I_XY_Y_SHIFT 16
+#define NV40TCL_VTX_ATTR_3I_XY_Y_MASK 0xffff0000
+#define NV40TCL_VTX_ATTR_3I_Z(x) (0x00000a84+((x)*8))
+#define NV40TCL_VTX_ATTR_3I_Z__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_3I_Z_Z_SHIFT 0
+#define NV40TCL_VTX_ATTR_3I_Z_Z_MASK 0x0000ffff
+#define NV40TCL_UNK0B40(x) (0x00000b40+((x)*4))
+#define NV40TCL_UNK0B40__SIZE 0x00000008
+#define NV40TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4))
+#define NV40TCL_VP_UPLOAD_INST__SIZE 0x00000004
+#define NV40TCL_CLIP_PLANE_ENABLE 0x00001478
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE0 (1 << 1)
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE1 (1 << 5)
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE2 (1 << 9)
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE3 (1 << 13)
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE4 (1 << 17)
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE5 (1 << 21)
+#define NV40TCL_POLYGON_STIPPLE_ENABLE 0x0000147c
+#define NV40TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4))
+#define NV40TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020
+#define NV40TCL_VTX_ATTR_3F_X(x) (0x00001500+((x)*16))
+#define NV40TCL_VTX_ATTR_3F_X__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_3F_Y(x) (0x00001504+((x)*16))
+#define NV40TCL_VTX_ATTR_3F_Y__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_3F_Z(x) (0x00001508+((x)*16))
+#define NV40TCL_VTX_ATTR_3F_Z__SIZE 0x00000010
+#define NV40TCL_VTXBUF_ADDRESS(x) (0x00001680+((x)*4))
+#define NV40TCL_VTXBUF_ADDRESS__SIZE 0x00000010
+#define NV40TCL_VTXBUF_ADDRESS_DMA1 (1 << 31)
+#define NV40TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0
+#define NV40TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff
+#define NV40TCL_VTX_CACHE_INVALIDATE 0x00001714
+#define NV40TCL_VTXFMT(x) (0x00001740+((x)*4))
+#define NV40TCL_VTXFMT__SIZE 0x00000010
+#define NV40TCL_VTXFMT_TYPE_SHIFT 0
+#define NV40TCL_VTXFMT_TYPE_MASK 0x0000000f
+#define NV40TCL_VTXFMT_TYPE_FLOAT 0x00000002
+#define NV40TCL_VTXFMT_TYPE_UBYTE 0x00000004
+#define NV40TCL_VTXFMT_TYPE_USHORT 0x00000005
+#define NV40TCL_VTXFMT_SIZE_SHIFT 4
+#define NV40TCL_VTXFMT_SIZE_MASK 0x000000f0
+#define NV40TCL_VTXFMT_STRIDE_SHIFT 8
+#define NV40TCL_VTXFMT_STRIDE_MASK 0x0000ff00
+#define NV40TCL_QUERY_RESET 0x000017c8
+#define NV40TCL_QUERY_UNK17CC 0x000017cc
+#define NV40TCL_QUERY_GET 0x00001800
+#define NV40TCL_QUERY_GET_UNK24_SHIFT 24
+#define NV40TCL_QUERY_GET_UNK24_MASK 0xff000000
+#define NV40TCL_QUERY_GET_OFFSET_SHIFT 0
+#define NV40TCL_QUERY_GET_OFFSET_MASK 0x00ffffff
+#define NV40TCL_BEGIN_END 0x00001808
+#define NV40TCL_BEGIN_END_STOP 0x00000000
+#define NV40TCL_BEGIN_END_POINTS 0x00000001
+#define NV40TCL_BEGIN_END_LINES 0x00000002
+#define NV40TCL_BEGIN_END_LINE_LOOP 0x00000003
+#define NV40TCL_BEGIN_END_LINE_STRIP 0x00000004
+#define NV40TCL_BEGIN_END_TRIANGLES 0x00000005
+#define NV40TCL_BEGIN_END_TRIANGLE_STRIP 0x00000006
+#define NV40TCL_BEGIN_END_TRIANGLE_FAN 0x00000007
+#define NV40TCL_BEGIN_END_QUADS 0x00000008
+#define NV40TCL_BEGIN_END_QUAD_STRIP 0x00000009
+#define NV40TCL_BEGIN_END_POLYGON 0x0000000a
+#define NV40TCL_VB_ELEMENT_U16 0x0000180c
+#define NV40TCL_VB_ELEMENT_U16_1_SHIFT 16
+#define NV40TCL_VB_ELEMENT_U16_1_MASK 0xffff0000
+#define NV40TCL_VB_ELEMENT_U16_0_SHIFT 0
+#define NV40TCL_VB_ELEMENT_U16_0_MASK 0x0000ffff
+#define NV40TCL_VB_ELEMENT_U32 0x00001810
+#define NV40TCL_VB_VERTEX_BATCH 0x00001814
+#define NV40TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24
+#define NV40TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000
+#define NV40TCL_VB_VERTEX_BATCH_START_SHIFT 0
+#define NV40TCL_VB_VERTEX_BATCH_START_MASK 0x00ffffff
+#define NV40TCL_VERTEX_DATA 0x00001818
+#define NV40TCL_IDXBUF_ADDRESS 0x0000181c
+#define NV40TCL_IDXBUF_FORMAT 0x00001820
+#define NV40TCL_IDXBUF_FORMAT_TYPE_SHIFT 4
+#define NV40TCL_IDXBUF_FORMAT_TYPE_MASK 0x000000f0
+#define NV40TCL_IDXBUF_FORMAT_TYPE_U32 0x00000000
+#define NV40TCL_IDXBUF_FORMAT_TYPE_U16 0x00000010
+#define NV40TCL_IDXBUF_FORMAT_DMA1 (1 << 0)
+#define NV40TCL_VB_INDEX_BATCH 0x00001824
+#define NV40TCL_VB_INDEX_BATCH_COUNT_SHIFT 24
+#define NV40TCL_VB_INDEX_BATCH_COUNT_MASK 0xff000000
+#define NV40TCL_VB_INDEX_BATCH_START_SHIFT 0
+#define NV40TCL_VB_INDEX_BATCH_START_MASK 0x00ffffff
+#define NV40TCL_POLYGON_MODE_FRONT 0x00001828
+#define NV40TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV40TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV40TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NV40TCL_POLYGON_MODE_BACK 0x0000182c
+#define NV40TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV40TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV40TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NV40TCL_CULL_FACE 0x00001830
+#define NV40TCL_CULL_FACE_FRONT 0x00000404
+#define NV40TCL_CULL_FACE_BACK 0x00000405
+#define NV40TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NV40TCL_FRONT_FACE 0x00001834
+#define NV40TCL_FRONT_FACE_CW 0x00000900
+#define NV40TCL_FRONT_FACE_CCW 0x00000901
+#define NV40TCL_POLYGON_SMOOTH_ENABLE 0x00001838
+#define NV40TCL_CULL_FACE_ENABLE 0x0000183c
+#define NV40TCL_TEX_SIZE1(x) (0x00001840+((x)*4))
+#define NV40TCL_TEX_SIZE1__SIZE 0x00000008
+#define NV40TCL_TEX_SIZE1_DEPTH_SHIFT 20
+#define NV40TCL_TEX_SIZE1_DEPTH_MASK 0xfff00000
+#define NV40TCL_TEX_SIZE1_PITCH_SHIFT 0
+#define NV40TCL_TEX_SIZE1_PITCH_MASK 0x0000ffff
+#define NV40TCL_VTX_ATTR_2F_X(x) (0x00001880+((x)*8))
+#define NV40TCL_VTX_ATTR_2F_X__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_2F_Y(x) (0x00001884+((x)*8))
+#define NV40TCL_VTX_ATTR_2F_Y__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_2I(x) (0x00001900+((x)*4))
+#define NV40TCL_VTX_ATTR_2I__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_2I_X_SHIFT 0
+#define NV40TCL_VTX_ATTR_2I_X_MASK 0x0000ffff
+#define NV40TCL_VTX_ATTR_2I_Y_SHIFT 16
+#define NV40TCL_VTX_ATTR_2I_Y_MASK 0xffff0000
+#define NV40TCL_VTX_ATTR_4UB(x) (0x00001940+((x)*4))
+#define NV40TCL_VTX_ATTR_4UB__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4UB_X_SHIFT 0
+#define NV40TCL_VTX_ATTR_4UB_X_MASK 0x000000ff
+#define NV40TCL_VTX_ATTR_4UB_Y_SHIFT 8
+#define NV40TCL_VTX_ATTR_4UB_Y_MASK 0x0000ff00
+#define NV40TCL_VTX_ATTR_4UB_Z_SHIFT 16
+#define NV40TCL_VTX_ATTR_4UB_Z_MASK 0x00ff0000
+#define NV40TCL_VTX_ATTR_4UB_W_SHIFT 24
+#define NV40TCL_VTX_ATTR_4UB_W_MASK 0xff000000
+#define NV40TCL_VTX_ATTR_4I_XY(x) (0x00001980+((x)*8))
+#define NV40TCL_VTX_ATTR_4I_XY__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4I_XY_X_SHIFT 0
+#define NV40TCL_VTX_ATTR_4I_XY_X_MASK 0x0000ffff
+#define NV40TCL_VTX_ATTR_4I_XY_Y_SHIFT 16
+#define NV40TCL_VTX_ATTR_4I_XY_Y_MASK 0xffff0000
+#define NV40TCL_VTX_ATTR_4I_ZW(x) (0x00001984+((x)*8))
+#define NV40TCL_VTX_ATTR_4I_ZW__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4I_ZW_Z_SHIFT 0
+#define NV40TCL_VTX_ATTR_4I_ZW_Z_MASK 0x0000ffff
+#define NV40TCL_VTX_ATTR_4I_ZW_W_SHIFT 16
+#define NV40TCL_VTX_ATTR_4I_ZW_W_MASK 0xffff0000
+#define NV40TCL_TEX_OFFSET(x) (0x00001a00+((x)*32))
+#define NV40TCL_TEX_OFFSET__SIZE 0x00000010
+#define NV40TCL_TEX_FORMAT(x) (0x00001a04+((x)*32))
+#define NV40TCL_TEX_FORMAT__SIZE 0x00000010
+#define NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT 16
+#define NV40TCL_TEX_FORMAT_MIPMAP_COUNT_MASK 0x000f0000
+#define NV40TCL_TEX_FORMAT_RECT (1 << 14)
+#define NV40TCL_TEX_FORMAT_LINEAR (1 << 13)
+#define NV40TCL_TEX_FORMAT_FORMAT_SHIFT 8
+#define NV40TCL_TEX_FORMAT_FORMAT_MASK 0x00001f00
+#define NV40TCL_TEX_FORMAT_FORMAT_L8 0x00000100
+#define NV40TCL_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000200
+#define NV40TCL_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000300
+#define NV40TCL_TEX_FORMAT_FORMAT_R5G6B5 0x00000400
+#define NV40TCL_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000500
+#define NV40TCL_TEX_FORMAT_FORMAT_DXT1 0x00000600
+#define NV40TCL_TEX_FORMAT_FORMAT_DXT3 0x00000700
+#define NV40TCL_TEX_FORMAT_FORMAT_DXT5 0x00000800
+#define NV40TCL_TEX_FORMAT_FORMAT_A8L8 0x00000b00
+#define NV40TCL_TEX_FORMAT_FORMAT_Z24 0x00001000
+#define NV40TCL_TEX_FORMAT_FORMAT_Z16 0x00001200
+#define NV40TCL_TEX_FORMAT_FORMAT_A16 0x00001400
+#define NV40TCL_TEX_FORMAT_FORMAT_A16L16 0x00001500
+#define NV40TCL_TEX_FORMAT_FORMAT_HILO8 0x00001800
+#define NV40TCL_TEX_FORMAT_FORMAT_RGBA16F 0x00001a00
+#define NV40TCL_TEX_FORMAT_FORMAT_RGBA32F 0x00001b00
+#define NV40TCL_TEX_FORMAT_DIMS_SHIFT 4
+#define NV40TCL_TEX_FORMAT_DIMS_MASK 0x000000f0
+#define NV40TCL_TEX_FORMAT_DIMS_1D 0x00000010
+#define NV40TCL_TEX_FORMAT_DIMS_2D 0x00000020
+#define NV40TCL_TEX_FORMAT_DIMS_3D 0x00000030
+#define NV40TCL_TEX_FORMAT_NO_BORDER (1 << 3)
+#define NV40TCL_TEX_FORMAT_CUBIC (1 << 2)
+#define NV40TCL_TEX_FORMAT_DMA1 (1 << 1)
+#define NV40TCL_TEX_FORMAT_DMA0 (1 << 0)
+#define NV40TCL_TEX_WRAP(x) (0x00001a08+((x)*32))
+#define NV40TCL_TEX_WRAP__SIZE 0x00000010
+#define NV40TCL_TEX_WRAP_S_SHIFT 0
+#define NV40TCL_TEX_WRAP_S_MASK 0x000000ff
+#define NV40TCL_TEX_WRAP_S_REPEAT 0x00000001
+#define NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT 0x00000002
+#define NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE 0x00000003
+#define NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER 0x00000004
+#define NV40TCL_TEX_WRAP_S_CLAMP 0x00000005
+#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE 0x00000006
+#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER 0x00000007
+#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP 0x00000008
+#define NV40TCL_TEX_WRAP_T_SHIFT 8
+#define NV40TCL_TEX_WRAP_T_MASK 0x00000f00
+#define NV40TCL_TEX_WRAP_T_REPEAT 0x00000100
+#define NV40TCL_TEX_WRAP_T_MIRRORED_REPEAT 0x00000200
+#define NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE 0x00000300
+#define NV40TCL_TEX_WRAP_T_CLAMP_TO_BORDER 0x00000400
+#define NV40TCL_TEX_WRAP_T_CLAMP 0x00000500
+#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP_TO_EDGE 0x00000600
+#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP_TO_BORDER 0x00000700
+#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP 0x00000800
+#define NV40TCL_TEX_WRAP_EXPAND_NORMAL_SHIFT 12
+#define NV40TCL_TEX_WRAP_EXPAND_NORMAL_MASK 0x0000f000
+#define NV40TCL_TEX_WRAP_R_SHIFT 16
+#define NV40TCL_TEX_WRAP_R_MASK 0x00ff0000
+#define NV40TCL_TEX_WRAP_R_REPEAT 0x00010000
+#define NV40TCL_TEX_WRAP_R_MIRRORED_REPEAT 0x00020000
+#define NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE 0x00030000
+#define NV40TCL_TEX_WRAP_R_CLAMP_TO_BORDER 0x00040000
+#define NV40TCL_TEX_WRAP_R_CLAMP 0x00050000
+#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP_TO_EDGE 0x00060000
+#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP_TO_BORDER 0x00070000
+#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP 0x00080000
+#define NV40TCL_TEX_WRAP_RCOMP_SHIFT 28
+#define NV40TCL_TEX_WRAP_RCOMP_MASK 0xf0000000
+#define NV40TCL_TEX_WRAP_RCOMP_NEVER 0x00000000
+#define NV40TCL_TEX_WRAP_RCOMP_GREATER 0x10000000
+#define NV40TCL_TEX_WRAP_RCOMP_EQUAL 0x20000000
+#define NV40TCL_TEX_WRAP_RCOMP_GEQUAL 0x30000000
+#define NV40TCL_TEX_WRAP_RCOMP_LESS 0x40000000
+#define NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL 0x50000000
+#define NV40TCL_TEX_WRAP_RCOMP_LEQUAL 0x60000000
+#define NV40TCL_TEX_WRAP_RCOMP_ALWAYS 0x70000000
+#define NV40TCL_TEX_ENABLE(x) (0x00001a0c+((x)*32))
+#define NV40TCL_TEX_ENABLE__SIZE 0x00000010
+#define NV40TCL_TEX_ENABLE_ENABLE (1 << 31)
+#define NV40TCL_TEX_ENABLE_MIPMAP_MIN_LOD_SHIFT 27
+#define NV40TCL_TEX_ENABLE_MIPMAP_MIN_LOD_MASK 0x38000000
+#define NV40TCL_TEX_ENABLE_MIPMAP_MAX_LOD_SHIFT 15
+#define NV40TCL_TEX_ENABLE_MIPMAP_MAX_LOD_MASK 0x00038000
+#define NV40TCL_TEX_ENABLE_ANISO_SHIFT 4
+#define NV40TCL_TEX_ENABLE_ANISO_MASK 0x000000f0
+#define NV40TCL_TEX_ENABLE_ANISO_NONE 0x00000000
+#define NV40TCL_TEX_ENABLE_ANISO_2X 0x00000010
+#define NV40TCL_TEX_ENABLE_ANISO_4X 0x00000020
+#define NV40TCL_TEX_ENABLE_ANISO_6X 0x00000030
+#define NV40TCL_TEX_ENABLE_ANISO_8X 0x00000040
+#define NV40TCL_TEX_ENABLE_ANISO_10X 0x00000050
+#define NV40TCL_TEX_ENABLE_ANISO_12X 0x00000060
+#define NV40TCL_TEX_ENABLE_ANISO_16X 0x00000070
+#define NV40TCL_TEX_SWIZZLE(x) (0x00001a10+((x)*32))
+#define NV40TCL_TEX_SWIZZLE__SIZE 0x00000010
+#define NV40TCL_TEX_SWIZZLE_S0_X_SHIFT 14
+#define NV40TCL_TEX_SWIZZLE_S0_X_MASK 0x0000c000
+#define NV40TCL_TEX_SWIZZLE_S0_X_ZERO 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S0_X_ONE 0x00004000
+#define NV40TCL_TEX_SWIZZLE_S0_X_S1 0x00008000
+#define NV40TCL_TEX_SWIZZLE_S0_Y_SHIFT 12
+#define NV40TCL_TEX_SWIZZLE_S0_Y_MASK 0x00003000
+#define NV40TCL_TEX_SWIZZLE_S0_Y_ZERO 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S0_Y_ONE 0x00001000
+#define NV40TCL_TEX_SWIZZLE_S0_Y_S1 0x00002000
+#define NV40TCL_TEX_SWIZZLE_S0_Z_SHIFT 10
+#define NV40TCL_TEX_SWIZZLE_S0_Z_MASK 0x00000c00
+#define NV40TCL_TEX_SWIZZLE_S0_Z_ZERO 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S0_Z_ONE 0x00000400
+#define NV40TCL_TEX_SWIZZLE_S0_Z_S1 0x00000800
+#define NV40TCL_TEX_SWIZZLE_S0_W_SHIFT 8
+#define NV40TCL_TEX_SWIZZLE_S0_W_MASK 0x00000300
+#define NV40TCL_TEX_SWIZZLE_S0_W_ZERO 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S0_W_ONE 0x00000100
+#define NV40TCL_TEX_SWIZZLE_S0_W_S1 0x00000200
+#define NV40TCL_TEX_SWIZZLE_S1_X_SHIFT 6
+#define NV40TCL_TEX_SWIZZLE_S1_X_MASK 0x000000c0
+#define NV40TCL_TEX_SWIZZLE_S1_X_W 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S1_X_Z 0x00000040
+#define NV40TCL_TEX_SWIZZLE_S1_X_Y 0x00000080
+#define NV40TCL_TEX_SWIZZLE_S1_X_X 0x000000c0
+#define NV40TCL_TEX_SWIZZLE_S1_Y_SHIFT 4
+#define NV40TCL_TEX_SWIZZLE_S1_Y_MASK 0x00000030
+#define NV40TCL_TEX_SWIZZLE_S1_Y_W 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S1_Y_Z 0x00000010
+#define NV40TCL_TEX_SWIZZLE_S1_Y_Y 0x00000020
+#define NV40TCL_TEX_SWIZZLE_S1_Y_X 0x00000030
+#define NV40TCL_TEX_SWIZZLE_S1_Z_SHIFT 2
+#define NV40TCL_TEX_SWIZZLE_S1_Z_MASK 0x0000000c
+#define NV40TCL_TEX_SWIZZLE_S1_Z_W 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S1_Z_Z 0x00000004
+#define NV40TCL_TEX_SWIZZLE_S1_Z_Y 0x00000008
+#define NV40TCL_TEX_SWIZZLE_S1_Z_X 0x0000000c
+#define NV40TCL_TEX_SWIZZLE_S1_W_SHIFT 0
+#define NV40TCL_TEX_SWIZZLE_S1_W_MASK 0x00000003
+#define NV40TCL_TEX_SWIZZLE_S1_W_W 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S1_W_Z 0x00000001
+#define NV40TCL_TEX_SWIZZLE_S1_W_Y 0x00000002
+#define NV40TCL_TEX_SWIZZLE_S1_W_X 0x00000003
+#define NV40TCL_TEX_FILTER(x) (0x00001a14+((x)*32))
+#define NV40TCL_TEX_FILTER__SIZE 0x00000010
+#define NV40TCL_TEX_FILTER_SIGNED_ALPHA (1 << 31)
+#define NV40TCL_TEX_FILTER_SIGNED_RED (1 << 30)
+#define NV40TCL_TEX_FILTER_SIGNED_GREEN (1 << 29)
+#define NV40TCL_TEX_FILTER_SIGNED_BLUE (1 << 28)
+#define NV40TCL_TEX_FILTER_MIN_SHIFT 16
+#define NV40TCL_TEX_FILTER_MIN_MASK 0x000f0000
+#define NV40TCL_TEX_FILTER_MIN_NEAREST 0x00010000
+#define NV40TCL_TEX_FILTER_MIN_LINEAR 0x00020000
+#define NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST 0x00030000
+#define NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST 0x00040000
+#define NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR 0x00050000
+#define NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR 0x00060000
+#define NV40TCL_TEX_FILTER_MAG_SHIFT 24
+#define NV40TCL_TEX_FILTER_MAG_MASK 0x0f000000
+#define NV40TCL_TEX_FILTER_MAG_NEAREST 0x01000000
+#define NV40TCL_TEX_FILTER_MAG_LINEAR 0x02000000
+#define NV40TCL_TEX_SIZE0(x) (0x00001a18+((x)*32))
+#define NV40TCL_TEX_SIZE0__SIZE 0x00000010
+#define NV40TCL_TEX_SIZE0_H_SHIFT 0
+#define NV40TCL_TEX_SIZE0_H_MASK 0x0000ffff
+#define NV40TCL_TEX_SIZE0_W_SHIFT 16
+#define NV40TCL_TEX_SIZE0_W_MASK 0xffff0000
+#define NV40TCL_TEX_BORDER_COLOR(x) (0x00001a1c+((x)*32))
+#define NV40TCL_TEX_BORDER_COLOR__SIZE 0x00000010
+#define NV40TCL_TEX_BORDER_COLOR_B_SHIFT 0
+#define NV40TCL_TEX_BORDER_COLOR_B_MASK 0x000000ff
+#define NV40TCL_TEX_BORDER_COLOR_G_SHIFT 8
+#define NV40TCL_TEX_BORDER_COLOR_G_MASK 0x0000ff00
+#define NV40TCL_TEX_BORDER_COLOR_R_SHIFT 16
+#define NV40TCL_TEX_BORDER_COLOR_R_MASK 0x00ff0000
+#define NV40TCL_TEX_BORDER_COLOR_A_SHIFT 24
+#define NV40TCL_TEX_BORDER_COLOR_A_MASK 0xff000000
+#define NV40TCL_VTX_ATTR_4F_X(x) (0x00001c00+((x)*16))
+#define NV40TCL_VTX_ATTR_4F_X__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4F_Y(x) (0x00001c04+((x)*16))
+#define NV40TCL_VTX_ATTR_4F_Y__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4F_Z(x) (0x00001c08+((x)*16))
+#define NV40TCL_VTX_ATTR_4F_Z__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4F_W(x) (0x00001c0c+((x)*16))
+#define NV40TCL_VTX_ATTR_4F_W__SIZE 0x00000010
+#define NV40TCL_FP_CONTROL 0x00001d60
+#define NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT 24
+#define NV40TCL_FP_CONTROL_TEMP_COUNT_MASK 0xff000000
+#define NV40TCL_FP_CONTROL_KIL (1 << 7)
+#define NV40TCL_MULTISAMPLE_CONTROL 0x00001d7c
+#define NV40TCL_CLEAR_VALUE_DEPTH 0x00001d8c
+#define NV40TCL_CLEAR_VALUE_COLOR 0x00001d90
+#define NV40TCL_CLEAR_BUFFERS 0x00001d94
+#define NV40TCL_CLEAR_BUFFERS_COLOR_A (1 << 7)
+#define NV40TCL_CLEAR_BUFFERS_COLOR_B (1 << 6)
+#define NV40TCL_CLEAR_BUFFERS_COLOR_G (1 << 5)
+#define NV40TCL_CLEAR_BUFFERS_COLOR_R (1 << 4)
+#define NV40TCL_CLEAR_BUFFERS_STENCIL (1 << 1)
+#define NV40TCL_CLEAR_BUFFERS_DEPTH (1 << 0)
+#define NV40TCL_LINE_STIPPLE_ENABLE 0x00001db4
+#define NV40TCL_LINE_STIPPLE_PATTERN 0x00001db8
+#define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_SHIFT 0
+#define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff
+#define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16
+#define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000
+#define NV40TCL_VTX_ATTR_1F(x) (0x00001e40+((x)*4))
+#define NV40TCL_VTX_ATTR_1F__SIZE 0x00000010
+#define NV40TCL_VP_UPLOAD_FROM_ID 0x00001e9c
+#define NV40TCL_VP_START_FROM_ID 0x00001ea0
+#define NV40TCL_POINT_SIZE 0x00001ee0
+#define NV40TCL_POINT_SPRITE 0x00001ee8
+#define NV40TCL_VP_UPLOAD_CONST_ID 0x00001efc
+#define NV40TCL_VP_UPLOAD_CONST_X(x) (0x00001f00+((x)*16))
+#define NV40TCL_VP_UPLOAD_CONST_X__SIZE 0x00000004
+#define NV40TCL_VP_UPLOAD_CONST_Y(x) (0x00001f04+((x)*16))
+#define NV40TCL_VP_UPLOAD_CONST_Y__SIZE 0x00000004
+#define NV40TCL_VP_UPLOAD_CONST_Z(x) (0x00001f08+((x)*16))
+#define NV40TCL_VP_UPLOAD_CONST_Z__SIZE 0x00000004
+#define NV40TCL_VP_UPLOAD_CONST_W(x) (0x00001f0c+((x)*16))
+#define NV40TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004
+#define NV40TCL_TEX_CACHE_CTL 0x00001fd8
+#define NV40TCL_VP_ATTRIB_EN 0x00001ff0
+#define NV40TCL_VP_RESULT_EN 0x00001ff4
+
+
+#define NV44TCL 0x00004497
+
+
+
+#define NV50_2D 0x0000502d
+
+#define NV50_2D_NOP 0x00000100
+#define NV50_2D_NOTIFY 0x00000104
+#define NV50_2D_DMA_NOTIFY 0x00000180
+#define NV50_2D_DMA_IN_MEMORY0 0x00000184
+#define NV50_2D_DMA_IN_MEMORY1 0x00000188
+#define NV50_2D_DMA_IN_MEMORY2 0x0000018c
+#define NV50_2D_DST_FORMAT 0x00000200
+#define NV50_2D_DST_FORMAT_32BPP 0x000000cf
+#define NV50_2D_DST_FORMAT_24BPP 0x000000e6
+#define NV50_2D_DST_FORMAT_16BPP 0x000000e8
+#define NV50_2D_DST_FORMAT_8BPP 0x000000f3
+#define NV50_2D_DST_FORMAT_15BPP 0x000000f8
+#define NV50_2D_DST_PITCH 0x00000214
+#define NV50_2D_DST_WIDTH 0x00000218
+#define NV50_2D_DST_HEIGHT 0x0000021c
+#define NV50_2D_DST_ADDRESS_HIGH 0x00000220
+#define NV50_2D_DST_ADDRESS_LOW 0x00000224
+#define NV50_2D_SRC_FORMAT 0x00000230
+#define NV50_2D_SRC_FORMAT_32BPP 0x000000cf
+#define NV50_2D_SRC_FORMAT_24BPP 0x000000e6
+#define NV50_2D_SRC_FORMAT_16BPP 0x000000e8
+#define NV50_2D_SRC_FORMAT_8BPP 0x000000f3
+#define NV50_2D_SRC_FORMAT_15BPP 0x000000f8
+#define NV50_2D_SRC_PITCH 0x00000244
+#define NV50_2D_SRC_WIDTH 0x00000248
+#define NV50_2D_SRC_HEIGHT 0x0000024c
+#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250
+#define NV50_2D_SRC_ADDRESS_LOW 0x00000254
+#define NV50_2D_CLIP_X 0x00000280
+#define NV50_2D_CLIP_Y 0x00000284
+#define NV50_2D_CLIP_Z 0x00000288
+#define NV50_2D_CLIP_W 0x0000028c
+#define NV50_2D_ROP 0x000002a0
+#define NV50_2D_OPERATION 0x000002ac
+#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000
+#define NV50_2D_OPERATION_ROP_AND 0x00000001
+#define NV50_2D_OPERATION_BLEND_AND 0x00000002
+#define NV50_2D_OPERATION_SRCCOPY 0x00000003
+#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000005
+#define NV50_2D_PATTERN_FORMAT 0x000002e8
+#define NV50_2D_PATTERN_FORMAT_16BPP 0x00000000
+#define NV50_2D_PATTERN_FORMAT_15BPP 0x00000001
+#define NV50_2D_PATTERN_FORMAT_32BPP 0x00000002
+#define NV50_2D_PATTERN_FORMAT_8BPP 0x00000003
+#define NV50_2D_PATTERN_COLOR(x) (0x000002f0+((x)*4))
+#define NV50_2D_PATTERN_COLOR__SIZE 0x00000002
+#define NV50_2D_PATTERN_BITMAP(x) (0x000002f8+((x)*4))
+#define NV50_2D_PATTERN_BITMAP__SIZE 0x00000002
+#define NV50_2D_RECT_FORMAT 0x00000584
+#define NV50_2D_RECT_FORMAT_32BPP 0x000000cf
+#define NV50_2D_RECT_FORMAT_24BPP 0x000000e6
+#define NV50_2D_RECT_FORMAT_16BPP 0x000000e8
+#define NV50_2D_RECT_FORMAT_8BPP 0x000000f3
+#define NV50_2D_RECT_FORMAT_15BPP 0x000000f8
+#define NV50_2D_RECT_COLOR 0x00000588
+#define NV50_2D_RECT_X1 0x00000600
+#define NV50_2D_RECT_Y1 0x00000604
+#define NV50_2D_RECT_X2 0x00000608
+#define NV50_2D_RECT_Y2 0x0000060c
+#define NV50_2D_SIFC_UNK0800 0x00000800
+#define NV50_2D_SIFC_FORMAT 0x00000804
+#define NV50_2D_SIFC_FORMAT_32BPP 0x000000cf
+#define NV50_2D_SIFC_FORMAT_24BPP 0x000000e6
+#define NV50_2D_SIFC_FORMAT_16BPP 0x000000e8
+#define NV50_2D_SIFC_FORMAT_8BPP 0x000000f3
+#define NV50_2D_SIFC_FORMAT_15BPP 0x000000f8
+#define NV50_2D_SIFC_WIDTH 0x00000838
+#define NV50_2D_SIFC_HEIGHT 0x0000083c
+#define NV50_2D_SIFC_SCALE_UNK0840 0x00000840
+#define NV50_2D_SIFC_SCALE_UNK0844 0x00000844
+#define NV50_2D_SIFC_SCALE_UNK0848 0x00000848
+#define NV50_2D_SIFC_SCALE_UNK084C 0x0000084c
+#define NV50_2D_SIFC_UNK0850 0x00000850
+#define NV50_2D_SIFC_DST_X 0x00000854
+#define NV50_2D_SIFC_UNK0858 0x00000858
+#define NV50_2D_SIFC_DST_Y 0x0000085c
+#define NV50_2D_SIFC_DATA 0x00000860
+#define NV50_2D_BLIT_DST_X 0x000008b0
+#define NV50_2D_BLIT_DST_Y 0x000008b4
+#define NV50_2D_BLIT_DST_W 0x000008b8
+#define NV50_2D_BLIT_DST_H 0x000008bc
+#define NV50_2D_BLIT_SRC_X 0x000008d4
+#define NV50_2D_BLIT_SRC_Y 0x000008dc
+
+
+#define NV50_MEMORY_TO_MEMORY_FORMAT 0x00005039
+
+#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH 0x00000238
+#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x0000023c
+
+
+#define NV50TCL 0x00005097
+
+#define NV50TCL_NOP 0x00000100
+#define NV50TCL_NOTIFY 0x00000104
+#define NV50TCL_DMA_NOTIFY 0x00000180
+#define NV50TCL_DMA_UNK0(x) (0x00000184+((x)*4))
+#define NV50TCL_DMA_UNK0__SIZE 0x0000000b
+#define NV50TCL_DMA_UNK1(x) (0x000001c0+((x)*4))
+#define NV50TCL_DMA_UNK1__SIZE 0x00000008
+#define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32))
+#define NV50TCL_RT_ADDRESS_HIGH__SIZE 0x00000008
+#define NV50TCL_RT_ADDRESS_LOW(x) (0x00000204+((x)*32))
+#define NV50TCL_RT_ADDRESS_LOW__SIZE 0x00000008
+#define NV50TCL_RT_FORMAT(x) (0x00000208+((x)*32))
+#define NV50TCL_RT_FORMAT__SIZE 0x00000008
+#define NV50TCL_RT_FORMAT_32BPP 0x000000cf
+#define NV50TCL_RT_FORMAT_24BPP 0x000000e6
+#define NV50TCL_RT_FORMAT_16BPP 0x000000e8
+#define NV50TCL_RT_FORMAT_8BPP 0x000000f3
+#define NV50TCL_RT_FORMAT_15BPP 0x000000f8
+#define NV50TCL_RT_TILE_UNK(x) (0x0000020c+((x)*32))
+#define NV50TCL_RT_TILE_UNK__SIZE 0x00000008
+#define NV50TCL_RT_UNK4(x) (0x00000210+((x)*32))
+#define NV50TCL_RT_UNK4__SIZE 0x00000008
+#define NV50TCL_VTX_ATTR_1F(x) (0x00000300+((x)*4))
+#define NV50TCL_VTX_ATTR_1F__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_2F_X(x) (0x00000380+((x)*8))
+#define NV50TCL_VTX_ATTR_2F_X__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_2F_Y(x) (0x00000384+((x)*8))
+#define NV50TCL_VTX_ATTR_2F_Y__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_3F_X(x) (0x00000400+((x)*16))
+#define NV50TCL_VTX_ATTR_3F_X__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_3F_Y(x) (0x00000404+((x)*16))
+#define NV50TCL_VTX_ATTR_3F_Y__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_3F_Z(x) (0x00000408+((x)*16))
+#define NV50TCL_VTX_ATTR_3F_Z__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_3F_W(x) (0x0000040c+((x)*16))
+#define NV50TCL_VTX_ATTR_3F_W__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4F_X(x) (0x00000500+((x)*16))
+#define NV50TCL_VTX_ATTR_4F_X__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4F_Y(x) (0x00000504+((x)*16))
+#define NV50TCL_VTX_ATTR_4F_Y__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4F_Z(x) (0x00000508+((x)*16))
+#define NV50TCL_VTX_ATTR_4F_Z__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4F_W(x) (0x0000050c+((x)*16))
+#define NV50TCL_VTX_ATTR_4F_W__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_2I(x) (0x00000680+((x)*4))
+#define NV50TCL_VTX_ATTR_2I__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_2I_X_SHIFT 0
+#define NV50TCL_VTX_ATTR_2I_X_MASK 0x0000ffff
+#define NV50TCL_VTX_ATTR_2I_Y_SHIFT 16
+#define NV50TCL_VTX_ATTR_2I_Y_MASK 0xffff0000
+#define NV50TCL_VTX_ATTR_4I_0(x) (0x00000700+((x)*8))
+#define NV50TCL_VTX_ATTR_4I_0__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4I_0_X_SHIFT 0
+#define NV50TCL_VTX_ATTR_4I_0_X_MASK 0x0000ffff
+#define NV50TCL_VTX_ATTR_4I_0_Y_SHIFT 16
+#define NV50TCL_VTX_ATTR_4I_0_Y_MASK 0xffff0000
+#define NV50TCL_VTX_ATTR_4I_1(x) (0x00000704+((x)*8))
+#define NV50TCL_VTX_ATTR_4I_1__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4I_1_Z_SHIFT 0
+#define NV50TCL_VTX_ATTR_4I_1_Z_MASK 0x0000ffff
+#define NV50TCL_VTX_ATTR_4I_1_W_SHIFT 16
+#define NV50TCL_VTX_ATTR_4I_1_W_MASK 0xffff0000
+#define NV50TCL_VTX_ATTR_4NI_0(x) (0x00000780+((x)*8))
+#define NV50TCL_VTX_ATTR_4NI_0__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4NI_0_X_SHIFT 0
+#define NV50TCL_VTX_ATTR_4NI_0_X_MASK 0x0000ffff
+#define NV50TCL_VTX_ATTR_4NI_0_Y_SHIFT 16
+#define NV50TCL_VTX_ATTR_4NI_0_Y_MASK 0xffff0000
+#define NV50TCL_VTX_ATTR_4NI_1(x) (0x00000784+((x)*8))
+#define NV50TCL_VTX_ATTR_4NI_1__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4NI_1_Z_SHIFT 0
+#define NV50TCL_VTX_ATTR_4NI_1_Z_MASK 0x0000ffff
+#define NV50TCL_VTX_ATTR_4NI_1_W_SHIFT 16
+#define NV50TCL_VTX_ATTR_4NI_1_W_MASK 0xffff0000
+#define NV50TCL_VERTEX_ARRAY_FORMAT(x) (0x00000900+((x)*16))
+#define NV50TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010
+#define NV50TCL_VIEWPORT_UNK0(x) (0x00000a00+((x)*4))
+#define NV50TCL_VIEWPORT_UNK0__SIZE 0x00000003
+#define NV50TCL_VIEWPORT_UNK1(x) (0x00000a0c+((x)*4))
+#define NV50TCL_VIEWPORT_UNK1__SIZE 0x00000003
+#define NV50TCL_VIEWPORT_HORIZ 0x00000c00
+#define NV50TCL_VIEWPORT_HORIZ_X_SHIFT 0
+#define NV50TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff
+#define NV50TCL_VIEWPORT_HORIZ_W_SHIFT 16
+#define NV50TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000
+#define NV50TCL_VIEWPORT_VERT 0x00000c04
+#define NV50TCL_VIEWPORT_VERT_Y_SHIFT 0
+#define NV50TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff
+#define NV50TCL_VIEWPORT_VERT_H_SHIFT 16
+#define NV50TCL_VIEWPORT_VERT_H_MASK 0xffff0000
+#define NV50TCL_DEPTH_RANGE_NEAR 0x00000c08
+#define NV50TCL_DEPTH_RANGE_FAR 0x00000c0c
+#define NV50TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8))
+#define NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV50TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8))
+#define NV50TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV50TCL_VERTEX_BUFFER_FIRST 0x00000d74
+#define NV50TCL_VERTEX_BUFFER_COUNT 0x00000d78
+#define NV50TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4))
+#define NV50TCL_CLEAR_COLOR__SIZE 0x00000004
+#define NV50TCL_CLEAR_DEPTH 0x00000d90
+#define NV50TCL_CLEAR_STENCIL 0x00000da0
+#define NV50TCL_POLYGON_MODE_FRONT 0x00000dac
+#define NV50TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV50TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV50TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NV50TCL_POLYGON_MODE_BACK 0x00000db0
+#define NV50TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV50TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV50TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NV50TCL_POLYGON_SMOOTH_ENABLE 0x00000db4
+#define NV50TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
+#define NV50TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
+#define NV50TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
+#define NV50TCL_SCISSOR_HORIZ 0x00000e04
+#define NV50TCL_SCISSOR_HORIZ_L_SHIFT 0
+#define NV50TCL_SCISSOR_HORIZ_L_MASK 0x0000ffff
+#define NV50TCL_SCISSOR_HORIZ_R_SHIFT 16
+#define NV50TCL_SCISSOR_HORIZ_R_MASK 0xffff0000
+#define NV50TCL_SCISSOR_VERT 0x00000e08
+#define NV50TCL_SCISSOR_VERT_T_SHIFT 0
+#define NV50TCL_SCISSOR_VERT_T_MASK 0x0000ffff
+#define NV50TCL_SCISSOR_VERT_B_SHIFT 16
+#define NV50TCL_SCISSOR_VERT_B_MASK 0xffff0000
+#define NV50TCL_CB_ADDR 0x00000f00
+#define NV50TCL_CB_ADDR_ID_SHIFT 8
+#define NV50TCL_CB_ADDR_ID_MASK 0xffffff00
+#define NV50TCL_CB_ADDR_BUFFER_SHIFT 0
+#define NV50TCL_CB_ADDR_BUFFER_MASK 0x000000ff
+#define NV50TCL_CB_DATA(x) (0x00000f04+((x)*4))
+#define NV50TCL_CB_DATA__SIZE 0x00000010
+#define NV50TCL_STENCIL_FRONT_FUNC_REF 0x00000f54
+#define NV50TCL_STENCIL_FRONT_MASK 0x00000f58
+#define NV50TCL_STENCIL_FRONT_FUNC_MASK 0x00000f5c
+#define NV50TCL_GP_ADDRESS_HIGH 0x00000f70
+#define NV50TCL_GP_ADDRESS_LOW 0x00000f74
+#define NV50TCL_VP_ADDRESS_HIGH 0x00000f7c
+#define NV50TCL_VP_ADDRESS_LOW 0x00000f80
+#define NV50TCL_FP_ADDRESS_HIGH 0x00000fa4
+#define NV50TCL_FP_ADDRESS_LOW 0x00000fa8
+#define NV50TCL_ZETA_ADDRESS_HIGH 0x00000fe0
+#define NV50TCL_ZETA_ADDRESS_LOW 0x00000fe4
+#define NV50TCL_UNKFF4 0x00000ff4
+#define NV50TCL_UNKFF4_W_SHIFT 16
+#define NV50TCL_UNKFF4_W_MASK 0xffff0000
+#define NV50TCL_UNKFF8 0x00000ff8
+#define NV50TCL_UNKFF8_H_SHIFT 16
+#define NV50TCL_UNKFF8_H_MASK 0xffff0000
+#define NV50TCL_RT_HORIZ(x) (0x00001240+((x)*8))
+#define NV50TCL_RT_HORIZ__SIZE 0x00000008
+#define NV50TCL_RT_VERT(x) (0x00001244+((x)*8))
+#define NV50TCL_RT_VERT__SIZE 0x00000008
+#define NV50TCL_CB_DEF_ADDRESS_HIGH 0x00001280
+#define NV50TCL_CB_DEF_ADDRESS_LOW 0x00001284
+#define NV50TCL_CB_DEF_SET 0x00001288
+#define NV50TCL_CB_DEF_SET_SIZE_SHIFT 0
+#define NV50TCL_CB_DEF_SET_SIZE_MASK 0x0000ffff
+#define NV50TCL_CB_DEF_SET_BUFFER_SHIFT 16
+#define NV50TCL_CB_DEF_SET_BUFFER_MASK 0xffff0000
+#define NV50TCL_DEPTH_TEST_ENABLE 0x000012cc
+#define NV50TCL_SHADE_MODEL 0x000012d4
+#define NV50TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NV50TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NV50TCL_DEPTH_WRITE_ENABLE 0x000012e8
+#define NV50TCL_ALPHA_TEST_ENABLE 0x000012ec
+#define NV50TCL_DEPTH_TEST_FUNC 0x0000130c
+#define NV50TCL_DEPTH_TEST_FUNC_NEVER 0x00000200
+#define NV50TCL_DEPTH_TEST_FUNC_LESS 0x00000201
+#define NV50TCL_DEPTH_TEST_FUNC_EQUAL 0x00000202
+#define NV50TCL_DEPTH_TEST_FUNC_LEQUAL 0x00000203
+#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NV50TCL_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV50TCL_DEPTH_TEST_FUNC_GEQUAL 0x00000206
+#define NV50TCL_DEPTH_TEST_FUNC_ALWAYS 0x00000207
+#define NV50TCL_ALPHA_TEST_REF 0x00001310
+#define NV50TCL_ALPHA_TEST_FUNC 0x00001314
+#define NV50TCL_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NV50TCL_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NV50TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NV50TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV50TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV50TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NV50TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+#define NV50TCL_BLEND_COLOR(x) (0x0000131c+((x)*4))
+#define NV50TCL_BLEND_COLOR__SIZE 0x00000004
+#define NV50TCL_BLEND_EQUATION_RGB 0x00001340
+#define NV50TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NV50TCL_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NV50TCL_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV50TCL_BLEND_FUNC_SRC_RGB 0x00001344
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_FUNC_DST_RGB 0x00001348
+#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_EQUATION_ALPHA 0x0000134c
+#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NV50TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NV50TCL_BLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA 0x00001350
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00000001
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00000300
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00000302
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00000304
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00000306
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00000307
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00000308
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x00008001
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x00008003
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_FUNC_DST_ALPHA 0x00001358
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00000001
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00000300
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00000302
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00000304
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00000306
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00000307
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00000308
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x00008001
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x00008003
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_ENABLE(x) (0x00001360+((x)*4))
+#define NV50TCL_BLEND_ENABLE__SIZE 0x00000008
+#define NV50TCL_STENCIL_BACK_ENABLE 0x00001380
+#define NV50TCL_STENCIL_BACK_OP_FAIL 0x00001384
+#define NV50TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NV50TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NV50TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NV50TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL 0x00001388
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_BACK_OP_ZPASS 0x0000138c
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC 0x00001390
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+#define NV50TCL_STENCIL_BACK_FUNC_REF 0x00001394
+#define NV50TCL_STENCIL_BACK_MASK 0x00001398
+#define NV50TCL_STENCIL_BACK_FUNC_MASK 0x0000139c
+#define NV50TCL_LINE_WIDTH 0x000013b0
+#define NV50TCL_VP_START_ID 0x0000140c
+#define NV50TCL_GP_START_ID 0x00001410
+#define NV50TCL_FP_START_ID 0x00001414
+#define NV50TCL_POINT_SIZE 0x00001518
+#define NV50TCL_TSC_ADDRESS_HIGH 0x0000155c
+#define NV50TCL_TSC_ADDRESS_LOW 0x00001560
+#define NV50TCL_POLYGON_OFFSET_FACTOR 0x0000156c
+#define NV50TCL_LINE_SMOOTH_ENABLE 0x00001570
+#define NV50TCL_TIC_ADDRESS_HIGH 0x00001574
+#define NV50TCL_TIC_ADDRESS_LOW 0x00001578
+#define NV50TCL_STENCIL_FRONT_ENABLE 0x00001594
+#define NV50TCL_STENCIL_FRONT_OP_FAIL 0x00001598
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL 0x0000159c
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS 0x000015a0
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC 0x000015a4
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+#define NV50TCL_POLYGON_OFFSET_UNITS 0x000015bc
+#define NV50TCL_VERTEX_BEGIN 0x000015dc
+#define NV50TCL_VERTEX_BEGIN_POINTS 0x00000000
+#define NV50TCL_VERTEX_BEGIN_LINES 0x00000001
+#define NV50TCL_VERTEX_BEGIN_LINE_LOOP 0x00000002
+#define NV50TCL_VERTEX_BEGIN_LINE_STRIP 0x00000003
+#define NV50TCL_VERTEX_BEGIN_TRIANGLES 0x00000004
+#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP 0x00000005
+#define NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN 0x00000006
+#define NV50TCL_VERTEX_BEGIN_QUADS 0x00000007
+#define NV50TCL_VERTEX_BEGIN_QUAD_STRIP 0x00000008
+#define NV50TCL_VERTEX_BEGIN_POLYGON 0x00000009
+#define NV50TCL_VERTEX_END 0x000015e0
+#define NV50TCL_VERTEX_DATA 0x00001640
+#define NV50TCL_VP_ATTR_EN_0 0x00001650
+#define NV50TCL_VP_ATTR_EN_0_7_SHIFT 28
+#define NV50TCL_VP_ATTR_EN_0_7_MASK 0xf0000000
+#define NV50TCL_VP_ATTR_EN_0_7_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_7_XNNN 0x10000000
+#define NV50TCL_VP_ATTR_EN_0_7_NYNN 0x20000000
+#define NV50TCL_VP_ATTR_EN_0_7_XYNN 0x30000000
+#define NV50TCL_VP_ATTR_EN_0_7_NNZN 0x40000000
+#define NV50TCL_VP_ATTR_EN_0_7_XNZN 0x50000000
+#define NV50TCL_VP_ATTR_EN_0_7_NYZN 0x60000000
+#define NV50TCL_VP_ATTR_EN_0_7_XYZN 0x70000000
+#define NV50TCL_VP_ATTR_EN_0_7_NNNW 0x80000000
+#define NV50TCL_VP_ATTR_EN_0_7_XNNW 0x90000000
+#define NV50TCL_VP_ATTR_EN_0_7_NYNW 0xa0000000
+#define NV50TCL_VP_ATTR_EN_0_7_XYNW 0xb0000000
+#define NV50TCL_VP_ATTR_EN_0_7_NNZW 0xc0000000
+#define NV50TCL_VP_ATTR_EN_0_7_XNZW 0xd0000000
+#define NV50TCL_VP_ATTR_EN_0_7_NYZW 0xe0000000
+#define NV50TCL_VP_ATTR_EN_0_7_XYZW 0xf0000000
+#define NV50TCL_VP_ATTR_EN_0_6_SHIFT 24
+#define NV50TCL_VP_ATTR_EN_0_6_MASK 0x0f000000
+#define NV50TCL_VP_ATTR_EN_0_6_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_6_XNNN 0x01000000
+#define NV50TCL_VP_ATTR_EN_0_6_NYNN 0x02000000
+#define NV50TCL_VP_ATTR_EN_0_6_XYNN 0x03000000
+#define NV50TCL_VP_ATTR_EN_0_6_NNZN 0x04000000
+#define NV50TCL_VP_ATTR_EN_0_6_XNZN 0x05000000
+#define NV50TCL_VP_ATTR_EN_0_6_NYZN 0x06000000
+#define NV50TCL_VP_ATTR_EN_0_6_XYZN 0x07000000
+#define NV50TCL_VP_ATTR_EN_0_6_NNNW 0x08000000
+#define NV50TCL_VP_ATTR_EN_0_6_XNNW 0x09000000
+#define NV50TCL_VP_ATTR_EN_0_6_NYNW 0x0a000000
+#define NV50TCL_VP_ATTR_EN_0_6_XYNW 0x0b000000
+#define NV50TCL_VP_ATTR_EN_0_6_NNZW 0x0c000000
+#define NV50TCL_VP_ATTR_EN_0_6_XNZW 0x0d000000
+#define NV50TCL_VP_ATTR_EN_0_6_NYZW 0x0e000000
+#define NV50TCL_VP_ATTR_EN_0_6_XYZW 0x0f000000
+#define NV50TCL_VP_ATTR_EN_0_5_SHIFT 20
+#define NV50TCL_VP_ATTR_EN_0_5_MASK 0x00f00000
+#define NV50TCL_VP_ATTR_EN_0_5_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_5_XNNN 0x00100000
+#define NV50TCL_VP_ATTR_EN_0_5_NYNN 0x00200000
+#define NV50TCL_VP_ATTR_EN_0_5_XYNN 0x00300000
+#define NV50TCL_VP_ATTR_EN_0_5_NNZN 0x00400000
+#define NV50TCL_VP_ATTR_EN_0_5_XNZN 0x00500000
+#define NV50TCL_VP_ATTR_EN_0_5_NYZN 0x00600000
+#define NV50TCL_VP_ATTR_EN_0_5_XYZN 0x00700000
+#define NV50TCL_VP_ATTR_EN_0_5_NNNW 0x00800000
+#define NV50TCL_VP_ATTR_EN_0_5_XNNW 0x00900000
+#define NV50TCL_VP_ATTR_EN_0_5_NYNW 0x00a00000
+#define NV50TCL_VP_ATTR_EN_0_5_XYNW 0x00b00000
+#define NV50TCL_VP_ATTR_EN_0_5_NNZW 0x00c00000
+#define NV50TCL_VP_ATTR_EN_0_5_XNZW 0x00d00000
+#define NV50TCL_VP_ATTR_EN_0_5_NYZW 0x00e00000
+#define NV50TCL_VP_ATTR_EN_0_5_XYZW 0x00f00000
+#define NV50TCL_VP_ATTR_EN_0_4_SHIFT 16
+#define NV50TCL_VP_ATTR_EN_0_4_MASK 0x000f0000
+#define NV50TCL_VP_ATTR_EN_0_4_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_4_XNNN 0x00010000
+#define NV50TCL_VP_ATTR_EN_0_4_NYNN 0x00020000
+#define NV50TCL_VP_ATTR_EN_0_4_XYNN 0x00030000
+#define NV50TCL_VP_ATTR_EN_0_4_NNZN 0x00040000
+#define NV50TCL_VP_ATTR_EN_0_4_XNZN 0x00050000
+#define NV50TCL_VP_ATTR_EN_0_4_NYZN 0x00060000
+#define NV50TCL_VP_ATTR_EN_0_4_XYZN 0x00070000
+#define NV50TCL_VP_ATTR_EN_0_4_NNNW 0x00080000
+#define NV50TCL_VP_ATTR_EN_0_4_XNNW 0x00090000
+#define NV50TCL_VP_ATTR_EN_0_4_NYNW 0x000a0000
+#define NV50TCL_VP_ATTR_EN_0_4_XYNW 0x000b0000
+#define NV50TCL_VP_ATTR_EN_0_4_NNZW 0x000c0000
+#define NV50TCL_VP_ATTR_EN_0_4_XNZW 0x000d0000
+#define NV50TCL_VP_ATTR_EN_0_4_NYZW 0x000e0000
+#define NV50TCL_VP_ATTR_EN_0_4_XYZW 0x000f0000
+#define NV50TCL_VP_ATTR_EN_0_3_SHIFT 12
+#define NV50TCL_VP_ATTR_EN_0_3_MASK 0x0000f000
+#define NV50TCL_VP_ATTR_EN_0_3_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_3_XNNN 0x00001000
+#define NV50TCL_VP_ATTR_EN_0_3_NYNN 0x00002000
+#define NV50TCL_VP_ATTR_EN_0_3_XYNN 0x00003000
+#define NV50TCL_VP_ATTR_EN_0_3_NNZN 0x00004000
+#define NV50TCL_VP_ATTR_EN_0_3_XNZN 0x00005000
+#define NV50TCL_VP_ATTR_EN_0_3_NYZN 0x00006000
+#define NV50TCL_VP_ATTR_EN_0_3_XYZN 0x00007000
+#define NV50TCL_VP_ATTR_EN_0_3_NNNW 0x00008000
+#define NV50TCL_VP_ATTR_EN_0_3_XNNW 0x00009000
+#define NV50TCL_VP_ATTR_EN_0_3_NYNW 0x0000a000
+#define NV50TCL_VP_ATTR_EN_0_3_XYNW 0x0000b000
+#define NV50TCL_VP_ATTR_EN_0_3_NNZW 0x0000c000
+#define NV50TCL_VP_ATTR_EN_0_3_XNZW 0x0000d000
+#define NV50TCL_VP_ATTR_EN_0_3_NYZW 0x0000e000
+#define NV50TCL_VP_ATTR_EN_0_3_XYZW 0x0000f000
+#define NV50TCL_VP_ATTR_EN_0_2_SHIFT 8
+#define NV50TCL_VP_ATTR_EN_0_2_MASK 0x00000f00
+#define NV50TCL_VP_ATTR_EN_0_2_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_2_XNNN 0x00000100
+#define NV50TCL_VP_ATTR_EN_0_2_NYNN 0x00000200
+#define NV50TCL_VP_ATTR_EN_0_2_XYNN 0x00000300
+#define NV50TCL_VP_ATTR_EN_0_2_NNZN 0x00000400
+#define NV50TCL_VP_ATTR_EN_0_2_XNZN 0x00000500
+#define NV50TCL_VP_ATTR_EN_0_2_NYZN 0x00000600
+#define NV50TCL_VP_ATTR_EN_0_2_XYZN 0x00000700
+#define NV50TCL_VP_ATTR_EN_0_2_NNNW 0x00000800
+#define NV50TCL_VP_ATTR_EN_0_2_XNNW 0x00000900
+#define NV50TCL_VP_ATTR_EN_0_2_NYNW 0x00000a00
+#define NV50TCL_VP_ATTR_EN_0_2_XYNW 0x00000b00
+#define NV50TCL_VP_ATTR_EN_0_2_NNZW 0x00000c00
+#define NV50TCL_VP_ATTR_EN_0_2_XNZW 0x00000d00
+#define NV50TCL_VP_ATTR_EN_0_2_NYZW 0x00000e00
+#define NV50TCL_VP_ATTR_EN_0_2_XYZW 0x00000f00
+#define NV50TCL_VP_ATTR_EN_0_1_SHIFT 4
+#define NV50TCL_VP_ATTR_EN_0_1_MASK 0x000000f0
+#define NV50TCL_VP_ATTR_EN_0_1_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_1_XNNN 0x00000010
+#define NV50TCL_VP_ATTR_EN_0_1_NYNN 0x00000020
+#define NV50TCL_VP_ATTR_EN_0_1_XYNN 0x00000030
+#define NV50TCL_VP_ATTR_EN_0_1_NNZN 0x00000040
+#define NV50TCL_VP_ATTR_EN_0_1_XNZN 0x00000050
+#define NV50TCL_VP_ATTR_EN_0_1_NYZN 0x00000060
+#define NV50TCL_VP_ATTR_EN_0_1_XYZN 0x00000070
+#define NV50TCL_VP_ATTR_EN_0_1_NNNW 0x00000080
+#define NV50TCL_VP_ATTR_EN_0_1_XNNW 0x00000090
+#define NV50TCL_VP_ATTR_EN_0_1_NYNW 0x000000a0
+#define NV50TCL_VP_ATTR_EN_0_1_XYNW 0x000000b0
+#define NV50TCL_VP_ATTR_EN_0_1_NNZW 0x000000c0
+#define NV50TCL_VP_ATTR_EN_0_1_XNZW 0x000000d0
+#define NV50TCL_VP_ATTR_EN_0_1_NYZW 0x000000e0
+#define NV50TCL_VP_ATTR_EN_0_1_XYZW 0x000000f0
+#define NV50TCL_VP_ATTR_EN_0_0_SHIFT 0
+#define NV50TCL_VP_ATTR_EN_0_0_MASK 0x0000000f
+#define NV50TCL_VP_ATTR_EN_0_0_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_0_XNNN 0x00000001
+#define NV50TCL_VP_ATTR_EN_0_0_NYNN 0x00000002
+#define NV50TCL_VP_ATTR_EN_0_0_XYNN 0x00000003
+#define NV50TCL_VP_ATTR_EN_0_0_NNZN 0x00000004
+#define NV50TCL_VP_ATTR_EN_0_0_XNZN 0x00000005
+#define NV50TCL_VP_ATTR_EN_0_0_NYZN 0x00000006
+#define NV50TCL_VP_ATTR_EN_0_0_XYZN 0x00000007
+#define NV50TCL_VP_ATTR_EN_0_0_NNNW 0x00000008
+#define NV50TCL_VP_ATTR_EN_0_0_XNNW 0x00000009
+#define NV50TCL_VP_ATTR_EN_0_0_NYNW 0x0000000a
+#define NV50TCL_VP_ATTR_EN_0_0_XYNW 0x0000000b
+#define NV50TCL_VP_ATTR_EN_0_0_NNZW 0x0000000c
+#define NV50TCL_VP_ATTR_EN_0_0_XNZW 0x0000000d
+#define NV50TCL_VP_ATTR_EN_0_0_NYZW 0x0000000e
+#define NV50TCL_VP_ATTR_EN_0_0_XYZW 0x0000000f
+#define NV50TCL_VP_ATTR_EN_1 0x00001654
+#define NV50TCL_VP_ATTR_EN_1_15_SHIFT 28
+#define NV50TCL_VP_ATTR_EN_1_15_MASK 0xf0000000
+#define NV50TCL_VP_ATTR_EN_1_15_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_15_XNNN 0x10000000
+#define NV50TCL_VP_ATTR_EN_1_15_NYNN 0x20000000
+#define NV50TCL_VP_ATTR_EN_1_15_XYNN 0x30000000
+#define NV50TCL_VP_ATTR_EN_1_15_NNZN 0x40000000
+#define NV50TCL_VP_ATTR_EN_1_15_XNZN 0x50000000
+#define NV50TCL_VP_ATTR_EN_1_15_NYZN 0x60000000
+#define NV50TCL_VP_ATTR_EN_1_15_XYZN 0x70000000
+#define NV50TCL_VP_ATTR_EN_1_15_NNNW 0x80000000
+#define NV50TCL_VP_ATTR_EN_1_15_XNNW 0x90000000
+#define NV50TCL_VP_ATTR_EN_1_15_NYNW 0xa0000000
+#define NV50TCL_VP_ATTR_EN_1_15_XYNW 0xb0000000
+#define NV50TCL_VP_ATTR_EN_1_15_NNZW 0xc0000000
+#define NV50TCL_VP_ATTR_EN_1_15_XNZW 0xd0000000
+#define NV50TCL_VP_ATTR_EN_1_15_NYZW 0xe0000000
+#define NV50TCL_VP_ATTR_EN_1_15_XYZW 0xf0000000
+#define NV50TCL_VP_ATTR_EN_1_14_SHIFT 24
+#define NV50TCL_VP_ATTR_EN_1_14_MASK 0x0f000000
+#define NV50TCL_VP_ATTR_EN_1_14_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_14_XNNN 0x01000000
+#define NV50TCL_VP_ATTR_EN_1_14_NYNN 0x02000000
+#define NV50TCL_VP_ATTR_EN_1_14_XYNN 0x03000000
+#define NV50TCL_VP_ATTR_EN_1_14_NNZN 0x04000000
+#define NV50TCL_VP_ATTR_EN_1_14_XNZN 0x05000000
+#define NV50TCL_VP_ATTR_EN_1_14_NYZN 0x06000000
+#define NV50TCL_VP_ATTR_EN_1_14_XYZN 0x07000000
+#define NV50TCL_VP_ATTR_EN_1_14_NNNW 0x08000000
+#define NV50TCL_VP_ATTR_EN_1_14_XNNW 0x09000000
+#define NV50TCL_VP_ATTR_EN_1_14_NYNW 0x0a000000
+#define NV50TCL_VP_ATTR_EN_1_14_XYNW 0x0b000000
+#define NV50TCL_VP_ATTR_EN_1_14_NNZW 0x0c000000
+#define NV50TCL_VP_ATTR_EN_1_14_XNZW 0x0d000000
+#define NV50TCL_VP_ATTR_EN_1_14_NYZW 0x0e000000
+#define NV50TCL_VP_ATTR_EN_1_14_XYZW 0x0f000000
+#define NV50TCL_VP_ATTR_EN_1_13_SHIFT 20
+#define NV50TCL_VP_ATTR_EN_1_13_MASK 0x00f00000
+#define NV50TCL_VP_ATTR_EN_1_13_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_13_XNNN 0x00100000
+#define NV50TCL_VP_ATTR_EN_1_13_NYNN 0x00200000
+#define NV50TCL_VP_ATTR_EN_1_13_XYNN 0x00300000
+#define NV50TCL_VP_ATTR_EN_1_13_NNZN 0x00400000
+#define NV50TCL_VP_ATTR_EN_1_13_XNZN 0x00500000
+#define NV50TCL_VP_ATTR_EN_1_13_NYZN 0x00600000
+#define NV50TCL_VP_ATTR_EN_1_13_XYZN 0x00700000
+#define NV50TCL_VP_ATTR_EN_1_13_NNNW 0x00800000
+#define NV50TCL_VP_ATTR_EN_1_13_XNNW 0x00900000
+#define NV50TCL_VP_ATTR_EN_1_13_NYNW 0x00a00000
+#define NV50TCL_VP_ATTR_EN_1_13_XYNW 0x00b00000
+#define NV50TCL_VP_ATTR_EN_1_13_NNZW 0x00c00000
+#define NV50TCL_VP_ATTR_EN_1_13_XNZW 0x00d00000
+#define NV50TCL_VP_ATTR_EN_1_13_NYZW 0x00e00000
+#define NV50TCL_VP_ATTR_EN_1_13_XYZW 0x00f00000
+#define NV50TCL_VP_ATTR_EN_1_12_SHIFT 16
+#define NV50TCL_VP_ATTR_EN_1_12_MASK 0x000f0000
+#define NV50TCL_VP_ATTR_EN_1_12_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_12_XNNN 0x00010000
+#define NV50TCL_VP_ATTR_EN_1_12_NYNN 0x00020000
+#define NV50TCL_VP_ATTR_EN_1_12_XYNN 0x00030000
+#define NV50TCL_VP_ATTR_EN_1_12_NNZN 0x00040000
+#define NV50TCL_VP_ATTR_EN_1_12_XNZN 0x00050000
+#define NV50TCL_VP_ATTR_EN_1_12_NYZN 0x00060000
+#define NV50TCL_VP_ATTR_EN_1_12_XYZN 0x00070000
+#define NV50TCL_VP_ATTR_EN_1_12_NNNW 0x00080000
+#define NV50TCL_VP_ATTR_EN_1_12_XNNW 0x00090000
+#define NV50TCL_VP_ATTR_EN_1_12_NYNW 0x000a0000
+#define NV50TCL_VP_ATTR_EN_1_12_XYNW 0x000b0000
+#define NV50TCL_VP_ATTR_EN_1_12_NNZW 0x000c0000
+#define NV50TCL_VP_ATTR_EN_1_12_XNZW 0x000d0000
+#define NV50TCL_VP_ATTR_EN_1_12_NYZW 0x000e0000
+#define NV50TCL_VP_ATTR_EN_1_12_XYZW 0x000f0000
+#define NV50TCL_VP_ATTR_EN_1_11_SHIFT 12
+#define NV50TCL_VP_ATTR_EN_1_11_MASK 0x0000f000
+#define NV50TCL_VP_ATTR_EN_1_11_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_11_XNNN 0x00001000
+#define NV50TCL_VP_ATTR_EN_1_11_NYNN 0x00002000
+#define NV50TCL_VP_ATTR_EN_1_11_XYNN 0x00003000
+#define NV50TCL_VP_ATTR_EN_1_11_NNZN 0x00004000
+#define NV50TCL_VP_ATTR_EN_1_11_XNZN 0x00005000
+#define NV50TCL_VP_ATTR_EN_1_11_NYZN 0x00006000
+#define NV50TCL_VP_ATTR_EN_1_11_XYZN 0x00007000
+#define NV50TCL_VP_ATTR_EN_1_11_NNNW 0x00008000
+#define NV50TCL_VP_ATTR_EN_1_11_XNNW 0x00009000
+#define NV50TCL_VP_ATTR_EN_1_11_NYNW 0x0000a000
+#define NV50TCL_VP_ATTR_EN_1_11_XYNW 0x0000b000
+#define NV50TCL_VP_ATTR_EN_1_11_NNZW 0x0000c000
+#define NV50TCL_VP_ATTR_EN_1_11_XNZW 0x0000d000
+#define NV50TCL_VP_ATTR_EN_1_11_NYZW 0x0000e000
+#define NV50TCL_VP_ATTR_EN_1_11_XYZW 0x0000f000
+#define NV50TCL_VP_ATTR_EN_1_10_SHIFT 8
+#define NV50TCL_VP_ATTR_EN_1_10_MASK 0x00000f00
+#define NV50TCL_VP_ATTR_EN_1_10_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_10_XNNN 0x00000100
+#define NV50TCL_VP_ATTR_EN_1_10_NYNN 0x00000200
+#define NV50TCL_VP_ATTR_EN_1_10_XYNN 0x00000300
+#define NV50TCL_VP_ATTR_EN_1_10_NNZN 0x00000400
+#define NV50TCL_VP_ATTR_EN_1_10_XNZN 0x00000500
+#define NV50TCL_VP_ATTR_EN_1_10_NYZN 0x00000600
+#define NV50TCL_VP_ATTR_EN_1_10_XYZN 0x00000700
+#define NV50TCL_VP_ATTR_EN_1_10_NNNW 0x00000800
+#define NV50TCL_VP_ATTR_EN_1_10_XNNW 0x00000900
+#define NV50TCL_VP_ATTR_EN_1_10_NYNW 0x00000a00
+#define NV50TCL_VP_ATTR_EN_1_10_XYNW 0x00000b00
+#define NV50TCL_VP_ATTR_EN_1_10_NNZW 0x00000c00
+#define NV50TCL_VP_ATTR_EN_1_10_XNZW 0x00000d00
+#define NV50TCL_VP_ATTR_EN_1_10_NYZW 0x00000e00
+#define NV50TCL_VP_ATTR_EN_1_10_XYZW 0x00000f00
+#define NV50TCL_VP_ATTR_EN_1_9_SHIFT 4
+#define NV50TCL_VP_ATTR_EN_1_9_MASK 0x000000f0
+#define NV50TCL_VP_ATTR_EN_1_9_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_9_XNNN 0x00000010
+#define NV50TCL_VP_ATTR_EN_1_9_NYNN 0x00000020
+#define NV50TCL_VP_ATTR_EN_1_9_XYNN 0x00000030
+#define NV50TCL_VP_ATTR_EN_1_9_NNZN 0x00000040
+#define NV50TCL_VP_ATTR_EN_1_9_XNZN 0x00000050
+#define NV50TCL_VP_ATTR_EN_1_9_NYZN 0x00000060
+#define NV50TCL_VP_ATTR_EN_1_9_XYZN 0x00000070
+#define NV50TCL_VP_ATTR_EN_1_9_NNNW 0x00000080
+#define NV50TCL_VP_ATTR_EN_1_9_XNNW 0x00000090
+#define NV50TCL_VP_ATTR_EN_1_9_NYNW 0x000000a0
+#define NV50TCL_VP_ATTR_EN_1_9_XYNW 0x000000b0
+#define NV50TCL_VP_ATTR_EN_1_9_NNZW 0x000000c0
+#define NV50TCL_VP_ATTR_EN_1_9_XNZW 0x000000d0
+#define NV50TCL_VP_ATTR_EN_1_9_NYZW 0x000000e0
+#define NV50TCL_VP_ATTR_EN_1_9_XYZW 0x000000f0
+#define NV50TCL_VP_ATTR_EN_1_8_SHIFT 0
+#define NV50TCL_VP_ATTR_EN_1_8_MASK 0x0000000f
+#define NV50TCL_VP_ATTR_EN_1_8_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_8_XNNN 0x00000001
+#define NV50TCL_VP_ATTR_EN_1_8_NYNN 0x00000002
+#define NV50TCL_VP_ATTR_EN_1_8_XYNN 0x00000003
+#define NV50TCL_VP_ATTR_EN_1_8_NNZN 0x00000004
+#define NV50TCL_VP_ATTR_EN_1_8_XNZN 0x00000005
+#define NV50TCL_VP_ATTR_EN_1_8_NYZN 0x00000006
+#define NV50TCL_VP_ATTR_EN_1_8_XYZN 0x00000007
+#define NV50TCL_VP_ATTR_EN_1_8_NNNW 0x00000008
+#define NV50TCL_VP_ATTR_EN_1_8_XNNW 0x00000009
+#define NV50TCL_VP_ATTR_EN_1_8_NYNW 0x0000000a
+#define NV50TCL_VP_ATTR_EN_1_8_XYNW 0x0000000b
+#define NV50TCL_VP_ATTR_EN_1_8_NNZW 0x0000000c
+#define NV50TCL_VP_ATTR_EN_1_8_XNZW 0x0000000d
+#define NV50TCL_VP_ATTR_EN_1_8_NYZW 0x0000000e
+#define NV50TCL_VP_ATTR_EN_1_8_XYZW 0x0000000f
+#define NV50TCL_LINE_STIPPLE_ENABLE 0x0000166c
+#define NV50TCL_LINE_STIPPLE_PATTERN 0x00001680
+#define NV50TCL_POLYGON_STIPPLE_ENABLE 0x0000168c
+#define NV50TCL_VP_REG_HPOS 0x000016bc
+#define NV50TCL_VP_REG_HPOS_X_SHIFT 0
+#define NV50TCL_VP_REG_HPOS_X_MASK 0x000000ff
+#define NV50TCL_VP_REG_HPOS_Y_SHIFT 8
+#define NV50TCL_VP_REG_HPOS_Y_MASK 0x0000ff00
+#define NV50TCL_VP_REG_HPOS_Z_SHIFT 16
+#define NV50TCL_VP_REG_HPOS_Z_MASK 0x00ff0000
+#define NV50TCL_VP_REG_HPOS_W_SHIFT 24
+#define NV50TCL_VP_REG_HPOS_W_MASK 0xff000000
+#define NV50TCL_VP_REG_COL0 0x000016c0
+#define NV50TCL_VP_REG_COL0_X_SHIFT 0
+#define NV50TCL_VP_REG_COL0_X_MASK 0x000000ff
+#define NV50TCL_VP_REG_COL0_Y_SHIFT 8
+#define NV50TCL_VP_REG_COL0_Y_MASK 0x0000ff00
+#define NV50TCL_VP_REG_COL0_Z_SHIFT 16
+#define NV50TCL_VP_REG_COL0_Z_MASK 0x00ff0000
+#define NV50TCL_VP_REG_COL0_W_SHIFT 24
+#define NV50TCL_VP_REG_COL0_W_MASK 0xff000000
+#define NV50TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001700+((x)*4))
+#define NV50TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020
+#define NV50TCL_CULL_FACE_ENABLE 0x00001918
+#define NV50TCL_FRONT_FACE 0x0000191c
+#define NV50TCL_FRONT_FACE_CW 0x00000900
+#define NV50TCL_FRONT_FACE_CCW 0x00000901
+#define NV50TCL_CULL_FACE 0x00001920
+#define NV50TCL_CULL_FACE_FRONT 0x00000404
+#define NV50TCL_CULL_FACE_BACK 0x00000405
+#define NV50TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NV50TCL_LOGIC_OP_ENABLE 0x000019c4
+#define NV50TCL_LOGIC_OP 0x000019c8
+#define NV50TCL_LOGIC_OP_CLEAR 0x00001500
+#define NV50TCL_LOGIC_OP_AND 0x00001501
+#define NV50TCL_LOGIC_OP_AND_REVERSE 0x00001502
+#define NV50TCL_LOGIC_OP_COPY 0x00001503
+#define NV50TCL_LOGIC_OP_AND_INVERTED 0x00001504
+#define NV50TCL_LOGIC_OP_NOOP 0x00001505
+#define NV50TCL_LOGIC_OP_XOR 0x00001506
+#define NV50TCL_LOGIC_OP_OR 0x00001507
+#define NV50TCL_LOGIC_OP_NOR 0x00001508
+#define NV50TCL_LOGIC_OP_EQUIV 0x00001509
+#define NV50TCL_LOGIC_OP_INVERT 0x0000150a
+#define NV50TCL_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NV50TCL_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NV50TCL_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NV50TCL_LOGIC_OP_NAND 0x0000150e
+#define NV50TCL_LOGIC_OP_SET 0x0000150f
+#define NV50TCL_CLEAR_BUFFERS 0x000019d0
+#define NV50TCL_COLOR_MASK(x) (0x00001a00+((x)*4))
+#define NV50TCL_COLOR_MASK__SIZE 0x00000008
+#define NV50TCL_COLOR_MASK_R_SHIFT 0
+#define NV50TCL_COLOR_MASK_R_MASK 0x0000000f
+#define NV50TCL_COLOR_MASK_G_SHIFT 4
+#define NV50TCL_COLOR_MASK_G_MASK 0x000000f0
+#define NV50TCL_COLOR_MASK_B_SHIFT 8
+#define NV50TCL_COLOR_MASK_B_MASK 0x00000f00
+#define NV50TCL_COLOR_MASK_A_SHIFT 12
+#define NV50TCL_COLOR_MASK_A_MASK 0x0000f000
+
+
+#define NV50_COMPUTE 0x000050c0
+
+#define NV50_COMPUTE_DMA_UNK0 0x000001a0
+#define NV50_COMPUTE_DMA_STATUS 0x000001a4
+#define NV50_COMPUTE_DMA_UNK1 0x000001b8
+#define NV50_COMPUTE_DMA_UNK2 0x000001bc
+#define NV50_COMPUTE_DMA_UNK3 0x000001c0
+#define NV50_COMPUTE_UNK4_HIGH 0x00000210
+#define NV50_COMPUTE_UNK4_LOW 0x00000214
+#define NV50_COMPUTE_UNK5_HIGH 0x00000218
+#define NV50_COMPUTE_UNK5_LOW 0x0000021c
+#define NV50_COMPUTE_UNK6_HIGH 0x00000294
+#define NV50_COMPUTE_UNK6_LOW 0x00000298
+#define NV50_COMPUTE_CONST_BASE_HIGH 0x000002a4
+#define NV50_COMPUTE_CONST_BASE_LO 0x000002a8
+#define NV50_COMPUTE_CONST_SIZE_SEG 0x000002ac
+#define NV50_COMPUTE_REG_COUNT 0x000002c0
+#define NV50_COMPUTE_STATUS_HIGH 0x00000310
+#define NV50_COMPUTE_STATUS_LOW 0x00000314
+#define NV50_COMPUTE_EXECUTE 0x0000031c
+#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374
+#define NV50_COMPUTE_GRIDDIM_YX 0x000003a4
+#define NV50_COMPUTE_SHARED_SIZE 0x000003a8
+#define NV50_COMPUTE_BLOCKDIM_YX 0x000003ac
+#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0
+#define NV50_COMPUTE_CALL_ADDRESS 0x000003b4
+#define NV50_COMPUTE_GLOBAL_BASE_HIGH(x) (0x00000400+((x)*32))
+#define NV50_COMPUTE_GLOBAL_BASE_HIGH__SIZE 0x00000010
+#define NV50_COMPUTE_GLOBAL_BASE_LOW(x) (0x00000404+((x)*32))
+#define NV50_COMPUTE_GLOBAL_BASE_LOW__SIZE 0x00000010
+#define NV50_COMPUTE_GLOBAL_LIMIT_HIGH(x) (0x00000408+((x)*32))
+#define NV50_COMPUTE_GLOBAL_LIMIT_HIGH__SIZE 0x00000010
+#define NV50_COMPUTE_GLOBAL_LIMIT_LOW(x) (0x0000040c+((x)*32))
+#define NV50_COMPUTE_GLOBAL_LIMIT_LOW__SIZE 0x00000010
+#define NV50_COMPUTE_GLOBAL_UNK(x) (0x00000410+((x)*32))
+#define NV50_COMPUTE_GLOBAL_UNK__SIZE 0x00000010
+#define NV50_COMPUTE_USER_PARAM(x) (0x00000600+((x)*4))
+#define NV50_COMPUTE_USER_PARAM__SIZE 0x00000040
+
+
+#define NV54TCL 0x00008297
+
+
+
+#endif /* NOUVEAU_REG_H */
diff --git a/src/gallium/drivers/nouveau/nouveau_device.h b/src/gallium/drivers/nouveau/nouveau_device.h
new file mode 100644
index 0000000000..e25e89fedd
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_device.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_DEVICE_H__
+#define __NOUVEAU_DEVICE_H__
+
+struct nouveau_device {
+ unsigned chipset;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_gldefs.h b/src/gallium/drivers/nouveau/nouveau_gldefs.h
new file mode 100644
index 0000000000..ff97aaa9af
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_gldefs.h
@@ -0,0 +1,196 @@
+#ifndef __NOUVEAU_GLDEFS_H__
+#define __NOUVEAU_GLDEFS_H__
+
+static INLINE unsigned
+nvgl_blend_func(unsigned factor)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ return 0x0000;
+ case PIPE_BLENDFACTOR_ONE:
+ return 0x0001;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return 0x0300;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return 0x0301;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return 0x0302;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return 0x0303;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return 0x0304;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return 0x0305;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return 0x0306;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return 0x0307;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return 0x0308;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return 0x8001;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return 0x8002;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return 0x8003;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return 0x8004;
+ default:
+ return 0x0000;
+ }
+}
+
+static INLINE unsigned
+nvgl_blend_eqn(unsigned func)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return 0x8006;
+ case PIPE_BLEND_MIN:
+ return 0x8007;
+ case PIPE_BLEND_MAX:
+ return 0x8008;
+ case PIPE_BLEND_SUBTRACT:
+ return 0x800a;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return 0x800b;
+ default:
+ return 0x8006;
+ }
+}
+
+static INLINE unsigned
+nvgl_logicop_func(unsigned func)
+{
+ switch (func) {
+ case PIPE_LOGICOP_CLEAR:
+ return 0x1500;
+ case PIPE_LOGICOP_NOR:
+ return 0x1508;
+ case PIPE_LOGICOP_AND_INVERTED:
+ return 0x1504;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ return 0x150c;
+ case PIPE_LOGICOP_AND_REVERSE:
+ return 0x1502;
+ case PIPE_LOGICOP_INVERT:
+ return 0x150a;
+ case PIPE_LOGICOP_XOR:
+ return 0x1506;
+ case PIPE_LOGICOP_NAND:
+ return 0x150e;
+ case PIPE_LOGICOP_AND:
+ return 0x1501;
+ case PIPE_LOGICOP_EQUIV:
+ return 0x1509;
+ case PIPE_LOGICOP_NOOP:
+ return 0x1505;
+ case PIPE_LOGICOP_OR_INVERTED:
+ return 0x150d;
+ case PIPE_LOGICOP_COPY:
+ return 0x1503;
+ case PIPE_LOGICOP_OR_REVERSE:
+ return 0x150b;
+ case PIPE_LOGICOP_OR:
+ return 0x1507;
+ case PIPE_LOGICOP_SET:
+ return 0x150f;
+ default:
+ return 0x1505;
+ }
+}
+
+static INLINE unsigned
+nvgl_comparison_op(unsigned op)
+{
+ switch (op) {
+ case PIPE_FUNC_NEVER:
+ return 0x0200;
+ case PIPE_FUNC_LESS:
+ return 0x0201;
+ case PIPE_FUNC_EQUAL:
+ return 0x0202;
+ case PIPE_FUNC_LEQUAL:
+ return 0x0203;
+ case PIPE_FUNC_GREATER:
+ return 0x0204;
+ case PIPE_FUNC_NOTEQUAL:
+ return 0x0205;
+ case PIPE_FUNC_GEQUAL:
+ return 0x0206;
+ case PIPE_FUNC_ALWAYS:
+ return 0x0207;
+ default:
+ return 0x0207;
+ }
+}
+
+static INLINE unsigned
+nvgl_polygon_mode(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_POLYGON_MODE_POINT:
+ return 0x1b00;
+ case PIPE_POLYGON_MODE_LINE:
+ return 0x1b01;
+ case PIPE_POLYGON_MODE_FILL:
+ return 0x1b02;
+ default:
+ return 0x1b02;
+ }
+}
+
+static INLINE unsigned
+nvgl_stencil_op(unsigned op)
+{
+ switch (op) {
+ case PIPE_STENCIL_OP_ZERO:
+ return 0x0000;
+ case PIPE_STENCIL_OP_INVERT:
+ return 0x150a;
+ case PIPE_STENCIL_OP_KEEP:
+ return 0x1e00;
+ case PIPE_STENCIL_OP_REPLACE:
+ return 0x1e01;
+ case PIPE_STENCIL_OP_INCR:
+ return 0x1e02;
+ case PIPE_STENCIL_OP_DECR:
+ return 0x1e03;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ return 0x8507;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ return 0x8508;
+ default:
+ return 0x1e00;
+ }
+}
+
+static INLINE unsigned
+nvgl_primitive(unsigned prim) {
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ return 0x0001;
+ case PIPE_PRIM_LINES:
+ return 0x0002;
+ case PIPE_PRIM_LINE_LOOP:
+ return 0x0003;
+ case PIPE_PRIM_LINE_STRIP:
+ return 0x0004;
+ case PIPE_PRIM_TRIANGLES:
+ return 0x0005;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ return 0x0006;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ return 0x0007;
+ case PIPE_PRIM_QUADS:
+ return 0x0008;
+ case PIPE_PRIM_QUAD_STRIP:
+ return 0x0009;
+ case PIPE_PRIM_POLYGON:
+ return 0x000a;
+ default:
+ return 0;
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_grobj.h b/src/gallium/drivers/nouveau/nouveau_grobj.h
new file mode 100644
index 0000000000..8f5abf9051
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_grobj.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_GROBJ_H__
+#define __NOUVEAU_GROBJ_H__
+
+#include "nouveau_channel.h"
+
+struct nouveau_grobj {
+ struct nouveau_channel *channel;
+ int grclass;
+ uint32_t handle;
+ int subc;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_notifier.h b/src/gallium/drivers/nouveau/nouveau_notifier.h
new file mode 100644
index 0000000000..35adde1e32
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_notifier.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_NOTIFIER_H__
+#define __NOUVEAU_NOTIFIER_H__
+
+#define NV_NOTIFIER_SIZE 32
+#define NV_NOTIFY_TIME_0 0x00000000
+#define NV_NOTIFY_TIME_1 0x00000004
+#define NV_NOTIFY_RETURN_VALUE 0x00000008
+#define NV_NOTIFY_STATE 0x0000000C
+#define NV_NOTIFY_STATE_STATUS_MASK 0xFF000000
+#define NV_NOTIFY_STATE_STATUS_SHIFT 24
+#define NV_NOTIFY_STATE_STATUS_COMPLETED 0x00
+#define NV_NOTIFY_STATE_STATUS_IN_PROCESS 0x01
+#define NV_NOTIFY_STATE_ERROR_CODE_MASK 0x0000FFFF
+#define NV_NOTIFY_STATE_ERROR_CODE_SHIFT 0
+
+struct nouveau_notifier {
+ struct nouveau_channel *channel;
+ uint32_t handle;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h
new file mode 100644
index 0000000000..54ef1c1291
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_push.h
@@ -0,0 +1,82 @@
+#ifndef __NOUVEAU_PUSH_H__
+#define __NOUVEAU_PUSH_H__
+
+#include "nouveau/nouveau_winsys.h"
+
+#ifndef NOUVEAU_PUSH_CONTEXT
+#error undefined push context
+#endif
+
+#define OUT_RING(data) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ (*pc->nvws->channel->pushbuf->cur++) = (data); \
+} while(0)
+
+#define OUT_RINGp(src,size) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ memcpy(pc->nvws->channel->pushbuf->cur, (src), (size) * 4); \
+ pc->nvws->channel->pushbuf->cur += (size); \
+} while(0)
+
+#define OUT_RINGf(data) do { \
+ union { float v; uint32_t u; } c; \
+ c.v = (data); \
+ OUT_RING(c.u); \
+} while(0)
+
+#define BEGIN_RING(obj,mthd,size) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \
+ pc->nvws->push_flush(pc->nvws, ((size) + 1), NULL); \
+ OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \
+ pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \
+} while(0)
+
+#define BEGIN_RING_NI(obj,mthd,size) do { \
+ BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \
+} while(0)
+
+#define FIRE_RING(fence) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ pc->nvws->push_flush(pc->nvws, 0, fence); \
+} while(0)
+
+#define OUT_RELOC(bo,data,flags,vor,tor) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ pc->nvws->push_reloc(pc->nvws, pc->nvws->channel->pushbuf->cur++, \
+ (bo), (data), (flags), (vor), (tor)); \
+} while(0)
+
+/* Raw data + flags depending on FB/TT buffer */
+#define OUT_RELOCd(bo,data,flags,vor,tor) do { \
+ OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \
+} while(0)
+
+/* FB/TT object handle */
+#define OUT_RELOCo(bo,flags) do { \
+ OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \
+ pc->nvws->channel->vram->handle, \
+ pc->nvws->channel->gart->handle); \
+} while(0)
+
+/* Low 32-bits of offset */
+#define OUT_RELOCl(bo,delta,flags) do { \
+ OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \
+} while(0)
+
+/* High 32-bits of offset */
+#define OUT_RELOCh(bo,delta,flags) do { \
+ OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \
+} while(0)
+
+/* A reloc which'll recombine into a NV_DMA_METHOD packet header */
+#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \
+ pc->nvws->push_flush(pc->nvws->channel, ((size) + 1), NULL); \
+ OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \
+ (flags), 0, 0); \
+ pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \
+} while(0)
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_pushbuf.h b/src/gallium/drivers/nouveau/nouveau_pushbuf.h
new file mode 100644
index 0000000000..1909765098
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_pushbuf.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_PUSHBUF_H__
+#define __NOUVEAU_PUSHBUF_H__
+
+struct nouveau_pushbuf {
+ struct nouveau_channel *channel;
+ unsigned remaining;
+ uint32_t *cur;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_resource.h b/src/gallium/drivers/nouveau/nouveau_resource.h
new file mode 100644
index 0000000000..1af7961d30
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_resource.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_RESOURCE_H__
+#define __NOUVEAU_RESOURCE_H__
+
+struct nouveau_resource {
+ struct nouveau_resource *prev;
+ struct nouveau_resource *next;
+
+ int in_use;
+ void *priv;
+
+ unsigned int start;
+ unsigned int size;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
new file mode 100644
index 0000000000..729988b095
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -0,0 +1,158 @@
+#ifndef __NOUVEAU_STATEOBJ_H__
+#define __NOUVEAU_STATEOBJ_H__
+
+#include "pipe/p_debug.h"
+
+struct nouveau_stateobj_reloc {
+ struct pipe_buffer *bo;
+
+ unsigned offset;
+ unsigned packet;
+
+ unsigned data;
+ unsigned flags;
+ unsigned vor;
+ unsigned tor;
+};
+
+struct nouveau_stateobj {
+ int refcount;
+
+ unsigned *push;
+ struct nouveau_stateobj_reloc *reloc;
+
+ unsigned *cur;
+ unsigned cur_packet;
+ unsigned cur_reloc;
+};
+
+static INLINE struct nouveau_stateobj *
+so_new(unsigned push, unsigned reloc)
+{
+ struct nouveau_stateobj *so;
+
+ so = MALLOC(sizeof(struct nouveau_stateobj));
+ so->refcount = 0;
+ so->push = MALLOC(sizeof(unsigned) * push);
+ so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc);
+
+ so->cur = so->push;
+ so->cur_reloc = so->cur_packet = 0;
+
+ return so;
+}
+
+static INLINE void
+so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso)
+{
+ struct nouveau_stateobj *so = *pso;
+
+ if (ref) {
+ ref->refcount++;
+ }
+
+ if (so && --so->refcount <= 0) {
+ free(so->push);
+ free(so->reloc);
+ free(so);
+ }
+
+ *pso = ref;
+}
+
+static INLINE void
+so_data(struct nouveau_stateobj *so, unsigned data)
+{
+ (*so->cur++) = (data);
+ so->cur_packet += 4;
+}
+
+static INLINE void
+so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size)
+{
+ so->cur_packet += (4 * size);
+ while (size--)
+ (*so->cur++) = (*data++);
+}
+
+static INLINE void
+so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
+ unsigned mthd, unsigned size)
+{
+ so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4);
+ so_data(so, (gr->subc << 13) | (size << 18) | mthd);
+}
+
+static INLINE void
+so_reloc(struct nouveau_stateobj *so, struct pipe_buffer *bo,
+ unsigned data, unsigned flags, unsigned vor, unsigned tor)
+{
+ struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++];
+
+ r->bo = bo;
+ r->offset = so->cur - so->push;
+ r->packet = so->cur_packet;
+ r->data = data;
+ r->flags = flags;
+ r->vor = vor;
+ r->tor = tor;
+ so_data(so, data);
+}
+
+static INLINE void
+so_dump(struct nouveau_stateobj *so)
+{
+ unsigned i, nr = so->cur - so->push;
+
+ for (i = 0; i < nr; i++)
+ debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]);
+}
+
+static INLINE void
+so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so)
+{
+ struct nouveau_pushbuf *pb = nvws->channel->pushbuf;
+ unsigned nr, i;
+
+ nr = so->cur - so->push;
+ if (pb->remaining < nr)
+ nvws->push_flush(nvws, nr, NULL);
+ pb->remaining -= nr;
+
+ memcpy(pb->cur, so->push, nr * 4);
+ for (i = 0; i < so->cur_reloc; i++) {
+ struct nouveau_stateobj_reloc *r = &so->reloc[i];
+
+ nvws->push_reloc(nvws, pb->cur + r->offset, r->bo,
+ r->data, r->flags, r->vor, r->tor);
+ }
+ pb->cur += nr;
+}
+
+static INLINE void
+so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so)
+{
+ struct nouveau_pushbuf *pb = nvws->channel->pushbuf;
+ unsigned i;
+
+ if (!so)
+ return;
+
+ i = so->cur_reloc << 1;
+ if (nvws->channel->pushbuf->remaining < i)
+ nvws->push_flush(nvws, i, NULL);
+ nvws->channel->pushbuf->remaining -= i;
+
+ for (i = 0; i < so->cur_reloc; i++) {
+ struct nouveau_stateobj_reloc *r = &so->reloc[i];
+
+ nvws->push_reloc(nvws, pb->cur++, r->bo, r->packet,
+ (r->flags &
+ (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART)) |
+ NOUVEAU_BO_DUMMY, 0, 0);
+ nvws->push_reloc(nvws, pb->cur++, r->bo, r->data,
+ r->flags | NOUVEAU_BO_DUMMY, r->vor, r->tor);
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
new file mode 100644
index 0000000000..a10114beab
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_util.h
@@ -0,0 +1,91 @@
+#ifndef __NOUVEAU_UTIL_H__
+#define __NOUVEAU_UTIL_H__
+
+/* Determine how many vertices can be pushed into the command stream.
+ * Where the remaining space isn't large enough to represent all verices,
+ * split the buffer at primitive boundaries.
+ *
+ * Returns a count of vertices that can be rendered, and an index to
+ * restart drawing at after a flush.
+ */
+static INLINE unsigned
+nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned *restart)
+{
+ int max, adj = 0;
+
+ max = remaining - overhead;
+ if (max < 0)
+ return 0;
+
+ max *= vpp;
+ if (max >= count)
+ return count;
+
+ switch (mode) {
+ case PIPE_PRIM_POINTS:
+ break;
+ case PIPE_PRIM_LINES:
+ max = max & 1;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ max = max - (max % 3);
+ break;
+ case PIPE_PRIM_QUADS:
+ max = max & 3;
+ break;
+ case PIPE_PRIM_LINE_LOOP:
+ case PIPE_PRIM_LINE_STRIP:
+ if (max < 2)
+ max = 0;
+ adj = 1;
+ break;
+ case PIPE_PRIM_POLYGON:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (max < 3)
+ max = 0;
+ adj = 2;
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ if (max < 4)
+ max = 0;
+ adj = 3;
+ break;
+ default:
+ assert(0);
+ }
+
+ *restart = start + max - adj;
+ return max;
+}
+
+/* Integer base-2 logarithm, rounded towards zero. */
+static INLINE unsigned log2i(unsigned i)
+{
+ unsigned r = 0;
+
+ if (i & 0xffff0000) {
+ i >>= 16;
+ r += 16;
+ }
+ if (i & 0x0000ff00) {
+ i >>= 8;
+ r += 8;
+ }
+ if (i & 0x000000f0) {
+ i >>= 4;
+ r += 4;
+ }
+ if (i & 0x0000000c) {
+ i >>= 2;
+ r += 2;
+ }
+ if (i & 0x00000002) {
+ r += 1;
+ }
+ return r;
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
new file mode 100644
index 0000000000..5535ebb6a9
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -0,0 +1,98 @@
+#ifndef NOUVEAU_WINSYS_H
+#define NOUVEAU_WINSYS_H
+
+#include <stdint.h>
+#include "pipe/p_winsys.h"
+#include "pipe/p_defines.h"
+
+#include "nouveau/nouveau_bo.h"
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_class.h"
+#include "nouveau/nouveau_device.h"
+#include "nouveau/nouveau_grobj.h"
+#include "nouveau/nouveau_notifier.h"
+#include "nouveau/nouveau_resource.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+#define NOUVEAU_CAP_HW_VTXBUF (0xbeef0000)
+#define NOUVEAU_CAP_HW_IDXBUF (0xbeef0001)
+
+#define NOUVEAU_TEXTURE_USAGE_LINEAR (1 << 16)
+
+#define NOUVEAU_BUFFER_USAGE_TEXTURE (1 << 16)
+#define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17)
+
+struct nouveau_winsys {
+ struct nouveau_context *nv;
+
+ struct nouveau_channel *channel;
+
+ int (*res_init)(struct nouveau_resource **heap, unsigned start,
+ unsigned size);
+ int (*res_alloc)(struct nouveau_resource *heap, int size, void *priv,
+ struct nouveau_resource **);
+ void (*res_free)(struct nouveau_resource **);
+
+ int (*push_reloc)(struct nouveau_winsys *, void *ptr,
+ struct pipe_buffer *, uint32_t data,
+ uint32_t flags, uint32_t vor, uint32_t tor);
+ int (*push_flush)(struct nouveau_winsys *, unsigned size,
+ struct pipe_fence_handle **fence);
+
+ int (*grobj_alloc)(struct nouveau_winsys *, int grclass,
+ struct nouveau_grobj **);
+ void (*grobj_free)(struct nouveau_grobj **);
+
+ int (*notifier_alloc)(struct nouveau_winsys *, int count,
+ struct nouveau_notifier **);
+ void (*notifier_free)(struct nouveau_notifier **);
+ void (*notifier_reset)(struct nouveau_notifier *, int id);
+ uint32_t (*notifier_status)(struct nouveau_notifier *, int id);
+ uint32_t (*notifier_retval)(struct nouveau_notifier *, int id);
+ int (*notifier_wait)(struct nouveau_notifier *, int id,
+ int status, int timeout);
+
+ int (*surface_copy)(struct nouveau_winsys *, struct pipe_surface *,
+ unsigned, unsigned, struct pipe_surface *,
+ unsigned, unsigned, unsigned, unsigned);
+ int (*surface_fill)(struct nouveau_winsys *, struct pipe_surface *,
+ unsigned, unsigned, unsigned, unsigned, unsigned);
+};
+
+extern struct pipe_screen *
+nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv04_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv10_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv20_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv30_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv40_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv50_create(struct pipe_screen *, unsigned pctx_id);
+
+#endif
diff --git a/src/gallium/drivers/nv04/Makefile b/src/gallium/drivers/nv04/Makefile
new file mode 100644
index 0000000000..5ea51a2f42
--- /dev/null
+++ b/src/gallium/drivers/nv04/Makefile
@@ -0,0 +1,28 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv04
+
+DRIVER_SOURCES = \
+ nv04_clear.c \
+ nv04_context.c \
+ nv04_fragprog.c \
+ nv04_fragtex.c \
+ nv04_miptree.c \
+ nv04_prim_vbuf.c \
+ nv04_screen.c \
+ nv04_state.c \
+ nv04_state_emit.c \
+ nv04_surface.c \
+ nv04_vbo.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv04/nv04_clear.c b/src/gallium/drivers/nv04/nv04_clear.c
new file mode 100644
index 0000000000..01cacd36fe
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_clear.c
@@ -0,0 +1,12 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv04_context.h"
+
+void
+nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+}
diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c
new file mode 100644
index 0000000000..9f75253363
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_context.c
@@ -0,0 +1,106 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv04_context.h"
+#include "nv04_screen.h"
+
+static void
+nv04_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_flush(nv04->draw);
+
+ FIRE_RING(fence);
+}
+
+static void
+nv04_destroy(struct pipe_context *pipe)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ if (nv04->draw)
+ draw_destroy(nv04->draw);
+
+ FREE(nv04);
+}
+
+static void
+nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+static boolean
+nv04_init_hwctx(struct nv04_context *nv04)
+{
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1);
+ OUT_RING(0);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1);
+ OUT_RING(0);
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(0x40182800);
+// OUT_RING(1<<20/*no cull*/);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+// OUT_RING(0x24|(1<<6)|(1<<8));
+ OUT_RING(0x120001a4);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1);
+ OUT_RING(0x332213a1);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1);
+ OUT_RING(0x11001010);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1);
+ OUT_RING(0x0);
+// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1);
+// OUT_RING(SCREEN_OFFSET);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1);
+ OUT_RING(0xff000000);
+
+
+
+ FIRE_RING (NULL);
+ return TRUE;
+}
+
+struct pipe_context *
+nv04_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv04_screen *screen = nv04_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv04_context *nv04;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv04 = CALLOC(1, sizeof(struct nv04_context));
+ if (!nv04)
+ return NULL;
+ nv04->screen = screen;
+ nv04->pctx_id = pctx_id;
+
+ nv04->nvws = nvws;
+
+ nv04->pipe.winsys = ws;
+ nv04->pipe.screen = pscreen;
+ nv04->pipe.destroy = nv04_destroy;
+ nv04->pipe.set_edgeflags = nv04_set_edgeflags;
+ nv04->pipe.draw_arrays = nv04_draw_arrays;
+ nv04->pipe.draw_elements = nv04_draw_elements;
+ nv04->pipe.clear = nv04_clear;
+ nv04->pipe.flush = nv04_flush;
+
+ nv04_init_surface_functions(nv04);
+ nv04_init_state_functions(nv04);
+
+ nv04->draw = draw_create();
+ assert(nv04->draw);
+ draw_wide_point_threshold(nv04->draw, 0.0);
+ draw_wide_line_threshold(nv04->draw, 0.0);
+ draw_enable_line_stipple(nv04->draw, FALSE);
+ draw_enable_point_sprites(nv04->draw, FALSE);
+ draw_set_rasterize_stage(nv04->draw, nv04_draw_vbuf_stage(nv04));
+
+ nv04_init_hwctx(nv04);
+
+ return &nv04->pipe;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h
new file mode 100644
index 0000000000..3e6a085270
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_context.h
@@ -0,0 +1,146 @@
+#ifndef __NV04_CONTEXT_H__
+#define __NV04_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv04_screen *ctx = nv04->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv04_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+#include "nv04_screen.h"
+
+#define NV04_NEW_VERTPROG (1 << 1)
+#define NV04_NEW_FRAGPROG (1 << 2)
+#define NV04_NEW_BLEND (1 << 3)
+#define NV04_NEW_RAST (1 << 4)
+#define NV04_NEW_CONTROL (1 << 5)
+#define NV04_NEW_VIEWPORT (1 << 6)
+#define NV04_NEW_SAMPLER (1 << 7)
+
+struct nv04_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv04_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ int chipset;
+ struct nouveau_notifier *sync;
+
+ uint32_t dirty;
+
+ struct nv04_blend_state *blend;
+ struct nv04_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ struct nv04_fragtex_state fragtex;
+ struct nv04_rasterizer_state *rast;
+ struct nv04_depth_stencil_alpha_state *dsa;
+
+ struct nv04_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned dirty_samplers;
+ unsigned fp_samplers;
+ unsigned vp_samplers;
+
+ uint32_t rt_enable;
+ struct pipe_buffer *rt[4];
+ struct pipe_buffer *zeta;
+
+ struct {
+ struct pipe_buffer *buffer;
+ uint32_t format;
+ } tex[16];
+
+ unsigned vb_enable;
+ struct {
+ struct pipe_buffer *buffer;
+ unsigned delta;
+ } vb[16];
+
+ struct vertex_info vertex_info;
+ struct {
+
+ struct nouveau_resource *exec_heap;
+ struct nouveau_resource *data_heap;
+
+ struct nv04_vertex_program *active;
+
+ struct nv04_vertex_program *current;
+ struct pipe_buffer *constant_buf;
+ } vertprog;
+
+ struct {
+ struct nv04_fragment_program *active;
+
+ struct nv04_fragment_program *current;
+ struct pipe_buffer *constant_buf;
+ } fragprog;
+
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ unsigned num_vertex_buffers;
+ unsigned num_vertex_elements;
+
+ struct pipe_viewport_state viewport;
+};
+
+static INLINE struct nv04_context *
+nv04_context(struct pipe_context *pipe)
+{
+ return (struct nv04_context *)pipe;
+}
+
+extern void nv04_init_state_functions(struct nv04_context *nv04);
+extern void nv04_init_surface_functions(struct nv04_context *nv04);
+extern void nv04_init_miptree_functions(struct pipe_screen *screen);
+
+/* nv04_clear.c */
+extern void nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv04_draw.c */
+extern struct draw_stage *nv04_draw_render_stage(struct nv04_context *nv04);
+
+/* nv04_fragprog.c */
+extern void nv04_fragprog_bind(struct nv04_context *,
+ struct nv04_fragment_program *);
+extern void nv04_fragprog_destroy(struct nv04_context *,
+ struct nv04_fragment_program *);
+
+/* nv04_fragtex.c */
+extern void nv04_fragtex_bind(struct nv04_context *);
+
+/* nv04_prim_vbuf.c */
+struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 );
+
+/* nv04_state.c and friends */
+extern void nv04_emit_hw_state(struct nv04_context *nv04);
+extern void nv04_state_tex_update(struct nv04_context *nv04);
+
+/* nv04_vbo.c */
+extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv04_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c
new file mode 100644
index 0000000000..8a2af41fe0
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_fragprog.c
@@ -0,0 +1,21 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv04_context.h"
+
+void
+nv04_fragprog_bind(struct nv04_context *nv04, struct nv04_fragment_program *fp)
+{
+}
+
+void
+nv04_fragprog_destroy(struct nv04_context *nv04,
+ struct nv04_fragment_program *fp)
+{
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c
new file mode 100644
index 0000000000..21f990fd53
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_fragtex.c
@@ -0,0 +1,73 @@
+#include "nv04_context.h"
+#include "nouveau/nouveau_util.h"
+
+#define _(m,tf) \
+{ \
+ PIPE_FORMAT_##m, \
+ NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \
+}
+
+struct nv04_texture_format {
+ uint pipe;
+ int format;
+};
+
+static struct nv04_texture_format
+nv04_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8),
+ _(X8R8G8B8_UNORM, X8R8G8B8),
+ _(A1R5G5B5_UNORM, A1R5G5B5),
+ _(A4R4G4B4_UNORM, A4R4G4B4),
+ _(L8_UNORM, Y8 ),
+ _(A8_UNORM, Y8 ),
+};
+
+static uint32_t
+nv04_fragtex_format(uint pipe_format)
+{
+ struct nv04_texture_format *tf = nv04_texture_formats;
+ int i;
+
+ for (i=0; i< sizeof(nv04_texture_formats)/sizeof(nv04_texture_formats[0]); i++) {
+ if (tf->pipe == pipe_format)
+ return tf->format;
+ tf++;
+ }
+
+ NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format));
+ return 0;
+}
+
+
+static void
+nv04_fragtex_build(struct nv04_context *nv04, int unit)
+{
+ struct nv04_miptree *nv04mt = nv04->tex_miptree[unit];
+ struct pipe_texture *pt = &nv04mt->base;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_2D:
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return;
+ }
+
+ nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER
+ | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
+ | nv04_fragtex_format(pt->format)
+ | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
+ | ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
+ | ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
+ | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
+ | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
+ ;
+}
+
+
+void
+nv04_fragtex_bind(struct nv04_context *nv04)
+{
+ nv04_fragtex_build(nv04, 0);
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c
new file mode 100644
index 0000000000..094c38256b
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_miptree.c
@@ -0,0 +1,136 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv04_context.h"
+#include "nv04_screen.h"
+
+static void
+nv04_miptree_layout(struct nv04_miptree *nv04mt)
+{
+ struct pipe_texture *pt = &nv04mt->base;
+ uint width = pt->width[0], height = pt->height[0];
+ uint offset = 0;
+ int nr_faces, l, f;
+
+ nr_faces = 1;
+
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ nv04mt->level[l].pitch = pt->width[0] * pt->block.size;
+ nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63;
+
+ nv04mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ nv04mt->level[l].image_offset[f] = offset;
+ offset += nv04mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ nv04mt->total_size = offset;
+}
+
+static struct pipe_texture *
+nv04_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct nv04_miptree *mt;
+
+ mt = MALLOC(sizeof(struct nv04_miptree));
+ if (!mt)
+ return NULL;
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = screen;
+
+ nv04_miptree_layout(mt);
+
+ mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL,
+ mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv04_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt)
+{
+ struct pipe_texture *mt = *pt;
+
+ *pt = NULL;
+ if (--mt->refcount <= 0) {
+ struct nv04_miptree *nv04mt = (struct nv04_miptree *)mt;
+ int l;
+
+ pipe_buffer_reference(screen, &nv04mt->buffer, NULL);
+ for (l = 0; l <= mt->last_level; l++) {
+ if (nv04mt->level[l].image_offset)
+ FREE(nv04mt->level[l].image_offset);
+ }
+ FREE(nv04mt);
+ }
+}
+
+static struct pipe_surface *
+nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv04_miptree *nv04mt = (struct nv04_miptree *)pt;
+ struct pipe_surface *ps;
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (!ps)
+ return NULL;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(pscreen, &ps->buffer, nv04mt->buffer);
+ ps->format = pt->format;
+ ps->block = pt->block;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = nv04mt->level[level].pitch;
+ ps->refcount = 1;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ps->offset = nv04mt->level[level].image_offset[face];
+ } else {
+ ps->offset = nv04mt->level[level].image_offset[0];
+ }
+
+ return ps;
+}
+
+static void
+nv04_miptree_surface_del(struct pipe_screen *pscreen,
+ struct pipe_surface **psurface)
+{
+}
+
+void
+nv04_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv04_miptree_create;
+ pscreen->texture_release = nv04_miptree_release;
+ pscreen->get_tex_surface = nv04_miptree_surface_new;
+ pscreen->tex_surface_release = nv04_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
new file mode 100644
index 0000000000..19979fff79
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
@@ -0,0 +1,309 @@
+
+#include "pipe/p_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_compiler.h"
+
+#include "draw/draw_vbuf.h"
+
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+#define VERTEX_SIZE 40
+#define VERTEX_BUFFER_SIZE (4096*VERTEX_SIZE) // 4096 vertices of 40 bytes each
+
+/**
+ * Primitive renderer for nv04.
+ */
+struct nv04_vbuf_render {
+ struct vbuf_render base;
+
+ struct nv04_context *nv04;
+
+ /** Vertex buffer */
+ unsigned char* buffer;
+
+ /** Vertex size in bytes */
+ unsigned vertex_size;
+
+ /** Current primitive */
+ unsigned prim;
+};
+
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct nv04_vbuf_render *
+nv04_vbuf_render( struct vbuf_render *render )
+{
+ assert(render);
+ return (struct nv04_vbuf_render *)render;
+}
+
+
+static const struct vertex_info *
+nv04_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+ struct nv04_context *nv04 = nv04_render->nv04;
+ return &nv04->vertex_info;
+}
+
+
+static void *
+nv04_vbuf_render_allocate_vertices( struct vbuf_render *render,
+ ushort vertex_size,
+ ushort nr_vertices )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+
+ nv04_render->buffer = (unsigned char*) MALLOC(VERTEX_BUFFER_SIZE);
+ assert(!nv04_render->buffer);
+
+ return nv04_render->buffer;
+}
+
+
+static boolean
+nv04_vbuf_render_set_primitive( struct vbuf_render *render,
+ unsigned prim )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+
+ if (prim <= PIPE_PRIM_LINE_STRIP)
+ return FALSE;
+
+ nv04_render->prim = prim;
+ return TRUE;
+}
+
+static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5)
+{
+ BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49);
+ OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v3,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v4,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v5,8);
+ OUT_RING(0xFEDCBA);
+}
+
+static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2)
+{
+ BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25);
+ OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
+ OUT_RING(0xFED);
+}
+
+static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3)
+{
+ BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33);
+ OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v3,8);
+ OUT_RING(0xFECEDC);
+}
+
+static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices)
+{
+ unsigned char* buffer = render->buffer;
+ struct nv04_context* nv04 = render->nv04;
+ int i;
+
+ for( i=0; i< nr_indices-5; i+=6)
+ nv04_2triangles(nv04,
+ buffer,
+ indices[i+0],
+ indices[i+1],
+ indices[i+2],
+ indices[i+3],
+ indices[i+4],
+ indices[i+5]
+ );
+ if (i != nr_indices)
+ {
+ nv04_1triangle(nv04,
+ buffer,
+ indices[i+0],
+ indices[i+1],
+ indices[i+2]
+ );
+ i+=3;
+ }
+ if (i != nr_indices)
+ NOUVEAU_ERR("Houston, we have lost some vertices\n");
+}
+
+static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices)
+{
+ const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC};
+ unsigned char* buffer = render->buffer;
+ struct nv04_context* nv04 = render->nv04;
+ int i,j;
+
+ for(i = 0; i<nr_indices; i+=14)
+ {
+ int numvert = MIN2(16, nr_indices - i);
+ int numtri = numvert - 2;
+ if (numvert<3)
+ break;
+
+ BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 );
+ for(j = 0; j<numvert; j++)
+ OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 );
+
+ BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 );
+ for(j = 0; j<numtri/2; j++ )
+ OUT_RING(striptbl[j]);
+ if (numtri%2)
+ OUT_RING(striptbl[numtri/2]&0xFFF);
+ }
+}
+
+static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices)
+{
+ const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0};
+ unsigned char* buffer = render->buffer;
+ struct nv04_context* nv04 = render->nv04;
+ int i,j;
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
+ OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8);
+
+ for(i = 1; i<nr_indices; i+=14)
+ {
+ int numvert=MIN2(15, nr_indices - i);
+ int numtri=numvert-2;
+ if (numvert < 3)
+ break;
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
+
+ for(j=0;j<numvert;j++)
+ OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 );
+
+ BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2);
+ for(j = 0; j<numtri/2; j++)
+ OUT_RING(fantbl[j]);
+ if (numtri%2)
+ OUT_RING(fantbl[numtri/2]&0xFFF);
+ }
+}
+
+static void nv04_vbuf_render_quads_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices)
+{
+ unsigned char* buffer = render->buffer;
+ struct nv04_context* nv04 = render->nv04;
+ int i;
+
+ for(i = 0; i < nr_indices; i += 4)
+ nv04_1quad(nv04,
+ buffer,
+ indices[i+0],
+ indices[i+1],
+ indices[i+2],
+ indices[i+3]
+ );
+}
+
+
+static void
+nv04_vbuf_render_draw( struct vbuf_render *render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+
+ // emit the indices
+ switch( nv04_render->prim )
+ {
+ case PIPE_PRIM_TRIANGLES:
+ nv04_vbuf_render_triangles_elts(nv04_render, indices, nr_indices);
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ nv04_vbuf_render_tri_strip_elts(nv04_render, indices, nr_indices);
+ break;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ nv04_vbuf_render_tri_fan_elts(nv04_render, indices, nr_indices);
+ break;
+ case PIPE_PRIM_QUADS:
+ nv04_vbuf_render_quads_elts(nv04_render, indices, nr_indices);
+ break;
+ default:
+ NOUVEAU_ERR("You have to implement primitive %d, young padawan\n", nv04_render->prim);
+ break;
+ }
+}
+
+
+static void
+nv04_vbuf_render_release_vertices( struct vbuf_render *render,
+ void *vertices,
+ unsigned vertex_size,
+ unsigned vertices_used )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+
+ free(nv04_render->buffer);
+ nv04_render->buffer = NULL;
+}
+
+
+static void
+nv04_vbuf_render_destroy( struct vbuf_render *render )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+ FREE(nv04_render);
+}
+
+
+/**
+ * Create a new primitive render.
+ */
+static struct vbuf_render *
+nv04_vbuf_render_create( struct nv04_context *nv04 )
+{
+ struct nv04_vbuf_render *nv04_render = CALLOC_STRUCT(nv04_vbuf_render);
+
+ nv04_render->nv04 = nv04;
+
+ nv04_render->base.max_vertex_buffer_bytes = VERTEX_BUFFER_SIZE;
+ nv04_render->base.max_indices = 65536;
+ nv04_render->base.get_vertex_info = nv04_vbuf_render_get_vertex_info;
+ nv04_render->base.allocate_vertices = nv04_vbuf_render_allocate_vertices;
+ nv04_render->base.set_primitive = nv04_vbuf_render_set_primitive;
+ nv04_render->base.draw = nv04_vbuf_render_draw;
+ nv04_render->base.release_vertices = nv04_vbuf_render_release_vertices;
+ nv04_render->base.destroy = nv04_vbuf_render_destroy;
+
+ return &nv04_render->base;
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ */
+struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 )
+{
+ struct vbuf_render *render;
+ struct draw_stage *stage;
+
+ render = nv04_vbuf_render_create(nv04);
+ if(!render)
+ return NULL;
+
+ stage = draw_vbuf_stage( nv04->draw, render );
+ if(!stage) {
+ render->destroy(render);
+ return NULL;
+ }
+
+ return stage;
+}
diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c
new file mode 100644
index 0000000000..65eacde6b2
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_screen.c
@@ -0,0 +1,214 @@
+#include "pipe/p_screen.h"
+
+#include "nv04_context.h"
+#include "nv04_screen.h"
+
+static const char *
+nv04_screen_get_name(struct pipe_screen *screen)
+{
+ struct nv04_screen *nv04screen = nv04_screen(screen);
+ struct nouveau_device *dev = nv04screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv04_screen_get_vendor(struct pipe_screen *screen)
+{
+ return "nouveau";
+}
+
+static int
+nv04_screen_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 1;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 0;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 0;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv04_screen_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 0.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 0.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 0.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 0.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nv04_screen_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static void *
+nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+ void *map;
+
+ map = ws->buffer_map(ws, surface->buffer, flags);
+ if (!map)
+ return NULL;
+
+ return map + surface->offset;
+}
+
+static void
+nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+
+ ws->buffer_unmap(ws, surface->buffer);
+}
+
+static void
+nv04_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv04_screen *screen = nv04_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->fahrenheit);
+
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv04_screen *screen = CALLOC_STRUCT(nv04_screen);
+ unsigned fahrenheit_class = 0, sub3d_class = 0;
+ unsigned chipset = nvws->channel->device->chipset;
+ int ret;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ if (chipset>=0x20) {
+ fahrenheit_class = 0;
+ sub3d_class = 0;
+ } else if (chipset>=0x10) {
+ fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE;
+ sub3d_class = NV10_CONTEXT_SURFACES_3D;
+ } else {
+ fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE;
+ sub3d_class = NV04_CONTEXT_SURFACES_3D;
+ }
+
+ if (!fahrenheit_class) {
+ NOUVEAU_ERR("Unknown nv04 chipset: nv%02x\n", chipset);
+ return NULL;
+ }
+
+ /* 3D object */
+ ret = nvws->grobj_alloc(nvws, fahrenheit_class, &screen->fahrenheit);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return NULL;
+ }
+
+ /* 3D surface object */
+ ret = nvws->grobj_alloc(nvws, sub3d_class, &screen->context_surfaces_3d);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D surface object: %d\n", ret);
+ return NULL;
+ }
+
+ /* Notifier for sync purposes */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv04_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ screen->pipe.winsys = ws;
+ screen->pipe.destroy = nv04_screen_destroy;
+
+ screen->pipe.get_name = nv04_screen_get_name;
+ screen->pipe.get_vendor = nv04_screen_get_vendor;
+ screen->pipe.get_param = nv04_screen_get_param;
+ screen->pipe.get_paramf = nv04_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv04_screen_is_format_supported;
+
+ screen->pipe.surface_map = nv04_surface_map;
+ screen->pipe.surface_unmap = nv04_surface_unmap;
+
+ nv04_screen_init_miptree_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_screen.h b/src/gallium/drivers/nv04/nv04_screen.h
new file mode 100644
index 0000000000..99a49cdf7a
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_screen.h
@@ -0,0 +1,25 @@
+#ifndef __NV04_SCREEN_H__
+#define __NV04_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv04_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+ unsigned chipset;
+
+ /* HW graphics objects */
+ struct nouveau_grobj *fahrenheit;
+ struct nouveau_grobj *context_surfaces_3d;
+ struct nouveau_notifier *sync;
+
+};
+
+static INLINE struct nv04_screen *
+nv04_screen(struct pipe_screen *screen)
+{
+ return (struct nv04_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c
new file mode 100644
index 0000000000..ff1933b550
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_state.c
@@ -0,0 +1,495 @@
+#include "draw/draw_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+static void *
+nv04_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv04_blend_state *cb;
+
+ cb = MALLOC(sizeof(struct nv04_blend_state));
+
+ cb->b_enable = cso->blend_enable ? 1 : 0;
+ cb->b_src = ((nvgl_blend_func(cso->alpha_src_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_src_factor)));
+ cb->b_dst = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_dst_factor)));
+
+
+ return (void *)cb;
+}
+
+static void
+nv04_blend_state_bind(struct pipe_context *pipe, void *blend)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ nv04->blend = (struct nv04_blend_state*)blend;
+
+ nv04->dirty |= NV04_NEW_BLEND;
+}
+
+static void
+nv04_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ free(hwcso);
+}
+
+
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+ }
+ return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
+}
+
+static void *
+nv04_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+
+ struct nv04_sampler_state *ss;
+ uint32_t filter = 0;
+
+ ss = MALLOC(sizeof(struct nv04_sampler_state));
+
+ ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
+
+ if (cso->max_anisotropy > 1.0) {
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ss->filter = filter;
+
+ return (void *)ss;
+}
+
+static void
+nv04_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv04->sampler[unit] = sampler[unit];
+ nv04->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void
+nv04_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ free(hwcso);
+}
+
+static void
+nv04_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv04->tex_miptree[unit] = (struct nv04_miptree *)miptree[unit];
+ nv04->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void *
+nv04_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv04_rasterizer_state *rs;
+
+ /*XXX: ignored:
+ * scissor
+ * points/lines (no hw support, emulated with tris in gallium)
+ */
+ rs = MALLOC(sizeof(struct nv04_rasterizer_state));
+
+ rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
+
+ return (void *)rs;
+}
+
+static void
+nv04_rasterizer_state_bind(struct pipe_context *pipe, void *rast)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ nv04->rast = (struct nv04_rasterizer_state*)rast;
+
+ draw_set_rasterizer_state(nv04->draw, (nv04->rast ? nv04->rast->templ : NULL));
+
+ nv04->dirty |= NV04_NEW_RAST | NV04_NEW_BLEND;
+}
+
+static void
+nv04_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ free(hwcso);
+}
+
+static INLINE uint32_t nv04_compare_func(uint32_t f)
+{
+ switch ( f ) {
+ case PIPE_FUNC_NEVER: return 1;
+ case PIPE_FUNC_LESS: return 2;
+ case PIPE_FUNC_EQUAL: return 3;
+ case PIPE_FUNC_LEQUAL: return 4;
+ case PIPE_FUNC_GREATER: return 5;
+ case PIPE_FUNC_NOTEQUAL: return 6;
+ case PIPE_FUNC_GEQUAL: return 7;
+ case PIPE_FUNC_ALWAYS: return 8;
+ }
+ NOUVEAU_MSG("Unable to find the function\n");
+ return 0;
+}
+
+static void *
+nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv04_depth_stencil_alpha_state *hw;
+
+ hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state));
+
+ hw->control = float_to_ubyte(cso->alpha.ref);
+ hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
+ hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0;
+ hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
+ hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0;
+ hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
+ hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
+ hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
+ hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
+ hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0;
+ hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
+
+ return (void *)hw;
+}
+
+static void
+nv04_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ nv04->dsa = hwcso;
+ nv04->dirty |= NV04_NEW_CONTROL;
+}
+
+static void
+nv04_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ free(hwcso);
+}
+
+static void *
+nv04_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ return draw_create_vertex_shader(nv04->draw, templ);
+}
+
+static void
+nv04_vp_state_bind(struct pipe_context *pipe, void *shader)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_bind_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader);
+
+ nv04->dirty |= NV04_NEW_VERTPROG;
+}
+
+static void
+nv04_vp_state_delete(struct pipe_context *pipe, void *shader)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_delete_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader);
+}
+
+static void *
+nv04_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv04_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nv04_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ return (void *)fp;
+}
+
+static void
+nv04_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct nv04_fragment_program *fp = hwcso;
+
+ nv04->fragprog.current = fp;
+ nv04->dirty |= NV04_NEW_FRAGPROG;
+}
+
+static void
+nv04_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct nv04_fragment_program *fp = hwcso;
+
+ nv04_fragprog_destroy(nv04, fp);
+ free((void*)fp->pipe.tokens);
+ free(fp);
+}
+
+static void
+nv04_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+}
+
+static void
+nv04_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+}
+
+static void
+nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ if (shader == PIPE_SHADER_VERTEX) {
+ nv04->vertprog.constant_buf = buf->buffer;
+ nv04->dirty |= NV04_NEW_VERTPROG;
+ } else
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ nv04->fragprog.constant_buf = buf->buffer;
+ nv04->dirty |= NV04_NEW_FRAGPROG;
+ }
+}
+
+static void
+nv04_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct pipe_surface *rt, *zeta;
+ uint32_t rt_format, w, h;
+ int colour_format = 0, zeta_format = 0;
+
+ w = fb->cbufs[0]->width;
+ h = fb->cbufs[0]->height;
+ colour_format = fb->cbufs[0]->format;
+ rt = fb->cbufs[0];
+
+ if (fb->zsbuf) {
+ if (colour_format) {
+ assert(w == fb->zsbuf->width);
+ assert(h == fb->zsbuf->height);
+ } else {
+ w = fb->zsbuf->width;
+ h = fb->zsbuf->height;
+ }
+
+ zeta_format = fb->zsbuf->format;
+ zeta = fb->zsbuf;
+ }
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+ rt_format = 0x108;
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ rt_format = 0x103;
+ break;
+ default:
+ assert(0);
+ }
+
+ BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1);
+ OUT_RING(rt_format);
+
+ /* FIXME pitches have to be aligned ! */
+ BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+ OUT_RING(rt->stride|(zeta->stride<<16));
+ OUT_RELOCl(rt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ if (fb->zsbuf) {
+ BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+ OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ }
+}
+
+static void
+nv04_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ NOUVEAU_ERR("line stipple hahaha\n");
+}
+
+static void
+nv04_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+/* struct nv04_context *nv04 = nv04_context(pipe);
+
+ // XXX
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
+ OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
+}
+
+static void
+nv04_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ nv04->viewport = *viewport;
+
+ draw_set_viewport_state(nv04->draw, &nv04->viewport);
+}
+
+static void
+nv04_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_flush(nv04->draw);
+
+ memcpy(nv04->vertex_buffer, buffers, count * sizeof(buffers[0]));
+ nv04->num_vertex_buffers = count;
+
+ draw_set_vertex_buffers(nv04->draw, count, buffers);
+}
+
+static void
+nv04_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *elements)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_flush(nv04->draw);
+
+ nv04->num_vertex_elements = count;
+ draw_set_vertex_elements(nv04->draw, count, elements);
+}
+
+void
+nv04_init_state_functions(struct nv04_context *nv04)
+{
+ nv04->pipe.create_blend_state = nv04_blend_state_create;
+ nv04->pipe.bind_blend_state = nv04_blend_state_bind;
+ nv04->pipe.delete_blend_state = nv04_blend_state_delete;
+
+ nv04->pipe.create_sampler_state = nv04_sampler_state_create;
+ nv04->pipe.bind_sampler_states = nv04_sampler_state_bind;
+ nv04->pipe.delete_sampler_state = nv04_sampler_state_delete;
+ nv04->pipe.set_sampler_textures = nv04_set_sampler_texture;
+
+ nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create;
+ nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind;
+ nv04->pipe.delete_rasterizer_state = nv04_rasterizer_state_delete;
+
+ nv04->pipe.create_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_create;
+ nv04->pipe.bind_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_bind;
+ nv04->pipe.delete_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_delete;
+
+ nv04->pipe.create_vs_state = nv04_vp_state_create;
+ nv04->pipe.bind_vs_state = nv04_vp_state_bind;
+ nv04->pipe.delete_vs_state = nv04_vp_state_delete;
+
+ nv04->pipe.create_fs_state = nv04_fp_state_create;
+ nv04->pipe.bind_fs_state = nv04_fp_state_bind;
+ nv04->pipe.delete_fs_state = nv04_fp_state_delete;
+
+ nv04->pipe.set_blend_color = nv04_set_blend_color;
+ nv04->pipe.set_clip_state = nv04_set_clip_state;
+ nv04->pipe.set_constant_buffer = nv04_set_constant_buffer;
+ nv04->pipe.set_framebuffer_state = nv04_set_framebuffer_state;
+ nv04->pipe.set_polygon_stipple = nv04_set_polygon_stipple;
+ nv04->pipe.set_scissor_state = nv04_set_scissor_state;
+ nv04->pipe.set_viewport_state = nv04_set_viewport_state;
+
+ nv04->pipe.set_vertex_buffers = nv04_set_vertex_buffers;
+ nv04->pipe.set_vertex_elements = nv04_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h
new file mode 100644
index 0000000000..399f750dbe
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_state.h
@@ -0,0 +1,71 @@
+#ifndef __NV04_STATE_H__
+#define __NV04_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv04_blend_state {
+ uint32_t b_enable;
+ uint32_t b_src;
+ uint32_t b_dst;
+};
+
+struct nv04_fragtex_state {
+ uint32_t format;
+};
+
+struct nv04_sampler_state {
+ uint32_t filter;
+ uint32_t format;
+};
+
+struct nv04_depth_stencil_alpha_state {
+ uint32_t control;
+};
+
+struct nv04_rasterizer_state {
+ uint32_t blend;
+
+ const struct pipe_rasterizer_state *templ;
+};
+
+struct nv04_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+struct nv04_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv04_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv04_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+};
+
+
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c
new file mode 100644
index 0000000000..0ad40a092e
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_state_emit.c
@@ -0,0 +1,156 @@
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+static void nv04_vertex_layout(struct pipe_context* pipe)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct nv04_fragment_program *fp = nv04->fragprog.current;
+ uint32_t src = 0;
+ int i;
+ struct vertex_info vinfo;
+
+ memset(&vinfo, 0, sizeof(vinfo));
+
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ switch (i) {
+ case TGSI_SEMANTIC_POSITION:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++);
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++);
+ break;
+ default:
+ case TGSI_SEMANTIC_GENERIC:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
+ break;
+ case TGSI_SEMANTIC_FOG:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
+ break;
+ }
+ }
+ draw_compute_vertex_size(&vinfo);
+}
+
+static uint32_t nv04_blend_func(uint32_t f)
+{
+ switch ( f ) {
+ case PIPE_BLENDFACTOR_ZERO: return 0x1;
+ case PIPE_BLENDFACTOR_ONE: return 0x2;
+ case PIPE_BLENDFACTOR_SRC_COLOR: return 0x3;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR: return 0x4;
+ case PIPE_BLENDFACTOR_SRC_ALPHA: return 0x5;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return 0x6;
+ case PIPE_BLENDFACTOR_DST_ALPHA: return 0x7;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: return 0x8;
+ case PIPE_BLENDFACTOR_DST_COLOR: return 0x9;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR: return 0xA;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return 0xB;
+ }
+ NOUVEAU_MSG("Unable to find the blend function 0x%x\n",f);
+ return 0;
+}
+
+static void nv04_emit_control(struct nv04_context* nv04)
+{
+ uint32_t control = nv04->dsa->control;
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(control);
+}
+
+static void nv04_emit_blend(struct nv04_context* nv04)
+{
+ uint32_t blend;
+
+ blend=0x4; // texture MODULATE_ALPHA
+ blend|=0x20; // alpha is MSB
+ blend|=(2<<6); // flat shading
+ blend|=(1<<8); // persp correct
+ blend|=(0<<16); // no fog
+ blend|=(nv04->blend->b_enable<<20);
+ blend|=(nv04_blend_func(nv04->blend->b_src)<<24);
+ blend|=(nv04_blend_func(nv04->blend->b_dst)<<28);
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+ OUT_RING(blend);
+}
+
+static void nv04_emit_sampler(struct nv04_context *nv04, int unit)
+{
+ struct nv04_miptree *nv04mt = nv04->tex_miptree[unit];
+ struct pipe_texture *pt = &nv04mt->base;
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3);
+ OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING(nv04->sampler[unit]->filter);
+}
+
+void
+nv04_emit_hw_state(struct nv04_context *nv04)
+{
+ int i;
+
+ if (nv04->dirty & NV04_NEW_VERTPROG) {
+ //nv04_vertprog_bind(nv04, nv04->vertprog.current);
+ nv04->dirty &= ~NV04_NEW_VERTPROG;
+ }
+
+ if (nv04->dirty & NV04_NEW_FRAGPROG) {
+ nv04_fragprog_bind(nv04, nv04->fragprog.current);
+ /*XXX: clear NV04_NEW_FRAGPROG if no new program uploaded */
+ nv04->dirty_samplers |= (1<<10);
+ nv04->dirty_samplers = 0;
+ }
+
+ if (nv04->dirty & NV04_NEW_CONTROL) {
+ nv04->dirty &= ~NV04_NEW_CONTROL;
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(nv04->dsa->control);
+ }
+
+ if (nv04->dirty & NV04_NEW_BLEND) {
+ nv04->dirty &= ~NV04_NEW_BLEND;
+
+ nv04_emit_blend(nv04);
+ }
+
+ if (nv04->dirty & NV04_NEW_SAMPLER) {
+ nv04->dirty &= ~NV04_NEW_SAMPLER;
+
+ nv04_emit_sampler(nv04, 0);
+ }
+
+ if (nv04->dirty & NV04_NEW_VIEWPORT) {
+ nv04->dirty &= ~NV04_NEW_VIEWPORT;
+// nv04_state_emit_viewport(nv04);
+ }
+
+ /* Emit relocs for every referenced buffer.
+ * This is to ensure the bufmgr has an accurate idea of how
+ * the buffer is used. This isn't very efficient, but we don't
+ * seem to take a significant performance hit. Will be improved
+ * at some point. Vertex arrays are emitted by nv04_vbo.c
+ */
+
+ /* Render target */
+/* BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+ OUT_RING(rt->stride|(zeta->stride<<16));
+ OUT_RELOCl(rt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ if (fb->zsbuf) {
+ BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+ OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ }*/
+
+ /* Texture images */
+ for (i = 0; i < 1; i++) {
+ if (!(nv04->fp_samplers & (1 << i)))
+ continue;
+ struct nv04_miptree *nv04mt = nv04->tex_miptree[i];
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2);
+ OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ }
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_surface.c b/src/gallium/drivers/nv04/nv04_surface.c
new file mode 100644
index 0000000000..9d9943ed4e
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_surface.c
@@ -0,0 +1,64 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv04_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+static void
+nv04_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct nouveau_winsys *nvws = nv04->nvws;
+
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+}
+
+static void
+nv04_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct nouveau_winsys *nvws = nv04->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+void
+nv04_init_surface_functions(struct nv04_context *nv04)
+{
+ nv04->pipe.surface_copy = nv04_surface_copy;
+ nv04->pipe.surface_fill = nv04_surface_fill;
+}
diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c
new file mode 100644
index 0000000000..91f919d48e
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_vbo.c
@@ -0,0 +1,71 @@
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+boolean nv04_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct nv04_context *nv04 = nv04_context( pipe );
+ struct draw_context *draw = nv04->draw;
+ unsigned i;
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv04->vertex_buffer[i].buffer) {
+ void *buf
+ = pipe->winsys->buffer_map(pipe->winsys,
+ nv04->vertex_buffer[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes
+ = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ /* draw! */
+ draw_arrays(nv04->draw, prim, start, count);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv04->vertex_buffer[i].buffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, nv04->vertex_buffer[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ }
+ if (indexBuffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ return TRUE;
+}
+
+boolean nv04_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
+{
+ return nv04_draw_elements(pipe, NULL, 0, prim, start, count);
+}
+
+
+
diff --git a/src/gallium/drivers/nv10/Makefile b/src/gallium/drivers/nv10/Makefile
new file mode 100644
index 0000000000..4ba7ce586d
--- /dev/null
+++ b/src/gallium/drivers/nv10/Makefile
@@ -0,0 +1,28 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv10
+
+DRIVER_SOURCES = \
+ nv10_clear.c \
+ nv10_context.c \
+ nv10_fragprog.c \
+ nv10_fragtex.c \
+ nv10_miptree.c \
+ nv10_prim_vbuf.c \
+ nv10_screen.c \
+ nv10_state.c \
+ nv10_state_emit.c \
+ nv10_surface.c \
+ nv10_vbo.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv10/nv10_clear.c b/src/gallium/drivers/nv10/nv10_clear.c
new file mode 100644
index 0000000000..be7e09cf4b
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_clear.c
@@ -0,0 +1,12 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv10_context.h"
+
+void
+nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+}
diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c
new file mode 100644
index 0000000000..4eb4ed9185
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_context.c
@@ -0,0 +1,296 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv10_context.h"
+#include "nv10_screen.h"
+
+static void
+nv10_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ draw_flush(nv10->draw);
+
+ FIRE_RING(fence);
+}
+
+static void
+nv10_destroy(struct pipe_context *pipe)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ if (nv10->draw)
+ draw_destroy(nv10->draw);
+
+ FREE(nv10);
+}
+
+static void nv10_init_hwctx(struct nv10_context *nv10)
+{
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_winsys *nvws = screen->nvws;
+ int i;
+ float projectionmatrix[16];
+
+ BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1);
+ OUT_RING (screen->sync->handle);
+ BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->gart->handle);
+ BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->vram->handle);
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+ OUT_RING ((0x7ff<<16)|0x800);
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
+ OUT_RING ((0x7ff<<16)|0x800);
+
+ for (i=1;i<8;i++) {
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
+ OUT_RING (0);
+ }
+
+ BEGIN_RING(celsius, 0x290, 1);
+ OUT_RING ((0x10<<16)|1);
+ BEGIN_RING(celsius, 0x3f4, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ if (nv10->screen->celsius->grclass != NV10TCL) {
+ /* For nv11, nv17 */
+ BEGIN_RING(celsius, 0x120, 3);
+ OUT_RING (0);
+ OUT_RING (1);
+ OUT_RING (2);
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+ }
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ /* Set state */
+ BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2);
+ OUT_RING (0x207);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2);
+ OUT_RING (0);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12);
+ OUT_RING (0x30141010);
+ OUT_RING (0);
+ OUT_RING (0x20040000);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0x00000c00);
+ OUT_RING (0);
+ OUT_RING (0x00000c00);
+ OUT_RING (0x18000000);
+ OUT_RING (0x300e0300);
+ OUT_RING (0x0c091c80);
+
+ BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2);
+ OUT_RING (1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4);
+ OUT_RING (1);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0x8006);
+ BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8);
+ OUT_RING (0xff);
+ OUT_RING (0x207);
+ OUT_RING (0);
+ OUT_RING (0xff);
+ OUT_RING (0x1e00);
+ OUT_RING (0x1e00);
+ OUT_RING (0x1e00);
+ OUT_RING (0x1d01);
+ BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
+ OUT_RING (0x201);
+ BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
+ OUT_RING (8);
+ BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1);
+ OUT_RING (8);
+ BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (0x1b02);
+ OUT_RING (0x1b02);
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
+ OUT_RING (0x405);
+ OUT_RING (0x901);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8);
+ for (i=0;i<8;i++) {
+ OUT_RING (0);
+ }
+ BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
+ OUT_RING (0x3fc00000); /* -1.50 */
+ OUT_RING (0xbdb8aa0a); /* -0.09 */
+ OUT_RING (0); /* 0.00 */
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2);
+ OUT_RING (0x802);
+ OUT_RING (2);
+ /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
+ * using texturing, except when using the texture matrix
+ */
+ BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
+ OUT_RING (6);
+ BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
+ OUT_RING (0x01010101);
+
+ /* Set vertex component */
+ BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4);
+ OUT_RINGf (1.0);
+ OUT_RINGf (1.0);
+ OUT_RINGf (1.0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1);
+ OUT_RINGf (0.0);
+ BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (1);
+
+ memset(projectionmatrix, 0, sizeof(projectionmatrix));
+ BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16);
+ projectionmatrix[0*4+0] = 1.0;
+ projectionmatrix[1*4+1] = 1.0;
+ projectionmatrix[2*4+2] = 1.0;
+ projectionmatrix[3*4+3] = 1.0;
+ for (i=0;i<16;i++) {
+ OUT_RINGf (projectionmatrix[i]);
+ }
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
+ OUT_RING (0.0);
+ OUT_RINGf (16777216.0);
+
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (16777215.0 * 0.5);
+ OUT_RING (0);
+
+ FIRE_RING (NULL);
+}
+
+static void
+nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+struct pipe_context *
+nv10_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv10_screen *screen = nv10_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv10_context *nv10;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv10 = CALLOC(1, sizeof(struct nv10_context));
+ if (!nv10)
+ return NULL;
+ nv10->screen = screen;
+ nv10->pctx_id = pctx_id;
+
+ nv10->nvws = nvws;
+
+ nv10->pipe.winsys = ws;
+ nv10->pipe.screen = pscreen;
+ nv10->pipe.destroy = nv10_destroy;
+ nv10->pipe.set_edgeflags = nv10_set_edgeflags;
+ nv10->pipe.draw_arrays = nv10_draw_arrays;
+ nv10->pipe.draw_elements = nv10_draw_elements;
+ nv10->pipe.clear = nv10_clear;
+ nv10->pipe.flush = nv10_flush;
+
+ nv10_init_surface_functions(nv10);
+ nv10_init_state_functions(nv10);
+
+ nv10->draw = draw_create();
+ assert(nv10->draw);
+ draw_set_rasterize_stage(nv10->draw, nv10_draw_vbuf_stage(nv10));
+
+ nv10_init_hwctx(nv10);
+
+ return &nv10->pipe;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h
new file mode 100644
index 0000000000..f3b56de25a
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_context.h
@@ -0,0 +1,153 @@
+#ifndef __NV10_CONTEXT_H__
+#define __NV10_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv10_screen *ctx = nv10->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv10_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+#define NV10_NEW_VERTPROG (1 << 0)
+#define NV10_NEW_FRAGPROG (1 << 1)
+#define NV10_NEW_VTXARRAYS (1 << 2)
+#define NV10_NEW_BLEND (1 << 3)
+#define NV10_NEW_BLENDCOL (1 << 4)
+#define NV10_NEW_RAST (1 << 5)
+#define NV10_NEW_DSA (1 << 6)
+#define NV10_NEW_VIEWPORT (1 << 7)
+#define NV10_NEW_SCISSOR (1 << 8)
+#define NV10_NEW_FRAMEBUFFER (1 << 9)
+
+#include "nv10_screen.h"
+
+struct nv10_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv10_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ uint32_t dirty;
+
+ struct nv10_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv10_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned dirty_samplers;
+ unsigned fp_samplers;
+ unsigned vp_samplers;
+
+ uint32_t rt_enable;
+ struct pipe_buffer *rt[4];
+ struct pipe_buffer *zeta;
+ uint32_t lma_offset;
+
+ struct nv10_blend_state *blend;
+ struct pipe_blend_color *blend_color;
+ struct nv10_rasterizer_state *rast;
+ struct nv10_depth_stencil_alpha_state *dsa;
+ struct pipe_viewport_state *viewport;
+ struct pipe_scissor_state *scissor;
+ struct pipe_framebuffer_state *framebuffer;
+
+ //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ float *constbuf[PIPE_SHADER_TYPES][32][4];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+
+ struct vertex_info vertex_info;
+
+ struct {
+ struct pipe_buffer *buffer;
+ uint32_t format;
+ } tex[2];
+
+ unsigned vb_enable;
+ struct {
+ struct pipe_buffer *buffer;
+ unsigned delta;
+ } vb[16];
+
+/* struct {
+
+ struct nouveau_resource *exec_heap;
+ struct nouveau_resource *data_heap;
+
+ struct nv10_vertex_program *active;
+
+ struct nv10_vertex_program *current;
+ } vertprog;
+*/
+ struct {
+ struct nv10_fragment_program *active;
+
+ struct nv10_fragment_program *current;
+ struct pipe_buffer *constant_buf;
+ } fragprog;
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+};
+
+static INLINE struct nv10_context *
+nv10_context(struct pipe_context *pipe)
+{
+ return (struct nv10_context *)pipe;
+}
+
+extern void nv10_init_state_functions(struct nv10_context *nv10);
+extern void nv10_init_surface_functions(struct nv10_context *nv10);
+
+extern void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv10_clear.c */
+extern void nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv10_draw.c */
+extern struct draw_stage *nv10_draw_render_stage(struct nv10_context *nv10);
+
+/* nv10_fragprog.c */
+extern void nv10_fragprog_bind(struct nv10_context *,
+ struct nv10_fragment_program *);
+extern void nv10_fragprog_destroy(struct nv10_context *,
+ struct nv10_fragment_program *);
+
+/* nv10_fragtex.c */
+extern void nv10_fragtex_bind(struct nv10_context *);
+
+/* nv10_prim_vbuf.c */
+struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 );
+extern void nv10_vtxbuf_bind(struct nv10_context* nv10);
+
+/* nv10_state.c and friends */
+extern void nv10_emit_hw_state(struct nv10_context *nv10);
+extern void nv10_state_tex_update(struct nv10_context *nv10);
+
+/* nv10_vbo.c */
+extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv10_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/nv10/nv10_fragprog.c b/src/gallium/drivers/nv10/nv10_fragprog.c
new file mode 100644
index 0000000000..698db5a16a
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_fragprog.c
@@ -0,0 +1,21 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv10_context.h"
+
+void
+nv10_fragprog_bind(struct nv10_context *nv10, struct nv10_fragment_program *fp)
+{
+}
+
+void
+nv10_fragprog_destroy(struct nv10_context *nv10,
+ struct nv10_fragment_program *fp)
+{
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c
new file mode 100644
index 0000000000..27f2f87584
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_fragtex.c
@@ -0,0 +1,124 @@
+#include "nv10_context.h"
+#include "nouveau/nouveau_util.h"
+
+#define _(m,tf) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV10TCL_TX_FORMAT_FORMAT_##tf, \
+}
+
+struct nv10_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+};
+
+static struct nv10_texture_format
+nv10_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8),
+ _(A1R5G5B5_UNORM, A1R5G5B5),
+ _(A4R4G4B4_UNORM, A4R4G4B4),
+ _(L8_UNORM , L8 ),
+ _(A8_UNORM , A8 ),
+ _(A8L8_UNORM , A8L8 ),
+// _(RGB_DXT1 , DXT1, ),
+// _(RGBA_DXT1 , DXT1, ),
+// _(RGBA_DXT3 , DXT3, ),
+// _(RGBA_DXT5 , DXT5, ),
+ {},
+};
+
+static struct nv10_texture_format *
+nv10_fragtex_format(uint pipe_format)
+{
+ struct nv10_texture_format *tf = nv10_texture_formats;
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ return NULL;
+}
+
+
+static void
+nv10_fragtex_build(struct nv10_context *nv10, int unit)
+{
+#if 0
+ struct nv10_sampler_state *ps = nv10->tex_sampler[unit];
+ struct nv10_miptree *nv10mt = nv10->tex_miptree[unit];
+ struct pipe_texture *pt = &nv10mt->base;
+ struct nv10_texture_format *tf;
+ uint32_t txf, txs, txp;
+
+ tf = nv10_fragtex_format(pt->format);
+ if (!tf || !tf->defined) {
+ NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format);
+ return;
+ }
+
+ txf = tf->format << 8;
+ txf |= (pt->last_level + 1) << 16;
+ txf |= log2i(pt->width[0]) << 20;
+ txf |= log2i(pt->height[0]) << 24;
+ txf |= log2i(pt->depth[0]) << 28;
+ txf |= 8;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV10TCL_TX_FORMAT_CUBE_MAP;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= (2<<4);
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= (1<<4);
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return;
+ }
+
+ BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8);
+ OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING (ps->wrap);
+ OUT_RING (0x40000000); /* enable */
+ OUT_RING (txs);
+ OUT_RING (ps->filt | 0x2000 /* magic */);
+ OUT_RING ((pt->width[0] << 16) | pt->height[0]);
+ OUT_RING (ps->bcol);
+#endif
+}
+
+void
+nv10_fragtex_bind(struct nv10_context *nv10)
+{
+#if 0
+ struct nv10_fragment_program *fp = nv10->fragprog.active;
+ unsigned samplers, unit;
+
+ samplers = nv10->fp_samplers & ~fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1);
+ OUT_RING (0);
+ }
+
+ samplers = nv10->dirty_samplers & fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ nv10_fragtex_build(nv10, unit);
+ }
+
+ nv10->fp_samplers = fp->samplers;
+#endif
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c
new file mode 100644
index 0000000000..f8c021261b
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_miptree.c
@@ -0,0 +1,149 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv10_context.h"
+#include "nv10_screen.h"
+
+static void
+nv10_miptree_layout(struct nv10_miptree *nv10mt)
+{
+ struct pipe_texture *pt = &nv10mt->base;
+ boolean swizzled = FALSE;
+ uint width = pt->width[0], height = pt->height[0];
+ uint offset = 0;
+ int nr_faces, l, f;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else {
+ nr_faces = 1;
+ }
+
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ if (swizzled)
+ nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size;
+ else
+ nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size;
+ nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63;
+
+ nv10mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ nv10mt->level[l].image_offset[f] = offset;
+ offset += nv10mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ nv10mt->total_size = offset;
+}
+
+static struct pipe_texture *
+nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct nv10_miptree *mt;
+
+ mt = MALLOC(sizeof(struct nv10_miptree));
+ if (!mt)
+ return NULL;
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = screen;
+
+ nv10_miptree_layout(mt);
+
+ mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL,
+ mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv10_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt)
+{
+ struct pipe_texture *mt = *pt;
+
+ *pt = NULL;
+ if (--mt->refcount <= 0) {
+ struct nv10_miptree *nv10mt = (struct nv10_miptree *)mt;
+ int l;
+
+ pipe_buffer_reference(screen, &nv10mt->buffer, NULL);
+ for (l = 0; l <= mt->last_level; l++) {
+ if (nv10mt->level[l].image_offset)
+ FREE(nv10mt->level[l].image_offset);
+ }
+ FREE(nv10mt);
+ }
+}
+
+static void
+nv10_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt,
+ uint face, uint levels)
+{
+}
+
+
+static struct pipe_surface *
+nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt;
+ struct pipe_surface *ps;
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (!ps)
+ return NULL;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(screen, &ps->buffer, nv10mt->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = nv10mt->level[level].pitch;
+ ps->refcount = 1;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ps->offset = nv10mt->level[level].image_offset[face];
+ } else {
+ ps->offset = nv10mt->level[level].image_offset[0];
+ }
+
+ return ps;
+}
+
+static void
+nv10_miptree_surface_release(struct pipe_screen *screen,
+ struct pipe_surface **surface)
+{
+}
+
+void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv10_miptree_create;
+ pscreen->texture_release = nv10_miptree_release;
+ pscreen->get_tex_surface = nv10_miptree_surface_get;
+ pscreen->tex_surface_release = nv10_miptree_surface_release;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
new file mode 100644
index 0000000000..e7e81d3dff
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
@@ -0,0 +1,245 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Build post-transformation, post-clipping vertex buffers and element
+ * lists by hooking into the end of the primitive pipeline and
+ * manipulating the vertex_id field in the vertex headers.
+ *
+ * XXX: work in progress
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+#include "draw/draw_vbuf.h"
+
+/**
+ * Primitive renderer for nv10.
+ */
+struct nv10_vbuf_render {
+ struct vbuf_render base;
+
+ struct nv10_context *nv10;
+
+ /** Vertex buffer */
+ struct pipe_buffer* buffer;
+
+ /** Vertex size in bytes */
+ unsigned vertex_size;
+
+ /** Hardware primitive */
+ unsigned hwprim;
+};
+
+
+void nv10_vtxbuf_bind( struct nv10_context* nv10 )
+{
+ int i;
+ for(i = 0; i < 8; i++) {
+ BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1);
+ OUT_RING(0/*nv10->vtxbuf*/);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1);
+ OUT_RING(0/*XXX*/);
+ }
+}
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct nv10_vbuf_render *
+nv10_vbuf_render( struct vbuf_render *render )
+{
+ assert(render);
+ return (struct nv10_vbuf_render *)render;
+}
+
+
+static const struct vertex_info *
+nv10_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ struct nv10_context *nv10 = nv10_render->nv10;
+
+ nv10_emit_hw_state(nv10);
+
+ return &nv10->vertex_info;
+}
+
+
+static void *
+nv10_vbuf_render_allocate_vertices( struct vbuf_render *render,
+ ushort vertex_size,
+ ushort nr_vertices )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ struct nv10_context *nv10 = nv10_render->nv10;
+ struct pipe_winsys *winsys = nv10->pipe.winsys;
+ size_t size = (size_t)vertex_size * (size_t)nr_vertices;
+
+ assert(!nv10_render->buffer);
+ nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size);
+
+ nv10->dirty |= NV10_NEW_VTXARRAYS;
+
+ return winsys->buffer_map(winsys,
+ nv10_render->buffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+}
+
+
+static boolean
+nv10_vbuf_render_set_primitive( struct vbuf_render *render,
+ unsigned prim )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ unsigned hwp = nvgl_primitive(prim);
+ if (hwp == 0)
+ return FALSE;
+
+ nv10_render->hwprim = hwp;
+ return TRUE;
+}
+
+
+static void
+nv10_vbuf_render_draw( struct vbuf_render *render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ struct nv10_context *nv10 = nv10_render->nv10;
+ int push, i;
+
+ nv10_emit_hw_state(nv10);
+
+ BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+ OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+
+ BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING(nv10_render->hwprim);
+
+ if (nr_indices & 1) {
+ BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (indices[0]);
+ indices++; nr_indices--;
+ }
+
+ while (nr_indices) {
+ // XXX too big/small ? check the size
+ push = MIN2(nr_indices, 1200 * 2);
+
+ BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((indices[i+1] << 16) | indices[i]);
+
+ nr_indices -= push;
+ indices += push;
+ }
+
+ BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING (0);
+}
+
+
+static void
+nv10_vbuf_render_release_vertices( struct vbuf_render *render,
+ void *vertices,
+ unsigned vertex_size,
+ unsigned vertices_used )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ struct nv10_context *nv10 = nv10_render->nv10;
+ struct pipe_winsys *winsys = nv10->pipe.winsys;
+ struct pipe_screen *pscreen = &nv10->screen->pipe;
+
+ assert(nv10_render->buffer);
+ winsys->buffer_unmap(winsys, nv10_render->buffer);
+ pipe_buffer_reference(pscreen, &nv10_render->buffer, NULL);
+}
+
+
+static void
+nv10_vbuf_render_destroy( struct vbuf_render *render )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ FREE(nv10_render);
+}
+
+
+/**
+ * Create a new primitive render.
+ */
+static struct vbuf_render *
+nv10_vbuf_render_create( struct nv10_context *nv10 )
+{
+ struct nv10_vbuf_render *nv10_render = CALLOC_STRUCT(nv10_vbuf_render);
+
+ nv10_render->nv10 = nv10;
+
+ nv10_render->base.max_vertex_buffer_bytes = 16*1024;
+ nv10_render->base.max_indices = 1024;
+ nv10_render->base.get_vertex_info = nv10_vbuf_render_get_vertex_info;
+ nv10_render->base.allocate_vertices = nv10_vbuf_render_allocate_vertices;
+ nv10_render->base.set_primitive = nv10_vbuf_render_set_primitive;
+ nv10_render->base.draw = nv10_vbuf_render_draw;
+ nv10_render->base.release_vertices = nv10_vbuf_render_release_vertices;
+ nv10_render->base.destroy = nv10_vbuf_render_destroy;
+
+ return &nv10_render->base;
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ */
+struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 )
+{
+ struct vbuf_render *render;
+ struct draw_stage *stage;
+
+ render = nv10_vbuf_render_create(nv10);
+ if(!render)
+ return NULL;
+
+ stage = draw_vbuf_stage( nv10->draw, render );
+ if(!stage) {
+ render->destroy(render);
+ return NULL;
+ }
+
+ return stage;
+}
diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c
new file mode 100644
index 0000000000..4d9fbd4b5f
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -0,0 +1,210 @@
+#include "pipe/p_screen.h"
+
+#include "nv10_context.h"
+#include "nv10_screen.h"
+
+static const char *
+nv10_screen_get_name(struct pipe_screen *screen)
+{
+ struct nv10_screen *nv10screen = nv10_screen(screen);
+ struct nouveau_device *dev = nv10screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv10_screen_get_vendor(struct pipe_screen *screen)
+{
+ return "nouveau";
+}
+
+static int
+nv10_screen_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 2;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 0;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 12;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 12;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 0;
+ case NOUVEAU_CAP_HW_VTXBUF:
+ case NOUVEAU_CAP_HW_IDXBUF:
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv10_screen_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 2.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nv10_screen_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static void *
+nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+ void *map;
+
+ map = ws->buffer_map(ws, surface->buffer, flags);
+ if (!map)
+ return NULL;
+
+ return map + surface->offset;
+}
+
+static void
+nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+
+ ws->buffer_unmap(ws, surface->buffer);
+}
+
+static void
+nv10_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv10_screen *screen = nv10_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->celsius);
+
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv10_screen *screen = CALLOC_STRUCT(nv10_screen);
+ unsigned celsius_class;
+ unsigned chipset = nvws->channel->device->chipset;
+ int ret;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ /* 3D object */
+ if (chipset>=0x20)
+ celsius_class=NV11TCL;
+ else if (chipset>=0x17)
+ celsius_class=NV17TCL;
+ else if (chipset>=0x11)
+ celsius_class=NV11TCL;
+ else
+ celsius_class=NV10TCL;
+
+ if (!celsius_class) {
+ NOUVEAU_ERR("Unknown nv1x chipset: nv%02x\n", chipset);
+ return NULL;
+ }
+
+ ret = nvws->grobj_alloc(nvws, celsius_class, &screen->celsius);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return FALSE;
+ }
+
+ /* Notifier for sync purposes */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv10_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ screen->pipe.winsys = ws;
+ screen->pipe.destroy = nv10_screen_destroy;
+
+ screen->pipe.get_name = nv10_screen_get_name;
+ screen->pipe.get_vendor = nv10_screen_get_vendor;
+ screen->pipe.get_param = nv10_screen_get_param;
+ screen->pipe.get_paramf = nv10_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv10_screen_is_format_supported;
+
+ screen->pipe.surface_map = nv10_surface_map;
+ screen->pipe.surface_unmap = nv10_surface_unmap;
+
+ nv10_screen_init_miptree_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_screen.h b/src/gallium/drivers/nv10/nv10_screen.h
new file mode 100644
index 0000000000..3f8750a13f
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_screen.h
@@ -0,0 +1,22 @@
+#ifndef __NV10_SCREEN_H__
+#define __NV10_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv10_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ /* HW graphics objects */
+ struct nouveau_grobj *celsius;
+ struct nouveau_notifier *sync;
+};
+
+static INLINE struct nv10_screen *
+nv10_screen(struct pipe_screen *screen)
+{
+ return (struct nv10_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c
new file mode 100644
index 0000000000..d2375aa2f6
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_state.c
@@ -0,0 +1,588 @@
+#include "draw/draw_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+static void *
+nv10_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv10_blend_state *cb;
+
+ cb = MALLOC(sizeof(struct nv10_blend_state));
+
+ cb->b_enable = cso->blend_enable ? 1 : 0;
+ cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_src_factor)));
+ cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_dst_factor)));
+
+ cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) |
+ ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) |
+ ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) |
+ ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0));
+
+ cb->d_enable = cso->dither ? 1 : 0;
+
+ return (void *)cb;
+}
+
+static void
+nv10_blend_state_bind(struct pipe_context *pipe, void *blend)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->blend = (struct nv10_blend_state*)blend;
+
+ nv10->dirty |= NV10_NEW_BLEND;
+}
+
+static void
+nv10_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT;
+ break;
+ }
+
+ return ret >> NV10TCL_TX_FORMAT_WRAP_S_SHIFT;
+}
+
+static void *
+nv10_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv10_sampler_state *ps;
+ uint32_t filter = 0;
+
+ ps = MALLOC(sizeof(struct nv10_sampler_state));
+
+ ps->wrap = ((wrap_mode(cso->wrap_s) << NV10TCL_TX_FORMAT_WRAP_S_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV10TCL_TX_FORMAT_WRAP_T_SHIFT));
+
+ ps->en = 0;
+ if (cso->max_anisotropy > 1.0) {
+ /* no idea, binary driver sets it, works without it.. meh.. */
+ ps->wrap |= (1 << 5);
+
+/* if (cso->max_anisotropy >= 16.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_16X;
+ } else
+ if (cso->max_anisotropy >= 12.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_12X;
+ } else
+ if (cso->max_anisotropy >= 10.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_10X;
+ } else
+ if (cso->max_anisotropy >= 8.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 6.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_6X;
+ } else
+ if (cso->max_anisotropy >= 4.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_4X;
+ } else {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_2X;
+ }*/
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV10TCL_TX_FILTER_MAGNIFY_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV10TCL_TX_FILTER_MAGNIFY_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ps->filt = filter;
+
+/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ switch (cso->compare_func) {
+ case PIPE_FUNC_NEVER:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER;
+ break;
+ case PIPE_FUNC_GREATER:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER;
+ break;
+ case PIPE_FUNC_EQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL;
+ break;
+ case PIPE_FUNC_LESS:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS;
+ break;
+ default:
+ break;
+ }
+ }*/
+
+ ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+ (float_to_ubyte(cso->border_color[0]) << 16) |
+ (float_to_ubyte(cso->border_color[1]) << 8) |
+ (float_to_ubyte(cso->border_color[2]) << 0));
+
+ return (void *)ps;
+}
+
+static void
+nv10_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv10->tex_sampler[unit] = sampler[unit];
+ nv10->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void
+nv10_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nv10_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv10->tex_miptree[unit] = (struct nv10_miptree *)miptree[unit];
+ nv10->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void *
+nv10_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv10_rasterizer_state *rs;
+ int i;
+
+ /*XXX: ignored:
+ * light_twoside
+ * offset_cw/ccw -nohw
+ * scissor
+ * point_smooth -nohw
+ * multisample
+ * offset_units / offset_scale
+ */
+ rs = MALLOC(sizeof(struct nv10_rasterizer_state));
+
+ rs->templ = cso;
+
+ rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01;
+
+ rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff;
+ rs->line_smooth_en = cso->line_smooth ? 1 : 0;
+
+ rs->point_size = *(uint32_t*)&cso->point_size;
+
+ rs->poly_smooth_en = cso->poly_smooth ? 1 : 0;
+
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ rs->front_face = NV10TCL_FRONT_FACE_CCW;
+ rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw);
+ rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw);
+ } else {
+ rs->front_face = NV10TCL_FRONT_FACE_CW;
+ rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw);
+ rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw);
+ }
+
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ rs->cull_face_en = 1;
+ if (cso->front_winding == PIPE_WINDING_CCW)
+ rs->cull_face = NV10TCL_CULL_FACE_FRONT;
+ else
+ rs->cull_face = NV10TCL_CULL_FACE_BACK;
+ break;
+ case PIPE_WINDING_CW:
+ rs->cull_face_en = 1;
+ if (cso->front_winding == PIPE_WINDING_CW)
+ rs->cull_face = NV10TCL_CULL_FACE_FRONT;
+ else
+ rs->cull_face = NV10TCL_CULL_FACE_BACK;
+ break;
+ case PIPE_WINDING_BOTH:
+ rs->cull_face_en = 1;
+ rs->cull_face = NV10TCL_CULL_FACE_FRONT_AND_BACK;
+ break;
+ case PIPE_WINDING_NONE:
+ default:
+ rs->cull_face_en = 0;
+ rs->cull_face = 0;
+ break;
+ }
+
+ if (cso->point_sprite) {
+ rs->point_sprite = (1 << 0);
+ for (i = 0; i < 8; i++) {
+ if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ rs->point_sprite |= (1 << (8 + i));
+ }
+ } else {
+ rs->point_sprite = 0;
+ }
+
+ return (void *)rs;
+}
+
+static void
+nv10_rasterizer_state_bind(struct pipe_context *pipe, void *rast)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->rast = (struct nv10_rasterizer_state*)rast;
+
+ draw_set_rasterizer_state(nv10->draw, (nv10->rast ? nv10->rast->templ : NULL));
+
+ nv10->dirty |= NV10_NEW_RAST;
+}
+
+static void
+nv10_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nv10_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv10_depth_stencil_alpha_state *hw;
+
+ hw = MALLOC(sizeof(struct nv10_depth_stencil_alpha_state));
+
+ hw->depth.func = nvgl_comparison_op(cso->depth.func);
+ hw->depth.write_enable = cso->depth.writemask ? 1 : 0;
+ hw->depth.test_enable = cso->depth.enabled ? 1 : 0;
+
+ hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0;
+ hw->stencil.wmask = cso->stencil[0].write_mask;
+ hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func);
+ hw->stencil.ref = cso->stencil[0].ref_value;
+ hw->stencil.vmask = cso->stencil[0].value_mask;
+ hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op);
+ hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op);
+ hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op);
+
+ hw->alpha.enabled = cso->alpha.enabled ? 1 : 0;
+ hw->alpha.func = nvgl_comparison_op(cso->alpha.func);
+ hw->alpha.ref = float_to_ubyte(cso->alpha.ref);
+
+ return (void *)hw;
+}
+
+static void
+nv10_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->dsa = (struct nv10_depth_stencil_alpha_state*)dsa;
+
+ nv10->dirty |= NV10_NEW_DSA;
+}
+
+static void
+nv10_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nv10_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ return draw_create_vertex_shader(nv10->draw, templ);
+}
+
+static void
+nv10_vp_state_bind(struct pipe_context *pipe, void *shader)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ draw_bind_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader);
+
+ nv10->dirty |= NV10_NEW_VERTPROG;
+}
+
+static void
+nv10_vp_state_delete(struct pipe_context *pipe, void *shader)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ draw_delete_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader);
+}
+
+static void *
+nv10_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv10_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nv10_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ tgsi_scan_shader(cso->tokens, &fp->info);
+
+ return (void *)fp;
+}
+
+static void
+nv10_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct nv10_fragment_program *fp = hwcso;
+
+ nv10->fragprog.current = fp;
+ nv10->dirty |= NV10_NEW_FRAGPROG;
+}
+
+static void
+nv10_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct nv10_fragment_program *fp = hwcso;
+
+ nv10_fragprog_destroy(nv10, fp);
+ FREE((void*)fp->pipe.tokens);
+ FREE(fp);
+}
+
+static void
+nv10_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->blend_color = (struct pipe_blend_color*)bcol;
+
+ nv10->dirty |= NV10_NEW_BLENDCOL;
+}
+
+static void
+nv10_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ draw_set_clip_state(nv10->draw, clip);
+}
+
+static void
+nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index == 0);
+
+ if (buf) {
+ void *mapped;
+ if (buf->size && (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)))
+ {
+ memcpy(nv10->constbuf[shader], mapped, buf->size);
+ nv10->constbuf_nr[shader] =
+ buf->size / (4 * sizeof(float));
+ ws->buffer_unmap(ws, buf->buffer);
+ }
+ }
+}
+
+static void
+nv10_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->framebuffer = (struct pipe_framebuffer_state*)fb;
+
+ nv10->dirty |= NV10_NEW_FRAMEBUFFER;
+}
+
+static void
+nv10_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ NOUVEAU_ERR("line stipple hahaha\n");
+}
+
+static void
+nv10_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->scissor = (struct pipe_scissor_state*)s;
+
+ nv10->dirty |= NV10_NEW_SCISSOR;
+}
+
+static void
+nv10_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->viewport = (struct pipe_viewport_state*)vpt;
+
+ draw_set_viewport_state(nv10->draw, nv10->viewport);
+
+ nv10->dirty |= NV10_NEW_VIEWPORT;
+}
+
+static void
+nv10_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ memcpy(nv10->vtxbuf, vb, sizeof(*vb) * count);
+ nv10->dirty |= NV10_NEW_VTXARRAYS;
+
+ draw_set_vertex_buffers(nv10->draw, count, vb);
+}
+
+static void
+nv10_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ memcpy(nv10->vtxelt, ve, sizeof(*ve) * count);
+ nv10->dirty |= NV10_NEW_VTXARRAYS;
+
+ draw_set_vertex_elements(nv10->draw, count, ve);
+}
+
+void
+nv10_init_state_functions(struct nv10_context *nv10)
+{
+ nv10->pipe.create_blend_state = nv10_blend_state_create;
+ nv10->pipe.bind_blend_state = nv10_blend_state_bind;
+ nv10->pipe.delete_blend_state = nv10_blend_state_delete;
+
+ nv10->pipe.create_sampler_state = nv10_sampler_state_create;
+ nv10->pipe.bind_sampler_states = nv10_sampler_state_bind;
+ nv10->pipe.delete_sampler_state = nv10_sampler_state_delete;
+ nv10->pipe.set_sampler_textures = nv10_set_sampler_texture;
+
+ nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create;
+ nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind;
+ nv10->pipe.delete_rasterizer_state = nv10_rasterizer_state_delete;
+
+ nv10->pipe.create_depth_stencil_alpha_state =
+ nv10_depth_stencil_alpha_state_create;
+ nv10->pipe.bind_depth_stencil_alpha_state =
+ nv10_depth_stencil_alpha_state_bind;
+ nv10->pipe.delete_depth_stencil_alpha_state =
+ nv10_depth_stencil_alpha_state_delete;
+
+ nv10->pipe.create_vs_state = nv10_vp_state_create;
+ nv10->pipe.bind_vs_state = nv10_vp_state_bind;
+ nv10->pipe.delete_vs_state = nv10_vp_state_delete;
+
+ nv10->pipe.create_fs_state = nv10_fp_state_create;
+ nv10->pipe.bind_fs_state = nv10_fp_state_bind;
+ nv10->pipe.delete_fs_state = nv10_fp_state_delete;
+
+ nv10->pipe.set_blend_color = nv10_set_blend_color;
+ nv10->pipe.set_clip_state = nv10_set_clip_state;
+ nv10->pipe.set_constant_buffer = nv10_set_constant_buffer;
+ nv10->pipe.set_framebuffer_state = nv10_set_framebuffer_state;
+ nv10->pipe.set_polygon_stipple = nv10_set_polygon_stipple;
+ nv10->pipe.set_scissor_state = nv10_set_scissor_state;
+ nv10->pipe.set_viewport_state = nv10_set_viewport_state;
+
+ nv10->pipe.set_vertex_buffers = nv10_set_vertex_buffers;
+ nv10->pipe.set_vertex_elements = nv10_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h
new file mode 100644
index 0000000000..3a3fd0d4f4
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_state.h
@@ -0,0 +1,139 @@
+#ifndef __NV10_STATE_H__
+#define __NV10_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv10_blend_state {
+ uint32_t b_enable;
+ uint32_t b_srcfunc;
+ uint32_t b_dstfunc;
+
+ uint32_t c_mask;
+
+ uint32_t d_enable;
+};
+
+struct nv10_sampler_state {
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+struct nv10_rasterizer_state {
+ uint32_t shade_model;
+
+ uint32_t line_width;
+ uint32_t line_smooth_en;
+
+ uint32_t point_size;
+
+ uint32_t poly_smooth_en;
+
+ uint32_t poly_mode_front;
+ uint32_t poly_mode_back;
+
+ uint32_t front_face;
+ uint32_t cull_face;
+ uint32_t cull_face_en;
+
+ uint32_t point_sprite;
+
+ const struct pipe_rasterizer_state *templ;
+};
+
+struct nv10_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv10_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nv10_vertex_program {
+ const struct pipe_shader_state *pipe;
+
+ boolean translated;
+ struct nv10_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv10_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+};
+
+struct nv10_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv10_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv10_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+};
+
+
+struct nv10_depth_stencil_alpha_state {
+ struct {
+ uint32_t func;
+ uint32_t write_enable;
+ uint32_t test_enable;
+ } depth;
+
+ struct {
+ uint32_t enable;
+ uint32_t wmask;
+ uint32_t func;
+ uint32_t ref;
+ uint32_t vmask;
+ uint32_t fail;
+ uint32_t zfail;
+ uint32_t zpass;
+ } stencil;
+
+ struct {
+ uint32_t enabled;
+ uint32_t func;
+ uint32_t ref;
+ } alpha;
+};
+
+struct nv10_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c
new file mode 100644
index 0000000000..46c7e1d753
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_state_emit.c
@@ -0,0 +1,303 @@
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+static void nv10_state_emit_blend(struct nv10_context* nv10)
+{
+ struct nv10_blend_state *b = nv10->blend;
+
+ BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1);
+ OUT_RING (b->d_enable);
+
+ BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3);
+ OUT_RING (b->b_enable);
+ OUT_RING (b->b_srcfunc);
+ OUT_RING (b->b_dstfunc);
+
+ BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
+ OUT_RING (b->c_mask);
+}
+
+static void nv10_state_emit_blend_color(struct nv10_context* nv10)
+{
+ struct pipe_blend_color *c = nv10->blend_color;
+
+ BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1);
+ OUT_RING ((float_to_ubyte(c->color[3]) << 24)|
+ (float_to_ubyte(c->color[0]) << 16)|
+ (float_to_ubyte(c->color[1]) << 8) |
+ (float_to_ubyte(c->color[2]) << 0));
+}
+
+static void nv10_state_emit_rast(struct nv10_context* nv10)
+{
+ struct nv10_rasterizer_state *r = nv10->rast;
+
+ BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2);
+ OUT_RING (r->shade_model);
+ OUT_RING (r->line_width);
+
+
+ BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
+ OUT_RING (r->point_size);
+
+ BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (r->poly_mode_front);
+ OUT_RING (r->poly_mode_back);
+
+
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
+ OUT_RING (r->cull_face);
+ OUT_RING (r->front_face);
+
+ BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2);
+ OUT_RING (r->line_smooth_en);
+ OUT_RING (r->poly_smooth_en);
+
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (r->cull_face_en);
+}
+
+static void nv10_state_emit_dsa(struct nv10_context* nv10)
+{
+ struct nv10_depth_stencil_alpha_state *d = nv10->dsa;
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
+ OUT_RING (d->depth.func);
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (d->depth.write_enable);
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (d->depth.test_enable);
+
+#if 0
+ BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1);
+ OUT_RING (d->stencil.enable);
+ BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7);
+ OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+#endif
+
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (d->alpha.enabled);
+
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1);
+ OUT_RING (d->alpha.func);
+
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1);
+ OUT_RING (d->alpha.ref);
+}
+
+static void nv10_state_emit_viewport(struct nv10_context* nv10)
+{
+}
+
+static void nv10_state_emit_scissor(struct nv10_context* nv10)
+{
+ // XXX this is so not working
+/* struct pipe_scissor_state *s = nv10->scissor;
+ BEGIN_RING(celsius, NV10TCL_SCISSOR_HORIZ, 2);
+ OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
+}
+
+static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
+{
+ struct pipe_framebuffer_state* fb = nv10->framebuffer;
+ struct pipe_surface *rt, *zeta = NULL;
+ uint32_t rt_format, w, h;
+ int colour_format = 0, zeta_format = 0;
+
+ w = fb->cbufs[0]->width;
+ h = fb->cbufs[0]->height;
+ colour_format = fb->cbufs[0]->format;
+ rt = fb->cbufs[0];
+
+ if (fb->zsbuf) {
+ if (colour_format) {
+ assert(w == fb->zsbuf->width);
+ assert(h == fb->zsbuf->height);
+ } else {
+ w = fb->zsbuf->width;
+ h = fb->zsbuf->height;
+ }
+
+ zeta_format = fb->zsbuf->format;
+ zeta = fb->zsbuf;
+ }
+
+ rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR;
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+ rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ rt_format |= NV10TCL_RT_FORMAT_COLOR_R5G6B5;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (zeta) {
+ BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
+ OUT_RING (rt->stride | (zeta->stride << 16));
+ } else {
+ BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
+ OUT_RING (rt->stride | (rt->stride << 16));
+ }
+
+ nv10->rt[0] = rt->buffer;
+
+ if (zeta_format)
+ {
+ nv10->zeta = zeta->buffer;
+ }
+
+ BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3);
+ OUT_RING ((w << 16) | 0);
+ OUT_RING ((h << 16) | 0);
+ OUT_RING (rt_format);
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ OUT_RING (((w - 1) << 16) | 0 | 0x08000800);
+ OUT_RING (((h - 1) << 16) | 0 | 0x08000800);
+}
+
+static void nv10_vertex_layout(struct nv10_context *nv10)
+{
+ struct nv10_fragment_program *fp = nv10->fragprog.current;
+ uint32_t src = 0;
+ int i;
+ struct vertex_info vinfo;
+
+ memset(&vinfo, 0, sizeof(vinfo));
+
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ switch (fp->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++);
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++);
+ break;
+ default:
+ case TGSI_SEMANTIC_GENERIC:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
+ break;
+ case TGSI_SEMANTIC_FOG:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
+ break;
+ }
+ }
+ draw_compute_vertex_size(&vinfo);
+}
+
+void
+nv10_emit_hw_state(struct nv10_context *nv10)
+{
+ int i;
+
+ if (nv10->dirty & NV10_NEW_VERTPROG) {
+ //nv10_vertprog_bind(nv10, nv10->vertprog.current);
+ nv10->dirty &= ~NV10_NEW_VERTPROG;
+ }
+
+ if (nv10->dirty & NV10_NEW_FRAGPROG) {
+ nv10_fragprog_bind(nv10, nv10->fragprog.current);
+ /*XXX: clear NV10_NEW_FRAGPROG if no new program uploaded */
+ nv10->dirty_samplers |= (1<<10);
+ nv10->dirty_samplers = 0;
+ }
+
+ if (nv10->dirty_samplers || (nv10->dirty & NV10_NEW_FRAGPROG)) {
+ nv10_fragtex_bind(nv10);
+ nv10->dirty &= ~NV10_NEW_FRAGPROG;
+ }
+
+ if (nv10->dirty & NV10_NEW_VTXARRAYS) {
+ nv10->dirty &= ~NV10_NEW_VTXARRAYS;
+ nv10_vertex_layout(nv10);
+ nv10_vtxbuf_bind(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_BLEND) {
+ nv10->dirty &= ~NV10_NEW_BLEND;
+ nv10_state_emit_blend(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_BLENDCOL) {
+ nv10->dirty &= ~NV10_NEW_BLENDCOL;
+ nv10_state_emit_blend_color(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_RAST) {
+ nv10->dirty &= ~NV10_NEW_RAST;
+ nv10_state_emit_rast(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_DSA) {
+ nv10->dirty &= ~NV10_NEW_DSA;
+ nv10_state_emit_dsa(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_VIEWPORT) {
+ nv10->dirty &= ~NV10_NEW_VIEWPORT;
+ nv10_state_emit_viewport(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_SCISSOR) {
+ nv10->dirty &= ~NV10_NEW_SCISSOR;
+ nv10_state_emit_scissor(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_FRAMEBUFFER) {
+ nv10->dirty &= ~NV10_NEW_FRAMEBUFFER;
+ nv10_state_emit_framebuffer(nv10);
+ }
+
+ /* Emit relocs for every referenced buffer.
+ * This is to ensure the bufmgr has an accurate idea of how
+ * the buffer is used. This isn't very efficient, but we don't
+ * seem to take a significant performance hit. Will be improved
+ * at some point. Vertex arrays are emitted by nv10_vbo.c
+ */
+
+ /* Render target */
+// XXX figre out who's who for NV10TCL_DMA_* and fill accordingly
+// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1);
+// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ if (nv10->zeta) {
+// XXX
+// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1);
+// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ /* XXX for when we allocate LMA on nv17 */
+/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+ OUT_RELOCl(nv10->zeta + lma_offset);*/
+ }
+
+ /* Vertex buffer */
+ BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1);
+ OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ /* Texture images */
+ for (i = 0; i < 2; i++) {
+ if (!(nv10->fp_samplers & (1 << i)))
+ continue;
+ BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1);
+ OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1);
+ OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+ NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0,
+ NV10TCL_TX_FORMAT_DMA1);
+ }
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_surface.c b/src/gallium/drivers/nv10/nv10_surface.c
new file mode 100644
index 0000000000..be44c7bed5
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_surface.c
@@ -0,0 +1,64 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv10_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+static void
+nv10_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct nouveau_winsys *nvws = nv10->nvws;
+
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+}
+
+static void
+nv10_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct nouveau_winsys *nvws = nv10->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+void
+nv10_init_surface_functions(struct nv10_context *nv10)
+{
+ nv10->pipe.surface_copy = nv10_surface_copy;
+ nv10->pipe.surface_fill = nv10_surface_fill;
+}
diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c
new file mode 100644
index 0000000000..d0e788ac03
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_vbo.c
@@ -0,0 +1,77 @@
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+boolean nv10_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct nv10_context *nv10 = nv10_context( pipe );
+ struct draw_context *draw = nv10->draw;
+ unsigned i;
+
+ nv10_emit_hw_state(nv10);
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv10->vtxbuf[i].buffer) {
+ void *buf
+ = pipe->winsys->buffer_map(pipe->winsys,
+ nv10->vtxbuf[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes
+ = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ draw_set_mapped_constant_buffer(draw,
+ nv10->constbuf[PIPE_SHADER_VERTEX],
+ nv10->constbuf_nr[PIPE_SHADER_VERTEX]);
+
+ /* draw! */
+ draw_arrays(nv10->draw, prim, start, count);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv10->vtxbuf[i].buffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ }
+ if (indexBuffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ return TRUE;
+}
+
+boolean nv10_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
+{
+ return nv10_draw_elements(pipe, NULL, 0, prim, start, count);
+}
+
+
+
diff --git a/src/gallium/drivers/nv20/Makefile b/src/gallium/drivers/nv20/Makefile
new file mode 100644
index 0000000000..d777fd3d8b
--- /dev/null
+++ b/src/gallium/drivers/nv20/Makefile
@@ -0,0 +1,29 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv20
+
+DRIVER_SOURCES = \
+ nv20_clear.c \
+ nv20_context.c \
+ nv20_fragprog.c \
+ nv20_fragtex.c \
+ nv20_miptree.c \
+ nv20_prim_vbuf.c \
+ nv20_screen.c \
+ nv20_state.c \
+ nv20_state_emit.c \
+ nv20_surface.c \
+ nv20_vbo.c
+# nv20_vertprog.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv20/nv20_clear.c b/src/gallium/drivers/nv20/nv20_clear.c
new file mode 100644
index 0000000000..81b6f3e78a
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_clear.c
@@ -0,0 +1,12 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv20_context.h"
+
+void
+nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+}
diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
new file mode 100644
index 0000000000..9a17f4af57
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -0,0 +1,419 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv20_context.h"
+#include "nv20_screen.h"
+
+static void
+nv20_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ draw_flush(nv20->draw);
+
+ FIRE_RING(fence);
+}
+
+static void
+nv20_destroy(struct pipe_context *pipe)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ if (nv20->draw)
+ draw_destroy(nv20->draw);
+
+ FREE(nv20);
+}
+
+static void nv20_init_hwctx(struct nv20_context *nv20)
+{
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_winsys *nvws = screen->nvws;
+ int i;
+ float projectionmatrix[16];
+ const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL);
+
+ BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1);
+ OUT_RING (screen->sync->handle);
+ BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->gart->handle); /* TEXTURE1 */
+ BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->vram->handle); /* ZETA */
+
+ BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1);
+ OUT_RING (0); /* renouveau: beef0351, unique */
+
+ BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+ OUT_RING ((0xfff << 16) | 0x0);
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
+ OUT_RING ((0xfff << 16) | 0x0);
+
+ for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) {
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1);
+ OUT_RING (0);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, 0x17e0, 3);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
+ OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0);
+ } else {
+ BEGIN_RING(kelvin, 0x1e68, 1);
+ OUT_RING (0x4b800000); /* 16777216.000000 */
+ BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
+ OUT_RING (NV20TCL_TX_RCOMP_LEQUAL);
+ }
+
+ BEGIN_RING(kelvin, 0x290, 1);
+ OUT_RING ((0x10 << 16) | 1);
+ BEGIN_RING(kelvin, 0x9fc, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, 0x1d80, 1);
+ OUT_RING (1);
+ BEGIN_RING(kelvin, 0x9f8, 1);
+ OUT_RING (4);
+ BEGIN_RING(kelvin, 0x17ec, 3);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+ OUT_RINGf (0.0);
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1d88, 1);
+ OUT_RING (3);
+
+ BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1);
+ OUT_RING (nvws->channel->vram->handle);
+ BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1);
+ OUT_RING (nvws->channel->vram->handle);
+ }
+ BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1);
+ OUT_RING (0); /* renouveau: beef1e10 */
+
+ BEGIN_RING(kelvin, 0x1e98, 1);
+ OUT_RING (0);
+#if 0
+ if (is_nv25tcl) {
+ BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2);
+ OUT_RING (NvDmaTT); /* renouveau: beef0202 */
+ OUT_RING (NvDmaFB); /* renouveau: beef0201 */
+
+ BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1);
+ OUT_RING (NvDmaTT); /* renouveau: beef0202 */
+ }
+#endif
+ BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, 0x120, 3);
+ OUT_RING (0);
+ OUT_RING (1);
+ OUT_RING (2);
+
+/* error: ILLEGAL_MTHD, PROTECTION_FAULT
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+ OUT_RINGf (0.0);
+ OUT_RINGf (512.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+*/
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x022c, 2);
+ OUT_RING (0x280);
+ OUT_RING (0x07d28000);
+ }
+
+/* * illegal method, protection fault
+ BEGIN_RING(NvSub3D, 0x1c2c, 1);
+ OUT_RING (0); */
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1da4, 1);
+ OUT_RING (0);
+ }
+
+/* * crashes with illegal method, protection fault
+ BEGIN_RING(NvSub3D, 0x1c18, 1);
+ OUT_RING (0x200); */
+
+ BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
+ OUT_RING ((0 << 16) | 0);
+ OUT_RING ((0 << 16) | 0);
+
+ /* *** Set state *** */
+
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2);
+ OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS);
+ OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */
+
+ for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) {
+ BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1);
+ OUT_RING (0);
+ }
+ BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4);
+ OUT_RING (0x30d410d0);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4);
+ OUT_RING (0x00000c00);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1);
+ OUT_RING (0x00011101);
+ BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2);
+ OUT_RING (0x130e0300);
+ OUT_RING (0x0c091c80);
+ BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4);
+ OUT_RING (0x00000c00);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4);
+ OUT_RING (0x20c400c0);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4);
+ OUT_RING (0x035125a0);
+ OUT_RING (0);
+ OUT_RING (0x40002000);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1);
+ OUT_RING (0xffff0000);
+
+ BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4);
+ OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE);
+ OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO);
+ OUT_RING (0); /* NV20TCL_BLEND_COLOR */
+ OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD);
+ BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
+ OUT_RING (0xff);
+ OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS);
+ OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */
+ OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */
+ OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP);
+ OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP);
+ OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP);
+
+ BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY);
+ BEGIN_RING(kelvin, 0x17cc, 1);
+ OUT_RING (0);
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1d84, 1);
+ OUT_RING (1);
+ }
+ BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1);
+ OUT_RING (0x00020000);
+ BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0),
+ NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE);
+ for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) {
+ OUT_RING(0xffffffff);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ OUT_RING (0);
+ OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */
+ OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
+ OUT_RING (NV20TCL_DEPTH_FUNC_LESS);
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (1);
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+ OUT_RING (1);
+ if (!is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1d84, 1);
+ OUT_RING (3);
+ }
+ BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
+ if (!is_nv25tcl) {
+ OUT_RING (8);
+ } else {
+ OUT_RINGf (1.0);
+ }
+ if (!is_nv25tcl) {
+ BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */
+ } else {
+ BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, 0x0a1c, 1);
+ OUT_RING (0x800);
+ }
+ BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1);
+ OUT_RING (8);
+ BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL);
+ OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL);
+ BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
+ OUT_RING (NV20TCL_CULL_FACE_BACK);
+ OUT_RING (NV20TCL_FRONT_FACE_CCW);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1);
+ OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE);
+ for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) {
+ OUT_RING(0);
+ }
+ BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3);
+ OUT_RINGf (1.5);
+ OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */
+ OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */
+ BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2);
+ OUT_RING (NV20TCL_FOG_MODE_EXP_2);
+ OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG);
+ BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0); /* NV20TCL.FOG_COLOR */
+ BEGIN_RING(kelvin, NV20TCL_ENGINE, 1);
+ OUT_RING (NV20TCL_ENGINE_FIXED);
+
+ for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) {
+ BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1);
+ OUT_RING (0);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15);
+ OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
+ OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
+ OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
+ for (i = 4; i < 16; ++i) {
+ OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (1);
+ BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
+ OUT_RING (0x00010101);
+ BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1);
+ OUT_RING (0);
+
+ memset(projectionmatrix, 0, sizeof(projectionmatrix));
+ projectionmatrix[0*4+0] = 1.0;
+ projectionmatrix[1*4+1] = 1.0;
+ projectionmatrix[2*4+2] = 1.0;
+ projectionmatrix[3*4+3] = 1.0;
+ BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16);
+ for (i = 0; i < 16; i++) {
+ OUT_RINGf (projectionmatrix[i]);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2);
+ OUT_RINGf (0.0);
+ OUT_RINGf (16777216.0); /* bpp dependant? */
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE0_X, 4);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (16777215.0 * 0.5);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE1_X, 4);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (16777215.0 * 0.5);
+ OUT_RING (0);
+
+ FIRE_RING (NULL);
+}
+
+static void
+nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+struct pipe_context *
+nv20_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv20_screen *screen = nv20_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv20_context *nv20;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv20 = CALLOC(1, sizeof(struct nv20_context));
+ if (!nv20)
+ return NULL;
+ nv20->screen = screen;
+ nv20->pctx_id = pctx_id;
+
+ nv20->nvws = nvws;
+
+ nv20->pipe.winsys = ws;
+ nv20->pipe.screen = pscreen;
+ nv20->pipe.destroy = nv20_destroy;
+ nv20->pipe.set_edgeflags = nv20_set_edgeflags;
+ nv20->pipe.draw_arrays = nv20_draw_arrays;
+ nv20->pipe.draw_elements = nv20_draw_elements;
+ nv20->pipe.clear = nv20_clear;
+ nv20->pipe.flush = nv20_flush;
+
+ nv20_init_surface_functions(nv20);
+ nv20_init_state_functions(nv20);
+
+ nv20->draw = draw_create();
+ assert(nv20->draw);
+ draw_set_rasterize_stage(nv20->draw, nv20_draw_vbuf_stage(nv20));
+
+ nv20_init_hwctx(nv20);
+
+ return &nv20->pipe;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h
new file mode 100644
index 0000000000..8ad926db20
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_context.h
@@ -0,0 +1,153 @@
+#ifndef __NV20_CONTEXT_H__
+#define __NV20_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv20_screen *ctx = nv20->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv20_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+#define NV20_NEW_VERTPROG (1 << 0)
+#define NV20_NEW_FRAGPROG (1 << 1)
+#define NV20_NEW_VTXARRAYS (1 << 2)
+#define NV20_NEW_BLEND (1 << 3)
+#define NV20_NEW_BLENDCOL (1 << 4)
+#define NV20_NEW_RAST (1 << 5)
+#define NV20_NEW_DSA (1 << 6)
+#define NV20_NEW_VIEWPORT (1 << 7)
+#define NV20_NEW_SCISSOR (1 << 8)
+#define NV20_NEW_FRAMEBUFFER (1 << 9)
+
+#include "nv20_screen.h"
+
+struct nv20_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv20_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ uint32_t dirty;
+
+ struct nv20_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv20_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned dirty_samplers;
+ unsigned fp_samplers;
+ unsigned vp_samplers;
+
+ uint32_t rt_enable;
+ struct pipe_buffer *rt[4];
+ struct pipe_buffer *zeta;
+ uint32_t lma_offset;
+
+ struct nv20_blend_state *blend;
+ struct pipe_blend_color *blend_color;
+ struct nv20_rasterizer_state *rast;
+ struct nv20_depth_stencil_alpha_state *dsa;
+ struct pipe_viewport_state *viewport;
+ struct pipe_scissor_state *scissor;
+ struct pipe_framebuffer_state *framebuffer;
+
+ //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ float *constbuf[PIPE_SHADER_TYPES][32][4];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+
+ struct vertex_info vertex_info;
+
+ struct {
+ struct pipe_buffer *buffer;
+ uint32_t format;
+ } tex[2];
+
+ unsigned vb_enable;
+ struct {
+ struct pipe_buffer *buffer;
+ unsigned delta;
+ } vb[16];
+
+/* struct {
+
+ struct nouveau_resource *exec_heap;
+ struct nouveau_resource *data_heap;
+
+ struct nv20_vertex_program *active;
+
+ struct nv20_vertex_program *current;
+ } vertprog;
+*/
+ struct {
+ struct nv20_fragment_program *active;
+
+ struct nv20_fragment_program *current;
+ struct pipe_buffer *constant_buf;
+ } fragprog;
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+};
+
+static INLINE struct nv20_context *
+nv20_context(struct pipe_context *pipe)
+{
+ return (struct nv20_context *)pipe;
+}
+
+extern void nv20_init_state_functions(struct nv20_context *nv20);
+extern void nv20_init_surface_functions(struct nv20_context *nv20);
+
+extern void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv20_clear.c */
+extern void nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv20_draw.c */
+extern struct draw_stage *nv20_draw_render_stage(struct nv20_context *nv20);
+
+/* nv20_fragprog.c */
+extern void nv20_fragprog_bind(struct nv20_context *,
+ struct nv20_fragment_program *);
+extern void nv20_fragprog_destroy(struct nv20_context *,
+ struct nv20_fragment_program *);
+
+/* nv20_fragtex.c */
+extern void nv20_fragtex_bind(struct nv20_context *);
+
+/* nv20_prim_vbuf.c */
+struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 );
+extern void nv20_vtxbuf_bind(struct nv20_context* nv20);
+
+/* nv20_state.c and friends */
+extern void nv20_emit_hw_state(struct nv20_context *nv20);
+extern void nv20_state_tex_update(struct nv20_context *nv20);
+
+/* nv20_vbo.c */
+extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv20_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/nv20/nv20_fragprog.c b/src/gallium/drivers/nv20/nv20_fragprog.c
new file mode 100644
index 0000000000..4f496369dd
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_fragprog.c
@@ -0,0 +1,21 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv20_context.h"
+
+void
+nv20_fragprog_bind(struct nv20_context *nv20, struct nv20_fragment_program *fp)
+{
+}
+
+void
+nv20_fragprog_destroy(struct nv20_context *nv20,
+ struct nv20_fragment_program *fp)
+{
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c
new file mode 100644
index 0000000000..495a7be912
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_fragtex.c
@@ -0,0 +1,124 @@
+#include "nv20_context.h"
+#include "nouveau/nouveau_util.h"
+
+#define _(m,tf) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV20TCL_TX_FORMAT_FORMAT_##tf, \
+}
+
+struct nv20_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+};
+
+static struct nv20_texture_format
+nv20_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8),
+ _(A1R5G5B5_UNORM, A1R5G5B5),
+ _(A4R4G4B4_UNORM, A4R4G4B4),
+ _(L8_UNORM , L8 ),
+ _(A8_UNORM , A8 ),
+ _(A8L8_UNORM , A8L8 ),
+/* _(RGB_DXT1 , DXT1, ), */
+/* _(RGBA_DXT1 , DXT1, ), */
+/* _(RGBA_DXT3 , DXT3, ), */
+/* _(RGBA_DXT5 , DXT5, ), */
+ {},
+};
+
+static struct nv20_texture_format *
+nv20_fragtex_format(uint pipe_format)
+{
+ struct nv20_texture_format *tf = nv20_texture_formats;
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ return NULL;
+}
+
+
+static void
+nv20_fragtex_build(struct nv20_context *nv20, int unit)
+{
+#if 0
+ struct nv20_sampler_state *ps = nv20->tex_sampler[unit];
+ struct nv20_miptree *nv20mt = nv20->tex_miptree[unit];
+ struct pipe_texture *pt = &nv20mt->base;
+ struct nv20_texture_format *tf;
+ uint32_t txf, txs, txp;
+
+ tf = nv20_fragtex_format(pt->format);
+ if (!tf || !tf->defined) {
+ NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format);
+ return;
+ }
+
+ txf = tf->format << 8;
+ txf |= (pt->last_level + 1) << 16;
+ txf |= log2i(pt->width[0]) << 20;
+ txf |= log2i(pt->height[0]) << 24;
+ txf |= log2i(pt->depth[0]) << 28;
+ txf |= 8;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV10TCL_TX_FORMAT_CUBE_MAP;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= (2<<4);
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= (1<<4);
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return;
+ }
+
+ BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8);
+ OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING (ps->wrap);
+ OUT_RING (0x40000000); /* enable */
+ OUT_RING (txs);
+ OUT_RING (ps->filt | 0x2000 /* magic */);
+ OUT_RING ((pt->width[0] << 16) | pt->height[0]);
+ OUT_RING (ps->bcol);
+#endif
+}
+
+void
+nv20_fragtex_bind(struct nv20_context *nv20)
+{
+#if 0
+ struct nv20_fragment_program *fp = nv20->fragprog.active;
+ unsigned samplers, unit;
+
+ samplers = nv20->fp_samplers & ~fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1);
+ OUT_RING (0);
+ }
+
+ samplers = nv20->dirty_samplers & fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ nv20_fragtex_build(nv20, unit);
+ }
+
+ nv20->fp_samplers = fp->samplers;
+#endif
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
new file mode 100644
index 0000000000..d2038c391d
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -0,0 +1,155 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv20_context.h"
+#include "nv20_screen.h"
+
+static void
+nv20_miptree_layout(struct nv20_miptree *nv20mt)
+{
+ struct pipe_texture *pt = &nv20mt->base;
+ boolean swizzled = FALSE;
+ uint width = pt->width[0], height = pt->height[0];
+ uint offset = 0;
+ int nr_faces, l, f;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else {
+ nr_faces = 1;
+ }
+
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ if (swizzled)
+ nv20mt->level[l].pitch = pt->nblocksx[l] * pt->block.size;
+ else
+ nv20mt->level[l].pitch = pt->nblocksx[0] * pt->block.size;
+ nv20mt->level[l].pitch = (nv20mt->level[l].pitch + 63) & ~63;
+
+ nv20mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ nv20mt->level[l].image_offset[f] = offset;
+ offset += nv20mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ nv20mt->total_size = offset;
+}
+
+static struct pipe_texture *
+nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct nv20_miptree *mt;
+
+ mt = MALLOC(sizeof(struct nv20_miptree));
+ if (!mt)
+ return NULL;
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = screen;
+
+ nv20_miptree_layout(mt);
+
+ mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL,
+ mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv20_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt)
+{
+ struct pipe_texture *mt = *pt;
+
+ *pt = NULL;
+ if (--mt->refcount <= 0) {
+ struct nv20_miptree *nv20mt = (struct nv20_miptree *)mt;
+ int l;
+
+ pipe_buffer_reference(screen, &nv20mt->buffer, NULL);
+ for (l = 0; l <= mt->last_level; l++) {
+ if (nv20mt->level[l].image_offset)
+ FREE(nv20mt->level[l].image_offset);
+ }
+ FREE(nv20mt);
+ }
+}
+
+static struct pipe_surface *
+nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct nv20_miptree *nv20mt = (struct nv20_miptree *)pt;
+ struct pipe_surface *ps;
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (!ps)
+ return NULL;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(screen, &ps->buffer, nv20mt->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = nv20mt->level[level].pitch;
+ ps->usage = flags;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+ ps->refcount = 1;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ps->offset = nv20mt->level[level].image_offset[face];
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ ps->offset = nv20mt->level[level].image_offset[zslice];
+ } else {
+ ps->offset = nv20mt->level[level].image_offset[0];
+ }
+
+ return ps;
+}
+
+static void
+nv20_miptree_surface_release(struct pipe_screen *pscreen,
+ struct pipe_surface **psurface)
+{
+ struct pipe_surface *ps = *psurface;
+
+ *psurface = NULL;
+ if (--ps->refcount > 0)
+ return;
+
+ pipe_texture_reference(&ps->texture, NULL);
+ pipe_buffer_reference(pscreen, &ps->buffer, NULL);
+ FREE(ps);
+}
+
+void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv20_miptree_create;
+ pscreen->texture_release = nv20_miptree_release;
+ pscreen->get_tex_surface = nv20_miptree_surface_get;
+ pscreen->tex_surface_release = nv20_miptree_surface_release;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
new file mode 100644
index 0000000000..74540845a8
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
@@ -0,0 +1,402 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Build post-transformation, post-clipping vertex buffers and element
+ * lists by hooking into the end of the primitive pipeline and
+ * manipulating the vertex_id field in the vertex headers.
+ *
+ * XXX: work in progress
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+#include "draw/draw_vbuf.h"
+
+/**
+ * Primitive renderer for nv20.
+ */
+struct nv20_vbuf_render {
+ struct vbuf_render base;
+
+ struct nv20_context *nv20;
+
+ /** Vertex buffer in VRAM */
+ struct pipe_buffer *pbuffer;
+
+ /** Vertex buffer in normal memory */
+ void *mbuffer;
+
+ /** Vertex size in bytes */
+ /*unsigned vertex_size;*/
+
+ /** Hardware primitive */
+ unsigned hwprim;
+};
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct nv20_vbuf_render *
+nv20_vbuf_render(struct vbuf_render *render)
+{
+ assert(render);
+ return (struct nv20_vbuf_render *)render;
+}
+
+void nv20_vtxbuf_bind( struct nv20_context* nv20 )
+{
+#if 0
+ int i;
+ for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) {
+ BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1);
+ OUT_RING(0/*nv20->vtxbuf*/);
+ BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1);
+ OUT_RING(0/*XXX*/);
+ }
+#endif
+}
+
+static const struct vertex_info *
+nv20_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+ struct nv20_context *nv20 = nv20_render->nv20;
+
+ nv20_emit_hw_state(nv20);
+
+ return &nv20->vertex_info;
+}
+
+static void *
+nv20__allocate_mbuffer(struct nv20_vbuf_render *nv20_render, size_t size)
+{
+ nv20_render->mbuffer = MALLOC(size);
+ return nv20_render->mbuffer;
+}
+
+static void *
+nv20__allocate_pbuffer(struct nv20_vbuf_render *nv20_render, size_t size)
+{
+ struct pipe_winsys *winsys = nv20_render->nv20->pipe.winsys;
+ nv20_render->pbuffer = winsys->buffer_create(winsys, 64,
+ PIPE_BUFFER_USAGE_VERTEX, size);
+ return winsys->buffer_map(winsys,
+ nv20_render->pbuffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+}
+
+static void *
+nv20_vbuf_render_allocate_vertices( struct vbuf_render *render,
+ ushort vertex_size,
+ ushort nr_vertices )
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+ size_t size = (size_t)vertex_size * (size_t)nr_vertices;
+ void *buf;
+
+ assert(!nv20_render->pbuffer);
+ assert(!nv20_render->mbuffer);
+
+ /*
+ * For small amount of vertices, don't bother with pipe vertex
+ * buffer, the data will be passed directly via the fifo.
+ */
+ /* XXX: Pipe vertex buffers don't work. */
+ if (0 && size > 16 * 1024)
+ buf = nv20__allocate_pbuffer(nv20_render, size);
+ else
+ buf = nv20__allocate_mbuffer(nv20_render, size);
+
+ if (buf)
+ nv20_render->nv20->dirty |= NV20_NEW_VTXARRAYS;
+
+ return buf;
+}
+
+static boolean
+nv20_vbuf_render_set_primitive( struct vbuf_render *render,
+ unsigned prim )
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+ unsigned hwp = nvgl_primitive(prim);
+ if (hwp == 0)
+ return FALSE;
+
+ nv20_render->hwprim = hwp;
+ return TRUE;
+}
+
+static uint32_t
+nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type)
+{
+ return (stride << NV20TCL_VTXFMT_STRIDE_SHIFT) |
+ (fields << NV20TCL_VTXFMT_SIZE_SHIFT) |
+ (type << NV20TCL_VTXFMT_TYPE_SHIFT);
+}
+
+static unsigned
+nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr)
+{
+ uint32_t hwfmt = 0;
+ unsigned fields;
+
+ switch (type) {
+ case EMIT_OMIT:
+ hwfmt = nv20__vtxhwformat(0, 0, 2);
+ fields = 0;
+ break;
+ case EMIT_1F:
+ hwfmt = nv20__vtxhwformat(4, 1, 2);
+ fields = 1;
+ break;
+ case EMIT_2F:
+ hwfmt = nv20__vtxhwformat(8, 2, 2);
+ fields = 2;
+ break;
+ case EMIT_3F:
+ hwfmt = nv20__vtxhwformat(12, 3, 2);
+ fields = 3;
+ break;
+ case EMIT_4F:
+ hwfmt = nv20__vtxhwformat(16, 4, 2);
+ fields = 4;
+ break;
+ default:
+ NOUVEAU_ERR("unhandled attrib_emit %d\n", type);
+ return 0;
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1);
+ OUT_RING(hwfmt);
+ return fields;
+}
+
+static unsigned
+nv20__emit_vertex_array_format(struct nv20_context *nv20)
+{
+ struct vertex_info *vinfo = &nv20->vertex_info;
+ int hwattr = NV20TCL_VTXFMT__SIZE;
+ int attr = 0;
+ unsigned nr_fields = 0;
+
+ while (hwattr-- > 0) {
+ if (vinfo->hwfmt[0] & (1 << hwattr)) {
+ nr_fields += nv20__emit_format(nv20,
+ vinfo->attrib[attr].emit, hwattr);
+ attr++;
+ } else
+ nv20__emit_format(nv20, EMIT_OMIT, hwattr);
+ }
+
+ return nr_fields;
+}
+
+static void
+nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv20_context *nv20 = nv20_render->nv20;
+ struct vertex_info *vinfo = &nv20->vertex_info;
+ unsigned nr_fields;
+ int max_push;
+ ubyte *data = nv20_render->mbuffer;
+ int vsz = 4 * vinfo->size;
+
+ nr_fields = nv20__emit_vertex_array_format(nv20);
+
+ BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING(nv20_render->hwprim);
+
+ max_push = 1200 / nr_fields;
+ while (nr_indices) {
+ int i;
+ int push = MIN2(nr_indices, max_push);
+
+ BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields);
+ for (i = 0; i < push; i++) {
+ /* XXX: fixme to handle other than floats? */
+ int f = nr_fields;
+ float *attrv = (float*)&data[indices[i] * vsz];
+ while (f-- > 0)
+ OUT_RINGf(*attrv++);
+ }
+
+ nr_indices -= push;
+ indices += push;
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP);
+}
+
+static void
+nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv20_context *nv20 = nv20_render->nv20;
+ int push, i;
+
+ NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n");
+
+ BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+ OUT_RELOCl(nv20_render->pbuffer, 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+
+ BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING(nv20_render->hwprim);
+
+ if (nr_indices & 1) {
+ BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (indices[0]);
+ indices++; nr_indices--;
+ }
+
+ while (nr_indices) {
+ // XXX too big/small ? check the size
+ push = MIN2(nr_indices, 1200 * 2);
+
+ BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((indices[i+1] << 16) | indices[i]);
+
+ nr_indices -= push;
+ indices += push;
+ }
+
+ BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING (0);
+}
+
+static void
+nv20_vbuf_render_draw( struct vbuf_render *render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+
+ nv20_emit_hw_state(nv20_render->nv20);
+
+ if (nv20_render->pbuffer)
+ nv20__draw_pbuffer(nv20_render, indices, nr_indices);
+ else if (nv20_render->mbuffer)
+ nv20__draw_mbuffer(nv20_render, indices, nr_indices);
+ else
+ assert(0);
+}
+
+
+static void
+nv20_vbuf_render_release_vertices( struct vbuf_render *render,
+ void *vertices,
+ unsigned vertex_size,
+ unsigned vertices_used )
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+ struct nv20_context *nv20 = nv20_render->nv20;
+ struct pipe_winsys *winsys = nv20->pipe.winsys;
+ struct pipe_screen *pscreen = &nv20->screen->pipe;
+
+ if (nv20_render->pbuffer) {
+ winsys->buffer_unmap(winsys, nv20_render->pbuffer);
+ pipe_buffer_reference(pscreen, &nv20_render->pbuffer, NULL);
+ } else if (nv20_render->mbuffer) {
+ FREE(nv20_render->mbuffer);
+ nv20_render->mbuffer = NULL;
+ } else
+ assert(0);
+}
+
+
+static void
+nv20_vbuf_render_destroy( struct vbuf_render *render )
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+
+ assert(!nv20_render->pbuffer);
+ assert(!nv20_render->mbuffer);
+
+ FREE(nv20_render);
+}
+
+
+/**
+ * Create a new primitive render.
+ */
+static struct vbuf_render *
+nv20_vbuf_render_create( struct nv20_context *nv20 )
+{
+ struct nv20_vbuf_render *nv20_render = CALLOC_STRUCT(nv20_vbuf_render);
+
+ nv20_render->nv20 = nv20;
+
+ nv20_render->base.max_vertex_buffer_bytes = 16*1024;
+ nv20_render->base.max_indices = 1024;
+ nv20_render->base.get_vertex_info = nv20_vbuf_render_get_vertex_info;
+ nv20_render->base.allocate_vertices =
+ nv20_vbuf_render_allocate_vertices;
+ nv20_render->base.set_primitive = nv20_vbuf_render_set_primitive;
+ nv20_render->base.draw = nv20_vbuf_render_draw;
+ nv20_render->base.release_vertices = nv20_vbuf_render_release_vertices;
+ nv20_render->base.destroy = nv20_vbuf_render_destroy;
+
+ return &nv20_render->base;
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ */
+struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 )
+{
+ struct vbuf_render *render;
+ struct draw_stage *stage;
+
+ render = nv20_vbuf_render_create(nv20);
+ if(!render)
+ return NULL;
+
+ stage = draw_vbuf_stage( nv20->draw, render );
+ if(!stage) {
+ render->destroy(render);
+ return NULL;
+ }
+
+ return stage;
+}
diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c
new file mode 100644
index 0000000000..2ca6e6b149
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -0,0 +1,206 @@
+#include "pipe/p_screen.h"
+
+#include "nv20_context.h"
+#include "nv20_screen.h"
+
+static const char *
+nv20_screen_get_name(struct pipe_screen *screen)
+{
+ struct nv20_screen *nv20screen = nv20_screen(screen);
+ struct nouveau_device *dev = nv20screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv20_screen_get_vendor(struct pipe_screen *screen)
+{
+ return "nouveau";
+}
+
+static int
+nv20_screen_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 2;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 0;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 12;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 12;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 0;
+ case NOUVEAU_CAP_HW_VTXBUF:
+ case NOUVEAU_CAP_HW_IDXBUF:
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv20_screen_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 2.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nv20_screen_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static void *
+nv20_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+ void *map;
+
+ map = ws->buffer_map(ws, surface->buffer, flags);
+ if (!map)
+ return NULL;
+
+ return map + surface->offset;
+}
+
+static void
+nv20_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+
+ ws->buffer_unmap(ws, surface->buffer);
+}
+
+static void
+nv20_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv20_screen *screen = nv20_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->kelvin);
+
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv20_screen *screen = CALLOC_STRUCT(nv20_screen);
+ unsigned kelvin_class = 0;
+ unsigned chipset = nvws->channel->device->chipset;
+ int ret;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ /* 3D object */
+ if (chipset >= 0x25)
+ kelvin_class = NV25TCL;
+ else if (chipset >= 0x20)
+ kelvin_class = NV20TCL;
+
+ if (!kelvin_class || chipset >= 0x30) {
+ NOUVEAU_ERR("Unknown nv2x chipset: nv%02x\n", chipset);
+ return NULL;
+ }
+
+ ret = nvws->grobj_alloc(nvws, kelvin_class, &screen->kelvin);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return FALSE;
+ }
+
+ /* Notifier for sync purposes */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv20_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ screen->pipe.winsys = ws;
+ screen->pipe.destroy = nv20_screen_destroy;
+
+ screen->pipe.get_name = nv20_screen_get_name;
+ screen->pipe.get_vendor = nv20_screen_get_vendor;
+ screen->pipe.get_param = nv20_screen_get_param;
+ screen->pipe.get_paramf = nv20_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv20_screen_is_format_supported;
+
+ screen->pipe.surface_map = nv20_surface_map;
+ screen->pipe.surface_unmap = nv20_surface_unmap;
+
+ nv20_screen_init_miptree_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_screen.h b/src/gallium/drivers/nv20/nv20_screen.h
new file mode 100644
index 0000000000..8f2f2e341d
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_screen.h
@@ -0,0 +1,22 @@
+#ifndef __NV20_SCREEN_H__
+#define __NV20_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv20_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ /* HW graphics objects */
+ struct nouveau_grobj *kelvin;
+ struct nouveau_notifier *sync;
+};
+
+static INLINE struct nv20_screen *
+nv20_screen(struct pipe_screen *screen)
+{
+ return (struct nv20_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c
new file mode 100644
index 0000000000..21bde5b81f
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_state.c
@@ -0,0 +1,581 @@
+#include "draw/draw_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+static void *
+nv20_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv20_blend_state *cb;
+
+ cb = MALLOC(sizeof(struct nv20_blend_state));
+
+ cb->b_enable = cso->blend_enable ? 1 : 0;
+ cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_src_factor)));
+ cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_dst_factor)));
+
+ cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) |
+ ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) |
+ ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) |
+ ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0));
+
+ cb->d_enable = cso->dither ? 1 : 0;
+
+ return (void *)cb;
+}
+
+static void
+nv20_blend_state_bind(struct pipe_context *pipe, void *blend)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->blend = (struct nv20_blend_state*)blend;
+
+ nv20->dirty |= NV20_NEW_BLEND;
+}
+
+static void
+nv20_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV20TCL_TX_WRAP_S_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV20TCL_TX_WRAP_S_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV20TCL_TX_WRAP_S_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV20TCL_TX_WRAP_S_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV20TCL_TX_WRAP_S_CLAMP;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV20TCL_TX_WRAP_S_REPEAT;
+ break;
+ }
+
+ return (ret >> NV20TCL_TX_WRAP_S_SHIFT);
+}
+
+static void *
+nv20_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv20_sampler_state *ps;
+ uint32_t filter = 0;
+
+ ps = MALLOC(sizeof(struct nv20_sampler_state));
+
+ ps->wrap = ((wrap_mode(cso->wrap_s) << NV20TCL_TX_WRAP_S_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV20TCL_TX_WRAP_T_SHIFT));
+
+ ps->en = 0;
+ if (cso->max_anisotropy > 1.0) {
+ /* no idea, binary driver sets it, works without it.. meh.. */
+ ps->wrap |= (1 << 5);
+
+/* if (cso->max_anisotropy >= 8.0) {
+ ps->en |= NV20TCL_TX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 4.0) {
+ ps->en |= NV20TCL_TX_ENABLE_ANISO_4X;
+ } else {
+ ps->en |= NV20TCL_TX_ENABLE_ANISO_2X;
+ }*/
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV20TCL_TX_FILTER_MAGNIFY_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV20TCL_TX_FILTER_MAGNIFY_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |=
+ NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |=
+ NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |=
+ NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV20TCL_TX_FILTER_MINIFY_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ps->filt = filter;
+
+/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ switch (cso->compare_func) {
+ case PIPE_FUNC_NEVER:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER;
+ break;
+ case PIPE_FUNC_GREATER:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER;
+ break;
+ case PIPE_FUNC_EQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL;
+ break;
+ case PIPE_FUNC_LESS:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS;
+ break;
+ default:
+ break;
+ }
+ }*/
+
+ ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+ (float_to_ubyte(cso->border_color[0]) << 16) |
+ (float_to_ubyte(cso->border_color[1]) << 8) |
+ (float_to_ubyte(cso->border_color[2]) << 0));
+
+ return (void *)ps;
+}
+
+static void
+nv20_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv20->tex_sampler[unit] = sampler[unit];
+ nv20->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void
+nv20_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nv20_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv20->tex_miptree[unit] = (struct nv20_miptree *)miptree[unit];
+ nv20->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void *
+nv20_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv20_rasterizer_state *rs;
+ int i;
+
+ /*XXX: ignored:
+ * light_twoside
+ * offset_cw/ccw -nohw
+ * scissor
+ * point_smooth -nohw
+ * multisample
+ * offset_units / offset_scale
+ */
+ rs = MALLOC(sizeof(struct nv20_rasterizer_state));
+
+ rs->templ = cso;
+
+ rs->shade_model = cso->flatshade ? NV20TCL_SHADE_MODEL_FLAT :
+ NV20TCL_SHADE_MODEL_SMOOTH;
+
+ rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff;
+ rs->line_smooth_en = cso->line_smooth ? 1 : 0;
+
+ /* XXX: nv20 and nv25 different! */
+ rs->point_size = *(uint32_t*)&cso->point_size;
+
+ rs->poly_smooth_en = cso->poly_smooth ? 1 : 0;
+
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ rs->front_face = NV20TCL_FRONT_FACE_CCW;
+ rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw);
+ rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw);
+ } else {
+ rs->front_face = NV20TCL_FRONT_FACE_CW;
+ rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw);
+ rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw);
+ }
+
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ rs->cull_face_en = 1;
+ if (cso->front_winding == PIPE_WINDING_CCW)
+ rs->cull_face = NV20TCL_CULL_FACE_FRONT;
+ else
+ rs->cull_face = NV20TCL_CULL_FACE_BACK;
+ break;
+ case PIPE_WINDING_CW:
+ rs->cull_face_en = 1;
+ if (cso->front_winding == PIPE_WINDING_CW)
+ rs->cull_face = NV20TCL_CULL_FACE_FRONT;
+ else
+ rs->cull_face = NV20TCL_CULL_FACE_BACK;
+ break;
+ case PIPE_WINDING_BOTH:
+ rs->cull_face_en = 1;
+ rs->cull_face = NV20TCL_CULL_FACE_FRONT_AND_BACK;
+ break;
+ case PIPE_WINDING_NONE:
+ default:
+ rs->cull_face_en = 0;
+ rs->cull_face = 0;
+ break;
+ }
+
+ if (cso->point_sprite) {
+ rs->point_sprite = (1 << 0);
+ for (i = 0; i < 8; i++) {
+ if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ rs->point_sprite |= (1 << (8 + i));
+ }
+ } else {
+ rs->point_sprite = 0;
+ }
+
+ return (void *)rs;
+}
+
+static void
+nv20_rasterizer_state_bind(struct pipe_context *pipe, void *rast)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->rast = (struct nv20_rasterizer_state*)rast;
+
+ draw_set_rasterizer_state(nv20->draw, (nv20->rast ? nv20->rast->templ : NULL));
+
+ nv20->dirty |= NV20_NEW_RAST;
+}
+
+static void
+nv20_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nv20_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv20_depth_stencil_alpha_state *hw;
+
+ hw = MALLOC(sizeof(struct nv20_depth_stencil_alpha_state));
+
+ hw->depth.func = nvgl_comparison_op(cso->depth.func);
+ hw->depth.write_enable = cso->depth.writemask ? 1 : 0;
+ hw->depth.test_enable = cso->depth.enabled ? 1 : 0;
+
+ hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0;
+ hw->stencil.wmask = cso->stencil[0].write_mask;
+ hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func);
+ hw->stencil.ref = cso->stencil[0].ref_value;
+ hw->stencil.vmask = cso->stencil[0].value_mask;
+ hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op);
+ hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op);
+ hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op);
+
+ hw->alpha.enabled = cso->alpha.enabled ? 1 : 0;
+ hw->alpha.func = nvgl_comparison_op(cso->alpha.func);
+ hw->alpha.ref = float_to_ubyte(cso->alpha.ref);
+
+ return (void *)hw;
+}
+
+static void
+nv20_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->dsa = (struct nv20_depth_stencil_alpha_state*)dsa;
+
+ nv20->dirty |= NV20_NEW_DSA;
+}
+
+static void
+nv20_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nv20_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ return draw_create_vertex_shader(nv20->draw, templ);
+}
+
+static void
+nv20_vp_state_bind(struct pipe_context *pipe, void *shader)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ draw_bind_vertex_shader(nv20->draw, (struct draw_vertex_shader *) shader);
+
+ nv20->dirty |= NV20_NEW_VERTPROG;
+}
+
+static void
+nv20_vp_state_delete(struct pipe_context *pipe, void *shader)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ draw_delete_vertex_shader(nv20->draw, (struct draw_vertex_shader *) shader);
+}
+
+static void *
+nv20_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv20_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nv20_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ tgsi_scan_shader(cso->tokens, &fp->info);
+
+ return (void *)fp;
+}
+
+static void
+nv20_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ struct nv20_fragment_program *fp = hwcso;
+
+ nv20->fragprog.current = fp;
+ nv20->dirty |= NV20_NEW_FRAGPROG;
+}
+
+static void
+nv20_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ struct nv20_fragment_program *fp = hwcso;
+
+ nv20_fragprog_destroy(nv20, fp);
+ FREE((void*)fp->pipe.tokens);
+ FREE(fp);
+}
+
+static void
+nv20_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->blend_color = (struct pipe_blend_color*)bcol;
+
+ nv20->dirty |= NV20_NEW_BLENDCOL;
+}
+
+static void
+nv20_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ draw_set_clip_state(nv20->draw, clip);
+}
+
+static void
+nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index == 0);
+
+ if (buf) {
+ void *mapped;
+ if (buf->size && (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)))
+ {
+ memcpy(nv20->constbuf[shader], mapped, buf->size);
+ nv20->constbuf_nr[shader] =
+ buf->size / (4 * sizeof(float));
+ ws->buffer_unmap(ws, buf->buffer);
+ }
+ }
+}
+
+static void
+nv20_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->framebuffer = (struct pipe_framebuffer_state*)fb;
+
+ nv20->dirty |= NV20_NEW_FRAMEBUFFER;
+}
+
+static void
+nv20_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ NOUVEAU_ERR("line stipple hahaha\n");
+}
+
+static void
+nv20_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->scissor = (struct pipe_scissor_state*)s;
+
+ nv20->dirty |= NV20_NEW_SCISSOR;
+}
+
+static void
+nv20_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->viewport = (struct pipe_viewport_state*)vpt;
+
+ draw_set_viewport_state(nv20->draw, nv20->viewport);
+
+ nv20->dirty |= NV20_NEW_VIEWPORT;
+}
+
+static void
+nv20_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ memcpy(nv20->vtxbuf, vb, sizeof(*vb) * count);
+ nv20->dirty |= NV20_NEW_VTXARRAYS;
+
+ draw_set_vertex_buffers(nv20->draw, count, vb);
+}
+
+static void
+nv20_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ memcpy(nv20->vtxelt, ve, sizeof(*ve) * count);
+ nv20->dirty |= NV20_NEW_VTXARRAYS;
+
+ draw_set_vertex_elements(nv20->draw, count, ve);
+}
+
+void
+nv20_init_state_functions(struct nv20_context *nv20)
+{
+ nv20->pipe.create_blend_state = nv20_blend_state_create;
+ nv20->pipe.bind_blend_state = nv20_blend_state_bind;
+ nv20->pipe.delete_blend_state = nv20_blend_state_delete;
+
+ nv20->pipe.create_sampler_state = nv20_sampler_state_create;
+ nv20->pipe.bind_sampler_states = nv20_sampler_state_bind;
+ nv20->pipe.delete_sampler_state = nv20_sampler_state_delete;
+ nv20->pipe.set_sampler_textures = nv20_set_sampler_texture;
+
+ nv20->pipe.create_rasterizer_state = nv20_rasterizer_state_create;
+ nv20->pipe.bind_rasterizer_state = nv20_rasterizer_state_bind;
+ nv20->pipe.delete_rasterizer_state = nv20_rasterizer_state_delete;
+
+ nv20->pipe.create_depth_stencil_alpha_state =
+ nv20_depth_stencil_alpha_state_create;
+ nv20->pipe.bind_depth_stencil_alpha_state =
+ nv20_depth_stencil_alpha_state_bind;
+ nv20->pipe.delete_depth_stencil_alpha_state =
+ nv20_depth_stencil_alpha_state_delete;
+
+ nv20->pipe.create_vs_state = nv20_vp_state_create;
+ nv20->pipe.bind_vs_state = nv20_vp_state_bind;
+ nv20->pipe.delete_vs_state = nv20_vp_state_delete;
+
+ nv20->pipe.create_fs_state = nv20_fp_state_create;
+ nv20->pipe.bind_fs_state = nv20_fp_state_bind;
+ nv20->pipe.delete_fs_state = nv20_fp_state_delete;
+
+ nv20->pipe.set_blend_color = nv20_set_blend_color;
+ nv20->pipe.set_clip_state = nv20_set_clip_state;
+ nv20->pipe.set_constant_buffer = nv20_set_constant_buffer;
+ nv20->pipe.set_framebuffer_state = nv20_set_framebuffer_state;
+ nv20->pipe.set_polygon_stipple = nv20_set_polygon_stipple;
+ nv20->pipe.set_scissor_state = nv20_set_scissor_state;
+ nv20->pipe.set_viewport_state = nv20_set_viewport_state;
+
+ nv20->pipe.set_vertex_buffers = nv20_set_vertex_buffers;
+ nv20->pipe.set_vertex_elements = nv20_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h
new file mode 100644
index 0000000000..34f402fdcb
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_state.h
@@ -0,0 +1,139 @@
+#ifndef __NV20_STATE_H__
+#define __NV20_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv20_blend_state {
+ uint32_t b_enable;
+ uint32_t b_srcfunc;
+ uint32_t b_dstfunc;
+
+ uint32_t c_mask;
+
+ uint32_t d_enable;
+};
+
+struct nv20_sampler_state {
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+struct nv20_rasterizer_state {
+ uint32_t shade_model;
+
+ uint32_t line_width;
+ uint32_t line_smooth_en;
+
+ uint32_t point_size;
+
+ uint32_t poly_smooth_en;
+
+ uint32_t poly_mode_front;
+ uint32_t poly_mode_back;
+
+ uint32_t front_face;
+ uint32_t cull_face;
+ uint32_t cull_face_en;
+
+ uint32_t point_sprite;
+
+ const struct pipe_rasterizer_state *templ;
+};
+
+struct nv20_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv20_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nv20_vertex_program {
+ const struct pipe_shader_state *pipe;
+
+ boolean translated;
+ struct nv20_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv20_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+};
+
+struct nv20_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv20_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv20_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+};
+
+
+struct nv20_depth_stencil_alpha_state {
+ struct {
+ uint32_t func;
+ uint32_t write_enable;
+ uint32_t test_enable;
+ } depth;
+
+ struct {
+ uint32_t enable;
+ uint32_t wmask;
+ uint32_t func;
+ uint32_t ref;
+ uint32_t vmask;
+ uint32_t fail;
+ uint32_t zfail;
+ uint32_t zpass;
+ } stencil;
+
+ struct {
+ uint32_t enabled;
+ uint32_t func;
+ uint32_t ref;
+ } alpha;
+};
+
+struct nv20_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c
new file mode 100644
index 0000000000..5265bf3a31
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_state_emit.c
@@ -0,0 +1,359 @@
+#include "nv20_context.h"
+#include "nv20_state.h"
+#include "draw/draw_context.h"
+
+static void nv20_state_emit_blend(struct nv20_context* nv20)
+{
+ struct nv20_blend_state *b = nv20->blend;
+
+ BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
+ OUT_RING (b->d_enable);
+
+ BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (b->b_enable);
+
+ BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2);
+ OUT_RING (b->b_srcfunc);
+ OUT_RING (b->b_dstfunc);
+
+ BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
+ OUT_RING (b->c_mask);
+}
+
+static void nv20_state_emit_blend_color(struct nv20_context* nv20)
+{
+ struct pipe_blend_color *c = nv20->blend_color;
+
+ BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1);
+ OUT_RING ((float_to_ubyte(c->color[3]) << 24)|
+ (float_to_ubyte(c->color[0]) << 16)|
+ (float_to_ubyte(c->color[1]) << 8) |
+ (float_to_ubyte(c->color[2]) << 0));
+}
+
+static void nv20_state_emit_rast(struct nv20_context* nv20)
+{
+ struct nv20_rasterizer_state *r = nv20->rast;
+
+ BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2);
+ OUT_RING (r->shade_model);
+ OUT_RING (r->line_width);
+
+
+ BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
+ OUT_RING (r->point_size);
+
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (r->poly_mode_front);
+ OUT_RING (r->poly_mode_back);
+
+
+ BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
+ OUT_RING (r->cull_face);
+ OUT_RING (r->front_face);
+
+ BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2);
+ OUT_RING (r->line_smooth_en);
+ OUT_RING (r->poly_smooth_en);
+
+ BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (r->cull_face_en);
+}
+
+static void nv20_state_emit_dsa(struct nv20_context* nv20)
+{
+ struct nv20_depth_stencil_alpha_state *d = nv20->dsa;
+
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
+ OUT_RING (d->depth.func);
+
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (d->depth.write_enable);
+
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (d->depth.test_enable);
+
+#if 0
+ BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
+ OUT_RING (d->stencil.enable);
+ BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
+ OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+#endif
+
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (d->alpha.enabled);
+
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1);
+ OUT_RING (d->alpha.func);
+
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1);
+ OUT_RING (d->alpha.ref);
+}
+
+static void nv20_state_emit_viewport(struct nv20_context* nv20)
+{
+}
+
+static void nv20_state_emit_scissor(struct nv20_context* nv20)
+{
+ /* NV20TCL_SCISSOR_* is probably a software method */
+/* struct pipe_scissor_state *s = nv20->scissor;
+ BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2);
+ OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
+}
+
+static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
+{
+ struct pipe_framebuffer_state* fb = nv20->framebuffer;
+ struct pipe_surface *rt, *zeta = NULL;
+ uint32_t rt_format, w, h;
+ int colour_format = 0, zeta_format = 0;
+
+ w = fb->cbufs[0]->width;
+ h = fb->cbufs[0]->height;
+ colour_format = fb->cbufs[0]->format;
+ rt = fb->cbufs[0];
+
+ if (fb->zsbuf) {
+ if (colour_format) {
+ assert(w == fb->zsbuf->width);
+ assert(h == fb->zsbuf->height);
+ } else {
+ w = fb->zsbuf->width;
+ h = fb->zsbuf->height;
+ }
+
+ zeta_format = fb->zsbuf->format;
+ zeta = fb->zsbuf;
+ }
+
+ rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20;
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+ rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ rt_format |= NV20TCL_RT_FORMAT_COLOR_R5G6B5;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (zeta) {
+ BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
+ OUT_RING (rt->stride | (zeta->stride << 16));
+ } else {
+ BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
+ OUT_RING (rt->stride | (rt->stride << 16));
+ }
+
+ nv20->rt[0] = rt->buffer;
+
+ if (zeta_format)
+ {
+ nv20->zeta = zeta->buffer;
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3);
+ OUT_RING ((w << 16) | 0);
+ OUT_RING ((h << 16) | 0);
+ OUT_RING (rt_format);
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ OUT_RING (((w - 1) << 16) | 0);
+ OUT_RING (((h - 1) << 16) | 0);
+}
+
+static void nv20_vertex_layout(struct nv20_context *nv20)
+{
+ struct nv20_fragment_program *fp = nv20->fragprog.current;
+ struct draw_context *dc = nv20->draw;
+ uint32_t src;
+ int i;
+ struct vertex_info *vinfo = &nv20->vertex_info;
+ const enum interp_mode colorInterp = INTERP_LINEAR;
+ boolean colors[2] = { FALSE };
+ boolean generics[4] = { FALSE };
+ boolean fog = FALSE;
+
+ memset(vinfo, 0, sizeof(*vinfo));
+
+ /*
+ * NV10 hardware vertex attribute order:
+ * fog, weight, normal, tex1, tex0, 2nd color, color, position
+ * vinfo->hwfmt[0] has a used-bit corresponding to each of these.
+ * relation to TGSI_SEMANTIC_*:
+ * - POSITION: position (always used)
+ * - COLOR: 2nd color, color
+ * - GENERIC: weight, normal, tex1, tex0
+ * - FOG: fog
+ */
+
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ int isn = fp->info.input_semantic_name[i];
+ int isi = fp->info.input_semantic_index[i];
+ switch (isn) {
+ case TGSI_SEMANTIC_POSITION:
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ assert(isi < 2);
+ colors[isi] = TRUE;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ assert(isi < 4);
+ generics[isi] = TRUE;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ fog = TRUE;
+ break;
+ default:
+ assert(0 && "unknown input_semantic_name");
+ }
+ }
+
+ if (fog) {
+ int src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ vinfo->hwfmt[0] |= (1 << 7);
+ }
+
+ for (i = 3; i >= 0; i--) {
+ int src;
+ if (!generics[i])
+ continue;
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ vinfo->hwfmt[0] |= (1 << (i + 3));
+ }
+
+ if (colors[1]) {
+ int src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ vinfo->hwfmt[0] |= (1 << 2);
+ }
+
+ if (colors[0]) {
+ int src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ vinfo->hwfmt[0] |= (1 << 1);
+ }
+
+ /* always do position */
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
+ vinfo->hwfmt[0] |= (1 << 0);
+
+ draw_compute_vertex_size(vinfo);
+}
+
+void
+nv20_emit_hw_state(struct nv20_context *nv20)
+{
+ int i;
+
+ if (nv20->dirty & NV20_NEW_VERTPROG) {
+ //nv20_vertprog_bind(nv20, nv20->vertprog.current);
+ nv20->dirty &= ~NV20_NEW_VERTPROG;
+ }
+
+ if (nv20->dirty & NV20_NEW_FRAGPROG) {
+ nv20_fragprog_bind(nv20, nv20->fragprog.current);
+ /*XXX: clear NV20_NEW_FRAGPROG if no new program uploaded */
+ nv20->dirty_samplers |= (1<<10);
+ nv20->dirty_samplers = 0;
+ }
+
+ if (nv20->dirty_samplers || (nv20->dirty & NV20_NEW_FRAGPROG)) {
+ nv20_fragtex_bind(nv20);
+ nv20->dirty &= ~NV20_NEW_FRAGPROG;
+ }
+
+ if (nv20->dirty & NV20_NEW_VTXARRAYS) {
+ nv20->dirty &= ~NV20_NEW_VTXARRAYS;
+ nv20_vertex_layout(nv20);
+ nv20_vtxbuf_bind(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_BLEND) {
+ nv20->dirty &= ~NV20_NEW_BLEND;
+ nv20_state_emit_blend(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_BLENDCOL) {
+ nv20->dirty &= ~NV20_NEW_BLENDCOL;
+ nv20_state_emit_blend_color(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_RAST) {
+ nv20->dirty &= ~NV20_NEW_RAST;
+ nv20_state_emit_rast(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_DSA) {
+ nv20->dirty &= ~NV20_NEW_DSA;
+ nv20_state_emit_dsa(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_VIEWPORT) {
+ nv20->dirty &= ~NV20_NEW_VIEWPORT;
+ nv20_state_emit_viewport(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_SCISSOR) {
+ nv20->dirty &= ~NV20_NEW_SCISSOR;
+ nv20_state_emit_scissor(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_FRAMEBUFFER) {
+ nv20->dirty &= ~NV20_NEW_FRAMEBUFFER;
+ nv20_state_emit_framebuffer(nv20);
+ }
+
+ /* Emit relocs for every referenced buffer.
+ * This is to ensure the bufmgr has an accurate idea of how
+ * the buffer is used. This isn't very efficient, but we don't
+ * seem to take a significant performance hit. Will be improved
+ * at some point. Vertex arrays are emitted by nv20_vbo.c
+ */
+
+ /* Render target */
+/* XXX figre out who's who for NV10TCL_DMA_* and fill accordingly
+ * BEGIN_RING(kelvin, NV20TCL_DMA_COLOR0, 1);
+ * OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); */
+ BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ if (nv20->zeta) {
+/* XXX
+ * BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1);
+ * OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); */
+ BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ /* XXX for when we allocate LMA on nv17 */
+/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+ OUT_RELOCl(nv20->zeta + lma_offset);*/
+ }
+
+ /* Vertex buffer */
+ BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1);
+ OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ /* Texture images */
+ for (i = 0; i < 2; i++) {
+ if (!(nv20->fp_samplers & (1 << i)))
+ continue;
+ BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1);
+ OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1);
+ OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+ NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0,
+ NV20TCL_TX_FORMAT_DMA1);
+ }
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_surface.c b/src/gallium/drivers/nv20/nv20_surface.c
new file mode 100644
index 0000000000..7bc68d0ca2
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_surface.c
@@ -0,0 +1,64 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv20_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+static void
+nv20_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ struct nouveau_winsys *nvws = nv20->nvws;
+
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+}
+
+static void
+nv20_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ struct nouveau_winsys *nvws = nv20->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+void
+nv20_init_surface_functions(struct nv20_context *nv20)
+{
+ nv20->pipe.surface_copy = nv20_surface_copy;
+ nv20->pipe.surface_fill = nv20_surface_fill;
+}
diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c
new file mode 100644
index 0000000000..4edc4efebd
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_vbo.c
@@ -0,0 +1,77 @@
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+boolean nv20_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct nv20_context *nv20 = nv20_context( pipe );
+ struct draw_context *draw = nv20->draw;
+ unsigned i;
+
+ nv20_emit_hw_state(nv20);
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv20->vtxbuf[i].buffer) {
+ void *buf
+ = pipe->winsys->buffer_map(pipe->winsys,
+ nv20->vtxbuf[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes
+ = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ draw_set_mapped_constant_buffer(draw,
+ nv20->constbuf[PIPE_SHADER_VERTEX],
+ nv20->constbuf_nr[PIPE_SHADER_VERTEX]);
+
+ /* draw! */
+ draw_arrays(nv20->draw, prim, start, count);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv20->vtxbuf[i].buffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, nv20->vtxbuf[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ }
+ if (indexBuffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ return TRUE;
+}
+
+boolean nv20_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
+{
+ return nv20_draw_elements(pipe, NULL, 0, prim, start, count);
+}
+
+
+
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
new file mode 100644
index 0000000000..a885fcd7a5
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -0,0 +1,838 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+/* TODO (at least...):
+ * 1. Indexed consts + ARL
+ * 2. Arb. swz/negation
+ * 3. NV_vp11, NV_vp2, NV_vp3 features
+ * - extra arith opcodes
+ * - branching
+ * - texture sampling
+ * - indexed attribs
+ * - indexed results
+ * 4. bugs
+ */
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv20_shader.h"
+
+#define swz(s,x,y,z,w) nv20_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv20_sr_neg((s))
+#define abs(s) nv20_sr_abs((s))
+
+struct nv20_vpc {
+ struct nv20_vertex_program *vp;
+
+ struct nv20_vertex_program_exec *vpi;
+
+ unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
+
+ int high_temp;
+ int temp_temp_count;
+
+ struct nv20_sreg *imm;
+ unsigned nr_imm;
+};
+
+static struct nv20_sreg
+temp(struct nv20_vpc *vpc)
+{
+ int idx;
+
+ idx = vpc->temp_temp_count++;
+ idx += vpc->high_temp + 1;
+ return nv20_sr(NV30SR_TEMP, idx);
+}
+
+static struct nv20_sreg
+constant(struct nv20_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+ struct nv20_vertex_program_data *vpd;
+ int idx;
+
+ if (pipe >= 0) {
+ for (idx = 0; idx < vp->nr_consts; idx++) {
+ if (vp->consts[idx].index == pipe)
+ return nv20_sr(NV30SR_CONST, idx);
+ }
+ }
+
+ idx = vp->nr_consts++;
+ vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
+ vpd = &vp->consts[idx];
+
+ vpd->index = pipe;
+ vpd->value[0] = x;
+ vpd->value[1] = y;
+ vpd->value[2] = z;
+ vpd->value[3] = w;
+ return nv20_sr(NV30SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv20_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+
+static void
+emit_src(struct nv20_vpc *vpc, uint32_t *hw, int pos, struct nv20_sreg src)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV30SR_TEMP:
+ sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
+ sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
+ break;
+ case NV30SR_INPUT:
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ vp->ir |= (1 << src.index);
+ hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
+ break;
+ case NV30SR_CONST:
+ sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ assert(vpc->vpi->const_index == -1 ||
+ vpc->vpi->const_index == src.index);
+ vpc->vpi->const_index = src.index;
+ break;
+ case NV30SR_NONE:
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV30_VP_SRC_NEGATE;
+
+ if (src.abs)
+ hw[0] |= (1 << (21 + pos));
+
+ sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) |
+ (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT));
+
+/*
+ * |VVV|
+ * d�.�b
+ * \u/
+ *
+ */
+
+ switch (pos) {
+ case 0:
+ hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >>
+ NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT;
+ hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) <<
+ NV30_VP_INST_SRC0L_SHIFT;
+ break;
+ case 1:
+ hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT;
+ break;
+ case 2:
+ hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >>
+ NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT;
+ hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) <<
+ NV30_VP_INST_SRC2L_SHIFT;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+emit_dst(struct nv20_vpc *vpc, uint32_t *hw, int slot, struct nv20_sreg dst)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+
+ switch (dst.type) {
+ case NV30SR_TEMP:
+ hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
+ break;
+ case NV30SR_OUTPUT:
+ switch (dst.index) {
+ case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
+ case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ default:
+ break;
+ }
+
+ hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
+ hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+
+ /*XXX: no way this is entirely correct, someone needs to
+ * figure out what exactly it is.
+ */
+ hw[3] |= 0x800;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+nv20_vp_arith(struct nv20_vpc *vpc, int slot, int op,
+ struct nv20_sreg dst, int mask,
+ struct nv20_sreg s0, struct nv20_sreg s1,
+ struct nv20_sreg s2)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+ uint32_t *hw;
+
+ vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+ vpc->vpi = &vp->insns[vp->nr_insns - 1];
+ memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+ vpc->vpi->const_index = -1;
+
+ hw = vpc->vpi->data;
+
+ hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
+ hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
+ (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
+ (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
+ (3 << NV30_VP_INST_COND_SWZ_W_SHIFT));
+
+ hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
+// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);
+
+ if (dst.type == NV30SR_OUTPUT) {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
+ } else {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
+ }
+
+ emit_dst(vpc, hw, slot, dst);
+ emit_src(vpc, hw, 0, s0);
+ emit_src(vpc, hw, 1, s1);
+ emit_src(vpc, hw, 2, s2);
+}
+
+static INLINE struct nv20_sreg
+tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+ struct nv20_sreg src;
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ src = vpc->imm[fsrc->SrcRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (vpc->high_temp < fsrc->SrcRegister.Index)
+ vpc->high_temp = fsrc->SrcRegister.Index;
+ src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->SrcRegisterExtMod.Absolute;
+ src.negate = fsrc->SrcRegister.Negate;
+ src.swz[0] = fsrc->SrcRegister.SwizzleX;
+ src.swz[1] = fsrc->SrcRegister.SwizzleY;
+ src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+ src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ return src;
+}
+
+static INLINE struct nv20_sreg
+tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+ struct nv20_sreg dst;
+
+ switch (fdst->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ dst = nv20_sr(NV30SR_OUTPUT,
+ vpc->output_map[fdst->DstRegister.Index]);
+
+ break;
+ case TGSI_FILE_TEMPORARY:
+ dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+ if (vpc->high_temp < dst.index)
+ vpc->high_temp = dst.index;
+ break;
+ default:
+ NOUVEAU_ERR("bad dst file\n");
+ break;
+ }
+
+ return dst;
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
+ return mask;
+}
+
+static boolean
+nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nv20_sreg src[3], dst, tmp;
+ struct nv20_sreg none = nv20_sr(NV30SR_NONE, 0);
+ int mask;
+ int ai = -1, ci = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ vpc->temp_temp_count = 0;
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ /*XXX: index comparison is broken now that consts come from
+ * two different register files.
+ */
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_IMMEDIATE:
+ if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ tmp = temp(vpc);
+ arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, 1, OP_EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RET:
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(vpc);
+ arith(vpc, 0, OP_MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ int hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NV30_VP_INST_DEST_POS;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV30_VP_INST_DEST_COL0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV30_VP_INST_DEST_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV30_VP_INST_DEST_BFC0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV30_VP_INST_DEST_BFC1;
+ } else {
+ NOUVEAU_ERR("bad bcolour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NV30_VP_INST_DEST_FOGC;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ hw = NV30_VP_INST_DEST_PSZ;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.SemanticIndex <= 7) {
+ hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ vpc->output_map[fdec->DeclarationRange.First] = hw;
+ return TRUE;
+}
+
+static boolean
+nv20_vertprog_prepare(struct nv20_vpc *vpc)
+{
+ struct tgsi_parse_context p;
+ int nr_imm = 0;
+
+ tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ nr_imm++;
+ break;
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ if (nr_imm) {
+ vpc->imm = CALLOC(nr_imm, sizeof(struct nv20_sreg));
+ assert(vpc->imm);
+ }
+
+ return TRUE;
+}
+
+static void
+nv20_vertprog_translate(struct nv20_context *nv20,
+ struct nv20_vertex_program *vp)
+{
+ struct tgsi_parse_context parse;
+ struct nv20_vpc *vpc = NULL;
+
+ tgsi_dump(vp->pipe.tokens,0);
+
+ vpc = CALLOC(1, sizeof(struct nv20_vpc));
+ if (!vpc)
+ return;
+ vpc->vp = vp;
+ vpc->high_temp = -1;
+
+ if (!nv20_vertprog_prepare(vpc)) {
+ FREE(vpc);
+ return;
+ }
+
+ tgsi_parse_init(&parse, vp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+ fdec = &parse.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_OUTPUT:
+ if (!nv20_vertprog_parse_decl_output(vpc, fdec))
+ goto out_err;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm;
+
+ imm = &parse.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+// assert(imm->Immediate.Size == 4);
+ vpc->imm[vpc->nr_imm++] =
+ constant(vpc, -1,
+ imm->u.ImmediateFloat32[0].Float,
+ imm->u.ImmediateFloat32[1].Float,
+ imm->u.ImmediateFloat32[2].Float,
+ imm->u.ImmediateFloat32[3].Float);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ finst = &parse.FullToken.FullInstruction;
+ if (!nv20_vertprog_parse_instruction(vpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
+ vp->translated = TRUE;
+out_err:
+ tgsi_parse_free(&parse);
+ FREE(vpc);
+}
+
+static boolean
+nv20_vertprog_validate(struct nv20_context *nv20)
+{
+ struct nouveau_winsys *nvws = nv20->nvws;
+ struct pipe_winsys *ws = nv20->pipe.winsys;
+ struct nouveau_grobj *rankine = nv20->screen->rankine;
+ struct nv20_vertex_program *vp;
+ struct pipe_buffer *constbuf;
+ boolean upload_code = FALSE, upload_data = FALSE;
+ int i;
+
+ vp = nv20->vertprog;
+ constbuf = nv20->constbuf[PIPE_SHADER_VERTEX];
+
+ /* Translate TGSI shader into hw bytecode */
+ if (!vp->translated) {
+ nv20_vertprog_translate(nv20, vp);
+ if (!vp->translated)
+ return FALSE;
+ }
+
+ /* Allocate hw vtxprog exec slots */
+ if (!vp->exec) {
+ struct nouveau_resource *heap = nv20->screen->vp_exec_heap;
+ struct nouveau_stateobj *so;
+ uint vplen = vp->nr_insns;
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+ while (heap->next && heap->size < vplen) {
+ struct nv20_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->exec);
+ }
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+ assert(0);
+ }
+
+ so = so_new(2, 0);
+ so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
+ so_data (so, vp->exec->start);
+ so_ref(so, &vp->so);
+
+ upload_code = TRUE;
+ }
+
+ /* Allocate hw vtxprog const slots */
+ if (vp->nr_consts && !vp->data) {
+ struct nouveau_resource *heap = nv20->screen->vp_data_heap;
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+ while (heap->next && heap->size < vp->nr_consts) {
+ struct nv20_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->data);
+ }
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+ assert(0);
+ }
+
+ /*XXX: handle this some day */
+ assert(vp->data->start >= vp->data_start_min);
+
+ upload_data = TRUE;
+ if (vp->data_start != vp->data->start)
+ upload_code = TRUE;
+ }
+
+ /* If exec or data segments moved we need to patch the program to
+ * fixup offsets and register IDs.
+ */
+ if (vp->exec_start != vp->exec->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv20_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->has_branch_offset) {
+ assert(0);
+ }
+ }
+
+ vp->exec_start = vp->exec->start;
+ }
+
+ if (vp->nr_consts && vp->data_start != vp->data->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv20_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->const_index >= 0) {
+ vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK;
+ vpi->data[1] |=
+ (vpi->const_index + vp->data->start) <<
+ NV30_VP_INST_CONST_SRC_SHIFT;
+
+ }
+ }
+
+ vp->data_start = vp->data->start;
+ }
+
+ /* Update + Upload constant values */
+ if (vp->nr_consts) {
+ float *map = NULL;
+
+ if (constbuf) {
+ map = ws->buffer_map(ws, constbuf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ }
+
+ for (i = 0; i < vp->nr_consts; i++) {
+ struct nv20_vertex_program_data *vpd = &vp->consts[i];
+
+ if (vpd->index >= 0) {
+ if (!upload_data &&
+ !memcmp(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float)))
+ continue;
+ memcpy(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float));
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (i + vp->data->start);
+ OUT_RINGp ((uint32_t *)vpd->value, 4);
+ }
+
+ if (constbuf) {
+ ws->buffer_unmap(ws, constbuf);
+ }
+ }
+
+ /* Upload vtxprog */
+ if (upload_code) {
+#if 0
+ for (i = 0; i < vp->nr_insns; i++) {
+ NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ i, vp->insns[i].data[0], vp->insns[i].data[1],
+ vp->insns[i].data[2], vp->insns[i].data[3]);
+ }
+#endif
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (vp->exec->start);
+ for (i = 0; i < vp->nr_insns; i++) {
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (vp->insns[i].data, 4);
+ }
+ }
+
+ if (vp->so != nv20->state.hw[NV30_STATE_VERTPROG]) {
+ so_ref(vp->so, &nv20->state.hw[NV30_STATE_VERTPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+nv20_vertprog_destroy(struct nv20_context *nv20, struct nv20_vertex_program *vp)
+{
+ struct nouveau_winsys *nvws = nv20->screen->nvws;
+
+ vp->translated = FALSE;
+
+ if (vp->nr_insns) {
+ FREE(vp->insns);
+ vp->insns = NULL;
+ vp->nr_insns = 0;
+ }
+
+ if (vp->nr_consts) {
+ FREE(vp->consts);
+ vp->consts = NULL;
+ vp->nr_consts = 0;
+ }
+
+ nvws->res_free(&vp->exec);
+ vp->exec_start = 0;
+ nvws->res_free(&vp->data);
+ vp->data_start = 0;
+ vp->data_start_min = 0;
+
+ vp->ir = vp->or = 0;
+ so_ref(NULL, &vp->so);
+}
+
+struct nv20_state_entry nv20_state_vertprog = {
+ .validate = nv20_vertprog_validate,
+ .dirty = {
+ .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/,
+ .hw = NV30_STATE_VERTPROG,
+ }
+};
diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile
new file mode 100644
index 0000000000..69f2790dfe
--- /dev/null
+++ b/src/gallium/drivers/nv30/Makefile
@@ -0,0 +1,37 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv30
+
+DRIVER_SOURCES = \
+ nv30_clear.c \
+ nv30_context.c \
+ nv30_draw.c \
+ nv30_fragprog.c \
+ nv30_fragtex.c \
+ nv30_miptree.c \
+ nv30_query.c \
+ nv30_screen.c \
+ nv30_state.c \
+ nv30_state_blend.c \
+ nv30_state_emit.c \
+ nv30_state_fb.c \
+ nv30_state_rasterizer.c \
+ nv30_state_scissor.c \
+ nv30_state_stipple.c \
+ nv30_state_viewport.c \
+ nv30_state_zsa.c \
+ nv30_surface.c \
+ nv30_vbo.c \
+ nv30_vertprog.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c
new file mode 100644
index 0000000000..8c3ca204d5
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_clear.c
@@ -0,0 +1,13 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv30_context.h"
+
+void
+nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+ ps->status = PIPE_SURFACE_STATUS_CLEAR;
+}
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
new file mode 100644
index 0000000000..2bff28aca9
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -0,0 +1,72 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv30_context.h"
+#include "nv30_screen.h"
+
+static void
+nv30_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ BEGIN_RING(rankine, 0x1fd8, 1);
+ OUT_RING (2);
+ BEGIN_RING(rankine, 0x1fd8, 1);
+ OUT_RING (1);
+ }
+
+ FIRE_RING(fence);
+}
+
+static void
+nv30_destroy(struct pipe_context *pipe)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ if (nv30->draw)
+ draw_destroy(nv30->draw);
+ FREE(nv30);
+}
+
+struct pipe_context *
+nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv30_screen *screen = nv30_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv30_context *nv30;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv30 = CALLOC(1, sizeof(struct nv30_context));
+ if (!nv30)
+ return NULL;
+ nv30->screen = screen;
+ nv30->pctx_id = pctx_id;
+
+ nv30->nvws = nvws;
+
+ nv30->pipe.winsys = ws;
+ nv30->pipe.screen = pscreen;
+ nv30->pipe.destroy = nv30_destroy;
+ nv30->pipe.draw_arrays = nv30_draw_arrays;
+ nv30->pipe.draw_elements = nv30_draw_elements;
+ nv30->pipe.clear = nv30_clear;
+ nv30->pipe.flush = nv30_flush;
+
+ nv30_init_query_functions(nv30);
+ nv30_init_surface_functions(nv30);
+ nv30_init_state_functions(nv30);
+
+ /* Create, configure, and install fallback swtnl path */
+ nv30->draw = draw_create();
+ draw_wide_point_threshold(nv30->draw, 9999999.0);
+ draw_wide_line_threshold(nv30->draw, 9999999.0);
+ draw_enable_line_stipple(nv30->draw, FALSE);
+ draw_enable_point_sprites(nv30->draw, FALSE);
+ draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30));
+
+ return &nv30->pipe;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
new file mode 100644
index 0000000000..b933769700
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -0,0 +1,212 @@
+#ifndef __NV30_CONTEXT_H__
+#define __NV30_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv30_screen *ctx = nv30->screen
+#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_stateobj.h"
+
+#include "nv30_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+enum nv30_state_index {
+ NV30_STATE_FB = 0,
+ NV30_STATE_VIEWPORT = 1,
+ NV30_STATE_BLEND = 2,
+ NV30_STATE_RAST = 3,
+ NV30_STATE_ZSA = 4,
+ NV30_STATE_BCOL = 5,
+ NV30_STATE_CLIP = 6,
+ NV30_STATE_SCISSOR = 7,
+ NV30_STATE_STIPPLE = 8,
+ NV30_STATE_FRAGPROG = 9,
+ NV30_STATE_VERTPROG = 10,
+ NV30_STATE_FRAGTEX0 = 11,
+ NV30_STATE_FRAGTEX1 = 12,
+ NV30_STATE_FRAGTEX2 = 13,
+ NV30_STATE_FRAGTEX3 = 14,
+ NV30_STATE_FRAGTEX4 = 15,
+ NV30_STATE_FRAGTEX5 = 16,
+ NV30_STATE_FRAGTEX6 = 17,
+ NV30_STATE_FRAGTEX7 = 18,
+ NV30_STATE_FRAGTEX8 = 19,
+ NV30_STATE_FRAGTEX9 = 20,
+ NV30_STATE_FRAGTEX10 = 21,
+ NV30_STATE_FRAGTEX11 = 22,
+ NV30_STATE_FRAGTEX12 = 23,
+ NV30_STATE_FRAGTEX13 = 24,
+ NV30_STATE_FRAGTEX14 = 25,
+ NV30_STATE_FRAGTEX15 = 26,
+ NV30_STATE_VERTTEX0 = 27,
+ NV30_STATE_VERTTEX1 = 28,
+ NV30_STATE_VERTTEX2 = 29,
+ NV30_STATE_VERTTEX3 = 30,
+ NV30_STATE_VTXBUF = 31,
+ NV30_STATE_VTXFMT = 32,
+ NV30_STATE_VTXATTR = 33,
+ NV30_STATE_MAX = 34
+};
+
+#include "nv30_screen.h"
+
+#define NV30_NEW_BLEND (1 << 0)
+#define NV30_NEW_RAST (1 << 1)
+#define NV30_NEW_ZSA (1 << 2)
+#define NV30_NEW_SAMPLER (1 << 3)
+#define NV30_NEW_FB (1 << 4)
+#define NV30_NEW_STIPPLE (1 << 5)
+#define NV30_NEW_SCISSOR (1 << 6)
+#define NV30_NEW_VIEWPORT (1 << 7)
+#define NV30_NEW_BCOL (1 << 8)
+#define NV30_NEW_VERTPROG (1 << 9)
+#define NV30_NEW_FRAGPROG (1 << 10)
+#define NV30_NEW_ARRAYS (1 << 11)
+#define NV30_NEW_UCP (1 << 12)
+
+struct nv30_rasterizer_state {
+ struct pipe_rasterizer_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv30_zsa_state {
+ struct pipe_depth_stencil_alpha_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv30_blend_state {
+ struct pipe_blend_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+
+struct nv30_state {
+ unsigned scissor_enabled;
+ unsigned stipple_enabled;
+ unsigned viewport_bypass;
+ unsigned fp_samplers;
+
+ uint64_t dirty;
+ struct nouveau_stateobj *hw[NV30_STATE_MAX];
+};
+
+struct nv30_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv30_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ /* HW state derived from pipe states */
+ struct nv30_state state;
+
+ /* Context state */
+ unsigned dirty;
+ struct pipe_scissor_state scissor;
+ unsigned stipple[32];
+ struct nv30_vertex_program *vertprog;
+ struct nv30_fragment_program *fragprog;
+ struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+ struct nv30_rasterizer_state *rasterizer;
+ struct nv30_zsa_state *zsa;
+ struct nv30_blend_state *blend;
+ struct pipe_blend_color blend_colour;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_buffer *idxbuf;
+ unsigned idxbuf_format;
+ struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers;
+ unsigned nr_textures;
+ unsigned dirty_samplers;
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned vtxbuf_nr;
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+ unsigned vtxelt_nr;
+ const unsigned *edgeflags;
+};
+
+static INLINE struct nv30_context *
+nv30_context(struct pipe_context *pipe)
+{
+ return (struct nv30_context *)pipe;
+}
+
+struct nv30_state_entry {
+ boolean (*validate)(struct nv30_context *nv30);
+ struct {
+ unsigned pipe;
+ unsigned hw;
+ } dirty;
+};
+
+extern void nv30_init_state_functions(struct nv30_context *nv30);
+extern void nv30_init_surface_functions(struct nv30_context *nv30);
+extern void nv30_init_query_functions(struct nv30_context *nv30);
+
+extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv30_draw.c */
+extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30);
+
+/* nv30_vertprog.c */
+extern void nv30_vertprog_destroy(struct nv30_context *,
+ struct nv30_vertex_program *);
+
+/* nv30_fragprog.c */
+extern void nv30_fragprog_destroy(struct nv30_context *,
+ struct nv30_fragment_program *);
+
+/* nv30_fragtex.c */
+extern void nv30_fragtex_bind(struct nv30_context *);
+
+/* nv30_state.c and friends */
+extern boolean nv30_state_validate(struct nv30_context *nv30);
+extern void nv30_state_emit(struct nv30_context *nv30);
+extern struct nv30_state_entry nv30_state_rasterizer;
+extern struct nv30_state_entry nv30_state_scissor;
+extern struct nv30_state_entry nv30_state_stipple;
+extern struct nv30_state_entry nv30_state_fragprog;
+extern struct nv30_state_entry nv30_state_vertprog;
+extern struct nv30_state_entry nv30_state_blend;
+extern struct nv30_state_entry nv30_state_blend_colour;
+extern struct nv30_state_entry nv30_state_zsa;
+extern struct nv30_state_entry nv30_state_viewport;
+extern struct nv30_state_entry nv30_state_framebuffer;
+extern struct nv30_state_entry nv30_state_fragtex;
+extern struct nv30_state_entry nv30_state_vbo;
+
+/* nv30_vbo.c */
+extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv30_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count);
+
+/* nv30_clear.c */
+extern void nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c
new file mode 100644
index 0000000000..74fc138c05
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_draw.c
@@ -0,0 +1,61 @@
+#include "draw/draw_pipe.h"
+
+#include "nv30_context.h"
+
+struct nv30_draw_stage {
+ struct draw_stage draw;
+ struct nv30_context *nv30;
+};
+
+static void
+nv30_draw_point(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv30_draw_line(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv30_draw_flush(struct draw_stage *draw, unsigned flags)
+{
+}
+
+static void
+nv30_draw_reset_stipple_counter(struct draw_stage *draw)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv30_draw_destroy(struct draw_stage *draw)
+{
+ FREE(draw);
+}
+
+struct draw_stage *
+nv30_draw_render_stage(struct nv30_context *nv30)
+{
+ struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage);
+
+ nv30draw->nv30 = nv30;
+ nv30draw->draw.draw = nv30->draw;
+ nv30draw->draw.point = nv30_draw_point;
+ nv30draw->draw.line = nv30_draw_line;
+ nv30draw->draw.tri = nv30_draw_tri;
+ nv30draw->draw.flush = nv30_draw_flush;
+ nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter;
+ nv30draw->draw.destroy = nv30_draw_destroy;
+
+ return &nv30draw->draw;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
new file mode 100644
index 0000000000..320ba3f4bf
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -0,0 +1,911 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv30_context.h"
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 1
+#define MASK_Y 2
+#define MASK_Z 4
+#define MASK_W 8
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NV30_FP_OP_COND_TR
+#include "nv30_shader.h"
+
+#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv30_sr_neg((s))
+#define abs(s) nv30_sr_abs((s))
+#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
+
+#define MAX_CONSTS 128
+#define MAX_IMM 32
+struct nv30_fpc {
+ struct nv30_fragment_program *fp;
+
+ uint attrib_map[PIPE_MAX_SHADER_INPUTS];
+
+ int high_temp;
+ int temp_temp_count;
+ int num_regs;
+
+ uint depth_id;
+ uint colour_id;
+
+ unsigned inst_offset;
+
+ struct {
+ int pipe;
+ float vals[4];
+ } consts[MAX_CONSTS];
+ int nr_consts;
+
+ struct nv30_sreg imm[MAX_IMM];
+ unsigned nr_imm;
+};
+
+static INLINE struct nv30_sreg
+temp(struct nv30_fpc *fpc)
+{
+ int idx;
+
+ idx = fpc->temp_temp_count++;
+ idx += fpc->high_temp + 1;
+ return nv30_sr(NV30SR_TEMP, idx);
+}
+
+static INLINE struct nv30_sreg
+constant(struct nv30_fpc *fpc, int pipe, float vals[4])
+{
+ int idx;
+
+ if (fpc->nr_consts == MAX_CONSTS)
+ assert(0);
+ idx = fpc->nr_consts++;
+
+ fpc->consts[idx].pipe = pipe;
+ if (pipe == -1)
+ memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
+ return nv30_sr(NV30SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
+ (d), (m), (s0), (s1), (s2))
+#define tex(cc,s,o,u,d,m,s0,s1,s2) \
+ nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
+ (d), (m), (s0), none, none)
+
+static void
+grow_insns(struct nv30_fpc *fpc, int size)
+{
+ struct nv30_fragment_program *fp = fpc->fp;
+
+ fp->insn_len += size;
+ fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
+}
+
+static void
+emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
+{
+ struct nv30_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fpc->inst_offset];
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV30SR_INPUT:
+ sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+ hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT);
+ break;
+ case NV30SR_OUTPUT:
+ sr |= NV30_FP_REG_SRC_HALF;
+ /* fall-through */
+ case NV30SR_TEMP:
+ sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
+ sr |= (src.index << NV30_FP_REG_SRC_SHIFT);
+ break;
+ case NV30SR_CONST:
+ grow_insns(fpc, 4);
+ hw = &fp->insn[fpc->inst_offset];
+ if (fpc->consts[src.index].pipe >= 0) {
+ struct nv30_fragment_program_data *fpd;
+
+ fp->consts = realloc(fp->consts, ++fp->nr_consts *
+ sizeof(*fpd));
+ fpd = &fp->consts[fp->nr_consts - 1];
+ fpd->offset = fpc->inst_offset + 4;
+ fpd->index = fpc->consts[src.index].pipe;
+ memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
+ } else {
+ memcpy(&fp->insn[fpc->inst_offset + 4],
+ fpc->consts[src.index].vals,
+ sizeof(uint32_t) * 4);
+ }
+
+ sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
+ break;
+ case NV30SR_NONE:
+ sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV30_FP_REG_NEGATE;
+
+ if (src.abs)
+ hw[1] |= (1 << (29 + pos));
+
+ sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) |
+ (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT));
+
+ hw[pos + 1] |= sr;
+}
+
+static void
+emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst)
+{
+ struct nv30_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fpc->inst_offset];
+
+ switch (dst.type) {
+ case NV30SR_TEMP:
+ if (fpc->num_regs < (dst.index + 1))
+ fpc->num_regs = dst.index + 1;
+ break;
+ case NV30SR_OUTPUT:
+ if (dst.index == 1) {
+ fp->fp_control |= 0xe;
+ } else {
+ hw[0] |= NV30_FP_OP_OUT_REG_HALF;
+ }
+ break;
+ case NV30SR_NONE:
+ hw[0] |= (1 << 30);
+ break;
+ default:
+ assert(0);
+ }
+
+ hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT);
+}
+
+static void
+nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
+ struct nv30_sreg dst, int mask,
+ struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+{
+ struct nv30_fragment_program *fp = fpc->fp;
+ uint32_t *hw;
+
+ fpc->inst_offset = fp->insn_len;
+ grow_insns(fpc, 4);
+ hw = &fp->insn[fpc->inst_offset];
+ memset(hw, 0, sizeof(uint32_t) * 4);
+
+ if (op == NV30_FP_OP_OPCODE_KIL)
+ fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
+ hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT);
+ hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT);
+ hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT);
+
+ if (sat)
+ hw[0] |= NV30_FP_OP_OUT_SAT;
+
+ if (dst.cc_update)
+ hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
+ hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT);
+ hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) |
+ (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) |
+ (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) |
+ (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT));
+
+ emit_dst(fpc, dst);
+ emit_src(fpc, 0, s0);
+ emit_src(fpc, 1, s1);
+ emit_src(fpc, 2, s2);
+}
+
+static void
+nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit,
+ struct nv30_sreg dst, int mask,
+ struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+{
+ struct nv30_fragment_program *fp = fpc->fp;
+
+ nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
+
+ fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
+ fp->samplers |= (1 << unit);
+}
+
+static INLINE struct nv30_sreg
+tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
+{
+ struct nv30_sreg src;
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ src = nv30_sr(NV30SR_INPUT,
+ fpc->attrib_map[fsrc->SrcRegister.Index]);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ assert(fsrc->SrcRegister.Index < fpc->nr_imm);
+ src = fpc->imm[fsrc->SrcRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1);
+ if (fpc->high_temp < src.index)
+ fpc->high_temp = src.index;
+ break;
+ /* This is clearly insane, but gallium hands us shaders like this.
+ * Luckily fragprog results are just temp regs..
+ */
+ case TGSI_FILE_OUTPUT:
+ if (fsrc->SrcRegister.Index == fpc->colour_id)
+ return nv30_sr(NV30SR_OUTPUT, 0);
+ else
+ return nv30_sr(NV30SR_OUTPUT, 1);
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->SrcRegisterExtMod.Absolute;
+ src.negate = fsrc->SrcRegister.Negate;
+ src.swz[0] = fsrc->SrcRegister.SwizzleX;
+ src.swz[1] = fsrc->SrcRegister.SwizzleY;
+ src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+ src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ return src;
+}
+
+static INLINE struct nv30_sreg
+tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
+ int idx;
+
+ switch (fdst->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ if (fdst->DstRegister.Index == fpc->colour_id)
+ return nv30_sr(NV30SR_OUTPUT, 0);
+ else
+ return nv30_sr(NV30SR_OUTPUT, 1);
+ break;
+ case TGSI_FILE_TEMPORARY:
+ idx = fdst->DstRegister.Index + 1;
+ if (fpc->high_temp < idx)
+ fpc->high_temp = idx;
+ return nv30_sr(NV30SR_TEMP, idx);
+ case TGSI_FILE_NULL:
+ return nv30_sr(NV30SR_NONE, 0);
+ default:
+ NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+ return nv30_sr(NV30SR_NONE, 0);
+ }
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
+ return mask;
+}
+
+static boolean
+src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
+ struct nv30_sreg *src)
+{
+ const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+ struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
+ uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+ uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+ fsrc->SrcRegisterExtSwz.NegateY,
+ fsrc->SrcRegisterExtSwz.NegateZ,
+ fsrc->SrcRegisterExtSwz.NegateW };
+ uint c;
+
+ for (c = 0; c < 4; c++) {
+ switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ mask |= (1 << c);
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zero_mask |= (1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ one_mask |= (1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (!tgsi.negate && neg[c])
+ neg_mask |= (1 << c);
+ }
+
+ if (mask == MASK_ALL && !neg_mask)
+ return TRUE;
+
+ *src = temp(fpc);
+
+ if (mask)
+ arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
+
+ if (zero_mask)
+ arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
+
+ if (one_mask)
+ arith(fpc, 0, STR, *src, one_mask, *src, none, none);
+
+ if (neg_mask) {
+ struct nv30_sreg one = temp(fpc);
+ arith(fpc, 0, STR, one, neg_mask, one, none, none);
+ arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
+ }
+
+ return FALSE;
+}
+
+static boolean
+nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
+ const struct tgsi_full_instruction *finst)
+{
+ const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+ struct nv30_sreg src[3], dst, tmp;
+ int mask, sat, unit = 0;
+ int ai = -1, ci = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ fpc->temp_temp_count = 0;
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(fpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ if (!src_native_swz(fpc, fsrc, &src[i]))
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ NOUVEAU_MSG("extra src attr %d\n",
+ fsrc->SrcRegister.Index);
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_IMMEDIATE:
+ if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ case TGSI_FILE_SAMPLER:
+ unit = fsrc->SrcRegister.Index;
+ break;
+ case TGSI_FILE_OUTPUT:
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_CMP:
+ tmp = temp(fpc);
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ tmp.cc_update = 1;
+ arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NV30_VP_INST_COND_LT;
+ arith(fpc, sat, MOV, dst, mask, src[1], none, none);
+ break;
+ case TGSI_OPCODE_COS:
+ arith(fpc, sat, COS, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ tmp = temp(fpc);
+ arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
+ arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
+ swz(src[1], W, W, W, W), none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(fpc, sat, EX2, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(fpc, sat, FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(fpc, sat, FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_KILP:
+ arith(fpc, 0, KIL, none, 0, none, none, none);
+ break;
+ case TGSI_OPCODE_KIL:
+ dst = nv30_sr(NV30SR_NONE, 0);
+ dst.cc_update = 1;
+ arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
+ dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT;
+ arith(fpc, 0, KIL, dst, 0, none, none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(fpc, sat, LG2, dst, mask, src[0], none, none);
+ break;
+// case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_LRP:
+ arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(fpc, sat, MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_NOISE1:
+ case TGSI_OPCODE_NOISE2:
+ case TGSI_OPCODE_NOISE3:
+ case TGSI_OPCODE_NOISE4:
+ arith(fpc, sat, SFL, dst, mask, none, none, none);
+ break;
+ case TGSI_OPCODE_POW:
+ arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(fpc, sat, RCP, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_RET:
+ assert(0);
+ break;
+ case TGSI_OPCODE_RFL:
+ arith(fpc, 0, RFL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
+ break;
+ case TGSI_OPCODE_SCS:
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ break;
+ case TGSI_OPCODE_SIN:
+ arith(fpc, sat, SIN, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
+ break;
+ case TGSI_OPCODE_TEX:
+ tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_TXB:
+ tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_TXP:
+ tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(fpc);
+ arith(fpc, 0, MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ int hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NV30_FP_OP_INPUT_SRC_POSITION;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV30_FP_OP_INPUT_SRC_COL0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV30_FP_OP_INPUT_SRC_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NV30_FP_OP_INPUT_SRC_FOGC;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.SemanticIndex <= 7) {
+ hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
+ SemanticIndex);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad input semantic\n");
+ return FALSE;
+ }
+
+ fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+ return TRUE;
+}
+
+static boolean
+nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ fpc->depth_id = fdec->DeclarationRange.First;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ fpc->colour_id = fdec->DeclarationRange.First;
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv30_fragprog_prepare(struct nv30_fpc *fpc)
+{
+ struct tgsi_parse_context p;
+ /*int high_temp = -1, i;*/
+
+ tgsi_parse_init(&p, fpc->fp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+ fdec = &p.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ if (!nv30_fragprog_parse_decl_attrib(fpc, fdec))
+ goto out_err;
+ break;
+ case TGSI_FILE_OUTPUT:
+ if (!nv30_fragprog_parse_decl_output(fpc, fdec))
+ goto out_err;
+ break;
+ /*case TGSI_FILE_TEMPORARY:
+ if (fdec->DeclarationRange.Last > high_temp) {
+ high_temp =
+ fdec->DeclarationRange.Last;
+ }
+ break;*/
+ default:
+ break;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ struct tgsi_full_immediate *imm;
+ float vals[4];
+
+ imm = &p.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+ assert(fpc->nr_imm < MAX_IMM);
+
+ vals[0] = imm->u.ImmediateFloat32[0].Float;
+ vals[1] = imm->u.ImmediateFloat32[1].Float;
+ vals[2] = imm->u.ImmediateFloat32[2].Float;
+ vals[3] = imm->u.ImmediateFloat32[3].Float;
+ fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ /*if (++high_temp) {
+ fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
+ for (i = 0; i < high_temp; i++)
+ fpc->r_temp[i] = temp(fpc);
+ fpc->r_temps_discard = 0;
+ }*/
+
+ return TRUE;
+
+out_err:
+ /*if (fpc->r_temp)
+ FREE(fpc->r_temp);*/
+ tgsi_parse_free(&p);
+ return FALSE;
+}
+
+static void
+nv30_fragprog_translate(struct nv30_context *nv30,
+ struct nv30_fragment_program *fp)
+{
+ struct tgsi_parse_context parse;
+ struct nv30_fpc *fpc = NULL;
+
+ tgsi_dump(fp->pipe.tokens,0);
+
+ fpc = CALLOC(1, sizeof(struct nv30_fpc));
+ if (!fpc)
+ return;
+ fpc->fp = fp;
+ fpc->high_temp = -1;
+ fpc->num_regs = 2;
+
+ if (!nv30_fragprog_prepare(fpc)) {
+ FREE(fpc);
+ return;
+ }
+
+ tgsi_parse_init(&parse, fp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+
+ finst = &parse.FullToken.FullInstruction;
+ if (!nv30_fragprog_parse_instruction(fpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ fp->fp_control |= (fpc->num_regs-1)/2;
+ fp->fp_reg_control = (1<<16)|0x4;
+
+ /* Terminate final instruction */
+ fp->insn[fpc->inst_offset] |= 0x00000001;
+
+ /* Append NOP + END instruction, may or may not be necessary. */
+ fpc->inst_offset = fp->insn_len;
+ grow_insns(fpc, 4);
+ fp->insn[fpc->inst_offset + 0] = 0x00000001;
+ fp->insn[fpc->inst_offset + 1] = 0x00000000;
+ fp->insn[fpc->inst_offset + 2] = 0x00000000;
+ fp->insn[fpc->inst_offset + 3] = 0x00000000;
+
+ fp->translated = TRUE;
+ fp->on_hw = FALSE;
+out_err:
+ tgsi_parse_free(&parse);
+ FREE(fpc);
+}
+
+static void
+nv30_fragprog_upload(struct nv30_context *nv30,
+ struct nv30_fragment_program *fp)
+{
+ struct pipe_winsys *ws = nv30->pipe.winsys;
+ const uint32_t le = 1;
+ uint32_t *map;
+ int i;
+
+ map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+#if 0
+ for (i = 0; i < fp->insn_len; i++) {
+ fflush(stdout); fflush(stderr);
+ NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
+ fflush(stdout); fflush(stderr);
+ }
+#endif
+
+ if ((*(const uint8_t *)&le)) {
+ for (i = 0; i < fp->insn_len; i++) {
+ map[i] = fp->insn[i];
+ }
+ } else {
+ /* Weird swapping for big-endian chips */
+ for (i = 0; i < fp->insn_len; i++) {
+ map[i] = ((fp->insn[i] & 0xffff) << 16) |
+ ((fp->insn[i] >> 16) & 0xffff);
+ }
+ }
+
+ ws->buffer_unmap(ws, fp->buffer);
+}
+
+static boolean
+nv30_fragprog_validate(struct nv30_context *nv30)
+{
+ struct nv30_fragment_program *fp = nv30->fragprog;
+ struct pipe_buffer *constbuf =
+ nv30->constbuf[PIPE_SHADER_FRAGMENT];
+ struct pipe_winsys *ws = nv30->pipe.winsys;
+ struct nouveau_stateobj *so;
+ boolean new_consts = FALSE;
+ int i;
+
+ if (fp->translated)
+ goto update_constants;
+
+ /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
+ nv30_fragprog_translate(nv30, fp);
+ if (!fp->translated) {
+ /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
+ return FALSE;
+ }
+
+ fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4);
+ nv30_fragprog_upload(nv30, fp);
+
+ so = so_new(8, 1);
+ so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
+ so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
+ so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1);
+ so_data (so, fp->fp_control);
+ so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1);
+ so_data (so, fp->fp_reg_control);
+ so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1);
+ so_data (so, fp->samplers);
+ so_ref(so, &fp->so);
+
+update_constants:
+ if (fp->nr_consts) {
+ float *map;
+
+ map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ);
+ for (i = 0; i < fp->nr_consts; i++) {
+ struct nv30_fragment_program_data *fpd = &fp->consts[i];
+ uint32_t *p = &fp->insn[fpd->offset];
+ uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
+
+ if (!memcmp(p, cb, 4 * sizeof(float)))
+ continue;
+ memcpy(p, cb, 4 * sizeof(float));
+ new_consts = TRUE;
+ }
+ ws->buffer_unmap(ws, constbuf);
+
+ if (new_consts)
+ nv30_fragprog_upload(nv30, fp);
+ }
+
+ if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) {
+ so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+nv30_fragprog_destroy(struct nv30_context *nv30,
+ struct nv30_fragment_program *fp)
+{
+ if (fp->insn_len)
+ FREE(fp->insn);
+}
+
+struct nv30_state_entry nv30_state_fragprog = {
+ .validate = nv30_fragprog_validate,
+ .dirty = {
+ .pipe = NV30_NEW_FRAGPROG,
+ .hw = NV30_STATE_FRAGPROG
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
new file mode 100644
index 0000000000..b1d2663af3
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -0,0 +1,163 @@
+#include "nv30_context.h"
+#include "nouveau/nouveau_util.h"
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV34TCL_TX_FORMAT_FORMAT_##tf, \
+ (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
+ NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
+ NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
+ NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \
+}
+
+struct nv30_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+ int swizzle;
+};
+
+static struct nv30_texture_format
+nv30_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W),
+ _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W),
+ _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W),
+ _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W),
+ _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X),
+ _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X),
+ _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X),
+ _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y),
+// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X),
+// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X),
+ _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W),
+ _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W),
+ _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W),
+ _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W),
+ {},
+};
+
+static struct nv30_texture_format *
+nv30_fragtex_format(uint pipe_format)
+{
+ struct nv30_texture_format *tf = nv30_texture_formats;
+ char fs[128];
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format));
+ return NULL;
+}
+
+
+static struct nouveau_stateobj *
+nv30_fragtex_build(struct nv30_context *nv30, int unit)
+{
+ struct nv30_sampler_state *ps = nv30->tex_sampler[unit];
+ struct nv30_miptree *nv30mt = nv30->tex_miptree[unit];
+ struct pipe_texture *pt = &nv30mt->base;
+ struct nv30_texture_format *tf;
+ struct nouveau_stateobj *so;
+ uint32_t txf, txs , txp;
+ unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ tf = nv30_fragtex_format(pt->format);
+ if (!tf)
+ assert(0);
+
+ txf = tf->format;
+ txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
+ txf |= log2i(pt->width[0]) << 20;
+ txf |= log2i(pt->height[0]) << 24;
+ txf |= log2i(pt->depth[0]) << 28;
+ txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV34TCL_TX_FORMAT_CUBIC;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_2D;
+ break;
+ case PIPE_TEXTURE_3D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_3D;
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_1D;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return NULL;
+ }
+
+ if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ txp = 0;
+ } else {
+ txp = nv30mt->level[0].pitch;
+ txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? */;
+ }
+
+ txs = tf->swizzle;
+
+ so = so_new(16, 2);
+ so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8);
+ so_reloc (so, nv30mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, nv30mt->buffer, txf, tex_flags | NOUVEAU_BO_OR,
+ NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+ so_data (so, ps->wrap);
+ so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en);
+ so_data (so, txs);
+ so_data (so, ps->filt | 0x2000 /*voodoo*/);
+ so_data (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+ pt->height[0]);
+ so_data (so, ps->bcol);
+
+ return so;
+}
+
+static boolean
+nv30_fragtex_validate(struct nv30_context *nv30)
+{
+ struct nv30_fragment_program *fp = nv30->fragprog;
+ struct nv30_state *state = &nv30->state;
+ struct nouveau_stateobj *so;
+ unsigned samplers, unit;
+
+ samplers = state->fp_samplers & ~fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ so = so_new(2, 0);
+ so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1);
+ so_data (so, 0);
+ so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]);
+ state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit));
+ }
+
+ samplers = nv30->dirty_samplers & fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ so = nv30_fragtex_build(nv30, unit);
+ so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]);
+ state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit));
+ }
+
+ nv30->state.fp_samplers = fp->samplers;
+ return FALSE;
+}
+
+struct nv30_state_entry nv30_state_fragtex = {
+ .validate = nv30_fragtex_validate,
+ .dirty = {
+ .pipe = NV30_NEW_SAMPLER | NV30_NEW_FRAGPROG,
+ .hw = 0
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
new file mode 100644
index 0000000000..54fb3585f8
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -0,0 +1,195 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv30_context.h"
+
+static void
+nv30_miptree_layout(struct nv30_miptree *nv30mt)
+{
+ struct pipe_texture *pt = &nv30mt->base;
+ boolean swizzled = FALSE;
+ uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+ uint offset = 0;
+ int nr_faces, l, f, pitch;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ nr_faces = pt->depth[0];
+ } else {
+ nr_faces = 1;
+ }
+
+ pitch = pt->width[0];
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->depth[l] = depth;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ if (swizzled)
+ pitch = pt->nblocksx[l];
+ pitch = align(pitch, 64);
+
+ nv30mt->level[l].pitch = pitch * pt->block.size;
+ nv30mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+ depth = MAX2(1, depth >> 1);
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ nv30mt->level[l].image_offset[f] = offset;
+ offset += nv30mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ nv30mt->total_size = offset;
+}
+
+static struct pipe_texture *
+nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv30_miptree *mt;
+
+ mt = MALLOC(sizeof(struct nv30_miptree));
+ if (!mt)
+ return NULL;
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = pscreen;
+ mt->shadow_tex = NULL;
+ mt->shadow_surface = NULL;
+
+ /* Swizzled textures must be POT */
+ if (pt->width[0] & (pt->width[0] - 1) ||
+ pt->height[0] & (pt->height[0] - 1))
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ else
+ if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
+ PIPE_TEXTURE_USAGE_DISPLAY_TARGET))
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ else
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ else {
+ switch (pt->format) {
+ /* TODO: Figure out which formats can be swizzled */
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_R16_SNORM:
+ break;
+ default:
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ }
+ }
+
+ nv30_miptree_layout(mt);
+
+ mt->buffer = ws->buffer_create(ws, 256,
+ PIPE_BUFFER_USAGE_PIXEL |
+ NOUVEAU_BUFFER_USAGE_TEXTURE,
+ mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv30_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+ struct pipe_texture *pt = *ppt;
+ struct nv30_miptree *mt = (struct nv30_miptree *)pt;
+ int l;
+
+ *ppt = NULL;
+ if (--pt->refcount)
+ return;
+
+ pipe_buffer_reference(pscreen, &mt->buffer, NULL);
+ for (l = 0; l <= pt->last_level; l++) {
+ if (mt->level[l].image_offset)
+ FREE(mt->level[l].image_offset);
+ }
+
+ if (mt->shadow_tex) {
+ assert(mt->shadow_surface);
+ pscreen->tex_surface_release(pscreen, &mt->shadow_surface);
+ nv30_miptree_release(pscreen, &mt->shadow_tex);
+ }
+
+ FREE(mt);
+}
+
+static struct pipe_surface *
+nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt;
+ struct pipe_surface *ps;
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (!ps)
+ return NULL;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(pscreen, &ps->buffer, nv30mt->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = nv30mt->level[level].pitch;
+ ps->usage = flags;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+ ps->refcount = 1;
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ps->offset = nv30mt->level[level].image_offset[face];
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ ps->offset = nv30mt->level[level].image_offset[zslice];
+ } else {
+ ps->offset = nv30mt->level[level].image_offset[0];
+ }
+
+ return ps;
+}
+
+static void
+nv30_miptree_surface_del(struct pipe_screen *pscreen,
+ struct pipe_surface **psurface)
+{
+ struct pipe_surface *ps = *psurface;
+
+ *psurface = NULL;
+ if (--ps->refcount > 0)
+ return;
+
+ pipe_texture_reference(&ps->texture, NULL);
+ pipe_buffer_reference(pscreen->winsys, &ps->buffer, NULL);
+ FREE(ps);
+}
+
+void
+nv30_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv30_miptree_create;
+ pscreen->texture_release = nv30_miptree_release;
+ pscreen->get_tex_surface = nv30_miptree_surface_new;
+ pscreen->tex_surface_release = nv30_miptree_surface_del;
+}
diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c
new file mode 100644
index 0000000000..2f974cf5c4
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_query.c
@@ -0,0 +1,122 @@
+#include "pipe/p_context.h"
+
+#include "nv30_context.h"
+
+struct nv30_query {
+ struct nouveau_resource *object;
+ unsigned type;
+ boolean ready;
+ uint64_t result;
+};
+
+static INLINE struct nv30_query *
+nv30_query(struct pipe_query *pipe)
+{
+ return (struct nv30_query *)pipe;
+}
+
+static struct pipe_query *
+nv30_query_create(struct pipe_context *pipe, unsigned query_type)
+{
+ struct nv30_query *q;
+
+ q = CALLOC(1, sizeof(struct nv30_query));
+ q->type = query_type;
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_query *q = nv30_query(pq);
+
+ if (q->object)
+ nv30->nvws->res_free(&q->object);
+ FREE(q);
+}
+
+static void
+nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_query *q = nv30_query(pq);
+
+ assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ /* Happens when end_query() is called, then another begin_query()
+ * without querying the result in-between. For now we'll wait for
+ * the existing query to notify completion, but it could be better.
+ */
+ if (q->object) {
+ uint64_t tmp;
+ pipe->get_query_result(pipe, pq, 1, &tmp);
+ }
+
+ if (nv30->nvws->res_alloc(nv30->screen->query_heap, 1, NULL, &q->object))
+ assert(0);
+ nv30->nvws->notifier_reset(nv30->screen->query, q->object->start);
+
+ BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1);
+ OUT_RING (1);
+ BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1);
+ OUT_RING (1);
+
+ q->ready = FALSE;
+}
+
+static void
+nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_query *q = nv30_query(pq);
+
+ BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1);
+ OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
+ ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT));
+ FIRE_RING(NULL);
+}
+
+static boolean
+nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, uint64_t *result)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_query *q = nv30_query(pq);
+ struct nouveau_winsys *nvws = nv30->nvws;
+
+ assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ if (!q->ready) {
+ unsigned status;
+
+ status = nvws->notifier_status(nv30->screen->query,
+ q->object->start);
+ if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
+ if (wait == FALSE)
+ return FALSE;
+ nvws->notifier_wait(nv30->screen->query, q->object->start,
+ NV_NOTIFY_STATE_STATUS_COMPLETED,
+ 0);
+ }
+
+ q->result = nvws->notifier_retval(nv30->screen->query,
+ q->object->start);
+ q->ready = TRUE;
+ nvws->res_free(&q->object);
+ }
+
+ *result = q->result;
+ return TRUE;
+}
+
+void
+nv30_init_query_functions(struct nv30_context *nv30)
+{
+ nv30->pipe.create_query = nv30_query_create;
+ nv30->pipe.destroy_query = nv30_query_destroy;
+ nv30->pipe.begin_query = nv30_query_begin;
+ nv30->pipe.end_query = nv30_query_end;
+ nv30->pipe.get_query_result = nv30_query_result;
+}
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
new file mode 100644
index 0000000000..1fac6d3df8
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -0,0 +1,386 @@
+#include "pipe/p_screen.h"
+
+#include "nv30_context.h"
+#include "nv30_screen.h"
+
+#define NV30TCL_CHIPSET_3X_MASK 0x00000003
+#define NV34TCL_CHIPSET_3X_MASK 0x00000010
+#define NV35TCL_CHIPSET_3X_MASK 0x000001e0
+
+static const char *
+nv30_screen_get_name(struct pipe_screen *pscreen)
+{
+ struct nv30_screen *screen = nv30_screen(pscreen);
+ struct nouveau_device *dev = screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv30_screen_get_vendor(struct pipe_screen *pscreen)
+{
+ return "nouveau";
+}
+
+static int
+nv30_screen_get_param(struct pipe_screen *pscreen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 16;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 2;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 13;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 13;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return 0;
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 0;
+ case NOUVEAU_CAP_HW_VTXBUF:
+ case NOUVEAU_CAP_HW_IDXBUF:
+ return 1;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv30_screen_get_paramf(struct pipe_screen *pscreen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 8.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static void *
+nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct pipe_surface *surface_to_map;
+ void *map;
+
+ if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture;
+
+ if (!mt->shadow_tex) {
+ unsigned old_tex_usage = surface->texture->tex_usage;
+ surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR |
+ PIPE_TEXTURE_USAGE_DYNAMIC;
+ mt->shadow_tex = screen->texture_create(screen, surface->texture);
+ surface->texture->tex_usage = old_tex_usage;
+
+ assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR);
+ }
+
+ mt->shadow_surface = screen->get_tex_surface
+ (
+ screen, mt->shadow_tex,
+ surface->face, surface->level, surface->zslice,
+ surface->usage
+ );
+
+ surface_to_map = mt->shadow_surface;
+ }
+ else
+ surface_to_map = surface;
+
+ assert(surface_to_map);
+
+ map = ws->buffer_map(ws, surface_to_map->buffer, flags);
+ if (!map)
+ return NULL;
+
+ return map + surface_to_map->offset;
+}
+
+static void
+nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct pipe_surface *surface_to_unmap;
+
+ /* TODO: Copy from shadow just before push buffer is flushed instead.
+ There are probably some programs that map/unmap excessively
+ before rendering. */
+ if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture;
+
+ assert(mt->shadow_tex);
+
+ surface_to_unmap = mt->shadow_surface;
+ }
+ else
+ surface_to_unmap = surface;
+
+ assert(surface_to_unmap);
+
+ ws->buffer_unmap(ws, surface_to_unmap->buffer);
+
+ if (surface_to_unmap != surface) {
+ struct nv30_screen *nvscreen = nv30_screen(screen);
+
+ nvscreen->nvws->surface_copy(nvscreen->nvws,
+ surface, 0, 0,
+ surface_to_unmap, 0, 0,
+ surface->width, surface->height);
+ }
+}
+
+static void
+nv30_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv30_screen *screen = nv30_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->res_free(&screen->vp_exec_heap);
+ nvws->res_free(&screen->vp_data_heap);
+ nvws->res_free(&screen->query_heap);
+ nvws->notifier_free(&screen->query);
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->rankine);
+
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen);
+ struct nouveau_stateobj *so;
+ unsigned rankine_class = 0;
+ unsigned chipset = nvws->channel->device->chipset;
+ int ret, i;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ /* 3D object */
+ switch (chipset & 0xf0) {
+ case 0x30:
+ if (NV30TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f)))
+ rankine_class = 0x0397;
+ else
+ if (NV34TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f)))
+ rankine_class = 0x0697;
+ else
+ if (NV35TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f)))
+ rankine_class = 0x0497;
+ break;
+ default:
+ break;
+ }
+
+ if (!rankine_class) {
+ NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", chipset);
+ return NULL;
+ }
+
+ ret = nvws->grobj_alloc(nvws, rankine_class, &screen->rankine);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return FALSE;
+ }
+
+ /* Notifier for sync purposes */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv30_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Query objects */
+ ret = nvws->notifier_alloc(nvws, 32, &screen->query);
+ if (ret) {
+ NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
+ nv30_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ ret = nvws->res_init(&screen->query_heap, 0, 32);
+ if (ret) {
+ NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
+ nv30_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Vtxprog resources */
+ if (nvws->res_init(&screen->vp_exec_heap, 0, 256) ||
+ nvws->res_init(&screen->vp_data_heap, 0, 256)) {
+ nv30_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Static rankine initialisation */
+ so = so_new(128, 0);
+ so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1);
+ so_data (so, screen->sync->handle);
+ so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->gart->handle);
+ so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->gart->handle);
+/* so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2);
+ so_data (so, 0);
+ so_data (so, screen->query->handle);*/
+ so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1);
+ so_data (so, nvws->channel->vram->handle);
+
+ for (i=1; i<8; i++) {
+ so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ so_data (so, 0);
+ so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, screen->rankine, 0x220, 1);
+ so_data (so, 1);
+
+ so_method(so, screen->rankine, 0x03b0, 1);
+ so_data (so, 0x00100000);
+ so_method(so, screen->rankine, 0x1454, 1);
+ so_data (so, 0);
+ so_method(so, screen->rankine, 0x1d80, 1);
+ so_data (so, 3);
+ so_method(so, screen->rankine, 0x1450, 1);
+ so_data (so, 0x00030004);
+
+ /* NEW */
+ so_method(so, screen->rankine, 0x1e98, 1);
+ so_data (so, 0);
+ so_method(so, screen->rankine, 0x17e0, 3);
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+ so_method(so, screen->rankine, 0x1f80, 16);
+ for (i=0; i<16; i++) {
+ so_data (so, (i==8) ? 0x0000ffff : 0);
+ }
+
+ so_method(so, screen->rankine, 0x120, 3);
+ so_data (so, 0);
+ so_data (so, 1);
+ so_data (so, 2);
+
+ so_method(so, screen->rankine, 0x1d88, 1);
+ so_data (so, 0x00001200);
+
+ so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1);
+ so_data (so, 0);
+
+ so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2);
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+
+ so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1);
+ so_data (so, 0xffff0000);
+
+ /* enables use of vp rather than fixed-function somehow */
+ so_method(so, screen->rankine, 0x1e94, 1);
+ so_data (so, 0x13);
+
+ so_emit(nvws, so);
+ so_ref(NULL, &so);
+ nvws->push_flush(nvws, 0, NULL);
+
+ screen->pipe.winsys = ws;
+ screen->pipe.destroy = nv30_screen_destroy;
+
+ screen->pipe.get_name = nv30_screen_get_name;
+ screen->pipe.get_vendor = nv30_screen_get_vendor;
+ screen->pipe.get_param = nv30_screen_get_param;
+ screen->pipe.get_paramf = nv30_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv30_screen_surface_format_supported;
+
+ screen->pipe.surface_map = nv30_surface_map;
+ screen->pipe.surface_unmap = nv30_surface_unmap;
+
+ nv30_screen_init_miptree_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h
new file mode 100644
index 0000000000..b7ddc2a959
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_screen.h
@@ -0,0 +1,35 @@
+#ifndef __NV30_SCREEN_H__
+#define __NV30_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv30_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ unsigned cur_pctx;
+
+ /* HW graphics objects */
+ struct nouveau_grobj *rankine;
+ struct nouveau_notifier *sync;
+
+ /* Query object resources */
+ struct nouveau_notifier *query;
+ struct nouveau_resource *query_heap;
+
+ /* Vtxprog resources */
+ struct nouveau_resource *vp_exec_heap;
+ struct nouveau_resource *vp_data_heap;
+
+ /* Current 3D state of channel */
+ struct nouveau_stateobj *state[NV30_STATE_MAX];
+};
+
+static INLINE struct nv30_screen *
+nv30_screen(struct pipe_screen *screen)
+{
+ return (struct nv30_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h
new file mode 100644
index 0000000000..dd3a36f78f
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_shader.h
@@ -0,0 +1,490 @@
+#ifndef __NV30_SHADER_H__
+#define __NV30_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * 128bit opcodes, split into 4 32-bit ones for ease of use.
+ *
+ * Non-native instructions
+ * ABS - MOV + NV40_VP_INST0_DEST_ABS
+ * POW - EX2 + MUL + LG2
+ * SUB - ADD, second source negated
+ * SWZ - MOV
+ * XPD -
+ *
+ * Register access
+ * - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
+ * - Only one CONST can be accessed per-instruction (move extras into TEMPs)
+ *
+ * Relative Addressing
+ * According to the value returned for
+ * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
+ *
+ * there are only two address registers available. The destination in the
+ * ARL instruction is set to TEMP <n> (The temp isn't actually written).
+ *
+ * When using vanilla ARB_v_p, the proprietary driver will squish both the
+ * available ADDRESS regs into the first hardware reg in the X and Y
+ * components.
+ *
+ * To use an address reg as an index into consts, the CONST_SRC is set to
+ * (const_base + offset) and INDEX_CONST is set.
+ *
+ * To access the second address reg use ADDR_REG_SELECT_1. A particular
+ * component of the address regs is selected with ADDR_SWZ.
+ *
+ * Only one address register can be accessed per instruction.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details) Conditional
+ * execution of an instruction is enabled by setting COND_TEST_ENABLE, and
+ * selecting the condition which will allow the test to pass with
+ * COND_{FL,LT,...}. It is possible to swizzle the values in the condition
+ * register, which allows for testing against an individual component.
+ *
+ * Branching:
+ *
+ * The BRA/CAL instructions seem to follow a slightly different opcode
+ * layout. The destination instruction ID (IADDR) overlaps a source field.
+ * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO
+ * command, and is incremented automatically on each UPLOAD_INST FIFO
+ * command.
+ *
+ * Conditional branching is achieved by using the condition tests described
+ * above. There doesn't appear to be dedicated looping instructions, but
+ * this can be done using a temp reg + conditional branching.
+ *
+ * Subroutines may be uploaded before the main program itself, but the first
+ * executed instruction is determined by the PROGRAM_START_ID FIFO command.
+ *
+ */
+
+/* DWORD 0 */
+
+#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */
+#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */
+#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */
+#define NV30_VP_INST_VEC_RESULT (1 << 20)
+#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16
+#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16)
+#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15)
+#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16)
+#define NV30_VP_INST_COND_TEST_ENABLE (1<<14)
+#define NV30_VP_INST_COND_SHIFT 11
+#define NV30_VP_INST_COND_MASK (0x07 << 11)
+# define NV30_VP_INST_COND_FL 0 /* guess */
+# define NV30_VP_INST_COND_LT 1
+# define NV30_VP_INST_COND_EQ 2
+# define NV30_VP_INST_COND_LE 3
+# define NV30_VP_INST_COND_GT 4
+# define NV30_VP_INST_COND_NE 5
+# define NV30_VP_INST_COND_GE 6
+# define NV30_VP_INST_COND_TR 7 /* guess */
+#define NV30_VP_INST_COND_SWZ_X_SHIFT 9
+#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9)
+#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7
+#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7)
+#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5
+#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5)
+#define NV30_VP_INST_COND_SWZ_W_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3)
+#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3)
+#define NV30_VP_INST_ADDR_SWZ_SHIFT 1
+#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1)
+#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0
+#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0)
+
+/* DWORD 1 */
+#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28
+#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28)
+# define NV30_VP_INST_OP_NOP 0x00
+# define NV30_VP_INST_OP_RCP 0x02
+# define NV30_VP_INST_OP_RCC 0x03
+# define NV30_VP_INST_OP_RSQ 0x04
+# define NV30_VP_INST_OP_EXP 0x05
+# define NV30_VP_INST_OP_LOG 0x06
+# define NV30_VP_INST_OP_LIT 0x07
+# define NV30_VP_INST_OP_BRA 0x09
+# define NV30_VP_INST_OP_CAL 0x0B
+# define NV30_VP_INST_OP_RET 0x0C
+# define NV30_VP_INST_OP_LG2 0x0D
+# define NV30_VP_INST_OP_EX2 0x0E
+# define NV30_VP_INST_OP_SIN 0x0F
+# define NV30_VP_INST_OP_COS 0x10
+#define NV30_VP_INST_VEC_OPCODE_SHIFT 23
+#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23)
+# define NV30_VP_INST_OP_NOPV 0x00
+# define NV30_VP_INST_OP_MOV 0x01
+# define NV30_VP_INST_OP_MUL 0x02
+# define NV30_VP_INST_OP_ADD 0x03
+# define NV30_VP_INST_OP_MAD 0x04
+# define NV30_VP_INST_OP_DP3 0x05
+# define NV30_VP_INST_OP_DP4 0x07
+# define NV30_VP_INST_OP_DPH 0x06
+# define NV30_VP_INST_OP_DST 0x08
+# define NV30_VP_INST_OP_MIN 0x09
+# define NV30_VP_INST_OP_MAX 0x0A
+# define NV30_VP_INST_OP_SLT 0x0B
+# define NV30_VP_INST_OP_SGE 0x0C
+# define NV30_VP_INST_OP_ARL 0x0D
+# define NV30_VP_INST_OP_FRC 0x0E
+# define NV30_VP_INST_OP_FLR 0x0F
+# define NV30_VP_INST_OP_SEQ 0x10
+# define NV30_VP_INST_OP_SFL 0x11
+# define NV30_VP_INST_OP_SGT 0x12
+# define NV30_VP_INST_OP_SLE 0x13
+# define NV30_VP_INST_OP_SNE 0x14
+# define NV30_VP_INST_OP_STR 0x15
+# define NV30_VP_INST_OP_SSG 0x16
+# define NV30_VP_INST_OP_ARR 0x17
+# define NV30_VP_INST_OP_ARA 0x18
+#define NV30_VP_INST_CONST_SRC_SHIFT 14
+#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14)
+#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/
+#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/
+# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */
+# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */
+# define NV30_VP_INST_IN_NORMAL 2
+# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */
+# define NV30_VP_INST_IN_COL1 4
+# define NV30_VP_INST_IN_FOGC 5
+# define NV30_VP_INST_IN_TC0 8
+# define NV30_VP_INST_IN_TC(n) (8+n)
+#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/
+
+/* Please note: the IADDR fields overlap other fields because they are used
+ * only for branch instructions. See Branching: label above
+ *
+ * DWORD 2
+ */
+#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/
+#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */
+#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/
+#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/
+#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/
+#define NV30_VP_INST_IADDR_SHIFT 2
+#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */
+
+/* DWORD 3 */
+#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/
+#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/
+#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24
+#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24)
+#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20
+#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20)
+#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16
+#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16)
+#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/
+#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/
+#define NV30_VP_INST_DEST_SHIFT 2
+#define NV30_VP_INST_DEST_MASK (0x0F << 2)
+# define NV30_VP_INST_DEST_POS 0
+# define NV30_VP_INST_DEST_BFC0 1
+# define NV30_VP_INST_DEST_BFC1 2
+# define NV30_VP_INST_DEST_COL0 3
+# define NV30_VP_INST_DEST_COL1 4
+# define NV30_VP_INST_DEST_FOGC 5
+# define NV30_VP_INST_DEST_PSZ 6
+# define NV30_VP_INST_DEST_TC(n) (8+n)
+
+#define NV30_VP_INST_LAST (1 << 0)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV30_VP_SRC0_HIGH_SHIFT 6
+#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0
+#define NV30_VP_SRC0_LOW_MASK 0x0000003F
+#define NV30_VP_SRC2_HIGH_SHIFT 4
+#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0
+#define NV30_VP_SRC2_LOW_MASK 0x0000000F
+
+
+/* Source-register definition - matches NV20 exactly */
+#define NV30_VP_SRC_NEGATE (1<<14)
+#define NV30_VP_SRC_SWZ_X_SHIFT 12
+#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12)
+#define NV30_VP_SRC_SWZ_Y_SHIFT 10
+#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10)
+#define NV30_VP_SRC_SWZ_Z_SHIFT 8
+#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8)
+#define NV30_VP_SRC_SWZ_W_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6)
+#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6)
+#define NV30_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0)
+#define NV30_VP_SRC_REG_TYPE_SHIFT 0
+#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0)
+#define NV30_VP_SRC_REG_TYPE_TEMP 1
+#define NV30_VP_SRC_REG_TYPE_INPUT 2
+#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ * 0 - Opcode, output reg/mask, ATTRIB source
+ * 1 - Source 0
+ * 2 - Source 1
+ * 3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ * result.color == R0.xyzw
+ * result.depth == R1.z
+ * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0
+ * otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords. As such instructions such as:
+ *
+ * ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO
+ * is implemented simply by not writing to the relevant components of the destination.
+ *
+ * Conditional execution
+ * TODO
+ *
+ * Non-native instructions:
+ * LIT
+ * LRP - MAD+MAD
+ * SUB - ADD, negate second source
+ * RSQ - LG2 + EX2
+ * POW - LG2 + MUL + EX2
+ * SCS - COS + SIN
+ * XPD
+ */
+
+//== Opcode / Destination selection ==
+#define NV30_FP_OP_PROGRAM_END (1 << 0)
+#define NV30_FP_OP_OUT_REG_SHIFT 1
+#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */
+/* Needs to be set when writing outputs to get expected result.. */
+#define NV30_FP_OP_OUT_REG_HALF (1 << 7)
+#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8)
+#define NV30_FP_OP_OUTMASK_SHIFT 9
+#define NV30_FP_OP_OUTMASK_MASK (0xF << 9)
+# define NV30_FP_OP_OUT_X (1<<9)
+# define NV30_FP_OP_OUT_Y (1<<10)
+# define NV30_FP_OP_OUT_Z (1<<11)
+# define NV30_FP_OP_OUT_W (1<<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NV30_FP_OP_INPUT_SRC_SHIFT 13
+#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13)
+# define NV30_FP_OP_INPUT_SRC_POSITION 0x0
+# define NV30_FP_OP_INPUT_SRC_COL0 0x1
+# define NV30_FP_OP_INPUT_SRC_COL1 0x2
+# define NV30_FP_OP_INPUT_SRC_FOGC 0x3
+# define NV30_FP_OP_INPUT_SRC_TC0 0x4
+# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n)
+#define NV30_FP_OP_TEX_UNIT_SHIFT 17
+#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */
+#define NV30_FP_OP_PRECISION_SHIFT 22
+#define NV30_FP_OP_PRECISION_MASK (3 << 22)
+# define NV30_FP_PRECISION_FP32 0
+# define NV30_FP_PRECISION_FP16 1
+# define NV30_FP_PRECISION_FX12 2
+#define NV30_FP_OP_OPCODE_SHIFT 24
+#define NV30_FP_OP_OPCODE_MASK (0x3F << 24)
+# define NV30_FP_OP_OPCODE_NOP 0x00
+# define NV30_FP_OP_OPCODE_MOV 0x01
+# define NV30_FP_OP_OPCODE_MUL 0x02
+# define NV30_FP_OP_OPCODE_ADD 0x03
+# define NV30_FP_OP_OPCODE_MAD 0x04
+# define NV30_FP_OP_OPCODE_DP3 0x05
+# define NV30_FP_OP_OPCODE_DP4 0x06
+# define NV30_FP_OP_OPCODE_DST 0x07
+# define NV30_FP_OP_OPCODE_MIN 0x08
+# define NV30_FP_OP_OPCODE_MAX 0x09
+# define NV30_FP_OP_OPCODE_SLT 0x0A
+# define NV30_FP_OP_OPCODE_SGE 0x0B
+# define NV30_FP_OP_OPCODE_SLE 0x0C
+# define NV30_FP_OP_OPCODE_SGT 0x0D
+# define NV30_FP_OP_OPCODE_SNE 0x0E
+# define NV30_FP_OP_OPCODE_SEQ 0x0F
+# define NV30_FP_OP_OPCODE_FRC 0x10
+# define NV30_FP_OP_OPCODE_FLR 0x11
+# define NV30_FP_OP_OPCODE_KIL 0x12
+# define NV30_FP_OP_OPCODE_PK4B 0x13
+# define NV30_FP_OP_OPCODE_UP4B 0x14
+# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */
+# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */
+# define NV30_FP_OP_OPCODE_TEX 0x17
+# define NV30_FP_OP_OPCODE_TXP 0x18
+# define NV30_FP_OP_OPCODE_TXD 0x19
+# define NV30_FP_OP_OPCODE_RCP 0x1A
+# define NV30_FP_OP_OPCODE_RSQ 0x1B
+# define NV30_FP_OP_OPCODE_EX2 0x1C
+# define NV30_FP_OP_OPCODE_LG2 0x1D
+# define NV30_FP_OP_OPCODE_LIT 0x1E
+# define NV30_FP_OP_OPCODE_LRP 0x1F
+# define NV30_FP_OP_OPCODE_STR 0x20
+# define NV30_FP_OP_OPCODE_SFL 0x21
+# define NV30_FP_OP_OPCODE_COS 0x22
+# define NV30_FP_OP_OPCODE_SIN 0x23
+# define NV30_FP_OP_OPCODE_PK2H 0x24
+# define NV30_FP_OP_OPCODE_UP2H 0x25
+# define NV30_FP_OP_OPCODE_POW 0x26
+# define NV30_FP_OP_OPCODE_PK4UB 0x27
+# define NV30_FP_OP_OPCODE_UP4UB 0x28
+# define NV30_FP_OP_OPCODE_PK2US 0x29
+# define NV30_FP_OP_OPCODE_UP2US 0x2A
+# define NV30_FP_OP_OPCODE_DP2A 0x2E
+# define NV30_FP_OP_OPCODE_TXB 0x31
+# define NV30_FP_OP_OPCODE_RFL 0x36
+# define NV30_FP_OP_OPCODE_DIV 0x3A
+#define NV30_FP_OP_OUT_SAT (1 << 31)
+
+/* high order bits of SRC0 */
+#define NV30_FP_OP_OUT_ABS (1 << 29)
+#define NV30_FP_OP_COND_SWZ_W_SHIFT 27
+#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27)
+#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25
+#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25)
+#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23
+#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23)
+#define NV30_FP_OP_COND_SWZ_X_SHIFT 21
+#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21)
+#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21
+#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
+#define NV30_FP_OP_COND_SHIFT 18
+#define NV30_FP_OP_COND_MASK (0x07 << 18)
+# define NV30_FP_OP_COND_FL 0
+# define NV30_FP_OP_COND_LT 1
+# define NV30_FP_OP_COND_EQ 2
+# define NV30_FP_OP_COND_LE 3
+# define NV30_FP_OP_COND_GT 4
+# define NV30_FP_OP_COND_NE 5
+# define NV30_FP_OP_COND_GE 6
+# define NV30_FP_OP_COND_TR 7
+
+/* high order bits of SRC1 */
+#define NV30_FP_OP_DST_SCALE_SHIFT 28
+#define NV30_FP_OP_DST_SCALE_MASK (3 << 28)
+#define NV30_FP_OP_DST_SCALE_1X 0
+#define NV30_FP_OP_DST_SCALE_2X 1
+#define NV30_FP_OP_DST_SCALE_4X 2
+#define NV30_FP_OP_DST_SCALE_8X 3
+#define NV30_FP_OP_DST_SCALE_INV_2X 5
+#define NV30_FP_OP_DST_SCALE_INV_4X 6
+#define NV30_FP_OP_DST_SCALE_INV_8X 7
+
+
+/* high order bits of SRC2 */
+#define NV30_FP_OP_INDEX_INPUT (1 << 30)
+
+//== Register selection ==
+#define NV30_FP_REG_TYPE_SHIFT 0
+#define NV30_FP_REG_TYPE_MASK (3 << 0)
+# define NV30_FP_REG_TYPE_TEMP 0
+# define NV30_FP_REG_TYPE_INPUT 1
+# define NV30_FP_REG_TYPE_CONST 2
+#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */
+#define NV30_FP_REG_SRC_MASK (31 << 2)
+#define NV30_FP_REG_SRC_HALF (1 << 8)
+#define NV30_FP_REG_SWZ_ALL_SHIFT 9
+#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9)
+#define NV30_FP_REG_SWZ_X_SHIFT 9
+#define NV30_FP_REG_SWZ_X_MASK (3 << 9)
+#define NV30_FP_REG_SWZ_Y_SHIFT 11
+#define NV30_FP_REG_SWZ_Y_MASK (3 << 11)
+#define NV30_FP_REG_SWZ_Z_SHIFT 13
+#define NV30_FP_REG_SWZ_Z_MASK (3 << 13)
+#define NV30_FP_REG_SWZ_W_SHIFT 15
+#define NV30_FP_REG_SWZ_W_MASK (3 << 15)
+# define NV30_FP_SWIZZLE_X 0
+# define NV30_FP_SWIZZLE_Y 1
+# define NV30_FP_SWIZZLE_Z 2
+# define NV30_FP_SWIZZLE_W 3
+#define NV30_FP_REG_NEGATE (1 << 17)
+
+#define NV30SR_NONE 0
+#define NV30SR_OUTPUT 1
+#define NV30SR_INPUT 2
+#define NV30SR_TEMP 3
+#define NV30SR_CONST 4
+
+struct nv30_sreg {
+ int type;
+ int index;
+
+ int dst_scale;
+
+ int negate;
+ int abs;
+ int swz[4];
+
+ int cc_update;
+ int cc_update_reg;
+ int cc_test;
+ int cc_test_reg;
+ int cc_swz[4];
+};
+
+static INLINE struct nv30_sreg
+nv30_sr(int type, int index)
+{
+ struct nv30_sreg temp = {
+ .type = type,
+ .index = index,
+ .dst_scale = DEF_SCALE,
+ .abs = 0,
+ .negate = 0,
+ .swz = { 0, 1, 2, 3 },
+ .cc_update = 0,
+ .cc_update_reg = 0,
+ .cc_test = DEF_CTEST,
+ .cc_test_reg = 0,
+ .cc_swz = { 0, 1, 2, 3 },
+ };
+ return temp;
+}
+
+static INLINE struct nv30_sreg
+nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w)
+{
+ struct nv30_sreg dst = src;
+
+ dst.swz[SWZ_X] = src.swz[x];
+ dst.swz[SWZ_Y] = src.swz[y];
+ dst.swz[SWZ_Z] = src.swz[z];
+ dst.swz[SWZ_W] = src.swz[w];
+ return dst;
+}
+
+static INLINE struct nv30_sreg
+nv30_sr_neg(struct nv30_sreg src)
+{
+ src.negate = !src.negate;
+ return src;
+}
+
+static INLINE struct nv30_sreg
+nv30_sr_abs(struct nv30_sreg src)
+{
+ src.abs = 1;
+ return src;
+}
+
+static INLINE struct nv30_sreg
+nv30_sr_scale(struct nv30_sreg src, int scale)
+{
+ src.dst_scale = scale;
+ return src;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
new file mode 100644
index 0000000000..47e1a625af
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -0,0 +1,725 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+static void *
+nv30_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso));
+ struct nouveau_stateobj *so = so_new(16, 0);
+
+ if (cso->blend_enable) {
+ so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3);
+ so_data (so, 1);
+ so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) |
+ nvgl_blend_func(cso->rgb_src_factor));
+ so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 |
+ nvgl_blend_func(cso->rgb_dst_factor));
+ /* FIXME: Gallium assumes GL_EXT_blend_func_separate.
+ It is not the case for NV30 */
+ so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1);
+ so_data (so, nvgl_blend_eqn(cso->rgb_func));
+ } else {
+ so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, rankine, NV34TCL_COLOR_MASK, 1);
+ so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
+ ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
+ ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) |
+ ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0)));
+
+ if (cso->logicop_enable) {
+ so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
+ so_data (so, 1);
+ so_data (so, nvgl_logicop_func(cso->logicop_func));
+ } else {
+ so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, rankine, NV34TCL_DITHER_ENABLE, 1);
+ so_data (so, cso->dither ? 1 : 0);
+
+ so_ref(so, &bso->so);
+ bso->pipe = *cso;
+ return (void *)bso;
+}
+
+static void
+nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->blend = hwcso;
+ nv30->dirty |= NV30_NEW_BLEND;
+}
+
+static void
+nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_blend_state *bso = hwcso;
+
+ so_ref(NULL, &bso->so);
+ FREE(bso);
+}
+
+
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV34TCL_TX_WRAP_S_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV34TCL_TX_WRAP_S_CLAMP;
+ break;
+/* case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP;
+ break;*/
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV34TCL_TX_WRAP_S_REPEAT;
+ break;
+ }
+
+ return ret >> NV34TCL_TX_WRAP_S_SHIFT;
+}
+
+static void *
+nv30_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv30_sampler_state *ps;
+ uint32_t filter = 0;
+
+ ps = MALLOC(sizeof(struct nv30_sampler_state));
+
+ ps->fmt = 0;
+ /* TODO: Not all RECTs formats have this bit set, bits 15-8 of format
+ are the tx format to use. We should store normalized coord flag
+ in sampler state structure, and set appropriate format in
+ nvxx_fragtex_build()
+ */
+ /*NV34TCL_TX_FORMAT_RECT*/
+ /*if (!cso->normalized_coords) {
+ ps->fmt |= (1<<14) ;
+ }*/
+
+ ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) |
+ (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT));
+
+ ps->en = 0;
+
+ if (cso->max_anisotropy >= 8.0) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 4.0) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
+ } else
+ if (cso->max_anisotropy >= 2.0) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ps->filt = filter;
+
+ {
+ float limit;
+
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+ ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
+
+ limit = CLAMP(cso->max_lod, 0.0, 15.0);
+ ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/;
+
+ limit = CLAMP(cso->min_lod, 0.0, 15.0);
+ ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/;
+ }
+
+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ switch (cso->compare_func) {
+ case PIPE_FUNC_NEVER:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER;
+ break;
+ case PIPE_FUNC_GREATER:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER;
+ break;
+ case PIPE_FUNC_EQUAL:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL;
+ break;
+ case PIPE_FUNC_LESS:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS;
+ break;
+ default:
+ break;
+ }
+ }
+
+ ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+ (float_to_ubyte(cso->border_color[0]) << 16) |
+ (float_to_ubyte(cso->border_color[1]) << 8) |
+ (float_to_ubyte(cso->border_color[2]) << 0));
+
+ return (void *)ps;
+}
+
+static void
+nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv30->tex_sampler[unit] = sampler[unit];
+ nv30->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nv30->nr_samplers; unit++) {
+ nv30->tex_sampler[unit] = NULL;
+ nv30->dirty_samplers |= (1 << unit);
+ }
+
+ nv30->nr_samplers = nr;
+ nv30->dirty |= NV30_NEW_SAMPLER;
+}
+
+static void
+nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ pipe_texture_reference((struct pipe_texture **)
+ &nv30->tex_miptree[unit], miptree[unit]);
+ nv30->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nv30->nr_textures; unit++) {
+ pipe_texture_reference((struct pipe_texture **)
+ &nv30->tex_miptree[unit], NULL);
+ nv30->dirty_samplers |= (1 << unit);
+ }
+
+ nv30->nr_textures = nr;
+ nv30->dirty |= NV30_NEW_SAMPLER;
+}
+
+static void *
+nv30_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
+ struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+
+ /*XXX: ignored:
+ * light_twoside
+ * point_smooth -nohw
+ * multisample
+ */
+
+ so_method(so, rankine, NV34TCL_SHADE_MODEL, 1);
+ so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT :
+ NV34TCL_SHADE_MODEL_SMOOTH);
+
+ so_method(so, rankine, NV34TCL_LINE_WIDTH, 2);
+ so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff);
+ so_data (so, cso->line_smooth ? 1 : 0);
+ so_method(so, rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2);
+ so_data (so, cso->line_stipple_enable ? 1 : 0);
+ so_data (so, (cso->line_stipple_pattern << 16) |
+ cso->line_stipple_factor);
+
+ so_method(so, rankine, NV34TCL_POINT_SIZE, 1);
+ so_data (so, fui(cso->point_size));
+
+ so_method(so, rankine, NV34TCL_POLYGON_MODE_FRONT, 6);
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV34TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV34TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV34TCL_CULL_FACE_BACK);
+ break;
+ }
+ so_data(so, NV34TCL_FRONT_FACE_CCW);
+ } else {
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV34TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV34TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV34TCL_CULL_FACE_BACK);
+ break;
+ }
+ so_data(so, NV34TCL_FRONT_FACE_CW);
+ }
+ so_data(so, cso->poly_smooth ? 1 : 0);
+ so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);
+
+ so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, cso->poly_stipple_enable ? 1 : 0);
+
+ so_method(so, rankine, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if (cso->offset_cw || cso->offset_ccw) {
+ so_method(so, rankine, NV34TCL_POLYGON_OFFSET_FACTOR, 2);
+ so_data (so, fui(cso->offset_scale));
+ so_data (so, fui(cso->offset_units * 2));
+ }
+
+ so_method(so, rankine, NV34TCL_POINT_SPRITE, 1);
+ if (cso->point_sprite) {
+ unsigned psctl = (1 << 0), i;
+
+ for (i = 0; i < 8; i++) {
+ if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ psctl |= (1 << (8 + i));
+ }
+
+ so_data(so, psctl);
+ } else {
+ so_data(so, 0);
+ }
+
+ so_ref(so, &rsso->so);
+ rsso->pipe = *cso;
+ return (void *)rsso;
+}
+
+static void
+nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->rasterizer = hwcso;
+ nv30->dirty |= NV30_NEW_RAST;
+ /*nv30->draw_dirty |= NV30_NEW_RAST;*/
+}
+
+static void
+nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_rasterizer_state *rsso = hwcso;
+
+ so_ref(NULL, &rsso->so);
+ FREE(rsso);
+}
+
+static void *
+nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
+ struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+
+ so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3);
+ so_data (so, nvgl_comparison_op(cso->depth.func));
+ so_data (so, cso->depth.writemask ? 1 : 0);
+ so_data (so, cso->depth.enabled ? 1 : 0);
+
+ so_method(so, rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3);
+ so_data (so, cso->alpha.enabled ? 1 : 0);
+ so_data (so, nvgl_comparison_op(cso->alpha.func));
+ so_data (so, float_to_ubyte(cso->alpha.ref));
+
+ if (cso->stencil[0].enabled) {
+ so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 8);
+ so_data (so, cso->stencil[0].enabled ? 1 : 0);
+ so_data (so, cso->stencil[0].write_mask);
+ so_data (so, nvgl_comparison_op(cso->stencil[0].func));
+ so_data (so, cso->stencil[0].ref_value);
+ so_data (so, cso->stencil[0].value_mask);
+ so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ } else {
+ so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 8);
+ so_data (so, cso->stencil[1].enabled ? 1 : 0);
+ so_data (so, cso->stencil[1].write_mask);
+ so_data (so, nvgl_comparison_op(cso->stencil[1].func));
+ so_data (so, cso->stencil[1].ref_value);
+ so_data (so, cso->stencil[1].value_mask);
+ so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ } else {
+ so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_ref(so, &zsaso->so);
+ zsaso->pipe = *cso;
+ return (void *)zsaso;
+}
+
+static void
+nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->zsa = hwcso;
+ nv30->dirty |= NV30_NEW_ZSA;
+}
+
+static void
+nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_zsa_state *zsaso = hwcso;
+
+ so_ref(NULL, &zsaso->so);
+ FREE(zsaso);
+}
+
+static void *
+nv30_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ /*struct nv30_context *nv30 = nv30_context(pipe);*/
+ struct nv30_vertex_program *vp;
+
+ vp = CALLOC(1, sizeof(struct nv30_vertex_program));
+ vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ /*vp->draw = draw_create_vertex_shader(nv30->draw, &vp->pipe);*/
+
+ return (void *)vp;
+}
+
+static void
+nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->vertprog = hwcso;
+ nv30->dirty |= NV30_NEW_VERTPROG;
+ /*nv30->draw_dirty |= NV30_NEW_VERTPROG;*/
+}
+
+static void
+nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_vertex_program *vp = hwcso;
+
+ /*draw_delete_vertex_shader(nv30->draw, vp->draw);*/
+ nv30_vertprog_destroy(nv30, vp);
+ FREE((void*)vp->pipe.tokens);
+ FREE(vp);
+}
+
+static void *
+nv30_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv30_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nv30_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ tgsi_scan_shader(fp->pipe.tokens, &fp->info);
+
+ return (void *)fp;
+}
+
+static void
+nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->fragprog = hwcso;
+ nv30->dirty |= NV30_NEW_FRAGPROG;
+}
+
+static void
+nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_fragment_program *fp = hwcso;
+
+ nv30_fragprog_destroy(nv30, fp);
+ FREE((void*)fp->pipe.tokens);
+ FREE(fp);
+}
+
+static void
+nv30_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->blend_colour = *bcol;
+ nv30->dirty |= NV30_NEW_BCOL;
+}
+
+static void
+nv30_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+}
+
+static void
+nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->constbuf[shader] = buf->buffer;
+ nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float));
+
+ if (shader == PIPE_SHADER_VERTEX) {
+ nv30->dirty |= NV30_NEW_VERTPROG;
+ } else
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ nv30->dirty |= NV30_NEW_FRAGPROG;
+ }
+}
+
+static void
+nv30_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->framebuffer = *fb;
+ nv30->dirty |= NV30_NEW_FB;
+}
+
+static void
+nv30_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ memcpy(nv30->stipple, stipple->stipple, 4 * 32);
+ nv30->dirty |= NV30_NEW_STIPPLE;
+}
+
+static void
+nv30_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->scissor = *s;
+ nv30->dirty |= NV30_NEW_SCISSOR;
+}
+
+static void
+nv30_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->viewport = *vpt;
+ nv30->dirty |= NV30_NEW_VIEWPORT;
+ /*nv30->draw_dirty |= NV30_NEW_VIEWPORT;*/
+}
+
+static void
+nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ memcpy(nv30->vtxbuf, vb, sizeof(*vb) * count);
+ nv30->vtxbuf_nr = count;
+
+ nv30->dirty |= NV30_NEW_ARRAYS;
+ /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
+}
+
+static void
+nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ memcpy(nv30->vtxelt, ve, sizeof(*ve) * count);
+ nv30->vtxelt_nr = count;
+
+ nv30->dirty |= NV30_NEW_ARRAYS;
+ /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
+}
+
+static void
+nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->edgeflags = bitfield;
+ nv30->dirty |= NV30_NEW_ARRAYS;
+ /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
+}
+
+void
+nv30_init_state_functions(struct nv30_context *nv30)
+{
+ nv30->pipe.create_blend_state = nv30_blend_state_create;
+ nv30->pipe.bind_blend_state = nv30_blend_state_bind;
+ nv30->pipe.delete_blend_state = nv30_blend_state_delete;
+
+ nv30->pipe.create_sampler_state = nv30_sampler_state_create;
+ nv30->pipe.bind_sampler_states = nv30_sampler_state_bind;
+ nv30->pipe.delete_sampler_state = nv30_sampler_state_delete;
+ nv30->pipe.set_sampler_textures = nv30_set_sampler_texture;
+
+ nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create;
+ nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind;
+ nv30->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete;
+
+ nv30->pipe.create_depth_stencil_alpha_state =
+ nv30_depth_stencil_alpha_state_create;
+ nv30->pipe.bind_depth_stencil_alpha_state =
+ nv30_depth_stencil_alpha_state_bind;
+ nv30->pipe.delete_depth_stencil_alpha_state =
+ nv30_depth_stencil_alpha_state_delete;
+
+ nv30->pipe.create_vs_state = nv30_vp_state_create;
+ nv30->pipe.bind_vs_state = nv30_vp_state_bind;
+ nv30->pipe.delete_vs_state = nv30_vp_state_delete;
+
+ nv30->pipe.create_fs_state = nv30_fp_state_create;
+ nv30->pipe.bind_fs_state = nv30_fp_state_bind;
+ nv30->pipe.delete_fs_state = nv30_fp_state_delete;
+
+ nv30->pipe.set_blend_color = nv30_set_blend_color;
+ nv30->pipe.set_clip_state = nv30_set_clip_state;
+ nv30->pipe.set_constant_buffer = nv30_set_constant_buffer;
+ nv30->pipe.set_framebuffer_state = nv30_set_framebuffer_state;
+ nv30->pipe.set_polygon_stipple = nv30_set_polygon_stipple;
+ nv30->pipe.set_scissor_state = nv30_set_scissor_state;
+ nv30->pipe.set_viewport_state = nv30_set_viewport_state;
+
+ nv30->pipe.set_edgeflags = nv30_set_edgeflags;
+ nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers;
+ nv30->pipe.set_vertex_elements = nv30_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h
new file mode 100644
index 0000000000..2023278e37
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state.h
@@ -0,0 +1,88 @@
+#ifndef __NV30_STATE_H__
+#define __NV30_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv30_sampler_state {
+ uint32_t fmt;
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+struct nv30_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv30_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nv30_vertex_program {
+ struct pipe_shader_state pipe;
+
+ boolean translated;
+
+ struct nv30_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv30_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+ struct nouveau_stateobj *so;
+};
+
+struct nv30_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv30_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv30_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+ struct nouveau_stateobj *so;
+};
+
+struct nv30_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct pipe_texture *shadow_tex;
+ struct pipe_surface *shadow_surface;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c
new file mode 100644
index 0000000000..44d43e132a
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_blend.c
@@ -0,0 +1,40 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_blend_validate(struct nv30_context *nv30)
+{
+ so_ref(nv30->blend->so, &nv30->state.hw[NV30_STATE_BLEND]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_blend = {
+ .validate = nv30_state_blend_validate,
+ .dirty = {
+ .pipe = NV30_NEW_BLEND,
+ .hw = NV30_STATE_BLEND
+ }
+};
+
+static boolean
+nv30_state_blend_colour_validate(struct nv30_context *nv30)
+{
+ struct nouveau_stateobj *so = so_new(2, 0);
+ struct pipe_blend_color *bcol = &nv30->blend_colour;
+
+ so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1);
+ so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) |
+ (float_to_ubyte(bcol->color[0]) << 16) |
+ (float_to_ubyte(bcol->color[1]) << 8) |
+ (float_to_ubyte(bcol->color[2]) << 0)));
+
+ so_ref(so, &nv30->state.hw[NV30_STATE_BCOL]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_blend_colour = {
+ .validate = nv30_state_blend_colour_validate,
+ .dirty = {
+ .pipe = NV30_NEW_BCOL,
+ .hw = NV30_STATE_BCOL
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c
new file mode 100644
index 0000000000..9480695d6e
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_emit.c
@@ -0,0 +1,118 @@
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+static struct nv30_state_entry *render_states[] = {
+ &nv30_state_framebuffer,
+ &nv30_state_rasterizer,
+ &nv30_state_scissor,
+ &nv30_state_stipple,
+ &nv30_state_fragprog,
+ &nv30_state_fragtex,
+ &nv30_state_vertprog,
+ &nv30_state_blend,
+ &nv30_state_blend_colour,
+ &nv30_state_zsa,
+ &nv30_state_viewport,
+ &nv30_state_vbo,
+ NULL
+};
+
+static void
+nv30_state_do_validate(struct nv30_context *nv30,
+ struct nv30_state_entry **states)
+{
+ const struct pipe_framebuffer_state *fb = &nv30->framebuffer;
+ unsigned i;
+
+ for (i = 0; i < fb->num_cbufs; i++)
+ fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
+ if (fb->zsbuf)
+ fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
+
+ while (*states) {
+ struct nv30_state_entry *e = *states;
+
+ if (nv30->dirty & e->dirty.pipe) {
+ if (e->validate(nv30)) {
+ nv30->state.dirty |= (1ULL << e->dirty.hw);
+ }
+ }
+
+ states++;
+ }
+ nv30->dirty = 0;
+}
+
+void
+nv30_state_emit(struct nv30_context *nv30)
+{
+ struct nv30_state *state = &nv30->state;
+ struct nv30_screen *screen = nv30->screen;
+ unsigned i, samplers;
+ uint64_t states;
+
+ if (nv30->pctx_id != screen->cur_pctx) {
+ for (i = 0; i < NV30_STATE_MAX; i++) {
+ if (state->hw[i] && screen->state[i] != state->hw[i])
+ state->dirty |= (1ULL << i);
+ }
+
+ screen->cur_pctx = nv30->pctx_id;
+ }
+
+ for (i = 0, states = state->dirty; states; i++) {
+ if (!(states & (1ULL << i)))
+ continue;
+ so_ref (state->hw[i], &nv30->screen->state[i]);
+ if (state->hw[i])
+ so_emit(nv30->nvws, nv30->screen->state[i]);
+ states &= ~(1ULL << i);
+ }
+
+ state->dirty = 0;
+
+ so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]);
+ for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
+ if (!(samplers & (1 << i)))
+ continue;
+ so_emit_reloc_markers(nv30->nvws,
+ state->hw[NV30_STATE_FRAGTEX0+i]);
+ samplers &= ~(1ULL << i);
+ }
+ so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FRAGPROG]);
+ if (state->hw[NV30_STATE_VTXBUF] /*&& nv30->render_mode == HW*/)
+ so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_VTXBUF]);
+}
+
+boolean
+nv30_state_validate(struct nv30_context *nv30)
+{
+#if 0
+ boolean was_sw = nv30->fallback_swtnl ? TRUE : FALSE;
+
+ if (nv30->render_mode != HW) {
+ /* Don't even bother trying to go back to hw if none
+ * of the states that caused swtnl previously have changed.
+ */
+ if ((nv30->fallback_swtnl & nv30->dirty)
+ != nv30->fallback_swtnl)
+ return FALSE;
+
+ /* Attempt to go to hwtnl again */
+ nv30->pipe.flush(&nv30->pipe, 0, NULL);
+ nv30->dirty |= (NV30_NEW_VIEWPORT |
+ NV30_NEW_VERTPROG |
+ NV30_NEW_ARRAYS);
+ nv30->render_mode = HW;
+ }
+#endif
+ nv30_state_do_validate(nv30, render_states);
+#if 0
+ if (nv30->fallback_swtnl || nv30->fallback_swrast)
+ return FALSE;
+
+ if (was_sw)
+ NOUVEAU_ERR("swtnl->hw\n");
+#endif
+ return TRUE;
+}
diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
new file mode 100644
index 0000000000..73bdf7e56c
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -0,0 +1,140 @@
+#include "nv30_context.h"
+#include "nouveau/nouveau_util.h"
+
+static boolean
+nv30_state_framebuffer_validate(struct nv30_context *nv30)
+{
+ struct pipe_framebuffer_state *fb = &nv30->framebuffer;
+ struct pipe_surface *rt[2], *zeta = NULL;
+ uint32_t rt_enable, rt_format;
+ int i, colour_format = 0, zeta_format = 0;
+ struct nouveau_stateobj *so = so_new(64, 10);
+ unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+ unsigned w = fb->width;
+ unsigned h = fb->height;
+
+ rt_enable = 0;
+ for (i = 0; i < fb->num_cbufs; i++) {
+ if (colour_format) {
+ assert(colour_format == fb->cbufs[i]->format);
+ } else {
+ colour_format = fb->cbufs[i]->format;
+ rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
+ rt[i] = fb->cbufs[i];
+ }
+ }
+
+ if (rt_enable & NV34TCL_RT_ENABLE_COLOR1)
+ rt_enable |= NV34TCL_RT_ENABLE_MRT;
+
+ if (fb->zsbuf) {
+ zeta_format = fb->zsbuf->format;
+ zeta = fb->zsbuf;
+ }
+
+ if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+ for (i = 1; i < fb->num_cbufs; i++)
+ assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
+
+ /* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */
+ rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+ log2i(fb->width) << 16 /*NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT*/ |
+ log2i(fb->height) << 24 /*NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT*/;
+ }
+ else
+ rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
+ break;
+ default:
+ assert(0);
+ }
+
+ switch (zeta_format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case 0:
+ rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) {
+ uint32_t pitch = rt[0]->stride;
+ if (zeta) {
+ pitch |= (zeta->stride << 16);
+ } else {
+ pitch |= (pitch << 16);
+ }
+
+ so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR0, 1);
+ so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv30->nvws->channel->vram->handle,
+ nv30->nvws->channel->gart->handle);
+ so_method(so, nv30->screen->rankine, NV34TCL_COLOR0_PITCH, 2);
+ so_data (so, pitch);
+ so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ }
+
+ if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) {
+ so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR1, 1);
+ so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv30->nvws->channel->vram->handle,
+ nv30->nvws->channel->gart->handle);
+ so_method(so, nv30->screen->rankine, NV34TCL_COLOR1_OFFSET, 2);
+ so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, rt[1]->stride);
+ }
+
+ if (zeta_format) {
+ so_method(so, nv30->screen->rankine, NV34TCL_DMA_ZETA, 1);
+ so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv30->nvws->channel->vram->handle,
+ nv30->nvws->channel->gart->handle);
+ so_method(so, nv30->screen->rankine, NV34TCL_ZETA_OFFSET, 1);
+ so_reloc (so, zeta->buffer, zeta->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ /* TODO: allocate LMA depth buffer */
+ }
+
+ so_method(so, nv30->screen->rankine, NV34TCL_RT_ENABLE, 1);
+ so_data (so, rt_enable);
+ so_method(so, nv30->screen->rankine, NV34TCL_RT_HORIZ, 3);
+ so_data (so, (w << 16) | 0);
+ so_data (so, (h << 16) | 0);
+ so_data (so, rt_format);
+ so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_HORIZ, 2);
+ so_data (so, (w << 16) | 0);
+ so_data (so, (h << 16) | 0);
+ so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ so_data (so, ((w - 1) << 16) | 0);
+ so_data (so, ((h - 1) << 16) | 0);
+ so_method(so, nv30->screen->rankine, 0x1d88, 1);
+ so_data (so, (1 << 12) | h);
+ /* Wonder why this is needed, context should all be set to zero on init */
+ so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1);
+ so_data (so, 0);
+
+ so_ref(so, &nv30->state.hw[NV30_STATE_FB]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_framebuffer = {
+ .validate = nv30_state_framebuffer_validate,
+ .dirty = {
+ .pipe = NV30_NEW_FB,
+ .hw = NV30_STATE_FB
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_rasterizer.c b/src/gallium/drivers/nv30/nv30_state_rasterizer.c
new file mode 100644
index 0000000000..6d1b60e043
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_rasterizer.c
@@ -0,0 +1,17 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_rasterizer_validate(struct nv30_context *nv30)
+{
+ so_ref(nv30->rasterizer->so,
+ &nv30->state.hw[NV30_STATE_RAST]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_rasterizer = {
+ .validate = nv30_state_rasterizer_validate,
+ .dirty = {
+ .pipe = NV30_NEW_RAST,
+ .hw = NV30_STATE_RAST
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c
new file mode 100644
index 0000000000..1db9bc1795
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_scissor.c
@@ -0,0 +1,35 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_scissor_validate(struct nv30_context *nv30)
+{
+ struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe;
+ struct pipe_scissor_state *s = &nv30->scissor;
+ struct nouveau_stateobj *so;
+
+ if (nv30->state.hw[NV30_STATE_SCISSOR] &&
+ (rast->scissor == 0 && nv30->state.scissor_enabled == 0))
+ return FALSE;
+ nv30->state.scissor_enabled = rast->scissor;
+
+ so = so_new(3, 0);
+ so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2);
+ if (nv30->state.scissor_enabled) {
+ so_data (so, ((s->maxx - s->minx) << 16) | s->minx);
+ so_data (so, ((s->maxy - s->miny) << 16) | s->miny);
+ } else {
+ so_data (so, 4096 << 16);
+ so_data (so, 4096 << 16);
+ }
+
+ so_ref(so, &nv30->state.hw[NV30_STATE_SCISSOR]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_scissor = {
+ .validate = nv30_state_scissor_validate,
+ .dirty = {
+ .pipe = NV30_NEW_SCISSOR | NV30_NEW_RAST,
+ .hw = NV30_STATE_SCISSOR
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c
new file mode 100644
index 0000000000..41b42813b4
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_stipple.c
@@ -0,0 +1,39 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_stipple_validate(struct nv30_context *nv30)
+{
+ struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe;
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct nouveau_stateobj *so;
+
+ if (nv30->state.hw[NV30_STATE_STIPPLE] &&
+ (rast->poly_stipple_enable == 0 && nv30->state.stipple_enabled == 0))
+ return FALSE;
+
+ if (rast->poly_stipple_enable) {
+ unsigned i;
+
+ so = so_new(35, 0);
+ so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, 1);
+ so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+ for (i = 0; i < 32; i++)
+ so_data(so, nv30->stipple[i]);
+ } else {
+ so = so_new(2, 0);
+ so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_ref(so, &nv30->state.hw[NV30_STATE_STIPPLE]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_stipple = {
+ .validate = nv30_state_stipple_validate,
+ .dirty = {
+ .pipe = NV30_NEW_STIPPLE | NV30_NEW_RAST,
+ .hw = NV30_STATE_STIPPLE,
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c
new file mode 100644
index 0000000000..951d40ebfd
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_viewport.c
@@ -0,0 +1,70 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_viewport_validate(struct nv30_context *nv30)
+{
+ struct pipe_viewport_state *vpt = &nv30->viewport;
+ struct nouveau_stateobj *so;
+ unsigned bypass;
+
+ if (/*nv30->render_mode == HW &&*/ !nv30->rasterizer->pipe.bypass_clipping)
+ bypass = 0;
+ else
+ bypass = 1;
+
+ if (nv30->state.hw[NV30_STATE_VIEWPORT] &&
+ (bypass || !(nv30->dirty & NV30_NEW_VIEWPORT)) &&
+ nv30->state.viewport_bypass == bypass)
+ return FALSE;
+ nv30->state.viewport_bypass = bypass;
+
+ so = so_new(11, 0);
+ if (!bypass) {
+ so_method(so, nv30->screen->rankine,
+ NV34TCL_VIEWPORT_TRANSLATE_X, 8);
+ so_data (so, fui(vpt->translate[0]));
+ so_data (so, fui(vpt->translate[1]));
+ so_data (so, fui(vpt->translate[2]));
+ so_data (so, fui(vpt->translate[3]));
+ so_data (so, fui(vpt->scale[0]));
+ so_data (so, fui(vpt->scale[1]));
+ so_data (so, fui(vpt->scale[2]));
+ so_data (so, fui(vpt->scale[3]));
+/* so_method(so, nv30->screen->rankine, 0x1d78, 1);
+ so_data (so, 1);
+*/ } else {
+ so_method(so, nv30->screen->rankine,
+ NV34TCL_VIEWPORT_TRANSLATE_X, 8);
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(0.0));
+ /* Not entirely certain what this is yet. The DDX uses this
+ * value also as it fixes rendering when you pass
+ * pre-transformed vertices to the GPU. My best gusss is that
+ * this bypasses some culling/clipping stage. Might be worth
+ * noting that points/lines are uneffected by whatever this
+ * value fixes, only filled polygons are effected.
+ */
+/* so_method(so, nv30->screen->rankine, 0x1d78, 1);
+ so_data (so, 0x110);
+*/ }
+ /* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */
+ so_method(so, nv30->screen->rankine, 0x1d78, 1);
+ so_data (so, 1);
+
+ so_ref(so, &nv30->state.hw[NV30_STATE_VIEWPORT]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_viewport = {
+ .validate = nv30_state_viewport_validate,
+ .dirty = {
+ .pipe = NV30_NEW_VIEWPORT | NV30_NEW_RAST,
+ .hw = NV30_STATE_VIEWPORT
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_zsa.c b/src/gallium/drivers/nv30/nv30_state_zsa.c
new file mode 100644
index 0000000000..0940b7269b
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_zsa.c
@@ -0,0 +1,17 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_zsa_validate(struct nv30_context *nv30)
+{
+ so_ref(nv30->zsa->so,
+ &nv30->state.hw[NV30_STATE_ZSA]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_zsa = {
+ .validate = nv30_state_zsa_validate,
+ .dirty = {
+ .pipe = NV30_NEW_ZSA,
+ .hw = NV30_STATE_ZSA
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c
new file mode 100644
index 0000000000..d3376a73bf
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_surface.c
@@ -0,0 +1,77 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv30_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+
+#include "util/u_tile.h"
+
+static void
+nv30_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_winsys *nvws = nv30->nvws;
+
+ if (do_flip) {
+ /*XXX: This dodgyness will do for now for correctness. But,
+ * need to investigate whether the 2D engine is able to
+ * manage a flip (perhaps SIFM?), if not, use the 3D engine
+ */
+ desty += height;
+ while (height--) {
+ nvws->surface_copy(nvws, dest, destx, desty--, src,
+ srcx, srcy++, width, 1);
+ }
+ } else {
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+ }
+}
+
+static void
+nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_winsys *nvws = nv30->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+void
+nv30_init_surface_functions(struct nv30_context *nv30)
+{
+ nv30->pipe.surface_copy = nv30_surface_copy;
+ nv30->pipe.surface_fill = nv30_surface_fill;
+}
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
new file mode 100644
index 0000000000..556f981d4a
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -0,0 +1,556 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nouveau_util.h"
+
+#define FORCE_SWTNL 0
+
+static INLINE int
+nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+{
+ switch (pipe) {
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ *fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
+ break;
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ *fmt = NV34TCL_VTXFMT_TYPE_UBYTE;
+ break;
+ case PIPE_FORMAT_R16_SSCALED:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ *fmt = NV34TCL_VTXFMT_TYPE_USHORT;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe));
+ return 1;
+ }
+
+ switch (pipe) {
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R16_SSCALED:
+ *ncomp = 1;
+ break;
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ *ncomp = 2;
+ break;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ *ncomp = 3;
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ *ncomp = 4;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe));
+ return 1;
+ }
+
+ return 0;
+}
+
+static boolean
+nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib,
+ unsigned ib_size)
+{
+ struct pipe_screen *pscreen = &nv30->screen->pipe;
+ unsigned type;
+
+ if (!ib) {
+ nv30->idxbuf = NULL;
+ nv30->idxbuf_format = 0xdeadbeef;
+ return FALSE;
+ }
+
+ if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1)
+ return FALSE;
+
+ switch (ib_size) {
+ case 2:
+ type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
+ break;
+ case 4:
+ type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
+ break;
+ default:
+ return FALSE;
+ }
+
+ if (ib != nv30->idxbuf ||
+ type != nv30->idxbuf_format) {
+ nv30->dirty |= NV30_NEW_ARRAYS;
+ nv30->idxbuf = ib;
+ nv30->idxbuf_format = type;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so,
+ int attrib, struct pipe_vertex_element *ve,
+ struct pipe_vertex_buffer *vb)
+{
+ struct pipe_winsys *ws = nv30->pipe.winsys;
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ unsigned type, ncomp;
+ void *map;
+
+ if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp))
+ return FALSE;
+
+ map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ map += vb->buffer_offset + ve->src_offset;
+
+ switch (type) {
+ case NV34TCL_VTXFMT_TYPE_FLOAT:
+ {
+ float *v = map;
+
+ switch (ncomp) {
+ case 4:
+ so_method(so, rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ so_data (so, fui(v[2]));
+ so_data (so, fui(v[3]));
+ break;
+ case 3:
+ so_method(so, rankine, NV34TCL_VTX_ATTR_3F_X(attrib), 3);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ so_data (so, fui(v[2]));
+ break;
+ case 2:
+ so_method(so, rankine, NV34TCL_VTX_ATTR_2F_X(attrib), 2);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ break;
+ case 1:
+ so_method(so, rankine, NV34TCL_VTX_ATTR_1F(attrib), 1);
+ so_data (so, fui(v[0]));
+ break;
+ default:
+ ws->buffer_unmap(ws, vb->buffer);
+ return FALSE;
+ }
+ }
+ break;
+ default:
+ ws->buffer_unmap(ws, vb->buffer);
+ return FALSE;
+ }
+
+ ws->buffer_unmap(ws, vb->buffer);
+
+ return TRUE;
+}
+
+boolean
+nv30_draw_arrays(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_channel *chan = nv30->nvws->channel;
+ unsigned restart = 0;
+
+ nv30_vbo_set_idxbuf(nv30, NULL, 0);
+ if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
+ /*return nv30_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);*/
+ return FALSE;
+ }
+
+ while (count) {
+ unsigned vc, nr;
+
+ nv30_state_emit(nv30);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+static INLINE void
+nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv30->nvws->channel;
+
+ while (count) {
+ uint8_t *elts = (uint8_t *)ib + start;
+ unsigned vc, push, restart = 0;
+
+ nv30_state_emit(nv30);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ if (vc & 1) {
+ BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static INLINE void
+nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv30->nvws->channel;
+
+ while (count) {
+ uint16_t *elts = (uint16_t *)ib + start;
+ unsigned vc, push, restart = 0;
+
+ nv30_state_emit(nv30);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ if (vc & 1) {
+ BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static INLINE void
+nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv30->nvws->channel;
+
+ while (count) {
+ uint32_t *elts = (uint32_t *)ib + start;
+ unsigned vc, push, restart = 0;
+
+ nv30_state_emit(nv30);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ while (vc) {
+ push = MIN2(vc, 2047);
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push);
+ OUT_RINGp (elts, push);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static boolean
+nv30_draw_elements_inline(struct pipe_context *pipe,
+ struct pipe_buffer *ib, unsigned ib_size,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ void *map;
+
+ map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
+ if (!ib) {
+ NOUVEAU_ERR("failed mapping ib\n");
+ return FALSE;
+ }
+
+ switch (ib_size) {
+ case 1:
+ nv30_draw_elements_u08(nv30, map, mode, start, count);
+ break;
+ case 2:
+ nv30_draw_elements_u16(nv30, map, mode, start, count);
+ break;
+ case 4:
+ nv30_draw_elements_u32(nv30, map, mode, start, count);
+ break;
+ default:
+ NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
+ break;
+ }
+
+ ws->buffer_unmap(ws, ib);
+ return TRUE;
+}
+
+static boolean
+nv30_draw_elements_vbo(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_channel *chan = nv30->nvws->channel;
+ unsigned restart = 0;
+
+ while (count) {
+ unsigned nr, vc;
+
+ nv30_state_emit(nv30);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ return TRUE;
+}
+
+boolean
+nv30_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer, unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ boolean idxbuf;
+
+ idxbuf = nv30_vbo_set_idxbuf(nv30, indexBuffer, indexSize);
+ if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
+ /*return nv30_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);*/
+ return FALSE;
+ }
+
+ if (idxbuf) {
+ nv30_draw_elements_vbo(pipe, mode, start, count);
+ } else {
+ nv30_draw_elements_inline(pipe, indexBuffer, indexSize,
+ mode, start, count);
+ }
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+static boolean
+nv30_vbo_validate(struct nv30_context *nv30)
+{
+ struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL;
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct pipe_buffer *ib = nv30->idxbuf;
+ unsigned ib_format = nv30->idxbuf_format;
+ unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+ int hw;
+
+ if (nv30->edgeflags) {
+ /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/
+ return FALSE;
+ }
+
+ vtxbuf = so_new(20, 18);
+ so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
+ vtxfmt = so_new(17, 0);
+ so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr);
+
+ for (hw = 0; hw < nv30->vtxelt_nr; hw++) {
+ struct pipe_vertex_element *ve;
+ struct pipe_vertex_buffer *vb;
+ unsigned type, ncomp;
+
+ ve = &nv30->vtxelt[hw];
+ vb = &nv30->vtxbuf[ve->vertex_buffer_index];
+
+ if (!vb->pitch) {
+ if (!sattr)
+ sattr = so_new(16 * 5, 0);
+
+ if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) {
+ so_data(vtxbuf, 0);
+ so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT);
+ continue;
+ }
+ }
+
+ if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
+ /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/
+ so_ref(NULL, &vtxbuf);
+ so_ref(NULL, &vtxfmt);
+ return FALSE;
+ }
+
+ so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset,
+ vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+ so_data (vtxfmt, ((vb->pitch << NV34TCL_VTXFMT_STRIDE_SHIFT) |
+ (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type));
+ }
+
+ if (ib) {
+ so_method(vtxbuf, rankine, NV34TCL_IDXBUF_ADDRESS, 2);
+ so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR,
+ 0, NV34TCL_IDXBUF_FORMAT_DMA1);
+ }
+
+ so_method(vtxbuf, rankine, 0x1710, 1);
+ so_data (vtxbuf, 0);
+
+ so_ref(vtxbuf, &nv30->state.hw[NV30_STATE_VTXBUF]);
+ nv30->state.dirty |= (1ULL << NV30_STATE_VTXBUF);
+ so_ref(vtxfmt, &nv30->state.hw[NV30_STATE_VTXFMT]);
+ nv30->state.dirty |= (1ULL << NV30_STATE_VTXFMT);
+ so_ref(sattr, &nv30->state.hw[NV30_STATE_VTXATTR]);
+ nv30->state.dirty |= (1ULL << NV30_STATE_VTXATTR);
+ return FALSE;
+}
+
+struct nv30_state_entry nv30_state_vbo = {
+ .validate = nv30_vbo_validate,
+ .dirty = {
+ .pipe = NV30_NEW_ARRAYS,
+ .hw = 0,
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
new file mode 100644
index 0000000000..72824559e8
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -0,0 +1,838 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+/* TODO (at least...):
+ * 1. Indexed consts + ARL
+ * 2. Arb. swz/negation
+ * 3. NV_vp11, NV_vp2, NV_vp3 features
+ * - extra arith opcodes
+ * - branching
+ * - texture sampling
+ * - indexed attribs
+ * - indexed results
+ * 4. bugs
+ */
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv30_shader.h"
+
+#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv30_sr_neg((s))
+#define abs(s) nv30_sr_abs((s))
+
+struct nv30_vpc {
+ struct nv30_vertex_program *vp;
+
+ struct nv30_vertex_program_exec *vpi;
+
+ unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
+
+ int high_temp;
+ int temp_temp_count;
+
+ struct nv30_sreg *imm;
+ unsigned nr_imm;
+};
+
+static struct nv30_sreg
+temp(struct nv30_vpc *vpc)
+{
+ int idx;
+
+ idx = vpc->temp_temp_count++;
+ idx += vpc->high_temp + 1;
+ return nv30_sr(NV30SR_TEMP, idx);
+}
+
+static struct nv30_sreg
+constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+ struct nv30_vertex_program_data *vpd;
+ int idx;
+
+ if (pipe >= 0) {
+ for (idx = 0; idx < vp->nr_consts; idx++) {
+ if (vp->consts[idx].index == pipe)
+ return nv30_sr(NV30SR_CONST, idx);
+ }
+ }
+
+ idx = vp->nr_consts++;
+ vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
+ vpd = &vp->consts[idx];
+
+ vpd->index = pipe;
+ vpd->value[0] = x;
+ vpd->value[1] = y;
+ vpd->value[2] = z;
+ vpd->value[3] = w;
+ return nv30_sr(NV30SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+
+static void
+emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV30SR_TEMP:
+ sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
+ sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
+ break;
+ case NV30SR_INPUT:
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ vp->ir |= (1 << src.index);
+ hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
+ break;
+ case NV30SR_CONST:
+ sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ assert(vpc->vpi->const_index == -1 ||
+ vpc->vpi->const_index == src.index);
+ vpc->vpi->const_index = src.index;
+ break;
+ case NV30SR_NONE:
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV30_VP_SRC_NEGATE;
+
+ if (src.abs)
+ hw[0] |= (1 << (21 + pos));
+
+ sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) |
+ (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT));
+
+/*
+ * |VVV|
+ * d�.�b
+ * \u/
+ *
+ */
+
+ switch (pos) {
+ case 0:
+ hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >>
+ NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT;
+ hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) <<
+ NV30_VP_INST_SRC0L_SHIFT;
+ break;
+ case 1:
+ hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT;
+ break;
+ case 2:
+ hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >>
+ NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT;
+ hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) <<
+ NV30_VP_INST_SRC2L_SHIFT;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+
+ switch (dst.type) {
+ case NV30SR_TEMP:
+ hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
+ break;
+ case NV30SR_OUTPUT:
+ switch (dst.index) {
+ case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
+ case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ default:
+ break;
+ }
+
+ hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
+ hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+
+ /*XXX: no way this is entirely correct, someone needs to
+ * figure out what exactly it is.
+ */
+ hw[3] |= 0x800;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
+ struct nv30_sreg dst, int mask,
+ struct nv30_sreg s0, struct nv30_sreg s1,
+ struct nv30_sreg s2)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+ uint32_t *hw;
+
+ vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+ vpc->vpi = &vp->insns[vp->nr_insns - 1];
+ memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+ vpc->vpi->const_index = -1;
+
+ hw = vpc->vpi->data;
+
+ hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
+ hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
+ (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
+ (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
+ (3 << NV30_VP_INST_COND_SWZ_W_SHIFT));
+
+ hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
+// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);
+
+ if (dst.type == NV30SR_OUTPUT) {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
+ } else {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
+ }
+
+ emit_dst(vpc, hw, slot, dst);
+ emit_src(vpc, hw, 0, s0);
+ emit_src(vpc, hw, 1, s1);
+ emit_src(vpc, hw, 2, s2);
+}
+
+static INLINE struct nv30_sreg
+tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+ struct nv30_sreg src;
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ src = vpc->imm[fsrc->SrcRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (vpc->high_temp < fsrc->SrcRegister.Index)
+ vpc->high_temp = fsrc->SrcRegister.Index;
+ src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->SrcRegisterExtMod.Absolute;
+ src.negate = fsrc->SrcRegister.Negate;
+ src.swz[0] = fsrc->SrcRegister.SwizzleX;
+ src.swz[1] = fsrc->SrcRegister.SwizzleY;
+ src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+ src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ return src;
+}
+
+static INLINE struct nv30_sreg
+tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+ struct nv30_sreg dst;
+
+ switch (fdst->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ dst = nv30_sr(NV30SR_OUTPUT,
+ vpc->output_map[fdst->DstRegister.Index]);
+
+ break;
+ case TGSI_FILE_TEMPORARY:
+ dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+ if (vpc->high_temp < dst.index)
+ vpc->high_temp = dst.index;
+ break;
+ default:
+ NOUVEAU_ERR("bad dst file\n");
+ break;
+ }
+
+ return dst;
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
+ return mask;
+}
+
+static boolean
+nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nv30_sreg src[3], dst, tmp;
+ struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+ int mask;
+ int ai = -1, ci = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ vpc->temp_temp_count = 0;
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ /*XXX: index comparison is broken now that consts come from
+ * two different register files.
+ */
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_IMMEDIATE:
+ if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ tmp = temp(vpc);
+ arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, 1, OP_EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RET:
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(vpc);
+ arith(vpc, 0, OP_MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ int hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NV30_VP_INST_DEST_POS;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV30_VP_INST_DEST_COL0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV30_VP_INST_DEST_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV30_VP_INST_DEST_BFC0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV30_VP_INST_DEST_BFC1;
+ } else {
+ NOUVEAU_ERR("bad bcolour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NV30_VP_INST_DEST_FOGC;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ hw = NV30_VP_INST_DEST_PSZ;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.SemanticIndex <= 7) {
+ hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ vpc->output_map[fdec->DeclarationRange.First] = hw;
+ return TRUE;
+}
+
+static boolean
+nv30_vertprog_prepare(struct nv30_vpc *vpc)
+{
+ struct tgsi_parse_context p;
+ int nr_imm = 0;
+
+ tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ nr_imm++;
+ break;
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ if (nr_imm) {
+ vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg));
+ assert(vpc->imm);
+ }
+
+ return TRUE;
+}
+
+static void
+nv30_vertprog_translate(struct nv30_context *nv30,
+ struct nv30_vertex_program *vp)
+{
+ struct tgsi_parse_context parse;
+ struct nv30_vpc *vpc = NULL;
+
+ tgsi_dump(vp->pipe.tokens,0);
+
+ vpc = CALLOC(1, sizeof(struct nv30_vpc));
+ if (!vpc)
+ return;
+ vpc->vp = vp;
+ vpc->high_temp = -1;
+
+ if (!nv30_vertprog_prepare(vpc)) {
+ FREE(vpc);
+ return;
+ }
+
+ tgsi_parse_init(&parse, vp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+ fdec = &parse.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_OUTPUT:
+ if (!nv30_vertprog_parse_decl_output(vpc, fdec))
+ goto out_err;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm;
+
+ imm = &parse.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+// assert(imm->Immediate.Size == 4);
+ vpc->imm[vpc->nr_imm++] =
+ constant(vpc, -1,
+ imm->u.ImmediateFloat32[0].Float,
+ imm->u.ImmediateFloat32[1].Float,
+ imm->u.ImmediateFloat32[2].Float,
+ imm->u.ImmediateFloat32[3].Float);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ finst = &parse.FullToken.FullInstruction;
+ if (!nv30_vertprog_parse_instruction(vpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
+ vp->translated = TRUE;
+out_err:
+ tgsi_parse_free(&parse);
+ FREE(vpc);
+}
+
+static boolean
+nv30_vertprog_validate(struct nv30_context *nv30)
+{
+ struct nouveau_winsys *nvws = nv30->nvws;
+ struct pipe_winsys *ws = nv30->pipe.winsys;
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct nv30_vertex_program *vp;
+ struct pipe_buffer *constbuf;
+ boolean upload_code = FALSE, upload_data = FALSE;
+ int i;
+
+ vp = nv30->vertprog;
+ constbuf = nv30->constbuf[PIPE_SHADER_VERTEX];
+
+ /* Translate TGSI shader into hw bytecode */
+ if (!vp->translated) {
+ nv30_vertprog_translate(nv30, vp);
+ if (!vp->translated)
+ return FALSE;
+ }
+
+ /* Allocate hw vtxprog exec slots */
+ if (!vp->exec) {
+ struct nouveau_resource *heap = nv30->screen->vp_exec_heap;
+ struct nouveau_stateobj *so;
+ uint vplen = vp->nr_insns;
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+ while (heap->next && heap->size < vplen) {
+ struct nv30_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->exec);
+ }
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+ assert(0);
+ }
+
+ so = so_new(2, 0);
+ so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
+ so_data (so, vp->exec->start);
+ so_ref(so, &vp->so);
+
+ upload_code = TRUE;
+ }
+
+ /* Allocate hw vtxprog const slots */
+ if (vp->nr_consts && !vp->data) {
+ struct nouveau_resource *heap = nv30->screen->vp_data_heap;
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+ while (heap->next && heap->size < vp->nr_consts) {
+ struct nv30_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->data);
+ }
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+ assert(0);
+ }
+
+ /*XXX: handle this some day */
+ assert(vp->data->start >= vp->data_start_min);
+
+ upload_data = TRUE;
+ if (vp->data_start != vp->data->start)
+ upload_code = TRUE;
+ }
+
+ /* If exec or data segments moved we need to patch the program to
+ * fixup offsets and register IDs.
+ */
+ if (vp->exec_start != vp->exec->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv30_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->has_branch_offset) {
+ assert(0);
+ }
+ }
+
+ vp->exec_start = vp->exec->start;
+ }
+
+ if (vp->nr_consts && vp->data_start != vp->data->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv30_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->const_index >= 0) {
+ vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK;
+ vpi->data[1] |=
+ (vpi->const_index + vp->data->start) <<
+ NV30_VP_INST_CONST_SRC_SHIFT;
+
+ }
+ }
+
+ vp->data_start = vp->data->start;
+ }
+
+ /* Update + Upload constant values */
+ if (vp->nr_consts) {
+ float *map = NULL;
+
+ if (constbuf) {
+ map = ws->buffer_map(ws, constbuf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ }
+
+ for (i = 0; i < vp->nr_consts; i++) {
+ struct nv30_vertex_program_data *vpd = &vp->consts[i];
+
+ if (vpd->index >= 0) {
+ if (!upload_data &&
+ !memcmp(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float)))
+ continue;
+ memcpy(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float));
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (i + vp->data->start);
+ OUT_RINGp ((uint32_t *)vpd->value, 4);
+ }
+
+ if (constbuf) {
+ ws->buffer_unmap(ws, constbuf);
+ }
+ }
+
+ /* Upload vtxprog */
+ if (upload_code) {
+#if 0
+ for (i = 0; i < vp->nr_insns; i++) {
+ NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ i, vp->insns[i].data[0], vp->insns[i].data[1],
+ vp->insns[i].data[2], vp->insns[i].data[3]);
+ }
+#endif
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (vp->exec->start);
+ for (i = 0; i < vp->nr_insns; i++) {
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (vp->insns[i].data, 4);
+ }
+ }
+
+ if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) {
+ so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp)
+{
+ struct nouveau_winsys *nvws = nv30->screen->nvws;
+
+ vp->translated = FALSE;
+
+ if (vp->nr_insns) {
+ FREE(vp->insns);
+ vp->insns = NULL;
+ vp->nr_insns = 0;
+ }
+
+ if (vp->nr_consts) {
+ FREE(vp->consts);
+ vp->consts = NULL;
+ vp->nr_consts = 0;
+ }
+
+ nvws->res_free(&vp->exec);
+ vp->exec_start = 0;
+ nvws->res_free(&vp->data);
+ vp->data_start = 0;
+ vp->data_start_min = 0;
+
+ vp->ir = vp->or = 0;
+ so_ref(NULL, &vp->so);
+}
+
+struct nv30_state_entry nv30_state_vertprog = {
+ .validate = nv30_vertprog_validate,
+ .dirty = {
+ .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/,
+ .hw = NV30_STATE_VERTPROG,
+ }
+};
diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile
new file mode 100644
index 0000000000..9c8eadf7e4
--- /dev/null
+++ b/src/gallium/drivers/nv40/Makefile
@@ -0,0 +1,37 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv40
+
+DRIVER_SOURCES = \
+ nv40_clear.c \
+ nv40_context.c \
+ nv40_draw.c \
+ nv40_fragprog.c \
+ nv40_fragtex.c \
+ nv40_miptree.c \
+ nv40_query.c \
+ nv40_screen.c \
+ nv40_state.c \
+ nv40_state_blend.c \
+ nv40_state_emit.c \
+ nv40_state_fb.c \
+ nv40_state_rasterizer.c \
+ nv40_state_scissor.c \
+ nv40_state_stipple.c \
+ nv40_state_viewport.c \
+ nv40_state_zsa.c \
+ nv40_surface.c \
+ nv40_vbo.c \
+ nv40_vertprog.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c
new file mode 100644
index 0000000000..59efd620e3
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_clear.c
@@ -0,0 +1,13 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv40_context.h"
+
+void
+nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+ ps->status = PIPE_SURFACE_STATUS_CLEAR;
+}
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
new file mode 100644
index 0000000000..cc63dd734b
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -0,0 +1,72 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv40_context.h"
+#include "nv40_screen.h"
+
+static void
+nv40_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ BEGIN_RING(curie, 0x1fd8, 1);
+ OUT_RING (2);
+ BEGIN_RING(curie, 0x1fd8, 1);
+ OUT_RING (1);
+ }
+
+ FIRE_RING(fence);
+}
+
+static void
+nv40_destroy(struct pipe_context *pipe)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ if (nv40->draw)
+ draw_destroy(nv40->draw);
+ FREE(nv40);
+}
+
+struct pipe_context *
+nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv40_screen *screen = nv40_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv40_context *nv40;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv40 = CALLOC(1, sizeof(struct nv40_context));
+ if (!nv40)
+ return NULL;
+ nv40->screen = screen;
+ nv40->pctx_id = pctx_id;
+
+ nv40->nvws = nvws;
+
+ nv40->pipe.winsys = ws;
+ nv40->pipe.screen = pscreen;
+ nv40->pipe.destroy = nv40_destroy;
+ nv40->pipe.draw_arrays = nv40_draw_arrays;
+ nv40->pipe.draw_elements = nv40_draw_elements;
+ nv40->pipe.clear = nv40_clear;
+ nv40->pipe.flush = nv40_flush;
+
+ nv40_init_query_functions(nv40);
+ nv40_init_surface_functions(nv40);
+ nv40_init_state_functions(nv40);
+
+ /* Create, configure, and install fallback swtnl path */
+ nv40->draw = draw_create();
+ draw_wide_point_threshold(nv40->draw, 9999999.0);
+ draw_wide_line_threshold(nv40->draw, 9999999.0);
+ draw_enable_line_stipple(nv40->draw, FALSE);
+ draw_enable_point_sprites(nv40->draw, FALSE);
+ draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40));
+
+ return &nv40->pipe;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
new file mode 100644
index 0000000000..adcfbdd85a
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -0,0 +1,233 @@
+#ifndef __NV40_CONTEXT_H__
+#define __NV40_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv40_screen *ctx = nv40->screen
+#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_stateobj.h"
+
+#include "nv40_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+enum nv40_state_index {
+ NV40_STATE_FB = 0,
+ NV40_STATE_VIEWPORT = 1,
+ NV40_STATE_BLEND = 2,
+ NV40_STATE_RAST = 3,
+ NV40_STATE_ZSA = 4,
+ NV40_STATE_BCOL = 5,
+ NV40_STATE_CLIP = 6,
+ NV40_STATE_SCISSOR = 7,
+ NV40_STATE_STIPPLE = 8,
+ NV40_STATE_FRAGPROG = 9,
+ NV40_STATE_VERTPROG = 10,
+ NV40_STATE_FRAGTEX0 = 11,
+ NV40_STATE_FRAGTEX1 = 12,
+ NV40_STATE_FRAGTEX2 = 13,
+ NV40_STATE_FRAGTEX3 = 14,
+ NV40_STATE_FRAGTEX4 = 15,
+ NV40_STATE_FRAGTEX5 = 16,
+ NV40_STATE_FRAGTEX6 = 17,
+ NV40_STATE_FRAGTEX7 = 18,
+ NV40_STATE_FRAGTEX8 = 19,
+ NV40_STATE_FRAGTEX9 = 20,
+ NV40_STATE_FRAGTEX10 = 21,
+ NV40_STATE_FRAGTEX11 = 22,
+ NV40_STATE_FRAGTEX12 = 23,
+ NV40_STATE_FRAGTEX13 = 24,
+ NV40_STATE_FRAGTEX14 = 25,
+ NV40_STATE_FRAGTEX15 = 26,
+ NV40_STATE_VERTTEX0 = 27,
+ NV40_STATE_VERTTEX1 = 28,
+ NV40_STATE_VERTTEX2 = 29,
+ NV40_STATE_VERTTEX3 = 30,
+ NV40_STATE_VTXBUF = 31,
+ NV40_STATE_VTXFMT = 32,
+ NV40_STATE_VTXATTR = 33,
+ NV40_STATE_MAX = 34
+};
+
+#include "nv40_screen.h"
+
+#define NV40_NEW_BLEND (1 << 0)
+#define NV40_NEW_RAST (1 << 1)
+#define NV40_NEW_ZSA (1 << 2)
+#define NV40_NEW_SAMPLER (1 << 3)
+#define NV40_NEW_FB (1 << 4)
+#define NV40_NEW_STIPPLE (1 << 5)
+#define NV40_NEW_SCISSOR (1 << 6)
+#define NV40_NEW_VIEWPORT (1 << 7)
+#define NV40_NEW_BCOL (1 << 8)
+#define NV40_NEW_VERTPROG (1 << 9)
+#define NV40_NEW_FRAGPROG (1 << 10)
+#define NV40_NEW_ARRAYS (1 << 11)
+#define NV40_NEW_UCP (1 << 12)
+
+struct nv40_rasterizer_state {
+ struct pipe_rasterizer_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_zsa_state {
+ struct pipe_depth_stencil_alpha_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_blend_state {
+ struct pipe_blend_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+
+struct nv40_state {
+ unsigned scissor_enabled;
+ unsigned stipple_enabled;
+ unsigned viewport_bypass;
+ unsigned fp_samplers;
+
+ uint64_t dirty;
+ struct nouveau_stateobj *hw[NV40_STATE_MAX];
+};
+
+struct nv40_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv40_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ /* HW state derived from pipe states */
+ struct nv40_state state;
+ struct {
+ struct nv40_vertex_program *vertprog;
+
+ unsigned nr_attribs;
+ unsigned hw[PIPE_MAX_SHADER_INPUTS];
+ unsigned draw[PIPE_MAX_SHADER_INPUTS];
+ unsigned emit[PIPE_MAX_SHADER_INPUTS];
+ } swtnl;
+
+ enum {
+ HW, SWTNL, SWRAST
+ } render_mode;
+ unsigned fallback_swtnl;
+ unsigned fallback_swrast;
+
+ /* Context state */
+ unsigned dirty, draw_dirty;
+ struct pipe_scissor_state scissor;
+ unsigned stipple[32];
+ struct pipe_clip_state clip;
+ struct nv40_vertex_program *vertprog;
+ struct nv40_fragment_program *fragprog;
+ struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+ struct nv40_rasterizer_state *rasterizer;
+ struct nv40_zsa_state *zsa;
+ struct nv40_blend_state *blend;
+ struct pipe_blend_color blend_colour;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_buffer *idxbuf;
+ unsigned idxbuf_format;
+ struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers;
+ unsigned nr_textures;
+ unsigned dirty_samplers;
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned vtxbuf_nr;
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+ unsigned vtxelt_nr;
+ const unsigned *edgeflags;
+};
+
+static INLINE struct nv40_context *
+nv40_context(struct pipe_context *pipe)
+{
+ return (struct nv40_context *)pipe;
+}
+
+struct nv40_state_entry {
+ boolean (*validate)(struct nv40_context *nv40);
+ struct {
+ unsigned pipe;
+ unsigned hw;
+ } dirty;
+};
+
+extern void nv40_init_state_functions(struct nv40_context *nv40);
+extern void nv40_init_surface_functions(struct nv40_context *nv40);
+extern void nv40_init_query_functions(struct nv40_context *nv40);
+
+extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv40_draw.c */
+extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
+extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe,
+ struct pipe_buffer *idxbuf,
+ unsigned ib_size, unsigned mode,
+ unsigned start, unsigned count);
+
+/* nv40_vertprog.c */
+extern void nv40_vertprog_destroy(struct nv40_context *,
+ struct nv40_vertex_program *);
+
+/* nv40_fragprog.c */
+extern void nv40_fragprog_destroy(struct nv40_context *,
+ struct nv40_fragment_program *);
+
+/* nv40_fragtex.c */
+extern void nv40_fragtex_bind(struct nv40_context *);
+
+/* nv40_state.c and friends */
+extern boolean nv40_state_validate(struct nv40_context *nv40);
+extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
+extern void nv40_state_emit(struct nv40_context *nv40);
+extern struct nv40_state_entry nv40_state_rasterizer;
+extern struct nv40_state_entry nv40_state_scissor;
+extern struct nv40_state_entry nv40_state_stipple;
+extern struct nv40_state_entry nv40_state_fragprog;
+extern struct nv40_state_entry nv40_state_vertprog;
+extern struct nv40_state_entry nv40_state_blend;
+extern struct nv40_state_entry nv40_state_blend_colour;
+extern struct nv40_state_entry nv40_state_zsa;
+extern struct nv40_state_entry nv40_state_viewport;
+extern struct nv40_state_entry nv40_state_framebuffer;
+extern struct nv40_state_entry nv40_state_fragtex;
+extern struct nv40_state_entry nv40_state_vbo;
+extern struct nv40_state_entry nv40_state_vtxfmt;
+
+/* nv40_vbo.c */
+extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv40_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count);
+
+/* nv40_clear.c */
+extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
new file mode 100644
index 0000000000..8e56cdc2fe
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -0,0 +1,349 @@
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_pack_color.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pipe.h"
+
+#include "nv40_context.h"
+#define NV40_SHADER_NO_FUCKEDNESS
+#include "nv40_shader.h"
+
+/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
+ * often at all. Uses "quadro style" vertex submission + a fixed vertex
+ * layout to avoid the need to generate a vertex program or vtxfmt.
+ */
+
+struct nv40_render_stage {
+ struct draw_stage stage;
+ struct nv40_context *nv40;
+ unsigned prim;
+};
+
+static INLINE struct nv40_render_stage *
+nv40_render_stage(struct draw_stage *stage)
+{
+ return (struct nv40_render_stage *)stage;
+}
+
+static INLINE void
+nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
+{
+ unsigned i;
+
+ for (i = 0; i < nv40->swtnl.nr_attribs; i++) {
+ unsigned idx = nv40->swtnl.draw[i];
+ unsigned hw = nv40->swtnl.hw[i];
+
+ switch (nv40->swtnl.emit[i]) {
+ case EMIT_OMIT:
+ break;
+ case EMIT_1F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1);
+ OUT_RING (fui(v->data[idx][0]));
+ break;
+ case EMIT_2F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
+ OUT_RING (fui(v->data[idx][0]));
+ OUT_RING (fui(v->data[idx][1]));
+ break;
+ case EMIT_3F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
+ OUT_RING (fui(v->data[idx][0]));
+ OUT_RING (fui(v->data[idx][1]));
+ OUT_RING (fui(v->data[idx][2]));
+ break;
+ case EMIT_4F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
+ OUT_RING (fui(v->data[idx][0]));
+ OUT_RING (fui(v->data[idx][1]));
+ OUT_RING (fui(v->data[idx][2]));
+ OUT_RING (fui(v->data[idx][3]));
+ break;
+ case EMIT_4UB:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
+ OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]),
+ float_to_ubyte(v->data[idx][1]),
+ float_to_ubyte(v->data[idx][2]),
+ float_to_ubyte(v->data[idx][3])));
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+}
+
+static INLINE void
+nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
+ unsigned mode, unsigned count)
+{
+ struct nv40_render_stage *rs = nv40_render_stage(stage);
+ struct nv40_context *nv40 = rs->nv40;
+ struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf;
+ unsigned i;
+
+ /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
+ if (pb->remaining < ((count * 20) + 6)) {
+ if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+ NOUVEAU_ERR("AIII, missed flush\n");
+ assert(0);
+ }
+ FIRE_RING(NULL);
+ nv40_state_emit(nv40);
+ }
+
+ /* Switch primitive modes if necessary */
+ if (rs->prim != mode) {
+ if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (NV40TCL_BEGIN_END_STOP);
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (mode);
+ rs->prim = mode;
+ }
+
+ /* Emit vertex data */
+ for (i = 0; i < count; i++)
+ nv40_render_vertex(nv40, prim->v[i]);
+
+ /* If it's likely we'll need to empty the push buffer soon, finish
+ * off the primitive now.
+ */
+ if (pb->remaining < ((count * 20) + 6)) {
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (NV40TCL_BEGIN_END_STOP);
+ rs->prim = NV40TCL_BEGIN_END_STOP;
+ }
+}
+
+static void
+nv40_render_point(struct draw_stage *draw, struct prim_header *prim)
+{
+ nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1);
+}
+
+static void
+nv40_render_line(struct draw_stage *draw, struct prim_header *prim)
+{
+ nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2);
+}
+
+static void
+nv40_render_tri(struct draw_stage *draw, struct prim_header *prim)
+{
+ nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3);
+}
+
+static void
+nv40_render_flush(struct draw_stage *draw, unsigned flags)
+{
+ struct nv40_render_stage *rs = nv40_render_stage(draw);
+ struct nv40_context *nv40 = rs->nv40;
+
+ if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (NV40TCL_BEGIN_END_STOP);
+ rs->prim = NV40TCL_BEGIN_END_STOP;
+ }
+}
+
+static void
+nv40_render_reset_stipple_counter(struct draw_stage *draw)
+{
+}
+
+static void
+nv40_render_destroy(struct draw_stage *draw)
+{
+ FREE(draw);
+}
+
+static INLINE void
+emit_mov(struct nv40_vertex_program *vp,
+ unsigned dst, unsigned src, unsigned vor, unsigned mask)
+{
+ struct nv40_vertex_program_exec *inst;
+
+ vp->insns = realloc(vp->insns,
+ sizeof(struct nv40_vertex_program_exec) *
+ ++vp->nr_insns);
+ inst = &vp->insns[vp->nr_insns - 1];
+
+ inst->data[0] = 0x401f9c6c;
+ inst->data[1] = 0x0040000d | (src << 8);
+ inst->data[2] = 0x8106c083;
+ inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13);
+ inst->const_index = -1;
+ inst->has_branch_offset = FALSE;
+
+ vp->ir |= (1 << src);
+ if (vor != ~0)
+ vp->or |= (1 << vor);
+}
+
+static struct nv40_vertex_program *
+create_drawvp(struct nv40_context *nv40)
+{
+ struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program);
+ unsigned i;
+
+ emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8);
+ for (i = 0; i < 8; i++)
+ emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf);
+
+ vp->insns[vp->nr_insns - 1].data[3] |= 1;
+ vp->translated = TRUE;
+ return vp;
+}
+
+struct draw_stage *
+nv40_draw_render_stage(struct nv40_context *nv40)
+{
+ struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage);
+
+ if (!nv40->swtnl.vertprog)
+ nv40->swtnl.vertprog = create_drawvp(nv40);
+
+ render->nv40 = nv40;
+ render->stage.draw = nv40->draw;
+ render->stage.point = nv40_render_point;
+ render->stage.line = nv40_render_line;
+ render->stage.tri = nv40_render_tri;
+ render->stage.flush = nv40_render_flush;
+ render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter;
+ render->stage.destroy = nv40_render_destroy;
+
+ return &render->stage;
+}
+
+boolean
+nv40_draw_elements_swtnl(struct pipe_context *pipe,
+ struct pipe_buffer *idxbuf, unsigned idxbuf_size,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ unsigned i;
+ void *map;
+
+ if (!nv40_state_validate_swtnl(nv40))
+ return FALSE;
+ nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
+ nv40_state_emit(nv40);
+
+ for (i = 0; i < nv40->vtxbuf_nr; i++) {
+ map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(nv40->draw, i, map);
+ }
+
+ if (idxbuf) {
+ map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map);
+ } else {
+ draw_set_mapped_element_buffer(nv40->draw, 0, NULL);
+ }
+
+ if (nv40->constbuf[PIPE_SHADER_VERTEX]) {
+ const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX];
+
+ map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX],
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_constant_buffer(nv40->draw, map, nr);
+ }
+
+ draw_arrays(nv40->draw, mode, start, count);
+
+ for (i = 0; i < nv40->vtxbuf_nr; i++)
+ ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer);
+
+ if (idxbuf)
+ ws->buffer_unmap(ws, idxbuf);
+
+ if (nv40->constbuf[PIPE_SHADER_VERTEX])
+ ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]);
+
+ draw_flush(nv40->draw);
+ pipe->flush(pipe, 0, NULL);
+
+ return TRUE;
+}
+
+static INLINE void
+emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
+ unsigned semantic, unsigned index)
+{
+ unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index);
+ unsigned a = nv40->swtnl.nr_attribs++;
+
+ nv40->swtnl.hw[a] = hw;
+ nv40->swtnl.emit[a] = emit;
+ nv40->swtnl.draw[a] = draw_out;
+}
+
+static boolean
+nv40_state_vtxfmt_validate(struct nv40_context *nv40)
+{
+ struct nv40_fragment_program *fp = nv40->fragprog;
+ unsigned colour = 0, texcoords = 0, fog = 0, i;
+
+ /* Determine needed fragprog inputs */
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ switch (fp->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ colour |= (1 << fp->info.input_semantic_index[i]);
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ texcoords |= (1 << fp->info.input_semantic_index[i]);
+ break;
+ case TGSI_SEMANTIC_FOG:
+ fog = 1;
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ nv40->swtnl.nr_attribs = 0;
+
+ /* Map draw vtxprog output to hw attribute IDs */
+ for (i = 0; i < 2; i++) {
+ if (!(colour & (1 << i)))
+ continue;
+ emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i);
+ }
+
+ for (i = 0; i < 8; i++) {
+ if (!(texcoords & (1 << i)))
+ continue;
+ emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i);
+ }
+
+ if (fog) {
+ emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);
+ }
+
+ emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0);
+
+ return FALSE;
+}
+
+struct nv40_state_entry nv40_state_vtxfmt = {
+ .validate = nv40_state_vtxfmt_validate,
+ .dirty = {
+ .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG,
+ .hw = 0
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
new file mode 100644
index 0000000000..91dcbebda0
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -0,0 +1,991 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv40_context.h"
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 1
+#define MASK_Y 2
+#define MASK_Z 4
+#define MASK_W 8
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NV40_FP_OP_COND_TR
+#include "nv40_shader.h"
+
+#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv40_sr_neg((s))
+#define abs(s) nv40_sr_abs((s))
+#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v)
+
+#define MAX_CONSTS 128
+#define MAX_IMM 32
+struct nv40_fpc {
+ struct nv40_fragment_program *fp;
+
+ uint attrib_map[PIPE_MAX_SHADER_INPUTS];
+
+ unsigned r_temps;
+ unsigned r_temps_discard;
+ struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+ struct nv40_sreg *r_temp;
+
+ int num_regs;
+
+ unsigned inst_offset;
+ unsigned have_const;
+
+ struct {
+ int pipe;
+ float vals[4];
+ } consts[MAX_CONSTS];
+ int nr_consts;
+
+ struct nv40_sreg imm[MAX_IMM];
+ unsigned nr_imm;
+};
+
+static INLINE struct nv40_sreg
+temp(struct nv40_fpc *fpc)
+{
+ int idx = ffs(~fpc->r_temps) - 1;
+
+ if (idx < 0) {
+ NOUVEAU_ERR("out of temps!!\n");
+ assert(0);
+ return nv40_sr(NV40SR_TEMP, 0);
+ }
+
+ fpc->r_temps |= (1 << idx);
+ fpc->r_temps_discard |= (1 << idx);
+ return nv40_sr(NV40SR_TEMP, idx);
+}
+
+static INLINE void
+release_temps(struct nv40_fpc *fpc)
+{
+ fpc->r_temps &= ~fpc->r_temps_discard;
+ fpc->r_temps_discard = 0;
+}
+
+static INLINE struct nv40_sreg
+constant(struct nv40_fpc *fpc, int pipe, float vals[4])
+{
+ int idx;
+
+ if (fpc->nr_consts == MAX_CONSTS)
+ assert(0);
+ idx = fpc->nr_consts++;
+
+ fpc->consts[idx].pipe = pipe;
+ if (pipe == -1)
+ memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
+ return nv40_sr(NV40SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \
+ (d), (m), (s0), (s1), (s2))
+#define tex(cc,s,o,u,d,m,s0,s1,s2) \
+ nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \
+ (d), (m), (s0), none, none)
+
+static void
+grow_insns(struct nv40_fpc *fpc, int size)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+
+ fp->insn_len += size;
+ fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
+}
+
+static void
+emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fpc->inst_offset];
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV40SR_INPUT:
+ sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
+ hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT);
+ break;
+ case NV40SR_OUTPUT:
+ sr |= NV40_FP_REG_SRC_HALF;
+ /* fall-through */
+ case NV40SR_TEMP:
+ sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT);
+ sr |= (src.index << NV40_FP_REG_SRC_SHIFT);
+ break;
+ case NV40SR_CONST:
+ if (!fpc->have_const) {
+ grow_insns(fpc, 4);
+ fpc->have_const = 1;
+ }
+
+ hw = &fp->insn[fpc->inst_offset];
+ if (fpc->consts[src.index].pipe >= 0) {
+ struct nv40_fragment_program_data *fpd;
+
+ fp->consts = realloc(fp->consts, ++fp->nr_consts *
+ sizeof(*fpd));
+ fpd = &fp->consts[fp->nr_consts - 1];
+ fpd->offset = fpc->inst_offset + 4;
+ fpd->index = fpc->consts[src.index].pipe;
+ memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
+ } else {
+ memcpy(&fp->insn[fpc->inst_offset + 4],
+ fpc->consts[src.index].vals,
+ sizeof(uint32_t) * 4);
+ }
+
+ sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
+ break;
+ case NV40SR_NONE:
+ sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV40_FP_REG_NEGATE;
+
+ if (src.abs)
+ hw[1] |= (1 << (29 + pos));
+
+ sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) |
+ (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT));
+
+ hw[pos + 1] |= sr;
+}
+
+static void
+emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fpc->inst_offset];
+
+ switch (dst.type) {
+ case NV40SR_TEMP:
+ if (fpc->num_regs < (dst.index + 1))
+ fpc->num_regs = dst.index + 1;
+ break;
+ case NV40SR_OUTPUT:
+ if (dst.index == 1) {
+ fp->fp_control |= 0xe;
+ } else {
+ hw[0] |= NV40_FP_OP_OUT_REG_HALF;
+ }
+ break;
+ case NV40SR_NONE:
+ hw[0] |= (1 << 30);
+ break;
+ default:
+ assert(0);
+ }
+
+ hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT);
+}
+
+static void
+nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
+ struct nv40_sreg dst, int mask,
+ struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+ uint32_t *hw;
+
+ fpc->inst_offset = fp->insn_len;
+ fpc->have_const = 0;
+ grow_insns(fpc, 4);
+ hw = &fp->insn[fpc->inst_offset];
+ memset(hw, 0, sizeof(uint32_t) * 4);
+
+ if (op == NV40_FP_OP_OPCODE_KIL)
+ fp->fp_control |= NV40TCL_FP_CONTROL_KIL;
+ hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT);
+ hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT);
+ hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT);
+
+ if (sat)
+ hw[0] |= NV40_FP_OP_OUT_SAT;
+
+ if (dst.cc_update)
+ hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE;
+ hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT);
+ hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) |
+ (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) |
+ (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) |
+ (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT));
+
+ emit_dst(fpc, dst);
+ emit_src(fpc, 0, s0);
+ emit_src(fpc, 1, s1);
+ emit_src(fpc, 2, s2);
+}
+
+static void
+nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit,
+ struct nv40_sreg dst, int mask,
+ struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+
+ nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
+
+ fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT);
+ fp->samplers |= (1 << unit);
+}
+
+static INLINE struct nv40_sreg
+tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
+{
+ struct nv40_sreg src;
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ src = nv40_sr(NV40SR_INPUT,
+ fpc->attrib_map[fsrc->SrcRegister.Index]);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ assert(fsrc->SrcRegister.Index < fpc->nr_imm);
+ src = fpc->imm[fsrc->SrcRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ src = fpc->r_temp[fsrc->SrcRegister.Index];
+ break;
+ /* NV40 fragprog result regs are just temps, so this is simple */
+ case TGSI_FILE_OUTPUT:
+ src = fpc->r_result[fsrc->SrcRegister.Index];
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->SrcRegisterExtMod.Absolute;
+ src.negate = fsrc->SrcRegister.Negate;
+ src.swz[0] = fsrc->SrcRegister.SwizzleX;
+ src.swz[1] = fsrc->SrcRegister.SwizzleY;
+ src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+ src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ return src;
+}
+
+static INLINE struct nv40_sreg
+tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
+ switch (fdst->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ return fpc->r_result[fdst->DstRegister.Index];
+ case TGSI_FILE_TEMPORARY:
+ return fpc->r_temp[fdst->DstRegister.Index];
+ case TGSI_FILE_NULL:
+ return nv40_sr(NV40SR_NONE, 0);
+ default:
+ NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+ return nv40_sr(NV40SR_NONE, 0);
+ }
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
+ return mask;
+}
+
+static boolean
+src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
+ struct nv40_sreg *src)
+{
+ const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ struct nv40_sreg tgsi = tgsi_src(fpc, fsrc);
+ uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+ uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+ fsrc->SrcRegisterExtSwz.NegateY,
+ fsrc->SrcRegisterExtSwz.NegateZ,
+ fsrc->SrcRegisterExtSwz.NegateW };
+ uint c;
+
+ for (c = 0; c < 4; c++) {
+ switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ mask |= (1 << c);
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zero_mask |= (1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ one_mask |= (1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (!tgsi.negate && neg[c])
+ neg_mask |= (1 << c);
+ }
+
+ if (mask == MASK_ALL && !neg_mask)
+ return TRUE;
+
+ *src = temp(fpc);
+
+ if (mask)
+ arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
+
+ if (zero_mask)
+ arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
+
+ if (one_mask)
+ arith(fpc, 0, STR, *src, one_mask, *src, none, none);
+
+ if (neg_mask) {
+ struct nv40_sreg one = temp(fpc);
+ arith(fpc, 0, STR, one, neg_mask, one, none, none);
+ arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
+ }
+
+ return FALSE;
+}
+
+static boolean
+nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
+ const struct tgsi_full_instruction *finst)
+{
+ const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ struct nv40_sreg src[3], dst, tmp;
+ int mask, sat, unit;
+ int ai = -1, ci = -1, ii = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(fpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ if (!src_native_swz(fpc, fsrc, &src[i]))
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ if ((ci == -1 && ii == -1) ||
+ ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ if ((ci == -1 && ii == -1) ||
+ ii == fsrc->SrcRegister.Index) {
+ ii = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ case TGSI_FILE_SAMPLER:
+ unit = fsrc->SrcRegister.Index;
+ break;
+ case TGSI_FILE_OUTPUT:
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_CMP:
+ tmp = temp(fpc);
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ tmp.cc_update = 1;
+ arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NV40_VP_INST_COND_LT;
+ arith(fpc, sat, MOV, dst, mask, src[1], none, none);
+ break;
+ case TGSI_OPCODE_COS:
+ arith(fpc, sat, COS, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DDX:
+ if (mask & (MASK_Z | MASK_W)) {
+ tmp = temp(fpc);
+ arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y,
+ swz(src[0], Z, W, Z, W), none, none);
+ arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W,
+ swz(tmp, X, Y, X, Y), none, none);
+ arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0],
+ none, none);
+ arith(fpc, 0, MOV, dst, mask, tmp, none, none);
+ } else {
+ arith(fpc, sat, DDX, dst, mask, src[0], none, none);
+ }
+ break;
+ case TGSI_OPCODE_DDY:
+ if (mask & (MASK_Z | MASK_W)) {
+ tmp = temp(fpc);
+ arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y,
+ swz(src[0], Z, W, Z, W), none, none);
+ arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W,
+ swz(tmp, X, Y, X, Y), none, none);
+ arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0],
+ none, none);
+ arith(fpc, 0, MOV, dst, mask, tmp, none, none);
+ } else {
+ arith(fpc, sat, DDY, dst, mask, src[0], none, none);
+ }
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ tmp = temp(fpc);
+ arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
+ arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
+ swz(src[1], W, W, W, W), none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(fpc, sat, EX2, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(fpc, sat, FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(fpc, sat, FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_KILP:
+ arith(fpc, 0, KIL, none, 0, none, none, none);
+ break;
+ case TGSI_OPCODE_KIL:
+ dst = nv40_sr(NV40SR_NONE, 0);
+ dst.cc_update = 1;
+ arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
+ dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT;
+ arith(fpc, 0, KIL, dst, 0, none, none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(fpc, sat, LG2, dst, mask, src[0], none, none);
+ break;
+// case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_LRP:
+ tmp = temp(fpc);
+ arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
+ arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(fpc, sat, MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_NOISE1:
+ case TGSI_OPCODE_NOISE2:
+ case TGSI_OPCODE_NOISE3:
+ case TGSI_OPCODE_NOISE4:
+ arith(fpc, sat, SFL, dst, mask, none, none, none);
+ break;
+ case TGSI_OPCODE_POW:
+ tmp = temp(fpc);
+ arith(fpc, 0, LG2, tmp, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(fpc, sat, EX2, dst, mask,
+ swz(tmp, X, X, X, X), none, none);
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(fpc, sat, RCP, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_RET:
+ assert(0);
+ break;
+ case TGSI_OPCODE_RFL:
+ tmp = temp(fpc);
+ arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none);
+ arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none);
+ arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z,
+ swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
+ arith(fpc, sat, MAD, dst, mask,
+ swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
+ break;
+ case TGSI_OPCODE_RSQ:
+ tmp = temp(fpc);
+ arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X,
+ abs(swz(src[0], X, X, X, X)), none, none);
+ arith(fpc, sat, EX2, dst, mask,
+ neg(swz(tmp, X, X, X, X)), none, none);
+ break;
+ case TGSI_OPCODE_SCS:
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ break;
+ case TGSI_OPCODE_SEQ:
+ arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SFL:
+ arith(fpc, sat, SFL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SIN:
+ arith(fpc, sat, SIN, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_SLE:
+ arith(fpc, sat, SLE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SNE:
+ arith(fpc, sat, SNE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_STR:
+ arith(fpc, sat, STR, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
+ break;
+ case TGSI_OPCODE_TEX:
+ tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_TXB:
+ tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_TXP:
+ tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(fpc);
+ arith(fpc, 0, MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ release_temps(fpc);
+ return TRUE;
+}
+
+static boolean
+nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ int hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NV40_FP_OP_INPUT_SRC_POSITION;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV40_FP_OP_INPUT_SRC_COL0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV40_FP_OP_INPUT_SRC_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NV40_FP_OP_INPUT_SRC_FOGC;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.SemanticIndex <= 7) {
+ hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic.
+ SemanticIndex);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad input semantic\n");
+ return FALSE;
+ }
+
+ fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+ return TRUE;
+}
+
+static boolean
+nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ unsigned idx = fdec->DeclarationRange.First;
+ unsigned hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = 1;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ switch (fdec->Semantic.SemanticIndex) {
+ case 0: hw = 0; break;
+ case 1: hw = 2; break;
+ case 2: hw = 3; break;
+ case 3: hw = 4; break;
+ default:
+ NOUVEAU_ERR("bad rcol index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
+ fpc->r_temps |= (1 << hw);
+ return TRUE;
+}
+
+static boolean
+nv40_fragprog_prepare(struct nv40_fpc *fpc)
+{
+ struct tgsi_parse_context p;
+ int high_temp = -1, i;
+
+ tgsi_parse_init(&p, fpc->fp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+ fdec = &p.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ if (!nv40_fragprog_parse_decl_attrib(fpc, fdec))
+ goto out_err;
+ break;
+ case TGSI_FILE_OUTPUT:
+ if (!nv40_fragprog_parse_decl_output(fpc, fdec))
+ goto out_err;
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (fdec->DeclarationRange.Last > high_temp) {
+ high_temp =
+ fdec->DeclarationRange.Last;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ struct tgsi_full_immediate *imm;
+ float vals[4];
+
+ imm = &p.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+ assert(fpc->nr_imm < MAX_IMM);
+
+ vals[0] = imm->u.ImmediateFloat32[0].Float;
+ vals[1] = imm->u.ImmediateFloat32[1].Float;
+ vals[2] = imm->u.ImmediateFloat32[2].Float;
+ vals[3] = imm->u.ImmediateFloat32[3].Float;
+ fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ if (++high_temp) {
+ fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+ for (i = 0; i < high_temp; i++)
+ fpc->r_temp[i] = temp(fpc);
+ fpc->r_temps_discard = 0;
+ }
+
+ return TRUE;
+
+out_err:
+ if (fpc->r_temp)
+ FREE(fpc->r_temp);
+ tgsi_parse_free(&p);
+ return FALSE;
+}
+
+static void
+nv40_fragprog_translate(struct nv40_context *nv40,
+ struct nv40_fragment_program *fp)
+{
+ struct tgsi_parse_context parse;
+ struct nv40_fpc *fpc = NULL;
+
+ fpc = CALLOC(1, sizeof(struct nv40_fpc));
+ if (!fpc)
+ return;
+ fpc->fp = fp;
+ fpc->num_regs = 2;
+
+ if (!nv40_fragprog_prepare(fpc)) {
+ FREE(fpc);
+ return;
+ }
+
+ tgsi_parse_init(&parse, fp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+
+ finst = &parse.FullToken.FullInstruction;
+ if (!nv40_fragprog_parse_instruction(fpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;
+
+ /* Terminate final instruction */
+ fp->insn[fpc->inst_offset] |= 0x00000001;
+
+ /* Append NOP + END instruction, may or may not be necessary. */
+ fpc->inst_offset = fp->insn_len;
+ grow_insns(fpc, 4);
+ fp->insn[fpc->inst_offset + 0] = 0x00000001;
+ fp->insn[fpc->inst_offset + 1] = 0x00000000;
+ fp->insn[fpc->inst_offset + 2] = 0x00000000;
+ fp->insn[fpc->inst_offset + 3] = 0x00000000;
+
+ fp->translated = TRUE;
+out_err:
+ tgsi_parse_free(&parse);
+ if (fpc->r_temp)
+ FREE(fpc->r_temp);
+ FREE(fpc);
+}
+
+static void
+nv40_fragprog_upload(struct nv40_context *nv40,
+ struct nv40_fragment_program *fp)
+{
+ struct pipe_winsys *ws = nv40->pipe.winsys;
+ const uint32_t le = 1;
+ uint32_t *map;
+ int i;
+
+ map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+#if 0
+ for (i = 0; i < fp->insn_len; i++) {
+ fflush(stdout); fflush(stderr);
+ NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
+ fflush(stdout); fflush(stderr);
+ }
+#endif
+
+ if ((*(const uint8_t *)&le)) {
+ for (i = 0; i < fp->insn_len; i++) {
+ map[i] = fp->insn[i];
+ }
+ } else {
+ /* Weird swapping for big-endian chips */
+ for (i = 0; i < fp->insn_len; i++) {
+ map[i] = ((fp->insn[i] & 0xffff) << 16) |
+ ((fp->insn[i] >> 16) & 0xffff);
+ }
+ }
+
+ ws->buffer_unmap(ws, fp->buffer);
+}
+
+static boolean
+nv40_fragprog_validate(struct nv40_context *nv40)
+{
+ struct nv40_fragment_program *fp = nv40->fragprog;
+ struct pipe_buffer *constbuf =
+ nv40->constbuf[PIPE_SHADER_FRAGMENT];
+ struct pipe_winsys *ws = nv40->pipe.winsys;
+ struct nouveau_stateobj *so;
+ boolean new_consts = FALSE;
+ int i;
+
+ if (fp->translated)
+ goto update_constants;
+
+ nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG;
+ nv40_fragprog_translate(nv40, fp);
+ if (!fp->translated) {
+ nv40->fallback_swrast |= NV40_NEW_FRAGPROG;
+ return FALSE;
+ }
+
+ fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4);
+ nv40_fragprog_upload(nv40, fp);
+
+ so = so_new(4, 1);
+ so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1);
+ so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
+ so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1);
+ so_data (so, fp->fp_control);
+ so_ref(so, &fp->so);
+
+update_constants:
+ if (fp->nr_consts) {
+ float *map;
+
+ map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ);
+ for (i = 0; i < fp->nr_consts; i++) {
+ struct nv40_fragment_program_data *fpd = &fp->consts[i];
+ uint32_t *p = &fp->insn[fpd->offset];
+ uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
+
+ if (!memcmp(p, cb, 4 * sizeof(float)))
+ continue;
+ memcpy(p, cb, 4 * sizeof(float));
+ new_consts = TRUE;
+ }
+ ws->buffer_unmap(ws, constbuf);
+
+ if (new_consts)
+ nv40_fragprog_upload(nv40, fp);
+ }
+
+ if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) {
+ so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+nv40_fragprog_destroy(struct nv40_context *nv40,
+ struct nv40_fragment_program *fp)
+{
+ if (fp->insn_len)
+ FREE(fp->insn);
+}
+
+struct nv40_state_entry nv40_state_fragprog = {
+ .validate = nv40_fragprog_validate,
+ .dirty = {
+ .pipe = NV40_NEW_FRAGPROG,
+ .hw = NV40_STATE_FRAGPROG
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c
new file mode 100644
index 0000000000..0227d22620
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_fragtex.c
@@ -0,0 +1,168 @@
+#include "nv40_context.h"
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV40TCL_TEX_FORMAT_FORMAT_##tf, \
+ (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \
+ NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \
+ NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \
+ NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \
+ ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) | \
+ (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw)) \
+}
+
+struct nv40_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+ int swizzle;
+ int sign;
+};
+
+static struct nv40_texture_format
+nv40_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0),
+ _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1),
+ _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0),
+ _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0),
+ _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ {},
+};
+
+static struct nv40_texture_format *
+nv40_fragtex_format(uint pipe_format)
+{
+ struct nv40_texture_format *tf = nv40_texture_formats;
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format));
+ return NULL;
+}
+
+
+static struct nouveau_stateobj *
+nv40_fragtex_build(struct nv40_context *nv40, int unit)
+{
+ struct nv40_sampler_state *ps = nv40->tex_sampler[unit];
+ struct nv40_miptree *nv40mt = nv40->tex_miptree[unit];
+ struct pipe_texture *pt = &nv40mt->base;
+ struct nv40_texture_format *tf;
+ struct nouveau_stateobj *so;
+ uint32_t txf, txs, txp;
+ unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ tf = nv40_fragtex_format(pt->format);
+ if (!tf)
+ assert(0);
+
+ txf = ps->fmt;
+ txf |= tf->format | 0x8000;
+ txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
+
+ if (1) /* XXX */
+ txf |= NV40TCL_TEX_FORMAT_NO_BORDER;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV40TCL_TEX_FORMAT_CUBIC;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= NV40TCL_TEX_FORMAT_DIMS_2D;
+ break;
+ case PIPE_TEXTURE_3D:
+ txf |= NV40TCL_TEX_FORMAT_DIMS_3D;
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= NV40TCL_TEX_FORMAT_DIMS_1D;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return NULL;
+ }
+
+ if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ txp = 0;
+ } else {
+ txp = nv40mt->level[0].pitch;
+ txf |= NV40TCL_TEX_FORMAT_LINEAR;
+ }
+
+ txs = tf->swizzle;
+
+ so = so_new(16, 2);
+ so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8);
+ so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR,
+ NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1);
+ so_data (so, ps->wrap);
+ so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
+ so_data (so, txs);
+ so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/);
+ so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) |
+ pt->height[0]);
+ so_data (so, ps->bcol);
+ so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1);
+ so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+
+ return so;
+}
+
+static boolean
+nv40_fragtex_validate(struct nv40_context *nv40)
+{
+ struct nv40_fragment_program *fp = nv40->fragprog;
+ struct nv40_state *state = &nv40->state;
+ struct nouveau_stateobj *so;
+ unsigned samplers, unit;
+
+ samplers = state->fp_samplers & ~fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ so = so_new(2, 0);
+ so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1);
+ so_data (so, 0);
+ so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]);
+ state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit));
+ }
+
+ samplers = nv40->dirty_samplers & fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ so = nv40_fragtex_build(nv40, unit);
+ so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]);
+ state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit));
+ }
+
+ nv40->state.fp_samplers = fp->samplers;
+ return FALSE;
+}
+
+struct nv40_state_entry nv40_state_fragtex = {
+ .validate = nv40_fragtex_validate,
+ .dirty = {
+ .pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG,
+ .hw = 0
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
new file mode 100644
index 0000000000..ba912ddcbb
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -0,0 +1,197 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv40_context.h"
+
+static void
+nv40_miptree_layout(struct nv40_miptree *mt)
+{
+ struct pipe_texture *pt = &mt->base;
+ uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+ uint offset = 0;
+ int nr_faces, l, f, pitch;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ nr_faces = pt->depth[0];
+ } else {
+ nr_faces = 1;
+ }
+
+ pitch = pt->width[0];
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->depth[l] = depth;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
+ pitch = pt->nblocksx[l];
+ pitch = align(pitch, 64);
+
+ mt->level[l].pitch = pitch * pt->block.size;
+ mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+ depth = MAX2(1, depth >> 1);
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ mt->level[l].image_offset[f] = offset;
+ offset += mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ mt->total_size = offset;
+}
+
+static struct pipe_texture *
+nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv40_miptree *mt;
+ unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL |
+ NOUVEAU_BUFFER_USAGE_TEXTURE;
+
+ mt = MALLOC(sizeof(struct nv40_miptree));
+ if (!mt)
+ return NULL;
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = pscreen;
+ mt->shadow_tex = NULL;
+ mt->shadow_surface = NULL;
+
+ /* Swizzled textures must be POT */
+ if (pt->width[0] & (pt->width[0] - 1) ||
+ pt->height[0] & (pt->height[0] - 1))
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ else
+ if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
+ PIPE_TEXTURE_USAGE_DISPLAY_TARGET))
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ else
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ else {
+ switch (pt->format) {
+ /* TODO: Figure out which formats can be swizzled */
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_R16_SNORM:
+ break;
+ default:
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ }
+ }
+
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
+ buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+
+ nv40_miptree_layout(mt);
+
+ mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+ struct pipe_texture *pt = *ppt;
+ struct nv40_miptree *mt = (struct nv40_miptree *)pt;
+ int l;
+
+ *ppt = NULL;
+ if (--pt->refcount)
+ return;
+
+ pipe_buffer_reference(pscreen, &mt->buffer, NULL);
+ for (l = 0; l <= pt->last_level; l++) {
+ if (mt->level[l].image_offset)
+ FREE(mt->level[l].image_offset);
+ }
+
+ if (mt->shadow_tex) {
+ assert(mt->shadow_surface);
+ pscreen->tex_surface_release(pscreen, &mt->shadow_surface);
+ nv40_miptree_release(pscreen, &mt->shadow_tex);
+ }
+
+ FREE(mt);
+}
+
+static struct pipe_surface *
+nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct nv40_miptree *mt = (struct nv40_miptree *)pt;
+ struct pipe_surface *ps;
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (!ps)
+ return NULL;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = mt->level[level].pitch;
+ ps->usage = flags;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+ ps->refcount = 1;
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ps->offset = mt->level[level].image_offset[face];
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ ps->offset = mt->level[level].image_offset[zslice];
+ } else {
+ ps->offset = mt->level[level].image_offset[0];
+ }
+
+ return ps;
+}
+
+static void
+nv40_miptree_surface_del(struct pipe_screen *pscreen,
+ struct pipe_surface **psurface)
+{
+ struct pipe_surface *ps = *psurface;
+
+ *psurface = NULL;
+ if (--ps->refcount > 0)
+ return;
+
+ pipe_texture_reference(&ps->texture, NULL);
+ pipe_buffer_reference(pscreen, &ps->buffer, NULL);
+ FREE(ps);
+}
+
+void
+nv40_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv40_miptree_create;
+ pscreen->texture_release = nv40_miptree_release;
+ pscreen->get_tex_surface = nv40_miptree_surface_new;
+ pscreen->tex_surface_release = nv40_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c
new file mode 100644
index 0000000000..9b9a43f49d
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_query.c
@@ -0,0 +1,122 @@
+#include "pipe/p_context.h"
+
+#include "nv40_context.h"
+
+struct nv40_query {
+ struct nouveau_resource *object;
+ unsigned type;
+ boolean ready;
+ uint64_t result;
+};
+
+static INLINE struct nv40_query *
+nv40_query(struct pipe_query *pipe)
+{
+ return (struct nv40_query *)pipe;
+}
+
+static struct pipe_query *
+nv40_query_create(struct pipe_context *pipe, unsigned query_type)
+{
+ struct nv40_query *q;
+
+ q = CALLOC(1, sizeof(struct nv40_query));
+ q->type = query_type;
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_query *q = nv40_query(pq);
+
+ if (q->object)
+ nv40->nvws->res_free(&q->object);
+ FREE(q);
+}
+
+static void
+nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_query *q = nv40_query(pq);
+
+ assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ /* Happens when end_query() is called, then another begin_query()
+ * without querying the result in-between. For now we'll wait for
+ * the existing query to notify completion, but it could be better.
+ */
+ if (q->object) {
+ uint64_t tmp;
+ pipe->get_query_result(pipe, pq, 1, &tmp);
+ }
+
+ if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object))
+ assert(0);
+ nv40->nvws->notifier_reset(nv40->screen->query, q->object->start);
+
+ BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1);
+ OUT_RING (1);
+ BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1);
+ OUT_RING (1);
+
+ q->ready = FALSE;
+}
+
+static void
+nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_query *q = nv40_query(pq);
+
+ BEGIN_RING(curie, NV40TCL_QUERY_GET, 1);
+ OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
+ ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT));
+ FIRE_RING(NULL);
+}
+
+static boolean
+nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, uint64_t *result)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_query *q = nv40_query(pq);
+ struct nouveau_winsys *nvws = nv40->nvws;
+
+ assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ if (!q->ready) {
+ unsigned status;
+
+ status = nvws->notifier_status(nv40->screen->query,
+ q->object->start);
+ if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
+ if (wait == FALSE)
+ return FALSE;
+ nvws->notifier_wait(nv40->screen->query, q->object->start,
+ NV_NOTIFY_STATE_STATUS_COMPLETED,
+ 0);
+ }
+
+ q->result = nvws->notifier_retval(nv40->screen->query,
+ q->object->start);
+ q->ready = TRUE;
+ nvws->res_free(&q->object);
+ }
+
+ *result = q->result;
+ return TRUE;
+}
+
+void
+nv40_init_query_functions(struct nv40_context *nv40)
+{
+ nv40->pipe.create_query = nv40_query_create;
+ nv40->pipe.destroy_query = nv40_query_destroy;
+ nv40->pipe.begin_query = nv40_query_begin;
+ nv40->pipe.end_query = nv40_query_end;
+ nv40->pipe.get_query_result = nv40_query_result;
+}
diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
new file mode 100644
index 0000000000..ab128fecda
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -0,0 +1,369 @@
+#include "pipe/p_screen.h"
+
+#include "nv40_context.h"
+#include "nv40_screen.h"
+
+#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf
+#define NV4X_GRCLASS4497_CHIPSETS 0x00005450
+#define NV6X_GRCLASS4497_CHIPSETS 0x00000088
+
+static const char *
+nv40_screen_get_name(struct pipe_screen *pscreen)
+{
+ struct nv40_screen *screen = nv40_screen(pscreen);
+ struct nouveau_device *dev = screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv40_screen_get_vendor(struct pipe_screen *pscreen)
+{
+ return "nouveau";
+}
+
+static int
+nv40_screen_get_param(struct pipe_screen *pscreen, int param)
+{
+ struct nv40_screen *screen = nv40_screen(pscreen);
+
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 16;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 1;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 4;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 13;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 13;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 0; /* We have 4 - but unsupported currently */
+ case NOUVEAU_CAP_HW_VTXBUF:
+ return 1;
+ case NOUVEAU_CAP_HW_IDXBUF:
+ if (screen->curie->grclass == NV40TCL)
+ return 1;
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv40_screen_get_paramf(struct pipe_screen *pscreen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 16.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nv40_screen_surface_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static void *
+nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct pipe_surface *surface_to_map;
+ void *map;
+
+ if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture;
+
+ if (!mt->shadow_tex) {
+ unsigned old_tex_usage = surface->texture->tex_usage;
+ surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR |
+ PIPE_TEXTURE_USAGE_DYNAMIC;
+ mt->shadow_tex = screen->texture_create(screen, surface->texture);
+ surface->texture->tex_usage = old_tex_usage;
+
+ assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR);
+ }
+
+ mt->shadow_surface = screen->get_tex_surface
+ (
+ screen, mt->shadow_tex,
+ surface->face, surface->level, surface->zslice,
+ surface->usage
+ );
+
+ surface_to_map = mt->shadow_surface;
+ }
+ else
+ surface_to_map = surface;
+
+ assert(surface_to_map);
+
+ map = ws->buffer_map(ws, surface_to_map->buffer, flags);
+ if (!map)
+ return NULL;
+
+ return map + surface_to_map->offset;
+}
+
+static void
+nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct pipe_surface *surface_to_unmap;
+
+ /* TODO: Copy from shadow just before push buffer is flushed instead.
+ There are probably some programs that map/unmap excessively
+ before rendering. */
+ if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture;
+
+ assert(mt->shadow_tex);
+
+ surface_to_unmap = mt->shadow_surface;
+ }
+ else
+ surface_to_unmap = surface;
+
+ assert(surface_to_unmap);
+
+ ws->buffer_unmap(ws, surface_to_unmap->buffer);
+
+ if (surface_to_unmap != surface) {
+ struct nv40_screen *nvscreen = nv40_screen(screen);
+
+ nvscreen->nvws->surface_copy(nvscreen->nvws,
+ surface, 0, 0,
+ surface_to_unmap, 0, 0,
+ surface->width, surface->height);
+ }
+}
+
+static void
+nv40_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv40_screen *screen = nv40_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->res_free(&screen->vp_exec_heap);
+ nvws->res_free(&screen->vp_data_heap);
+ nvws->res_free(&screen->query_heap);
+ nvws->notifier_free(&screen->query);
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->curie);
+
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen);
+ struct nouveau_stateobj *so;
+ unsigned curie_class;
+ unsigned chipset = nvws->channel->device->chipset;
+ int ret;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ /* 3D object */
+ switch (chipset & 0xf0) {
+ case 0x40:
+ if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f)))
+ curie_class = NV40TCL;
+ else
+ if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f)))
+ curie_class = NV44TCL;
+ break;
+ case 0x60:
+ if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f)))
+ curie_class = NV44TCL;
+ break;
+ default:
+ break;
+ }
+
+ if (!curie_class) {
+ NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset);
+ return NULL;
+ }
+
+ ret = nvws->grobj_alloc(nvws, curie_class, &screen->curie);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return FALSE;
+ }
+
+ /* Notifier for sync purposes */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv40_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Query objects */
+ ret = nvws->notifier_alloc(nvws, 32, &screen->query);
+ if (ret) {
+ NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
+ nv40_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ ret = nvws->res_init(&screen->query_heap, 0, 32);
+ if (ret) {
+ NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
+ nv40_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Vtxprog resources */
+ if (nvws->res_init(&screen->vp_exec_heap, 0, 512) ||
+ nvws->res_init(&screen->vp_data_heap, 0, 256)) {
+ nv40_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Static curie initialisation */
+ so = so_new(128, 0);
+ so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1);
+ so_data (so, screen->sync->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->gart->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->gart->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2);
+ so_data (so, 0);
+ so_data (so, screen->query->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->vram->handle);
+
+ so_method(so, screen->curie, 0x1ea4, 3);
+ so_data (so, 0x00000010);
+ so_data (so, 0x01000100);
+ so_data (so, 0xff800006);
+
+ /* vtxprog output routing */
+ so_method(so, screen->curie, 0x1fc4, 1);
+ so_data (so, 0x06144321);
+ so_method(so, screen->curie, 0x1fc8, 2);
+ so_data (so, 0xedcba987);
+ so_data (so, 0x00000021);
+ so_method(so, screen->curie, 0x1fd0, 1);
+ so_data (so, 0x00171615);
+ so_method(so, screen->curie, 0x1fd4, 1);
+ so_data (so, 0x001b1a19);
+
+ so_method(so, screen->curie, 0x1ef8, 1);
+ so_data (so, 0x0020ffff);
+ so_method(so, screen->curie, 0x1d64, 1);
+ so_data (so, 0x00d30000);
+ so_method(so, screen->curie, 0x1e94, 1);
+ so_data (so, 0x00000001);
+
+ so_emit(nvws, so);
+ so_ref(NULL, &so);
+ nvws->push_flush(nvws, 0, NULL);
+
+ screen->pipe.winsys = ws;
+ screen->pipe.destroy = nv40_screen_destroy;
+
+ screen->pipe.get_name = nv40_screen_get_name;
+ screen->pipe.get_vendor = nv40_screen_get_vendor;
+ screen->pipe.get_param = nv40_screen_get_param;
+ screen->pipe.get_paramf = nv40_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv40_screen_surface_format_supported;
+
+ screen->pipe.surface_map = nv40_surface_map;
+ screen->pipe.surface_unmap = nv40_surface_unmap;
+
+ nv40_screen_init_miptree_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h
new file mode 100644
index 0000000000..c04a1275a0
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_screen.h
@@ -0,0 +1,35 @@
+#ifndef __NV40_SCREEN_H__
+#define __NV40_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv40_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ unsigned cur_pctx;
+
+ /* HW graphics objects */
+ struct nouveau_grobj *curie;
+ struct nouveau_notifier *sync;
+
+ /* Query object resources */
+ struct nouveau_notifier *query;
+ struct nouveau_resource *query_heap;
+
+ /* Vtxprog resources */
+ struct nouveau_resource *vp_exec_heap;
+ struct nouveau_resource *vp_data_heap;
+
+ /* Current 3D state of channel */
+ struct nouveau_stateobj *state[NV40_STATE_MAX];
+};
+
+static INLINE struct nv40_screen *
+nv40_screen(struct pipe_screen *screen)
+{
+ return (struct nv40_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h
new file mode 100644
index 0000000000..854dccf548
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_shader.h
@@ -0,0 +1,556 @@
+#ifndef __NV40_SHADER_H__
+#define __NV40_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * The NV40 instruction set is very similar to NV30. Most fields are in
+ * a slightly different position in the instruction however.
+ *
+ * Merged instructions
+ * In some cases it is possible to put two instructions into one opcode
+ * slot. The rules for when this is OK is not entirely clear to me yet.
+ *
+ * There are separate writemasks and dest temp register fields for each
+ * grouping of instructions. There is however only one field with the
+ * ID of a result register. Writing to temp/result regs is selected by
+ * setting VEC_RESULT/SCA_RESULT.
+ *
+ * Temporary registers
+ * The source/dest temp register fields have been extended by 1 bit, to
+ * give a total of 32 temporary registers.
+ *
+ * Relative Addressing
+ * NV40 can use an address register to index into vertex attribute regs.
+ * This is done by putting the offset value into INPUT_SRC and setting
+ * the INDEX_INPUT flag.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details)
+ * There is a second condition code register on NV40, it's use is enabled
+ * by setting the COND_REG_SELECT_1 flag.
+ *
+ * Texture lookup
+ * TODO
+ */
+
+/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
+#define NV40_VP_INST_VEC_RESULT (1 << 30)
+/* uncertain.. */
+#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29)
+/* use address reg as index into attribs */
+#define NV40_VP_INST_INDEX_INPUT (1 << 27)
+#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25)
+#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV40_VP_INST_SRC2_ABS (1 << 23)
+#define NV40_VP_INST_SRC1_ABS (1 << 22)
+#define NV40_VP_INST_SRC0_ABS (1 << 21)
+#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15
+#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15)
+#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13)
+#define NV40_VP_INST_COND_SHIFT 10
+#define NV40_VP_INST_COND_MASK (0x7 << 10)
+# define NV40_VP_INST_COND_FL 0
+# define NV40_VP_INST_COND_LT 1
+# define NV40_VP_INST_COND_EQ 2
+# define NV40_VP_INST_COND_LE 3
+# define NV40_VP_INST_COND_GT 4
+# define NV40_VP_INST_COND_NE 5
+# define NV40_VP_INST_COND_GE 6
+# define NV40_VP_INST_COND_TR 7
+#define NV40_VP_INST_COND_SWZ_X_SHIFT 8
+#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8)
+#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6
+#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6)
+#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4
+#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4)
+#define NV40_VP_INST_COND_SWZ_W_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2)
+#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2)
+#define NV40_VP_INST_ADDR_SWZ_SHIFT 0
+#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0)
+#define NV40_VP_INST0_KNOWN ( \
+ NV40_VP_INST_INDEX_INPUT | \
+ NV40_VP_INST_COND_REG_SELECT_1 | \
+ NV40_VP_INST_ADDR_REG_SELECT_1 | \
+ NV40_VP_INST_SRC2_ABS | \
+ NV40_VP_INST_SRC1_ABS | \
+ NV40_VP_INST_SRC0_ABS | \
+ NV40_VP_INST_VEC_DEST_TEMP_MASK | \
+ NV40_VP_INST_COND_TEST_ENABLE | \
+ NV40_VP_INST_COND_MASK | \
+ NV40_VP_INST_COND_SWZ_ALL_MASK | \
+ NV40_VP_INST_ADDR_SWZ_MASK)
+
+/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
+#define NV40_VP_INST_VEC_OPCODE_SHIFT 22
+#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22)
+# define NV40_VP_INST_OP_NOP 0x00
+# define NV40_VP_INST_OP_MOV 0x01
+# define NV40_VP_INST_OP_MUL 0x02
+# define NV40_VP_INST_OP_ADD 0x03
+# define NV40_VP_INST_OP_MAD 0x04
+# define NV40_VP_INST_OP_DP3 0x05
+# define NV40_VP_INST_OP_DPH 0x06
+# define NV40_VP_INST_OP_DP4 0x07
+# define NV40_VP_INST_OP_DST 0x08
+# define NV40_VP_INST_OP_MIN 0x09
+# define NV40_VP_INST_OP_MAX 0x0A
+# define NV40_VP_INST_OP_SLT 0x0B
+# define NV40_VP_INST_OP_SGE 0x0C
+# define NV40_VP_INST_OP_ARL 0x0D
+# define NV40_VP_INST_OP_FRC 0x0E
+# define NV40_VP_INST_OP_FLR 0x0F
+# define NV40_VP_INST_OP_SEQ 0x10
+# define NV40_VP_INST_OP_SFL 0x11
+# define NV40_VP_INST_OP_SGT 0x12
+# define NV40_VP_INST_OP_SLE 0x13
+# define NV40_VP_INST_OP_SNE 0x14
+# define NV40_VP_INST_OP_STR 0x15
+# define NV40_VP_INST_OP_SSG 0x16
+# define NV40_VP_INST_OP_ARR 0x17
+# define NV40_VP_INST_OP_ARA 0x18
+# define NV40_VP_INST_OP_TXL 0x19
+#define NV40_VP_INST_SCA_OPCODE_SHIFT 27
+#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27)
+# define NV40_VP_INST_OP_NOP 0x00
+# define NV40_VP_INST_OP_MOV 0x01
+# define NV40_VP_INST_OP_RCP 0x02
+# define NV40_VP_INST_OP_RCC 0x03
+# define NV40_VP_INST_OP_RSQ 0x04
+# define NV40_VP_INST_OP_EXP 0x05
+# define NV40_VP_INST_OP_LOG 0x06
+# define NV40_VP_INST_OP_LIT 0x07
+# define NV40_VP_INST_OP_BRA 0x09
+# define NV40_VP_INST_OP_CAL 0x0B
+# define NV40_VP_INST_OP_RET 0x0C
+# define NV40_VP_INST_OP_LG2 0x0D
+# define NV40_VP_INST_OP_EX2 0x0E
+# define NV40_VP_INST_OP_SIN 0x0F
+# define NV40_VP_INST_OP_COS 0x10
+# define NV40_VP_INST_OP_PUSHA 0x13
+# define NV40_VP_INST_OP_POPA 0x14
+#define NV40_VP_INST_CONST_SRC_SHIFT 12
+#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12)
+#define NV40_VP_INST_INPUT_SRC_SHIFT 8
+#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8)
+# define NV40_VP_INST_IN_POS 0
+# define NV40_VP_INST_IN_WEIGHT 1
+# define NV40_VP_INST_IN_NORMAL 2
+# define NV40_VP_INST_IN_COL0 3
+# define NV40_VP_INST_IN_COL1 4
+# define NV40_VP_INST_IN_FOGC 5
+# define NV40_VP_INST_IN_TC0 8
+# define NV40_VP_INST_IN_TC(n) (8+n)
+#define NV40_VP_INST_SRC0H_SHIFT 0
+#define NV40_VP_INST_SRC0H_MASK (0xFF << 0)
+#define NV40_VP_INST1_KNOWN ( \
+ NV40_VP_INST_VEC_OPCODE_MASK | \
+ NV40_VP_INST_SCA_OPCODE_MASK | \
+ NV40_VP_INST_CONST_SRC_MASK | \
+ NV40_VP_INST_INPUT_SRC_MASK | \
+ NV40_VP_INST_SRC0H_MASK \
+ )
+
+/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
+#define NV40_VP_INST_SRC0L_SHIFT 23
+#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23)
+#define NV40_VP_INST_SRC1_SHIFT 6
+#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6)
+#define NV40_VP_INST_SRC2H_SHIFT 0
+#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
+#define NV40_VP_INST_IADDRH_SHIFT 0
+#define NV40_VP_INST_IADDRH_MASK (0x1F << 0)
+
+/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
+#define NV40_VP_INST_IADDRL_SHIFT 29
+#define NV40_VP_INST_IADDRL_MASK (7 << 29)
+#define NV40_VP_INST_SRC2L_SHIFT 21
+#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21)
+#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17
+#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17)
+# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20)
+# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19)
+# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18)
+# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17)
+#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13
+#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13)
+# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16)
+# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15)
+# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14)
+# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13)
+#define NV40_VP_INST_SCA_RESULT (1 << 12)
+#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7
+#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7)
+#define NV40_VP_INST_DEST_SHIFT 2
+#define NV40_VP_INST_DEST_MASK (31 << 2)
+# define NV40_VP_INST_DEST_POS 0
+# define NV40_VP_INST_DEST_COL0 1
+# define NV40_VP_INST_DEST_COL1 2
+# define NV40_VP_INST_DEST_BFC0 3
+# define NV40_VP_INST_DEST_BFC1 4
+# define NV40_VP_INST_DEST_FOGC 5
+# define NV40_VP_INST_DEST_PSZ 6
+# define NV40_VP_INST_DEST_TC0 7
+# define NV40_VP_INST_DEST_TC(n) (7+n)
+# define NV40_VP_INST_DEST_TEMP 0x1F
+#define NV40_VP_INST_INDEX_CONST (1 << 1)
+#define NV40_VP_INST_LAST (1 << 0)
+#define NV40_VP_INST3_KNOWN ( \
+ NV40_VP_INST_SRC2L_MASK |\
+ NV40_VP_INST_SCA_WRITEMASK_MASK |\
+ NV40_VP_INST_VEC_WRITEMASK_MASK |\
+ NV40_VP_INST_SCA_DEST_TEMP_MASK |\
+ NV40_VP_INST_DEST_MASK |\
+ NV40_VP_INST_INDEX_CONST)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV40_VP_SRC0_HIGH_SHIFT 9
+#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00
+#define NV40_VP_SRC0_LOW_MASK 0x000001FF
+#define NV40_VP_SRC2_HIGH_SHIFT 11
+#define NV40_VP_SRC2_HIGH_MASK 0x0001F800
+#define NV40_VP_SRC2_LOW_MASK 0x000007FF
+
+/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
+#define NV40_VP_SRC_NEGATE (1 << 16)
+#define NV40_VP_SRC_SWZ_X_SHIFT 14
+#define NV40_VP_SRC_SWZ_X_MASK (3 << 14)
+#define NV40_VP_SRC_SWZ_Y_SHIFT 12
+#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12)
+#define NV40_VP_SRC_SWZ_Z_SHIFT 10
+#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10)
+#define NV40_VP_SRC_SWZ_W_SHIFT 8
+#define NV40_VP_SRC_SWZ_W_MASK (3 << 8)
+#define NV40_VP_SRC_SWZ_ALL_SHIFT 8
+#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8)
+#define NV40_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2)
+#define NV40_VP_SRC_REG_TYPE_SHIFT 0
+#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0)
+# define NV40_VP_SRC_REG_TYPE_UNK0 0
+# define NV40_VP_SRC_REG_TYPE_TEMP 1
+# define NV40_VP_SRC_REG_TYPE_INPUT 2
+# define NV40_VP_SRC_REG_TYPE_CONST 3
+
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ * 0 - Opcode, output reg/mask, ATTRIB source
+ * 1 - Source 0
+ * 2 - Source 1
+ * 3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ * result.color == R0.xyzw
+ * result.depth == R1.z
+ * When the fragprog contains instructions to write depth,
+ * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords. As such instructions such as:
+ *
+ * ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and
+ * SWIZZLE_ONE.
+ *
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as
+ * SWIZZLE_ZERO is implemented simply by not writing to the relevant components
+ * of the destination.
+ *
+ * Looping
+ * Loops appear to be fairly expensive on NV40 at least, the proprietary
+ * driver goes to a lot of effort to avoid using the native looping
+ * instructions. If the total number of *executed* instructions between
+ * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
+ * The maximum loop count is 255.
+ *
+ * Conditional execution
+ * TODO
+ *
+ * Non-native instructions:
+ * LIT
+ * LRP - MAD+MAD
+ * SUB - ADD, negate second source
+ * RSQ - LG2 + EX2
+ * POW - LG2 + MUL + EX2
+ * SCS - COS + SIN
+ * XPD
+ * DP2 - MUL + ADD
+ * NRM
+ */
+
+//== Opcode / Destination selection ==
+#define NV40_FP_OP_PROGRAM_END (1 << 0)
+#define NV40_FP_OP_OUT_REG_SHIFT 1
+#define NV40_FP_OP_OUT_REG_MASK (63 << 1)
+/* Needs to be set when writing outputs to get expected result.. */
+#define NV40_FP_OP_OUT_REG_HALF (1 << 7)
+#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8)
+#define NV40_FP_OP_OUTMASK_SHIFT 9
+#define NV40_FP_OP_OUTMASK_MASK (0xF << 9)
+# define NV40_FP_OP_OUT_X (1 << 9)
+# define NV40_FP_OP_OUT_Y (1 <<10)
+# define NV40_FP_OP_OUT_Z (1 <<11)
+# define NV40_FP_OP_OUT_W (1 <<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NV40_FP_OP_INPUT_SRC_SHIFT 13
+#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13)
+# define NV40_FP_OP_INPUT_SRC_POSITION 0x0
+# define NV40_FP_OP_INPUT_SRC_COL0 0x1
+# define NV40_FP_OP_INPUT_SRC_COL1 0x2
+# define NV40_FP_OP_INPUT_SRC_FOGC 0x3
+# define NV40_FP_OP_INPUT_SRC_TC0 0x4
+# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n)
+# define NV40_FP_OP_INPUT_SRC_FACING 0xE
+#define NV40_FP_OP_TEX_UNIT_SHIFT 17
+#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17)
+#define NV40_FP_OP_PRECISION_SHIFT 22
+#define NV40_FP_OP_PRECISION_MASK (3 << 22)
+# define NV40_FP_PRECISION_FP32 0
+# define NV40_FP_PRECISION_FP16 1
+# define NV40_FP_PRECISION_FX12 2
+#define NV40_FP_OP_OPCODE_SHIFT 24
+#define NV40_FP_OP_OPCODE_MASK (0x3F << 24)
+# define NV40_FP_OP_OPCODE_NOP 0x00
+# define NV40_FP_OP_OPCODE_MOV 0x01
+# define NV40_FP_OP_OPCODE_MUL 0x02
+# define NV40_FP_OP_OPCODE_ADD 0x03
+# define NV40_FP_OP_OPCODE_MAD 0x04
+# define NV40_FP_OP_OPCODE_DP3 0x05
+# define NV40_FP_OP_OPCODE_DP4 0x06
+# define NV40_FP_OP_OPCODE_DST 0x07
+# define NV40_FP_OP_OPCODE_MIN 0x08
+# define NV40_FP_OP_OPCODE_MAX 0x09
+# define NV40_FP_OP_OPCODE_SLT 0x0A
+# define NV40_FP_OP_OPCODE_SGE 0x0B
+# define NV40_FP_OP_OPCODE_SLE 0x0C
+# define NV40_FP_OP_OPCODE_SGT 0x0D
+# define NV40_FP_OP_OPCODE_SNE 0x0E
+# define NV40_FP_OP_OPCODE_SEQ 0x0F
+# define NV40_FP_OP_OPCODE_FRC 0x10
+# define NV40_FP_OP_OPCODE_FLR 0x11
+# define NV40_FP_OP_OPCODE_KIL 0x12
+# define NV40_FP_OP_OPCODE_PK4B 0x13
+# define NV40_FP_OP_OPCODE_UP4B 0x14
+/* DDX/DDY can only write to XY */
+# define NV40_FP_OP_OPCODE_DDX 0x15
+# define NV40_FP_OP_OPCODE_DDY 0x16
+# define NV40_FP_OP_OPCODE_TEX 0x17
+# define NV40_FP_OP_OPCODE_TXP 0x18
+# define NV40_FP_OP_OPCODE_TXD 0x19
+# define NV40_FP_OP_OPCODE_RCP 0x1A
+# define NV40_FP_OP_OPCODE_EX2 0x1C
+# define NV40_FP_OP_OPCODE_LG2 0x1D
+# define NV40_FP_OP_OPCODE_STR 0x20
+# define NV40_FP_OP_OPCODE_SFL 0x21
+# define NV40_FP_OP_OPCODE_COS 0x22
+# define NV40_FP_OP_OPCODE_SIN 0x23
+# define NV40_FP_OP_OPCODE_PK2H 0x24
+# define NV40_FP_OP_OPCODE_UP2H 0x25
+# define NV40_FP_OP_OPCODE_PK4UB 0x27
+# define NV40_FP_OP_OPCODE_UP4UB 0x28
+# define NV40_FP_OP_OPCODE_PK2US 0x29
+# define NV40_FP_OP_OPCODE_UP2US 0x2A
+# define NV40_FP_OP_OPCODE_DP2A 0x2E
+# define NV40_FP_OP_OPCODE_TXL 0x2F
+# define NV40_FP_OP_OPCODE_TXB 0x31
+# define NV40_FP_OP_OPCODE_DIV 0x3A
+# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C
+/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
+# define NV40_FP_OP_BRA_OPCODE_BRK 0x0
+# define NV40_FP_OP_BRA_OPCODE_CAL 0x1
+# define NV40_FP_OP_BRA_OPCODE_IF 0x2
+# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3
+# define NV40_FP_OP_BRA_OPCODE_REP 0x4
+# define NV40_FP_OP_BRA_OPCODE_RET 0x5
+#define NV40_FP_OP_OUT_SAT (1 << 31)
+
+/* high order bits of SRC0 */
+#define NV40_FP_OP_OUT_ABS (1 << 29)
+#define NV40_FP_OP_COND_SWZ_W_SHIFT 27
+#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27)
+#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25
+#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25)
+#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23
+#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23)
+#define NV40_FP_OP_COND_SWZ_X_SHIFT 21
+#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21)
+#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21
+#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
+#define NV40_FP_OP_COND_SHIFT 18
+#define NV40_FP_OP_COND_MASK (0x07 << 18)
+# define NV40_FP_OP_COND_FL 0
+# define NV40_FP_OP_COND_LT 1
+# define NV40_FP_OP_COND_EQ 2
+# define NV40_FP_OP_COND_LE 3
+# define NV40_FP_OP_COND_GT 4
+# define NV40_FP_OP_COND_NE 5
+# define NV40_FP_OP_COND_GE 6
+# define NV40_FP_OP_COND_TR 7
+
+/* high order bits of SRC1 */
+#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31)
+#define NV40_FP_OP_DST_SCALE_SHIFT 28
+#define NV40_FP_OP_DST_SCALE_MASK (3 << 28)
+#define NV40_FP_OP_DST_SCALE_1X 0
+#define NV40_FP_OP_DST_SCALE_2X 1
+#define NV40_FP_OP_DST_SCALE_4X 2
+#define NV40_FP_OP_DST_SCALE_8X 3
+#define NV40_FP_OP_DST_SCALE_INV_2X 5
+#define NV40_FP_OP_DST_SCALE_INV_4X 6
+#define NV40_FP_OP_DST_SCALE_INV_8X 7
+
+/* SRC1 LOOP */
+#define NV40_FP_OP_LOOP_INCR_SHIFT 19
+#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19)
+#define NV40_FP_OP_LOOP_INDEX_SHIFT 10
+#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10)
+#define NV40_FP_OP_LOOP_COUNT_SHIFT 2
+#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2)
+
+/* SRC1 IF */
+#define NV40_FP_OP_ELSE_ID_SHIFT 2
+#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2)
+
+/* SRC1 CAL */
+#define NV40_FP_OP_IADDR_SHIFT 2
+#define NV40_FP_OP_IADDR_MASK (0xFF << 2)
+
+/* SRC1 REP
+ * I have no idea why there are 3 count values here.. but they
+ * have always been filled with the same value in my tests so
+ * far..
+ */
+#define NV40_FP_OP_REP_COUNT1_SHIFT 2
+#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2)
+#define NV40_FP_OP_REP_COUNT2_SHIFT 10
+#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10)
+#define NV40_FP_OP_REP_COUNT3_SHIFT 19
+#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19)
+
+/* SRC2 REP/IF */
+#define NV40_FP_OP_END_ID_SHIFT 2
+#define NV40_FP_OP_END_ID_MASK (0xFF << 2)
+
+// SRC2 high-order
+#define NV40_FP_OP_INDEX_INPUT (1 << 30)
+#define NV40_FP_OP_ADDR_INDEX_SHIFT 19
+#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19)
+
+//== Register selection ==
+#define NV40_FP_REG_TYPE_SHIFT 0
+#define NV40_FP_REG_TYPE_MASK (3 << 0)
+# define NV40_FP_REG_TYPE_TEMP 0
+# define NV40_FP_REG_TYPE_INPUT 1
+# define NV40_FP_REG_TYPE_CONST 2
+#define NV40_FP_REG_SRC_SHIFT 2
+#define NV40_FP_REG_SRC_MASK (63 << 2)
+#define NV40_FP_REG_SRC_HALF (1 << 8)
+#define NV40_FP_REG_SWZ_ALL_SHIFT 9
+#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9)
+#define NV40_FP_REG_SWZ_X_SHIFT 9
+#define NV40_FP_REG_SWZ_X_MASK (3 << 9)
+#define NV40_FP_REG_SWZ_Y_SHIFT 11
+#define NV40_FP_REG_SWZ_Y_MASK (3 << 11)
+#define NV40_FP_REG_SWZ_Z_SHIFT 13
+#define NV40_FP_REG_SWZ_Z_MASK (3 << 13)
+#define NV40_FP_REG_SWZ_W_SHIFT 15
+#define NV40_FP_REG_SWZ_W_MASK (3 << 15)
+# define NV40_FP_SWIZZLE_X 0
+# define NV40_FP_SWIZZLE_Y 1
+# define NV40_FP_SWIZZLE_Z 2
+# define NV40_FP_SWIZZLE_W 3
+#define NV40_FP_REG_NEGATE (1 << 17)
+
+#ifndef NV40_SHADER_NO_FUCKEDNESS
+#define NV40SR_NONE 0
+#define NV40SR_OUTPUT 1
+#define NV40SR_INPUT 2
+#define NV40SR_TEMP 3
+#define NV40SR_CONST 4
+
+struct nv40_sreg {
+ int type;
+ int index;
+
+ int dst_scale;
+
+ int negate;
+ int abs;
+ int swz[4];
+
+ int cc_update;
+ int cc_update_reg;
+ int cc_test;
+ int cc_test_reg;
+ int cc_swz[4];
+};
+
+static INLINE struct nv40_sreg
+nv40_sr(int type, int index)
+{
+ struct nv40_sreg temp = {
+ .type = type,
+ .index = index,
+ .dst_scale = DEF_SCALE,
+ .abs = 0,
+ .negate = 0,
+ .swz = { 0, 1, 2, 3 },
+ .cc_update = 0,
+ .cc_update_reg = 0,
+ .cc_test = DEF_CTEST,
+ .cc_test_reg = 0,
+ .cc_swz = { 0, 1, 2, 3 },
+ };
+ return temp;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w)
+{
+ struct nv40_sreg dst = src;
+
+ dst.swz[SWZ_X] = src.swz[x];
+ dst.swz[SWZ_Y] = src.swz[y];
+ dst.swz[SWZ_Z] = src.swz[z];
+ dst.swz[SWZ_W] = src.swz[w];
+ return dst;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_neg(struct nv40_sreg src)
+{
+ src.negate = !src.negate;
+ return src;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_abs(struct nv40_sreg src)
+{
+ src.abs = 1;
+ return src;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_scale(struct nv40_sreg src, int scale)
+{
+ src.dst_scale = scale;
+ return src;
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
new file mode 100644
index 0000000000..255c4b294d
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -0,0 +1,740 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "draw/draw_context.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv40_context.h"
+#include "nv40_state.h"
+
+static void *
+nv40_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso));
+ struct nouveau_stateobj *so = so_new(16, 0);
+
+ if (cso->blend_enable) {
+ so_method(so, curie, NV40TCL_BLEND_ENABLE, 3);
+ so_data (so, 1);
+ so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) |
+ nvgl_blend_func(cso->rgb_src_factor));
+ so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 |
+ nvgl_blend_func(cso->rgb_dst_factor));
+ so_method(so, curie, NV40TCL_BLEND_EQUATION, 1);
+ so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 |
+ nvgl_blend_eqn(cso->rgb_func));
+ } else {
+ so_method(so, curie, NV40TCL_BLEND_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, curie, NV40TCL_COLOR_MASK, 1);
+ so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
+ ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
+ ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) |
+ ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0)));
+
+ if (cso->logicop_enable) {
+ so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2);
+ so_data (so, 1);
+ so_data (so, nvgl_logicop_func(cso->logicop_func));
+ } else {
+ so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, curie, NV40TCL_DITHER_ENABLE, 1);
+ so_data (so, cso->dither ? 1 : 0);
+
+ so_ref(so, &bso->so);
+ bso->pipe = *cso;
+ return (void *)bso;
+}
+
+static void
+nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->blend = hwcso;
+ nv40->dirty |= NV40_NEW_BLEND;
+}
+
+static void
+nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_blend_state *bso = hwcso;
+
+ so_ref(NULL, &bso->so);
+ FREE(bso);
+}
+
+
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV40TCL_TEX_WRAP_S_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV40TCL_TEX_WRAP_S_CLAMP;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP;
+ break;
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV40TCL_TEX_WRAP_S_REPEAT;
+ break;
+ }
+
+ return ret >> NV40TCL_TEX_WRAP_S_SHIFT;
+}
+
+static void *
+nv40_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv40_sampler_state *ps;
+ uint32_t filter = 0;
+
+ ps = MALLOC(sizeof(struct nv40_sampler_state));
+
+ ps->fmt = 0;
+ if (!cso->normalized_coords)
+ ps->fmt |= NV40TCL_TEX_FORMAT_RECT;
+
+ ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) |
+ (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT));
+
+ ps->en = 0;
+ if (cso->max_anisotropy >= 2.0) {
+ /* no idea, binary driver sets it, works without it.. meh.. */
+ ps->wrap |= (1 << 5);
+
+ if (cso->max_anisotropy >= 16.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X;
+ } else
+ if (cso->max_anisotropy >= 12.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X;
+ } else
+ if (cso->max_anisotropy >= 10.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X;
+ } else
+ if (cso->max_anisotropy >= 8.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 6.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X;
+ } else
+ if (cso->max_anisotropy >= 4.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X;
+ } else {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X;
+ }
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV40TCL_TEX_FILTER_MAG_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV40TCL_TEX_FILTER_MAG_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV40TCL_TEX_FILTER_MIN_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV40TCL_TEX_FILTER_MIN_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ps->filt = filter;
+
+ {
+ float limit;
+
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+ ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
+
+ limit = CLAMP(cso->max_lod, 0.0, 15.0);
+ ps->en |= (int)(limit * 256.0) << 7;
+
+ limit = CLAMP(cso->min_lod, 0.0, 15.0);
+ ps->en |= (int)(limit * 256.0) << 19;
+ }
+
+
+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ switch (cso->compare_func) {
+ case PIPE_FUNC_NEVER:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER;
+ break;
+ case PIPE_FUNC_GREATER:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER;
+ break;
+ case PIPE_FUNC_EQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL;
+ break;
+ case PIPE_FUNC_LESS:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS;
+ break;
+ default:
+ break;
+ }
+ }
+
+ ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+ (float_to_ubyte(cso->border_color[0]) << 16) |
+ (float_to_ubyte(cso->border_color[1]) << 8) |
+ (float_to_ubyte(cso->border_color[2]) << 0));
+
+ return (void *)ps;
+}
+
+static void
+nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv40->tex_sampler[unit] = sampler[unit];
+ nv40->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nv40->nr_samplers; unit++) {
+ nv40->tex_sampler[unit] = NULL;
+ nv40->dirty_samplers |= (1 << unit);
+ }
+
+ nv40->nr_samplers = nr;
+ nv40->dirty |= NV40_NEW_SAMPLER;
+}
+
+static void
+nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ pipe_texture_reference((struct pipe_texture **)
+ &nv40->tex_miptree[unit], miptree[unit]);
+ nv40->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nv40->nr_textures; unit++) {
+ pipe_texture_reference((struct pipe_texture **)
+ &nv40->tex_miptree[unit], NULL);
+ nv40->dirty_samplers |= (1 << unit);
+ }
+
+ nv40->nr_textures = nr;
+ nv40->dirty |= NV40_NEW_SAMPLER;
+}
+
+static void *
+nv40_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
+ struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_grobj *curie = nv40->screen->curie;
+
+ /*XXX: ignored:
+ * light_twoside
+ * point_smooth -nohw
+ * multisample
+ */
+
+ so_method(so, curie, NV40TCL_SHADE_MODEL, 1);
+ so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT :
+ NV40TCL_SHADE_MODEL_SMOOTH);
+
+ so_method(so, curie, NV40TCL_LINE_WIDTH, 2);
+ so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff);
+ so_data (so, cso->line_smooth ? 1 : 0);
+ so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2);
+ so_data (so, cso->line_stipple_enable ? 1 : 0);
+ so_data (so, (cso->line_stipple_pattern << 16) |
+ cso->line_stipple_factor);
+
+ so_method(so, curie, NV40TCL_POINT_SIZE, 1);
+ so_data (so, fui(cso->point_size));
+
+ so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6);
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV40TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ }
+ so_data(so, NV40TCL_FRONT_FACE_CCW);
+ } else {
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV40TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ }
+ so_data(so, NV40TCL_FRONT_FACE_CW);
+ }
+ so_data(so, cso->poly_smooth ? 1 : 0);
+ so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);
+
+ so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, cso->poly_stipple_enable ? 1 : 0);
+
+ so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if (cso->offset_cw || cso->offset_ccw) {
+ so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2);
+ so_data (so, fui(cso->offset_scale));
+ so_data (so, fui(cso->offset_units * 2));
+ }
+
+ so_method(so, curie, NV40TCL_POINT_SPRITE, 1);
+ if (cso->point_sprite) {
+ unsigned psctl = (1 << 0), i;
+
+ for (i = 0; i < 8; i++) {
+ if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ psctl |= (1 << (8 + i));
+ }
+
+ so_data(so, psctl);
+ } else {
+ so_data(so, 0);
+ }
+
+ so_ref(so, &rsso->so);
+ rsso->pipe = *cso;
+ return (void *)rsso;
+}
+
+static void
+nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->rasterizer = hwcso;
+ nv40->dirty |= NV40_NEW_RAST;
+ nv40->draw_dirty |= NV40_NEW_RAST;
+}
+
+static void
+nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_rasterizer_state *rsso = hwcso;
+
+ so_ref(NULL, &rsso->so);
+ FREE(rsso);
+}
+
+static void *
+nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
+ struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_grobj *curie = nv40->screen->curie;
+
+ so_method(so, curie, NV40TCL_DEPTH_FUNC, 3);
+ so_data (so, nvgl_comparison_op(cso->depth.func));
+ so_data (so, cso->depth.writemask ? 1 : 0);
+ so_data (so, cso->depth.enabled ? 1 : 0);
+
+ so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3);
+ so_data (so, cso->alpha.enabled ? 1 : 0);
+ so_data (so, nvgl_comparison_op(cso->alpha.func));
+ so_data (so, float_to_ubyte(cso->alpha.ref));
+
+ if (cso->stencil[0].enabled) {
+ so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 8);
+ so_data (so, cso->stencil[0].enabled ? 1 : 0);
+ so_data (so, cso->stencil[0].write_mask);
+ so_data (so, nvgl_comparison_op(cso->stencil[0].func));
+ so_data (so, cso->stencil[0].ref_value);
+ so_data (so, cso->stencil[0].value_mask);
+ so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ } else {
+ so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 8);
+ so_data (so, cso->stencil[1].enabled ? 1 : 0);
+ so_data (so, cso->stencil[1].write_mask);
+ so_data (so, nvgl_comparison_op(cso->stencil[1].func));
+ so_data (so, cso->stencil[1].ref_value);
+ so_data (so, cso->stencil[1].value_mask);
+ so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ } else {
+ so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_ref(so, &zsaso->so);
+ zsaso->pipe = *cso;
+ return (void *)zsaso;
+}
+
+static void
+nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->zsa = hwcso;
+ nv40->dirty |= NV40_NEW_ZSA;
+}
+
+static void
+nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_zsa_state *zsaso = hwcso;
+
+ so_ref(NULL, &zsaso->so);
+ FREE(zsaso);
+}
+
+static void *
+nv40_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_vertex_program *vp;
+
+ vp = CALLOC(1, sizeof(struct nv40_vertex_program));
+ vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe);
+
+ return (void *)vp;
+}
+
+static void
+nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->vertprog = hwcso;
+ nv40->dirty |= NV40_NEW_VERTPROG;
+ nv40->draw_dirty |= NV40_NEW_VERTPROG;
+}
+
+static void
+nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_vertex_program *vp = hwcso;
+
+ draw_delete_vertex_shader(nv40->draw, vp->draw);
+ nv40_vertprog_destroy(nv40, vp);
+ FREE((void*)vp->pipe.tokens);
+ FREE(vp);
+}
+
+static void *
+nv40_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv40_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nv40_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ tgsi_scan_shader(fp->pipe.tokens, &fp->info);
+
+ return (void *)fp;
+}
+
+static void
+nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->fragprog = hwcso;
+ nv40->dirty |= NV40_NEW_FRAGPROG;
+}
+
+static void
+nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_fragment_program *fp = hwcso;
+
+ nv40_fragprog_destroy(nv40, fp);
+ FREE((void*)fp->pipe.tokens);
+ FREE(fp);
+}
+
+static void
+nv40_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->blend_colour = *bcol;
+ nv40->dirty |= NV40_NEW_BCOL;
+}
+
+static void
+nv40_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->clip = *clip;
+ nv40->dirty |= NV40_NEW_UCP;
+ nv40->draw_dirty |= NV40_NEW_UCP;
+}
+
+static void
+nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->constbuf[shader] = buf->buffer;
+ nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float));
+
+ if (shader == PIPE_SHADER_VERTEX) {
+ nv40->dirty |= NV40_NEW_VERTPROG;
+ } else
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ nv40->dirty |= NV40_NEW_FRAGPROG;
+ }
+}
+
+static void
+nv40_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->framebuffer = *fb;
+ nv40->dirty |= NV40_NEW_FB;
+}
+
+static void
+nv40_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ memcpy(nv40->stipple, stipple->stipple, 4 * 32);
+ nv40->dirty |= NV40_NEW_STIPPLE;
+}
+
+static void
+nv40_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->scissor = *s;
+ nv40->dirty |= NV40_NEW_SCISSOR;
+}
+
+static void
+nv40_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->viewport = *vpt;
+ nv40->dirty |= NV40_NEW_VIEWPORT;
+ nv40->draw_dirty |= NV40_NEW_VIEWPORT;
+}
+
+static void
+nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count);
+ nv40->vtxbuf_nr = count;
+
+ nv40->dirty |= NV40_NEW_ARRAYS;
+ nv40->draw_dirty |= NV40_NEW_ARRAYS;
+}
+
+static void
+nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ memcpy(nv40->vtxelt, ve, sizeof(*ve) * count);
+ nv40->vtxelt_nr = count;
+
+ nv40->dirty |= NV40_NEW_ARRAYS;
+ nv40->draw_dirty |= NV40_NEW_ARRAYS;
+}
+
+static void
+nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->edgeflags = bitfield;
+ nv40->dirty |= NV40_NEW_ARRAYS;
+ nv40->draw_dirty |= NV40_NEW_ARRAYS;
+}
+
+void
+nv40_init_state_functions(struct nv40_context *nv40)
+{
+ nv40->pipe.create_blend_state = nv40_blend_state_create;
+ nv40->pipe.bind_blend_state = nv40_blend_state_bind;
+ nv40->pipe.delete_blend_state = nv40_blend_state_delete;
+
+ nv40->pipe.create_sampler_state = nv40_sampler_state_create;
+ nv40->pipe.bind_sampler_states = nv40_sampler_state_bind;
+ nv40->pipe.delete_sampler_state = nv40_sampler_state_delete;
+ nv40->pipe.set_sampler_textures = nv40_set_sampler_texture;
+
+ nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create;
+ nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind;
+ nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete;
+
+ nv40->pipe.create_depth_stencil_alpha_state =
+ nv40_depth_stencil_alpha_state_create;
+ nv40->pipe.bind_depth_stencil_alpha_state =
+ nv40_depth_stencil_alpha_state_bind;
+ nv40->pipe.delete_depth_stencil_alpha_state =
+ nv40_depth_stencil_alpha_state_delete;
+
+ nv40->pipe.create_vs_state = nv40_vp_state_create;
+ nv40->pipe.bind_vs_state = nv40_vp_state_bind;
+ nv40->pipe.delete_vs_state = nv40_vp_state_delete;
+
+ nv40->pipe.create_fs_state = nv40_fp_state_create;
+ nv40->pipe.bind_fs_state = nv40_fp_state_bind;
+ nv40->pipe.delete_fs_state = nv40_fp_state_delete;
+
+ nv40->pipe.set_blend_color = nv40_set_blend_color;
+ nv40->pipe.set_clip_state = nv40_set_clip_state;
+ nv40->pipe.set_constant_buffer = nv40_set_constant_buffer;
+ nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state;
+ nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple;
+ nv40->pipe.set_scissor_state = nv40_set_scissor_state;
+ nv40->pipe.set_viewport_state = nv40_set_viewport_state;
+
+ nv40->pipe.set_edgeflags = nv40_set_edgeflags;
+ nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers;
+ nv40->pipe.set_vertex_elements = nv40_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h
new file mode 100644
index 0000000000..9c55903ae3
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state.h
@@ -0,0 +1,91 @@
+#ifndef __NV40_STATE_H__
+#define __NV40_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv40_sampler_state {
+ uint32_t fmt;
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+struct nv40_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv40_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nv40_vertex_program {
+ struct pipe_shader_state pipe;
+
+ struct draw_vertex_shader *draw;
+
+ boolean translated;
+
+ struct pipe_clip_state ucp;
+
+ struct nv40_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv40_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+ uint32_t clip_ctrl;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv40_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv40_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct pipe_texture *shadow_tex;
+ struct pipe_surface *shadow_surface;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c
new file mode 100644
index 0000000000..95e6d7394f
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_blend.c
@@ -0,0 +1,40 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_blend_validate(struct nv40_context *nv40)
+{
+ so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_blend = {
+ .validate = nv40_state_blend_validate,
+ .dirty = {
+ .pipe = NV40_NEW_BLEND,
+ .hw = NV40_STATE_BLEND
+ }
+};
+
+static boolean
+nv40_state_blend_colour_validate(struct nv40_context *nv40)
+{
+ struct nouveau_stateobj *so = so_new(2, 0);
+ struct pipe_blend_color *bcol = &nv40->blend_colour;
+
+ so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1);
+ so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) |
+ (float_to_ubyte(bcol->color[0]) << 16) |
+ (float_to_ubyte(bcol->color[1]) << 8) |
+ (float_to_ubyte(bcol->color[2]) << 0)));
+
+ so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_blend_colour = {
+ .validate = nv40_state_blend_colour_validate,
+ .dirty = {
+ .pipe = NV40_NEW_BCOL,
+ .hw = NV40_STATE_BCOL
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
new file mode 100644
index 0000000000..52ec4c044b
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -0,0 +1,184 @@
+#include "nv40_context.h"
+#include "nv40_state.h"
+#include "draw/draw_context.h"
+
+static struct nv40_state_entry *render_states[] = {
+ &nv40_state_framebuffer,
+ &nv40_state_rasterizer,
+ &nv40_state_scissor,
+ &nv40_state_stipple,
+ &nv40_state_fragprog,
+ &nv40_state_fragtex,
+ &nv40_state_vertprog,
+ &nv40_state_blend,
+ &nv40_state_blend_colour,
+ &nv40_state_zsa,
+ &nv40_state_viewport,
+ &nv40_state_vbo,
+ NULL
+};
+
+static struct nv40_state_entry *swtnl_states[] = {
+ &nv40_state_framebuffer,
+ &nv40_state_rasterizer,
+ &nv40_state_scissor,
+ &nv40_state_stipple,
+ &nv40_state_fragprog,
+ &nv40_state_fragtex,
+ &nv40_state_vertprog,
+ &nv40_state_blend,
+ &nv40_state_blend_colour,
+ &nv40_state_zsa,
+ &nv40_state_viewport,
+ &nv40_state_vtxfmt,
+ NULL
+};
+
+static void
+nv40_state_do_validate(struct nv40_context *nv40,
+ struct nv40_state_entry **states)
+{
+ const struct pipe_framebuffer_state *fb = &nv40->framebuffer;
+ unsigned i;
+
+ for (i = 0; i < fb->num_cbufs; i++)
+ fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
+ if (fb->zsbuf)
+ fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
+
+ while (*states) {
+ struct nv40_state_entry *e = *states;
+
+ if (nv40->dirty & e->dirty.pipe) {
+ if (e->validate(nv40))
+ nv40->state.dirty |= (1ULL << e->dirty.hw);
+ }
+
+ states++;
+ }
+ nv40->dirty = 0;
+}
+
+void
+nv40_state_emit(struct nv40_context *nv40)
+{
+ struct nv40_state *state = &nv40->state;
+ struct nv40_screen *screen = nv40->screen;
+ unsigned i, samplers;
+ uint64_t states;
+
+ if (nv40->pctx_id != screen->cur_pctx) {
+ for (i = 0; i < NV40_STATE_MAX; i++) {
+ if (state->hw[i] && screen->state[i] != state->hw[i])
+ state->dirty |= (1ULL << i);
+ }
+
+ screen->cur_pctx = nv40->pctx_id;
+ }
+
+ for (i = 0, states = state->dirty; states; i++) {
+ if (!(states & (1ULL << i)))
+ continue;
+ so_ref (state->hw[i], &nv40->screen->state[i]);
+ if (state->hw[i])
+ so_emit(nv40->nvws, nv40->screen->state[i]);
+ states &= ~(1ULL << i);
+ }
+
+ if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) |
+ (1ULL << NV40_STATE_FRAGTEX0))) {
+ BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (2);
+ BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (1);
+ }
+
+ state->dirty = 0;
+
+ so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]);
+ for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
+ if (!(samplers & (1 << i)))
+ continue;
+ so_emit_reloc_markers(nv40->nvws,
+ state->hw[NV40_STATE_FRAGTEX0+i]);
+ samplers &= ~(1ULL << i);
+ }
+ so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]);
+ if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW)
+ so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]);
+}
+
+boolean
+nv40_state_validate(struct nv40_context *nv40)
+{
+ boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE;
+
+ if (nv40->render_mode != HW) {
+ /* Don't even bother trying to go back to hw if none
+ * of the states that caused swtnl previously have changed.
+ */
+ if ((nv40->fallback_swtnl & nv40->dirty)
+ != nv40->fallback_swtnl)
+ return FALSE;
+
+ /* Attempt to go to hwtnl again */
+ nv40->pipe.flush(&nv40->pipe, 0, NULL);
+ nv40->dirty |= (NV40_NEW_VIEWPORT |
+ NV40_NEW_VERTPROG |
+ NV40_NEW_ARRAYS);
+ nv40->render_mode = HW;
+ }
+
+ nv40_state_do_validate(nv40, render_states);
+ if (nv40->fallback_swtnl || nv40->fallback_swrast)
+ return FALSE;
+
+ if (was_sw)
+ NOUVEAU_ERR("swtnl->hw\n");
+
+ return TRUE;
+}
+
+boolean
+nv40_state_validate_swtnl(struct nv40_context *nv40)
+{
+ struct draw_context *draw = nv40->draw;
+
+ /* Setup for swtnl */
+ if (nv40->render_mode == HW) {
+ NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl);
+ nv40->pipe.flush(&nv40->pipe, 0, NULL);
+ nv40->dirty |= (NV40_NEW_VIEWPORT |
+ NV40_NEW_VERTPROG |
+ NV40_NEW_ARRAYS);
+ nv40->render_mode = SWTNL;
+ }
+
+ if (nv40->draw_dirty & NV40_NEW_VERTPROG)
+ draw_bind_vertex_shader(draw, nv40->vertprog->draw);
+
+ if (nv40->draw_dirty & NV40_NEW_RAST)
+ draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe);
+
+ if (nv40->draw_dirty & NV40_NEW_UCP)
+ draw_set_clip_state(draw, &nv40->clip);
+
+ if (nv40->draw_dirty & NV40_NEW_VIEWPORT)
+ draw_set_viewport_state(draw, &nv40->viewport);
+
+ if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
+ draw_set_edgeflags(draw, nv40->edgeflags);
+ draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
+ draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);
+ }
+
+ nv40_state_do_validate(nv40, swtnl_states);
+ if (nv40->fallback_swrast) {
+ NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast);
+ return FALSE;
+ }
+
+ nv40->draw_dirty = 0;
+ return TRUE;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c
new file mode 100644
index 0000000000..28592d71c3
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_fb.c
@@ -0,0 +1,155 @@
+#include "nv40_context.h"
+#include "nouveau/nouveau_util.h"
+
+static boolean
+nv40_state_framebuffer_validate(struct nv40_context *nv40)
+{
+ struct pipe_framebuffer_state *fb = &nv40->framebuffer;
+ struct pipe_surface *rt[4], *zeta;
+ uint32_t rt_enable, rt_format;
+ int i, colour_format = 0, zeta_format = 0;
+ struct nouveau_stateobj *so = so_new(64, 10);
+ unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+ unsigned w = fb->width;
+ unsigned h = fb->height;
+
+ rt_enable = 0;
+ for (i = 0; i < fb->num_cbufs; i++) {
+ if (colour_format) {
+ assert(colour_format == fb->cbufs[i]->format);
+ } else {
+ colour_format = fb->cbufs[i]->format;
+ rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i);
+ rt[i] = fb->cbufs[i];
+ }
+ }
+
+ if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 |
+ NV40TCL_RT_ENABLE_COLOR3))
+ rt_enable |= NV40TCL_RT_ENABLE_MRT;
+
+ if (fb->zsbuf) {
+ zeta_format = fb->zsbuf->format;
+ zeta = fb->zsbuf;
+ }
+
+ if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+ for (i = 1; i < fb->num_cbufs; i++)
+ assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
+
+ rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED |
+ log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT |
+ log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT;
+ }
+ else
+ rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR;
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+ rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5;
+ break;
+ default:
+ assert(0);
+ }
+
+ switch (zeta_format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16;
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case 0:
+ rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
+ so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1);
+ so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv40->nvws->channel->vram->handle,
+ nv40->nvws->channel->gart->handle);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2);
+ so_data (so, rt[0]->stride);
+ so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ }
+
+ if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
+ so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1);
+ so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv40->nvws->channel->vram->handle,
+ nv40->nvws->channel->gart->handle);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2);
+ so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, rt[1]->stride);
+ }
+
+ if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
+ so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1);
+ so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv40->nvws->channel->vram->handle,
+ nv40->nvws->channel->gart->handle);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1);
+ so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1);
+ so_data (so, rt[2]->stride);
+ }
+
+ if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
+ so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1);
+ so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv40->nvws->channel->vram->handle,
+ nv40->nvws->channel->gart->handle);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1);
+ so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1);
+ so_data (so, rt[3]->stride);
+ }
+
+ if (zeta_format) {
+ so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1);
+ so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv40->nvws->channel->vram->handle,
+ nv40->nvws->channel->gart->handle);
+ so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1);
+ so_reloc (so, zeta->buffer, zeta->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1);
+ so_data (so, zeta->stride);
+ }
+
+ so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1);
+ so_data (so, rt_enable);
+ so_method(so, nv40->screen->curie, NV40TCL_RT_HORIZ, 3);
+ so_data (so, (w << 16) | 0);
+ so_data (so, (h << 16) | 0);
+ so_data (so, rt_format);
+ so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_HORIZ, 2);
+ so_data (so, (w << 16) | 0);
+ so_data (so, (h << 16) | 0);
+ so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ so_data (so, ((w - 1) << 16) | 0);
+ so_data (so, ((h - 1) << 16) | 0);
+ so_method(so, nv40->screen->curie, 0x1d88, 1);
+ so_data (so, (1 << 12) | h);
+
+ so_ref(so, &nv40->state.hw[NV40_STATE_FB]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_framebuffer = {
+ .validate = nv40_state_framebuffer_validate,
+ .dirty = {
+ .pipe = NV40_NEW_FB,
+ .hw = NV40_STATE_FB
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c
new file mode 100644
index 0000000000..9ecda5990f
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_rasterizer.c
@@ -0,0 +1,17 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_rasterizer_validate(struct nv40_context *nv40)
+{
+ so_ref(nv40->rasterizer->so,
+ &nv40->state.hw[NV40_STATE_RAST]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_rasterizer = {
+ .validate = nv40_state_rasterizer_validate,
+ .dirty = {
+ .pipe = NV40_NEW_RAST,
+ .hw = NV40_STATE_RAST
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c
new file mode 100644
index 0000000000..285239ef41
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_scissor.c
@@ -0,0 +1,35 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_scissor_validate(struct nv40_context *nv40)
+{
+ struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe;
+ struct pipe_scissor_state *s = &nv40->scissor;
+ struct nouveau_stateobj *so;
+
+ if (nv40->state.hw[NV40_STATE_SCISSOR] &&
+ (rast->scissor == 0 && nv40->state.scissor_enabled == 0))
+ return FALSE;
+ nv40->state.scissor_enabled = rast->scissor;
+
+ so = so_new(3, 0);
+ so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2);
+ if (nv40->state.scissor_enabled) {
+ so_data (so, ((s->maxx - s->minx) << 16) | s->minx);
+ so_data (so, ((s->maxy - s->miny) << 16) | s->miny);
+ } else {
+ so_data (so, 4096 << 16);
+ so_data (so, 4096 << 16);
+ }
+
+ so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_scissor = {
+ .validate = nv40_state_scissor_validate,
+ .dirty = {
+ .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST,
+ .hw = NV40_STATE_SCISSOR
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c
new file mode 100644
index 0000000000..b51024ad9b
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_stipple.c
@@ -0,0 +1,39 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_stipple_validate(struct nv40_context *nv40)
+{
+ struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe;
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ struct nouveau_stateobj *so;
+
+ if (nv40->state.hw[NV40_STATE_STIPPLE] &&
+ (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0))
+ return FALSE;
+
+ if (rast->poly_stipple_enable) {
+ unsigned i;
+
+ so = so_new(35, 0);
+ so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, 1);
+ so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+ for (i = 0; i < 32; i++)
+ so_data(so, nv40->stipple[i]);
+ } else {
+ so = so_new(2, 0);
+ so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_stipple = {
+ .validate = nv40_state_stipple_validate,
+ .dirty = {
+ .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST,
+ .hw = NV40_STATE_STIPPLE,
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c
new file mode 100644
index 0000000000..869a55b405
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_viewport.c
@@ -0,0 +1,67 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_viewport_validate(struct nv40_context *nv40)
+{
+ struct pipe_viewport_state *vpt = &nv40->viewport;
+ struct nouveau_stateobj *so;
+ unsigned bypass;
+
+ if (nv40->render_mode == HW && !nv40->rasterizer->pipe.bypass_clipping)
+ bypass = 0;
+ else
+ bypass = 1;
+
+ if (nv40->state.hw[NV40_STATE_VIEWPORT] &&
+ (bypass || !(nv40->dirty & NV40_NEW_VIEWPORT)) &&
+ nv40->state.viewport_bypass == bypass)
+ return FALSE;
+ nv40->state.viewport_bypass = bypass;
+
+ so = so_new(11, 0);
+ if (!bypass) {
+ so_method(so, nv40->screen->curie,
+ NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+ so_data (so, fui(vpt->translate[0]));
+ so_data (so, fui(vpt->translate[1]));
+ so_data (so, fui(vpt->translate[2]));
+ so_data (so, fui(vpt->translate[3]));
+ so_data (so, fui(vpt->scale[0]));
+ so_data (so, fui(vpt->scale[1]));
+ so_data (so, fui(vpt->scale[2]));
+ so_data (so, fui(vpt->scale[3]));
+ so_method(so, nv40->screen->curie, 0x1d78, 1);
+ so_data (so, 1);
+ } else {
+ so_method(so, nv40->screen->curie,
+ NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(0.0));
+ /* Not entirely certain what this is yet. The DDX uses this
+ * value also as it fixes rendering when you pass
+ * pre-transformed vertices to the GPU. My best gusss is that
+ * this bypasses some culling/clipping stage. Might be worth
+ * noting that points/lines are uneffected by whatever this
+ * value fixes, only filled polygons are effected.
+ */
+ so_method(so, nv40->screen->curie, 0x1d78, 1);
+ so_data (so, 0x110);
+ }
+
+ so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_viewport = {
+ .validate = nv40_state_viewport_validate,
+ .dirty = {
+ .pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST,
+ .hw = NV40_STATE_VIEWPORT
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c
new file mode 100644
index 0000000000..fb760677c8
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_zsa.c
@@ -0,0 +1,17 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_zsa_validate(struct nv40_context *nv40)
+{
+ so_ref(nv40->zsa->so,
+ &nv40->state.hw[NV40_STATE_ZSA]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_zsa = {
+ .validate = nv40_state_zsa_validate,
+ .dirty = {
+ .pipe = NV40_NEW_ZSA,
+ .hw = NV40_STATE_ZSA
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c
new file mode 100644
index 0000000000..576af7c59e
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_surface.c
@@ -0,0 +1,77 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv40_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+
+#include "util/u_tile.h"
+
+static void
+nv40_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_winsys *nvws = nv40->nvws;
+
+ if (do_flip) {
+ /*XXX: This dodgyness will do for now for correctness. But,
+ * need to investigate whether the 2D engine is able to
+ * manage a flip (perhaps SIFM?), if not, use the 3D engine
+ */
+ desty += height;
+ while (height--) {
+ nvws->surface_copy(nvws, dest, destx, desty--, src,
+ srcx, srcy++, width, 1);
+ }
+ } else {
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+ }
+}
+
+static void
+nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_winsys *nvws = nv40->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+void
+nv40_init_surface_functions(struct nv40_context *nv40)
+{
+ nv40->pipe.surface_copy = nv40_surface_copy;
+ nv40->pipe.surface_fill = nv40_surface_fill;
+}
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
new file mode 100644
index 0000000000..09f6e79d32
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -0,0 +1,555 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv40_context.h"
+#include "nv40_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nouveau_util.h"
+
+#define FORCE_SWTNL 0
+
+static INLINE int
+nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+{
+ switch (pipe) {
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ *fmt = NV40TCL_VTXFMT_TYPE_FLOAT;
+ break;
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ *fmt = NV40TCL_VTXFMT_TYPE_UBYTE;
+ break;
+ case PIPE_FORMAT_R16_SSCALED:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ *fmt = NV40TCL_VTXFMT_TYPE_USHORT;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe));
+ return 1;
+ }
+
+ switch (pipe) {
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R16_SSCALED:
+ *ncomp = 1;
+ break;
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ *ncomp = 2;
+ break;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ *ncomp = 3;
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ *ncomp = 4;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe));
+ return 1;
+ }
+
+ return 0;
+}
+
+static boolean
+nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib,
+ unsigned ib_size)
+{
+ struct pipe_screen *pscreen = &nv40->screen->pipe;
+ unsigned type;
+
+ if (!ib) {
+ nv40->idxbuf = NULL;
+ nv40->idxbuf_format = 0xdeadbeef;
+ return FALSE;
+ }
+
+ if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1)
+ return FALSE;
+
+ switch (ib_size) {
+ case 2:
+ type = NV40TCL_IDXBUF_FORMAT_TYPE_U16;
+ break;
+ case 4:
+ type = NV40TCL_IDXBUF_FORMAT_TYPE_U32;
+ break;
+ default:
+ return FALSE;
+ }
+
+ if (ib != nv40->idxbuf ||
+ type != nv40->idxbuf_format) {
+ nv40->dirty |= NV40_NEW_ARRAYS;
+ nv40->idxbuf = ib;
+ nv40->idxbuf_format = type;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so,
+ int attrib, struct pipe_vertex_element *ve,
+ struct pipe_vertex_buffer *vb)
+{
+ struct pipe_winsys *ws = nv40->pipe.winsys;
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ unsigned type, ncomp;
+ void *map;
+
+ if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp))
+ return FALSE;
+
+ map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ map += vb->buffer_offset + ve->src_offset;
+
+ switch (type) {
+ case NV40TCL_VTXFMT_TYPE_FLOAT:
+ {
+ float *v = map;
+
+ switch (ncomp) {
+ case 4:
+ so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ so_data (so, fui(v[2]));
+ so_data (so, fui(v[3]));
+ break;
+ case 3:
+ so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ so_data (so, fui(v[2]));
+ break;
+ case 2:
+ so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ break;
+ case 1:
+ so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1);
+ so_data (so, fui(v[0]));
+ break;
+ default:
+ ws->buffer_unmap(ws, vb->buffer);
+ return FALSE;
+ }
+ }
+ break;
+ default:
+ ws->buffer_unmap(ws, vb->buffer);
+ return FALSE;
+ }
+
+ ws->buffer_unmap(ws, vb->buffer);
+
+ return TRUE;
+}
+
+boolean
+nv40_draw_arrays(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_channel *chan = nv40->nvws->channel;
+ unsigned restart;
+
+ nv40_vbo_set_idxbuf(nv40, NULL, 0);
+ if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
+ return nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ }
+
+ while (count) {
+ unsigned vc, nr;
+
+ nv40_state_emit(nv40);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+static INLINE void
+nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv40->nvws->channel;
+
+ while (count) {
+ uint8_t *elts = (uint8_t *)ib + start;
+ unsigned vc, push, restart;
+
+ nv40_state_emit(nv40);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ if (vc & 1) {
+ BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static INLINE void
+nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv40->nvws->channel;
+
+ while (count) {
+ uint16_t *elts = (uint16_t *)ib + start;
+ unsigned vc, push, restart;
+
+ nv40_state_emit(nv40);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ if (vc & 1) {
+ BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static INLINE void
+nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv40->nvws->channel;
+
+ while (count) {
+ uint32_t *elts = (uint32_t *)ib + start;
+ unsigned vc, push, restart;
+
+ nv40_state_emit(nv40);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ while (vc) {
+ push = MIN2(vc, 2047);
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push);
+ OUT_RINGp (elts, push);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static boolean
+nv40_draw_elements_inline(struct pipe_context *pipe,
+ struct pipe_buffer *ib, unsigned ib_size,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ void *map;
+
+ map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
+ if (!ib) {
+ NOUVEAU_ERR("failed mapping ib\n");
+ return FALSE;
+ }
+
+ switch (ib_size) {
+ case 1:
+ nv40_draw_elements_u08(nv40, map, mode, start, count);
+ break;
+ case 2:
+ nv40_draw_elements_u16(nv40, map, mode, start, count);
+ break;
+ case 4:
+ nv40_draw_elements_u32(nv40, map, mode, start, count);
+ break;
+ default:
+ NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
+ break;
+ }
+
+ ws->buffer_unmap(ws, ib);
+ return TRUE;
+}
+
+static boolean
+nv40_draw_elements_vbo(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_channel *chan = nv40->nvws->channel;
+ unsigned restart;
+
+ while (count) {
+ unsigned nr, vc;
+
+ nv40_state_emit(nv40);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ return TRUE;
+}
+
+boolean
+nv40_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer, unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ boolean idxbuf;
+
+ idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
+ if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
+ return nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ }
+
+ if (idxbuf) {
+ nv40_draw_elements_vbo(pipe, mode, start, count);
+ } else {
+ nv40_draw_elements_inline(pipe, indexBuffer, indexSize,
+ mode, start, count);
+ }
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+static boolean
+nv40_vbo_validate(struct nv40_context *nv40)
+{
+ struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL;
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ struct pipe_buffer *ib = nv40->idxbuf;
+ unsigned ib_format = nv40->idxbuf_format;
+ unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+ int hw;
+
+ if (nv40->edgeflags) {
+ nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
+ return FALSE;
+ }
+
+ vtxbuf = so_new(20, 18);
+ so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
+ vtxfmt = so_new(17, 0);
+ so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
+
+ for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
+ struct pipe_vertex_element *ve;
+ struct pipe_vertex_buffer *vb;
+ unsigned type, ncomp;
+
+ ve = &nv40->vtxelt[hw];
+ vb = &nv40->vtxbuf[ve->vertex_buffer_index];
+
+ if (!vb->pitch) {
+ if (!sattr)
+ sattr = so_new(16 * 5, 0);
+
+ if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) {
+ so_data(vtxbuf, 0);
+ so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT);
+ continue;
+ }
+ }
+
+ if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
+ nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
+ so_ref(NULL, &vtxbuf);
+ so_ref(NULL, &vtxfmt);
+ return FALSE;
+ }
+
+ so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset,
+ vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ 0, NV40TCL_VTXBUF_ADDRESS_DMA1);
+ so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) |
+ (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type));
+ }
+
+ if (ib) {
+ so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2);
+ so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR,
+ 0, NV40TCL_IDXBUF_FORMAT_DMA1);
+ }
+
+ so_method(vtxbuf, curie, 0x1710, 1);
+ so_data (vtxbuf, 0);
+
+ so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]);
+ nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF);
+ so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]);
+ nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT);
+ so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]);
+ nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR);
+ return FALSE;
+}
+
+struct nv40_state_entry nv40_state_vbo = {
+ .validate = nv40_vbo_validate,
+ .dirty = {
+ .pipe = NV40_NEW_ARRAYS,
+ .hw = 0,
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
new file mode 100644
index 0000000000..1392fe956f
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -0,0 +1,1070 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv40_context.h"
+#include "nv40_state.h"
+
+/* TODO (at least...):
+ * 1. Indexed consts + ARL
+ * 3. NV_vp11, NV_vp2, NV_vp3 features
+ * - extra arith opcodes
+ * - branching
+ * - texture sampling
+ * - indexed attribs
+ * - indexed results
+ * 4. bugs
+ */
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv40_shader.h"
+
+#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv40_sr_neg((s))
+#define abs(s) nv40_sr_abs((s))
+
+#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
+
+struct nv40_vpc {
+ struct nv40_vertex_program *vp;
+
+ struct nv40_vertex_program_exec *vpi;
+
+ unsigned r_temps;
+ unsigned r_temps_discard;
+ struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+ struct nv40_sreg *r_address;
+ struct nv40_sreg *r_temp;
+
+ struct nv40_sreg *imm;
+ unsigned nr_imm;
+
+ unsigned hpos_idx;
+};
+
+static struct nv40_sreg
+temp(struct nv40_vpc *vpc)
+{
+ int idx = ffs(~vpc->r_temps) - 1;
+
+ if (idx < 0) {
+ NOUVEAU_ERR("out of temps!!\n");
+ assert(0);
+ return nv40_sr(NV40SR_TEMP, 0);
+ }
+
+ vpc->r_temps |= (1 << idx);
+ vpc->r_temps_discard |= (1 << idx);
+ return nv40_sr(NV40SR_TEMP, idx);
+}
+
+static INLINE void
+release_temps(struct nv40_vpc *vpc)
+{
+ vpc->r_temps &= ~vpc->r_temps_discard;
+ vpc->r_temps_discard = 0;
+}
+
+static struct nv40_sreg
+constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+ struct nv40_vertex_program_data *vpd;
+ int idx;
+
+ if (pipe >= 0) {
+ for (idx = 0; idx < vp->nr_consts; idx++) {
+ if (vp->consts[idx].index == pipe)
+ return nv40_sr(NV40SR_CONST, idx);
+ }
+ }
+
+ idx = vp->nr_consts++;
+ vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
+ vpd = &vp->consts[idx];
+
+ vpd->index = pipe;
+ vpd->value[0] = x;
+ vpd->value[1] = y;
+ vpd->value[2] = z;
+ vpd->value[3] = w;
+ return nv40_sr(NV40SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+
+static void
+emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV40SR_TEMP:
+ sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT);
+ sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT);
+ break;
+ case NV40SR_INPUT:
+ sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
+ NV40_VP_SRC_REG_TYPE_SHIFT);
+ vp->ir |= (1 << src.index);
+ hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT);
+ break;
+ case NV40SR_CONST:
+ sr |= (NV40_VP_SRC_REG_TYPE_CONST <<
+ NV40_VP_SRC_REG_TYPE_SHIFT);
+ assert(vpc->vpi->const_index == -1 ||
+ vpc->vpi->const_index == src.index);
+ vpc->vpi->const_index = src.index;
+ break;
+ case NV40SR_NONE:
+ sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
+ NV40_VP_SRC_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV40_VP_SRC_NEGATE;
+
+ if (src.abs)
+ hw[0] |= (1 << (21 + pos));
+
+ sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) |
+ (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT));
+
+ switch (pos) {
+ case 0:
+ hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >>
+ NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT;
+ hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) <<
+ NV40_VP_INST_SRC0L_SHIFT;
+ break;
+ case 1:
+ hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT;
+ break;
+ case 2:
+ hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >>
+ NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT;
+ hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) <<
+ NV40_VP_INST_SRC2L_SHIFT;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+
+ switch (dst.type) {
+ case NV40SR_TEMP:
+ hw[3] |= NV40_VP_INST_DEST_MASK;
+ if (slot == 0) {
+ hw[0] |= (dst.index <<
+ NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
+ } else {
+ hw[3] |= (dst.index <<
+ NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
+ }
+ break;
+ case NV40SR_OUTPUT:
+ switch (dst.index) {
+ case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
+ case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ case NV40_VP_INST_DEST_CLIP(0):
+ vp->or |= (1 << 6);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(1):
+ vp->or |= (1 << 7);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(2):
+ vp->or |= (1 << 8);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(3):
+ vp->or |= (1 << 9);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(4):
+ vp->or |= (1 << 10);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(5):
+ vp->or |= (1 << 11);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ default:
+ break;
+ }
+
+ hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
+ if (slot == 0) {
+ hw[0] |= NV40_VP_INST_VEC_RESULT;
+ hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+ } else {
+ hw[3] |= NV40_VP_INST_SCA_RESULT;
+ hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
+ struct nv40_sreg dst, int mask,
+ struct nv40_sreg s0, struct nv40_sreg s1,
+ struct nv40_sreg s2)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+ uint32_t *hw;
+
+ vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+ vpc->vpi = &vp->insns[vp->nr_insns - 1];
+ memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+ vpc->vpi->const_index = -1;
+
+ hw = vpc->vpi->data;
+
+ hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT);
+ hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) |
+ (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) |
+ (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) |
+ (3 << NV40_VP_INST_COND_SWZ_W_SHIFT));
+
+ if (slot == 0) {
+ hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
+ hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+ hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
+ } else {
+ hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
+ hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20));
+ hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
+ }
+
+ emit_dst(vpc, hw, slot, dst);
+ emit_src(vpc, hw, 0, s0);
+ emit_src(vpc, hw, 1, s1);
+ emit_src(vpc, hw, 2, s2);
+}
+
+static INLINE struct nv40_sreg
+tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+ struct nv40_sreg src;
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ src = vpc->imm[fsrc->SrcRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ src = vpc->r_temp[fsrc->SrcRegister.Index];
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->SrcRegisterExtMod.Absolute;
+ src.negate = fsrc->SrcRegister.Negate;
+ src.swz[0] = fsrc->SrcRegister.SwizzleX;
+ src.swz[1] = fsrc->SrcRegister.SwizzleY;
+ src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+ src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ return src;
+}
+
+static INLINE struct nv40_sreg
+tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+ struct nv40_sreg dst;
+
+ switch (fdst->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ dst = vpc->r_result[fdst->DstRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ dst = vpc->r_temp[fdst->DstRegister.Index];
+ break;
+ case TGSI_FILE_ADDRESS:
+ dst = vpc->r_address[fdst->DstRegister.Index];
+ break;
+ default:
+ NOUVEAU_ERR("bad dst file\n");
+ break;
+ }
+
+ return dst;
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
+ return mask;
+}
+
+static boolean
+src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
+ struct nv40_sreg *src)
+{
+ const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ struct nv40_sreg tgsi = tgsi_src(vpc, fsrc);
+ uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+ uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+ fsrc->SrcRegisterExtSwz.NegateY,
+ fsrc->SrcRegisterExtSwz.NegateZ,
+ fsrc->SrcRegisterExtSwz.NegateW };
+ uint c;
+
+ for (c = 0; c < 4; c++) {
+ switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ mask |= tgsi_mask(1 << c);
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zero_mask |= tgsi_mask(1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ one_mask |= tgsi_mask(1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (!tgsi.negate && neg[c])
+ neg_mask |= tgsi_mask(1 << c);
+ }
+
+ if (mask == MASK_ALL && !neg_mask)
+ return TRUE;
+
+ *src = temp(vpc);
+
+ if (mask)
+ arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none);
+
+ if (zero_mask)
+ arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none);
+
+ if (one_mask)
+ arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none);
+
+ if (neg_mask) {
+ struct nv40_sreg one = temp(vpc);
+ arith(vpc, 0, OP_STR, one, neg_mask, one, none, none);
+ arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none);
+ }
+
+ return FALSE;
+}
+
+static boolean
+nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nv40_sreg src[3], dst, tmp;
+ struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ int mask;
+ int ai = -1, ci = -1, ii = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ if (!src_native_swz(vpc, fsrc, &src[i]))
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ if ((ci == -1 && ii == -1) ||
+ ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ if ((ci == -1 && ii == -1) ||
+ ii == fsrc->SrcRegister.Index) {
+ ii = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ tmp = temp(vpc);
+ arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, 1, OP_EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RET:
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0]));
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(vpc);
+ arith(vpc, 0, OP_MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ release_temps(vpc);
+ return TRUE;
+}
+
+static boolean
+nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ unsigned idx = fdec->DeclarationRange.First;
+ int hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NV40_VP_INST_DEST_POS;
+ vpc->hpos_idx = idx;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV40_VP_INST_DEST_COL0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV40_VP_INST_DEST_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV40_VP_INST_DEST_BFC0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV40_VP_INST_DEST_BFC1;
+ } else {
+ NOUVEAU_ERR("bad bcolour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NV40_VP_INST_DEST_FOGC;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ hw = NV40_VP_INST_DEST_PSZ;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.SemanticIndex <= 7) {
+ hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
+ return TRUE;
+}
+
+static boolean
+nv40_vertprog_prepare(struct nv40_vpc *vpc)
+{
+ struct tgsi_parse_context p;
+ int high_temp = -1, high_addr = -1, nr_imm = 0, i;
+
+ tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ nr_imm++;
+ break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+
+ fdec = &p.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (fdec->DeclarationRange.Last > high_temp) {
+ high_temp =
+ fdec->DeclarationRange.Last;
+ }
+ break;
+#if 0 /* this would be nice.. except gallium doesn't track it */
+ case TGSI_FILE_ADDRESS:
+ if (fdec->DeclarationRange.Last > high_addr) {
+ high_addr =
+ fdec->DeclarationRange.Last;
+ }
+ break;
+#endif
+ case TGSI_FILE_OUTPUT:
+ if (!nv40_vertprog_parse_decl_output(vpc, fdec))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+#if 1 /* yay, parse instructions looking for address regs instead */
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ const struct tgsi_full_dst_register *fdst;
+
+ finst = &p.FullToken.FullInstruction;
+ fdst = &finst->FullDstRegisters[0];
+
+ if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) {
+ if (fdst->DstRegister.Index > high_addr)
+ high_addr = fdst->DstRegister.Index;
+ }
+
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ if (nr_imm) {
+ vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg));
+ assert(vpc->imm);
+ }
+
+ if (++high_temp) {
+ vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+ for (i = 0; i < high_temp; i++)
+ vpc->r_temp[i] = temp(vpc);
+ }
+
+ if (++high_addr) {
+ vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg));
+ for (i = 0; i < high_addr; i++)
+ vpc->r_address[i] = temp(vpc);
+ }
+
+ vpc->r_temps_discard = 0;
+ return TRUE;
+}
+
+static void
+nv40_vertprog_translate(struct nv40_context *nv40,
+ struct nv40_vertex_program *vp)
+{
+ struct tgsi_parse_context parse;
+ struct nv40_vpc *vpc = NULL;
+ struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ int i;
+
+ vpc = CALLOC(1, sizeof(struct nv40_vpc));
+ if (!vpc)
+ return;
+ vpc->vp = vp;
+
+ if (!nv40_vertprog_prepare(vpc)) {
+ FREE(vpc);
+ return;
+ }
+
+ /* Redirect post-transform vertex position to a temp if user clip
+ * planes are enabled. We need to append code the the vtxprog
+ * to handle clip planes later.
+ */
+ if (vp->ucp.nr) {
+ vpc->r_result[vpc->hpos_idx] = temp(vpc);
+ vpc->r_temps_discard = 0;
+ }
+
+ tgsi_parse_init(&parse, vp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm;
+
+ imm = &parse.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+// assert(imm->Immediate.Size == 4);
+ vpc->imm[vpc->nr_imm++] =
+ constant(vpc, -1,
+ imm->u.ImmediateFloat32[0].Float,
+ imm->u.ImmediateFloat32[1].Float,
+ imm->u.ImmediateFloat32[2].Float,
+ imm->u.ImmediateFloat32[3].Float);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ finst = &parse.FullToken.FullInstruction;
+ if (!nv40_vertprog_parse_instruction(vpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Write out HPOS if it was redirected to a temp earlier */
+ if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) {
+ struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT,
+ NV40_VP_INST_DEST_POS);
+ struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+
+ arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none);
+ }
+
+ /* Insert code to handle user clip planes */
+ for (i = 0; i < vp->ucp.nr; i++) {
+ struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT,
+ NV40_VP_INST_DEST_CLIP(i));
+ struct nv40_sreg ceqn = constant(vpc, -1,
+ nv40->clip.ucp[i][0],
+ nv40->clip.ucp[i][1],
+ nv40->clip.ucp[i][2],
+ nv40->clip.ucp[i][3]);
+ struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+ unsigned mask;
+
+ switch (i) {
+ case 0: case 3: mask = MASK_Y; break;
+ case 1: case 4: mask = MASK_Z; break;
+ case 2: case 5: mask = MASK_W; break;
+ default:
+ NOUVEAU_ERR("invalid clip dist #%d\n", i);
+ goto out_err;
+ }
+
+ arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none);
+ }
+
+ vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST;
+ vp->translated = TRUE;
+out_err:
+ tgsi_parse_free(&parse);
+ if (vpc->r_temp)
+ FREE(vpc->r_temp);
+ if (vpc->r_address)
+ FREE(vpc->r_address);
+ if (vpc->imm)
+ FREE(vpc->imm);
+ FREE(vpc);
+}
+
+static boolean
+nv40_vertprog_validate(struct nv40_context *nv40)
+{
+ struct nouveau_winsys *nvws = nv40->nvws;
+ struct pipe_winsys *ws = nv40->pipe.winsys;
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ struct nv40_vertex_program *vp;
+ struct pipe_buffer *constbuf;
+ boolean upload_code = FALSE, upload_data = FALSE;
+ int i;
+
+ if (nv40->render_mode == HW) {
+ vp = nv40->vertprog;
+ constbuf = nv40->constbuf[PIPE_SHADER_VERTEX];
+
+ if ((nv40->dirty & NV40_NEW_UCP) ||
+ memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) {
+ nv40_vertprog_destroy(nv40, vp);
+ memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp));
+ }
+ } else {
+ vp = nv40->swtnl.vertprog;
+ constbuf = NULL;
+ }
+
+ /* Translate TGSI shader into hw bytecode */
+ if (vp->translated)
+ goto check_gpu_resources;
+
+ nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG;
+ nv40_vertprog_translate(nv40, vp);
+ if (!vp->translated) {
+ nv40->fallback_swtnl |= NV40_NEW_VERTPROG;
+ return FALSE;
+ }
+
+check_gpu_resources:
+ /* Allocate hw vtxprog exec slots */
+ if (!vp->exec) {
+ struct nouveau_resource *heap = nv40->screen->vp_exec_heap;
+ struct nouveau_stateobj *so;
+ uint vplen = vp->nr_insns;
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+ while (heap->next && heap->size < vplen) {
+ struct nv40_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->exec);
+ }
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+ assert(0);
+ }
+
+ so = so_new(7, 0);
+ so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
+ so_data (so, vp->exec->start);
+ so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
+ so_data (so, vp->ir);
+ so_data (so, vp->or);
+ so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1);
+ so_data (so, vp->clip_ctrl);
+ so_ref(so, &vp->so);
+
+ upload_code = TRUE;
+ }
+
+ /* Allocate hw vtxprog const slots */
+ if (vp->nr_consts && !vp->data) {
+ struct nouveau_resource *heap = nv40->screen->vp_data_heap;
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+ while (heap->next && heap->size < vp->nr_consts) {
+ struct nv40_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->data);
+ }
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+ assert(0);
+ }
+
+ /*XXX: handle this some day */
+ assert(vp->data->start >= vp->data_start_min);
+
+ upload_data = TRUE;
+ if (vp->data_start != vp->data->start)
+ upload_code = TRUE;
+ }
+
+ /* If exec or data segments moved we need to patch the program to
+ * fixup offsets and register IDs.
+ */
+ if (vp->exec_start != vp->exec->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv40_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->has_branch_offset) {
+ assert(0);
+ }
+ }
+
+ vp->exec_start = vp->exec->start;
+ }
+
+ if (vp->nr_consts && vp->data_start != vp->data->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv40_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->const_index >= 0) {
+ vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK;
+ vpi->data[1] |=
+ (vpi->const_index + vp->data->start) <<
+ NV40_VP_INST_CONST_SRC_SHIFT;
+
+ }
+ }
+
+ vp->data_start = vp->data->start;
+ }
+
+ /* Update + Upload constant values */
+ if (vp->nr_consts) {
+ float *map = NULL;
+
+ if (constbuf) {
+ map = ws->buffer_map(ws, constbuf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ }
+
+ for (i = 0; i < vp->nr_consts; i++) {
+ struct nv40_vertex_program_data *vpd = &vp->consts[i];
+
+ if (vpd->index >= 0) {
+ if (!upload_data &&
+ !memcmp(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float)))
+ continue;
+ memcpy(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float));
+ }
+
+ BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (i + vp->data->start);
+ OUT_RINGp ((uint32_t *)vpd->value, 4);
+ }
+
+ if (constbuf)
+ ws->buffer_unmap(ws, constbuf);
+ }
+
+ /* Upload vtxprog */
+ if (upload_code) {
+#if 0
+ for (i = 0; i < vp->nr_insns; i++) {
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]);
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]);
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]);
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
+ }
+#endif
+ BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (vp->exec->start);
+ for (i = 0; i < vp->nr_insns; i++) {
+ BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (vp->insns[i].data, 4);
+ }
+ }
+
+ if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) {
+ so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp)
+{
+ struct nouveau_winsys *nvws = nv40->screen->nvws;
+
+ vp->translated = FALSE;
+
+ if (vp->nr_insns) {
+ FREE(vp->insns);
+ vp->insns = NULL;
+ vp->nr_insns = 0;
+ }
+
+ if (vp->nr_consts) {
+ FREE(vp->consts);
+ vp->consts = NULL;
+ vp->nr_consts = 0;
+ }
+
+ nvws->res_free(&vp->exec);
+ vp->exec_start = 0;
+ nvws->res_free(&vp->data);
+ vp->data_start = 0;
+ vp->data_start_min = 0;
+
+ vp->ir = vp->or = vp->clip_ctrl = 0;
+ so_ref(NULL, &vp->so);
+}
+
+struct nv40_state_entry nv40_state_vertprog = {
+ .validate = nv40_vertprog_validate,
+ .dirty = {
+ .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP,
+ .hw = NV40_STATE_VERTPROG,
+ }
+};
+
diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile
new file mode 100644
index 0000000000..be30400c03
--- /dev/null
+++ b/src/gallium/drivers/nv50/Makefile
@@ -0,0 +1,29 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv50
+
+DRIVER_SOURCES = \
+ nv50_clear.c \
+ nv50_context.c \
+ nv50_draw.c \
+ nv50_miptree.c \
+ nv50_query.c \
+ nv50_program.c \
+ nv50_screen.c \
+ nv50_state.c \
+ nv50_state_validate.c \
+ nv50_surface.c \
+ nv50_tex.c \
+ nv50_vbo.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c
new file mode 100644
index 0000000000..a31a42d6b5
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_clear.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv50_context.h"
+
+void
+nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct pipe_framebuffer_state fb, s_fb = nv50->framebuffer;
+ struct pipe_scissor_state sc, s_sc = nv50->scissor;
+ unsigned dirty = nv50->dirty;
+
+ nv50->dirty = 0;
+
+ if (ps->format == PIPE_FORMAT_Z24S8_UNORM ||
+ ps->format == PIPE_FORMAT_Z16_UNORM) {
+ fb.num_cbufs = 0;
+ fb.zsbuf = ps;
+ } else {
+ fb.num_cbufs = 1;
+ fb.cbufs[0] = ps;
+ fb.zsbuf = NULL;
+ }
+ fb.width = ps->width;
+ fb.height = ps->height;
+ pipe->set_framebuffer_state(pipe, &fb);
+
+ sc.minx = sc.miny = 0;
+ sc.maxx = fb.width;
+ sc.maxy = fb.height;
+ pipe->set_scissor_state(pipe, &sc);
+
+ nv50_state_validate(nv50);
+
+ switch (ps->format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ BEGIN_RING(tesla, 0x0d80, 4);
+ OUT_RINGf (ubyte_to_float((clearValue >> 16) & 0xff));
+ OUT_RINGf (ubyte_to_float((clearValue >> 8) & 0xff));
+ OUT_RINGf (ubyte_to_float((clearValue >> 0) & 0xff));
+ OUT_RINGf (ubyte_to_float((clearValue >> 24) & 0xff));
+ BEGIN_RING(tesla, 0x19d0, 1);
+ OUT_RING (0x3c);
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ BEGIN_RING(tesla, 0x0d90, 1);
+ OUT_RINGf ((float)(clearValue >> 8) * (1.0 / 16777215.0));
+ BEGIN_RING(tesla, 0x0da0, 1);
+ OUT_RING (clearValue & 0xff);
+ BEGIN_RING(tesla, 0x19d0, 1);
+ OUT_RING (0x03);
+ break;
+ default:
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height,
+ clearValue);
+ break;
+ }
+
+ pipe->set_framebuffer_state(pipe, &s_fb);
+ pipe->set_scissor_state(pipe, &s_sc);
+ nv50->dirty |= dirty;
+
+ ps->status = PIPE_SURFACE_STATUS_CLEAR;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
new file mode 100644
index 0000000000..b02c53f209
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv50_context.h"
+#include "nv50_screen.h"
+
+static void
+nv50_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)pipe;
+
+ FIRE_RING(fence);
+}
+
+static void
+nv50_destroy(struct pipe_context *pipe)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)pipe;
+
+ draw_destroy(nv50->draw);
+ FREE(nv50);
+}
+
+
+static void
+nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+struct pipe_context *
+nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct pipe_winsys *pipe_winsys = pscreen->winsys;
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nv50_context *nv50;
+
+ nv50 = CALLOC_STRUCT(nv50_context);
+ if (!nv50)
+ return NULL;
+ nv50->screen = screen;
+ nv50->pctx_id = pctx_id;
+
+ nv50->pipe.winsys = pipe_winsys;
+ nv50->pipe.screen = pscreen;
+
+ nv50->pipe.destroy = nv50_destroy;
+
+ nv50->pipe.set_edgeflags = nv50_set_edgeflags;
+ nv50->pipe.draw_arrays = nv50_draw_arrays;
+ nv50->pipe.draw_elements = nv50_draw_elements;
+ nv50->pipe.clear = nv50_clear;
+
+ nv50->pipe.flush = nv50_flush;
+
+ nv50_init_surface_functions(nv50);
+ nv50_init_state_functions(nv50);
+ nv50_init_query_functions(nv50);
+
+ nv50->draw = draw_create();
+ assert(nv50->draw);
+ draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50));
+
+ return &nv50->pipe;
+}
+
+
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
new file mode 100644
index 0000000000..061a4c064b
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -0,0 +1,199 @@
+#ifndef __NV50_CONTEXT_H__
+#define __NV50_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+#include "nouveau/nouveau_stateobj.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv50_screen *ctx = nv50->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv50_screen.h"
+#include "nv50_program.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+/* Constant buffer assignment */
+#define NV50_CB_PMISC 0
+#define NV50_CB_PVP 1
+#define NV50_CB_PFP 2
+#define NV50_CB_PGP 3
+#define NV50_CB_TIC 4
+#define NV50_CB_TSC 5
+#define NV50_CB_PUPLOAD 6
+
+#define NV50_NEW_BLEND (1 << 0)
+#define NV50_NEW_ZSA (1 << 1)
+#define NV50_NEW_BLEND_COLOUR (1 << 2)
+#define NV50_NEW_STIPPLE (1 << 3)
+#define NV50_NEW_SCISSOR (1 << 4)
+#define NV50_NEW_VIEWPORT (1 << 5)
+#define NV50_NEW_RASTERIZER (1 << 6)
+#define NV50_NEW_FRAMEBUFFER (1 << 7)
+#define NV50_NEW_VERTPROG (1 << 8)
+#define NV50_NEW_VERTPROG_CB (1 << 9)
+#define NV50_NEW_FRAGPROG (1 << 10)
+#define NV50_NEW_FRAGPROG_CB (1 << 11)
+#define NV50_NEW_ARRAYS (1 << 12)
+#define NV50_NEW_SAMPLER (1 << 13)
+#define NV50_NEW_TEXTURE (1 << 14)
+
+struct nv50_blend_stateobj {
+ struct pipe_blend_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv50_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv50_rasterizer_stateobj {
+ struct pipe_rasterizer_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv50_miptree_level {
+ struct pipe_buffer **image;
+ int *image_offset;
+ unsigned image_dirty_cpu[512/32];
+ unsigned image_dirty_gpu[512/32];
+};
+
+struct nv50_miptree {
+ struct pipe_texture base;
+ struct pipe_buffer *buffer;
+
+ struct nv50_miptree_level level[PIPE_MAX_TEXTURE_LEVELS];
+ int image_nr;
+ int total_size;
+};
+
+static INLINE struct nv50_miptree *
+nv50_miptree(struct pipe_texture *pt)
+{
+ return (struct nv50_miptree *)pt;
+}
+
+struct nv50_surface {
+ struct pipe_surface base;
+};
+
+static INLINE struct nv50_surface *
+nv50_surface(struct pipe_surface *pt)
+{
+ return (struct nv50_surface *)pt;
+}
+
+struct nv50_state {
+ unsigned dirty;
+
+ struct nouveau_stateobj *fb;
+ struct nouveau_stateobj *blend;
+ struct nouveau_stateobj *blend_colour;
+ struct nouveau_stateobj *zsa;
+ struct nouveau_stateobj *rast;
+ struct nouveau_stateobj *stipple;
+ struct nouveau_stateobj *scissor;
+ unsigned scissor_enabled;
+ struct nouveau_stateobj *viewport;
+ unsigned viewport_bypass;
+ struct nouveau_stateobj *tsc_upload;
+ struct nouveau_stateobj *tic_upload;
+ struct nouveau_stateobj *vertprog;
+ struct nouveau_stateobj *fragprog;
+ struct nouveau_stateobj *vtxfmt;
+ struct nouveau_stateobj *vtxbuf;
+};
+
+struct nv50_context {
+ struct pipe_context pipe;
+
+ struct nv50_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ struct nv50_state state;
+
+ unsigned dirty;
+ struct nv50_blend_stateobj *blend;
+ struct nv50_zsa_stateobj *zsa;
+ struct nv50_rasterizer_stateobj *rasterizer;
+ struct pipe_blend_color blend_colour;
+ struct pipe_poly_stipple stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer;
+ struct nv50_program *vertprog;
+ struct nv50_program *fragprog;
+ struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned vtxbuf_nr;
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+ unsigned vtxelt_nr;
+ unsigned *sampler[PIPE_MAX_SAMPLERS];
+ unsigned sampler_nr;
+ struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
+ unsigned miptree_nr;
+};
+
+static INLINE struct nv50_context *
+nv50_context(struct pipe_context *pipe)
+{
+ return (struct nv50_context *)pipe;
+}
+
+extern void nv50_init_surface_functions(struct nv50_context *nv50);
+extern void nv50_init_state_functions(struct nv50_context *nv50);
+extern void nv50_init_query_functions(struct nv50_context *nv50);
+
+extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv50_draw.c */
+extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50);
+
+/* nv50_vbo.c */
+extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv50_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count);
+extern void nv50_vbo_validate(struct nv50_context *nv50);
+
+/* nv50_clear.c */
+extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv50_program.c */
+extern void nv50_vertprog_validate(struct nv50_context *nv50);
+extern void nv50_fragprog_validate(struct nv50_context *nv50);
+extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);
+
+/* nv50_state_validate.c */
+extern boolean nv50_state_validate(struct nv50_context *nv50);
+
+/* nv50_tex.c */
+extern void nv50_tex_validate(struct nv50_context *);
+
+/* nv50_miptree.c */
+extern void nv50_miptree_sync(struct pipe_screen *, struct nv50_miptree *,
+ unsigned level, unsigned image);
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c
new file mode 100644
index 0000000000..2f6f607261
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_draw.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nv50_context.h"
+
+struct nv50_render_stage {
+ struct draw_stage stage;
+ struct nv50_context *nv50;
+};
+
+static INLINE struct nv50_render_stage *
+nv50_render_stage(struct draw_stage *stage)
+{
+ return (struct nv50_render_stage *)stage;
+}
+
+static void
+nv50_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_flush(struct draw_stage *stage, unsigned flags)
+{
+}
+
+static void
+nv50_render_reset_stipple_counter(struct draw_stage *stage)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_destroy(struct draw_stage *stage)
+{
+ FREE(stage);
+}
+
+struct draw_stage *
+nv50_draw_render_stage(struct nv50_context *nv50)
+{
+ struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage);
+
+ rs->nv50 = nv50;
+ rs->stage.draw = nv50->draw;
+ rs->stage.destroy = nv50_render_destroy;
+ rs->stage.point = nv50_render_point;
+ rs->stage.line = nv50_render_line;
+ rs->stage.tri = nv50_render_tri;
+ rs->stage.flush = nv50_render_flush;
+ rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter;
+
+ return &rs->stage;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
new file mode 100644
index 0000000000..7770fcc3f2
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv50_context.h"
+
+static struct pipe_texture *
+nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
+{
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
+ struct pipe_texture *pt = &mt->base;
+ unsigned usage, width = tmp->width[0], height = tmp->height[0];
+ unsigned depth = tmp->depth[0];
+ int i, l;
+
+ mt->base = *tmp;
+ mt->base.refcount = 1;
+ mt->base.screen = pscreen;
+
+ usage = PIPE_BUFFER_USAGE_PIXEL;
+ switch (pt->format) {
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ usage |= NOUVEAU_BUFFER_USAGE_ZETA;
+ break;
+ default:
+ break;
+ }
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_3D:
+ mt->image_nr = pt->depth[0];
+ break;
+ case PIPE_TEXTURE_CUBE:
+ mt->image_nr = 6;
+ break;
+ default:
+ mt->image_nr = 1;
+ break;
+ }
+
+ for (l = 0; l <= pt->last_level; l++) {
+ struct nv50_miptree_level *lvl = &mt->level[l];
+
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->depth[l] = depth;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
+ lvl->image = CALLOC(mt->image_nr, sizeof(struct pipe_buffer *));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+ depth = MAX2(1, depth >> 1);
+ }
+
+ for (i = 0; i < mt->image_nr; i++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ struct nv50_miptree_level *lvl = &mt->level[l];
+ int size;
+
+ size = align(pt->width[l], 8) * pt->block.size;
+ size = align(size, 64);
+ size *= align(pt->height[l], 8) * pt->block.size;
+
+ lvl->image[i] = ws->buffer_create(ws, 256, 0, size);
+ lvl->image_offset[i] = mt->total_size;
+
+ mt->total_size += size;
+ }
+ }
+
+ mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static INLINE void
+mark_dirty(uint32_t *flags, unsigned image)
+{
+ flags[image / 32] |= (1 << (image % 32));
+}
+
+static INLINE void
+mark_clean(uint32_t *flags, unsigned image)
+{
+ flags[image / 32] &= ~(1 << (image % 32));
+}
+
+static INLINE int
+is_dirty(uint32_t *flags, unsigned image)
+{
+ return !!(flags[image / 32] & (1 << (image % 32)));
+}
+
+static void
+nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+ struct pipe_texture *pt = *ppt;
+
+ *ppt = NULL;
+
+ if (--pt->refcount <= 0) {
+ struct nv50_miptree *mt = nv50_miptree(pt);
+
+ pipe_buffer_reference(pscreen, &mt->buffer, NULL);
+ FREE(mt);
+ }
+}
+
+void
+nv50_miptree_sync(struct pipe_screen *pscreen, struct nv50_miptree *mt,
+ unsigned level, unsigned image)
+{
+ struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws;
+ struct nv50_miptree_level *lvl = &mt->level[level];
+ struct pipe_surface *dst, *src;
+ unsigned face = 0, zslice = 0;
+
+ if (!is_dirty(lvl->image_dirty_cpu, image))
+ return;
+
+ if (mt->base.target == PIPE_TEXTURE_CUBE)
+ face = image;
+ else
+ if (mt->base.target == PIPE_TEXTURE_3D)
+ zslice = image;
+
+ /* Mark as clean already - so we don't continually call this function
+ * trying to get a GPU_WRITE pipe_surface!
+ */
+ mark_clean(lvl->image_dirty_cpu, image);
+
+ /* Pretend we're doing CPU access so we get the backing pipe_surface
+ * and not a view into the larger miptree.
+ */
+ src = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice,
+ PIPE_BUFFER_USAGE_CPU_READ);
+
+ /* Pretend we're only reading with the GPU so surface doesn't get marked
+ * as dirtied by the GPU.
+ */
+ dst = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice,
+ PIPE_BUFFER_USAGE_GPU_READ);
+
+ nvws->surface_copy(nvws, dst, 0, 0, src, 0, 0, dst->width, dst->height);
+
+ pscreen->tex_surface_release(pscreen, &dst);
+ pscreen->tex_surface_release(pscreen, &src);
+}
+
+/* The reverse of the above */
+void
+nv50_miptree_sync_cpu(struct pipe_screen *pscreen, struct nv50_miptree *mt,
+ unsigned level, unsigned image)
+{
+ struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws;
+ struct nv50_miptree_level *lvl = &mt->level[level];
+ struct pipe_surface *dst, *src;
+ unsigned face = 0, zslice = 0;
+
+ if (!is_dirty(lvl->image_dirty_gpu, image))
+ return;
+
+ if (mt->base.target == PIPE_TEXTURE_CUBE)
+ face = image;
+ else
+ if (mt->base.target == PIPE_TEXTURE_3D)
+ zslice = image;
+
+ mark_clean(lvl->image_dirty_gpu, image);
+
+ src = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice,
+ PIPE_BUFFER_USAGE_GPU_READ);
+ dst = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice,
+ PIPE_BUFFER_USAGE_CPU_READ);
+
+ nvws->surface_copy(nvws, dst, 0, 0, src, 0, 0, dst->width, dst->height);
+
+ pscreen->tex_surface_release(pscreen, &dst);
+ pscreen->tex_surface_release(pscreen, &src);
+}
+
+static struct pipe_surface *
+nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct nv50_miptree *mt = nv50_miptree(pt);
+ struct nv50_miptree_level *lvl = &mt->level[level];
+ struct pipe_surface *ps;
+ int img;
+
+ if (pt->target == PIPE_TEXTURE_CUBE)
+ img = face;
+ else
+ if (pt->target == PIPE_TEXTURE_3D)
+ img = zslice;
+ else
+ img = 0;
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (!ps)
+ return NULL;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = ps->width * ps->block.size;
+ ps->usage = flags;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+ ps->refcount = 1;
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
+
+ if (flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) {
+ assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE));
+ nv50_miptree_sync_cpu(pscreen, mt, level, img);
+
+ ps->offset = 0;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(pscreen, &ps->buffer, lvl->image[img]);
+
+ if (flags & PIPE_BUFFER_USAGE_CPU_WRITE)
+ mark_dirty(lvl->image_dirty_cpu, img);
+ } else {
+ nv50_miptree_sync(pscreen, mt, level, img);
+
+ ps->offset = lvl->image_offset[img];
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer);
+
+ if (flags & PIPE_BUFFER_USAGE_GPU_WRITE)
+ mark_dirty(lvl->image_dirty_gpu, img);
+ }
+
+ return ps;
+}
+
+static void
+nv50_miptree_surface_del(struct pipe_screen *pscreen,
+ struct pipe_surface **psurface)
+{
+ struct pipe_surface *ps = *psurface;
+ struct nv50_surface *s = nv50_surface(ps);
+
+ *psurface = NULL;
+
+ if (--ps->refcount <= 0) {
+ pipe_texture_reference(&ps->texture, NULL);
+ pipe_buffer_reference(pscreen, &ps->buffer, NULL);
+ FREE(s);
+ }
+}
+
+void
+nv50_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv50_miptree_create;
+ pscreen->texture_release = nv50_miptree_release;
+ pscreen->get_tex_surface = nv50_miptree_surface_new;
+ pscreen->tex_surface_release = nv50_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
new file mode 100644
index 0000000000..7686f746eb
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -0,0 +1,1779 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_inlines.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv50_context.h"
+
+#define NV50_SU_MAX_TEMP 64
+//#define NV50_PROGRAM_DUMP
+
+/* ARL - gallium craps itself on progs/vp/arl.txt
+ *
+ * MSB - Like MAD, but MUL+SUB
+ * - Fuck it off, introduce a way to negate args for ops that
+ * support it.
+ *
+ * Look into inlining IMMD for ops other than MOV (make it general?)
+ * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
+ * but can emit to P_TEMP first - then MOV later. NVIDIA does this
+ *
+ * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
+ * case, if the emit_src() causes the inst to suddenly become long.
+ *
+ * Verify half-insns work where expected - and force disable them where they
+ * don't work - MUL has it forcibly disabled atm as it fixes POW..
+ *
+ * FUCK! watch dst==src vectors, can overwrite components that are needed.
+ * ie. SUB R0, R0.yzxw, R0
+ *
+ * Things to check with renouveau:
+ * FP attr/result assignment - how?
+ * attrib
+ * - 0x16bc maps vp output onto fp hpos
+ * - 0x16c0 maps vp output onto fp col0
+ * result
+ * - colr always 0-3
+ * - depr always 4
+ * 0x16bc->0x16e8 --> some binding between vp/fp regs
+ * 0x16b8 --> VP output count
+ *
+ * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
+ * "MOV rcol.x, fcol.y" = 0x00000004
+ * 0x19a8 --> as above but 0x00000100 and 0x00000000
+ * - 0x00100000 used when KIL used
+ * 0x196c --> as above but 0x00000011 and 0x00000000
+ *
+ * 0x1988 --> 0xXXNNNNNN
+ * - XX == FP high something
+ */
+struct nv50_reg {
+ enum {
+ P_TEMP,
+ P_ATTR,
+ P_RESULT,
+ P_CONST,
+ P_IMMD
+ } type;
+ int index;
+
+ int hw;
+ int neg;
+};
+
+struct nv50_pc {
+ struct nv50_program *p;
+
+ /* hw resources */
+ struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
+
+ /* tgsi resources */
+ struct nv50_reg *temp;
+ int temp_nr;
+ struct nv50_reg *attr;
+ int attr_nr;
+ struct nv50_reg *result;
+ int result_nr;
+ struct nv50_reg *param;
+ int param_nr;
+ struct nv50_reg *immd;
+ float *immd_buf;
+ int immd_nr;
+
+ struct nv50_reg *temp_temp[16];
+ unsigned temp_temp_nr;
+};
+
+static void
+alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+ int i;
+
+ if (reg->type == P_RESULT) {
+ if (pc->p->cfg.high_result < (reg->hw + 1))
+ pc->p->cfg.high_result = reg->hw + 1;
+ }
+
+ if (reg->type != P_TEMP)
+ return;
+
+ if (reg->hw >= 0) {
+ /*XXX: do this here too to catch FP temp-as-attr usage..
+ * not clean, but works */
+ if (pc->p->cfg.high_temp < (reg->hw + 1))
+ pc->p->cfg.high_temp = reg->hw + 1;
+ return;
+ }
+
+ for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
+ if (!(pc->r_temp[i])) {
+ pc->r_temp[i] = reg;
+ reg->hw = i;
+ if (pc->p->cfg.high_temp < (i + 1))
+ pc->p->cfg.high_temp = i + 1;
+ return;
+ }
+ }
+
+ assert(0);
+}
+
+static struct nv50_reg *
+alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
+{
+ struct nv50_reg *r;
+ int i;
+
+ if (dst && dst->type == P_TEMP && dst->hw == -1)
+ return dst;
+
+ for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
+ if (!pc->r_temp[i]) {
+ r = CALLOC_STRUCT(nv50_reg);
+ r->type = P_TEMP;
+ r->index = -1;
+ r->hw = i;
+ pc->r_temp[i] = r;
+ return r;
+ }
+ }
+
+ assert(0);
+ return NULL;
+}
+
+static void
+free_temp(struct nv50_pc *pc, struct nv50_reg *r)
+{
+ if (r->index == -1) {
+ unsigned hw = r->hw;
+
+ FREE(pc->r_temp[hw]);
+ pc->r_temp[hw] = NULL;
+ }
+}
+
+static int
+alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
+{
+ int i;
+
+ if ((idx + 4) >= NV50_SU_MAX_TEMP)
+ return 1;
+
+ if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
+ pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
+ return alloc_temp4(pc, dst, idx + 1);
+
+ for (i = 0; i < 4; i++) {
+ dst[i] = CALLOC_STRUCT(nv50_reg);
+ dst[i]->type = P_TEMP;
+ dst[i]->index = -1;
+ dst[i]->hw = idx + i;
+ pc->r_temp[idx + i] = dst[i];
+ }
+
+ return 0;
+}
+
+static void
+free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ free_temp(pc, reg[i]);
+}
+
+static struct nv50_reg *
+temp_temp(struct nv50_pc *pc)
+{
+ if (pc->temp_temp_nr >= 16)
+ assert(0);
+
+ pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
+ return pc->temp_temp[pc->temp_temp_nr++];
+}
+
+static void
+kill_temp_temp(struct nv50_pc *pc)
+{
+ int i;
+
+ for (i = 0; i < pc->temp_temp_nr; i++)
+ free_temp(pc, pc->temp_temp[i]);
+ pc->temp_temp_nr = 0;
+}
+
+static int
+ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
+{
+ pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)),
+ (pc->immd_nr + 1) * 4 * sizeof(float));
+ pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
+ pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
+ pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
+ pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
+
+ return pc->immd_nr++;
+}
+
+static struct nv50_reg *
+alloc_immd(struct nv50_pc *pc, float f)
+{
+ struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
+ unsigned hw;
+
+ hw = ctor_immd(pc, f, 0, 0, 0) * 4;
+ r->type = P_IMMD;
+ r->hw = hw;
+ r->index = -1;
+ return r;
+}
+
+static struct nv50_program_exec *
+exec(struct nv50_pc *pc)
+{
+ struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
+
+ e->param.index = -1;
+ return e;
+}
+
+static void
+emit(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+ struct nv50_program *p = pc->p;
+
+ if (p->exec_tail)
+ p->exec_tail->next = e;
+ if (!p->exec_head)
+ p->exec_head = e;
+ p->exec_tail = e;
+ p->exec_size += (e->inst[0] & 1) ? 2 : 1;
+}
+
+static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
+
+static boolean
+is_long(struct nv50_program_exec *e)
+{
+ if (e->inst[0] & 1)
+ return TRUE;
+ return FALSE;
+}
+
+static boolean
+is_immd(struct nv50_program_exec *e)
+{
+ if (is_long(e) && (e->inst[1] & 3) == 3)
+ return TRUE;
+ return FALSE;
+}
+
+static INLINE void
+set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
+ struct nv50_program_exec *e)
+{
+ set_long(pc, e);
+ e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
+ e->inst[1] |= (pred << 7) | (idx << 12);
+}
+
+static INLINE void
+set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
+ struct nv50_program_exec *e)
+{
+ set_long(pc, e);
+ e->inst[1] &= ~((0x3 << 4) | (1 << 6));
+ e->inst[1] |= (idx << 4) | (on << 6);
+}
+
+static INLINE void
+set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+ if (is_long(e))
+ return;
+
+ e->inst[0] |= 1;
+ set_pred(pc, 0xf, 0, e);
+ set_pred_wr(pc, 0, 0, e);
+}
+
+static INLINE void
+set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
+{
+ if (dst->type == P_RESULT) {
+ set_long(pc, e);
+ e->inst[1] |= 0x00000008;
+ }
+
+ alloc_reg(pc, dst);
+ e->inst[0] |= (dst->hw << 2);
+}
+
+static INLINE void
+set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
+{
+ unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
+
+ set_long(pc, e);
+ /*XXX: can't be predicated - bits overlap.. catch cases where both
+ * are required and avoid them. */
+ set_pred(pc, 0, 0, e);
+ set_pred_wr(pc, 0, 0, e);
+
+ e->inst[1] |= 0x00000002 | 0x00000001;
+ e->inst[0] |= (val & 0x3f) << 16;
+ e->inst[1] |= (val >> 6) << 2;
+}
+
+static void
+emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src, struct nv50_reg *iv)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0x80000000;
+ set_dst(pc, dst, e);
+ alloc_reg(pc, src);
+ e->inst[0] |= (src->hw << 16);
+ if (iv) {
+ e->inst[0] |= (1 << 25);
+ alloc_reg(pc, iv);
+ e->inst[0] |= (iv->hw << 9);
+ }
+
+ emit(pc, e);
+}
+
+static void
+set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
+ struct nv50_program_exec *e)
+{
+ set_long(pc, e);
+#if 1
+ e->inst[1] |= (1 << 22);
+#else
+ if (src->type == P_IMMD) {
+ e->inst[1] |= (NV50_CB_PMISC << 22);
+ } else {
+ if (pc->p->type == PIPE_SHADER_VERTEX)
+ e->inst[1] |= (NV50_CB_PVP << 22);
+ else
+ e->inst[1] |= (NV50_CB_PFP << 22);
+ }
+#endif
+
+ e->param.index = src->hw;
+ e->param.shift = s;
+ e->param.mask = m << (s % 32);
+}
+
+static void
+emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0x10000000;
+
+ set_dst(pc, dst, e);
+
+ if (0 && dst->type != P_RESULT && src->type == P_IMMD) {
+ set_immd(pc, src, e);
+ /*XXX: 32-bit, but steals part of "half" reg space - need to
+ * catch and handle this case if/when we do half-regs
+ */
+ e->inst[0] |= 0x00008000;
+ } else
+ if (src->type == P_IMMD || src->type == P_CONST) {
+ set_long(pc, e);
+ set_data(pc, src, 0x7f, 9, e);
+ e->inst[1] |= 0x20000000; /* src0 const? */
+ } else {
+ if (src->type == P_ATTR) {
+ set_long(pc, e);
+ e->inst[1] |= 0x00200000;
+ }
+
+ alloc_reg(pc, src);
+ e->inst[0] |= (src->hw << 9);
+ }
+
+ /* We really should support "half" instructions here at some point,
+ * but I don't feel confident enough about them yet.
+ */
+ set_long(pc, e);
+ if (is_long(e) && !is_immd(e)) {
+ e->inst[1] |= 0x04000000; /* 32-bit */
+ e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
+ }
+
+ emit(pc, e);
+}
+
+static boolean
+check_swap_src_0_1(struct nv50_pc *pc,
+ struct nv50_reg **s0, struct nv50_reg **s1)
+{
+ struct nv50_reg *src0 = *s0, *src1 = *s1;
+
+ if (src0->type == P_CONST) {
+ if (src1->type != P_CONST) {
+ *s0 = src1;
+ *s1 = src0;
+ return TRUE;
+ }
+ } else
+ if (src1->type == P_ATTR) {
+ if (src0->type != P_ATTR) {
+ *s0 = src1;
+ *s1 = src0;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static void
+set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
+{
+ if (src->type == P_ATTR) {
+ set_long(pc, e);
+ e->inst[1] |= 0x00200000;
+ } else
+ if (src->type == P_CONST || src->type == P_IMMD) {
+ struct nv50_reg *temp = temp_temp(pc);
+
+ emit_mov(pc, temp, src);
+ src = temp;
+ }
+
+ alloc_reg(pc, src);
+ e->inst[0] |= (src->hw << 9);
+}
+
+static void
+set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
+{
+ if (src->type == P_ATTR) {
+ struct nv50_reg *temp = temp_temp(pc);
+
+ emit_mov(pc, temp, src);
+ src = temp;
+ } else
+ if (src->type == P_CONST || src->type == P_IMMD) {
+ assert(!(e->inst[0] & 0x00800000));
+ if (e->inst[0] & 0x01000000) {
+ struct nv50_reg *temp = temp_temp(pc);
+
+ emit_mov(pc, temp, src);
+ src = temp;
+ } else {
+ set_data(pc, src, 0x7f, 16, e);
+ e->inst[0] |= 0x00800000;
+ }
+ }
+
+ alloc_reg(pc, src);
+ e->inst[0] |= (src->hw << 16);
+}
+
+static void
+set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
+{
+ set_long(pc, e);
+
+ if (src->type == P_ATTR) {
+ struct nv50_reg *temp = temp_temp(pc);
+
+ emit_mov(pc, temp, src);
+ src = temp;
+ } else
+ if (src->type == P_CONST || src->type == P_IMMD) {
+ assert(!(e->inst[0] & 0x01000000));
+ if (e->inst[0] & 0x00800000) {
+ struct nv50_reg *temp = temp_temp(pc);
+
+ emit_mov(pc, temp, src);
+ src = temp;
+ } else {
+ set_data(pc, src, 0x7f, 32+14, e);
+ e->inst[0] |= 0x01000000;
+ }
+ }
+
+ alloc_reg(pc, src);
+ e->inst[1] |= (src->hw << 14);
+}
+
+static void
+emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+ struct nv50_reg *src1)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xc0000000;
+ set_long(pc, e);
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *src1)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xb0000000;
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ if (is_long(e))
+ set_src_2(pc, src1, e);
+ else
+ set_src_1(pc, src1, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *src1)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ set_long(pc, e);
+ e->inst[0] |= 0xb0000000;
+ e->inst[1] |= (sub << 29);
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+ struct nv50_reg *src1)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xb0000000;
+
+ set_long(pc, e);
+ if (check_swap_src_0_1(pc, &src0, &src1))
+ e->inst[1] |= 0x04000000;
+ else
+ e->inst[1] |= 0x08000000;
+
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_2(pc, src1, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+ struct nv50_reg *src1, struct nv50_reg *src2)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xe0000000;
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+ set_src_2(pc, src2, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+ struct nv50_reg *src1, struct nv50_reg *src2)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xe0000000;
+ set_long(pc, e);
+ e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+ set_src_2(pc, src2, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_flop(struct nv50_pc *pc, unsigned sub,
+ struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0x90000000;
+ if (sub) {
+ set_long(pc, e);
+ e->inst[1] |= (sub << 29);
+ }
+
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xb0000000;
+
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+ set_long(pc, e);
+ e->inst[1] |= (6 << 29) | 0x00004000;
+
+ emit(pc, e);
+}
+
+static void
+emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xb0000000;
+
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+ set_long(pc, e);
+ e->inst[1] |= (6 << 29);
+
+ emit(pc, e);
+}
+
+static void
+emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *src1)
+{
+ struct nv50_program_exec *e = exec(pc);
+ unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+ struct nv50_reg *rdst;
+
+ assert(c_op <= 7);
+ if (check_swap_src_0_1(pc, &src0, &src1))
+ c_op = inv_cop[c_op];
+
+ rdst = dst;
+ if (dst->type != P_TEMP)
+ dst = alloc_temp(pc, NULL);
+
+ /* set.u32 */
+ set_long(pc, e);
+ e->inst[0] |= 0xb0000000;
+ e->inst[1] |= (3 << 29);
+ e->inst[1] |= (c_op << 14);
+ /*XXX: breaks things, .u32 by default?
+ * decuda will disasm as .u16 and use .lo/.hi regs, but this
+ * doesn't seem to match what the hw actually does.
+ inst[1] |= 0x04000000; << breaks things.. .u32 by default?
+ */
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+ emit(pc, e);
+
+ /* cvt.f32.u32 */
+ e = exec(pc);
+ e->inst[0] = 0xa0000001;
+ e->inst[1] = 0x64014780;
+ set_dst(pc, rdst, e);
+ set_src_0(pc, dst, e);
+ emit(pc, e);
+
+ if (dst != rdst)
+ free_temp(pc, dst);
+}
+
+static void
+emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xa0000000; /* cvt */
+ set_long(pc, e);
+ e->inst[1] |= (6 << 29); /* cvt */
+ e->inst[1] |= 0x08000000; /* integer mode */
+ e->inst[1] |= 0x04000000; /* 32 bit */
+ e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
+ e->inst[1] |= (1 << 14); /* src .f32 */
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *v, struct nv50_reg *e)
+{
+ struct nv50_reg *temp = alloc_temp(pc, NULL);
+
+ emit_flop(pc, 3, temp, v);
+ emit_mul(pc, temp, temp, e);
+ emit_preex2(pc, temp, temp);
+ emit_flop(pc, 6, dst, temp);
+
+ free_temp(pc, temp);
+}
+
+static void
+emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xa0000000; /* cvt */
+ set_long(pc, e);
+ e->inst[1] |= (6 << 29); /* cvt */
+ e->inst[1] |= 0x04000000; /* 32 bit */
+ e->inst[1] |= (1 << 14); /* src .f32 */
+ e->inst[1] |= ((1 << 6) << 14); /* .abs */
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+ struct nv50_reg **src)
+{
+ struct nv50_reg *one = alloc_immd(pc, 1.0);
+ struct nv50_reg *zero = alloc_immd(pc, 0.0);
+ struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
+ struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
+ struct nv50_reg *tmp[4];
+
+ if (mask & (1 << 0))
+ emit_mov(pc, dst[0], one);
+
+ if (mask & (1 << 3))
+ emit_mov(pc, dst[3], one);
+
+ if (mask & (3 << 1)) {
+ if (mask & (1 << 1))
+ tmp[0] = dst[1];
+ else
+ tmp[0] = temp_temp(pc);
+ emit_minmax(pc, 4, tmp[0], src[0], zero);
+ }
+
+ if (mask & (1 << 2)) {
+ set_pred_wr(pc, 1, 0, pc->p->exec_tail);
+
+ tmp[1] = temp_temp(pc);
+ emit_minmax(pc, 4, tmp[1], src[1], zero);
+
+ tmp[3] = temp_temp(pc);
+ emit_minmax(pc, 4, tmp[3], src[3], neg128);
+ emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
+
+ emit_pow(pc, dst[2], tmp[1], tmp[3]);
+ emit_mov(pc, dst[2], zero);
+ set_pred(pc, 3, 0, pc->p->exec_tail);
+ }
+}
+
+static void
+emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ set_long(pc, e);
+ e->inst[0] |= 0xa0000000; /* delta */
+ e->inst[1] |= (7 << 29); /* delta */
+ e->inst[1] |= 0x04000000; /* negate arg0? probably not */
+ e->inst[1] |= (1 << 14); /* src .f32 */
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e;
+ const int r_pred = 1;
+
+ /* Sets predicate reg ? */
+ e = exec(pc);
+ e->inst[0] = 0xa00001fd;
+ e->inst[1] = 0xc4014788;
+ set_src_0(pc, src, e);
+ set_pred_wr(pc, 1, r_pred, e);
+ emit(pc, e);
+
+ /* This is probably KILP */
+ e = exec(pc);
+ e->inst[0] = 0x000001fe;
+ set_long(pc, e);
+ set_pred(pc, 1 /* LT? */, r_pred, e);
+ emit(pc, e);
+}
+
+static struct nv50_reg *
+tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
+{
+ switch (dst->DstRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ return &pc->temp[dst->DstRegister.Index * 4 + c];
+ case TGSI_FILE_OUTPUT:
+ return &pc->result[dst->DstRegister.Index * 4 + c];
+ case TGSI_FILE_NULL:
+ return NULL;
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+static struct nv50_reg *
+tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
+{
+ struct nv50_reg *r = NULL;
+ struct nv50_reg *temp;
+ unsigned c;
+
+ c = tgsi_util_get_full_src_register_extswizzle(src, chan);
+ switch (c) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ switch (src->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ r = &pc->attr[src->SrcRegister.Index * 4 + c];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ r = &pc->temp[src->SrcRegister.Index * 4 + c];
+ break;
+ case TGSI_FILE_CONSTANT:
+ r = &pc->param[src->SrcRegister.Index * 4 + c];
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ r = &pc->immd[src->SrcRegister.Index * 4 + c];
+ break;
+ case TGSI_FILE_SAMPLER:
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ r = alloc_immd(pc, 0.0);
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ r = alloc_immd(pc, 1.0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+ case TGSI_UTIL_SIGN_CLEAR:
+ temp = temp_temp(pc);
+ emit_abs(pc, temp, r);
+ r = temp;
+ break;
+ case TGSI_UTIL_SIGN_TOGGLE:
+ temp = temp_temp(pc);
+ emit_neg(pc, temp, r);
+ r = temp;
+ break;
+ case TGSI_UTIL_SIGN_SET:
+ temp = temp_temp(pc);
+ emit_abs(pc, temp, r);
+ emit_neg(pc, temp, r);
+ r = temp;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ return r;
+}
+
+static boolean
+nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
+{
+ const struct tgsi_full_instruction *inst = &tok->FullInstruction;
+ struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
+ unsigned mask, sat, unit;
+ int i, c;
+
+ mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
+
+ for (c = 0; c < 4; c++) {
+ if (mask & (1 << c))
+ dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
+ else
+ dst[c] = NULL;
+ }
+
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
+
+ if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
+ unit = fs->SrcRegister.Index;
+
+ for (c = 0; c < 4; c++)
+ src[i][c] = tgsi_src(pc, c, fs);
+ }
+
+ if (sat) {
+ for (c = 0; c < 4; c++) {
+ rdst[c] = dst[c];
+ dst[c] = temp_temp(pc);
+ }
+ }
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_abs(pc, dst[c], src[0][c]);
+ }
+ break;
+ case TGSI_OPCODE_ADD:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_add(pc, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_COS:
+ temp = alloc_temp(pc, NULL);
+ emit_precossin(pc, temp, src[0][0]);
+ emit_flop(pc, 5, temp, temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ break;
+ case TGSI_OPCODE_DP3:
+ temp = alloc_temp(pc, NULL);
+ emit_mul(pc, temp, src[0][0], src[1][0]);
+ emit_mad(pc, temp, src[0][1], src[1][1], temp);
+ emit_mad(pc, temp, src[0][2], src[1][2], temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_DP4:
+ temp = alloc_temp(pc, NULL);
+ emit_mul(pc, temp, src[0][0], src[1][0]);
+ emit_mad(pc, temp, src[0][1], src[1][1], temp);
+ emit_mad(pc, temp, src[0][2], src[1][2], temp);
+ emit_mad(pc, temp, src[0][3], src[1][3], temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_DPH:
+ temp = alloc_temp(pc, NULL);
+ emit_mul(pc, temp, src[0][0], src[1][0]);
+ emit_mad(pc, temp, src[0][1], src[1][1], temp);
+ emit_mad(pc, temp, src[0][2], src[1][2], temp);
+ emit_add(pc, temp, src[1][3], temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_DST:
+ {
+ struct nv50_reg *one = alloc_immd(pc, 1.0);
+ if (mask & (1 << 0))
+ emit_mov(pc, dst[0], one);
+ if (mask & (1 << 1))
+ emit_mul(pc, dst[1], src[0][1], src[1][1]);
+ if (mask & (1 << 2))
+ emit_mov(pc, dst[2], src[0][2]);
+ if (mask & (1 << 3))
+ emit_mov(pc, dst[3], src[1][3]);
+ FREE(one);
+ }
+ break;
+ case TGSI_OPCODE_EX2:
+ temp = alloc_temp(pc, NULL);
+ emit_preex2(pc, temp, src[0][0]);
+ emit_flop(pc, 6, temp, temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_FLR:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_flr(pc, dst[c], src[0][c]);
+ }
+ break;
+ case TGSI_OPCODE_FRC:
+ temp = alloc_temp(pc, NULL);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_flr(pc, temp, src[0][c]);
+ emit_sub(pc, dst[c], src[0][c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_KIL:
+ emit_kil(pc, src[0][0]);
+ emit_kil(pc, src[0][1]);
+ emit_kil(pc, src[0][2]);
+ emit_kil(pc, src[0][3]);
+ break;
+ case TGSI_OPCODE_LIT:
+ emit_lit(pc, &dst[0], mask, &src[0][0]);
+ break;
+ case TGSI_OPCODE_LG2:
+ temp = alloc_temp(pc, NULL);
+ emit_flop(pc, 3, temp, src[0][0]);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ break;
+ case TGSI_OPCODE_LRP:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ /*XXX: we can do better than this */
+ temp = alloc_temp(pc, NULL);
+ emit_neg(pc, temp, src[0][c]);
+ emit_mad(pc, temp, temp, src[2][c], src[2][c]);
+ emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
+ free_temp(pc, temp);
+ }
+ break;
+ case TGSI_OPCODE_MAD:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
+ }
+ break;
+ case TGSI_OPCODE_MAX:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_MIN:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_MOV:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], src[0][c]);
+ }
+ break;
+ case TGSI_OPCODE_MUL:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mul(pc, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_POW:
+ temp = alloc_temp(pc, NULL);
+ emit_pow(pc, temp, src[0][0], src[1][0]);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_RCP:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_flop(pc, 0, dst[c], src[0][0]);
+ }
+ break;
+ case TGSI_OPCODE_RSQ:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_flop(pc, 2, dst[c], src[0][0]);
+ }
+ break;
+ case TGSI_OPCODE_SCS:
+ temp = alloc_temp(pc, NULL);
+ emit_precossin(pc, temp, src[0][0]);
+ if (mask & (1 << 0))
+ emit_flop(pc, 5, dst[0], temp);
+ if (mask & (1 << 1))
+ emit_flop(pc, 4, dst[1], temp);
+ break;
+ case TGSI_OPCODE_SGE:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_SIN:
+ temp = alloc_temp(pc, NULL);
+ emit_precossin(pc, temp, src[0][0]);
+ emit_flop(pc, 4, temp, temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ break;
+ case TGSI_OPCODE_SLT:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_SUB:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_sub(pc, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXP:
+ {
+ struct nv50_reg *t[4];
+ struct nv50_program_exec *e;
+
+ alloc_temp4(pc, t, 0);
+ emit_mov(pc, t[0], src[0][0]);
+ emit_mov(pc, t[1], src[0][1]);
+
+ e = exec(pc);
+ e->inst[0] = 0xf6400000;
+ e->inst[0] |= (unit << 9);
+ set_long(pc, e);
+ e->inst[1] |= 0x0000c004;
+ set_dst(pc, t[0], e);
+ emit(pc, e);
+
+ if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
+ if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
+ if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
+ if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
+
+ free_temp4(pc, t);
+ }
+ break;
+ case TGSI_OPCODE_XPD:
+ temp = alloc_temp(pc, NULL);
+ if (mask & (1 << 0)) {
+ emit_mul(pc, temp, src[0][2], src[1][1]);
+ emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
+ }
+ if (mask & (1 << 1)) {
+ emit_mul(pc, temp, src[0][0], src[1][2]);
+ emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
+ }
+ if (mask & (1 << 2)) {
+ emit_mul(pc, temp, src[0][1], src[1][0]);
+ emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_END:
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ if (sat) {
+ for (c = 0; c < 4; c++) {
+ struct nv50_program_exec *e;
+
+ if (!(mask & (1 << c)))
+ continue;
+ e = exec(pc);
+
+ e->inst[0] = 0xa0000000; /* cvt */
+ set_long(pc, e);
+ e->inst[1] |= (6 << 29); /* cvt */
+ e->inst[1] |= 0x04000000; /* 32 bit */
+ e->inst[1] |= (1 << 14); /* src .f32 */
+ e->inst[1] |= ((1 << 5) << 14); /* .sat */
+ set_dst(pc, rdst[c], e);
+ set_src_0(pc, dst[c], e);
+ emit(pc, e);
+ }
+ }
+
+ kill_temp_temp(pc);
+ return TRUE;
+}
+
+static boolean
+nv50_program_tx_prep(struct nv50_pc *pc)
+{
+ struct tgsi_parse_context p;
+ boolean ret = FALSE;
+ unsigned i, c;
+
+ tgsi_parse_init(&p, pc->p->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch (tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm =
+ &p.FullToken.FullImmediate;
+
+ ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
+ imm->u.ImmediateFloat32[1].Float,
+ imm->u.ImmediateFloat32[2].Float,
+ imm->u.ImmediateFloat32[3].Float);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *d;
+ unsigned last;
+
+ d = &p.FullToken.FullDeclaration;
+ last = d->DeclarationRange.Last;
+
+ switch (d->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (pc->temp_nr < (last + 1))
+ pc->temp_nr = last + 1;
+ break;
+ case TGSI_FILE_OUTPUT:
+ if (pc->result_nr < (last + 1))
+ pc->result_nr = last + 1;
+ break;
+ case TGSI_FILE_INPUT:
+ if (pc->attr_nr < (last + 1))
+ pc->attr_nr = last + 1;
+ break;
+ case TGSI_FILE_CONSTANT:
+ if (pc->param_nr < (last + 1))
+ pc->param_nr = last + 1;
+ break;
+ case TGSI_FILE_SAMPLER:
+ break;
+ default:
+ NOUVEAU_ERR("bad decl file %d\n",
+ d->Declaration.File);
+ goto out_err;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (pc->temp_nr) {
+ pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->temp)
+ goto out_err;
+
+ for (i = 0; i < pc->temp_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ pc->temp[i*4+c].type = P_TEMP;
+ pc->temp[i*4+c].hw = -1;
+ pc->temp[i*4+c].index = i;
+ }
+ }
+ }
+
+ if (pc->attr_nr) {
+ struct nv50_reg *iv = NULL;
+ int aid = 0;
+
+ pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->attr)
+ goto out_err;
+
+ if (pc->p->type == PIPE_SHADER_FRAGMENT) {
+ iv = alloc_temp(pc, NULL);
+ emit_interp(pc, iv, iv, NULL);
+ emit_flop(pc, 0, iv, iv);
+ aid++;
+ }
+
+ for (i = 0; i < pc->attr_nr; i++) {
+ struct nv50_reg *a = &pc->attr[i*4];
+
+ for (c = 0; c < 4; c++) {
+ if (pc->p->type == PIPE_SHADER_FRAGMENT) {
+ struct nv50_reg *at =
+ alloc_temp(pc, NULL);
+ pc->attr[i*4+c].type = at->type;
+ pc->attr[i*4+c].hw = at->hw;
+ pc->attr[i*4+c].index = at->index;
+ } else {
+ pc->p->cfg.vp.attr[aid/32] |=
+ (1 << (aid % 32));
+ pc->attr[i*4+c].type = P_ATTR;
+ pc->attr[i*4+c].hw = aid++;
+ pc->attr[i*4+c].index = i;
+ }
+ }
+
+ if (pc->p->type != PIPE_SHADER_FRAGMENT)
+ continue;
+
+ emit_interp(pc, &a[0], &a[0], iv);
+ emit_interp(pc, &a[1], &a[1], iv);
+ emit_interp(pc, &a[2], &a[2], iv);
+ emit_interp(pc, &a[3], &a[3], iv);
+ }
+
+ if (iv)
+ free_temp(pc, iv);
+ }
+
+ if (pc->result_nr) {
+ int rid = 0;
+
+ pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->result)
+ goto out_err;
+
+ for (i = 0; i < pc->result_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ if (pc->p->type == PIPE_SHADER_FRAGMENT) {
+ pc->result[i*4+c].type = P_TEMP;
+ pc->result[i*4+c].hw = -1;
+ } else {
+ pc->result[i*4+c].type = P_RESULT;
+ pc->result[i*4+c].hw = rid++;
+ }
+ pc->result[i*4+c].index = i;
+ }
+ }
+ }
+
+ if (pc->param_nr) {
+ int rid = 0;
+
+ pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->param)
+ goto out_err;
+
+ for (i = 0; i < pc->param_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ pc->param[i*4+c].type = P_CONST;
+ pc->param[i*4+c].hw = rid++;
+ pc->param[i*4+c].index = i;
+ }
+ }
+ }
+
+ if (pc->immd_nr) {
+ int rid = pc->param_nr * 4;
+
+ pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->immd)
+ goto out_err;
+
+ for (i = 0; i < pc->immd_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ pc->immd[i*4+c].type = P_IMMD;
+ pc->immd[i*4+c].hw = rid++;
+ pc->immd[i*4+c].index = i;
+ }
+ }
+ }
+
+ ret = TRUE;
+out_err:
+ tgsi_parse_free(&p);
+ return ret;
+}
+
+static boolean
+nv50_program_tx(struct nv50_program *p)
+{
+ struct tgsi_parse_context parse;
+ struct nv50_pc *pc;
+ boolean ret;
+
+ pc = CALLOC_STRUCT(nv50_pc);
+ if (!pc)
+ return FALSE;
+ pc->p = p;
+ pc->p->cfg.high_temp = 4;
+
+ ret = nv50_program_tx_prep(pc);
+ if (ret == FALSE)
+ goto out_cleanup;
+
+ tgsi_parse_init(&parse, pc->p->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ const union tgsi_full_token *tok = &parse.FullToken;
+
+ tgsi_parse_token(&parse);
+
+ switch (tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ ret = nv50_program_tx_insn(pc, tok);
+ if (ret == FALSE)
+ goto out_err;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (p->type == PIPE_SHADER_FRAGMENT) {
+ struct nv50_reg out;
+
+ out.type = P_TEMP;
+ for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
+ emit_mov(pc, &out, &pc->result[out.hw]);
+ }
+
+ assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
+ pc->p->exec_tail->inst[1] |= 0x00000001;
+
+ p->param_nr = pc->param_nr * 4;
+ p->immd_nr = pc->immd_nr * 4;
+ p->immd = pc->immd_buf;
+
+out_err:
+ tgsi_parse_free(&parse);
+
+out_cleanup:
+ return ret;
+}
+
+static void
+nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
+{
+ if (nv50_program_tx(p) == FALSE)
+ assert(0);
+ p->translated = TRUE;
+}
+
+static void
+nv50_program_upload_data(struct nv50_context *nv50, float *map,
+ unsigned start, unsigned count)
+{
+ while (count) {
+ unsigned nr = count > 2047 ? 2047 : count;
+
+ BEGIN_RING(tesla, 0x00000f00, 1);
+ OUT_RING ((NV50_CB_PMISC << 0) | (start << 8));
+ BEGIN_RING(tesla, 0x40000f04, nr);
+ OUT_RINGp (map, nr);
+
+ map += nr;
+ start += nr;
+ count -= nr;
+ }
+}
+
+static void
+nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct nouveau_winsys *nvws = nv50->screen->nvws;
+ struct pipe_winsys *ws = nv50->pipe.winsys;
+ unsigned nr = p->param_nr + p->immd_nr;
+
+ if (!p->data && nr) {
+ struct nouveau_resource *heap = nv50->screen->vp_data_heap;
+
+ if (nvws->res_alloc(heap, nr, p, &p->data)) {
+ while (heap->next && heap->size < nr) {
+ struct nv50_program *evict = heap->next->priv;
+ nvws->res_free(&evict->data);
+ }
+
+ if (nvws->res_alloc(heap, nr, p, &p->data))
+ assert(0);
+ }
+ }
+
+ if (p->param_nr) {
+ float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
+ PIPE_BUFFER_USAGE_CPU_READ);
+ nv50_program_upload_data(nv50, map, p->data->start,
+ p->param_nr);
+ ws->buffer_unmap(ws, nv50->constbuf[p->type]);
+ }
+
+ if (p->immd_nr) {
+ nv50_program_upload_data(nv50, p->immd,
+ p->data->start + p->param_nr,
+ p->immd_nr);
+ }
+}
+
+static void
+nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct pipe_winsys *ws = nv50->pipe.winsys;
+ struct nv50_program_exec *e;
+ struct nouveau_stateobj *so;
+ const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
+ unsigned start, count, *up, *ptr;
+ boolean upload = FALSE;
+
+ if (!p->buffer) {
+ p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4);
+ upload = TRUE;
+ }
+
+ if (p->data && p->data->start != p->data_start) {
+ for (e = p->exec_head; e; e = e->next) {
+ unsigned ei, ci;
+
+ if (e->param.index < 0)
+ continue;
+ ei = e->param.shift >> 5;
+ ci = e->param.index + p->data->start;
+
+ e->inst[ei] &= ~e->param.mask;
+ e->inst[ei] |= (ci << e->param.shift);
+ }
+
+ p->data_start = p->data->start;
+ upload = TRUE;
+ }
+
+ if (!upload)
+ return;
+
+#ifdef NV50_PROGRAM_DUMP
+ NOUVEAU_ERR("-------\n");
+ up = ptr = MALLOC(p->exec_size * 4);
+ for (e = p->exec_head; e; e = e->next) {
+ NOUVEAU_ERR("0x%08x\n", e->inst[0]);
+ if (is_long(e))
+ NOUVEAU_ERR("0x%08x\n", e->inst[1]);
+ }
+
+#endif
+
+ up = ptr = MALLOC(p->exec_size * 4);
+ for (e = p->exec_head; e; e = e->next) {
+ *(ptr++) = e->inst[0];
+ if (is_long(e))
+ *(ptr++) = e->inst[1];
+ }
+
+ so = so_new(4,2);
+ so_method(so, nv50->screen->tesla, 0x1280, 3);
+ so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
+
+ start = 0; count = p->exec_size;
+ while (count) {
+ struct nouveau_winsys *nvws = nv50->screen->nvws;
+ unsigned nr;
+
+ so_emit(nvws, so);
+
+ nr = MIN2(count, 2047);
+ nr = MIN2(nvws->channel->pushbuf->remaining, nr);
+ if (nvws->channel->pushbuf->remaining < (nr + 3)) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(tesla, 0x0f00, 1);
+ OUT_RING ((start << 8) | NV50_CB_PUPLOAD);
+ BEGIN_RING(tesla, 0x40000f04, nr);
+ OUT_RINGp (up + start, nr);
+
+ start += nr;
+ count -= nr;
+ }
+
+ FREE(up);
+ so_ref(NULL, &so);
+}
+
+void
+nv50_vertprog_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_program *p = nv50->vertprog;
+ struct nouveau_stateobj *so;
+
+ if (!p->translated) {
+ nv50_program_validate(nv50, p);
+ if (!p->translated)
+ assert(0);
+ }
+
+ nv50_program_validate_data(nv50, p);
+ nv50_program_validate_code(nv50, p);
+
+ so = so_new(13, 2);
+ so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
+ so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, tesla, 0x1650, 2);
+ so_data (so, p->cfg.vp.attr[0]);
+ so_data (so, p->cfg.vp.attr[1]);
+ so_method(so, tesla, 0x16b8, 1);
+ so_data (so, p->cfg.high_result);
+ so_method(so, tesla, 0x16ac, 2);
+ so_data (so, p->cfg.high_result); //8);
+ so_data (so, p->cfg.high_temp);
+ so_method(so, tesla, 0x140c, 1);
+ so_data (so, 0); /* program start offset */
+ so_ref(so, &nv50->state.vertprog);
+}
+
+void
+nv50_fragprog_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_program *p = nv50->fragprog;
+ struct nouveau_stateobj *so;
+
+ if (!p->translated) {
+ nv50_program_validate(nv50, p);
+ if (!p->translated)
+ assert(0);
+ }
+
+ nv50_program_validate_data(nv50, p);
+ nv50_program_validate_code(nv50, p);
+
+ so = so_new(64, 2);
+ so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
+ so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, tesla, 0x1904, 4);
+ so_data (so, 0x00040404); /* p: 0x01000404 */
+ so_data (so, 0x00000004);
+ so_data (so, 0x00000000);
+ so_data (so, 0x00000000);
+ so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
+ so_data (so, 0x03020100);
+ so_data (so, 0x07060504);
+ so_data (so, 0x0b0a0908);
+ so_method(so, tesla, 0x1988, 2);
+ so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */
+ so_data (so, p->cfg.high_temp);
+ so_method(so, tesla, 0x1414, 1);
+ so_data (so, 0); /* program start offset */
+ so_ref(so, &nv50->state.fragprog);
+}
+
+void
+nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct pipe_screen *pscreen = nv50->pipe.screen;
+
+ while (p->exec_head) {
+ struct nv50_program_exec *e = p->exec_head;
+
+ p->exec_head = e->next;
+ FREE(e);
+ }
+ p->exec_tail = NULL;
+ p->exec_size = 0;
+
+ if (p->buffer)
+ pipe_buffer_reference(pscreen, &p->buffer, NULL);
+
+ nv50->screen->nvws->res_free(&p->data);
+
+ p->translated = 0;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
new file mode 100644
index 0000000000..78deed6a38
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -0,0 +1,45 @@
+#ifndef __NV50_PROGRAM_H__
+#define __NV50_PROGRAM_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv50_program_exec {
+ struct nv50_program_exec *next;
+
+ unsigned inst[2];
+ struct {
+ int index;
+ unsigned mask;
+ unsigned shift;
+ } param;
+};
+
+struct nv50_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+ boolean translated;
+
+ unsigned type;
+ struct nv50_program_exec *exec_head;
+ struct nv50_program_exec *exec_tail;
+ unsigned exec_size;
+ struct nouveau_resource *data;
+ unsigned data_start;
+
+ struct pipe_buffer *buffer;
+
+ float *immd;
+ unsigned immd_nr;
+ unsigned param_nr;
+
+ struct {
+ unsigned high_temp;
+ unsigned high_result;
+ struct {
+ unsigned attr[2];
+ } vp;
+ } cfg;
+};
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
new file mode 100644
index 0000000000..b923c820eb
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_inlines.h"
+
+#include "nv50_context.h"
+
+struct nv50_query {
+ struct pipe_buffer *buffer;
+ unsigned type;
+ boolean ready;
+ uint64_t result;
+};
+
+static INLINE struct nv50_query *
+nv50_query(struct pipe_query *pipe)
+{
+ return (struct nv50_query *)pipe;
+}
+
+static struct pipe_query *
+nv50_query_create(struct pipe_context *pipe, unsigned type)
+{
+ struct pipe_winsys *ws = pipe->winsys;
+ struct nv50_query *q = CALLOC_STRUCT(nv50_query);
+
+ assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+ q->type = type;
+
+ q->buffer = ws->buffer_create(ws, 256, 0, 16);
+ if (!q->buffer) {
+ FREE(q);
+ return NULL;
+ }
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv50_query *q = nv50_query(pq);
+
+ if (q) {
+ pipe_buffer_reference(pipe, &q->buffer, NULL);
+ FREE(q);
+ }
+}
+
+static void
+nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_query *q = nv50_query(pq);
+
+ BEGIN_RING(tesla, 0x1530, 1);
+ OUT_RING (1);
+ BEGIN_RING(tesla, 0x1514, 1);
+ OUT_RING (1);
+
+ q->ready = FALSE;
+}
+
+static void
+nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_query *q = nv50_query(pq);
+
+ BEGIN_RING(tesla, 0x1b00, 4);
+ OUT_RELOCh(q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RING (0x00000000);
+ OUT_RING (0x0100f002);
+ FIRE_RING (NULL);
+}
+
+static boolean
+nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, uint64_t *result)
+{
+ struct pipe_winsys *ws = pipe->winsys;
+ struct nv50_query *q = nv50_query(pq);
+
+ /*XXX: Want to be able to return FALSE here instead of blocking
+ * until the result is available..
+ */
+
+ if (!q->ready) {
+ uint32_t *map = ws->buffer_map(ws, q->buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ q->result = map[1];
+ q->ready = TRUE;
+ ws->buffer_unmap(ws, q->buffer);
+ }
+
+ *result = q->result;
+ return q->ready;
+}
+
+void
+nv50_init_query_functions(struct nv50_context *nv50)
+{
+ nv50->pipe.create_query = nv50_query_create;
+ nv50->pipe.destroy_query = nv50_query_destroy;
+ nv50->pipe.begin_query = nv50_query_begin;
+ nv50->pipe.end_query = nv50_query_end;
+ nv50->pipe.get_query_result = nv50_query_result;
+}
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
new file mode 100644
index 0000000000..ef46233f83
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_screen.h"
+
+#include "nv50_context.h"
+#include "nv50_screen.h"
+
+#include "nouveau/nouveau_stateobj.h"
+
+#define NV5X_GRCLASS5097_CHIPSETS 0x00000001
+#define NV8X_GRCLASS8297_CHIPSETS 0x00000050
+#define NV9X_GRCLASS8297_CHIPSETS 0x00000014
+
+static boolean
+nv50_screen_is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static const char *
+nv50_screen_get_name(struct pipe_screen *pscreen)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nouveau_device *dev = screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv50_screen_get_vendor(struct pipe_screen *pscreen)
+{
+ return "nouveau";
+}
+
+static int
+nv50_screen_get_param(struct pipe_screen *pscreen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 32;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 1;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 8;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 13;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 13;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 0;
+ case NOUVEAU_CAP_HW_VTXBUF:
+ return 1;
+ case NOUVEAU_CAP_HW_IDXBUF:
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv50_screen_get_paramf(struct pipe_screen *pscreen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 16.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static void
+nv50_screen_destroy(struct pipe_screen *pscreen)
+{
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
+ struct nouveau_stateobj *so;
+ unsigned tesla_class = 0, ret;
+ unsigned chipset = nvws->channel->device->chipset;
+ int i;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ /* 3D object */
+ if ((chipset & 0xf0) != 0x50 && (chipset & 0xf0) != 0x80) {
+ NOUVEAU_ERR("Not a G8x chipset\n");
+ nv50_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ switch (chipset & 0xf0) {
+ case 0x50:
+ if (NV5X_GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f)))
+ tesla_class = 0x5097;
+ break;
+ case 0x80:
+ if (NV8X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f)))
+ tesla_class = 0x8297;
+ break;
+ case 0x90:
+ if (NV9X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f)))
+ tesla_class = 0x8297;
+ break;
+ default:
+ break;
+ }
+
+ if (tesla_class == 0) {
+ NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset);
+ nv50_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ ret = nvws->grobj_alloc(nvws, tesla_class, &screen->tesla);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ nv50_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Sync notifier */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv50_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Static tesla init */
+ so = so_new(256, 20);
+
+ so_method(so, screen->tesla, 0x1558, 1);
+ so_data (so, 1);
+ so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
+ so_data (so, screen->sync->handle);
+ so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0),
+ NV50TCL_DMA_UNK0__SIZE);
+ for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++)
+ so_data(so, nvws->channel->vram->handle);
+ so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0),
+ NV50TCL_DMA_UNK1__SIZE);
+ for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++)
+ so_data(so, nvws->channel->vram->handle);
+ so_method(so, screen->tesla, 0x121c, 1);
+ so_data (so, 1);
+
+ so_method(so, screen->tesla, 0x13bc, 1);
+ so_data (so, 0x54);
+ so_method(so, screen->tesla, 0x13ac, 1);
+ so_data (so, 1);
+ so_method(so, screen->tesla, 0x16b8, 1);
+ so_data (so, 8);
+
+ /* Shared constant buffer */
+ screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4);
+ if (nvws->res_init(&screen->vp_data_heap, 0, 128)) {
+ NOUVEAU_ERR("Error initialising constant buffer\n");
+ nv50_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ so_method(so, screen->tesla, 0x1280, 3);
+ so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_PMISC << 16) | 0x00001000);
+
+ /* Texture sampler/image unit setup - we abuse the constant buffer
+ * upload mechanism for the moment to upload data to the tex config
+ * blocks. At some point we *may* want to go the NVIDIA way of doing
+ * things?
+ */
+ screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4);
+ so_method(so, screen->tesla, 0x1280, 3);
+ so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_TIC << 16) | 0x0800);
+ so_method(so, screen->tesla, 0x1574, 3);
+ so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, 0x00000800);
+
+ screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4);
+ so_method(so, screen->tesla, 0x1280, 3);
+ so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_TSC << 16) | 0x0800);
+ so_method(so, screen->tesla, 0x155c, 3);
+ so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, 0x00000800);
+
+
+ /* Vertex array limits - max them out */
+ for (i = 0; i < 16; i++) {
+ so_method(so, screen->tesla, 0x1080 + (i * 8), 2);
+ so_data (so, 0x000000ff);
+ so_data (so, 0xffffffff);
+ }
+
+ so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2);
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+
+ so_method(so, screen->tesla, 0x1234, 1);
+ so_data (so, 1);
+ so_method(so, screen->tesla, 0x1458, 1);
+ so_data (so, 1);
+
+ so_emit(nvws, so);
+ so_ref(so, &screen->static_init);
+ nvws->push_flush(nvws, 0, NULL);
+
+ screen->pipe.winsys = ws;
+
+ screen->pipe.destroy = nv50_screen_destroy;
+
+ screen->pipe.get_name = nv50_screen_get_name;
+ screen->pipe.get_vendor = nv50_screen_get_vendor;
+ screen->pipe.get_param = nv50_screen_get_param;
+ screen->pipe.get_paramf = nv50_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv50_screen_is_format_supported;
+
+ nv50_screen_init_miptree_functions(&screen->pipe);
+ nv50_surface_init_screen_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
new file mode 100644
index 0000000000..400ddcef06
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -0,0 +1,33 @@
+#ifndef __NV50_SCREEN_H__
+#define __NV50_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv50_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ unsigned cur_pctx;
+
+ struct nouveau_grobj *tesla;
+ struct nouveau_notifier *sync;
+
+ struct pipe_buffer *constbuf;
+ struct nouveau_resource *vp_data_heap;
+
+ struct pipe_buffer *tic;
+ struct pipe_buffer *tsc;
+
+ struct nouveau_stateobj *static_init;
+};
+
+static INLINE struct nv50_screen *
+nv50_screen(struct pipe_screen *screen)
+{
+ return (struct nv50_screen *)screen;
+}
+
+void nv50_surface_init_screen_functions(struct pipe_screen *);
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
new file mode 100644
index 0000000000..38c1d938b8
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -0,0 +1,664 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv50_context.h"
+#include "nv50_texture.h"
+
+#include "nouveau/nouveau_stateobj.h"
+
+static void *
+nv50_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nouveau_stateobj *so = so_new(64, 0);
+ struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
+ struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj);
+ unsigned cmask = 0, i;
+
+ /*XXX ignored:
+ * - dither
+ */
+
+ if (cso->blend_enable == 0) {
+ so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8);
+ for (i = 0; i < 8; i++)
+ so_data(so, 0);
+ } else {
+ so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8);
+ for (i = 0; i < 8; i++)
+ so_data(so, 1);
+ so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5);
+ so_data (so, nvgl_blend_eqn(cso->rgb_func));
+ so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_src_factor));
+ so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_dst_factor));
+ so_data (so, nvgl_blend_eqn(cso->alpha_func));
+ so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_src_factor));
+ so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1);
+ so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_dst_factor));
+ }
+
+ if (cso->logicop_enable == 0 ) {
+ so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 1);
+ so_data (so, 0);
+ } else {
+ so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 2);
+ so_data (so, 1);
+ so_data (so, nvgl_logicop_func(cso->logicop_func));
+ }
+
+ if (cso->colormask & PIPE_MASK_R)
+ cmask |= (1 << 0);
+ if (cso->colormask & PIPE_MASK_G)
+ cmask |= (1 << 4);
+ if (cso->colormask & PIPE_MASK_B)
+ cmask |= (1 << 8);
+ if (cso->colormask & PIPE_MASK_A)
+ cmask |= (1 << 12);
+ so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8);
+ for (i = 0; i < 8; i++)
+ so_data(so, cmask);
+
+ bso->pipe = *cso;
+ so_ref(so, &bso->so);
+ return (void *)bso;
+}
+
+static void
+nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->blend = hwcso;
+ nv50->dirty |= NV50_NEW_BLEND;
+}
+
+static void
+nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_blend_stateobj *bso = hwcso;
+
+ so_ref(NULL, &bso->so);
+ FREE(bso);
+}
+
+static INLINE unsigned
+wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return NV50TSC_1_0_WRAPS_REPEAT;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return NV50TSC_1_0_WRAPS_MIRROR_REPEAT;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER;
+ case PIPE_TEX_WRAP_CLAMP:
+ return NV50TSC_1_0_WRAPS_CLAMP;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ return NV50TSC_1_0_WRAPS_MIRROR_CLAMP;
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ return NV50TSC_1_0_WRAPS_REPEAT;
+ }
+}
+static void *
+nv50_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ unsigned *tsc = CALLOC(8, sizeof(unsigned));
+
+ tsc[0] = (0x00024000 |
+ (wrap_mode(cso->wrap_s) << 0) |
+ (wrap_mode(cso->wrap_t) << 3) |
+ (wrap_mode(cso->wrap_r) << 6));
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ tsc[1] |= NV50TSC_1_1_MAGF_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ tsc[1] |= NV50TSC_1_1_MAGF_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ tsc[1] |= NV50TSC_1_1_MINF_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ tsc[1] |= NV50TSC_1_1_MINF_NEAREST;
+ break;
+ }
+
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ tsc[1] |= NV50TSC_1_1_MIPF_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ tsc[1] |= NV50TSC_1_1_MIPF_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ tsc[1] |= NV50TSC_1_1_MIPF_NONE;
+ break;
+ }
+
+ if (cso->max_anisotropy >= 16.0)
+ tsc[0] |= (7 << 20);
+ else
+ if (cso->max_anisotropy >= 12.0)
+ tsc[0] |= (6 << 20);
+ else
+ if (cso->max_anisotropy >= 10.0)
+ tsc[0] |= (5 << 20);
+ else
+ if (cso->max_anisotropy >= 8.0)
+ tsc[0] |= (4 << 20);
+ else
+ if (cso->max_anisotropy >= 6.0)
+ tsc[0] |= (3 << 20);
+ else
+ if (cso->max_anisotropy >= 4.0)
+ tsc[0] |= (2 << 20);
+ else
+ if (cso->max_anisotropy >= 2.0)
+ tsc[0] |= (1 << 20);
+
+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ tsc[0] |= (1 << 8);
+ tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7);
+ }
+
+ return (void *)tsc;
+}
+
+static void
+nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ int i;
+
+ nv50->sampler_nr = nr;
+ for (i = 0; i < nv50->sampler_nr; i++)
+ nv50->sampler[i] = sampler[i];
+
+ nv50->dirty |= NV50_NEW_SAMPLER;
+}
+
+static void
+nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **pt)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ int i;
+
+ for (i = 0; i < nr; i++)
+ pipe_texture_reference((void *)&nv50->miptree[i], pt[i]);
+ for (i = nr; i < nv50->miptree_nr; i++)
+ pipe_texture_reference((void *)&nv50->miptree[i], NULL);
+
+ nv50->miptree_nr = nr;
+ nv50->dirty |= NV50_NEW_TEXTURE;
+}
+
+static void *
+nv50_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nouveau_stateobj *so = so_new(64, 0);
+ struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
+ struct nv50_rasterizer_stateobj *rso =
+ CALLOC_STRUCT(nv50_rasterizer_stateobj);
+
+ /*XXX: ignored
+ * - light_twosize
+ * - point_smooth
+ * - multisample
+ * - point_sprite / sprite_coord_mode
+ */
+
+ so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
+ so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
+ NV50TCL_SHADE_MODEL_SMOOTH);
+
+ so_method(so, tesla, NV50TCL_LINE_WIDTH, 1);
+ so_data (so, fui(cso->line_width));
+ so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1);
+ so_data (so, cso->line_smooth ? 1 : 0);
+ if (cso->line_stipple_enable) {
+ so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1);
+ so_data (so, 1);
+ so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1);
+ so_data (so, (cso->line_stipple_pattern << 8) |
+ cso->line_stipple_factor);
+ } else {
+ so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, tesla, NV50TCL_POINT_SIZE, 1);
+ so_data (so, fui(cso->point_size));
+
+ so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3);
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ } else {
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ }
+ so_data(so, cso->poly_smooth ? 1 : 0);
+
+ so_method(so, tesla, NV50TCL_CULL_FACE_ENABLE, 3);
+ so_data (so, cso->cull_mode != PIPE_WINDING_NONE);
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ so_data(so, NV50TCL_FRONT_FACE_CCW);
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV50TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV50TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV50TCL_CULL_FACE_BACK);
+ break;
+ }
+ } else {
+ so_data(so, NV50TCL_FRONT_FACE_CW);
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV50TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV50TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV50TCL_CULL_FACE_BACK);
+ break;
+ }
+ }
+
+ so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, cso->poly_stipple_enable ? 1 : 0);
+
+ so_method(so, tesla, NV50TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+
+ if (cso->offset_cw || cso->offset_ccw) {
+ so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1);
+ so_data (so, fui(cso->offset_scale));
+ so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1);
+ so_data (so, fui(cso->offset_units));
+ }
+
+ rso->pipe = *cso;
+ so_ref(so, &rso->so);
+ return (void *)rso;
+}
+
+static void
+nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->rasterizer = hwcso;
+ nv50->dirty |= NV50_NEW_RASTERIZER;
+}
+
+static void
+nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_rasterizer_stateobj *rso = hwcso;
+
+ so_ref(NULL, &rso->so);
+ FREE(rso);
+}
+
+static void *
+nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
+ struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj);
+ struct nouveau_stateobj *so = so_new(64, 0);
+
+ so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1);
+ so_data (so, cso->depth.writemask ? 1 : 0);
+ if (cso->depth.enabled) {
+ so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1);
+ so_data (so, 1);
+ so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1);
+ so_data (so, nvgl_comparison_op(cso->depth.func));
+ } else {
+ so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ /*XXX: yes, I know they're backwards.. header needs fixing */
+ if (cso->stencil[0].enabled) {
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5);
+ so_data (so, 1);
+ so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ so_data (so, nvgl_comparison_op(cso->stencil[0].func));
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3);
+ so_data (so, cso->stencil[0].ref_value);
+ so_data (so, cso->stencil[0].write_mask);
+ so_data (so, cso->stencil[0].value_mask);
+ } else {
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8);
+ so_data (so, 1);
+ so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ so_data (so, nvgl_comparison_op(cso->stencil[1].func));
+ so_data (so, cso->stencil[1].ref_value);
+ so_data (so, cso->stencil[1].write_mask);
+ so_data (so, cso->stencil[1].value_mask);
+ } else {
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ if (cso->alpha.enabled) {
+ so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1);
+ so_data (so, 1);
+ so_method(so, tesla, NV50TCL_ALPHA_TEST_REF, 2);
+ so_data (so, fui(cso->alpha.ref));
+ so_data (so, nvgl_comparison_op(cso->alpha.func));
+ } else {
+ so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ zsa->pipe = *cso;
+ so_ref(so, &zsa->so);
+ return (void *)zsa;
+}
+
+static void
+nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->zsa = hwcso;
+ nv50->dirty |= NV50_NEW_ZSA;
+}
+
+static void
+nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_zsa_stateobj *zsa = hwcso;
+
+ so_ref(NULL, &zsa->so);
+ FREE(zsa);
+}
+
+static void *
+nv50_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv50_program *p = CALLOC_STRUCT(nv50_program);
+
+ p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ p->type = PIPE_SHADER_VERTEX;
+ tgsi_scan_shader(p->pipe.tokens, &p->info);
+ return (void *)p;
+}
+
+static void
+nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->vertprog = hwcso;
+ nv50->dirty |= NV50_NEW_VERTPROG;
+}
+
+static void
+nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_program *p = hwcso;
+
+ nv50_program_destroy(nv50, p);
+ FREE((void*)p->pipe.tokens);
+ FREE(p);
+}
+
+static void *
+nv50_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv50_program *p = CALLOC_STRUCT(nv50_program);
+
+ p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ p->type = PIPE_SHADER_FRAGMENT;
+ tgsi_scan_shader(p->pipe.tokens, &p->info);
+ return (void *)p;
+}
+
+static void
+nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->fragprog = hwcso;
+ nv50->dirty |= NV50_NEW_FRAGPROG;
+}
+
+static void
+nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_program *p = hwcso;
+
+ nv50_program_destroy(nv50, p);
+ FREE((void*)p->pipe.tokens);
+ FREE(p);
+}
+
+static void
+nv50_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->blend_colour = *bcol;
+ nv50->dirty |= NV50_NEW_BLEND_COLOUR;
+}
+
+static void
+nv50_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+}
+
+static void
+nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ if (shader == PIPE_SHADER_VERTEX) {
+ nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer;
+ nv50->dirty |= NV50_NEW_VERTPROG_CB;
+ } else
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer;
+ nv50->dirty |= NV50_NEW_FRAGPROG_CB;
+ }
+}
+
+static void
+nv50_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->framebuffer = *fb;
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+}
+
+static void
+nv50_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->stipple = *stipple;
+ nv50->dirty |= NV50_NEW_STIPPLE;
+}
+
+static void
+nv50_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->scissor = *s;
+ nv50->dirty |= NV50_NEW_SCISSOR;
+}
+
+static void
+nv50_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->viewport = *vpt;
+ nv50->dirty |= NV50_NEW_VIEWPORT;
+}
+
+static void
+nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count);
+ nv50->vtxbuf_nr = count;
+
+ nv50->dirty |= NV50_NEW_ARRAYS;
+}
+
+static void
+nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ memcpy(nv50->vtxelt, ve, sizeof(*ve) * count);
+ nv50->vtxelt_nr = count;
+
+ nv50->dirty |= NV50_NEW_ARRAYS;
+}
+
+void
+nv50_init_state_functions(struct nv50_context *nv50)
+{
+ nv50->pipe.create_blend_state = nv50_blend_state_create;
+ nv50->pipe.bind_blend_state = nv50_blend_state_bind;
+ nv50->pipe.delete_blend_state = nv50_blend_state_delete;
+
+ nv50->pipe.create_sampler_state = nv50_sampler_state_create;
+ nv50->pipe.bind_sampler_states = nv50_sampler_state_bind;
+ nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
+ nv50->pipe.set_sampler_textures = nv50_set_sampler_texture;
+
+ nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
+ nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
+ nv50->pipe.delete_rasterizer_state = nv50_rasterizer_state_delete;
+
+ nv50->pipe.create_depth_stencil_alpha_state =
+ nv50_depth_stencil_alpha_state_create;
+ nv50->pipe.bind_depth_stencil_alpha_state =
+ nv50_depth_stencil_alpha_state_bind;
+ nv50->pipe.delete_depth_stencil_alpha_state =
+ nv50_depth_stencil_alpha_state_delete;
+
+ nv50->pipe.create_vs_state = nv50_vp_state_create;
+ nv50->pipe.bind_vs_state = nv50_vp_state_bind;
+ nv50->pipe.delete_vs_state = nv50_vp_state_delete;
+
+ nv50->pipe.create_fs_state = nv50_fp_state_create;
+ nv50->pipe.bind_fs_state = nv50_fp_state_bind;
+ nv50->pipe.delete_fs_state = nv50_fp_state_delete;
+
+ nv50->pipe.set_blend_color = nv50_set_blend_color;
+ nv50->pipe.set_clip_state = nv50_set_clip_state;
+ nv50->pipe.set_constant_buffer = nv50_set_constant_buffer;
+ nv50->pipe.set_framebuffer_state = nv50_set_framebuffer_state;
+ nv50->pipe.set_polygon_stipple = nv50_set_polygon_stipple;
+ nv50->pipe.set_scissor_state = nv50_set_scissor_state;
+ nv50->pipe.set_viewport_state = nv50_set_viewport_state;
+
+ nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers;
+ nv50->pipe.set_vertex_elements = nv50_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
new file mode 100644
index 0000000000..198e25f448
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nouveau/nouveau_stateobj.h"
+
+static void
+nv50_state_validate_fb(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(128, 18);
+ struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ unsigned i, w, h, gw = 0;
+
+ for (i = 0; i < fb->num_cbufs; i++) {
+ if (!gw) {
+ w = fb->cbufs[i]->width;
+ h = fb->cbufs[i]->height;
+ gw = 1;
+ } else {
+ assert(w == fb->cbufs[i]->width);
+ assert(h == fb->cbufs[i]->height);
+ }
+
+ so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2);
+ so_data (so, fb->cbufs[i]->width);
+ so_data (so, fb->cbufs[i]->height);
+
+ so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5);
+ so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0);
+ switch (fb->cbufs[i]->format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ so_data(so, 0xcf);
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ so_data(so, 0xe8);
+ break;
+ default:
+ NOUVEAU_ERR("AIIII unknown format %s\n",
+ pf_name(fb->cbufs[i]->format));
+ so_data(so, 0xe6);
+ break;
+ }
+ so_data(so, 0x00000000);
+ so_data(so, 0x00000000);
+
+ so_method(so, tesla, 0x1224, 1);
+ so_data (so, 1);
+ }
+
+ if (fb->zsbuf) {
+ if (!gw) {
+ w = fb->zsbuf->width;
+ h = fb->zsbuf->height;
+ gw = 1;
+ } else {
+ assert(w == fb->zsbuf->width);
+ assert(h == fb->zsbuf->height);
+ }
+
+ so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
+ so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0);
+ switch (fb->zsbuf->format) {
+ case PIPE_FORMAT_Z24S8_UNORM:
+ so_data(so, 0x16);
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ so_data(so, 0x15);
+ break;
+ default:
+ NOUVEAU_ERR("AIIII unknown format %s\n",
+ pf_name(fb->zsbuf->format));
+ so_data(so, 0x16);
+ break;
+ }
+ so_data(so, 0x00000000);
+ so_data(so, 0x00000000);
+
+ so_method(so, tesla, 0x1538, 1);
+ so_data (so, 1);
+ so_method(so, tesla, 0x1228, 3);
+ so_data (so, fb->zsbuf->width);
+ so_data (so, fb->zsbuf->height);
+ so_data (so, 0x00010001);
+ }
+
+ so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2);
+ so_data (so, w << 16);
+ so_data (so, h << 16);
+ so_method(so, tesla, 0x0e04, 2);
+ so_data (so, w << 16);
+ so_data (so, h << 16);
+ so_method(so, tesla, 0xdf8, 2);
+ so_data (so, 0);
+ so_data (so, h);
+
+ so_ref(so, &nv50->state.fb);
+}
+
+static void
+nv50_state_emit(struct nv50_context *nv50)
+{
+ struct nv50_screen *screen = nv50->screen;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ if (nv50->pctx_id != screen->cur_pctx) {
+ nv50->state.dirty |= 0xffffffff;
+ screen->cur_pctx = nv50->pctx_id;
+ }
+
+ if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER)
+ so_emit(nvws, nv50->state.fb);
+ if (nv50->state.dirty & NV50_NEW_BLEND)
+ so_emit(nvws, nv50->state.blend);
+ if (nv50->state.dirty & NV50_NEW_ZSA)
+ so_emit(nvws, nv50->state.zsa);
+ if (nv50->state.dirty & NV50_NEW_VERTPROG)
+ so_emit(nvws, nv50->state.vertprog);
+ if (nv50->state.dirty & NV50_NEW_FRAGPROG)
+ so_emit(nvws, nv50->state.fragprog);
+ if (nv50->state.dirty & NV50_NEW_RASTERIZER)
+ so_emit(nvws, nv50->state.rast);
+ if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
+ so_emit(nvws, nv50->state.blend_colour);
+ if (nv50->state.dirty & NV50_NEW_STIPPLE)
+ so_emit(nvws, nv50->state.stipple);
+ if (nv50->state.dirty & NV50_NEW_SCISSOR)
+ so_emit(nvws, nv50->state.scissor);
+ if (nv50->state.dirty & NV50_NEW_VIEWPORT)
+ so_emit(nvws, nv50->state.viewport);
+ if (nv50->state.dirty & NV50_NEW_SAMPLER)
+ so_emit(nvws, nv50->state.tsc_upload);
+ if (nv50->state.dirty & NV50_NEW_TEXTURE)
+ so_emit(nvws, nv50->state.tic_upload);
+ if (nv50->state.dirty & NV50_NEW_ARRAYS) {
+ so_emit(nvws, nv50->state.vtxfmt);
+ so_emit(nvws, nv50->state.vtxbuf);
+ }
+ nv50->state.dirty = 0;
+
+ so_emit_reloc_markers(nvws, nv50->state.fb);
+ so_emit_reloc_markers(nvws, nv50->state.vertprog);
+ so_emit_reloc_markers(nvws, nv50->state.fragprog);
+ so_emit_reloc_markers(nvws, nv50->state.vtxbuf);
+ so_emit_reloc_markers(nvws, nv50->screen->static_init);
+}
+
+boolean
+nv50_state_validate(struct nv50_context *nv50)
+{
+ const struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so;
+ unsigned i;
+
+ for (i = 0; i < fb->num_cbufs; i++)
+ fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
+
+ if (fb->zsbuf)
+ fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
+
+ if (nv50->dirty & NV50_NEW_FRAMEBUFFER)
+ nv50_state_validate_fb(nv50);
+
+ if (nv50->dirty & NV50_NEW_BLEND)
+ so_ref(nv50->blend->so, &nv50->state.blend);
+
+ if (nv50->dirty & NV50_NEW_ZSA)
+ so_ref(nv50->zsa->so, &nv50->state.zsa);
+
+ if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB))
+ nv50_vertprog_validate(nv50);
+
+ if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
+ nv50_fragprog_validate(nv50);
+
+ if (nv50->dirty & NV50_NEW_RASTERIZER)
+ so_ref(nv50->rasterizer->so, &nv50->state.rast);
+
+ if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
+ so = so_new(5, 0);
+ so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
+ so_data (so, fui(nv50->blend_colour.color[0]));
+ so_data (so, fui(nv50->blend_colour.color[1]));
+ so_data (so, fui(nv50->blend_colour.color[2]));
+ so_data (so, fui(nv50->blend_colour.color[3]));
+ so_ref(so, &nv50->state.blend_colour);
+ }
+
+ if (nv50->dirty & NV50_NEW_STIPPLE) {
+ so = so_new(33, 0);
+ so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+ for (i = 0; i < 32; i++)
+ so_data(so, nv50->stipple.stipple[i]);
+ so_ref(so, &nv50->state.stipple);
+ }
+
+ if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) {
+ struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe;
+ struct pipe_scissor_state *s = &nv50->scissor;
+
+ if (nv50->state.scissor &&
+ (rast->scissor == 0 && nv50->state.scissor_enabled == 0))
+ goto scissor_uptodate;
+ nv50->state.scissor_enabled = rast->scissor;
+
+ so = so_new(3, 0);
+ so_method(so, tesla, 0x0ff4, 2);
+ if (nv50->state.scissor_enabled) {
+ so_data(so, ((s->maxx - s->minx) << 16) | s->minx);
+ so_data(so, ((s->maxy - s->miny) << 16) | s->miny);
+ } else {
+ so_data(so, (8192 << 16));
+ so_data(so, (8192 << 16));
+ }
+ so_ref(so, &nv50->state.scissor);
+ nv50->state.dirty |= NV50_NEW_SCISSOR;
+ }
+scissor_uptodate:
+
+ if (nv50->dirty & NV50_NEW_VIEWPORT) {
+ unsigned bypass;
+
+ if (!nv50->rasterizer->pipe.bypass_clipping)
+ bypass = 0;
+ else
+ bypass = 1;
+
+ if (nv50->state.viewport &&
+ (bypass || !(nv50->dirty & NV50_NEW_VIEWPORT)) &&
+ nv50->state.viewport_bypass == bypass)
+ goto viewport_uptodate;
+ nv50->state.viewport_bypass = bypass;
+
+ so = so_new(12, 0);
+ if (!bypass) {
+ so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3);
+ so_data (so, fui(nv50->viewport.translate[0]));
+ so_data (so, fui(nv50->viewport.translate[1]));
+ so_data (so, fui(nv50->viewport.translate[2]));
+ so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3);
+ so_data (so, fui(nv50->viewport.scale[0]));
+ so_data (so, fui(-nv50->viewport.scale[1]));
+ so_data (so, fui(nv50->viewport.scale[2]));
+ so_method(so, tesla, 0x192c, 1);
+ so_data (so, 1);
+ so_method(so, tesla, 0x0f90, 1);
+ so_data (so, 0);
+ } else {
+ so_method(so, tesla, 0x192c, 1);
+ so_data (so, 0);
+ so_method(so, tesla, 0x0f90, 1);
+ so_data (so, 1);
+ }
+
+ so_ref(so, &nv50->state.viewport);
+ }
+viewport_uptodate:
+
+ if (nv50->dirty & NV50_NEW_SAMPLER) {
+ int i;
+
+ so = so_new(nv50->sampler_nr * 8 + 3, 0);
+ so_method(so, tesla, 0x0f00, 1);
+ so_data (so, NV50_CB_TSC);
+ so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8);
+ for (i = 0; i < nv50->sampler_nr; i++)
+ so_datap (so, nv50->sampler[i], 8);
+ so_ref(so, &nv50->state.tsc_upload);
+ }
+
+ if (nv50->dirty & NV50_NEW_TEXTURE)
+ nv50_tex_validate(nv50);
+
+ if (nv50->dirty & NV50_NEW_ARRAYS)
+ nv50_vbo_validate(nv50);
+
+ nv50->state.dirty |= nv50->dirty;
+ nv50->dirty = 0;
+ nv50_state_emit(nv50);
+
+ return TRUE;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
new file mode 100644
index 0000000000..3f45a2fe18
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+
+#include "util/u_tile.h"
+
+static void
+nv50_surface_copy(struct pipe_context *pipe, boolean flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)pipe;
+ struct nouveau_winsys *nvws = nv50->screen->nvws;
+
+ if (flip) {
+ desty += height;
+ while (height--) {
+ nvws->surface_copy(nvws, dest, destx, desty--, src,
+ srcx, srcy++, width, 1);
+ }
+ } else {
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+ }
+}
+
+static void
+nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)pipe;
+ struct nouveau_winsys *nvws = nv50->screen->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+static void *
+nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+
+ return ws->buffer_map(ws, ps->buffer, flags);
+}
+
+static void
+nv50_surface_unmap(struct pipe_screen *pscreen, struct pipe_surface *ps)
+{
+ struct pipe_winsys *ws = pscreen->winsys;
+
+ ws->buffer_unmap(ws, ps->buffer);
+}
+
+void
+nv50_init_surface_functions(struct nv50_context *nv50)
+{
+ nv50->pipe.surface_copy = nv50_surface_copy;
+ nv50->pipe.surface_fill = nv50_surface_fill;
+}
+
+void
+nv50_surface_init_screen_functions(struct pipe_screen *pscreen)
+{
+ pscreen->surface_map = nv50_surface_map;
+ pscreen->surface_unmap = nv50_surface_unmap;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
new file mode 100644
index 0000000000..239407c92b
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nv50_texture.h"
+
+#include "nouveau/nouveau_stateobj.h"
+
+static int
+nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt)
+{
+ switch (mt->base.format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_8_8_8_8);
+ break;
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_1_5_5_5);
+ break;
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_4_4_4_4);
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_5_6_5);
+ break;
+ case PIPE_FORMAT_L8_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_8);
+ break;
+ case PIPE_FORMAT_A8_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_8);
+ break;
+ case PIPE_FORMAT_I8_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_8);
+ break;
+ case PIPE_FORMAT_A8L8_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_8_8);
+ break;
+ case PIPE_FORMAT_DXT1_RGB:
+ so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_DXT1);
+ break;
+ case PIPE_FORMAT_DXT1_RGBA:
+ so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_DXT1);
+ break;
+ case PIPE_FORMAT_DXT3_RGBA:
+ so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_DXT3);
+ break;
+ case PIPE_FORMAT_DXT5_RGBA:
+ so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_DXT5);
+ break;
+ default:
+ return 1;
+ }
+
+ so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, 0xd0005000);
+ so_data (so, 0x00300000);
+ so_data (so, mt->base.width[0]);
+ so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]);
+ so_data (so, 0x03000000);
+ so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0);
+
+ return 0;
+}
+
+void
+nv50_tex_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so;
+ int unit, level, image;
+
+ so = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2);
+ so_method(so, tesla, 0x0f00, 1);
+ so_data (so, NV50_CB_TIC);
+ so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8);
+ for (unit = 0; unit < nv50->miptree_nr; unit++) {
+ struct nv50_miptree *mt = nv50->miptree[unit];
+
+ for (level = 0; level <= mt->base.last_level; level++) {
+ for (image = 0; image < mt->image_nr; image++) {
+ nv50_miptree_sync(&nv50->screen->pipe, mt,
+ level, image);
+ }
+ }
+
+ if (nv50_tex_construct(so, mt)) {
+ NOUVEAU_ERR("failed tex validate\n");
+ so_ref(NULL, &so);
+ return;
+ }
+ }
+
+ so_ref(so, &nv50->state.tic_upload);
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
new file mode 100644
index 0000000000..aca622c73b
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -0,0 +1,129 @@
+#ifndef __NV50_TEXTURE_H__
+#define __NV50_TEXTURE_H__
+
+/* It'd be really nice to have these in nouveau_class.h generated by
+ * renouveau like the rest of the object header - but not sure it can
+ * handle non-object stuff nicely - need to look into it.
+ */
+
+/* Texture image control block */
+#define NV50TIC_0_0_MAPA_MASK 0x38000000
+#define NV50TIC_0_0_MAPA_ZERO 0x00000000
+#define NV50TIC_0_0_MAPA_C0 0x10000000
+#define NV50TIC_0_0_MAPA_C1 0x18000000
+#define NV50TIC_0_0_MAPA_C2 0x20000000
+#define NV50TIC_0_0_MAPA_C3 0x28000000
+#define NV50TIC_0_0_MAPA_ONE 0x38000000
+#define NV50TIC_0_0_MAPR_MASK 0x07000000
+#define NV50TIC_0_0_MAPR_ZERO 0x00000000
+#define NV50TIC_0_0_MAPR_C0 0x02000000
+#define NV50TIC_0_0_MAPR_C1 0x03000000
+#define NV50TIC_0_0_MAPR_C2 0x04000000
+#define NV50TIC_0_0_MAPR_C3 0x05000000
+#define NV50TIC_0_0_MAPR_ONE 0x07000000
+#define NV50TIC_0_0_MAPG_MASK 0x00e00000
+#define NV50TIC_0_0_MAPG_ZERO 0x00000000
+#define NV50TIC_0_0_MAPG_C0 0x00400000
+#define NV50TIC_0_0_MAPG_C1 0x00600000
+#define NV50TIC_0_0_MAPG_C2 0x00800000
+#define NV50TIC_0_0_MAPG_C3 0x00a00000
+#define NV50TIC_0_0_MAPG_ONE 0x00e00000
+#define NV50TIC_0_0_MAPB_MASK 0x001c0000
+#define NV50TIC_0_0_MAPB_ZERO 0x00000000
+#define NV50TIC_0_0_MAPB_C0 0x00080000
+#define NV50TIC_0_0_MAPB_C1 0x000c0000
+#define NV50TIC_0_0_MAPB_C2 0x00100000
+#define NV50TIC_0_0_MAPB_C3 0x00140000
+#define NV50TIC_0_0_MAPB_ONE 0x001c0000
+#define NV50TIC_0_0_TYPEA_MASK 0x00038000
+#define NV50TIC_0_0_TYPEA_UNORM 0x00010000
+#define NV50TIC_0_0_TYPER_MASK 0x00007000
+#define NV50TIC_0_0_TYPER_UNORM 0x00002000
+#define NV50TIC_0_0_TYPEG_MASK 0x00000e00
+#define NV50TIC_0_0_TYPEG_UNORM 0x00000400
+#define NV50TIC_0_0_TYPEB_MASK 0x000001c0
+#define NV50TIC_0_0_TYPEB_UNORM 0x00000080
+#define NV50TIC_0_0_FMT_MASK 0x0000003c
+#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008
+#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012
+#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013
+#define NV50TIC_0_0_FMT_5_6_5 0x00000015
+#define NV50TIC_0_0_FMT_8_8 0x00000018
+#define NV50TIC_0_0_FMT_8 0x0000001d
+#define NV50TIC_0_0_FMT_DXT1 0x00000024
+#define NV50TIC_0_0_FMT_DXT3 0x00000025
+#define NV50TIC_0_0_FMT_DXT5 0x00000026
+
+#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
+#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0
+
+#define NV50TIC_0_2_UNKNOWN_MASK 0xffffffff
+
+#define NV50TIC_0_3_UNKNOWN_MASK 0xffffffff
+
+#define NV50TIC_0_4_WIDTH_MASK 0x0000ffff
+#define NV50TIC_0_4_WIDTH_SHIFT 0
+
+#define NV50TIC_0_5_DEPTH_MASK 0xffff0000
+#define NV50TIC_0_5_DEPTH_SHIFT 16
+#define NV50TIC_0_5_HEIGHT_MASK 0x0000ffff
+#define NV50TIC_0_5_HEIGHT_SHIFT 0
+
+#define NV50TIC_0_6_UNKNOWN_MASK 0xffffffff
+
+#define NV50TIC_0_7_OFFSET_HIGH_MASK 0xffffffff
+#define NV50TIC_0_7_OFFSET_HIGH_SHIFT 0
+
+/* Texture sampler control block */
+#define NV50TSC_1_0_WRAPS_MASK 0x00000007
+#define NV50TSC_1_0_WRAPS_REPEAT 0x00000000
+#define NV50TSC_1_0_WRAPS_MIRROR_REPEAT 0x00000001
+#define NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE 0x00000002
+#define NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER 0x00000003
+#define NV50TSC_1_0_WRAPS_CLAMP 0x00000004
+#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE 0x00000005
+#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER 0x00000006
+#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP 0x00000007
+#define NV50TSC_1_0_WRAPT_MASK 0x00000038
+#define NV50TSC_1_0_WRAPT_REPEAT 0x00000000
+#define NV50TSC_1_0_WRAPT_MIRROR_REPEAT 0x00000008
+#define NV50TSC_1_0_WRAPT_CLAMP_TO_EDGE 0x00000010
+#define NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER 0x00000018
+#define NV50TSC_1_0_WRAPT_CLAMP 0x00000020
+#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_EDGE 0x00000028
+#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_BORDER 0x00000030
+#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP 0x00000038
+#define NV50TSC_1_0_WRAPR_MASK 0x000001c0
+#define NV50TSC_1_0_WRAPR_REPEAT 0x00000000
+#define NV50TSC_1_0_WRAPR_MIRROR_REPEAT 0x00000040
+#define NV50TSC_1_0_WRAPR_CLAMP_TO_EDGE 0x00000080
+#define NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER 0x000000c0
+#define NV50TSC_1_0_WRAPR_CLAMP 0x00000100
+#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140
+#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180
+#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0
+
+#define NV50TSC_1_1_MAGF_MASK 0x00000003
+#define NV50TSC_1_1_MAGF_NEAREST 0x00000001
+#define NV50TSC_1_1_MAGF_LINEAR 0x00000002
+#define NV50TSC_1_1_MINF_MASK 0x00000030
+#define NV50TSC_1_1_MINF_NEAREST 0x00000010
+#define NV50TSC_1_1_MINF_LINEAR 0x00000020
+#define NV50TSC_1_1_MIPF_MASK 0x000000c0
+#define NV50TSC_1_1_MIPF_NONE 0x00000040
+#define NV50TSC_1_1_MIPF_NEAREST 0x00000080
+#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0
+
+#define NV50TSC_1_2_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_4_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_5_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_6_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_7_UNKNOWN_MASK 0xffffffff
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
new file mode 100644
index 0000000000..435dc9777d
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv50_context.h"
+
+static INLINE unsigned
+nv50_prim(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
+ case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
+ case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
+ case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
+ case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
+ case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
+ case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
+ case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
+ case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
+ default:
+ break;
+ }
+
+ NOUVEAU_ERR("invalid primitive type %d\n", mode);
+ return NV50TCL_VERTEX_BEGIN_POINTS;
+}
+
+boolean
+nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
+ unsigned count)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50_state_validate(nv50);
+
+ BEGIN_RING(tesla, 0x142c, 1);
+ OUT_RING (0);
+ BEGIN_RING(tesla, 0x142c, 1);
+ OUT_RING (0);
+ BEGIN_RING(tesla, 0x1440, 1);
+ OUT_RING (0);
+ BEGIN_RING(tesla, 0x1334, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (nv50_prim(mode));
+ BEGIN_RING(tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+ OUT_RING (start);
+ OUT_RING (count);
+ BEGIN_RING(tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (0);
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+static INLINE void
+nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ BEGIN_RING(tesla, 0x15e8, 1);
+ OUT_RING (map[0]);
+ map++;
+ count--;
+ }
+
+ while (count) {
+ unsigned nr = count > 2046 ? 2046 : count;
+ int i;
+
+ BEGIN_RING(tesla, 0x400015f0, nr >> 1);
+ for (i = 0; i < nr; i += 2)
+ OUT_RING ((map[1] << 16) | map[0]);
+
+ count -= nr;
+ map += nr;
+ }
+}
+
+static INLINE void
+nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ BEGIN_RING(tesla, 0x15e8, 1);
+ OUT_RING (map[0]);
+ map++;
+ count--;
+ }
+
+ while (count) {
+ unsigned nr = count > 2046 ? 2046 : count;
+ int i;
+
+ BEGIN_RING(tesla, 0x400015f0, nr >> 1);
+ for (i = 0; i < nr; i += 2)
+ OUT_RING ((map[1] << 16) | map[0]);
+
+ count -= nr;
+ map += nr;
+ }
+}
+
+static INLINE void
+nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ while (count) {
+ unsigned nr = count > 2047 ? 2047 : count;
+
+ BEGIN_RING(tesla, 0x400015e8, nr);
+ OUT_RINGp (map, nr);
+
+ count -= nr;
+ map += nr;
+ }
+}
+
+boolean
+nv50_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer, unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+
+ nv50_state_validate(nv50);
+
+ BEGIN_RING(tesla, 0x142c, 1);
+ OUT_RING (0);
+ BEGIN_RING(tesla, 0x142c, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (nv50_prim(mode));
+ switch (indexSize) {
+ case 1:
+ nv50_draw_elements_inline_u08(nv50, map, start, count);
+ break;
+ case 2:
+ nv50_draw_elements_inline_u16(nv50, map, start, count);
+ break;
+ case 4:
+ nv50_draw_elements_inline_u32(nv50, map, start, count);
+ break;
+ default:
+ assert(0);
+ }
+ BEGIN_RING(tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (0);
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+void
+nv50_vbo_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *vtxbuf, *vtxfmt;
+ int i, vpi = 0;
+
+ vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2);
+ vtxfmt = so_new(nv50->vtxelt_nr + 1, 0);
+ so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr);
+
+ for (i = 0; i < nv50->vtxelt_nr; i++) {
+ struct pipe_vertex_element *ve = &nv50->vtxelt[i];
+ struct pipe_vertex_buffer *vb =
+ &nv50->vtxbuf[ve->vertex_buffer_index];
+
+ switch (ve->src_format) {
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ so_data(vtxfmt, 0x7e080000 | i);
+ break;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ so_data(vtxfmt, 0x7e100000 | i);
+ break;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ so_data(vtxfmt, 0x7e200000 | i);
+ break;
+ case PIPE_FORMAT_R32_FLOAT:
+ so_data(vtxfmt, 0x7e900000 | i);
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ so_data(vtxfmt, 0x24500000 | i);
+ break;
+ default:
+ {
+ NOUVEAU_ERR("invalid vbo format %s\n",
+ pf_name(ve->src_format));
+ assert(0);
+ return;
+ }
+ }
+
+ so_method(vtxbuf, tesla, 0x900 + (i * 16), 3);
+ so_data (vtxbuf, 0x20000000 | vb->pitch);
+ so_reloc (vtxbuf, vb->buffer, vb->buffer_offset +
+ ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (vtxbuf, vb->buffer, vb->buffer_offset +
+ ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ }
+
+ so_ref (vtxfmt, &nv50->state.vtxfmt);
+ so_ref (vtxbuf, &nv50->state.vtxbuf);
+}
+
diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile
new file mode 100644
index 0000000000..120bdfd9dd
--- /dev/null
+++ b/src/gallium/drivers/softpipe/Makefile
@@ -0,0 +1,47 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = softpipe
+
+C_SOURCES = \
+ sp_fs_exec.c \
+ sp_fs_sse.c \
+ sp_fs_llvm.c \
+ sp_clear.c \
+ sp_flush.c \
+ sp_query.c \
+ sp_context.c \
+ sp_draw_arrays.c \
+ sp_prim_setup.c \
+ sp_prim_vbuf.c \
+ sp_quad.c \
+ sp_quad_alpha_test.c \
+ sp_quad_blend.c \
+ sp_quad_colormask.c \
+ sp_quad_coverage.c \
+ sp_quad_depth_test.c \
+ sp_quad_earlyz.c \
+ sp_quad_fs.c \
+ sp_quad_occlusion.c \
+ sp_quad_output.c \
+ sp_quad_stencil.c \
+ sp_quad_stipple.c \
+ sp_screen.c \
+ sp_setup.c \
+ sp_state_blend.c \
+ sp_state_clip.c \
+ sp_state_derived.c \
+ sp_state_fs.c \
+ sp_state_sampler.c \
+ sp_state_rasterizer.c \
+ sp_state_surface.c \
+ sp_state_vertex.c \
+ sp_texture.c \
+ sp_tex_sample.c \
+ sp_tile_cache.c \
+ sp_surface.c
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript
new file mode 100644
index 0000000000..c1f7daa8ab
--- /dev/null
+++ b/src/gallium/drivers/softpipe/SConscript
@@ -0,0 +1,46 @@
+Import('*')
+
+env = env.Clone()
+
+softpipe = env.ConvenienceLibrary(
+ target = 'softpipe',
+ source = [
+ 'sp_fs_exec.c',
+ 'sp_fs_sse.c',
+ 'sp_fs_llvm.c',
+ 'sp_clear.c',
+ 'sp_context.c',
+ 'sp_draw_arrays.c',
+ 'sp_flush.c',
+ 'sp_prim_setup.c',
+ 'sp_prim_vbuf.c',
+ 'sp_setup.c',
+ 'sp_quad_alpha_test.c',
+ 'sp_quad_blend.c',
+ 'sp_quad.c',
+ 'sp_quad_colormask.c',
+ 'sp_quad_coverage.c',
+ 'sp_quad_depth_test.c',
+ 'sp_quad_earlyz.c',
+ 'sp_quad_fs.c',
+ 'sp_quad_occlusion.c',
+ 'sp_quad_output.c',
+ 'sp_quad_stencil.c',
+ 'sp_quad_stipple.c',
+ 'sp_query.c',
+ 'sp_screen.c',
+ 'sp_state_blend.c',
+ 'sp_state_clip.c',
+ 'sp_state_derived.c',
+ 'sp_state_fs.c',
+ 'sp_state_rasterizer.c',
+ 'sp_state_sampler.c',
+ 'sp_state_surface.c',
+ 'sp_state_vertex.c',
+ 'sp_surface.c',
+ 'sp_tex_sample.c',
+ 'sp_texture.c',
+ 'sp_tile_cache.c',
+ ])
+
+Export('softpipe') \ No newline at end of file
diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
new file mode 100644
index 0000000000..dfa46c9fb7
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -0,0 +1,107 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ */
+
+
+#include "pipe/p_defines.h"
+#include "util/u_pack_color.h"
+#include "sp_clear.h"
+#include "sp_context.h"
+#include "sp_surface.h"
+#include "sp_state.h"
+#include "sp_tile_cache.h"
+
+
+/**
+ * Convert packed pixel from one format to another.
+ */
+static unsigned
+convert_color(enum pipe_format srcFormat, unsigned srcColor,
+ enum pipe_format dstFormat)
+{
+ ubyte r, g, b, a;
+ unsigned dstColor;
+
+ util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a);
+ util_pack_color_ub(r, g, b, a, dstFormat, &dstColor);
+
+ return dstColor;
+}
+
+
+
+/**
+ * Clear the given surface to the specified value.
+ * No masking, no scissor (clear entire buffer).
+ * Note: when clearing a color buffer, the clearValue is always
+ * encoded as PIPE_FORMAT_A8R8G8B8_UNORM.
+ */
+void
+softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ uint i;
+
+ if (softpipe->no_rast)
+ return;
+
+#if 0
+ softpipe_update_derived(softpipe); /* not needed?? */
+#endif
+
+ if (ps == sp_tile_cache_get_surface(softpipe->zsbuf_cache)) {
+ sp_tile_cache_clear(softpipe->zsbuf_cache, clearValue);
+ softpipe->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_CLEAR;
+#if TILE_CLEAR_OPTIMIZATION
+ return;
+#endif
+ }
+
+ for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) {
+ if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) {
+ unsigned cv;
+ if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) {
+ cv = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue,
+ ps->format);
+ }
+ else {
+ cv = clearValue;
+ }
+ sp_tile_cache_clear(softpipe->cbuf_cache[i], cv);
+ softpipe->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_CLEAR;
+ }
+ }
+
+#if !TILE_CLEAR_OPTIMIZATION
+ /* non-cached surface */
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+#endif
+}
diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h
new file mode 100644
index 0000000000..a8ed1c4ecc
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_clear.h
@@ -0,0 +1,43 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ */
+
+#ifndef SP_CLEAR_H
+#define SP_CLEAR_H
+
+#include "pipe/p_state.h"
+struct pipe_context;
+
+extern void
+softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+
+#endif /* SP_CLEAR_H */
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
new file mode 100644
index 0000000000..800f944838
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -0,0 +1,288 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "sp_clear.h"
+#include "sp_context.h"
+#include "sp_flush.h"
+#include "sp_prim_setup.h"
+#include "sp_prim_vbuf.h"
+#include "sp_state.h"
+#include "sp_surface.h"
+#include "sp_tile_cache.h"
+#include "sp_texture.h"
+#include "sp_winsys.h"
+#include "sp_query.h"
+
+
+
+/**
+ * Map any drawing surfaces which aren't already mapped
+ */
+void
+softpipe_map_surfaces(struct softpipe_context *sp)
+{
+ unsigned i;
+
+ for (i = 0; i < sp->framebuffer.num_cbufs; i++) {
+ sp_tile_cache_map_surfaces(sp->cbuf_cache[i]);
+ }
+
+ sp_tile_cache_map_surfaces(sp->zsbuf_cache);
+}
+
+
+/**
+ * Unmap any mapped drawing surfaces
+ */
+void
+softpipe_unmap_surfaces(struct softpipe_context *sp)
+{
+ uint i;
+
+ for (i = 0; i < sp->framebuffer.num_cbufs; i++)
+ sp_flush_tile_cache(sp, sp->cbuf_cache[i]);
+ sp_flush_tile_cache(sp, sp->zsbuf_cache);
+
+ for (i = 0; i < sp->framebuffer.num_cbufs; i++) {
+ sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]);
+ }
+ sp_tile_cache_unmap_surfaces(sp->zsbuf_cache);
+}
+
+
+static void softpipe_destroy( struct pipe_context *pipe )
+{
+ struct softpipe_context *softpipe = softpipe_context( pipe );
+ struct pipe_winsys *ws = pipe->winsys;
+ uint i;
+
+ if (softpipe->draw)
+ draw_destroy( softpipe->draw );
+
+ for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+ softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple );
+ softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz );
+ softpipe->quad[i].shade->destroy( softpipe->quad[i].shade );
+ softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test );
+ softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test );
+ softpipe->quad[i].stencil_test->destroy( softpipe->quad[i].stencil_test );
+ softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion );
+ softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage );
+ softpipe->quad[i].blend->destroy( softpipe->quad[i].blend );
+ softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask );
+ softpipe->quad[i].output->destroy( softpipe->quad[i].output );
+ }
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ sp_destroy_tile_cache(softpipe->cbuf_cache[i]);
+ sp_destroy_tile_cache(softpipe->zsbuf_cache);
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+ sp_destroy_tile_cache(softpipe->tex_cache[i]);
+
+ for (i = 0; i < Elements(softpipe->constants); i++) {
+ if (softpipe->constants[i].buffer) {
+ winsys_buffer_reference(ws, &softpipe->constants[i].buffer, NULL);
+ }
+ }
+
+ FREE( softpipe );
+}
+
+
+struct pipe_context *
+softpipe_create( struct pipe_screen *screen,
+ struct pipe_winsys *pipe_winsys,
+ void *unused )
+{
+ struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context);
+ uint i;
+
+ util_init_math();
+
+#ifdef PIPE_ARCH_X86
+ softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE );
+#else
+ softpipe->use_sse = FALSE;
+#endif
+
+ softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE );
+
+ softpipe->pipe.winsys = pipe_winsys;
+ softpipe->pipe.screen = screen;
+ softpipe->pipe.destroy = softpipe_destroy;
+
+ /* state setters */
+ softpipe->pipe.create_blend_state = softpipe_create_blend_state;
+ softpipe->pipe.bind_blend_state = softpipe_bind_blend_state;
+ softpipe->pipe.delete_blend_state = softpipe_delete_blend_state;
+
+ softpipe->pipe.create_sampler_state = softpipe_create_sampler_state;
+ softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states;
+ softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state;
+
+ softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state;
+ softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state;
+ softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state;
+
+ softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state;
+ softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state;
+ softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state;
+
+ softpipe->pipe.create_fs_state = softpipe_create_fs_state;
+ softpipe->pipe.bind_fs_state = softpipe_bind_fs_state;
+ softpipe->pipe.delete_fs_state = softpipe_delete_fs_state;
+
+ softpipe->pipe.create_vs_state = softpipe_create_vs_state;
+ softpipe->pipe.bind_vs_state = softpipe_bind_vs_state;
+ softpipe->pipe.delete_vs_state = softpipe_delete_vs_state;
+
+ softpipe->pipe.set_blend_color = softpipe_set_blend_color;
+ softpipe->pipe.set_clip_state = softpipe_set_clip_state;
+ softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer;
+ softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state;
+ softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple;
+ softpipe->pipe.set_scissor_state = softpipe_set_scissor_state;
+ softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures;
+ softpipe->pipe.set_viewport_state = softpipe_set_viewport_state;
+
+ softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers;
+ softpipe->pipe.set_vertex_elements = softpipe_set_vertex_elements;
+
+ softpipe->pipe.draw_arrays = softpipe_draw_arrays;
+ softpipe->pipe.draw_elements = softpipe_draw_elements;
+ softpipe->pipe.draw_range_elements = softpipe_draw_range_elements;
+ softpipe->pipe.set_edgeflags = softpipe_set_edgeflags;
+
+
+ softpipe->pipe.clear = softpipe_clear;
+ softpipe->pipe.flush = softpipe_flush;
+
+ softpipe_init_query_funcs( softpipe );
+ softpipe_init_texture_funcs( softpipe );
+
+ /*
+ * Alloc caches for accessing drawing surfaces and textures.
+ * Must be before quad stage setup!
+ */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ softpipe->cbuf_cache[i] = sp_create_tile_cache( screen );
+ softpipe->zsbuf_cache = sp_create_tile_cache( screen );
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+ softpipe->tex_cache[i] = sp_create_tile_cache( screen );
+
+
+ /* setup quad rendering stages */
+ for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+ softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe);
+ softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe);
+ softpipe->quad[i].shade = sp_quad_shade_stage(softpipe);
+ softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe);
+ softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe);
+ softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe);
+ softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe);
+ softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe);
+ softpipe->quad[i].blend = sp_quad_blend_stage(softpipe);
+ softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe);
+ softpipe->quad[i].output = sp_quad_output_stage(softpipe);
+ }
+
+ /* vertex shader samplers */
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples_vertex;
+ softpipe->tgsi.vert_samplers[i].unit = i;
+ softpipe->tgsi.vert_samplers[i].sp = softpipe;
+ softpipe->tgsi.vert_samplers[i].cache = softpipe->tex_cache[i];
+ softpipe->tgsi.vert_samplers_list[i] = &softpipe->tgsi.vert_samplers[i];
+ }
+
+ /* fragment shader samplers */
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment;
+ softpipe->tgsi.frag_samplers[i].unit = i;
+ softpipe->tgsi.frag_samplers[i].sp = softpipe;
+ softpipe->tgsi.frag_samplers[i].cache = softpipe->tex_cache[i];
+ softpipe->tgsi.frag_samplers_list[i] = &softpipe->tgsi.frag_samplers[i];
+ }
+
+ /*
+ * Create drawing context and plug our rendering stage into it.
+ */
+ softpipe->draw = draw_create();
+ if (!softpipe->draw)
+ goto fail;
+
+ draw_texture_samplers(softpipe->draw,
+ PIPE_MAX_SAMPLERS,
+ (struct tgsi_sampler **)
+ softpipe->tgsi.vert_samplers_list);
+
+ softpipe->setup = sp_draw_render_stage(softpipe);
+ if (!softpipe->setup)
+ goto fail;
+
+ if (debug_get_bool_option( "SP_NO_RAST", FALSE ))
+ softpipe->no_rast = TRUE;
+
+ if (debug_get_bool_option( "SP_NO_VBUF", FALSE )) {
+ /* Deprecated path -- vbuf is the intended interface to the draw module:
+ */
+ draw_set_rasterize_stage(softpipe->draw, softpipe->setup);
+ }
+ else {
+ sp_init_vbuf(softpipe);
+ }
+
+ /* plug in AA line/point stages */
+ draw_install_aaline_stage(softpipe->draw, &softpipe->pipe);
+ draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe);
+
+#if USE_DRAW_STAGE_PSTIPPLE
+ /* Do polygon stipple w/ texture map + frag prog? */
+ draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe);
+#endif
+
+ sp_init_surface_functions(softpipe);
+
+ return &softpipe->pipe;
+
+ fail:
+ softpipe_destroy(&softpipe->pipe);
+ return NULL;
+}
+
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
new file mode 100644
index 0000000000..e2451c6ecb
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -0,0 +1,174 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SP_CONTEXT_H
+#define SP_CONTEXT_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+
+#include "draw/draw_vertex.h"
+
+#include "sp_quad.h"
+#include "sp_tex_sample.h"
+
+
+/**
+ * This is a temporary variable for testing draw-stage polygon stipple.
+ * If zero, do stipple in sp_quad_stipple.c
+ */
+#define USE_DRAW_STAGE_PSTIPPLE 1
+
+/* Number of threads working on individual quads.
+ * Setting to 1 disables this feature.
+ */
+#define SP_NUM_QUAD_THREADS 1
+
+struct softpipe_winsys;
+struct softpipe_vbuf_render;
+struct draw_context;
+struct draw_stage;
+struct softpipe_tile_cache;
+struct sp_fragment_shader;
+struct sp_vertex_shader;
+
+
+struct softpipe_context {
+ struct pipe_context pipe; /**< base class */
+
+ /* The most recent drawing state as set by the driver:
+ */
+ const struct pipe_blend_state *blend;
+ const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ const struct pipe_depth_stencil_alpha_state *depth_stencil;
+ const struct pipe_rasterizer_state *rasterizer;
+ const struct sp_fragment_shader *fs;
+ const struct sp_vertex_shader *vs;
+
+ struct pipe_blend_color blend_color;
+ struct pipe_clip_state clip;
+ struct pipe_constant_buffer constants[PIPE_SHADER_TYPES];
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_poly_stipple poly_stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_viewport_state viewport;
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ unsigned dirty;
+
+ unsigned num_samplers;
+ unsigned num_textures;
+ unsigned num_vertex_elements;
+ unsigned num_vertex_buffers;
+
+ boolean no_rast;
+
+ /* Counter for occlusion queries. Note this supports overlapping
+ * queries.
+ */
+ uint64_t occlusion_count;
+
+ /*
+ * Mapped vertex buffers
+ */
+ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS];
+
+ /** Mapped constant buffers */
+ void *mapped_constants[PIPE_SHADER_TYPES];
+
+ /** Vertex format */
+ struct vertex_info vertex_info;
+ struct vertex_info vertex_info_vbuf;
+
+ int psize_slot;
+
+ unsigned reduced_api_prim; /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */
+
+#if 0
+ /* Stipple derived state:
+ */
+ ubyte stipple_masks[16][16];
+#endif
+
+ /** Derived from scissor and surface bounds: */
+ struct pipe_scissor_state cliprect;
+
+ unsigned line_stipple_counter;
+
+ /** Software quad rendering pipeline */
+ struct {
+ struct quad_stage *polygon_stipple;
+ struct quad_stage *earlyz;
+ struct quad_stage *shade;
+ struct quad_stage *alpha_test;
+ struct quad_stage *stencil_test;
+ struct quad_stage *depth_test;
+ struct quad_stage *occlusion;
+ struct quad_stage *coverage;
+ struct quad_stage *blend;
+ struct quad_stage *colormask;
+ struct quad_stage *output;
+
+ struct quad_stage *first; /**< points to one of the above stages */
+ } quad[SP_NUM_QUAD_THREADS];
+
+ /** TGSI exec things */
+ struct {
+ struct sp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS];
+ struct sp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS];
+ struct sp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS];
+ struct sp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS];
+ } tgsi;
+
+ /** The primitive drawing context */
+ struct draw_context *draw;
+ struct draw_stage *setup;
+ struct draw_stage *vbuf;
+ struct softpipe_vbuf_render *vbuf_render;
+
+ struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS];
+ struct softpipe_tile_cache *zsbuf_cache;
+
+ struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
+
+ int use_sse : 1;
+ int dump_fs : 1;
+};
+
+
+static INLINE struct softpipe_context *
+softpipe_context( struct pipe_context *pipe )
+{
+ return (struct softpipe_context *)pipe;
+}
+
+#endif /* SP_CONTEXT_H */
+
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
new file mode 100644
index 0000000000..424bd56846
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -0,0 +1,202 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ * Keith Whitwell
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_context.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+
+#include "sp_context.h"
+#include "sp_state.h"
+
+#include "draw/draw_context.h"
+
+
+
+static void
+softpipe_map_constant_buffers(struct softpipe_context *sp)
+{
+ struct pipe_winsys *ws = sp->pipe.winsys;
+ uint i;
+ for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+ if (sp->constants[i].size)
+ sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ }
+
+ draw_set_mapped_constant_buffer(sp->draw,
+ sp->mapped_constants[PIPE_SHADER_VERTEX],
+ sp->constants[PIPE_SHADER_VERTEX].size);
+}
+
+static void
+softpipe_unmap_constant_buffers(struct softpipe_context *sp)
+{
+ struct pipe_winsys *ws = sp->pipe.winsys;
+ uint i;
+
+ /* really need to flush all prims since the vert/frag shaders const buffers
+ * are going away now.
+ */
+ draw_flush(sp->draw);
+
+ draw_set_mapped_constant_buffer(sp->draw, NULL, 0);
+
+ for (i = 0; i < 2; i++) {
+ if (sp->constants[i].size)
+ ws->buffer_unmap(ws, sp->constants[i].buffer);
+ sp->mapped_constants[i] = NULL;
+ }
+}
+
+
+static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
+ PIPE_PRIM_POINTS,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES
+};
+
+
+boolean
+softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count)
+{
+ return softpipe_draw_elements(pipe, NULL, 0, mode, start, count);
+}
+
+
+
+/**
+ * Draw vertex arrays, with optional indexing.
+ * Basically, map the vertex buffers (and drawing surfaces), then hand off
+ * the drawing to the 'draw' module.
+ *
+ * XXX should the element buffer be specified/bound with a separate function?
+ */
+
+boolean
+softpipe_draw_range_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct softpipe_context *sp = softpipe_context(pipe);
+ struct draw_context *draw = sp->draw;
+ unsigned i;
+
+ sp->reduced_api_prim = reduced_prim[mode];
+
+ if (sp->dirty)
+ softpipe_update_derived( sp );
+
+ softpipe_map_surfaces(sp);
+ softpipe_map_constant_buffers(sp);
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < sp->num_vertex_buffers; i++) {
+ void *buf
+ = pipe_buffer_map(pipe->screen,
+ sp->vertex_buffer[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes
+ = pipe_buffer_map(pipe->screen, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer_range(draw, indexSize,
+ min_index,
+ max_index,
+ mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL);
+ }
+
+
+ /* draw! */
+ draw_arrays(draw, mode, start, count);
+
+ /*
+ * unmap vertex/index buffers - will cause draw module to flush
+ */
+ for (i = 0; i < sp->num_vertex_buffers; i++) {
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer);
+ }
+ if (indexBuffer) {
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ pipe_buffer_unmap(pipe->screen, indexBuffer);
+ }
+
+
+ /* Note: leave drawing surfaces mapped */
+ softpipe_unmap_constant_buffers(sp);
+
+ return TRUE;
+}
+
+boolean
+softpipe_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ return softpipe_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ mode, start, count );
+}
+
+
+
+void
+softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
+{
+ struct softpipe_context *sp = softpipe_context(pipe);
+ draw_set_edgeflags(sp->draw, edgeflags);
+}
+
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
new file mode 100644
index 0000000000..401764bb43
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -0,0 +1,92 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_defines.h"
+#include "draw/draw_context.h"
+#include "sp_flush.h"
+#include "sp_context.h"
+#include "sp_surface.h"
+#include "sp_state.h"
+#include "sp_tile_cache.h"
+#include "sp_winsys.h"
+
+
+void
+softpipe_flush( struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence )
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ uint i;
+
+ draw_flush(softpipe->draw);
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ for (i = 0; i < softpipe->num_textures; i++) {
+ sp_flush_tile_cache(softpipe, softpipe->tex_cache[i]);
+ }
+ }
+
+ if (flags & PIPE_FLUSH_RENDER_CACHE) {
+ for (i = 0; i < softpipe->framebuffer.num_cbufs; i++)
+ if (softpipe->cbuf_cache[i])
+ sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]);
+
+ if (softpipe->zsbuf_cache)
+ sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache);
+
+ /* Need this call for hardware buffers before swapbuffers.
+ *
+ * there should probably be another/different flush-type function
+ * that's called before swapbuffers because we don't always want
+ * to unmap surfaces when flushing.
+ */
+ softpipe_unmap_surfaces(softpipe);
+ }
+
+ /* Enable to dump BMPs of the color/depth buffers each frame */
+#if 0
+ if(flags & PIPE_FLUSH_FRAME) {
+ static unsigned frame_no = 1;
+ static char filename[256];
+ util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no);
+ debug_dump_surface_bmp(filename, softpipe->framebuffer.cbufs[0]);
+ util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no);
+ debug_dump_surface_bmp(filename, softpipe->framebuffer.zsbuf);
+ ++frame_no;
+ }
+#endif
+
+ if (fence)
+ *fence = NULL;
+}
+
diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h
new file mode 100644
index 0000000000..68d9b5fa83
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_flush.h
@@ -0,0 +1,37 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SP_FLUSH_H
+#define SP_FLUSH_H
+
+struct pipe_context;
+struct pipe_fence_handle;
+
+void softpipe_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence);
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_fs.h b/src/gallium/drivers/softpipe/sp_fs.h
new file mode 100644
index 0000000000..4792ace3a3
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_fs.h
@@ -0,0 +1,54 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SP_FS_H
+#define SP_FS_H
+
+struct sp_fragment_shader *
+softpipe_create_fs_exec(struct softpipe_context *softpipe,
+ const struct pipe_shader_state *templ);
+
+struct sp_fragment_shader *
+softpipe_create_fs_sse(struct softpipe_context *softpipe,
+ const struct pipe_shader_state *templ);
+
+struct sp_fragment_shader *
+softpipe_create_fs_llvm(struct softpipe_context *softpipe,
+ const struct pipe_shader_state *templ);
+
+struct tgsi_interp_coef;
+struct tgsi_exec_vector;
+
+void sp_setup_pos_vector(const struct tgsi_interp_coef *coef,
+ float x, float y,
+ struct tgsi_exec_vector *quadpos);
+
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
new file mode 100644
index 0000000000..453b0373f0
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -0,0 +1,164 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_fs.h"
+#include "sp_headers.h"
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_parse.h"
+
+struct sp_exec_fragment_shader
+{
+ struct sp_fragment_shader base;
+};
+
+
+/** cast wrapper */
+static INLINE struct sp_exec_fragment_shader *
+sp_exec_fragment_shader(const struct sp_fragment_shader *base)
+{
+ return (struct sp_exec_fragment_shader *) base;
+}
+
+
+/**
+ * Compute quad X,Y,Z,W for the four fragments in a quad.
+ *
+ * This should really be part of the compiled shader.
+ */
+void
+sp_setup_pos_vector(const struct tgsi_interp_coef *coef,
+ float x, float y,
+ struct tgsi_exec_vector *quadpos)
+{
+ uint chan;
+ /* do X */
+ quadpos->xyzw[0].f[0] = x;
+ quadpos->xyzw[0].f[1] = x + 1;
+ quadpos->xyzw[0].f[2] = x;
+ quadpos->xyzw[0].f[3] = x + 1;
+
+ /* do Y */
+ quadpos->xyzw[1].f[0] = y;
+ quadpos->xyzw[1].f[1] = y;
+ quadpos->xyzw[1].f[2] = y + 1;
+ quadpos->xyzw[1].f[3] = y + 1;
+
+ /* do Z and W for all fragments in the quad */
+ for (chan = 2; chan < 4; chan++) {
+ const float dadx = coef->dadx[chan];
+ const float dady = coef->dady[chan];
+ const float a0 = coef->a0[chan] + dadx * x + dady * y;
+ quadpos->xyzw[chan].f[0] = a0;
+ quadpos->xyzw[chan].f[1] = a0 + dadx;
+ quadpos->xyzw[chan].f[2] = a0 + dady;
+ quadpos->xyzw[chan].f[3] = a0 + dadx + dady;
+ }
+}
+
+
+static void
+exec_prepare( const struct sp_fragment_shader *base,
+ struct tgsi_exec_machine *machine,
+ struct tgsi_sampler **samplers )
+{
+ /*
+ * Bind tokens/shader to the interpreter's machine state.
+ * Avoid redundant binding.
+ */
+ if (machine->Tokens != base->shader.tokens) {
+ tgsi_exec_machine_bind_shader( machine,
+ base->shader.tokens,
+ PIPE_MAX_SAMPLERS,
+ samplers );
+ }
+}
+
+
+
+
+/* TODO: hide the machine struct in here somewhere, remove from this
+ * interface:
+ */
+static unsigned
+exec_run( const struct sp_fragment_shader *base,
+ struct tgsi_exec_machine *machine,
+ struct quad_header *quad )
+{
+
+ /* Compute X, Y, Z, W vals for this quad */
+ sp_setup_pos_vector(quad->posCoef,
+ (float)quad->input.x0, (float)quad->input.y0,
+ &machine->QuadPos);
+
+ return tgsi_exec_machine_run( machine );
+}
+
+
+
+static void
+exec_delete( struct sp_fragment_shader *base )
+{
+ FREE((void *) base->shader.tokens);
+ FREE(base);
+}
+
+
+
+
+
+struct sp_fragment_shader *
+softpipe_create_fs_exec(struct softpipe_context *softpipe,
+ const struct pipe_shader_state *templ)
+{
+ struct sp_exec_fragment_shader *shader;
+
+ /* Decide whether we'll be codegenerating this shader and if so do
+ * that now.
+ */
+
+ shader = CALLOC_STRUCT(sp_exec_fragment_shader);
+ if (!shader)
+ return NULL;
+
+ /* we need to keep a local copy of the tokens */
+ shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens);
+ shader->base.prepare = exec_prepare;
+ shader->base.run = exec_run;
+ shader->base.delete = exec_delete;
+
+ return &shader->base;
+}
+
diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c
new file mode 100644
index 0000000000..34adac5226
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c
@@ -0,0 +1,200 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors:
+ * Zack Rusin
+ */
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_fs.h"
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "tgsi/tgsi_sse2.h"
+
+#if 0
+
+struct sp_llvm_fragment_shader {
+ struct sp_fragment_shader base;
+ struct gallivm_prog *llvm_prog;
+};
+
+static void
+shade_quad_llvm(struct quad_stage *qs,
+ struct quad_header *quad)
+{
+ struct quad_shade_stage *qss = quad_shade_stage(qs);
+ struct softpipe_context *softpipe = qs->softpipe;
+ float dests[4][16][4] ALIGN16_ATTRIB;
+ float inputs[4][16][4] ALIGN16_ATTRIB;
+ const float fx = (float) quad->x0;
+ const float fy = (float) quad->y0;
+ struct gallivm_prog *llvm = qss->llvm_prog;
+
+ inputs[0][0][0] = fx;
+ inputs[1][0][0] = fx + 1.0f;
+ inputs[2][0][0] = fx;
+ inputs[3][0][0] = fx + 1.0f;
+
+ inputs[0][0][1] = fy;
+ inputs[1][0][1] = fy;
+ inputs[2][0][1] = fy + 1.0f;
+ inputs[3][0][1] = fy + 1.0f;
+
+
+ gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef);
+
+#if DLLVM
+ debug_printf("MASK = %d\n", quad->mask);
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 2; ++j) {
+ debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j,
+ inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]);
+ }
+ }
+#endif
+
+ quad->mask &=
+ gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs,
+ softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
+ qss->samplers);
+#if DLLVM
+ debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
+ dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3],
+ dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]);
+#endif
+
+ /* store result color */
+ if (qss->colorOutSlot >= 0) {
+ unsigned i;
+ /* XXX need to handle multiple color outputs someday */
+ allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
+ == TGSI_SEMANTIC_COLOR);
+ for (i = 0; i < QUAD_SIZE; ++i) {
+ quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0];
+ quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1];
+ quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2];
+ quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3];
+ }
+ }
+#if DLLVM
+ for (int i = 0; i < QUAD_SIZE; ++i) {
+ debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
+ quad->outputs.color[0][0][i],
+ quad->outputs.color[0][1][i],
+ quad->outputs.color[0][2][i],
+ quad->outputs.color[0][3][i]);
+ }
+#endif
+
+ /* store result Z */
+ if (qss->depthOutSlot >= 0) {
+ /* output[slot] is new Z */
+ uint i;
+ for (i = 0; i < 4; i++) {
+ quad->outputs.depth[i] = dests[i][0][2];
+ }
+ }
+ else {
+ /* copy input Z (which was interpolated by the executor) to output Z */
+ uint i;
+ for (i = 0; i < 4; i++) {
+ quad->outputs.depth[i] = inputs[i][0][2];
+ }
+ }
+#if DLLVM
+ debug_printf("D [%f, %f, %f, %f] mask = %d\n",
+ quad->outputs.depth[0],
+ quad->outputs.depth[1],
+ quad->outputs.depth[2],
+ quad->outputs.depth[3], quad->mask);
+#endif
+
+ /* shader may cull fragments */
+ if( quad->mask ) {
+ qs->next->run( qs->next, quad );
+ }
+}
+
+
+unsigned
+run_llvm_fs( const struct sp_fragment_shader *base,
+ struct foo *machine )
+{
+}
+
+
+void
+delete_llvm_fs( struct sp_fragment_shader *base )
+{
+ FREE(base);
+}
+
+
+struct sp_fragment_shader *
+softpipe_create_fs_llvm(struct softpipe_context *softpipe,
+ const struct pipe_shader_state *templ)
+{
+ struct sp_llvm_fragment_shader *shader = NULL;
+
+ /* LLVM fragment shaders currently disabled:
+ */
+ state = CALLOC_STRUCT(sp_llvm_shader_state);
+ if (!state)
+ return NULL;
+
+ state->llvm_prog = 0;
+
+ if (!gallivm_global_cpu_engine()) {
+ gallivm_cpu_engine_create(state->llvm_prog);
+ }
+ else
+ gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog);
+
+ if (shader) {
+ shader->base.run = run_llvm_fs;
+ shader->base.delete = delete_llvm_fs;
+ }
+
+ return shader;
+}
+
+
+#else
+
+struct sp_fragment_shader *
+softpipe_create_fs_llvm(struct softpipe_context *softpipe,
+ const struct pipe_shader_state *templ)
+{
+ return NULL;
+}
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
new file mode 100644
index 0000000000..9a273c8764
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -0,0 +1,169 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_fs.h"
+#include "sp_headers.h"
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_sse2.h"
+
+
+#if defined(PIPE_ARCH_X86)
+
+#include "rtasm/rtasm_x86sse.h"
+
+/* Surely this should be defined somewhere in a tgsi header:
+ */
+typedef void (PIPE_CDECL *codegen_function)(
+ const struct tgsi_exec_vector *input,
+ struct tgsi_exec_vector *output,
+ const float (*constant)[4],
+ struct tgsi_exec_vector *temporary,
+ const struct tgsi_interp_coef *coef,
+ float (*immediates)[4]
+ //, const struct tgsi_exec_vector *quadPos
+ );
+
+
+struct sp_sse_fragment_shader {
+ struct sp_fragment_shader base;
+ struct x86_function sse2_program;
+ codegen_function func;
+ float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
+};
+
+
+
+static void
+fs_sse_prepare( const struct sp_fragment_shader *base,
+ struct tgsi_exec_machine *machine,
+ struct tgsi_sampler **samplers )
+{
+}
+
+
+/* TODO: codegenerate the whole run function, skip this wrapper.
+ * TODO: break dependency on tgsi_exec_machine struct
+ * TODO: push Position calculation into the generated shader
+ * TODO: process >1 quad at a time
+ */
+static unsigned
+fs_sse_run( const struct sp_fragment_shader *base,
+ struct tgsi_exec_machine *machine,
+ struct quad_header *quad )
+{
+ struct sp_sse_fragment_shader *shader = (struct sp_sse_fragment_shader *) base;
+
+ /* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */
+ sp_setup_pos_vector(quad->posCoef,
+ (float)quad->input.x0, (float)quad->input.y0,
+ machine->Temps);
+
+ /* init kill mask */
+ tgsi_set_kill_mask(machine, 0x0);
+ tgsi_set_exec_mask(machine, 1, 1, 1, 1);
+
+ shader->func( machine->Inputs,
+ machine->Outputs,
+ machine->Consts,
+ machine->Temps,
+ machine->InterpCoefs,
+ shader->immediates
+ // , &machine->QuadPos
+ );
+
+ return ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
+}
+
+
+static void
+fs_sse_delete( struct sp_fragment_shader *base )
+{
+ struct sp_sse_fragment_shader *shader = (struct sp_sse_fragment_shader *) base;
+
+ x86_release_func( &shader->sse2_program );
+ FREE(shader);
+}
+
+
+struct sp_fragment_shader *
+softpipe_create_fs_sse(struct softpipe_context *softpipe,
+ const struct pipe_shader_state *templ)
+{
+ struct sp_sse_fragment_shader *shader;
+
+ if (!softpipe->use_sse)
+ return NULL;
+
+ shader = CALLOC_STRUCT(sp_sse_fragment_shader);
+ if (!shader)
+ return NULL;
+
+ x86_init_func( &shader->sse2_program );
+
+ if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program,
+ shader->immediates, FALSE )) {
+ FREE(shader);
+ return NULL;
+ }
+
+ shader->func = (codegen_function) x86_get_func( &shader->sse2_program );
+ if (!shader->func) {
+ x86_release_func( &shader->sse2_program );
+ FREE(shader);
+ return NULL;
+ }
+
+ shader->base.shader = *templ;
+ shader->base.prepare = fs_sse_prepare;
+ shader->base.run = fs_sse_run;
+ shader->base.delete = fs_sse_delete;
+
+ return &shader->base;
+}
+
+
+#else
+
+/* Maybe put this varient in the header file.
+ */
+struct sp_fragment_shader *
+softpipe_create_fs_sse(struct softpipe_context *softpipe,
+ const struct pipe_shader_state *templ)
+{
+ return NULL;
+}
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h
new file mode 100644
index 0000000000..4a42cb3c19
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_headers.h
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SP_HEADERS_H
+#define SP_HEADERS_H
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_exec.h"
+
+#define PRIM_POINT 1
+#define PRIM_LINE 2
+#define PRIM_TRI 3
+
+
+/* The rasterizer generates 2x2 quads of fragment and feeds them to
+ * the current fp_machine (see below).
+ * Remember that Y=0=top with Y increasing down the window.
+ */
+#define QUAD_TOP_LEFT 0
+#define QUAD_TOP_RIGHT 1
+#define QUAD_BOTTOM_LEFT 2
+#define QUAD_BOTTOM_RIGHT 3
+
+#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
+#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
+#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
+#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
+#define MASK_ALL 0xf
+
+
+/**
+ * Encodes everything we need to know about a 2x2 pixel block. Uses
+ * "Channel-Serial" or "SoA" layout.
+ */
+struct quad_header_input
+{
+ int x0;
+ int y0;
+ float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */
+ unsigned facing:1; /**< Front (0) or back (1) facing? */
+ unsigned prim:2; /**< PRIM_POINT, LINE, TRI */
+};
+
+struct quad_header_inout
+{
+ unsigned mask:4;
+};
+
+struct quad_header_output
+{
+ /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
+ float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
+ float depth[QUAD_SIZE];
+};
+
+struct quad_header {
+ struct quad_header_input input;
+ struct quad_header_inout inout;
+ struct quad_header_output output;
+
+ const struct tgsi_interp_coef *coef;
+ const struct tgsi_interp_coef *posCoef;
+
+ unsigned nr_attrs;
+};
+
+#endif /* SP_HEADERS_H */
+
diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c
new file mode 100644
index 0000000000..038ff04d4f
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_prim_setup.c
@@ -0,0 +1,190 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief A draw stage that drives our triangle setup routines from
+ * within the draw pipeline. One of two ways to drive setup, the
+ * other being in sp_prim_vbuf.c.
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Brian Paul
+ */
+
+
+#include "sp_context.h"
+#include "sp_setup.h"
+#include "sp_state.h"
+#include "sp_prim_setup.h"
+#include "draw/draw_pipe.h"
+#include "draw/draw_vertex.h"
+#include "util/u_memory.h"
+
+/**
+ * Triangle setup info (derived from draw_stage).
+ * Also used for line drawing (taking some liberties).
+ */
+struct setup_stage {
+ struct draw_stage stage; /**< This must be first (base class) */
+
+ struct setup_context *setup;
+};
+
+
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
+{
+ return (struct setup_stage *)stage;
+}
+
+
+typedef const float (*cptrf4)[4];
+
+static void
+do_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+ struct setup_stage *setup = setup_stage( stage );
+
+ setup_tri( setup->setup,
+ (cptrf4)prim->v[0]->data,
+ (cptrf4)prim->v[1]->data,
+ (cptrf4)prim->v[2]->data );
+}
+
+static void
+do_line(struct draw_stage *stage, struct prim_header *prim)
+{
+ struct setup_stage *setup = setup_stage( stage );
+
+ setup_line( setup->setup,
+ (cptrf4)prim->v[0]->data,
+ (cptrf4)prim->v[1]->data );
+}
+
+static void
+do_point(struct draw_stage *stage, struct prim_header *prim)
+{
+ struct setup_stage *setup = setup_stage( stage );
+
+ setup_point( setup->setup,
+ (cptrf4)prim->v[0]->data );
+}
+
+
+
+
+static void setup_begin( struct draw_stage *stage )
+{
+ struct setup_stage *setup = setup_stage(stage);
+
+ setup_prepare( setup->setup );
+
+ stage->point = do_point;
+ stage->line = do_line;
+ stage->tri = do_tri;
+}
+
+
+static void setup_first_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ setup_begin(stage);
+ stage->point( stage, header );
+}
+
+static void setup_first_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ setup_begin(stage);
+ stage->line( stage, header );
+}
+
+
+static void setup_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ setup_begin(stage);
+ stage->tri( stage, header );
+}
+
+
+
+static void setup_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ stage->point = setup_first_point;
+ stage->line = setup_first_line;
+ stage->tri = setup_first_tri;
+}
+
+
+static void reset_stipple_counter( struct draw_stage *stage )
+{
+}
+
+
+static void render_destroy( struct draw_stage *stage )
+{
+ struct setup_stage *ssetup = setup_stage(stage);
+ setup_destroy_context(ssetup->setup);
+ FREE( stage );
+}
+
+
+/**
+ * Create a new primitive setup/render stage.
+ */
+struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe )
+{
+ struct setup_stage *sstage = CALLOC_STRUCT(setup_stage);
+
+ sstage->setup = setup_create_context(softpipe);
+ sstage->stage.draw = softpipe->draw;
+ sstage->stage.point = setup_first_point;
+ sstage->stage.line = setup_first_line;
+ sstage->stage.tri = setup_first_tri;
+ sstage->stage.flush = setup_flush;
+ sstage->stage.reset_stipple_counter = reset_stipple_counter;
+ sstage->stage.destroy = render_destroy;
+
+ return (struct draw_stage *)sstage;
+}
+
+struct setup_context *
+sp_draw_setup_context( struct draw_stage *stage )
+{
+ struct setup_stage *ssetup = setup_stage(stage);
+ return ssetup->setup;
+}
+
+void
+sp_draw_flush( struct draw_stage *stage )
+{
+ stage->flush( stage, 0 );
+}
diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h
new file mode 100644
index 0000000000..49bdd98ed8
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_prim_setup.h
@@ -0,0 +1,85 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef SP_PRIM_SETUP_H
+#define SP_PRIM_SETUP_H
+
+
+/**
+ * vbuf is a special stage to gather the stream of triangles, lines, points
+ * together and reconstruct vertex buffers for hardware upload.
+ *
+ * First attempt, work in progress.
+ *
+ * TODO:
+ * - separate out vertex buffer building and primitive emit, ie >1 draw per vb.
+ * - tell vbuf stage how to build hw vertices directly
+ * - pass vbuf stage a buffer pointer for direct emit to agp/vram.
+ *
+ *
+ *
+ * Vertices are just an array of floats, with all the attributes
+ * packed. We currently assume a layout like:
+ *
+ * attr[0][0..3] - window position
+ * attr[1..n][0..3] - remaining attributes.
+ *
+ * Attributes are assumed to be 4 floats wide but are packed so that
+ * all the enabled attributes run contiguously.
+ */
+
+
+struct draw_stage;
+struct softpipe_context;
+
+
+typedef void (*vbuf_draw_func)( struct pipe_context *pipe,
+ unsigned prim,
+ const ushort *elements,
+ unsigned nr_elements,
+ const void *vertex_buffer,
+ unsigned nr_vertices );
+
+
+extern struct draw_stage *
+sp_draw_render_stage( struct softpipe_context *softpipe );
+
+extern struct setup_context *
+sp_draw_setup_context( struct draw_stage * );
+
+extern void
+sp_draw_flush( struct draw_stage * );
+
+
+extern struct draw_stage *
+sp_draw_vbuf_stage( struct draw_context *draw_context,
+ struct pipe_context *pipe,
+ vbuf_draw_func draw );
+
+
+#endif /* SP_PRIM_SETUP_H */
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
new file mode 100644
index 0000000000..9cd5784e5b
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -0,0 +1,410 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Interface between 'draw' module's output and the softpipe rasterizer/setup
+ * code. When the 'draw' module has finished filling a vertex buffer, the
+ * draw_arrays() functions below will be called. Loop over the vertices and
+ * call the point/line/tri setup functions.
+ *
+ * Authors
+ * Brian Paul
+ */
+
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_prim_vbuf.h"
+#include "sp_prim_setup.h"
+#include "sp_setup.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "util/u_memory.h"
+
+
+#define SP_MAX_VBUF_INDEXES 1024
+#define SP_MAX_VBUF_SIZE 4096
+
+typedef const float (*cptrf4)[4];
+
+/**
+ * Subclass of vbuf_render.
+ */
+struct softpipe_vbuf_render
+{
+ struct vbuf_render base;
+ struct softpipe_context *softpipe;
+ uint prim;
+ uint vertex_size;
+ void *vertex_buffer;
+};
+
+
+/** cast wrapper */
+static struct softpipe_vbuf_render *
+softpipe_vbuf_render(struct vbuf_render *vbr)
+{
+ return (struct softpipe_vbuf_render *) vbr;
+}
+
+
+static const struct vertex_info *
+sp_vbuf_get_vertex_info(struct vbuf_render *vbr)
+{
+ struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
+ return softpipe_get_vbuf_vertex_info(cvbr->softpipe);
+}
+
+
+static void *
+sp_vbuf_allocate_vertices(struct vbuf_render *vbr,
+ ushort vertex_size, ushort nr_vertices)
+{
+ struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
+ assert(!cvbr->vertex_buffer);
+ cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16);
+ cvbr->vertex_size = vertex_size;
+ return cvbr->vertex_buffer;
+}
+
+
+static void
+sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
+ unsigned vertex_size, unsigned vertices_used)
+{
+ struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
+ align_free(vertices);
+ assert(vertices == cvbr->vertex_buffer);
+ cvbr->vertex_buffer = NULL;
+}
+
+
+static boolean
+sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
+{
+ struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
+
+ /* XXX: break this dependency - make setup_context live under
+ * softpipe, rename the old "setup" draw stage to something else.
+ */
+ struct setup_context *setup_ctx = sp_draw_setup_context(cvbr->softpipe->setup);
+
+ setup_prepare( setup_ctx );
+
+
+
+ cvbr->prim = prim;
+ return TRUE;
+
+}
+
+
+static INLINE cptrf4 get_vert( const void *vertex_buffer,
+ int index,
+ int stride )
+{
+ return (cptrf4)((char *)vertex_buffer + index * stride);
+}
+
+
+/**
+ * draw elements / indexed primitives
+ */
+static void
+sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
+{
+ struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
+ struct softpipe_context *softpipe = cvbr->softpipe;
+ const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
+ const void *vertex_buffer = cvbr->vertex_buffer;
+ unsigned i;
+
+ /* XXX: break this dependency - make setup_context live under
+ * softpipe, rename the old "setup" draw stage to something else.
+ */
+ struct draw_stage *setup = softpipe->setup;
+ struct setup_context *setup_ctx = sp_draw_setup_context(setup);
+
+ switch (cvbr->prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < nr; i++) {
+ setup_point( setup_ctx,
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 1; i < nr; i += 2) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ for (i = 1; i < nr; i ++) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ for (i = 1; i < nr; i ++) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ if (nr) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, indices[nr-1], stride),
+ get_vert(vertex_buffer, indices[0], stride) );
+ }
+ break;
+
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 2; i < nr; i += 3) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ for (i = 2; i < nr; i += 1) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i+(i&1)-2], stride),
+ get_vert(vertex_buffer, indices[i-(i&1)-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ for (i = 2; i < nr; i += 1) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[0], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+ }
+ break;
+ case PIPE_PRIM_QUADS:
+ for (i = 3; i < nr; i += 4) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+ }
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ for (i = 3; i < nr; i += 2) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ /* XXX: why are we calling this??? If we had to call something, it
+ * would be a function in sp_setup.c:
+ */
+ sp_draw_flush( setup );
+}
+
+
+/**
+ * This function is hit when the draw module is working in pass-through mode.
+ * It's up to us to convert the vertex array into point/line/tri prims.
+ */
+static void
+sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
+{
+ struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
+ struct softpipe_context *softpipe = cvbr->softpipe;
+ const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
+ const void *vertex_buffer =
+ (void *) get_vert(cvbr->vertex_buffer, start, stride);
+ unsigned i;
+
+ /* XXX: break this dependency - make setup_context live under
+ * softpipe, rename the old "setup" draw stage to something else.
+ */
+ struct draw_stage *setup = softpipe->setup;
+ struct setup_context *setup_ctx = sp_draw_setup_context(setup);
+
+ switch (cvbr->prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < nr; i++) {
+ setup_point( setup_ctx,
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 1; i < nr; i += 2) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ for (i = 1; i < nr; i ++) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ for (i = 1; i < nr; i ++) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ if (nr) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, nr-1, stride),
+ get_vert(vertex_buffer, 0, stride) );
+ }
+ break;
+
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 2; i < nr; i += 3) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ for (i = 2; i < nr; i += 1) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, i+(i&1)-2, stride),
+ get_vert(vertex_buffer, i-(i&1)-1, stride),
+ get_vert(vertex_buffer, i-0, stride));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ for (i = 2; i < nr; i += 1) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, 0, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride));
+ }
+ break;
+ case PIPE_PRIM_QUADS:
+ for (i = 3; i < nr; i += 4) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-0, stride));
+
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride));
+ }
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ for (i = 3; i < nr; i += 2) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-0, stride));
+
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-0, stride));
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+
+static void
+sp_vbuf_destroy(struct vbuf_render *vbr)
+{
+ struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
+ cvbr->softpipe->vbuf_render = NULL;
+ FREE(cvbr);
+}
+
+
+/**
+ * Initialize the post-transform vertex buffer information for the given
+ * context.
+ */
+void
+sp_init_vbuf(struct softpipe_context *sp)
+{
+ assert(sp->draw);
+
+ sp->vbuf_render = CALLOC_STRUCT(softpipe_vbuf_render);
+
+ sp->vbuf_render->base.max_indices = SP_MAX_VBUF_INDEXES;
+ sp->vbuf_render->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE;
+
+ sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info;
+ sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices;
+ sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive;
+ sp->vbuf_render->base.draw = sp_vbuf_draw;
+ sp->vbuf_render->base.draw_arrays = sp_vbuf_draw_arrays;
+ sp->vbuf_render->base.release_vertices = sp_vbuf_release_vertices;
+ sp->vbuf_render->base.destroy = sp_vbuf_destroy;
+
+ sp->vbuf_render->softpipe = sp;
+
+ sp->vbuf = draw_vbuf_stage(sp->draw, &sp->vbuf_render->base);
+
+ draw_set_rasterize_stage(sp->draw, sp->vbuf);
+
+ draw_set_render(sp->draw, &sp->vbuf_render->base);
+}
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.h b/src/gallium/drivers/softpipe/sp_prim_vbuf.h
new file mode 100644
index 0000000000..1de9cc2a89
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SP_VBUF_H
+#define SP_VBUF_H
+
+
+struct softpipe_context;
+
+extern void
+sp_init_vbuf(struct softpipe_context *softpipe);
+
+
+#endif /* SP_VBUF_H */
diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c
new file mode 100644
index 0000000000..892ef87ee9
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad.c
@@ -0,0 +1,118 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "pipe/p_shader_tokens.h"
+
+static void
+sp_push_quad_first(
+ struct softpipe_context *sp,
+ struct quad_stage *quad,
+ uint i )
+{
+ quad->next = sp->quad[i].first;
+ sp->quad[i].first = quad;
+}
+
+static void
+sp_build_depth_stencil(
+ struct softpipe_context *sp,
+ uint i )
+{
+ if (sp->depth_stencil->stencil[0].enabled ||
+ sp->depth_stencil->stencil[1].enabled) {
+ sp_push_quad_first( sp, sp->quad[i].stencil_test, i );
+ }
+ else if (sp->depth_stencil->depth.enabled &&
+ sp->framebuffer.zsbuf) {
+ sp_push_quad_first( sp, sp->quad[i].depth_test, i );
+ }
+}
+
+void
+sp_build_quad_pipeline(struct softpipe_context *sp)
+{
+ uint i;
+
+ boolean early_depth_test =
+ sp->depth_stencil->depth.enabled &&
+ sp->framebuffer.zsbuf &&
+ !sp->depth_stencil->alpha.enabled &&
+ !sp->fs->info.uses_kill &&
+ !sp->fs->info.writes_z;
+
+ /* build up the pipeline in reverse order... */
+ for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+ sp->quad[i].first = sp->quad[i].output;
+
+ if (sp->blend->colormask != 0xf) {
+ sp_push_quad_first( sp, sp->quad[i].colormask, i );
+ }
+
+ if (sp->blend->blend_enable ||
+ sp->blend->logicop_enable) {
+ sp_push_quad_first( sp, sp->quad[i].blend, i );
+ }
+
+ if (sp->depth_stencil->depth.occlusion_count) {
+ sp_push_quad_first( sp, sp->quad[i].occlusion, i );
+ }
+
+ if (sp->rasterizer->poly_smooth ||
+ sp->rasterizer->line_smooth ||
+ sp->rasterizer->point_smooth) {
+ sp_push_quad_first( sp, sp->quad[i].coverage, i );
+ }
+
+ if (!early_depth_test) {
+ sp_build_depth_stencil( sp, i );
+ }
+
+ if (sp->depth_stencil->alpha.enabled) {
+ sp_push_quad_first( sp, sp->quad[i].alpha_test, i );
+ }
+
+ /* XXX always enable shader? */
+ if (1) {
+ sp_push_quad_first( sp, sp->quad[i].shade, i );
+ }
+
+ if (early_depth_test) {
+ sp_build_depth_stencil( sp, i );
+ sp_push_quad_first( sp, sp->quad[i].earlyz, i );
+ }
+
+#if !USE_DRAW_STAGE_PSTIPPLE
+ if (sp->rasterizer->poly_stipple_enable) {
+ sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i );
+ }
+#endif
+ }
+}
+
diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h
new file mode 100644
index 0000000000..08513cb95f
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad.h
@@ -0,0 +1,69 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SP_QUAD_H
+#define SP_QUAD_H
+
+
+struct softpipe_context;
+struct quad_header;
+
+
+struct quad_stage {
+ struct softpipe_context *softpipe;
+
+ struct quad_stage *next;
+
+ void (*begin)(struct quad_stage *qs);
+
+ /** the stage action */
+ void (*run)(struct quad_stage *qs, struct quad_header *quad);
+
+ void (*destroy)(struct quad_stage *qs);
+};
+
+
+struct quad_stage *sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_earlyz_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_alpha_test_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe );
+struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe );
+
+void sp_build_quad_pipeline(struct softpipe_context *sp);
+
+void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad);
+
+#endif /* SP_QUAD_H */
diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
new file mode 100644
index 0000000000..5bebd141e9
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
@@ -0,0 +1,108 @@
+
+/**
+ * quad alpha test
+ */
+
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+
+
+static void
+alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+ const float ref = softpipe->depth_stencil->alpha.ref;
+ unsigned passMask = 0x0, j;
+ const uint cbuf = 0; /* only output[0].alpha is tested */
+ const float *aaaa = quad->output.color[cbuf][3];
+
+ switch (softpipe->depth_stencil->alpha.func) {
+ case PIPE_FUNC_NEVER:
+ break;
+ case PIPE_FUNC_LESS:
+ /*
+ * If mask were an array [4] we could do this SIMD-style:
+ * passMask = (quad->outputs.color[0][3] <= vec4(ref));
+ */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (aaaa[j] < ref) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_EQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (aaaa[j] == ref) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_LEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (aaaa[j] <= ref) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_GREATER:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (aaaa[j] > ref) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (aaaa[j] != ref) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_GEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (aaaa[j] >= ref) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_ALWAYS:
+ passMask = MASK_ALL;
+ break;
+ default:
+ assert(0);
+ }
+
+ quad->inout.mask &= passMask;
+
+ if (quad->inout.mask)
+ qs->next->run(qs->next, quad);
+}
+
+
+static void alpha_test_begin(struct quad_stage *qs)
+{
+ qs->next->begin(qs->next);
+}
+
+
+static void alpha_test_destroy(struct quad_stage *qs)
+{
+ FREE( qs );
+}
+
+
+struct quad_stage *
+sp_quad_alpha_test_stage( struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+ stage->softpipe = softpipe;
+ stage->begin = alpha_test_begin;
+ stage->run = alpha_test_quad;
+ stage->destroy = alpha_test_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
new file mode 100644
index 0000000000..6f64c6e584
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -0,0 +1,759 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * quad blending
+ * \author Brian Paul
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_surface.h"
+#include "sp_tile_cache.h"
+#include "sp_quad.h"
+
+
+#define VEC4_COPY(DST, SRC) \
+do { \
+ DST[0] = SRC[0]; \
+ DST[1] = SRC[1]; \
+ DST[2] = SRC[2]; \
+ DST[3] = SRC[3]; \
+} while(0)
+
+#define VEC4_SCALAR(DST, SRC) \
+do { \
+ DST[0] = SRC; \
+ DST[1] = SRC; \
+ DST[2] = SRC; \
+ DST[3] = SRC; \
+} while(0)
+
+#define VEC4_ADD(R, A, B) \
+do { \
+ R[0] = A[0] + B[0]; \
+ R[1] = A[1] + B[1]; \
+ R[2] = A[2] + B[2]; \
+ R[3] = A[3] + B[3]; \
+} while (0)
+
+#define VEC4_SUB(R, A, B) \
+do { \
+ R[0] = A[0] - B[0]; \
+ R[1] = A[1] - B[1]; \
+ R[2] = A[2] - B[2]; \
+ R[3] = A[3] - B[3]; \
+} while (0)
+
+#define VEC4_MUL(R, A, B) \
+do { \
+ R[0] = A[0] * B[0]; \
+ R[1] = A[1] * B[1]; \
+ R[2] = A[2] * B[2]; \
+ R[3] = A[3] * B[3]; \
+} while (0)
+
+#define VEC4_MIN(R, A, B) \
+do { \
+ R[0] = (A[0] < B[0]) ? A[0] : B[0]; \
+ R[1] = (A[1] < B[1]) ? A[1] : B[1]; \
+ R[2] = (A[2] < B[2]) ? A[2] : B[2]; \
+ R[3] = (A[3] < B[3]) ? A[3] : B[3]; \
+} while (0)
+
+#define VEC4_MAX(R, A, B) \
+do { \
+ R[0] = (A[0] > B[0]) ? A[0] : B[0]; \
+ R[1] = (A[1] > B[1]) ? A[1] : B[1]; \
+ R[2] = (A[2] > B[2]) ? A[2] : B[2]; \
+ R[3] = (A[3] > B[3]) ? A[3] : B[3]; \
+} while (0)
+
+
+
+static void
+logicop_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+ uint cbuf;
+
+ /* loop over colorbuffer outputs */
+ for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+ float dest[4][QUAD_SIZE];
+ ubyte src[4][4], dst[4][4], res[4][4];
+ uint *src4 = (uint *) src;
+ uint *dst4 = (uint *) dst;
+ uint *res4 = (uint *) res;
+ struct softpipe_cached_tile *
+ tile = sp_get_cached_tile(softpipe,
+ softpipe->cbuf_cache[cbuf],
+ quad->input.x0, quad->input.y0);
+ float (*quadColor)[4] = quad->output.color[cbuf];
+ uint i, j;
+
+ /* get/swizzle dest colors */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
+ int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
+ for (i = 0; i < 4; i++) {
+ dest[i][j] = tile->data.color[y][x][i];
+ }
+ }
+
+ /* convert to ubyte */
+ for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
+ dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
+ dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
+ dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
+ dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
+
+ src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
+ src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
+ src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
+ src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
+ }
+
+ switch (softpipe->blend->logicop_func) {
+ case PIPE_LOGICOP_CLEAR:
+ for (j = 0; j < 4; j++)
+ res4[j] = 0;
+ break;
+ case PIPE_LOGICOP_NOR:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~(src4[j] | dst4[j]);
+ break;
+ case PIPE_LOGICOP_AND_INVERTED:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~src4[j] & dst4[j];
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~src4[j];
+ break;
+ case PIPE_LOGICOP_AND_REVERSE:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] & ~dst4[j];
+ break;
+ case PIPE_LOGICOP_INVERT:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~dst4[j];
+ break;
+ case PIPE_LOGICOP_XOR:
+ for (j = 0; j < 4; j++)
+ res4[j] = dst4[j] ^ src4[j];
+ break;
+ case PIPE_LOGICOP_NAND:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~(src4[j] & dst4[j]);
+ break;
+ case PIPE_LOGICOP_AND:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] & dst4[j];
+ break;
+ case PIPE_LOGICOP_EQUIV:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~(src4[j] ^ dst4[j]);
+ break;
+ case PIPE_LOGICOP_NOOP:
+ for (j = 0; j < 4; j++)
+ res4[j] = dst4[j];
+ break;
+ case PIPE_LOGICOP_OR_INVERTED:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~src4[j] | dst4[j];
+ break;
+ case PIPE_LOGICOP_COPY:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j];
+ break;
+ case PIPE_LOGICOP_OR_REVERSE:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] | ~dst4[j];
+ break;
+ case PIPE_LOGICOP_OR:
+ for (j = 0; j < 4; j++)
+ res4[j] = src4[j] | dst4[j];
+ break;
+ case PIPE_LOGICOP_SET:
+ for (j = 0; j < 4; j++)
+ res4[j] = ~0;
+ break;
+ default:
+ assert(0);
+ }
+
+ for (j = 0; j < 4; j++) {
+ quadColor[j][0] = ubyte_to_float(res[j][0]);
+ quadColor[j][1] = ubyte_to_float(res[j][1]);
+ quadColor[j][2] = ubyte_to_float(res[j][2]);
+ quadColor[j][3] = ubyte_to_float(res[j][3]);
+ }
+ }
+
+ /* pass quad to next stage */
+ qs->next->run(qs->next, quad);
+}
+
+
+
+
+static void
+blend_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ static const float zero[4] = { 0, 0, 0, 0 };
+ static const float one[4] = { 1, 1, 1, 1 };
+
+ struct softpipe_context *softpipe = qs->softpipe;
+ uint cbuf;
+
+ if (softpipe->blend->logicop_enable) {
+ logicop_quad(qs, quad);
+ return;
+ }
+
+ /* loop over colorbuffer outputs */
+ for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+ float source[4][QUAD_SIZE], dest[4][QUAD_SIZE];
+ struct softpipe_cached_tile *tile
+ = sp_get_cached_tile(softpipe,
+ softpipe->cbuf_cache[cbuf],
+ quad->input.x0, quad->input.y0);
+ float (*quadColor)[4] = quad->output.color[cbuf];
+ uint i, j;
+
+ /* get/swizzle dest colors */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
+ int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
+ for (i = 0; i < 4; i++) {
+ dest[i][j] = tile->data.color[y][x][i];
+ }
+ }
+
+ /*
+ * Compute src/first term RGB
+ */
+ switch (softpipe->blend->rgb_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ VEC4_COPY(source[0], quadColor[0]); /* R */
+ VEC4_COPY(source[1], quadColor[1]); /* G */
+ VEC4_COPY(source[2], quadColor[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
+ VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
+ VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ {
+ const float *alpha = quadColor[3];
+ VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
+ VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
+ VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ {
+ const float *alpha = dest[3];
+ VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ {
+ const float *alpha = quadColor[3];
+ float diff[4], temp[4];
+ VEC4_SUB(diff, one, dest[3]);
+ VEC4_MIN(temp, alpha, diff);
+ VEC4_MUL(source[0], quadColor[0], temp); /* R */
+ VEC4_MUL(source[1], quadColor[1], temp); /* G */
+ VEC4_MUL(source[2], quadColor[2], temp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ {
+ float comp[4];
+ VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
+ VEC4_MUL(source[0], quadColor[0], comp); /* R */
+ VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
+ VEC4_MUL(source[1], quadColor[1], comp); /* G */
+ VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
+ VEC4_MUL(source[2], quadColor[2], comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ {
+ float alpha[4];
+ VEC4_SCALAR(alpha, softpipe->blend_color.color[3]);
+ VEC4_MUL(source[0], quadColor[0], alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(source[0], zero); /* R */
+ VEC4_COPY(source[1], zero); /* G */
+ VEC4_COPY(source[2], zero); /* B */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
+ VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+ VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
+ VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+ VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
+ VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ {
+ float inv_alpha[4];
+ VEC4_SUB(inv_alpha, one, quadColor[3]);
+ VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ {
+ float inv_alpha[4];
+ VEC4_SUB(inv_alpha, one, dest[3]);
+ VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, dest[0]); /* R */
+ VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
+ VEC4_SUB(inv_comp, one, dest[1]); /* G */
+ VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
+ VEC4_SUB(inv_comp, one, dest[2]); /* B */
+ VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ {
+ float inv_comp[4];
+ /* R */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
+ VEC4_MUL(source[0], quadColor[0], inv_comp);
+ /* G */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
+ VEC4_MUL(source[1], quadColor[1], inv_comp);
+ /* B */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
+ VEC4_MUL(source[2], quadColor[2], inv_comp);
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ {
+ float inv_alpha[4];
+ VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]);
+ VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
+ VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
+ VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ assert(0); /* to do */
+ break;
+ default:
+ assert(0);
+ }
+
+ /*
+ * Compute src/first term A
+ */
+ switch (softpipe->blend->alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ VEC4_COPY(source[3], quadColor[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ {
+ const float *alpha = quadColor[3];
+ VEC4_MUL(source[3], quadColor[3], alpha); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ /* multiply alpha by 1.0 */
+ VEC4_COPY(source[3], quadColor[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ {
+ float comp[4];
+ VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
+ VEC4_MUL(source[3], quadColor[3], comp); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(source[3], zero); /* A */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ {
+ float inv_alpha[4];
+ VEC4_SUB(inv_alpha, one, quadColor[3]);
+ VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ {
+ float inv_alpha[4];
+ VEC4_SUB(inv_alpha, one, dest[3]);
+ VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ {
+ float inv_comp[4];
+ /* A */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
+ VEC4_MUL(source[3], quadColor[3], inv_comp);
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+
+ /*
+ * Compute dest/second term RGB
+ */
+ switch (softpipe->blend->rgb_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ /* dest = dest * 1 NO-OP, leave dest as-is */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
+ VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
+ VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
+ VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
+ VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
+ VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
+ VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
+ VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
+ VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ assert(0); /* illegal */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ {
+ float comp[4];
+ VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
+ VEC4_MUL(dest[0], dest[0], comp); /* R */
+ VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
+ VEC4_MUL(dest[1], dest[1], comp); /* G */
+ VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
+ VEC4_MUL(dest[2], dest[2], comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ {
+ float comp[4];
+ VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
+ VEC4_MUL(dest[0], dest[0], comp); /* R */
+ VEC4_MUL(dest[1], dest[1], comp); /* G */
+ VEC4_MUL(dest[2], dest[2], comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(dest[0], zero); /* R */
+ VEC4_COPY(dest[1], zero); /* G */
+ VEC4_COPY(dest[2], zero); /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ /* XXX what are these? */
+ assert(0);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
+ VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
+ VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
+ VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
+ VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
+ VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ {
+ float one_minus_alpha[QUAD_SIZE];
+ VEC4_SUB(one_minus_alpha, one, quadColor[3]);
+ VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
+ VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
+ VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, dest[3]); /* A */
+ VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
+ VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
+ VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, dest[0]); /* R */
+ VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
+ VEC4_SUB(inv_comp, one, dest[1]); /* G */
+ VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
+ VEC4_SUB(inv_comp, one, dest[2]); /* B */
+ VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ {
+ float inv_comp[4];
+ /* R */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
+ VEC4_MUL(dest[0], dest[0], inv_comp);
+ /* G */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
+ VEC4_MUL(dest[1], dest[1], inv_comp);
+ /* B */
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
+ VEC4_MUL(dest[2], dest[2], inv_comp);
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ {
+ float inv_comp[4];
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
+ VEC4_MUL(dest[0], dest[0], inv_comp);
+ VEC4_MUL(dest[1], dest[1], inv_comp);
+ VEC4_MUL(dest[2], dest[2], inv_comp);
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ /* XXX what are these? */
+ assert(0);
+ break;
+ default:
+ assert(0);
+ }
+
+ /*
+ * Compute dest/second term A
+ */
+ switch (softpipe->blend->alpha_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ /* dest = dest * 1 NO-OP, leave dest as-is */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ assert(0); /* illegal */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ {
+ float comp[4];
+ VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
+ VEC4_MUL(dest[3], dest[3], comp); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ VEC4_COPY(dest[3], zero); /* A */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ {
+ float one_minus_alpha[QUAD_SIZE];
+ VEC4_SUB(one_minus_alpha, one, quadColor[3]);
+ VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ {
+ float inv_comp[4];
+ VEC4_SUB(inv_comp, one, dest[3]); /* A */
+ VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
+ }
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ {
+ float inv_comp[4];
+ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
+ VEC4_MUL(dest[3], dest[3], inv_comp);
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ /*
+ * Combine RGB terms
+ */
+ switch (softpipe->blend->rgb_func) {
+ case PIPE_BLEND_ADD:
+ VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */
+ VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */
+ VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */
+ break;
+ case PIPE_BLEND_MIN:
+ VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ case PIPE_BLEND_MAX:
+ VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
+ VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
+ VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
+ break;
+ default:
+ assert(0);
+ }
+
+ /*
+ * Combine A terms
+ */
+ switch (softpipe->blend->alpha_func) {
+ case PIPE_BLEND_ADD:
+ VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */
+ break;
+ case PIPE_BLEND_MIN:
+ VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ case PIPE_BLEND_MAX:
+ VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
+ break;
+ default:
+ assert(0);
+ }
+
+ } /* cbuf loop */
+
+ /* pass blended quad to next stage */
+ qs->next->run(qs->next, quad);
+}
+
+
+static void blend_begin(struct quad_stage *qs)
+{
+ qs->next->begin(qs->next);
+}
+
+
+static void blend_destroy(struct quad_stage *qs)
+{
+ FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+ stage->softpipe = softpipe;
+ stage->begin = blend_begin;
+ stage->run = blend_quad;
+ stage->destroy = blend_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c
new file mode 100644
index 0000000000..92e9af09c1
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c
@@ -0,0 +1,74 @@
+
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_surface.h"
+#include "sp_quad.h"
+
+
+/**
+ * Loop over colorbuffers, passing quad to next stage each time.
+ */
+static void
+cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+ float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE];
+ unsigned i;
+
+ assert(sizeof(quad->outputs.color) == sizeof(tmp));
+ assert(softpipe->framebuffer.num_cbufs <= PIPE_MAX_COLOR_BUFS);
+
+ /* make copy of original colors since they can get modified
+ * by blending and masking.
+ * XXX we won't have to do this if the fragment program actually emits
+ * N separate colors and we're drawing to N color buffers (MRT).
+ * But if we emitted one color and glDrawBuffer(GL_FRONT_AND_BACK) is
+ * in effect, we need to save/restore colors like this.
+ */
+ memcpy(tmp, quad->outputs.color, sizeof(tmp));
+
+ for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) {
+ /* set current cbuffer */
+#if 0 /* obsolete & going away */
+ softpipe->current_cbuf = i;
+#endif
+
+ /* pass blended quad to next stage */
+ qs->next->run(qs->next, quad);
+
+ /* restore quad's colors for next buffer */
+ memcpy(quad->outputs.color, tmp, sizeof(tmp));
+ }
+}
+
+
+static void cbuf_loop_begin(struct quad_stage *qs)
+{
+ qs->next->begin(qs->next);
+}
+
+
+static void cbuf_loop_destroy(struct quad_stage *qs)
+{
+ FREE( qs );
+}
+
+
+/**
+ * Create the colorbuffer loop stage.
+ * This is used to implement multiple render targets and GL_FRONT_AND_BACK
+ * rendering.
+ */
+struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+ stage->softpipe = softpipe;
+ stage->begin = cbuf_loop_begin;
+ stage->run = cbuf_loop_quad;
+ stage->destroy = cbuf_loop_destroy;
+
+ return stage;
+}
+
diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c
new file mode 100644
index 0000000000..f32bdfab78
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c
@@ -0,0 +1,116 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief quad colormask stage
+ * \author Brian Paul
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_surface.h"
+#include "sp_quad.h"
+#include "sp_tile_cache.h"
+
+
+
+/**
+ * XXX colormask could be rolled into blending...
+ */
+static void
+colormask_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+ uint cbuf;
+
+ /* loop over colorbuffer outputs */
+ for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+ float dest[4][QUAD_SIZE];
+ struct softpipe_cached_tile *tile
+ = sp_get_cached_tile(softpipe,
+ softpipe->cbuf_cache[cbuf],
+ quad->input.x0, quad->input.y0);
+ float (*quadColor)[4] = quad->output.color[cbuf];
+ uint i, j;
+
+ /* get/swizzle dest colors */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
+ int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
+ for (i = 0; i < 4; i++) {
+ dest[i][j] = tile->data.color[y][x][i];
+ }
+ }
+
+ /* R */
+ if (!(softpipe->blend->colormask & PIPE_MASK_R))
+ COPY_4V(quadColor[0], dest[0]);
+
+ /* G */
+ if (!(softpipe->blend->colormask & PIPE_MASK_G))
+ COPY_4V(quadColor[1], dest[1]);
+
+ /* B */
+ if (!(softpipe->blend->colormask & PIPE_MASK_B))
+ COPY_4V(quadColor[2], dest[2]);
+
+ /* A */
+ if (!(softpipe->blend->colormask & PIPE_MASK_A))
+ COPY_4V(quadColor[3], dest[3]);
+ }
+
+ /* pass quad to next stage */
+ qs->next->run(qs->next, quad);
+}
+
+
+static void colormask_begin(struct quad_stage *qs)
+{
+ qs->next->begin(qs->next);
+}
+
+
+static void colormask_destroy(struct quad_stage *qs)
+{
+ FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+ stage->softpipe = softpipe;
+ stage->begin = colormask_begin;
+ stage->run = colormask_quad;
+ stage->destroy = colormask_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c
new file mode 100644
index 0000000000..ee29aa7dfe
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c
@@ -0,0 +1,93 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * \brief Apply AA coverage to quad alpha valus
+ * \author Brian Paul
+ */
+
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+
+
+/**
+ * Multiply quad's alpha values by the fragment coverage.
+ */
+static void
+coverage_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+
+ if ((softpipe->rasterizer->poly_smooth && quad->input.prim == PRIM_TRI) ||
+ (softpipe->rasterizer->line_smooth && quad->input.prim == PRIM_LINE) ||
+ (softpipe->rasterizer->point_smooth && quad->input.prim == PRIM_POINT)) {
+ uint cbuf;
+
+ /* loop over colorbuffer outputs */
+ for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+ float (*quadColor)[4] = quad->output.color[cbuf];
+ unsigned j;
+ for (j = 0; j < QUAD_SIZE; j++) {
+ assert(quad->input.coverage[j] >= 0.0);
+ assert(quad->input.coverage[j] <= 1.0);
+ quadColor[3][j] *= quad->input.coverage[j];
+ }
+ }
+ }
+
+ qs->next->run(qs->next, quad);
+}
+
+
+static void coverage_begin(struct quad_stage *qs)
+{
+ qs->next->begin(qs->next);
+}
+
+
+static void coverage_destroy(struct quad_stage *qs)
+{
+ FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+ stage->softpipe = softpipe;
+ stage->begin = coverage_begin;
+ stage->run = coverage_quad;
+ stage->destroy = coverage_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
new file mode 100644
index 0000000000..523bd3e080
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -0,0 +1,290 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Quad depth testing
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_surface.h"
+#include "sp_quad.h"
+#include "sp_tile_cache.h"
+
+
+/**
+ * Do depth testing for a quad.
+ * Not static since it's used by the stencil code.
+ */
+
+/*
+ * To increase efficiency, we should probably have multiple versions
+ * of this function that are specifically for Z16, Z32 and FP Z buffers.
+ * Try to effectively do that with codegen...
+ */
+
+void
+sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+ struct pipe_surface *ps = softpipe->framebuffer.zsbuf;
+ const enum pipe_format format = ps->format;
+ unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */
+ unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */
+ unsigned zmask = 0;
+ unsigned j;
+ struct softpipe_cached_tile *tile
+ = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0);
+
+ assert(ps); /* shouldn't get here if there's no zbuffer */
+
+ /*
+ * Convert quad's float depth values to int depth values (qzzzz).
+ * If the Z buffer stores integer values, we _have_ to do the depth
+ * compares with integers (not floats). Otherwise, the float->int->float
+ * conversion of Z values (which isn't an identity function) will cause
+ * Z-fighting errors.
+ *
+ * Also, get the zbuffer values (bzzzz) from the cached tile.
+ */
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ {
+ float scale = 65535.0;
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+ }
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ bzzzz[j] = tile->data.depth16[y][x];
+ }
+ }
+ break;
+ case PIPE_FORMAT_Z32_UNORM:
+ {
+ double scale = (double) (uint) ~0UL;
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+ }
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ bzzzz[j] = tile->data.depth32[y][x];
+ }
+ }
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ /* fall-through */
+ case PIPE_FORMAT_S8Z24_UNORM:
+ {
+ float scale = (float) ((1 << 24) - 1);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+ }
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
+ }
+ }
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ /* fall-through */
+ case PIPE_FORMAT_Z24S8_UNORM:
+ {
+ float scale = (float) ((1 << 24) - 1);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+ }
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ bzzzz[j] = tile->data.depth32[y][x] >> 8;
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ switch (softpipe->depth_stencil->depth.func) {
+ case PIPE_FUNC_NEVER:
+ /* zmask = 0 */
+ break;
+ case PIPE_FUNC_LESS:
+ /* Note this is pretty much a single sse or cell instruction.
+ * Like this: quad->mask &= (quad->outputs.depth < zzzz);
+ */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (qzzzz[j] < bzzzz[j])
+ zmask |= 1 << j;
+ }
+ break;
+ case PIPE_FUNC_EQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (qzzzz[j] == bzzzz[j])
+ zmask |= 1 << j;
+ }
+ break;
+ case PIPE_FUNC_LEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (qzzzz[j] <= bzzzz[j])
+ zmask |= (1 << j);
+ }
+ break;
+ case PIPE_FUNC_GREATER:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (qzzzz[j] > bzzzz[j])
+ zmask |= (1 << j);
+ }
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (qzzzz[j] != bzzzz[j])
+ zmask |= (1 << j);
+ }
+ break;
+ case PIPE_FUNC_GEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (qzzzz[j] >= bzzzz[j])
+ zmask |= (1 << j);
+ }
+ break;
+ case PIPE_FUNC_ALWAYS:
+ zmask = MASK_ALL;
+ break;
+ default:
+ assert(0);
+ }
+
+ quad->inout.mask &= zmask;
+
+ if (softpipe->depth_stencil->depth.writemask) {
+
+ /* This is also efficient with sse / spe instructions:
+ */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (quad->inout.mask & (1 << j)) {
+ bzzzz[j] = qzzzz[j];
+ }
+ }
+
+ /* put updated Z values back into cached tile */
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.depth16[y][x] = (ushort) bzzzz[j];
+ }
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ /* fall-through */
+ /* (yes, this falls through to a different case than above) */
+ case PIPE_FORMAT_Z32_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.depth32[y][x] = bzzzz[j];
+ }
+ break;
+ case PIPE_FORMAT_S8Z24_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ uint s8z24 = tile->data.depth32[y][x];
+ s8z24 = (s8z24 & 0xff000000) | bzzzz[j];
+ tile->data.depth32[y][x] = s8z24;
+ }
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ uint z24s8 = tile->data.depth32[y][x];
+ z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8);
+ tile->data.depth32[y][x] = z24s8;
+ }
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.depth32[y][x] = bzzzz[j] << 8;
+ }
+ break;
+ default:
+ assert(0);
+ }
+ }
+}
+
+
+static void
+depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ sp_depth_test_quad(qs, quad);
+
+ if (quad->inout.mask)
+ qs->next->run(qs->next, quad);
+}
+
+
+static void depth_test_begin(struct quad_stage *qs)
+{
+ qs->next->begin(qs->next);
+}
+
+
+static void depth_test_destroy(struct quad_stage *qs)
+{
+ FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+ stage->softpipe = softpipe;
+ stage->begin = depth_test_begin;
+ stage->run = depth_test_quad;
+ stage->destroy = depth_test_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
new file mode 100644
index 0000000000..6e2dde304e
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
@@ -0,0 +1,88 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Quad early-z testing
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+
+
+/**
+ * All this stage does is compute the quad's Z values (which is normally
+ * done by the shading stage).
+ * The next stage will do the actual depth test.
+ */
+static void
+earlyz_quad(
+ struct quad_stage *qs,
+ struct quad_header *quad )
+{
+ const float fx = (float) quad->input.x0;
+ const float fy = (float) quad->input.y0;
+ const float dzdx = quad->posCoef->dadx[2];
+ const float dzdy = quad->posCoef->dady[2];
+ const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+
+ quad->output.depth[0] = z0;
+ quad->output.depth[1] = z0 + dzdx;
+ quad->output.depth[2] = z0 + dzdy;
+ quad->output.depth[3] = z0 + dzdx + dzdy;
+
+ qs->next->run( qs->next, quad );
+}
+
+static void
+earlyz_begin(
+ struct quad_stage *qs )
+{
+ qs->next->begin( qs->next );
+}
+
+static void
+earlyz_destroy(
+ struct quad_stage *qs )
+{
+ FREE( qs );
+}
+
+struct quad_stage *
+sp_quad_earlyz_stage(
+ struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT( quad_stage );
+
+ stage->softpipe = softpipe;
+ stage->begin = earlyz_begin;
+ stage->run = earlyz_quad;
+ stage->destroy = earlyz_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
new file mode 100644
index 0000000000..5dacbbe55f
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -0,0 +1,189 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Vertices are just an array of floats, with all the attributes
+ * packed. We currently assume a layout like:
+ *
+ * attr[0][0..3] - window position
+ * attr[1..n][0..3] - remaining attributes.
+ *
+ * Attributes are assumed to be 4 floats wide but are packed so that
+ * all the enabled attributes run contiguously.
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+#include "sp_texture.h"
+#include "sp_tex_sample.h"
+
+
+struct quad_shade_stage
+{
+ struct quad_stage stage; /**< base class */
+ struct tgsi_exec_machine machine;
+ struct tgsi_exec_vector *inputs, *outputs;
+};
+
+
+/** cast wrapper */
+static INLINE struct quad_shade_stage *
+quad_shade_stage(struct quad_stage *qs)
+{
+ return (struct quad_shade_stage *) qs;
+}
+
+
+
+/**
+ * Execute fragment shader for the four fragments in the quad.
+ */
+static void
+shade_quad(
+ struct quad_stage *qs,
+ struct quad_header *quad )
+{
+ struct quad_shade_stage *qss = quad_shade_stage( qs );
+ struct softpipe_context *softpipe = qs->softpipe;
+ struct tgsi_exec_machine *machine = &qss->machine;
+ boolean z_written;
+
+ /* Consts do not require 16 byte alignment. */
+ machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
+
+ machine->InterpCoefs = quad->coef;
+
+ /* run shader */
+ quad->inout.mask &= softpipe->fs->run( softpipe->fs,
+ &qss->machine,
+ quad );
+
+ /* store outputs */
+ z_written = FALSE;
+ {
+ const ubyte *sem_name = softpipe->fs->info.output_semantic_name;
+ const ubyte *sem_index = softpipe->fs->info.output_semantic_index;
+ const uint n = qss->stage.softpipe->fs->info.num_outputs;
+ uint i;
+ for (i = 0; i < n; i++) {
+ switch (sem_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ {
+ uint cbuf = sem_index[i];
+ memcpy(quad->output.color[cbuf],
+ &machine->Outputs[i].xyzw[0].f[0],
+ sizeof(quad->output.color[0]) );
+ }
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ {
+ uint j;
+ for (j = 0; j < 4; j++) {
+ quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j];
+ }
+ z_written = TRUE;
+ }
+ break;
+ }
+ }
+ }
+
+ if (!z_written) {
+ /* compute Z values now, as in the quad earlyz stage */
+ /* XXX we should really only do this if the earlyz stage is not used */
+ const float fx = (float) quad->input.x0;
+ const float fy = (float) quad->input.y0;
+ const float dzdx = quad->posCoef->dadx[2];
+ const float dzdy = quad->posCoef->dady[2];
+ const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+
+ quad->output.depth[0] = z0;
+ quad->output.depth[1] = z0 + dzdx;
+ quad->output.depth[2] = z0 + dzdy;
+ quad->output.depth[3] = z0 + dzdx + dzdy;
+ }
+
+ /* shader may cull fragments */
+ if( quad->inout.mask ) {
+ qs->next->run( qs->next, quad );
+ }
+}
+
+/**
+ * Per-primitive (or per-begin?) setup
+ */
+static void shade_begin(struct quad_stage *qs)
+{
+ struct quad_shade_stage *qss = quad_shade_stage(qs);
+ struct softpipe_context *softpipe = qs->softpipe;
+
+ softpipe->fs->prepare( softpipe->fs,
+ &qss->machine,
+ (struct tgsi_sampler **)
+ softpipe->tgsi.frag_samplers_list );
+
+ qs->next->begin(qs->next);
+}
+
+
+static void shade_destroy(struct quad_stage *qs)
+{
+ struct quad_shade_stage *qss = (struct quad_shade_stage *) qs;
+
+ tgsi_exec_machine_free_data(&qss->machine);
+ FREE( qss->inputs );
+ FREE( qss->outputs );
+ FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
+{
+ struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
+
+ /* allocate storage for program inputs/outputs, aligned to 16 bytes */
+ qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16);
+ qss->outputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->outputs) + 16);
+ qss->machine.Inputs = align16(qss->inputs);
+ qss->machine.Outputs = align16(qss->outputs);
+
+ qss->stage.softpipe = softpipe;
+ qss->stage.begin = shade_begin;
+ qss->stage.run = shade_quad;
+ qss->stage.destroy = shade_destroy;
+
+ tgsi_exec_machine_init( &qss->machine );
+
+ return &qss->stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c
new file mode 100644
index 0000000000..169bd82876
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c
@@ -0,0 +1,85 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * \brief Quad occlusion counter stage
+ * \author Brian Paul
+ */
+
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_surface.h"
+#include "sp_quad.h"
+
+static unsigned count_bits( unsigned val )
+{
+ unsigned i;
+
+ for (i = 0; val ; val >>= 1)
+ i += (val & 1);
+
+ return i;
+}
+
+static void
+occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+
+ softpipe->occlusion_count += count_bits(quad->inout.mask);
+
+ qs->next->run(qs->next, quad);
+}
+
+
+static void occlusion_begin(struct quad_stage *qs)
+{
+ qs->next->begin(qs->next);
+}
+
+
+static void occlusion_destroy(struct quad_stage *qs)
+{
+ FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+ stage->softpipe = softpipe;
+ stage->begin = occlusion_begin;
+ stage->run = occlusion_count_quad;
+ stage->destroy = occlusion_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c
new file mode 100644
index 0000000000..b7aac7f84a
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_output.c
@@ -0,0 +1,103 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_surface.h"
+#include "sp_quad.h"
+#include "sp_tile_cache.h"
+
+
+/**
+ * Last step of quad processing: write quad colors to the framebuffer,
+ * taking mask into account.
+ */
+static void
+output_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ /* in-tile pos: */
+ const int itx = quad->input.x0 % TILE_SIZE;
+ const int ity = quad->input.y0 % TILE_SIZE;
+
+ struct softpipe_context *softpipe = qs->softpipe;
+ uint cbuf;
+
+ /* loop over colorbuffer outputs */
+ for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+ struct softpipe_cached_tile *tile
+ = sp_get_cached_tile(softpipe,
+ softpipe->cbuf_cache[cbuf],
+ quad->input.x0, quad->input.y0);
+ float (*quadColor)[4] = quad->output.color[cbuf];
+ int i, j;
+
+ /* get/swizzle dest colors */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (quad->inout.mask & (1 << j)) {
+ int x = itx + (j & 1);
+ int y = ity + (j >> 1);
+ for (i = 0; i < 4; i++) { /* loop over color chans */
+ tile->data.color[y][x][i] = quadColor[i][j];
+ }
+ if (0) {
+ debug_printf("sp write pixel %d,%d: %g, %g, %g\n",
+ quad->input.x0 + x,
+ quad->input.y0 + y,
+ quadColor[0][j],
+ quadColor[1][j],
+ quadColor[2][j]);
+ }
+ }
+ }
+ }
+}
+
+
+static void output_begin(struct quad_stage *qs)
+{
+ assert(qs->next == NULL);
+}
+
+
+static void output_destroy(struct quad_stage *qs)
+{
+ FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+ stage->softpipe = softpipe;
+ stage->begin = output_begin;
+ stage->run = output_quad;
+ stage->destroy = output_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c
new file mode 100644
index 0000000000..abb5487748
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c
@@ -0,0 +1,352 @@
+
+/**
+ * \brief Quad stencil testing
+ */
+
+
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_surface.h"
+#include "sp_tile_cache.h"
+#include "sp_quad.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+
+
+/** Only 8-bit stencil supported */
+#define STENCIL_MAX 0xff
+
+
+/**
+ * Do the basic stencil test (compare stencil buffer values against the
+ * reference value.
+ *
+ * \param stencilVals the stencil values from the stencil buffer
+ * \param func the stencil func (PIPE_FUNC_x)
+ * \param ref the stencil reference value
+ * \param valMask the stencil value mask indicating which bits of the stencil
+ * values and ref value are to be used.
+ * \return mask indicating which pixels passed the stencil test
+ */
+static unsigned
+do_stencil_test(const ubyte stencilVals[QUAD_SIZE], unsigned func,
+ unsigned ref, unsigned valMask)
+{
+ unsigned passMask = 0x0;
+ unsigned j;
+
+ ref &= valMask;
+
+ switch (func) {
+ case PIPE_FUNC_NEVER:
+ /* passMask = 0x0 */
+ break;
+ case PIPE_FUNC_LESS:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref < (stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_EQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref == (stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_LEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref <= (stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_GREATER:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref > (stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref != (stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_GEQUAL:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (ref >= (stencilVals[j] & valMask)) {
+ passMask |= (1 << j);
+ }
+ }
+ break;
+ case PIPE_FUNC_ALWAYS:
+ passMask = MASK_ALL;
+ break;
+ default:
+ assert(0);
+ }
+
+ return passMask;
+}
+
+
+/**
+ * Apply the stencil operator to stencil values.
+ *
+ * \param stencilVals the stencil buffer values (read and written)
+ * \param mask indicates which pixels to update
+ * \param op the stencil operator (PIPE_STENCIL_OP_x)
+ * \param ref the stencil reference value
+ * \param wrtMask writemask controlling which bits are changed in the
+ * stencil values
+ */
+static void
+apply_stencil_op(ubyte stencilVals[QUAD_SIZE],
+ unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
+{
+ unsigned j;
+ ubyte newstencil[QUAD_SIZE];
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ newstencil[j] = stencilVals[j];
+ }
+
+ switch (op) {
+ case PIPE_STENCIL_OP_KEEP:
+ /* no-op */
+ break;
+ case PIPE_STENCIL_OP_ZERO:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ newstencil[j] = 0;
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_REPLACE:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ newstencil[j] = ref;
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_INCR:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ if (stencilVals[j] < STENCIL_MAX) {
+ newstencil[j] = stencilVals[j] + 1;
+ }
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_DECR:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ if (stencilVals[j] > 0) {
+ newstencil[j] = stencilVals[j] - 1;
+ }
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ newstencil[j] = stencilVals[j] + 1;
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ newstencil[j] = stencilVals[j] - 1;
+ }
+ }
+ break;
+ case PIPE_STENCIL_OP_INVERT:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ if (mask & (1 << j)) {
+ newstencil[j] = ~stencilVals[j];
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ /*
+ * update the stencil values
+ */
+ if (wrtMask != STENCIL_MAX) {
+ /* apply bit-wise stencil buffer writemask */
+ for (j = 0; j < QUAD_SIZE; j++) {
+ stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & stencilVals[j]);
+ }
+ }
+ else {
+ for (j = 0; j < QUAD_SIZE; j++) {
+ stencilVals[j] = newstencil[j];
+ }
+ }
+}
+
+
+/**
+ * Do stencil (and depth) testing. Stenciling depends on the outcome of
+ * depth testing.
+ */
+static void
+stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ struct softpipe_context *softpipe = qs->softpipe;
+ struct pipe_surface *ps = softpipe->framebuffer.zsbuf;
+ unsigned func, zFailOp, zPassOp, failOp;
+ ubyte ref, wrtMask, valMask;
+ ubyte stencilVals[QUAD_SIZE];
+ struct softpipe_cached_tile *tile
+ = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0);
+ uint j;
+ uint face = quad->input.facing;
+
+ if (!softpipe->depth_stencil->stencil[1].enabled) {
+ /* single-sided stencil test, use front (face=0) state */
+ face = 0;
+ }
+
+ /* choose front or back face function, operator, etc */
+ /* XXX we could do these initializations once per primitive */
+ func = softpipe->depth_stencil->stencil[face].func;
+ failOp = softpipe->depth_stencil->stencil[face].fail_op;
+ zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
+ zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
+ ref = softpipe->depth_stencil->stencil[face].ref_value;
+ wrtMask = softpipe->depth_stencil->stencil[face].write_mask;
+ valMask = softpipe->depth_stencil->stencil[face].value_mask;
+
+ assert(ps); /* shouldn't get here if there's no stencil buffer */
+
+ /* get stencil values from cached tile */
+ switch (ps->format) {
+ case PIPE_FORMAT_S8Z24_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ stencilVals[j] = tile->data.depth32[y][x] >> 24;
+ }
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ stencilVals[j] = tile->data.depth32[y][x] & 0xff;
+ }
+ break;
+ case PIPE_FORMAT_S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ stencilVals[j] = tile->data.stencil8[y][x];
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ /* do the stencil test first */
+ {
+ unsigned passMask, failMask;
+ passMask = do_stencil_test(stencilVals, func, ref, valMask);
+ failMask = quad->inout.mask & ~passMask;
+ quad->inout.mask &= passMask;
+
+ if (failOp != PIPE_STENCIL_OP_KEEP) {
+ apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask);
+ }
+ }
+
+ if (quad->inout.mask) {
+ /* now the pixels that passed the stencil test are depth tested */
+ if (softpipe->depth_stencil->depth.enabled) {
+ const unsigned origMask = quad->inout.mask;
+
+ sp_depth_test_quad(qs, quad); /* quad->mask is updated */
+
+ /* update stencil buffer values according to z pass/fail result */
+ if (zFailOp != PIPE_STENCIL_OP_KEEP) {
+ const unsigned failMask = origMask & ~quad->inout.mask;
+ apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask);
+ }
+
+ if (zPassOp != PIPE_STENCIL_OP_KEEP) {
+ const unsigned passMask = origMask & quad->inout.mask;
+ apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask);
+ }
+ }
+ else {
+ /* no depth test, apply Zpass operator to stencil buffer values */
+ apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask);
+ }
+
+ }
+
+ /* put new stencil values into cached tile */
+ switch (ps->format) {
+ case PIPE_FORMAT_S8Z24_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ uint s8z24 = tile->data.depth32[y][x];
+ s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff);
+ tile->data.depth32[y][x] = s8z24;
+ }
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ uint z24s8 = tile->data.depth32[y][x];
+ z24s8 = (z24s8 & 0xffffff00) | stencilVals[j];
+ tile->data.depth32[y][x] = z24s8;
+ }
+ break;
+ case PIPE_FORMAT_S8_UNORM:
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+ tile->data.stencil8[y][x] = stencilVals[j];
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ if (quad->inout.mask)
+ qs->next->run(qs->next, quad);
+}
+
+
+static void stencil_begin(struct quad_stage *qs)
+{
+ qs->next->begin(qs->next);
+}
+
+
+static void stencil_destroy(struct quad_stage *qs)
+{
+ FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+ stage->softpipe = softpipe;
+ stage->begin = stencil_begin;
+ stage->run = stencil_test_quad;
+ stage->destroy = stencil_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c
new file mode 100644
index 0000000000..ccf37f6be5
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c
@@ -0,0 +1,94 @@
+
+/**
+ * quad polygon stipple stage
+ */
+
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+
+
+/**
+ * Apply polygon stipple to quads produced by triangle rasterization
+ */
+static void
+stipple_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+ static const uint bit31 = 1 << 31;
+ static const uint bit30 = 1 << 30;
+
+ if (quad->input.prim == PRIM_TRI) {
+ struct softpipe_context *softpipe = qs->softpipe;
+ /* need to invert Y to index into OpenGL's stipple pattern */
+ int y0, y1;
+ uint stipple0, stipple1;
+ if (softpipe->rasterizer->origin_lower_left) {
+ y0 = softpipe->framebuffer.height - 1 - quad->input.y0;
+ y1 = y0 - 1;
+ }
+ else {
+ y0 = quad->input.y0;
+ y1 = y0 + 1;
+ }
+ stipple0 = softpipe->poly_stipple.stipple[y0 % 32];
+ stipple1 = softpipe->poly_stipple.stipple[y1 % 32];
+
+#if 1
+ {
+ const int col0 = quad->input.x0 % 32;
+ if ((stipple0 & (bit31 >> col0)) == 0)
+ quad->inout.mask &= ~MASK_TOP_LEFT;
+
+ if ((stipple0 & (bit30 >> col0)) == 0)
+ quad->inout.mask &= ~MASK_TOP_RIGHT;
+
+ if ((stipple1 & (bit31 >> col0)) == 0)
+ quad->inout.mask &= ~MASK_BOTTOM_LEFT;
+
+ if ((stipple1 & (bit30 >> col0)) == 0)
+ quad->inout.mask &= ~MASK_BOTTOM_RIGHT;
+ }
+#else
+ /* We'd like to use this code, but we'd need to redefine
+ * MASK_TOP_LEFT to be (1 << 1) and MASK_TOP_RIGHT to be (1 << 0),
+ * and similarly for the BOTTOM bits. But that may have undesirable
+ * side effects elsewhere.
+ */
+ const int col0 = 30 - (quad->input.x0 % 32);
+ quad->inout.mask &= (((stipple0 >> col0) & 0x3) |
+ (((stipple1 >> col0) & 0x3) << 2));
+#endif
+ if (!quad->inout.mask)
+ return;
+ }
+
+ qs->next->run(qs->next, quad);
+}
+
+
+static void stipple_begin(struct quad_stage *qs)
+{
+ qs->next->begin(qs->next);
+}
+
+
+static void stipple_destroy(struct quad_stage *qs)
+{
+ FREE( qs );
+}
+
+
+struct quad_stage *
+sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe )
+{
+ struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+ stage->softpipe = softpipe;
+ stage->begin = stipple_begin;
+ stage->run = stipple_quad;
+ stage->destroy = stipple_destroy;
+
+ return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
new file mode 100644
index 0000000000..b0d8e01426
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -0,0 +1,107 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_query.h"
+
+struct softpipe_query {
+ uint64_t start;
+ uint64_t end;
+};
+
+
+static struct softpipe_query *softpipe_query( struct pipe_query *p )
+{
+ return (struct softpipe_query *)p;
+}
+
+static struct pipe_query *
+softpipe_create_query(struct pipe_context *pipe,
+ unsigned type)
+{
+ assert(type == PIPE_QUERY_OCCLUSION_COUNTER);
+ return (struct pipe_query *)CALLOC_STRUCT( softpipe_query );
+}
+
+
+static void
+softpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ FREE(q);
+}
+
+
+static void
+softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct softpipe_context *softpipe = softpipe_context( pipe );
+ struct softpipe_query *sq = softpipe_query(q);
+
+ sq->start = softpipe->occlusion_count;
+}
+
+
+static void
+softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct softpipe_context *softpipe = softpipe_context( pipe );
+ struct softpipe_query *sq = softpipe_query(q);
+
+ sq->end = softpipe->occlusion_count;
+}
+
+
+static boolean
+softpipe_get_query_result(struct pipe_context *pipe,
+ struct pipe_query *q,
+ boolean wait,
+ uint64_t *result )
+{
+ struct softpipe_query *sq = softpipe_query(q);
+ *result = sq->end - sq->start;
+ return TRUE;
+}
+
+
+void softpipe_init_query_funcs(struct softpipe_context *softpipe )
+{
+ softpipe->pipe.create_query = softpipe_create_query;
+ softpipe->pipe.destroy_query = softpipe_destroy_query;
+ softpipe->pipe.begin_query = softpipe_begin_query;
+ softpipe->pipe.end_query = softpipe_end_query;
+ softpipe->pipe.get_query_result = softpipe_get_query_result;
+}
+
+
diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h
new file mode 100644
index 0000000000..05060a4575
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_query.h
@@ -0,0 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell
+ */
+
+#ifndef SP_QUERY_H
+#define SP_QUERY_H
+
+struct softpipe_context;
+extern void softpipe_init_query_funcs(struct softpipe_context * );
+
+
+#endif /* SP_QUERY_H */
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
new file mode 100644
index 0000000000..11b08b3a82
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -0,0 +1,179 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+
+#include "sp_texture.h"
+#include "sp_winsys.h"
+#include "sp_screen.h"
+
+
+static const char *
+softpipe_get_vendor(struct pipe_screen *screen)
+{
+ return "Tungsten Graphics, Inc.";
+}
+
+
+static const char *
+softpipe_get_name(struct pipe_screen *screen)
+{
+ return "softpipe";
+}
+
+
+static int
+softpipe_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return PIPE_MAX_SAMPLERS;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return PIPE_MAX_SAMPLERS;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 1;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return PIPE_MAX_COLOR_BUFS;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return 1;
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 12; /* max 2Kx2K */
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 8; /* max 128x128x128 */
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 12; /* max 2Kx2K */
+ default:
+ return 0;
+ }
+}
+
+
+static float
+softpipe_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 255.0; /* arbitrary */
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 255.0; /* arbitrary */
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 0.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0; /* arbitrary */
+ default:
+ return 0;
+ }
+}
+
+
+/**
+ * Query format support for creating a texture, drawing surface, etc.
+ * \param format the format to test
+ * \param type one of PIPE_TEXTURE, PIPE_SURFACE
+ */
+static boolean
+softpipe_is_format_supported( struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage,
+ unsigned geom_flags )
+{
+ switch(format) {
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return FALSE;
+ default:
+ return TRUE;
+ }
+}
+
+
+static void
+softpipe_destroy_screen( struct pipe_screen *screen )
+{
+ struct pipe_winsys *winsys = screen->winsys;
+
+ if(winsys->destroy)
+ winsys->destroy(winsys);
+
+ FREE(screen);
+}
+
+
+
+/**
+ * Create a new pipe_screen object
+ * Note: we're not presently subclassing pipe_screen (no softpipe_screen).
+ */
+struct pipe_screen *
+softpipe_create_screen(struct pipe_winsys *winsys)
+{
+ struct softpipe_screen *screen = CALLOC_STRUCT(softpipe_screen);
+
+ if (!screen)
+ return NULL;
+
+ screen->base.winsys = winsys;
+
+ screen->base.destroy = softpipe_destroy_screen;
+
+ screen->base.get_name = softpipe_get_name;
+ screen->base.get_vendor = softpipe_get_vendor;
+ screen->base.get_param = softpipe_get_param;
+ screen->base.get_paramf = softpipe_get_paramf;
+ screen->base.is_format_supported = softpipe_is_format_supported;
+
+ softpipe_init_screen_texture_funcs(&screen->base);
+
+ return &screen->base;
+}
diff --git a/src/gallium/drivers/softpipe/sp_screen.h b/src/gallium/drivers/softpipe/sp_screen.h
new file mode 100644
index 0000000000..3d4bfd3e84
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_screen.h
@@ -0,0 +1,58 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SP_SCREEN_H
+#define SP_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+
+
+
+struct softpipe_screen {
+ struct pipe_screen base;
+
+ /* Increments whenever textures are modified. Contexts can track
+ * this.
+ */
+ unsigned timestamp;
+};
+
+
+
+
+static INLINE struct softpipe_screen *
+softpipe_screen( struct pipe_screen *pipe )
+{
+ return (struct softpipe_screen *)pipe;
+}
+
+
+#endif /* SP_SCREEN_H */
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
new file mode 100644
index 0000000000..13d8017393
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -0,0 +1,1569 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Primitive rasterization/rendering (points, lines, triangles)
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Brian Paul
+ */
+
+#include "sp_setup.h"
+
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+#include "sp_state.h"
+#include "sp_prim_setup.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vertex.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_thread.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+
+#define DEBUG_VERTS 0
+#define DEBUG_FRAGS 0
+
+/**
+ * Triangle edge info
+ */
+struct edge {
+ float dx; /**< X(v1) - X(v0), used only during setup */
+ float dy; /**< Y(v1) - Y(v0), used only during setup */
+ float dxdy; /**< dx/dy */
+ float sx, sy; /**< first sample point coord */
+ int lines; /**< number of lines on this edge */
+};
+
+#if SP_NUM_QUAD_THREADS > 1
+
+/* Set to 1 if you want other threads to be instantly
+ * notified of pending jobs.
+ */
+#define INSTANT_NOTEMPTY_NOTIFY 0
+
+struct thread_info
+{
+ struct setup_context *setup;
+ uint id;
+ pipe_thread handle;
+};
+
+struct quad_job;
+
+typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job );
+
+struct quad_job
+{
+ struct quad_header_input input;
+ struct quad_header_inout inout;
+ quad_job_routine routine;
+};
+
+#define NUM_QUAD_JOBS 64
+
+struct quad_job_que
+{
+ struct quad_job jobs[NUM_QUAD_JOBS];
+ uint first;
+ uint last;
+ pipe_mutex que_mutex;
+ pipe_condvar que_notfull_condvar;
+ pipe_condvar que_notempty_condvar;
+ uint jobs_added;
+ uint jobs_done;
+ pipe_condvar que_done_condvar;
+};
+
+static void
+add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine )
+{
+#if INSTANT_NOTEMPTY_NOTIFY
+ boolean empty;
+#endif
+
+ /* Wait for empty slot, see if the que is empty.
+ */
+ pipe_mutex_lock( que->que_mutex );
+ while ((que->last + 1) % NUM_QUAD_JOBS == que->first) {
+#if !INSTANT_NOTEMPTY_NOTIFY
+ pipe_condvar_broadcast( que->que_notempty_condvar );
+#endif
+ pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex );
+ }
+#if INSTANT_NOTEMPTY_NOTIFY
+ empty = que->last == que->first;
+#endif
+ que->jobs_added++;
+ pipe_mutex_unlock( que->que_mutex );
+
+ /* Submit new job.
+ */
+ que->jobs[que->last].input = quad->input;
+ que->jobs[que->last].inout = quad->inout;
+ que->jobs[que->last].routine = routine;
+ que->last = (que->last + 1) % NUM_QUAD_JOBS;
+
+#if INSTANT_NOTEMPTY_NOTIFY
+ /* If the que was empty, notify consumers there's a job to be done.
+ */
+ if (empty) {
+ pipe_mutex_lock( que->que_mutex );
+ pipe_condvar_broadcast( que->que_notempty_condvar );
+ pipe_mutex_unlock( que->que_mutex );
+ }
+#endif
+}
+
+#endif
+
+/**
+ * Triangle setup info (derived from draw_stage).
+ * Also used for line drawing (taking some liberties).
+ */
+struct setup_context {
+ struct softpipe_context *softpipe;
+
+ /* Vertices are just an array of floats making up each attribute in
+ * turn. Currently fixed at 4 floats, but should change in time.
+ * Codegen will help cope with this.
+ */
+ const float (*vmax)[4];
+ const float (*vmid)[4];
+ const float (*vmin)[4];
+ const float (*vprovoke)[4];
+
+ struct edge ebot;
+ struct edge etop;
+ struct edge emaj;
+
+ float oneoverarea;
+
+ struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
+ struct tgsi_interp_coef posCoef; /* For Z, W */
+ struct quad_header quad;
+
+#if SP_NUM_QUAD_THREADS > 1
+ struct quad_job_que que;
+ struct thread_info threads[SP_NUM_QUAD_THREADS];
+#endif
+
+ struct {
+ int left[2]; /**< [0] = row0, [1] = row1 */
+ int right[2];
+ int y;
+ unsigned y_flags;
+ unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
+ } span;
+
+#if DEBUG_FRAGS
+ uint numFragsEmitted; /**< per primitive */
+ uint numFragsWritten; /**< per primitive */
+#endif
+
+ unsigned winding; /* which winding to cull */
+};
+
+#if SP_NUM_QUAD_THREADS > 1
+
+static PIPE_THREAD_ROUTINE( quad_thread, param )
+{
+ struct thread_info *info = (struct thread_info *) param;
+ struct quad_job_que *que = &info->setup->que;
+
+ for (;;) {
+ struct quad_job job;
+ boolean full;
+
+ /* Wait for an available job.
+ */
+ pipe_mutex_lock( que->que_mutex );
+ while (que->last == que->first)
+ pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex );
+
+ /* See if the que is full.
+ */
+ full = (que->last + 1) % NUM_QUAD_JOBS == que->first;
+
+ /* Take a job and remove it from que.
+ */
+ job = que->jobs[que->first];
+ que->first = (que->first + 1) % NUM_QUAD_JOBS;
+
+ /* Notify the producer if the que is not full.
+ */
+ if (full)
+ pipe_condvar_signal( que->que_notfull_condvar );
+ pipe_mutex_unlock( que->que_mutex );
+
+ job.routine( info->setup, info->id, &job );
+
+ /* Notify the producer if that's the last finished job.
+ */
+ pipe_mutex_lock( que->que_mutex );
+ que->jobs_done++;
+ if (que->jobs_added == que->jobs_done)
+ pipe_condvar_signal( que->que_done_condvar );
+ pipe_mutex_unlock( que->que_mutex );
+ }
+
+ return NULL;
+}
+
+#define WAIT_FOR_COMPLETION(setup) \
+ do {\
+ pipe_mutex_lock( setup->que.que_mutex );\
+ if (!INSTANT_NOTEMPTY_NOTIFY)\
+ pipe_condvar_broadcast( setup->que.que_notempty_condvar );\
+ while (setup->que.jobs_added != setup->que.jobs_done)\
+ pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\
+ pipe_mutex_unlock( setup->que.que_mutex );\
+ } while (0)
+
+#else
+
+#define WAIT_FOR_COMPLETION(setup) ((void) 0)
+
+#endif
+
+/**
+ * Test if x is NaN or +/- infinity.
+ */
+static INLINE boolean
+is_inf_or_nan(float x)
+{
+ union fi tmp;
+ tmp.f = x;
+ return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31);
+}
+
+
+static boolean cull_tri( struct setup_context *setup,
+ float det )
+{
+ if (det != 0)
+ {
+ /* if (det < 0 then Z points toward camera and triangle is
+ * counter-clockwise winding.
+ */
+ unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
+
+ if ((winding & setup->winding) == 0)
+ return FALSE;
+ }
+
+ /* Culled:
+ */
+ return TRUE;
+}
+
+
+
+/**
+ * Clip setup->quad against the scissor/surface bounds.
+ */
+static INLINE void
+quad_clip( struct setup_context *setup, struct quad_header *quad )
+{
+ const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
+ const int minx = (int) cliprect->minx;
+ const int maxx = (int) cliprect->maxx;
+ const int miny = (int) cliprect->miny;
+ const int maxy = (int) cliprect->maxy;
+
+ if (quad->input.x0 >= maxx ||
+ quad->input.y0 >= maxy ||
+ quad->input.x0 + 1 < minx ||
+ quad->input.y0 + 1 < miny) {
+ /* totally clipped */
+ quad->inout.mask = 0x0;
+ return;
+ }
+ if (quad->input.x0 < minx)
+ quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
+ if (quad->input.y0 < miny)
+ quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
+ if (quad->input.x0 == maxx - 1)
+ quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
+ if (quad->input.y0 == maxy - 1)
+ quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
+}
+
+
+/**
+ * Emit a quad (pass to next stage) with clipping.
+ */
+static INLINE void
+clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
+{
+ quad_clip( setup, quad );
+ if (quad->inout.mask) {
+ struct softpipe_context *sp = setup->softpipe;
+
+ sp->quad[thread].first->run( sp->quad[thread].first, quad );
+ }
+}
+
+#if SP_NUM_QUAD_THREADS > 1
+
+static void
+clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
+{
+ struct quad_header quad;
+
+ quad.input = job->input;
+ quad.inout = job->inout;
+ quad.coef = setup->quad.coef;
+ quad.posCoef = setup->quad.posCoef;
+ quad.nr_attrs = setup->quad.nr_attrs;
+ clip_emit_quad( setup, &quad, thread );
+}
+
+#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, &setup->quad, clip_emit_quad_job )
+
+#else
+
+#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 )
+
+#endif
+
+/**
+ * Emit a quad (pass to next stage). No clipping is done.
+ */
+static INLINE void
+emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
+{
+ struct softpipe_context *sp = setup->softpipe;
+#if DEBUG_FRAGS
+ uint mask = quad->inout.mask;
+#endif
+
+#if DEBUG_FRAGS
+ if (mask & 1) setup->numFragsEmitted++;
+ if (mask & 2) setup->numFragsEmitted++;
+ if (mask & 4) setup->numFragsEmitted++;
+ if (mask & 8) setup->numFragsEmitted++;
+#endif
+ sp->quad[thread].first->run( sp->quad[thread].first, quad );
+#if DEBUG_FRAGS
+ mask = quad->inout.mask;
+ if (mask & 1) setup->numFragsWritten++;
+ if (mask & 2) setup->numFragsWritten++;
+ if (mask & 4) setup->numFragsWritten++;
+ if (mask & 8) setup->numFragsWritten++;
+#endif
+}
+
+#if SP_NUM_QUAD_THREADS > 1
+
+static void
+emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
+{
+ struct quad_header quad;
+
+ quad.input = job->input;
+ quad.inout = job->inout;
+ quad.coef = setup->quad.coef;
+ quad.posCoef = setup->quad.posCoef;
+ quad.nr_attrs = setup->quad.nr_attrs;
+ emit_quad( setup, &quad, thread );
+}
+
+#define EMIT_QUAD(setup,x,y,mask) do {\
+ setup->quad.input.x0 = x;\
+ setup->quad.input.y0 = y;\
+ setup->quad.inout.mask = mask;\
+ add_quad_job( &setup->que, &setup->quad, emit_quad_job );\
+ } while (0)
+
+#else
+
+#define EMIT_QUAD(setup,x,y,mask) do {\
+ setup->quad.input.x0 = x;\
+ setup->quad.input.y0 = y;\
+ setup->quad.inout.mask = mask;\
+ emit_quad( setup, &setup->quad, 0 );\
+ } while (0)
+
+#endif
+
+/**
+ * Given an X or Y coordinate, return the block/quad coordinate that it
+ * belongs to.
+ */
+static INLINE int block( int x )
+{
+ return x & ~1;
+}
+
+
+/**
+ * Render a horizontal span of quads
+ */
+static void flush_spans( struct setup_context *setup )
+{
+ const int xleft0 = setup->span.left[0];
+ const int xleft1 = setup->span.left[1];
+ const int xright0 = setup->span.right[0];
+ const int xright1 = setup->span.right[1];
+ int minleft, maxright;
+ int x;
+
+ switch (setup->span.y_flags) {
+ case 0x3:
+ /* both odd and even lines written (both quad rows) */
+ minleft = block(MIN2(xleft0, xleft1));
+ maxright = block(MAX2(xright0, xright1));
+ for (x = minleft; x <= maxright; x += 2) {
+ /* determine which of the four pixels is inside the span bounds */
+ uint mask = 0x0;
+ if (x >= xleft0 && x < xright0)
+ mask |= MASK_TOP_LEFT;
+ if (x >= xleft1 && x < xright1)
+ mask |= MASK_BOTTOM_LEFT;
+ if (x+1 >= xleft0 && x+1 < xright0)
+ mask |= MASK_TOP_RIGHT;
+ if (x+1 >= xleft1 && x+1 < xright1)
+ mask |= MASK_BOTTOM_RIGHT;
+ EMIT_QUAD( setup, x, setup->span.y, mask );
+ }
+ break;
+
+ case 0x1:
+ /* only even line written (quad top row) */
+ minleft = block(xleft0);
+ maxright = block(xright0);
+ for (x = minleft; x <= maxright; x += 2) {
+ uint mask = 0x0;
+ if (x >= xleft0 && x < xright0)
+ mask |= MASK_TOP_LEFT;
+ if (x+1 >= xleft0 && x+1 < xright0)
+ mask |= MASK_TOP_RIGHT;
+ EMIT_QUAD( setup, x, setup->span.y, mask );
+ }
+ break;
+
+ case 0x2:
+ /* only odd line written (quad bottom row) */
+ minleft = block(xleft1);
+ maxright = block(xright1);
+ for (x = minleft; x <= maxright; x += 2) {
+ uint mask = 0x0;
+ if (x >= xleft1 && x < xright1)
+ mask |= MASK_BOTTOM_LEFT;
+ if (x+1 >= xleft1 && x+1 < xright1)
+ mask |= MASK_BOTTOM_RIGHT;
+ EMIT_QUAD( setup, x, setup->span.y, mask );
+ }
+ break;
+
+ default:
+ return;
+ }
+
+ setup->span.y = 0;
+ setup->span.y_flags = 0;
+ setup->span.right[0] = 0;
+ setup->span.right[1] = 0;
+}
+
+
+#if DEBUG_VERTS
+static void print_vertex(const struct setup_context *setup,
+ const float (*v)[4])
+{
+ int i;
+ debug_printf(" Vertex: (%p)\n", v);
+ for (i = 0; i < setup->quad.nr_attrs; i++) {
+ debug_printf(" %d: %f %f %f %f\n", i,
+ v[i][0], v[i][1], v[i][2], v[i][3]);
+ }
+}
+#endif
+
+/**
+ * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
+ */
+static boolean setup_sort_vertices( struct setup_context *setup,
+ float det,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ setup->vprovoke = v2;
+
+ /* determine bottom to top order of vertices */
+ {
+ float y0 = v0[0][1];
+ float y1 = v1[0][1];
+ float y2 = v2[0][1];
+ if (y0 <= y1) {
+ if (y1 <= y2) {
+ /* y0<=y1<=y2 */
+ setup->vmin = v0;
+ setup->vmid = v1;
+ setup->vmax = v2;
+ }
+ else if (y2 <= y0) {
+ /* y2<=y0<=y1 */
+ setup->vmin = v2;
+ setup->vmid = v0;
+ setup->vmax = v1;
+ }
+ else {
+ /* y0<=y2<=y1 */
+ setup->vmin = v0;
+ setup->vmid = v2;
+ setup->vmax = v1;
+ }
+ }
+ else {
+ if (y0 <= y2) {
+ /* y1<=y0<=y2 */
+ setup->vmin = v1;
+ setup->vmid = v0;
+ setup->vmax = v2;
+ }
+ else if (y2 <= y1) {
+ /* y2<=y1<=y0 */
+ setup->vmin = v2;
+ setup->vmid = v1;
+ setup->vmax = v0;
+ }
+ else {
+ /* y1<=y2<=y0 */
+ setup->vmin = v1;
+ setup->vmid = v2;
+ setup->vmax = v0;
+ }
+ }
+ }
+
+ setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
+ setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
+ setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
+ setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
+ setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
+ setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
+
+ /*
+ * Compute triangle's area. Use 1/area to compute partial
+ * derivatives of attributes later.
+ *
+ * The area will be the same as prim->det, but the sign may be
+ * different depending on how the vertices get sorted above.
+ *
+ * To determine whether the primitive is front or back facing we
+ * use the prim->det value because its sign is correct.
+ */
+ {
+ const float area = (setup->emaj.dx * setup->ebot.dy -
+ setup->ebot.dx * setup->emaj.dy);
+
+ setup->oneoverarea = 1.0f / area;
+
+ /*
+ debug_printf("%s one-over-area %f area %f det %f\n",
+ __FUNCTION__, setup->oneoverarea, area, det );
+ */
+ if (is_inf_or_nan(setup->oneoverarea))
+ return FALSE;
+ }
+
+ /* We need to know if this is a front or back-facing triangle for:
+ * - the GLSL gl_FrontFacing fragment attribute (bool)
+ * - two-sided stencil test
+ */
+ setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
+
+ return TRUE;
+}
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ * The value value comes from vertex[slot][i].
+ * The result will be put into setup->coef[slot].a0[i].
+ * \param slot which attribute slot
+ * \param i which component of the slot (0..3)
+ */
+static void const_coeff( struct setup_context *setup,
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
+{
+ assert(i <= 3);
+
+ coef->dadx[i] = 0;
+ coef->dady[i] = 0;
+
+ /* need provoking vertex info!
+ */
+ coef->a0[i] = setup->vprovoke[vertSlot][i];
+}
+
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for a triangle.
+ */
+static void tri_linear_coeff( struct setup_context *setup,
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
+{
+ float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
+ float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
+ float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
+ float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
+ float dadx = a * setup->oneoverarea;
+ float dady = b * setup->oneoverarea;
+
+ assert(i <= 3);
+
+ coef->dadx[i] = dadx;
+ coef->dady[i] = dady;
+
+ /* calculate a0 as the value which would be sampled for the
+ * fragment at (0,0), taking into account that we want to sample at
+ * pixel centers, in other words (0.5, 0.5).
+ *
+ * this is neat but unfortunately not a good way to do things for
+ * triangles with very large values of dadx or dady as it will
+ * result in the subtraction and re-addition from a0 of a very
+ * large number, which means we'll end up loosing a lot of the
+ * fractional bits and precision from a0. the way to fix this is
+ * to define a0 as the sample at a pixel center somewhere near vmin
+ * instead - i'll switch to this later.
+ */
+ coef->a0[i] = (setup->vmin[vertSlot][i] -
+ (dadx * (setup->vmin[0][0] - 0.5f) +
+ dady * (setup->vmin[0][1] - 0.5f)));
+
+ /*
+ debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
+ slot, "xyzw"[i],
+ setup->coef[slot].a0[i],
+ setup->coef[slot].dadx[i],
+ setup->coef[slot].dady[i]);
+ */
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void tri_persp_coeff( struct setup_context *setup,
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
+{
+ /* premultiply by 1/w (v[0][3] is always W):
+ */
+ float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
+ float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
+ float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
+ float botda = mida - mina;
+ float majda = maxa - mina;
+ float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
+ float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
+ float dadx = a * setup->oneoverarea;
+ float dady = b * setup->oneoverarea;
+
+ /*
+ debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
+ setup->vmin[vertSlot][i],
+ setup->vmid[vertSlot][i],
+ setup->vmax[vertSlot][i]
+ );
+ */
+ assert(i <= 3);
+
+ coef->dadx[i] = dadx;
+ coef->dady[i] = dady;
+ coef->a0[i] = (mina -
+ (dadx * (setup->vmin[0][0] - 0.5f) +
+ dady * (setup->vmin[0][1] - 0.5f)));
+}
+
+
+/**
+ * Special coefficient setup for gl_FragCoord.
+ * X and Y are trivial, though Y has to be inverted for OpenGL.
+ * Z and W are copied from posCoef which should have already been computed.
+ * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
+ */
+static void
+setup_fragcoord_coeff(struct setup_context *setup, uint slot)
+{
+ /*X*/
+ setup->coef[slot].a0[0] = 0;
+ setup->coef[slot].dadx[0] = 1.0;
+ setup->coef[slot].dady[0] = 0.0;
+ /*Y*/
+ if (setup->softpipe->rasterizer->origin_lower_left) {
+ /* y=0=bottom */
+ const int winHeight = setup->softpipe->framebuffer.height;
+ setup->coef[slot].a0[1] = (float) (winHeight - 1);
+ setup->coef[slot].dady[1] = -1.0;
+ }
+ else {
+ /* y=0=top */
+ setup->coef[slot].a0[1] = 0.0;
+ setup->coef[slot].dady[1] = 1.0;
+ }
+ setup->coef[slot].dadx[1] = 0.0;
+ /*Z*/
+ setup->coef[slot].a0[2] = setup->posCoef.a0[2];
+ setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
+ setup->coef[slot].dady[2] = setup->posCoef.dady[2];
+ /*W*/
+ setup->coef[slot].a0[3] = setup->posCoef.a0[3];
+ setup->coef[slot].dadx[3] = setup->posCoef.dadx[3];
+ setup->coef[slot].dady[3] = setup->posCoef.dady[3];
+}
+
+
+
+/**
+ * Compute the setup->coef[] array dadx, dady, a0 values.
+ * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
+ */
+static void setup_tri_coefficients( struct setup_context *setup )
+{
+ struct softpipe_context *softpipe = setup->softpipe;
+ const struct sp_fragment_shader *spfs = softpipe->fs;
+ const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
+ uint fragSlot;
+
+ /* z and w are done by linear interpolation:
+ */
+ tri_linear_coeff(setup, &setup->posCoef, 0, 2);
+ tri_linear_coeff(setup, &setup->posCoef, 0, 3);
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
+ const uint vertSlot = vinfo->attrib[fragSlot].src_index;
+ uint j;
+
+ switch (vinfo->attrib[fragSlot].interp_mode) {
+ case INTERP_CONSTANT:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_LINEAR:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_PERSPECTIVE:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_POS:
+ setup_fragcoord_coeff(setup, fragSlot);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
+ /* FOG.y = front/back facing XXX fix this */
+ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing;
+ setup->coef[fragSlot].dadx[1] = 0.0;
+ setup->coef[fragSlot].dady[1] = 0.0;
+ }
+ }
+}
+
+
+
+static void setup_tri_edges( struct setup_context *setup )
+{
+ float vmin_x = setup->vmin[0][0] + 0.5f;
+ float vmid_x = setup->vmid[0][0] + 0.5f;
+
+ float vmin_y = setup->vmin[0][1] - 0.5f;
+ float vmid_y = setup->vmid[0][1] - 0.5f;
+ float vmax_y = setup->vmax[0][1] - 0.5f;
+
+ setup->emaj.sy = ceilf(vmin_y);
+ setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
+ setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
+ setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
+
+ setup->etop.sy = ceilf(vmid_y);
+ setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
+ setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
+ setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
+
+ setup->ebot.sy = ceilf(vmin_y);
+ setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
+ setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
+ setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
+}
+
+
+/**
+ * Render the upper or lower half of a triangle.
+ * Scissoring/cliprect is applied here too.
+ */
+static void subtriangle( struct setup_context *setup,
+ struct edge *eleft,
+ struct edge *eright,
+ unsigned lines )
+{
+ const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
+ const int minx = (int) cliprect->minx;
+ const int maxx = (int) cliprect->maxx;
+ const int miny = (int) cliprect->miny;
+ const int maxy = (int) cliprect->maxy;
+ int y, start_y, finish_y;
+ int sy = (int)eleft->sy;
+
+ assert((int)eleft->sy == (int) eright->sy);
+
+ /* clip top/bottom */
+ start_y = sy;
+ finish_y = sy + lines;
+
+ if (start_y < miny)
+ start_y = miny;
+
+ if (finish_y > maxy)
+ finish_y = maxy;
+
+ start_y -= sy;
+ finish_y -= sy;
+
+ /*
+ debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
+ */
+
+ for (y = start_y; y < finish_y; y++) {
+
+ /* avoid accumulating adds as floats don't have the precision to
+ * accurately iterate large triangle edges that way. luckily we
+ * can just multiply these days.
+ *
+ * this is all drowned out by the attribute interpolation anyway.
+ */
+ int left = (int)(eleft->sx + y * eleft->dxdy);
+ int right = (int)(eright->sx + y * eright->dxdy);
+
+ /* clip left/right */
+ if (left < minx)
+ left = minx;
+ if (right > maxx)
+ right = maxx;
+
+ if (left < right) {
+ int _y = sy + y;
+ if (block(_y) != setup->span.y) {
+ flush_spans(setup);
+ setup->span.y = block(_y);
+ }
+
+ setup->span.left[_y&1] = left;
+ setup->span.right[_y&1] = right;
+ setup->span.y_flags |= 1<<(_y&1);
+ }
+ }
+
+
+ /* save the values so that emaj can be restarted:
+ */
+ eleft->sx += lines * eleft->dxdy;
+ eright->sx += lines * eright->dxdy;
+ eleft->sy += lines;
+ eright->sy += lines;
+}
+
+
+/**
+ * Recalculate prim's determinant. This is needed as we don't have
+ * get this information through the vbuf_render interface & we must
+ * calculate it here.
+ */
+static float
+calc_det( const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ /* edge vectors e = v0 - v2, f = v1 - v2 */
+ const float ex = v0[0][0] - v2[0][0];
+ const float ey = v0[0][1] - v2[0][1];
+ const float fx = v1[0][0] - v2[0][0];
+ const float fy = v1[0][1] - v2[0][1];
+
+ /* det = cross(e,f).z */
+ return ex * fy - ey * fx;
+}
+
+
+/**
+ * Do setup for triangle rasterization, then render the triangle.
+ */
+void setup_tri( struct setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ float det;
+
+#if DEBUG_VERTS
+ debug_printf("Setup triangle:\n");
+ print_vertex(setup, v0);
+ print_vertex(setup, v1);
+ print_vertex(setup, v2);
+#endif
+
+ if (setup->softpipe->no_rast)
+ return;
+
+ det = calc_det(v0, v1, v2);
+ /*
+ debug_printf("%s\n", __FUNCTION__ );
+ */
+
+#if DEBUG_FRAGS
+ setup->numFragsEmitted = 0;
+ setup->numFragsWritten = 0;
+#endif
+
+ if (cull_tri( setup, det ))
+ return;
+
+ if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
+ return;
+ setup_tri_coefficients( setup );
+ setup_tri_edges( setup );
+
+ setup->quad.input.prim = PRIM_TRI;
+
+ setup->span.y = 0;
+ setup->span.y_flags = 0;
+ setup->span.right[0] = 0;
+ setup->span.right[1] = 0;
+ /* setup->span.z_mode = tri_z_mode( setup->ctx ); */
+
+ /* init_constant_attribs( setup ); */
+
+ if (setup->oneoverarea < 0.0) {
+ /* emaj on left:
+ */
+ subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
+ subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
+ }
+ else {
+ /* emaj on right:
+ */
+ subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
+ subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
+ }
+
+ flush_spans( setup );
+
+ WAIT_FOR_COMPLETION(setup);
+
+#if DEBUG_FRAGS
+ printf("Tri: %u frags emitted, %u written\n",
+ setup->numFragsEmitted,
+ setup->numFragsWritten);
+#endif
+}
+
+
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for a line.
+ */
+static void
+line_linear_coeff(struct setup_context *setup,
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
+{
+ const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
+ const float dadx = da * setup->emaj.dx * setup->oneoverarea;
+ const float dady = da * setup->emaj.dy * setup->oneoverarea;
+ coef->dadx[i] = dadx;
+ coef->dady[i] = dady;
+ coef->a0[i] = (setup->vmin[vertSlot][i] -
+ (dadx * (setup->vmin[0][0] - 0.5f) +
+ dady * (setup->vmin[0][1] - 0.5f)));
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a line.
+ */
+static void
+line_persp_coeff(struct setup_context *setup,
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
+{
+ /* XXX double-check/verify this arithmetic */
+ const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
+ const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
+ const float da = a1 - a0;
+ const float dadx = da * setup->emaj.dx * setup->oneoverarea;
+ const float dady = da * setup->emaj.dy * setup->oneoverarea;
+ coef->dadx[i] = dadx;
+ coef->dady[i] = dady;
+ coef->a0[i] = (setup->vmin[vertSlot][i] -
+ (dadx * (setup->vmin[0][0] - 0.5f) +
+ dady * (setup->vmin[0][1] - 0.5f)));
+}
+
+
+/**
+ * Compute the setup->coef[] array dadx, dady, a0 values.
+ * Must be called after setup->vmin,vmax are initialized.
+ */
+static INLINE boolean
+setup_line_coefficients(struct setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4])
+{
+ struct softpipe_context *softpipe = setup->softpipe;
+ const struct sp_fragment_shader *spfs = softpipe->fs;
+ const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
+ uint fragSlot;
+ float area;
+
+ /* use setup->vmin, vmax to point to vertices */
+ setup->vprovoke = v1;
+ setup->vmin = v0;
+ setup->vmax = v1;
+
+ setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
+ setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
+
+ /* NOTE: this is not really area but something proportional to it */
+ area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
+ if (area == 0.0f || is_inf_or_nan(area))
+ return FALSE;
+ setup->oneoverarea = 1.0f / area;
+
+ /* z and w are done by linear interpolation:
+ */
+ line_linear_coeff(setup, &setup->posCoef, 0, 2);
+ line_linear_coeff(setup, &setup->posCoef, 0, 3);
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
+ const uint vertSlot = vinfo->attrib[fragSlot].src_index;
+ uint j;
+
+ switch (vinfo->attrib[fragSlot].interp_mode) {
+ case INTERP_CONSTANT:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_LINEAR:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_PERSPECTIVE:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_POS:
+ setup_fragcoord_coeff(setup, fragSlot);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
+ /* FOG.y = front/back facing XXX fix this */
+ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing;
+ setup->coef[fragSlot].dadx[1] = 0.0;
+ setup->coef[fragSlot].dady[1] = 0.0;
+ }
+ }
+ return TRUE;
+}
+
+
+/**
+ * Plot a pixel in a line segment.
+ */
+static INLINE void
+plot(struct setup_context *setup, int x, int y)
+{
+ const int iy = y & 1;
+ const int ix = x & 1;
+ const int quadX = x - ix;
+ const int quadY = y - iy;
+ const int mask = (1 << ix) << (2 * iy);
+
+ if (quadX != setup->quad.input.x0 ||
+ quadY != setup->quad.input.y0)
+ {
+ /* flush prev quad, start new quad */
+
+ if (setup->quad.input.x0 != -1)
+ CLIP_EMIT_QUAD(setup);
+
+ setup->quad.input.x0 = quadX;
+ setup->quad.input.y0 = quadY;
+ setup->quad.inout.mask = 0x0;
+ }
+
+ setup->quad.inout.mask |= mask;
+}
+
+
+/**
+ * Do setup for line rasterization, then render the line.
+ * Single-pixel width, no stipple, etc. We rely on the 'draw' module
+ * to handle stippling and wide lines.
+ */
+void
+setup_line(struct setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4])
+{
+ int x0 = (int) v0[0][0];
+ int x1 = (int) v1[0][0];
+ int y0 = (int) v0[0][1];
+ int y1 = (int) v1[0][1];
+ int dx = x1 - x0;
+ int dy = y1 - y0;
+ int xstep, ystep;
+
+#if DEBUG_VERTS
+ debug_printf("Setup line:\n");
+ print_vertex(setup, v0);
+ print_vertex(setup, v1);
+#endif
+
+ if (setup->softpipe->no_rast)
+ return;
+
+ if (dx == 0 && dy == 0)
+ return;
+
+ if (!setup_line_coefficients(setup, v0, v1))
+ return;
+
+ assert(v0[0][0] < 1.0e9);
+ assert(v0[0][1] < 1.0e9);
+ assert(v1[0][0] < 1.0e9);
+ assert(v1[0][1] < 1.0e9);
+
+ if (dx < 0) {
+ dx = -dx; /* make positive */
+ xstep = -1;
+ }
+ else {
+ xstep = 1;
+ }
+
+ if (dy < 0) {
+ dy = -dy; /* make positive */
+ ystep = -1;
+ }
+ else {
+ ystep = 1;
+ }
+
+ assert(dx >= 0);
+ assert(dy >= 0);
+
+ setup->quad.input.x0 = setup->quad.input.y0 = -1;
+ setup->quad.inout.mask = 0x0;
+ setup->quad.input.prim = PRIM_LINE;
+ /* XXX temporary: set coverage to 1.0 so the line appears
+ * if AA mode happens to be enabled.
+ */
+ setup->quad.input.coverage[0] =
+ setup->quad.input.coverage[1] =
+ setup->quad.input.coverage[2] =
+ setup->quad.input.coverage[3] = 1.0;
+
+ if (dx > dy) {
+ /*** X-major line ***/
+ int i;
+ const int errorInc = dy + dy;
+ int error = errorInc - dx;
+ const int errorDec = error - dx;
+
+ for (i = 0; i < dx; i++) {
+ plot(setup, x0, y0);
+
+ x0 += xstep;
+ if (error < 0) {
+ error += errorInc;
+ }
+ else {
+ error += errorDec;
+ y0 += ystep;
+ }
+ }
+ }
+ else {
+ /*** Y-major line ***/
+ int i;
+ const int errorInc = dx + dx;
+ int error = errorInc - dy;
+ const int errorDec = error - dy;
+
+ for (i = 0; i < dy; i++) {
+ plot(setup, x0, y0);
+
+ y0 += ystep;
+ if (error < 0) {
+ error += errorInc;
+ }
+ else {
+ error += errorDec;
+ x0 += xstep;
+ }
+ }
+ }
+
+ /* draw final quad */
+ if (setup->quad.inout.mask) {
+ CLIP_EMIT_QUAD(setup);
+ }
+
+ WAIT_FOR_COMPLETION(setup);
+}
+
+
+static void
+point_persp_coeff(struct setup_context *setup,
+ const float (*vert)[4],
+ struct tgsi_interp_coef *coef,
+ uint vertSlot, uint i)
+{
+ assert(i <= 3);
+ coef->dadx[i] = 0.0F;
+ coef->dady[i] = 0.0F;
+ coef->a0[i] = vert[vertSlot][i] * vert[0][3];
+}
+
+
+/**
+ * Do setup for point rasterization, then render the point.
+ * Round or square points...
+ * XXX could optimize a lot for 1-pixel points.
+ */
+void
+setup_point( struct setup_context *setup,
+ const float (*v0)[4] )
+{
+ struct softpipe_context *softpipe = setup->softpipe;
+ const struct sp_fragment_shader *spfs = softpipe->fs;
+ const int sizeAttr = setup->softpipe->psize_slot;
+ const float size
+ = sizeAttr > 0 ? v0[sizeAttr][0]
+ : setup->softpipe->rasterizer->point_size;
+ const float halfSize = 0.5F * size;
+ const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
+ const float x = v0[0][0]; /* Note: data[0] is always position */
+ const float y = v0[0][1];
+ const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
+ uint fragSlot;
+
+#if DEBUG_VERTS
+ debug_printf("Setup point:\n");
+ print_vertex(setup, v0);
+#endif
+
+ if (softpipe->no_rast)
+ return;
+
+ /* For points, all interpolants are constant-valued.
+ * However, for point sprites, we'll need to setup texcoords appropriately.
+ * XXX: which coefficients are the texcoords???
+ * We may do point sprites as textured quads...
+ *
+ * KW: We don't know which coefficients are texcoords - ultimately
+ * the choice of what interpolation mode to use for each attribute
+ * should be determined by the fragment program, using
+ * per-attribute declaration statements that include interpolation
+ * mode as a parameter. So either the fragment program will have
+ * to be adjusted for pointsprite vs normal point behaviour, or
+ * otherwise a special interpolation mode will have to be defined
+ * which matches the required behaviour for point sprites. But -
+ * the latter is not a feature of normal hardware, and as such
+ * probably should be ruled out on that basis.
+ */
+ setup->vprovoke = v0;
+
+ /* setup Z, W */
+ const_coeff(setup, &setup->posCoef, 0, 2);
+ const_coeff(setup, &setup->posCoef, 0, 3);
+
+ for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
+ const uint vertSlot = vinfo->attrib[fragSlot].src_index;
+ uint j;
+
+ switch (vinfo->attrib[fragSlot].interp_mode) {
+ case INTERP_CONSTANT:
+ /* fall-through */
+ case INTERP_LINEAR:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_PERSPECTIVE:
+ for (j = 0; j < NUM_CHANNELS; j++)
+ point_persp_coeff(setup, setup->vprovoke,
+ &setup->coef[fragSlot], vertSlot, j);
+ break;
+ case INTERP_POS:
+ setup_fragcoord_coeff(setup, fragSlot);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
+ /* FOG.y = front/back facing XXX fix this */
+ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing;
+ setup->coef[fragSlot].dadx[1] = 0.0;
+ setup->coef[fragSlot].dady[1] = 0.0;
+ }
+ }
+
+ setup->quad.input.prim = PRIM_POINT;
+
+ if (halfSize <= 0.5 && !round) {
+ /* special case for 1-pixel points */
+ const int ix = ((int) x) & 1;
+ const int iy = ((int) y) & 1;
+ setup->quad.input.x0 = (int) x - ix;
+ setup->quad.input.y0 = (int) y - iy;
+ setup->quad.inout.mask = (1 << ix) << (2 * iy);
+ CLIP_EMIT_QUAD(setup);
+ }
+ else {
+ if (round) {
+ /* rounded points */
+ const int ixmin = block((int) (x - halfSize));
+ const int ixmax = block((int) (x + halfSize));
+ const int iymin = block((int) (y - halfSize));
+ const int iymax = block((int) (y + halfSize));
+ const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */
+ const float rmax = halfSize + 0.7071F;
+ const float rmin2 = MAX2(0.0F, rmin * rmin);
+ const float rmax2 = rmax * rmax;
+ const float cscale = 1.0F / (rmax2 - rmin2);
+ int ix, iy;
+
+ for (iy = iymin; iy <= iymax; iy += 2) {
+ for (ix = ixmin; ix <= ixmax; ix += 2) {
+ float dx, dy, dist2, cover;
+
+ setup->quad.inout.mask = 0x0;
+
+ dx = (ix + 0.5f) - x;
+ dy = (iy + 0.5f) - y;
+ dist2 = dx * dx + dy * dy;
+ if (dist2 <= rmax2) {
+ cover = 1.0F - (dist2 - rmin2) * cscale;
+ setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
+ setup->quad.inout.mask |= MASK_TOP_LEFT;
+ }
+
+ dx = (ix + 1.5f) - x;
+ dy = (iy + 0.5f) - y;
+ dist2 = dx * dx + dy * dy;
+ if (dist2 <= rmax2) {
+ cover = 1.0F - (dist2 - rmin2) * cscale;
+ setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
+ setup->quad.inout.mask |= MASK_TOP_RIGHT;
+ }
+
+ dx = (ix + 0.5f) - x;
+ dy = (iy + 1.5f) - y;
+ dist2 = dx * dx + dy * dy;
+ if (dist2 <= rmax2) {
+ cover = 1.0F - (dist2 - rmin2) * cscale;
+ setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
+ setup->quad.inout.mask |= MASK_BOTTOM_LEFT;
+ }
+
+ dx = (ix + 1.5f) - x;
+ dy = (iy + 1.5f) - y;
+ dist2 = dx * dx + dy * dy;
+ if (dist2 <= rmax2) {
+ cover = 1.0F - (dist2 - rmin2) * cscale;
+ setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
+ setup->quad.inout.mask |= MASK_BOTTOM_RIGHT;
+ }
+
+ if (setup->quad.inout.mask) {
+ setup->quad.input.x0 = ix;
+ setup->quad.input.y0 = iy;
+ CLIP_EMIT_QUAD(setup);
+ }
+ }
+ }
+ }
+ else {
+ /* square points */
+ const int xmin = (int) (x + 0.75 - halfSize);
+ const int ymin = (int) (y + 0.25 - halfSize);
+ const int xmax = xmin + (int) size;
+ const int ymax = ymin + (int) size;
+ /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
+ const int ixmin = block(xmin);
+ const int ixmax = block(xmax - 1);
+ const int iymin = block(ymin);
+ const int iymax = block(ymax - 1);
+ int ix, iy;
+
+ /*
+ debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
+ */
+ for (iy = iymin; iy <= iymax; iy += 2) {
+ uint rowMask = 0xf;
+ if (iy < ymin) {
+ /* above the top edge */
+ rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
+ }
+ if (iy + 1 >= ymax) {
+ /* below the bottom edge */
+ rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
+ }
+
+ for (ix = ixmin; ix <= ixmax; ix += 2) {
+ uint mask = rowMask;
+
+ if (ix < xmin) {
+ /* fragment is past left edge of point, turn off left bits */
+ mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
+ }
+ if (ix + 1 >= xmax) {
+ /* past the right edge */
+ mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
+ }
+
+ setup->quad.inout.mask = mask;
+ setup->quad.input.x0 = ix;
+ setup->quad.input.y0 = iy;
+ CLIP_EMIT_QUAD(setup);
+ }
+ }
+ }
+ }
+
+ WAIT_FOR_COMPLETION(setup);
+}
+
+void setup_prepare( struct setup_context *setup )
+{
+ struct softpipe_context *sp = setup->softpipe;
+ unsigned i;
+
+ if (sp->dirty) {
+ softpipe_update_derived(sp);
+ }
+
+ /* Mark surfaces as defined now */
+ for (i = 0; i < sp->framebuffer.num_cbufs; i++){
+ if (sp->framebuffer.cbufs[i]) {
+ sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
+ }
+ }
+ if (sp->framebuffer.zsbuf) {
+ sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
+ }
+
+ /* Note: nr_attrs is only used for debugging (vertex printing) */
+ setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw);
+
+ for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+ sp->quad[i].first->begin( sp->quad[i].first );
+ }
+
+ if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
+ sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
+ sp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) {
+ /* we'll do culling */
+ setup->winding = sp->rasterizer->cull_mode;
+ }
+ else {
+ /* 'draw' will do culling */
+ setup->winding = PIPE_WINDING_NONE;
+ }
+}
+
+
+
+void setup_destroy_context( struct setup_context *setup )
+{
+ FREE( setup );
+}
+
+
+/**
+ * Create a new primitive setup/render stage.
+ */
+struct setup_context *setup_create_context( struct softpipe_context *softpipe )
+{
+ struct setup_context *setup = CALLOC_STRUCT(setup_context);
+#if SP_NUM_QUAD_THREADS > 1
+ uint i;
+#endif
+
+ setup->softpipe = softpipe;
+
+ setup->quad.coef = setup->coef;
+ setup->quad.posCoef = &setup->posCoef;
+
+#if SP_NUM_QUAD_THREADS > 1
+ setup->que.first = 0;
+ setup->que.last = 0;
+ pipe_mutex_init( setup->que.que_mutex );
+ pipe_condvar_init( setup->que.que_notfull_condvar );
+ pipe_condvar_init( setup->que.que_notempty_condvar );
+ setup->que.jobs_added = 0;
+ setup->que.jobs_done = 0;
+ pipe_condvar_init( setup->que.que_done_condvar );
+ for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+ setup->threads[i].setup = setup;
+ setup->threads[i].id = i;
+ setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] );
+ }
+#endif
+
+ return setup;
+}
+
diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h
new file mode 100644
index 0000000000..d54f334428
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_setup.h
@@ -0,0 +1,53 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#ifndef SP_SETUP_H
+#define SP_SETUP_H
+
+struct setup_context;
+struct softpipe_context;
+
+void
+setup_tri( struct setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] );
+
+void
+setup_line(struct setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4]);
+
+void
+setup_point( struct setup_context *setup,
+ const float (*v0)[4] );
+
+
+struct setup_context *setup_create_context( struct softpipe_context *softpipe );
+void setup_prepare( struct setup_context *setup );
+void setup_destroy_context( struct setup_context *setup );
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
new file mode 100644
index 0000000000..3eff41ffa5
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -0,0 +1,206 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SP_STATE_H
+#define SP_STATE_H
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+
+#define SP_NEW_VIEWPORT 0x1
+#define SP_NEW_RASTERIZER 0x2
+#define SP_NEW_FS 0x4
+#define SP_NEW_BLEND 0x8
+#define SP_NEW_CLIP 0x10
+#define SP_NEW_SCISSOR 0x20
+#define SP_NEW_STIPPLE 0x40
+#define SP_NEW_FRAMEBUFFER 0x80
+#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100
+#define SP_NEW_CONSTANTS 0x200
+#define SP_NEW_SAMPLER 0x400
+#define SP_NEW_TEXTURE 0x800
+#define SP_NEW_VERTEX 0x1000
+#define SP_NEW_VS 0x2000
+#define SP_NEW_QUERY 0x4000
+
+
+struct tgsi_sampler;
+struct tgsi_exec_machine;
+struct vertex_info;
+
+
+/**
+ * Subclass of pipe_shader_state (though it doesn't really need to be).
+ *
+ * This is starting to look an awful lot like a quad pipeline stage...
+ */
+struct sp_fragment_shader {
+ struct pipe_shader_state shader;
+
+ struct tgsi_shader_info info;
+
+ void (*prepare)( const struct sp_fragment_shader *shader,
+ struct tgsi_exec_machine *machine,
+ struct tgsi_sampler **samplers);
+
+ /* Run the shader - this interface will get cleaned up in the
+ * future:
+ */
+ unsigned (*run)( const struct sp_fragment_shader *shader,
+ struct tgsi_exec_machine *machine,
+ struct quad_header *quad );
+
+
+ void (*delete)( struct sp_fragment_shader * );
+};
+
+
+/** Subclass of pipe_shader_state */
+struct sp_vertex_shader {
+ struct pipe_shader_state shader; /* Note: this field not actually used */
+ struct draw_vertex_shader *draw_data;
+};
+
+
+
+void *
+softpipe_create_blend_state(struct pipe_context *,
+ const struct pipe_blend_state *);
+void softpipe_bind_blend_state(struct pipe_context *,
+ void *);
+void softpipe_delete_blend_state(struct pipe_context *,
+ void *);
+
+void *
+softpipe_create_sampler_state(struct pipe_context *,
+ const struct pipe_sampler_state *);
+void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **);
+void softpipe_delete_sampler_state(struct pipe_context *, void *);
+
+void *
+softpipe_create_depth_stencil_state(struct pipe_context *,
+ const struct pipe_depth_stencil_alpha_state *);
+void softpipe_bind_depth_stencil_state(struct pipe_context *, void *);
+void softpipe_delete_depth_stencil_state(struct pipe_context *, void *);
+
+void *
+softpipe_create_rasterizer_state(struct pipe_context *,
+ const struct pipe_rasterizer_state *);
+void softpipe_bind_rasterizer_state(struct pipe_context *, void *);
+void softpipe_delete_rasterizer_state(struct pipe_context *, void *);
+
+void softpipe_set_framebuffer_state( struct pipe_context *,
+ const struct pipe_framebuffer_state * );
+
+void softpipe_set_blend_color( struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color );
+
+void softpipe_set_clip_state( struct pipe_context *,
+ const struct pipe_clip_state * );
+
+void softpipe_set_constant_buffer(struct pipe_context *,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf);
+
+void *softpipe_create_fs_state(struct pipe_context *,
+ const struct pipe_shader_state *);
+void softpipe_bind_fs_state(struct pipe_context *, void *);
+void softpipe_delete_fs_state(struct pipe_context *, void *);
+void *softpipe_create_vs_state(struct pipe_context *,
+ const struct pipe_shader_state *);
+void softpipe_bind_vs_state(struct pipe_context *, void *);
+void softpipe_delete_vs_state(struct pipe_context *, void *);
+
+void softpipe_set_polygon_stipple( struct pipe_context *,
+ const struct pipe_poly_stipple * );
+
+void softpipe_set_scissor_state( struct pipe_context *,
+ const struct pipe_scissor_state * );
+
+void softpipe_set_sampler_textures( struct pipe_context *,
+ unsigned num,
+ struct pipe_texture ** );
+
+void softpipe_set_viewport_state( struct pipe_context *,
+ const struct pipe_viewport_state * );
+
+void softpipe_set_vertex_elements(struct pipe_context *,
+ unsigned count,
+ const struct pipe_vertex_element *);
+
+void softpipe_set_vertex_buffers(struct pipe_context *,
+ unsigned count,
+ const struct pipe_vertex_buffer *);
+
+
+void softpipe_update_derived( struct softpipe_context *softpipe );
+
+
+boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count);
+
+boolean softpipe_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count);
+boolean
+softpipe_draw_range_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned mode, unsigned start, unsigned count);
+
+void
+softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
+
+
+void
+softpipe_map_surfaces(struct softpipe_context *sp);
+
+void
+softpipe_unmap_surfaces(struct softpipe_context *sp);
+
+void
+softpipe_map_texture_surfaces(struct softpipe_context *sp);
+
+void
+softpipe_unmap_texture_surfaces(struct softpipe_context *sp);
+
+
+struct vertex_info *
+softpipe_get_vertex_info(struct softpipe_context *softpipe);
+
+struct vertex_info *
+softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe);
+
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c
new file mode 100644
index 0000000000..384fe559af
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_state_blend.c
@@ -0,0 +1,98 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_state.h"
+
+
+void *
+softpipe_create_blend_state(struct pipe_context *pipe,
+ const struct pipe_blend_state *blend)
+{
+ return mem_dup(blend, sizeof(*blend));
+}
+
+void softpipe_bind_blend_state( struct pipe_context *pipe,
+ void *blend )
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ softpipe->blend = (const struct pipe_blend_state *)blend;
+
+ softpipe->dirty |= SP_NEW_BLEND;
+}
+
+void softpipe_delete_blend_state(struct pipe_context *pipe,
+ void *blend)
+{
+ FREE( blend );
+}
+
+
+void softpipe_set_blend_color( struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color )
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ softpipe->blend_color = *blend_color;
+
+ softpipe->dirty |= SP_NEW_BLEND;
+}
+
+
+/** XXX move someday? Or consolidate all these simple state setters
+ * into one file.
+ */
+
+
+void *
+softpipe_create_depth_stencil_state(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *depth_stencil)
+{
+ return mem_dup(depth_stencil, sizeof(*depth_stencil));
+}
+
+void
+softpipe_bind_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ softpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil;
+
+ softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA;
+}
+
+void
+softpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
+{
+ FREE( depth );
+}
diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c
new file mode 100644
index 0000000000..4946c776e3
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_state_clip.c
@@ -0,0 +1,79 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "sp_context.h"
+#include "sp_state.h"
+#include "draw/draw_context.h"
+
+
+void softpipe_set_clip_state( struct pipe_context *pipe,
+ const struct pipe_clip_state *clip )
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ /* pass the clip state to the draw module */
+ draw_set_clip_state(softpipe->draw, clip);
+}
+
+
+void softpipe_set_viewport_state( struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport )
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ /* pass the viewport info to the draw module */
+ draw_set_viewport_state(softpipe->draw, viewport);
+
+ softpipe->viewport = *viewport; /* struct copy */
+ softpipe->dirty |= SP_NEW_VIEWPORT;
+}
+
+
+void softpipe_set_scissor_state( struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor )
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ draw_flush(softpipe->draw);
+
+ softpipe->scissor = *scissor; /* struct copy */
+ softpipe->dirty |= SP_NEW_SCISSOR;
+}
+
+
+void softpipe_set_polygon_stipple( struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple )
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ draw_flush(softpipe->draw);
+
+ softpipe->poly_stipple = *stipple; /* struct copy */
+ softpipe->dirty |= SP_NEW_STIPPLE;
+}
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
new file mode 100644
index 0000000000..6b6a4c3ff3
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -0,0 +1,210 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_private.h"
+#include "sp_context.h"
+#include "sp_state.h"
+
+
+/**
+ * Mark the current vertex layout as "invalid".
+ * We'll validate the vertex layout later, when we start to actually
+ * render a point or line or tri.
+ */
+static void
+invalidate_vertex_layout(struct softpipe_context *softpipe)
+{
+ softpipe->vertex_info.num_attribs = 0;
+}
+
+
+/**
+ * The vertex info describes how to convert the post-transformed vertices
+ * (simple float[][4]) used by the 'draw' module into vertices for
+ * rasterization.
+ *
+ * This function validates the vertex layout and returns a pointer to a
+ * vertex_info object.
+ */
+struct vertex_info *
+softpipe_get_vertex_info(struct softpipe_context *softpipe)
+{
+ struct vertex_info *vinfo = &softpipe->vertex_info;
+
+ if (vinfo->num_attribs == 0) {
+ /* compute vertex layout now */
+ const struct sp_fragment_shader *spfs = softpipe->fs;
+ const enum interp_mode colorInterp
+ = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
+ uint i;
+
+ if (softpipe->vbuf) {
+ /* if using the post-transform vertex buffer, tell draw_vbuf to
+ * simply emit the whole post-xform vertex as-is:
+ */
+ struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
+ const uint num = draw_num_vs_outputs(softpipe->draw);
+ uint i;
+
+ /* No longer any need to try and emit draw vertex_header info.
+ */
+ vinfo_vbuf->num_attribs = 0;
+ for (i = 0; i < num; i++) {
+ draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i);
+ }
+ draw_compute_vertex_size(vinfo_vbuf);
+ }
+
+ /*
+ * Loop over fragment shader inputs, searching for the matching output
+ * from the vertex shader.
+ */
+ vinfo->num_attribs = 0;
+ for (i = 0; i < spfs->info.num_inputs; i++) {
+ int src;
+ switch (spfs->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ src = draw_find_vs_output(softpipe->draw,
+ TGSI_SEMANTIC_POSITION, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
+ break;
+
+ case TGSI_SEMANTIC_COLOR:
+ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR,
+ spfs->info.input_semantic_index[i]);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ break;
+
+ case TGSI_SEMANTIC_FOG:
+ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ break;
+
+ case TGSI_SEMANTIC_GENERIC:
+ /* this includes texcoords and varying vars */
+ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC,
+ spfs->info.input_semantic_index[i]);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ softpipe->psize_slot = draw_find_vs_output(softpipe->draw,
+ TGSI_SEMANTIC_PSIZE, 0);
+ if (softpipe->psize_slot > 0) {
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
+ softpipe->psize_slot);
+ }
+
+ draw_compute_vertex_size(vinfo);
+ }
+
+ return vinfo;
+}
+
+
+/**
+ * Called from vbuf module.
+ *
+ * Note that there's actually two different vertex layouts in softpipe.
+ *
+ * The normal one is computed in softpipe_get_vertex_info() above and is
+ * used by the point/line/tri "setup" code.
+ *
+ * The other one (this one) is only used by the vbuf module (which is
+ * not normally used by default but used in testing). For the vbuf module,
+ * we basically want to pass-through the draw module's vertex layout as-is.
+ * When the softpipe vbuf code begins drawing, the normal vertex layout
+ * will come into play again.
+ */
+struct vertex_info *
+softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe)
+{
+ (void) softpipe_get_vertex_info(softpipe);
+ return &softpipe->vertex_info_vbuf;
+}
+
+
+/**
+ * Recompute cliprect from scissor bounds, scissor enable and surface size.
+ */
+static void
+compute_cliprect(struct softpipe_context *sp)
+{
+ uint surfWidth = sp->framebuffer.width;
+ uint surfHeight = sp->framebuffer.height;
+
+ if (sp->rasterizer->scissor) {
+ /* clip to scissor rect */
+ sp->cliprect.minx = MAX2(sp->scissor.minx, 0);
+ sp->cliprect.miny = MAX2(sp->scissor.miny, 0);
+ sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth);
+ sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight);
+ }
+ else {
+ /* clip to surface bounds */
+ sp->cliprect.minx = 0;
+ sp->cliprect.miny = 0;
+ sp->cliprect.maxx = surfWidth;
+ sp->cliprect.maxy = surfHeight;
+ }
+}
+
+
+/* Hopefully this will remain quite simple, otherwise need to pull in
+ * something like the state tracker mechanism.
+ */
+void softpipe_update_derived( struct softpipe_context *softpipe )
+{
+ if (softpipe->dirty & (SP_NEW_RASTERIZER |
+ SP_NEW_FS |
+ SP_NEW_VS))
+ invalidate_vertex_layout( softpipe );
+
+ if (softpipe->dirty & (SP_NEW_SCISSOR |
+ SP_NEW_DEPTH_STENCIL_ALPHA |
+ SP_NEW_FRAMEBUFFER))
+ compute_cliprect(softpipe);
+
+ if (softpipe->dirty & (SP_NEW_BLEND |
+ SP_NEW_DEPTH_STENCIL_ALPHA |
+ SP_NEW_FRAMEBUFFER |
+ SP_NEW_RASTERIZER |
+ SP_NEW_FS |
+ SP_NEW_QUERY))
+ sp_build_quad_pipeline(softpipe);
+
+ softpipe->dirty = 0;
+}
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
new file mode 100644
index 0000000000..e5b609cf6c
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -0,0 +1,161 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_fs.h"
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+
+
+void *
+softpipe_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ struct sp_fragment_shader *state;
+
+ /* debug */
+ if (softpipe->dump_fs)
+ tgsi_dump(templ->tokens, 0);
+
+ /* codegen */
+ state = softpipe_create_fs_llvm( softpipe, templ );
+ if (!state) {
+ state = softpipe_create_fs_sse( softpipe, templ );
+ if (!state) {
+ state = softpipe_create_fs_exec( softpipe, templ );
+ }
+ }
+
+ assert(state);
+
+ /* get/save the summary info for this shader */
+ tgsi_scan_shader(templ->tokens, &state->info);
+
+ return state;
+}
+
+
+void
+softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ softpipe->fs = (struct sp_fragment_shader *) fs;
+
+ softpipe->dirty |= SP_NEW_FS;
+}
+
+
+void
+softpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct sp_fragment_shader *state = fs;
+
+ assert(fs != softpipe_context(pipe)->fs);
+
+ state->delete( state );
+}
+
+
+void *
+softpipe_create_vs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ struct sp_vertex_shader *state;
+
+ state = CALLOC_STRUCT(sp_vertex_shader);
+ if (state == NULL ) {
+ return NULL;
+ }
+
+ state->draw_data = draw_create_vertex_shader(softpipe->draw, templ);
+ if (state->draw_data == NULL) {
+ FREE( state );
+ return NULL;
+ }
+
+ return state;
+}
+
+
+void
+softpipe_bind_vs_state(struct pipe_context *pipe, void *vs)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ softpipe->vs = (const struct sp_vertex_shader *)vs;
+
+ draw_bind_vertex_shader(softpipe->draw,
+ (softpipe->vs ? softpipe->vs->draw_data : NULL));
+
+ softpipe->dirty |= SP_NEW_VS;
+}
+
+
+void
+softpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ struct sp_vertex_shader *state =
+ (struct sp_vertex_shader *)vs;
+
+ draw_delete_vertex_shader(softpipe->draw, state->draw_data);
+ FREE( state );
+}
+
+
+
+void
+softpipe_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index == 0);
+
+ /* note: reference counting */
+ winsys_buffer_reference(ws,
+ &softpipe->constants[shader].buffer,
+ buf ? buf->buffer : NULL);
+ softpipe->constants[shader].size = buf ? buf->size : 0;
+
+ softpipe->dirty |= SP_NEW_CONSTANTS;
+}
diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
new file mode 100644
index 0000000000..87b7219683
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
@@ -0,0 +1,62 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_state.h"
+#include "draw/draw_context.h"
+
+
+
+void *
+softpipe_create_rasterizer_state(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *rast)
+{
+ return mem_dup(rast, sizeof(*rast));
+}
+
+void softpipe_bind_rasterizer_state(struct pipe_context *pipe,
+ void *setup)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ /* pass-through to draw module */
+ draw_set_rasterizer_state(softpipe->draw, setup);
+
+ softpipe->rasterizer = (struct pipe_rasterizer_state *)setup;
+
+ softpipe->dirty |= SP_NEW_RASTERIZER;
+}
+
+void softpipe_delete_rasterizer_state(struct pipe_context *pipe,
+ void *rasterizer)
+{
+ FREE( rasterizer );
+}
+
+
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
new file mode 100644
index 0000000000..99a28c0d7e
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -0,0 +1,118 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors:
+ * Brian Paul
+ */
+
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+
+#include "draw/draw_context.h"
+
+#include "sp_context.h"
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_texture.h"
+#include "sp_tile_cache.h"
+#include "draw/draw_context.h"
+
+
+
+void *
+softpipe_create_sampler_state(struct pipe_context *pipe,
+ const struct pipe_sampler_state *sampler)
+{
+ return mem_dup(sampler, sizeof(*sampler));
+}
+
+
+void
+softpipe_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ unsigned i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == softpipe->num_samplers &&
+ !memcmp(softpipe->sampler, sampler, num * sizeof(void *)))
+ return;
+
+ draw_flush(softpipe->draw);
+
+ for (i = 0; i < num; ++i)
+ softpipe->sampler[i] = sampler[i];
+ for (i = num; i < PIPE_MAX_SAMPLERS; ++i)
+ softpipe->sampler[i] = NULL;
+
+ softpipe->num_samplers = num;
+
+ softpipe->dirty |= SP_NEW_SAMPLER;
+}
+
+
+void
+softpipe_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num, struct pipe_texture **texture)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ uint i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == softpipe->num_textures &&
+ !memcmp(softpipe->texture, texture, num * sizeof(struct pipe_texture *)))
+ return;
+
+ draw_flush(softpipe->draw);
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ struct pipe_texture *tex = i < num ? texture[i] : NULL;
+
+ pipe_texture_reference(&softpipe->texture[i], tex);
+ sp_tile_cache_set_texture(pipe, softpipe->tex_cache[i], tex);
+ }
+
+ softpipe->num_textures = num;
+
+ softpipe->dirty |= SP_NEW_TEXTURE;
+}
+
+
+void
+softpipe_delete_sampler_state(struct pipe_context *pipe,
+ void *sampler)
+{
+ FREE( sampler );
+}
+
+
+
diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c
new file mode 100644
index 0000000000..b5376e522d
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_state_surface.c
@@ -0,0 +1,130 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "pipe/p_inlines.h"
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_surface.h"
+#include "sp_tile_cache.h"
+
+#include "draw/draw_context.h"
+
+
+/**
+ * XXX this might get moved someday
+ * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer.
+ * Here, we flush the old surfaces and update the tile cache to point to the new
+ * surfaces.
+ */
+void
+softpipe_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct softpipe_context *sp = softpipe_context(pipe);
+ uint i;
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ /* check if changing cbuf */
+ if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) {
+ /* flush old */
+ sp_flush_tile_cache(sp, sp->cbuf_cache[i]);
+
+ /* assign new */
+ sp->framebuffer.cbufs[i] = fb->cbufs[i];
+
+ /* update cache */
+ sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]);
+ }
+ }
+
+ sp->framebuffer.num_cbufs = fb->num_cbufs;
+
+ /* zbuf changing? */
+ if (sp->framebuffer.zsbuf != fb->zsbuf) {
+ /* flush old */
+ sp_flush_tile_cache(sp, sp->zsbuf_cache);
+
+ /* assign new */
+ sp->framebuffer.zsbuf = fb->zsbuf;
+
+ /* update cache */
+ sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf);
+ }
+
+#if 0
+ /* XXX combined depth/stencil here */
+
+ /* sbuf changing? */
+ if (sp->framebuffer.sbuf != fb->sbuf) {
+ /* flush old */
+ sp_flush_tile_cache(sp, sp->sbuf_cache_sep);
+
+ /* assign new */
+ sp->framebuffer.sbuf = fb->sbuf;
+
+ /* update cache */
+ if (fb->sbuf != fb->zbuf) {
+ /* separate stencil buf */
+ sp->sbuf_cache = sp->sbuf_cache_sep;
+ sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf);
+ }
+ else {
+ /* combined depth/stencil */
+ sp->sbuf_cache = sp->zbuf_cache;
+ sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf);
+ }
+ }
+#endif
+
+ /* Tell draw module how deep the Z/depth buffer is */
+ {
+ int depth_bits;
+ double mrd;
+ if (sp->framebuffer.zsbuf) {
+ depth_bits = pf_get_component_bits(sp->framebuffer.zsbuf->format,
+ PIPE_FORMAT_COMP_Z);
+ }
+ else {
+ depth_bits = 0;
+ }
+ if (depth_bits > 16) {
+ mrd = 0.0000001;
+ }
+ else {
+ mrd = 0.00002;
+ }
+ draw_set_mrd(sp->draw, mrd);
+ }
+
+ sp->framebuffer.width = fb->width;
+ sp->framebuffer.height = fb->height;
+
+ sp->dirty |= SP_NEW_FRAMEBUFFER;
+}
diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c
new file mode 100644
index 0000000000..46b6991195
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_state_vertex.c
@@ -0,0 +1,73 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_surface.h"
+
+#include "draw/draw_context.h"
+
+
+void
+softpipe_set_vertex_elements(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *attribs)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ assert(count <= PIPE_MAX_ATTRIBS);
+
+ memcpy(softpipe->vertex_element, attribs,
+ count * sizeof(struct pipe_vertex_element));
+ softpipe->num_vertex_elements = count;
+
+ softpipe->dirty |= SP_NEW_VERTEX;
+
+ draw_set_vertex_elements(softpipe->draw, count, attribs);
+}
+
+
+void
+softpipe_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ assert(count <= PIPE_MAX_ATTRIBS);
+
+ memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0]));
+ softpipe->num_vertex_buffers = count;
+
+ softpipe->dirty |= SP_NEW_VERTEX;
+
+ draw_set_vertex_buffers(softpipe->draw, count, buffers);
+}
diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c
new file mode 100644
index 0000000000..6ade732698
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_surface.c
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_rect.h"
+#include "sp_context.h"
+
+
+
+void
+sp_init_surface_functions(struct softpipe_context *sp)
+{
+ sp->pipe.surface_copy = util_surface_copy;
+ sp->pipe.surface_fill = util_surface_fill;
+}
diff --git a/src/gallium/drivers/softpipe/sp_surface.h b/src/gallium/drivers/softpipe/sp_surface.h
new file mode 100644
index 0000000000..22de3ba43f
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_surface.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SP_SURFACE_H
+#define SP_SURFACE_H
+
+
+struct softpipe_context;
+
+
+extern void
+sp_init_surface_functions(struct softpipe_context *sp);
+
+
+#endif /* SP_SURFACE_H */
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
new file mode 100644
index 0000000000..32aa5025e4
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -0,0 +1,1229 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture sampling
+ *
+ * Authors:
+ * Brian Paul
+ */
+
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_surface.h"
+#include "sp_texture.h"
+#include "sp_tex_sample.h"
+#include "sp_tile_cache.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+
+
+/*
+ * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes
+ * see 1-pixel bands of improperly weighted linear-filtered textures.
+ * The tests/texwrap.c demo is a good test.
+ * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
+ * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
+ */
+#define FRAC(f) ((f) - util_ifloor(f))
+
+
+/**
+ * Linear interpolation macro
+ */
+static INLINE float
+lerp(float a, float v0, float v1)
+{
+ return v0 + a * (v1 - v0);
+}
+
+
+/**
+ * Do 2D/biliner interpolation of float values.
+ * v00, v10, v01 and v11 are typically four texture samples in a square/box.
+ * a and b are the horizontal and vertical interpolants.
+ * It's important that this function is inlined when compiled with
+ * optimization! If we find that's not true on some systems, convert
+ * to a macro.
+ */
+static INLINE float
+lerp_2d(float a, float b,
+ float v00, float v10, float v01, float v11)
+{
+ const float temp0 = lerp(a, v00, v10);
+ const float temp1 = lerp(a, v01, v11);
+ return lerp(b, temp0, temp1);
+}
+
+
+/**
+ * As above, but 3D interpolation of 8 values.
+ */
+static INLINE float
+lerp_3d(float a, float b, float c,
+ float v000, float v100, float v010, float v110,
+ float v001, float v101, float v011, float v111)
+{
+ const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
+ const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
+ return lerp(c, temp0, temp1);
+}
+
+
+
+/**
+ * If A is a signed integer, A % B doesn't give the right value for A < 0
+ * (in terms of texture repeat). Just casting to unsigned fixes that.
+ */
+#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
+
+
+/**
+ * Apply texture coord wrapping mode and return integer texture indexes
+ * for a vector of four texcoords (S or T or P).
+ * \param wrapMode PIPE_TEX_WRAP_x
+ * \param s the incoming texcoords
+ * \param size the texture image size
+ * \param icoord returns the integer texcoords
+ * \return integer texture index
+ */
+static INLINE void
+nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord[4])
+{
+ uint ch;
+ switch (wrapMode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ /* s limited to [0,1) */
+ /* i limited to [0,size-1] */
+ for (ch = 0; ch < 4; ch++) {
+ int i = util_ifloor(s[ch] * size);
+ icoord[ch] = REMAINDER(i, size);
+ }
+ return;
+ case PIPE_TEX_WRAP_CLAMP:
+ /* s limited to [0,1] */
+ /* i limited to [0,size-1] */
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] <= 0.0F)
+ icoord[ch] = 0;
+ else if (s[ch] >= 1.0F)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
+ return;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ {
+ /* s limited to [min,max] */
+ /* i limited to [0, size-1] */
+ const float min = 1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] < min)
+ icoord[ch] = 0;
+ else if (s[ch] > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
+ }
+ return;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ {
+ /* s limited to [min,max] */
+ /* i limited to [-1, size] */
+ const float min = -1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] <= min)
+ icoord[ch] = -1;
+ else if (s[ch] >= max)
+ icoord[ch] = size;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
+ }
+ return;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ {
+ const float min = 1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ const int flr = util_ifloor(s[ch]);
+ float u;
+ if (flr & 1)
+ u = 1.0F - (s[ch] - (float) flr);
+ else
+ u = s[ch] - (float) flr;
+ if (u < min)
+ icoord[ch] = 0;
+ else if (u > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
+ }
+ return;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ for (ch = 0; ch < 4; ch++) {
+ /* s limited to [0,1] */
+ /* i limited to [0,size-1] */
+ const float u = fabsf(s[ch]);
+ if (u <= 0.0F)
+ icoord[ch] = 0;
+ else if (u >= 1.0F)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
+ return;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ {
+ /* s limited to [min,max] */
+ /* i limited to [0, size-1] */
+ const float min = 1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ const float u = fabsf(s[ch]);
+ if (u < min)
+ icoord[ch] = 0;
+ else if (u > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
+ }
+ return;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ {
+ /* s limited to [min,max] */
+ /* i limited to [0, size-1] */
+ const float min = -1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ const float u = fabsf(s[ch]);
+ if (u < min)
+ icoord[ch] = -1;
+ else if (u > max)
+ icoord[ch] = size;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
+ }
+ return;
+ default:
+ assert(0);
+ }
+}
+
+
+/**
+ * Used to compute texel locations for linear sampling for four texcoords.
+ * \param wrapMode PIPE_TEX_WRAP_x
+ * \param s the texcoords
+ * \param size the texture image size
+ * \param icoord0 returns first texture indexes
+ * \param icoord1 returns second texture indexes (usually icoord0 + 1)
+ * \param w returns blend factor/weight between texture indexes
+ * \param icoord returns the computed integer texture coords
+ */
+static INLINE void
+linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
+{
+ uint ch;
+
+ switch (wrapMode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ for (ch = 0; ch < 4; ch++) {
+ float u = s[ch] * size - 0.5F;
+ icoord0[ch] = REMAINDER(util_ifloor(u), size);
+ icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
+ w[ch] = FRAC(u);
+ }
+ break;;
+ case PIPE_TEX_WRAP_CLAMP:
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.0F, 1.0F);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.0F, 1.0F);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ {
+ const float min = -1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], min, max);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ }
+ break;;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ for (ch = 0; ch < 4; ch++) {
+ const int flr = util_ifloor(s[ch]);
+ float u;
+ if (flr & 1)
+ u = 1.0F - (s[ch] - (float) flr);
+ else
+ u = s[ch] - (float) flr;
+ u = u * size - 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u >= 1.0F)
+ u = (float) size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u >= 1.0F)
+ u = (float) size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ {
+ const float min = -1.0F / (2.0F * size);
+ const float max = 1.0F - min;
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u <= min)
+ u = min * size;
+ else if (u >= max)
+ u = max * size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ }
+ break;;
+ default:
+ assert(0);
+ }
+}
+
+
+/**
+ * For RECT textures / unnormalized texcoords
+ * Only a subset of wrap modes supported.
+ */
+static INLINE void
+nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord[4])
+{
+ uint ch;
+ switch (wrapMode) {
+ case PIPE_TEX_WRAP_CLAMP:
+ for (ch = 0; ch < 4; ch++) {
+ int i = util_ifloor(s[ch]);
+ icoord[ch]= CLAMP(i, 0, (int) size-1);
+ }
+ return;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ /* fall-through */
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ for (ch = 0; ch < 4; ch++) {
+ icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
+ }
+ return;
+ default:
+ assert(0);
+ }
+}
+
+
+/**
+ * For RECT textures / unnormalized texcoords.
+ * Only a subset of wrap modes supported.
+ */
+static INLINE void
+linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
+{
+ uint ch;
+ switch (wrapMode) {
+ case PIPE_TEX_WRAP_CLAMP:
+ for (ch = 0; ch < 4; ch++) {
+ /* Not exactly what the spec says, but it matches NVIDIA output */
+ float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ return;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ /* fall-through */
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord1[ch] > (int) size - 1)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+static unsigned
+choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
+{
+ /*
+ major axis
+ direction target sc tc ma
+ ---------- ------------------------------- --- --- ---
+ +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
+ -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
+ +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
+ -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
+ +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
+ -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
+ */
+ const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
+ unsigned face;
+ float sc, tc, ma;
+
+ if (arx > ary && arx > arz) {
+ if (rx >= 0.0F) {
+ face = PIPE_TEX_FACE_POS_X;
+ sc = -rz;
+ tc = -ry;
+ ma = arx;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_X;
+ sc = rz;
+ tc = -ry;
+ ma = arx;
+ }
+ }
+ else if (ary > arx && ary > arz) {
+ if (ry >= 0.0F) {
+ face = PIPE_TEX_FACE_POS_Y;
+ sc = rx;
+ tc = rz;
+ ma = ary;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_Y;
+ sc = rx;
+ tc = -rz;
+ ma = ary;
+ }
+ }
+ else {
+ if (rz > 0.0F) {
+ face = PIPE_TEX_FACE_POS_Z;
+ sc = rx;
+ tc = -ry;
+ ma = arz;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_Z;
+ sc = -rx;
+ tc = -ry;
+ ma = arz;
+ }
+ }
+
+ *newS = ( sc / ma + 1.0F ) * 0.5F;
+ *newT = ( tc / ma + 1.0F ) * 0.5F;
+
+ return face;
+}
+
+
+/**
+ * Examine the quad's texture coordinates to compute the partial
+ * derivatives w.r.t X and Y, then compute lambda (level of detail).
+ *
+ * This is only done for fragment shaders, not vertex shaders.
+ */
+static float
+compute_lambda(const struct pipe_texture *tex,
+ const struct pipe_sampler_state *sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias)
+{
+ float rho, lambda;
+
+ assert(sampler->normalized_coords);
+
+ assert(s);
+ {
+ float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
+ float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT];
+ dsdx = fabsf(dsdx);
+ dsdy = fabsf(dsdy);
+ rho = MAX2(dsdx, dsdy) * tex->width[0];
+ }
+ if (t) {
+ float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
+ float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT];
+ float max;
+ dtdx = fabsf(dtdx);
+ dtdy = fabsf(dtdy);
+ max = MAX2(dtdx, dtdy) * tex->height[0];
+ rho = MAX2(rho, max);
+ }
+ if (p) {
+ float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
+ float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT];
+ float max;
+ dpdx = fabsf(dpdx);
+ dpdy = fabsf(dpdy);
+ max = MAX2(dpdx, dpdy) * tex->depth[0];
+ rho = MAX2(rho, max);
+ }
+
+ lambda = util_fast_log2(rho);
+ lambda += lodbias + sampler->lod_bias;
+ lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
+
+ return lambda;
+}
+
+
+/**
+ * Do several things here:
+ * 1. Compute lambda from the texcoords, if needed
+ * 2. Determine if we're minifying or magnifying
+ * 3. If minifying, choose mipmap levels
+ * 4. Return image filter to use within mipmap images
+ * \param level0 Returns first mipmap level to sample from
+ * \param level1 Returns second mipmap level to sample from
+ * \param levelBlend Returns blend factor between levels, in [0,1]
+ * \param imgFilter Returns either the min or mag filter, depending on lambda
+ */
+static void
+choose_mipmap_levels(const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ boolean computeLambda,
+ float lodbias,
+ unsigned *level0, unsigned *level1, float *levelBlend,
+ unsigned *imgFilter)
+{
+ if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ /* no mipmap selection needed */
+ *level0 = *level1 = CLAMP((int) sampler->min_lod,
+ 0, (int) texture->last_level);
+
+ if (sampler->min_img_filter != sampler->mag_img_filter) {
+ /* non-mipmapped texture, but still need to determine if doing
+ * minification or magnification.
+ */
+ float lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
+ if (lambda <= 0.0) {
+ *imgFilter = sampler->mag_img_filter;
+ }
+ else {
+ *imgFilter = sampler->min_img_filter;
+ }
+ }
+ else {
+ *imgFilter = sampler->mag_img_filter;
+ }
+ }
+ else {
+ float lambda;
+
+ if (computeLambda)
+ /* fragment shader */
+ lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
+ else
+ /* vertex shader */
+ lambda = lodbias; /* not really a bias, but absolute LOD */
+
+ if (lambda <= 0.0) { /* XXX threshold depends on the filter */
+ /* magnifying */
+ *imgFilter = sampler->mag_img_filter;
+ *level0 = *level1 = 0;
+ }
+ else {
+ /* minifying */
+ *imgFilter = sampler->min_img_filter;
+
+ /* choose mipmap level(s) and compute the blend factor between them */
+ if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+ /* Nearest mipmap level */
+ const int lvl = (int) (lambda + 0.5);
+ *level0 =
+ *level1 = CLAMP(lvl, 0, (int) texture->last_level);
+ }
+ else {
+ /* Linear interpolation between mipmap levels */
+ const int lvl = (int) lambda;
+ *level0 = CLAMP(lvl, 0, (int) texture->last_level);
+ *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
+ *levelBlend = FRAC(lambda); /* blending weight between levels */
+ }
+ }
+ }
+}
+
+
+/**
+ * Get a texel from a texture, using the texture tile cache.
+ *
+ * \param face the cube face in 0..5
+ * \param level the mipmap level
+ * \param x the x coord of texel within 2D image
+ * \param y the y coord of texel within 2D image
+ * \param z which slice of a 3D texture
+ * \param rgba the quad to put the texel/color into
+ * \param j which element of the rgba quad to write to
+ *
+ * XXX maybe move this into sp_tile_cache.c and merge with the
+ * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1...
+ */
+static void
+get_texel(const struct tgsi_sampler *tgsi_sampler,
+ unsigned face, unsigned level, int x, int y, int z,
+ float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
+{
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+
+ if (x < 0 || x >= (int) texture->width[level] ||
+ y < 0 || y >= (int) texture->height[level] ||
+ z < 0 || z >= (int) texture->depth[level]) {
+ rgba[0][j] = sampler->border_color[0];
+ rgba[1][j] = sampler->border_color[1];
+ rgba[2][j] = sampler->border_color[2];
+ rgba[3][j] = sampler->border_color[3];
+ }
+ else {
+ const int tx = x % TILE_SIZE;
+ const int ty = y % TILE_SIZE;
+ const struct softpipe_cached_tile *tile
+ = sp_get_cached_tile_tex(sp, samp->cache,
+ x, y, z, face, level);
+ rgba[0][j] = tile->data.color[ty][tx][0];
+ rgba[1][j] = tile->data.color[ty][tx][1];
+ rgba[2][j] = tile->data.color[ty][tx][2];
+ rgba[3][j] = tile->data.color[ty][tx][3];
+ if (0)
+ {
+ debug_printf("Get texel %f %f %f %f from %s\n",
+ rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
+ pf_name(texture->format));
+ }
+ }
+}
+
+
+/**
+ * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
+ * When we sampled the depth texture, the depth value was put into all
+ * RGBA channels. We look at the red channel here.
+ */
+static INLINE void
+shadow_compare(uint compare_func,
+ float rgba[NUM_CHANNELS][QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ uint j)
+{
+ int k;
+ switch (compare_func) {
+ case PIPE_FUNC_LESS:
+ k = p[j] < rgba[0][j];
+ break;
+ case PIPE_FUNC_LEQUAL:
+ k = p[j] <= rgba[0][j];
+ break;
+ case PIPE_FUNC_GREATER:
+ k = p[j] > rgba[0][j];
+ break;
+ case PIPE_FUNC_GEQUAL:
+ k = p[j] >= rgba[0][j];
+ break;
+ case PIPE_FUNC_EQUAL:
+ k = p[j] == rgba[0][j];
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ k = p[j] != rgba[0][j];
+ break;
+ case PIPE_FUNC_ALWAYS:
+ k = 1;
+ break;
+ case PIPE_FUNC_NEVER:
+ k = 0;
+ break;
+ default:
+ k = 0;
+ assert(0);
+ break;
+ }
+
+ rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
+}
+
+
+/**
+ * Common code for sampling 1D/2D/cube textures.
+ * Could probably extend for 3D...
+ */
+static void
+sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ boolean computeLambda,
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE],
+ const unsigned faces[4])
+{
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+ const uint compare_func = sampler->compare_func;
+ unsigned level0, level1, j, imgFilter;
+ int width, height;
+ float levelBlend;
+
+ choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
+ &level0, &level1, &levelBlend, &imgFilter);
+
+ assert(sampler->normalized_coords);
+
+ width = texture->width[level0];
+ height = texture->height[level0];
+
+ assert(width > 0);
+
+ switch (imgFilter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ {
+ int x[4], y[4];
+ nearest_texcoord_4(sampler->wrap_s, s, width, x);
+ nearest_texcoord_4(sampler->wrap_t, t, height, y);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ shadow_compare(compare_func, rgba, p, j);
+ }
+
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ unsigned c;
+ x[j] /= 2;
+ y[j] /= 2;
+ get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
+ rgba2, j);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+ shadow_compare(compare_func, rgba2, p, j);
+ }
+
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+ }
+ }
+ }
+ }
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ case PIPE_TEX_FILTER_ANISO:
+ {
+ int x0[4], y0[4], x1[4], y1[4];
+ float xw[4], yw[4]; /* weights */
+
+ linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
+ linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ float tx[4][4]; /* texels */
+ int c;
+ get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
+ get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
+ get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
+ get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ shadow_compare(compare_func, tx, p, 0);
+ shadow_compare(compare_func, tx, p, 1);
+ shadow_compare(compare_func, tx, p, 2);
+ shadow_compare(compare_func, tx, p, 3);
+ }
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_2d(xw[j], yw[j],
+ tx[c][0], tx[c][1],
+ tx[c][2], tx[c][3]);
+ }
+
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ x0[j] /= 2;
+ y0[j] /= 2;
+ x1[j] /= 2;
+ y1[j] /= 2;
+ get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
+ get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
+ get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
+ get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+ shadow_compare(compare_func, tx, p, 0);
+ shadow_compare(compare_func, tx, p, 1);
+ shadow_compare(compare_func, tx, p, 2);
+ shadow_compare(compare_func, tx, p, 3);
+ }
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba2[c][j] = lerp_2d(xw[j], yw[j],
+ tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ }
+
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+ }
+ }
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+static INLINE void
+sp_get_samples_1d(const struct tgsi_sampler *sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ boolean computeLambda,
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ static const unsigned faces[4] = {0, 0, 0, 0};
+ static const float tzero[4] = {0, 0, 0, 0};
+ sp_get_samples_2d_common(sampler, s, tzero, NULL,
+ computeLambda, lodbias, rgba, faces);
+}
+
+
+static INLINE void
+sp_get_samples_2d(const struct tgsi_sampler *sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ boolean computeLambda,
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ static const unsigned faces[4] = {0, 0, 0, 0};
+ sp_get_samples_2d_common(sampler, s, t, p,
+ computeLambda, lodbias, rgba, faces);
+}
+
+
+static INLINE void
+sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ boolean computeLambda,
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+ /* get/map pipe_surfaces corresponding to 3D tex slices */
+ unsigned level0, level1, j, imgFilter;
+ int width, height, depth;
+ float levelBlend;
+ const uint face = 0;
+
+ choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
+ &level0, &level1, &levelBlend, &imgFilter);
+
+ assert(sampler->normalized_coords);
+
+ width = texture->width[level0];
+ height = texture->height[level0];
+ depth = texture->depth[level0];
+
+ assert(width > 0);
+ assert(height > 0);
+ assert(depth > 0);
+
+ switch (imgFilter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ {
+ int x[4], y[4], z[4];
+ nearest_texcoord_4(sampler->wrap_s, s, width, x);
+ nearest_texcoord_4(sampler->wrap_t, t, height, y);
+ nearest_texcoord_4(sampler->wrap_r, p, depth, z);
+ for (j = 0; j < QUAD_SIZE; j++) {
+ get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ unsigned c;
+ x[j] /= 2;
+ y[j] /= 2;
+ z[j] /= 2;
+ get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
+ }
+ }
+ }
+ }
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ case PIPE_TEX_FILTER_ANISO:
+ {
+ int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
+ float xw[4], yw[4], zw[4]; /* interpolation weights */
+ linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
+ linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
+ linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int c;
+ float tx0[4][4], tx1[4][4];
+ get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
+ get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
+ get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
+ get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
+ get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
+ get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
+ get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
+ get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+ tx0[c][0], tx0[c][1],
+ tx0[c][2], tx0[c][3],
+ tx1[c][0], tx1[c][1],
+ tx1[c][2], tx1[c][3]);
+ }
+
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ x0[j] /= 2;
+ y0[j] /= 2;
+ z0[j] /= 2;
+ x1[j] /= 2;
+ y1[j] /= 2;
+ z1[j] /= 2;
+ get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
+ get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
+ get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
+ get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
+ get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
+ get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
+ get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
+ get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+ tx0[c][0], tx0[c][1],
+ tx0[c][2], tx0[c][3],
+ tx1[c][0], tx1[c][1],
+ tx1[c][2], tx1[c][3]);
+ }
+
+ /* blend mipmap levels */
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+ }
+ }
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+static void
+sp_get_samples_cube(const struct tgsi_sampler *sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ boolean computeLambda,
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ unsigned faces[QUAD_SIZE], j;
+ float ssss[4], tttt[4];
+ for (j = 0; j < QUAD_SIZE; j++) {
+ faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
+ }
+ sp_get_samples_2d_common(sampler, ssss, tttt, NULL,
+ computeLambda, lodbias, rgba, faces);
+}
+
+
+static void
+sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ boolean computeLambda,
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+ const uint face = 0;
+ const uint compare_func = sampler->compare_func;
+ unsigned level0, level1, j, imgFilter;
+ int width, height;
+ float levelBlend;
+
+ choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
+ &level0, &level1, &levelBlend, &imgFilter);
+
+ /* texture RECTS cannot be mipmapped */
+ assert(level0 == level1);
+
+ width = texture->width[level0];
+ height = texture->height[level0];
+
+ assert(width > 0);
+
+ switch (imgFilter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ {
+ int x[4], y[4];
+ nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
+ nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
+ for (j = 0; j < QUAD_SIZE; j++) {
+ get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ shadow_compare(compare_func, rgba, p, j);
+ }
+ }
+ }
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ case PIPE_TEX_FILTER_ANISO:
+ {
+ int x0[4], y0[4], x1[4], y1[4];
+ float xw[4], yw[4]; /* weights */
+ linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw);
+ linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
+ for (j = 0; j < QUAD_SIZE; j++) {
+ float tx[4][4]; /* texels */
+ int c;
+ get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
+ get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
+ get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
+ get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ shadow_compare(compare_func, tx, p, 0);
+ shadow_compare(compare_func, tx, p, 1);
+ shadow_compare(compare_func, tx, p, 2);
+ shadow_compare(compare_func, tx, p, 3);
+ }
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_2d(xw[j], yw[j],
+ tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ }
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+/**
+ * Common code for vertex/fragment program texture sampling.
+ */
+static INLINE void
+sp_get_samples(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ boolean computeLambda,
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+
+ if (!texture)
+ return;
+
+ switch (texture->target) {
+ case PIPE_TEXTURE_1D:
+ assert(sampler->normalized_coords);
+ sp_get_samples_1d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
+ break;
+ case PIPE_TEXTURE_2D:
+ if (sampler->normalized_coords)
+ sp_get_samples_2d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
+ else
+ sp_get_samples_rect(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
+ break;
+ case PIPE_TEXTURE_3D:
+ assert(sampler->normalized_coords);
+ sp_get_samples_3d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
+ break;
+ case PIPE_TEXTURE_CUBE:
+ assert(sampler->normalized_coords);
+ sp_get_samples_cube(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
+ break;
+ default:
+ assert(0);
+ }
+
+#if 0 /* DEBUG */
+ {
+ int i;
+ printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]);
+ for (i = 0; i < 4; i++) {
+ printf("Frag %d: %f %f %f %f\n", i,
+ rgba[0][i],
+ rgba[1][i],
+ rgba[2][i],
+ rgba[3][i]);
+ }
+ }
+#endif
+}
+
+
+/**
+ * Called via tgsi_sampler::get_samples() when running a fragment shader.
+ * Get four filtered RGBA values from the sampler's texture.
+ */
+void
+sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba);
+}
+
+
+/**
+ * Called via tgsi_sampler::get_samples() when running a vertex shader.
+ * Get four filtered RGBA values from the sampler's texture.
+ */
+void
+sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ sp_get_samples(tgsi_sampler, s, t, p, FALSE, lodbias, rgba);
+}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
new file mode 100644
index 0000000000..40d8eb2c2a
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -0,0 +1,73 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SP_TEX_SAMPLE_H
+#define SP_TEX_SAMPLE_H
+
+
+#include "tgsi/tgsi_exec.h"
+
+
+/**
+ * Subclass of tgsi_sampler
+ */
+struct sp_shader_sampler
+{
+ struct tgsi_sampler base; /**< base class */
+
+ uint unit;
+ struct softpipe_context *sp;
+ struct softpipe_tile_cache *cache;
+};
+
+
+
+static INLINE const struct sp_shader_sampler *
+sp_shader_sampler(const struct tgsi_sampler *sampler)
+{
+ return (const struct sp_shader_sampler *) sampler;
+}
+
+
+extern void
+sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE]);
+
+extern void
+sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE]);
+
+
+#endif /* SP_TEX_SAMPLE_H */
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
new file mode 100644
index 0000000000..faf9e871f9
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -0,0 +1,343 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Michel Dänzer <michel@tungstengraphics.com>
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "sp_context.h"
+#include "sp_state.h"
+#include "sp_texture.h"
+#include "sp_tile_cache.h"
+#include "sp_screen.h"
+
+
+/* Simple, maximally packed layout.
+ */
+
+static unsigned minify( unsigned d )
+{
+ return MAX2(1, d>>1);
+}
+
+
+/* Conventional allocation path for non-display textures:
+ */
+static boolean
+softpipe_texture_layout(struct pipe_screen *screen,
+ struct softpipe_texture * spt)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct pipe_texture *pt = &spt->base;
+ unsigned level;
+ unsigned width = pt->width[0];
+ unsigned height = pt->height[0];
+ unsigned depth = pt->depth[0];
+
+ unsigned buffer_size = 0;
+
+ for (level = 0; level <= pt->last_level; level++) {
+ pt->width[level] = width;
+ pt->height[level] = height;
+ pt->depth[level] = depth;
+ pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
+ spt->stride[level] = pt->nblocksx[level]*pt->block.size;
+
+ spt->level_offset[level] = buffer_size;
+
+ buffer_size += (pt->nblocksy[level] *
+ ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
+ spt->stride[level]);
+
+ width = minify(width);
+ height = minify(height);
+ depth = minify(depth);
+ }
+
+ spt->buffer = ws->buffer_create(ws, 32,
+ PIPE_BUFFER_USAGE_PIXEL,
+ buffer_size);
+
+ return spt->buffer != NULL;
+}
+
+static boolean
+softpipe_displaytarget_layout(struct pipe_screen *screen,
+ struct softpipe_texture * spt)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ unsigned usage = (PIPE_BUFFER_USAGE_CPU_READ_WRITE |
+ PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+
+ spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);
+ spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);
+
+ spt->buffer = ws->surface_buffer_create( ws,
+ spt->base.width[0],
+ spt->base.height[0],
+ spt->base.format,
+ usage,
+ &spt->stride[0]);
+
+ return spt->buffer != NULL;
+}
+
+
+
+
+
+static struct pipe_texture *
+softpipe_texture_create(struct pipe_screen *screen,
+ const struct pipe_texture *templat)
+{
+ struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture);
+ if (!spt)
+ return NULL;
+
+ spt->base = *templat;
+ spt->base.refcount = 1;
+ spt->base.screen = screen;
+
+ if (spt->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) {
+ if (!softpipe_displaytarget_layout(screen, spt))
+ goto fail;
+ }
+ else {
+ if (!softpipe_texture_layout(screen, spt))
+ goto fail;
+ }
+
+ assert(spt->base.refcount == 1);
+ return &spt->base;
+
+ fail:
+ FREE(spt);
+ return NULL;
+}
+
+
+static struct pipe_texture *
+softpipe_texture_blanket(struct pipe_screen * screen,
+ const struct pipe_texture *base,
+ const unsigned *stride,
+ struct pipe_buffer *buffer)
+{
+ struct softpipe_texture *spt;
+ assert(screen);
+
+ /* Only supports one type */
+ if (base->target != PIPE_TEXTURE_2D ||
+ base->last_level != 0 ||
+ base->depth[0] != 1) {
+ return NULL;
+ }
+
+ spt = CALLOC_STRUCT(softpipe_texture);
+ if (!spt)
+ return NULL;
+
+ spt->base = *base;
+ spt->base.refcount = 1;
+ spt->base.screen = screen;
+ spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);
+ spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);
+ spt->stride[0] = stride[0];
+
+ pipe_buffer_reference(screen, &spt->buffer, buffer);
+
+ return &spt->base;
+}
+
+
+static void
+softpipe_texture_release(struct pipe_screen *screen,
+ struct pipe_texture **pt)
+{
+ if (!*pt)
+ return;
+
+ if (--(*pt)->refcount <= 0) {
+ struct softpipe_texture *spt = softpipe_texture(*pt);
+
+ pipe_buffer_reference(screen, &spt->buffer, NULL);
+ FREE(spt);
+ }
+ *pt = NULL;
+}
+
+
+static struct pipe_surface *
+softpipe_get_tex_surface(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned usage)
+{
+ struct softpipe_texture *spt = softpipe_texture(pt);
+ struct pipe_surface *ps;
+
+ assert(level <= pt->last_level);
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ ps->refcount = 1;
+ if (ps) {
+ assert(ps->refcount);
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(screen, &ps->buffer, spt->buffer);
+ ps->format = pt->format;
+ ps->block = pt->block;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = spt->stride[level];
+ ps->offset = spt->level_offset[level];
+ ps->usage = usage;
+
+ /* Because we are softpipe, anything that the state tracker
+ * thought was going to be done with the GPU will actually get
+ * done with the CPU. Let's adjust the flags to take that into
+ * account.
+ */
+ if (ps->usage & PIPE_BUFFER_USAGE_GPU_WRITE) {
+ /* GPU_WRITE means "render" and that can involve reads (blending) */
+ ps->usage |= PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_CPU_READ;
+ }
+
+ if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ)
+ ps->usage |= PIPE_BUFFER_USAGE_CPU_READ;
+
+ if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE |
+ PIPE_BUFFER_USAGE_GPU_WRITE)) {
+ /* Mark the surface as dirty. The tile cache will look for this. */
+ spt->modified = TRUE;
+ }
+
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
+
+ if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) {
+ ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) *
+ ps->nblocksy *
+ ps->stride;
+ }
+ else {
+ assert(face == 0);
+ assert(zslice == 0);
+ }
+ }
+ return ps;
+}
+
+
+static void
+softpipe_tex_surface_release(struct pipe_screen *screen,
+ struct pipe_surface **s)
+{
+ struct pipe_surface *surf = *s;
+ /* Effectively do the texture_update work here - if texture images
+ * needed post-processing to put them into hardware layout, this is
+ * where it would happen. For softpipe, nothing to do.
+ */
+ assert ((*s)->texture);
+ if (--surf->refcount == 0) {
+ pipe_texture_reference(&surf->texture, NULL);
+ pipe_buffer_reference(screen, &surf->buffer, NULL);
+ FREE(surf);
+ }
+ *s = NULL;
+}
+
+
+static void *
+softpipe_surface_map( struct pipe_screen *screen,
+ struct pipe_surface *surface,
+ unsigned flags )
+{
+ ubyte *map;
+
+ if (flags & ~surface->usage) {
+ assert(0);
+ return NULL;
+ }
+
+ map = pipe_buffer_map( screen, surface->buffer, flags );
+ if (map == NULL)
+ return NULL;
+
+ /* May want to different things here depending on read/write nature
+ * of the map:
+ */
+ if (surface->texture &&
+ (flags & PIPE_BUFFER_USAGE_CPU_WRITE))
+ {
+ /* Do something to notify sharing contexts of a texture change.
+ * In softpipe, that would mean flushing the texture cache.
+ */
+ softpipe_screen(screen)->timestamp++;
+ }
+
+ return map + surface->offset;
+}
+
+
+static void
+softpipe_surface_unmap(struct pipe_screen *screen,
+ struct pipe_surface *surface)
+{
+ pipe_buffer_unmap( screen, surface->buffer );
+}
+
+
+void
+softpipe_init_texture_funcs(struct softpipe_context *sp)
+{
+}
+
+
+void
+softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
+{
+ screen->texture_create = softpipe_texture_create;
+ screen->texture_blanket = softpipe_texture_blanket;
+ screen->texture_release = softpipe_texture_release;
+
+ screen->get_tex_surface = softpipe_get_tex_surface;
+ screen->tex_surface_release = softpipe_tex_surface_release;
+
+ screen->surface_map = softpipe_surface_map;
+ screen->surface_unmap = softpipe_surface_unmap;
+}
diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h
new file mode 100644
index 0000000000..c1636920cd
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_texture.h
@@ -0,0 +1,70 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SP_TEXTURE_H
+#define SP_TEXTURE_H
+
+
+#include "pipe/p_state.h"
+
+
+struct pipe_context;
+struct pipe_screen;
+struct softpipe_context;
+
+
+struct softpipe_texture
+{
+ struct pipe_texture base;
+
+ unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned stride[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* The data is held here:
+ */
+ struct pipe_buffer *buffer;
+
+ boolean modified;
+};
+
+
+/** cast wrapper */
+static INLINE struct softpipe_texture *
+softpipe_texture(struct pipe_texture *pt)
+{
+ return (struct softpipe_texture *) pt;
+}
+
+
+extern void
+softpipe_init_texture_funcs( struct softpipe_context *softpipe );
+
+extern void
+softpipe_init_screen_texture_funcs(struct pipe_screen *screen);
+
+
+#endif /* SP_TEXTURE */
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
new file mode 100644
index 0000000000..78b0efa46d
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -0,0 +1,614 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Framebuffer/surface tile caching.
+ *
+ * Author:
+ * Brian Paul
+ */
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_tile.h"
+#include "sp_context.h"
+#include "sp_surface.h"
+#include "sp_texture.h"
+#include "sp_tile_cache.h"
+
+#define NUM_ENTRIES 32
+
+
+/** XXX move these */
+#define MAX_WIDTH 2048
+#define MAX_HEIGHT 2048
+
+
+struct softpipe_tile_cache
+{
+ struct pipe_screen *screen;
+ struct pipe_surface *surface; /**< the surface we're caching */
+ void *surface_map;
+ struct pipe_texture *texture; /**< if caching a texture */
+ struct softpipe_cached_tile entries[NUM_ENTRIES];
+ uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32];
+ float clear_color[4];
+ uint clear_val;
+ boolean depth_stencil; /** Is the surface a depth/stencil format? */
+
+ struct pipe_surface *tex_surf;
+ void *tex_surf_map;
+ int tex_face, tex_level, tex_z;
+
+ struct softpipe_cached_tile tile; /**< scratch tile for clears */
+};
+
+
+/**
+ * Return the position in the cache for the tile that contains win pos (x,y).
+ * We currently use a direct mapped cache so this is like a hack key.
+ * At some point we should investige something more sophisticated, like
+ * a LRU replacement policy.
+ */
+#define CACHE_POS(x, y) \
+ (((x) / TILE_SIZE + ((y) / TILE_SIZE) * 5) % NUM_ENTRIES)
+
+
+
+/**
+ * Is the tile at (x,y) in cleared state?
+ */
+static INLINE uint
+is_clear_flag_set(const uint *bitvec, int x, int y)
+{
+ int pos, bit;
+ x /= TILE_SIZE;
+ y /= TILE_SIZE;
+ pos = y * (MAX_WIDTH / TILE_SIZE) + x;
+ assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32);
+ bit = bitvec[pos / 32] & (1 << (pos & 31));
+ return bit;
+}
+
+
+/**
+ * Mark the tile at (x,y) as not cleared.
+ */
+static INLINE void
+clear_clear_flag(uint *bitvec, int x, int y)
+{
+ int pos;
+ x /= TILE_SIZE;
+ y /= TILE_SIZE;
+ pos = y * (MAX_WIDTH / TILE_SIZE) + x;
+ assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32);
+ bitvec[pos / 32] &= ~(1 << (pos & 31));
+}
+
+
+struct softpipe_tile_cache *
+sp_create_tile_cache( struct pipe_screen *screen )
+{
+ struct softpipe_tile_cache *tc;
+ uint pos;
+
+ tc = CALLOC_STRUCT( softpipe_tile_cache );
+ if (tc) {
+ tc->screen = screen;
+ for (pos = 0; pos < NUM_ENTRIES; pos++) {
+ tc->entries[pos].x =
+ tc->entries[pos].y = -1;
+ }
+ }
+ return tc;
+}
+
+
+void
+sp_destroy_tile_cache(struct softpipe_tile_cache *tc)
+{
+ uint pos;
+
+ for (pos = 0; pos < NUM_ENTRIES; pos++) {
+ /*assert(tc->entries[pos].x < 0);*/
+ }
+ if (tc->surface) {
+ pipe_surface_reference(&tc->surface, NULL);
+ }
+ if (tc->tex_surf) {
+ pipe_surface_reference(&tc->tex_surf, NULL);
+ }
+
+ FREE( tc );
+}
+
+
+/**
+ * Specify the surface to cache.
+ */
+void
+sp_tile_cache_set_surface(struct softpipe_tile_cache *tc,
+ struct pipe_surface *ps)
+{
+ assert(!tc->texture);
+
+ if (tc->surface_map) {
+ tc->screen->surface_unmap(tc->screen, tc->surface);
+ tc->surface_map = NULL;
+ }
+
+ pipe_surface_reference(&tc->surface, ps);
+
+ if (tc->surface) {
+ if (tc->surface_map) /* XXX: this is always NULL!? */
+ tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface,
+ PIPE_BUFFER_USAGE_CPU_READ |
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+
+ tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM ||
+ ps->format == PIPE_FORMAT_X8Z24_UNORM ||
+ ps->format == PIPE_FORMAT_Z24S8_UNORM ||
+ ps->format == PIPE_FORMAT_Z24X8_UNORM ||
+ ps->format == PIPE_FORMAT_Z16_UNORM ||
+ ps->format == PIPE_FORMAT_Z32_UNORM ||
+ ps->format == PIPE_FORMAT_S8_UNORM);
+ }
+}
+
+
+/**
+ * Return the surface being cached.
+ */
+struct pipe_surface *
+sp_tile_cache_get_surface(struct softpipe_tile_cache *tc)
+{
+ return tc->surface;
+}
+
+
+void
+sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc)
+{
+ if (tc->surface && !tc->surface_map)
+ tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface,
+ PIPE_BUFFER_USAGE_CPU_WRITE |
+ PIPE_BUFFER_USAGE_CPU_READ);
+
+ if (tc->tex_surf && !tc->tex_surf_map)
+ tc->tex_surf_map = tc->screen->surface_map(tc->screen, tc->tex_surf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+}
+
+
+void
+sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc)
+{
+ if (tc->surface_map) {
+ tc->screen->surface_unmap(tc->screen, tc->surface);
+ tc->surface_map = NULL;
+ }
+
+ if (tc->tex_surf_map) {
+ tc->screen->surface_unmap(tc->screen, tc->tex_surf);
+ tc->tex_surf_map = NULL;
+ }
+}
+
+
+/**
+ * Specify the texture to cache.
+ */
+void
+sp_tile_cache_set_texture(struct pipe_context *pipe,
+ struct softpipe_tile_cache *tc,
+ struct pipe_texture *texture)
+{
+ uint i;
+
+ assert(!tc->surface);
+
+ pipe_texture_reference(&tc->texture, texture);
+
+ if (tc->tex_surf_map) {
+ tc->screen->surface_unmap(tc->screen, tc->tex_surf);
+ tc->tex_surf_map = NULL;
+ }
+ pipe_surface_reference(&tc->tex_surf, NULL);
+
+ /* mark as entries as invalid/empty */
+ /* XXX we should try to avoid this when the teximage hasn't changed */
+ for (i = 0; i < NUM_ENTRIES; i++) {
+ tc->entries[i].x = -1;
+ }
+
+ tc->tex_face = -1; /* any invalid value here */
+}
+
+
+/**
+ * Set pixels in a tile to the given clear color/value, float.
+ */
+static void
+clear_tile_rgba(struct softpipe_cached_tile *tile,
+ enum pipe_format format,
+ const float clear_value[4])
+{
+ if (clear_value[0] == 0.0 &&
+ clear_value[1] == 0.0 &&
+ clear_value[2] == 0.0 &&
+ clear_value[3] == 0.0) {
+ memset(tile->data.color, 0, sizeof(tile->data.color));
+ }
+ else {
+ uint i, j;
+ for (i = 0; i < TILE_SIZE; i++) {
+ for (j = 0; j < TILE_SIZE; j++) {
+ tile->data.color[i][j][0] = clear_value[0];
+ tile->data.color[i][j][1] = clear_value[1];
+ tile->data.color[i][j][2] = clear_value[2];
+ tile->data.color[i][j][3] = clear_value[3];
+ }
+ }
+ }
+}
+
+
+/**
+ * Set a tile to a solid value/color.
+ */
+static void
+clear_tile(struct softpipe_cached_tile *tile,
+ enum pipe_format format,
+ uint clear_value)
+{
+ uint i, j;
+
+ switch (pf_get_size(format)) {
+ case 1:
+ memset(tile->data.any, 0, TILE_SIZE * TILE_SIZE);
+ break;
+ case 2:
+ if (clear_value == 0) {
+ memset(tile->data.any, 0, 2 * TILE_SIZE * TILE_SIZE);
+ }
+ else {
+ for (i = 0; i < TILE_SIZE; i++) {
+ for (j = 0; j < TILE_SIZE; j++) {
+ tile->data.depth16[i][j] = (ushort) clear_value;
+ }
+ }
+ }
+ break;
+ case 4:
+ if (clear_value == 0) {
+ memset(tile->data.any, 0, 4 * TILE_SIZE * TILE_SIZE);
+ }
+ else {
+ for (i = 0; i < TILE_SIZE; i++) {
+ for (j = 0; j < TILE_SIZE; j++) {
+ tile->data.color32[i][j] = clear_value;
+ }
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+/**
+ * Actually clear the tiles which were flagged as being in a clear state.
+ */
+static void
+sp_tile_cache_flush_clear(struct pipe_context *pipe,
+ struct softpipe_tile_cache *tc)
+{
+ struct pipe_surface *ps = tc->surface;
+ const uint w = tc->surface->width;
+ const uint h = tc->surface->height;
+ uint x, y;
+ uint numCleared = 0;
+
+ /* clear the scratch tile to the clear value */
+ clear_tile(&tc->tile, ps->format, tc->clear_val);
+
+ /* push the tile to all positions marked as clear */
+ for (y = 0; y < h; y += TILE_SIZE) {
+ for (x = 0; x < w; x += TILE_SIZE) {
+ if (is_clear_flag_set(tc->clear_flags, x, y)) {
+ pipe_put_tile_raw(ps,
+ x, y, TILE_SIZE, TILE_SIZE,
+ tc->tile.data.color32, 0/*STRIDE*/);
+
+ /* do this? */
+ clear_clear_flag(tc->clear_flags, x, y);
+
+ numCleared++;
+ }
+ }
+ }
+#if 0
+ debug_printf("num cleared: %u\n", numCleared);
+#endif
+}
+
+
+/**
+ * Flush the tile cache: write all dirty tiles back to the surface.
+ * any tiles "flagged" as cleared will be "really" cleared.
+ */
+void
+sp_flush_tile_cache(struct softpipe_context *softpipe,
+ struct softpipe_tile_cache *tc)
+{
+ struct pipe_surface *ps = tc->surface;
+ int inuse = 0, pos;
+
+ if (ps && ps->buffer) {
+ /* caching a drawing surface */
+ for (pos = 0; pos < NUM_ENTRIES; pos++) {
+ struct softpipe_cached_tile *tile = tc->entries + pos;
+ if (tile->x >= 0) {
+ if (tc->depth_stencil) {
+ pipe_put_tile_raw(ps,
+ tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->data.depth32, 0/*STRIDE*/);
+ }
+ else {
+ pipe_put_tile_rgba(ps,
+ tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ (float *) tile->data.color);
+ }
+ tile->x = tile->y = -1; /* mark as empty */
+ inuse++;
+ }
+ }
+
+#if TILE_CLEAR_OPTIMIZATION
+ sp_tile_cache_flush_clear(&softpipe->pipe, tc);
+#endif
+ }
+ else if (tc->texture) {
+ /* caching a texture, mark all entries as empty */
+ for (pos = 0; pos < NUM_ENTRIES; pos++) {
+ tc->entries[pos].x = -1;
+ }
+ tc->tex_face = -1;
+ }
+
+#if 0
+ debug_printf("flushed tiles in use: %d\n", inuse);
+#endif
+}
+
+
+/**
+ * Get a tile from the cache.
+ * \param x, y position of tile, in pixels
+ */
+struct softpipe_cached_tile *
+sp_get_cached_tile(struct softpipe_context *softpipe,
+ struct softpipe_tile_cache *tc, int x, int y)
+{
+ struct pipe_surface *ps = tc->surface;
+
+ /* tile pos in framebuffer: */
+ const int tile_x = x & ~(TILE_SIZE - 1);
+ const int tile_y = y & ~(TILE_SIZE - 1);
+
+ /* cache pos/entry: */
+ const int pos = CACHE_POS(x, y);
+ struct softpipe_cached_tile *tile = tc->entries + pos;
+
+ if (tile_x != tile->x ||
+ tile_y != tile->y) {
+
+ if (tile->x != -1) {
+ /* put dirty tile back in framebuffer */
+ if (tc->depth_stencil) {
+ pipe_put_tile_raw(ps,
+ tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->data.depth32, 0/*STRIDE*/);
+ }
+ else {
+ pipe_put_tile_rgba(ps,
+ tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ (float *) tile->data.color);
+ }
+ }
+
+ tile->x = tile_x;
+ tile->y = tile_y;
+
+ if (is_clear_flag_set(tc->clear_flags, x, y)) {
+ /* don't get tile from framebuffer, just clear it */
+ if (tc->depth_stencil) {
+ clear_tile(tile, ps->format, tc->clear_val);
+ }
+ else {
+ clear_tile_rgba(tile, ps->format, tc->clear_color);
+ }
+ clear_clear_flag(tc->clear_flags, x, y);
+ }
+ else {
+ /* get new tile data from surface */
+ if (tc->depth_stencil) {
+ pipe_get_tile_raw(ps,
+ tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->data.depth32, 0/*STRIDE*/);
+ }
+ else {
+ pipe_get_tile_rgba(ps,
+ tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ (float *) tile->data.color);
+ }
+ }
+ }
+
+ return tile;
+}
+
+
+/**
+ * Given the texture face, level, zslice, x and y values, compute
+ * the cache entry position/index where we'd hope to find the
+ * cached texture tile.
+ * This is basically a direct-map cache.
+ * XXX There's probably lots of ways in which we can improve this.
+ */
+static INLINE uint
+tex_cache_pos(int x, int y, int z, int face, int level)
+{
+ uint entry = x + y * 5 + z * 4 + face + level;
+ return entry % NUM_ENTRIES;
+}
+
+
+/**
+ * Similar to sp_get_cached_tile() but for textures.
+ * Tiles are read-only and indexed with more params.
+ */
+const struct softpipe_cached_tile *
+sp_get_cached_tile_tex(struct softpipe_context *sp,
+ struct softpipe_tile_cache *tc, int x, int y, int z,
+ int face, int level)
+{
+ struct pipe_screen *screen = sp->pipe.screen;
+ /* tile pos in framebuffer: */
+ const int tile_x = x & ~(TILE_SIZE - 1);
+ const int tile_y = y & ~(TILE_SIZE - 1);
+ /* cache pos/entry: */
+ const uint pos = tex_cache_pos(x / TILE_SIZE, y / TILE_SIZE, z,
+ face, level);
+ struct softpipe_cached_tile *tile = tc->entries + pos;
+
+ if (tc->texture) {
+ struct softpipe_texture *spt = softpipe_texture(tc->texture);
+ if (spt->modified) {
+ /* texture was modified, force a cache reload */
+ tile->x = -1;
+ spt->modified = FALSE;
+ }
+ }
+
+ if (tile_x != tile->x ||
+ tile_y != tile->y ||
+ z != tile->z ||
+ face != tile->face ||
+ level != tile->level) {
+ /* cache miss */
+
+ /* check if we need to get a new surface */
+ if (!tc->tex_surf ||
+ tc->tex_face != face ||
+ tc->tex_level != level ||
+ tc->tex_z != z) {
+ /* get new surface (view into texture) */
+
+ if (tc->tex_surf_map)
+ tc->screen->surface_unmap(tc->screen, tc->tex_surf);
+
+ tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ tc->tex_surf_map = screen->surface_map(screen, tc->tex_surf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+
+ tc->tex_face = face;
+ tc->tex_level = level;
+ tc->tex_z = z;
+ }
+
+ /* get tile from the surface (view into texture) */
+ pipe_get_tile_rgba(tc->tex_surf,
+ tile_x, tile_y, TILE_SIZE, TILE_SIZE,
+ (float *) tile->data.color);
+ tile->x = tile_x;
+ tile->y = tile_y;
+ tile->z = z;
+ tile->face = face;
+ tile->level = level;
+ }
+
+ return tile;
+}
+
+
+/**
+ * When a whole surface is being cleared to a value we can avoid
+ * fetching tiles above.
+ * Save the color and set a 'clearflag' for each tile of the screen.
+ */
+void
+sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue)
+{
+ uint r, g, b, a;
+ uint pos;
+
+ tc->clear_val = clearValue;
+
+ switch (tc->surface->format) {
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ r = (clearValue >> 24) & 0xff;
+ g = (clearValue >> 16) & 0xff;
+ b = (clearValue >> 8) & 0xff;
+ a = (clearValue ) & 0xff;
+ break;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ r = (clearValue >> 16) & 0xff;
+ g = (clearValue >> 8) & 0xff;
+ b = (clearValue ) & 0xff;
+ a = (clearValue >> 24) & 0xff;
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ r = (clearValue >> 8) & 0xff;
+ g = (clearValue >> 16) & 0xff;
+ b = (clearValue >> 24) & 0xff;
+ a = (clearValue ) & 0xff;
+ break;
+ default:
+ r = g = b = a = 0;
+ }
+
+ tc->clear_color[0] = r / 255.0f;
+ tc->clear_color[1] = g / 255.0f;
+ tc->clear_color[2] = b / 255.0f;
+ tc->clear_color[3] = a / 255.0f;
+
+#if TILE_CLEAR_OPTIMIZATION
+ /* set flags to indicate all the tiles are cleared */
+ memset(tc->clear_flags, 255, sizeof(tc->clear_flags));
+#else
+ /* disable the optimization */
+ memset(tc->clear_flags, 0, sizeof(tc->clear_flags));
+#endif
+
+ for (pos = 0; pos < NUM_ENTRIES; pos++) {
+ struct softpipe_cached_tile *tile = tc->entries + pos;
+ tile->x = tile->y = -1;
+ }
+}
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h
new file mode 100644
index 0000000000..a66bb50bcc
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.h
@@ -0,0 +1,105 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SP_TILE_CACHE_H
+#define SP_TILE_CACHE_H
+
+#define TILE_CLEAR_OPTIMIZATION 1
+
+
+#include "pipe/p_compiler.h"
+
+
+struct softpipe_context;
+struct softpipe_tile_cache;
+
+
+/**
+ * Cache tile size (width and height). This needs to be a power of two.
+ */
+#define TILE_SIZE 64
+
+
+
+struct softpipe_cached_tile
+{
+ int x, y; /**< pos of tile in window coords */
+ int z, face, level; /**< Extra texture indexes */
+ union {
+ float color[TILE_SIZE][TILE_SIZE][4];
+ uint color32[TILE_SIZE][TILE_SIZE];
+ uint depth32[TILE_SIZE][TILE_SIZE];
+ ushort depth16[TILE_SIZE][TILE_SIZE];
+ ubyte stencil8[TILE_SIZE][TILE_SIZE];
+ ubyte any[1];
+ } data;
+};
+
+
+extern struct softpipe_tile_cache *
+sp_create_tile_cache( struct pipe_screen *screen );
+
+extern void
+sp_destroy_tile_cache(struct softpipe_tile_cache *tc);
+
+extern void
+sp_tile_cache_set_surface(struct softpipe_tile_cache *tc,
+ struct pipe_surface *sps);
+
+extern struct pipe_surface *
+sp_tile_cache_get_surface(struct softpipe_tile_cache *tc);
+
+extern void
+sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc);
+
+extern void
+sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc);
+
+extern void
+sp_tile_cache_set_texture(struct pipe_context *pipe,
+ struct softpipe_tile_cache *tc,
+ struct pipe_texture *texture);
+
+extern void
+sp_flush_tile_cache(struct softpipe_context *softpipe,
+ struct softpipe_tile_cache *tc);
+
+extern void
+sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue);
+
+extern struct softpipe_cached_tile *
+sp_get_cached_tile(struct softpipe_context *softpipe,
+ struct softpipe_tile_cache *tc, int x, int y);
+
+extern const struct softpipe_cached_tile *
+sp_get_cached_tile_tex(struct softpipe_context *softpipe,
+ struct softpipe_tile_cache *tc, int x, int y, int z,
+ int face, int level);
+
+
+#endif /* SP_TILE_CACHE_H */
+
diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h
new file mode 100644
index 0000000000..4ab666486c
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_winsys.h
@@ -0,0 +1,73 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* This is the interface that softpipe requires any window system
+ * hosting it to implement. This is the only include file in softpipe
+ * which is public.
+ */
+
+
+#ifndef SP_WINSYS_H
+#define SP_WINSYS_H
+
+
+#include "pipe/p_compiler.h" /* for boolean */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+enum pipe_format;
+
+struct softpipe_winsys {
+ /** test if the given format is supported for front/back color bufs */
+ boolean (*is_format_supported)( struct softpipe_winsys *sws,
+ enum pipe_format format );
+
+};
+
+struct pipe_screen;
+struct pipe_winsys;
+struct pipe_context;
+
+
+struct pipe_context *softpipe_create( struct pipe_screen *,
+ struct pipe_winsys *,
+ void *unused );
+
+
+struct pipe_screen *
+softpipe_create_screen(struct pipe_winsys *);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SP_WINSYS_H */
diff --git a/src/gallium/drivers/trace/Makefile b/src/gallium/drivers/trace/Makefile
new file mode 100644
index 0000000000..3859b8acb0
--- /dev/null
+++ b/src/gallium/drivers/trace/Makefile
@@ -0,0 +1,18 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = trace
+
+C_SOURCES = \
+ tr_context.c \
+ tr_dump.c \
+ tr_screen.c \
+ tr_state.c \
+ tr_texture.c \
+ tr_winsys.c
+
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README
new file mode 100644
index 0000000000..f0e1cd596d
--- /dev/null
+++ b/src/gallium/drivers/trace/README
@@ -0,0 +1,64 @@
+ TRACE PIPE DRIVER
+
+
+= About =
+
+This directory contains a Gallium3D pipe driver which traces all incoming calls.
+
+
+= Build Instructions =
+
+To build, invoke scons on the top dir as
+
+ scons statetrackers=mesa drivers=softpipe,i965simple,trace winsys=xlib
+
+
+= Usage =
+
+To use do
+
+ ln -s libGL.so build/linux-x86-debug/gallium/winsys/xlib/libGL.so.1
+ export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/gallium/winsys/xlib
+
+ensure the right libGL.so is being picked by doing
+
+ ldd progs/trivial/tri
+
+and then try running
+
+ GALLIUM_TRACE=tri.trace progs/trivial/tri
+
+which should create a tri.trace file, which is an XML file. You can view copying
+trace.xsl to the same directory, and opening with a XSLT capable browser such as
+Firefox or Internet Explorer.
+
+
+= Integrating =
+
+You can integrate the trace pipe driver either inside the state tracker or the
+winsys. The procedure on both cases is the same. Let's assume you have a
+pipe_screen and a pipe_context pair obtained by the usual means (variable and
+function names are just for illustration purposes):
+
+ real_screen = real_screen_create(...);
+
+ real_context = real_context_create(...);
+
+The trace screen and pipe_context is then created by doing
+
+ trace_screen = trace_screen_create(real_screen);
+
+ trace_context = trace_context_create(trace_screen, real_context);
+
+You can then simply use trace_screen and trace_context instead of real_screen
+and real_context.
+
+Do not call trace_winsys_create. Simply pass trace_screen->winsys or
+trace_context->winsys in places you would pass winsys.
+
+You can create as many contexts you wish. Just ensure that you don't mistake
+trace_screen with real_screen when creating them.
+
+
+--
+Jose Fonseca <jrfonseca@tungstengraphics.com>
diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript
new file mode 100644
index 0000000000..0a6bfb8f4c
--- /dev/null
+++ b/src/gallium/drivers/trace/SConscript
@@ -0,0 +1,16 @@
+Import('*')
+
+env = env.Clone()
+
+trace = env.ConvenienceLibrary(
+ target = 'trace',
+ source = [
+ 'tr_context.c',
+ 'tr_dump.c',
+ 'tr_screen.c',
+ 'tr_state.c',
+ 'tr_texture.c',
+ 'tr_winsys.c',
+ ])
+
+Export('trace') \ No newline at end of file
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
new file mode 100644
index 0000000000..f0d51ad82e
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -0,0 +1,1072 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+#include "pipe/p_screen.h"
+
+#include "tr_dump.h"
+#include "tr_state.h"
+#include "tr_screen.h"
+#include "tr_texture.h"
+#include "tr_winsys.h"
+#include "tr_context.h"
+
+
+static INLINE struct pipe_texture *
+trace_texture_unwrap(struct trace_context *tr_ctx,
+ struct pipe_texture *texture)
+{
+ struct trace_screen *tr_scr = trace_screen(tr_ctx->base.screen);
+ struct trace_texture *tr_tex;
+
+ if(!texture)
+ return NULL;
+
+ tr_tex = trace_texture(tr_scr, texture);
+
+ assert(tr_tex->texture);
+ assert(tr_tex->texture->screen == tr_scr->screen);
+ return tr_tex->texture;
+}
+
+
+static INLINE struct pipe_surface *
+trace_surface_unwrap(struct trace_context *tr_ctx,
+ struct pipe_surface *surface)
+{
+ struct trace_screen *tr_scr = trace_screen(tr_ctx->base.screen);
+ struct trace_texture *tr_tex;
+ struct trace_surface *tr_surf;
+
+ if(!surface)
+ return NULL;
+
+ assert(surface->texture);
+ if(!surface->texture)
+ return surface;
+
+ tr_tex = trace_texture(tr_scr, surface->texture);
+ tr_surf = trace_surface(tr_tex, surface);
+
+ assert(tr_surf->surface);
+ assert(tr_surf->surface->texture->screen == tr_scr->screen);
+ return tr_surf->surface;
+}
+
+
+static INLINE void
+trace_context_set_edgeflags(struct pipe_context *_pipe,
+ const unsigned *bitfield)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "set_edgeflags");
+
+ trace_dump_arg(ptr, pipe);
+ /* FIXME: we don't know how big this array is */
+ trace_dump_arg(ptr, bitfield);
+
+ pipe->set_edgeflags(pipe, bitfield);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE boolean
+trace_context_draw_arrays(struct pipe_context *_pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ boolean result;
+
+ trace_dump_call_begin("pipe_context", "draw_arrays");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, mode);
+ trace_dump_arg(uint, start);
+ trace_dump_arg(uint, count);
+
+ result = pipe->draw_arrays(pipe, mode, start, count);;
+
+ trace_dump_ret(bool, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE boolean
+trace_context_draw_elements(struct pipe_context *_pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ boolean result;
+
+ trace_winsys_user_buffer_update(_pipe->winsys, indexBuffer);
+
+ trace_dump_call_begin("pipe_context", "draw_elements");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, indexBuffer);
+ trace_dump_arg(uint, indexSize);
+ trace_dump_arg(uint, mode);
+ trace_dump_arg(uint, start);
+ trace_dump_arg(uint, count);
+
+ result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);;
+
+ trace_dump_ret(bool, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE boolean
+trace_context_draw_range_elements(struct pipe_context *_pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ boolean result;
+
+ trace_winsys_user_buffer_update(_pipe->winsys, indexBuffer);
+
+ trace_dump_call_begin("pipe_context", "draw_range_elements");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, indexBuffer);
+ trace_dump_arg(uint, indexSize);
+ trace_dump_arg(uint, minIndex);
+ trace_dump_arg(uint, maxIndex);
+ trace_dump_arg(uint, mode);
+ trace_dump_arg(uint, start);
+ trace_dump_arg(uint, count);
+
+ result = pipe->draw_range_elements(pipe,
+ indexBuffer,
+ indexSize, minIndex, maxIndex,
+ mode, start, count);
+
+ trace_dump_ret(bool, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE struct pipe_query *
+trace_context_create_query(struct pipe_context *_pipe,
+ unsigned query_type)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ struct pipe_query *result;
+
+ trace_dump_call_begin("pipe_context", "create_query");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, query_type);
+
+ result = pipe->create_query(pipe, query_type);;
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE void
+trace_context_destroy_query(struct pipe_context *_pipe,
+ struct pipe_query *query)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "destroy_query");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, query);
+
+ pipe->destroy_query(pipe, query);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_begin_query(struct pipe_context *_pipe,
+ struct pipe_query *query)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "begin_query");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, query);
+
+ pipe->begin_query(pipe, query);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_end_query(struct pipe_context *_pipe,
+ struct pipe_query *query)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "end_query");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, query);
+
+ pipe->end_query(pipe, query);
+
+ trace_dump_call_end();
+}
+
+
+static INLINE boolean
+trace_context_get_query_result(struct pipe_context *_pipe,
+ struct pipe_query *query,
+ boolean wait,
+ uint64_t *presult)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ uint64_t result;
+ boolean _result;
+
+ trace_dump_call_begin("pipe_context", "get_query_result");
+
+ trace_dump_arg(ptr, pipe);
+
+ _result = pipe->get_query_result(pipe, query, wait, presult);;
+ result = *presult;
+
+ trace_dump_arg(uint, result);
+ trace_dump_ret(bool, _result);
+
+ trace_dump_call_end();
+
+ return _result;
+}
+
+
+static INLINE void *
+trace_context_create_blend_state(struct pipe_context *_pipe,
+ const struct pipe_blend_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ void * result;
+
+ trace_dump_call_begin("pipe_context", "create_blend_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(blend_state, state);
+
+ result = pipe->create_blend_state(pipe, state);;
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE void
+trace_context_bind_blend_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "bind_blend_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->bind_blend_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_delete_blend_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "delete_blend_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->delete_blend_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void *
+trace_context_create_sampler_state(struct pipe_context *_pipe,
+ const struct pipe_sampler_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ void * result;
+
+ trace_dump_call_begin("pipe_context", "create_sampler_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(sampler_state, state);
+
+ result = pipe->create_sampler_state(pipe, state);;
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE void
+trace_context_bind_sampler_states(struct pipe_context *_pipe,
+ unsigned num_states, void **states)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "bind_sampler_states");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, num_states);
+ trace_dump_arg_array(ptr, states, num_states);
+
+ pipe->bind_sampler_states(pipe, num_states, states);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_delete_sampler_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "delete_sampler_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->delete_sampler_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void *
+trace_context_create_rasterizer_state(struct pipe_context *_pipe,
+ const struct pipe_rasterizer_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ void * result;
+
+ trace_dump_call_begin("pipe_context", "create_rasterizer_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(rasterizer_state, state);
+
+ result = pipe->create_rasterizer_state(pipe, state);;
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE void
+trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "bind_rasterizer_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->bind_rasterizer_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "delete_rasterizer_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->delete_rasterizer_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void *
+trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
+ const struct pipe_depth_stencil_alpha_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ void * result;
+
+ trace_dump_call_begin("pipe_context", "create_depth_stencil_alpha_state");
+
+ result = pipe->create_depth_stencil_alpha_state(pipe, state);;
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(depth_stencil_alpha_state, state);
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE void
+trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "bind_depth_stencil_alpha_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->bind_depth_stencil_alpha_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "delete_depth_stencil_alpha_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->delete_depth_stencil_alpha_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void *
+trace_context_create_fs_state(struct pipe_context *_pipe,
+ const struct pipe_shader_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ void * result;
+
+ trace_dump_call_begin("pipe_context", "create_fs_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(shader_state, state);
+
+ result = pipe->create_fs_state(pipe, state);;
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE void
+trace_context_bind_fs_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "bind_fs_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->bind_fs_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_delete_fs_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "delete_fs_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->delete_fs_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void *
+trace_context_create_vs_state(struct pipe_context *_pipe,
+ const struct pipe_shader_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ void * result;
+
+ trace_dump_call_begin("pipe_context", "create_vs_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(shader_state, state);
+
+ result = pipe->create_vs_state(pipe, state);;
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE void
+trace_context_bind_vs_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "bind_vs_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->bind_vs_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_delete_vs_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "delete_vs_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->delete_vs_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_blend_color(struct pipe_context *_pipe,
+ const struct pipe_blend_color *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "set_blend_color");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(blend_color, state);
+
+ pipe->set_blend_color(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_clip_state(struct pipe_context *_pipe,
+ const struct pipe_clip_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "set_clip_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(clip_state, state);
+
+ pipe->set_clip_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_constant_buffer(struct pipe_context *_pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buffer)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_winsys_user_buffer_update(_pipe->winsys, (struct pipe_buffer *)buffer);
+
+ trace_dump_call_begin("pipe_context", "set_constant_buffer");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, shader);
+ trace_dump_arg(uint, index);
+ trace_dump_arg(constant_buffer, buffer);
+
+ pipe->set_constant_buffer(pipe, shader, index, buffer);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_framebuffer_state(struct pipe_context *_pipe,
+ const struct pipe_framebuffer_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ struct pipe_framebuffer_state unwrapped_state;
+ unsigned i;
+
+ /* Unwrap the input state */
+ memcpy(&unwrapped_state, state, sizeof(unwrapped_state));
+ for(i = 0; i < state->num_cbufs; ++i)
+ unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]);
+ for(i = state->num_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i)
+ unwrapped_state.cbufs[i] = NULL;
+ unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf);
+ state = &unwrapped_state;
+
+ trace_dump_call_begin("pipe_context", "set_framebuffer_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(framebuffer_state, state);
+
+ pipe->set_framebuffer_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_polygon_stipple(struct pipe_context *_pipe,
+ const struct pipe_poly_stipple *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "set_polygon_stipple");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(poly_stipple, state);
+
+ pipe->set_polygon_stipple(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_scissor_state(struct pipe_context *_pipe,
+ const struct pipe_scissor_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "set_scissor_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(scissor_state, state);
+
+ pipe->set_scissor_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_viewport_state(struct pipe_context *_pipe,
+ const struct pipe_viewport_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "set_viewport_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(viewport_state, state);
+
+ pipe->set_viewport_state(pipe, state);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_sampler_textures(struct pipe_context *_pipe,
+ unsigned num_textures,
+ struct pipe_texture **textures)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ struct pipe_texture *unwrapped_textures[PIPE_MAX_SAMPLERS];
+ unsigned i;
+
+ for(i = 0; i < num_textures; ++i)
+ unwrapped_textures[i] = trace_texture_unwrap(tr_ctx, textures[i]);
+ textures = unwrapped_textures;
+
+ trace_dump_call_begin("pipe_context", "set_sampler_textures");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, num_textures);
+ trace_dump_arg_array(ptr, textures, num_textures);
+
+ pipe->set_sampler_textures(pipe, num_textures, textures);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_vertex_buffers(struct pipe_context *_pipe,
+ unsigned num_buffers,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ unsigned i;
+
+ for(i = 0; i < num_buffers; ++i)
+ trace_winsys_user_buffer_update(_pipe->winsys, buffers[i].buffer);
+
+ trace_dump_call_begin("pipe_context", "set_vertex_buffers");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, num_buffers);
+
+ trace_dump_arg_begin("buffers");
+ trace_dump_struct_array(vertex_buffer, buffers, num_buffers);
+ trace_dump_arg_end();
+
+ pipe->set_vertex_buffers(pipe, num_buffers, buffers);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_vertex_elements(struct pipe_context *_pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "set_vertex_elements");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, num_elements);
+
+ trace_dump_arg_begin("elements");
+ trace_dump_struct_array(vertex_element, elements, num_elements);
+ trace_dump_arg_end();
+
+ pipe->set_vertex_elements(pipe, num_elements, elements);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_surface_copy(struct pipe_context *_pipe,
+ boolean do_flip,
+ struct pipe_surface *dest,
+ unsigned destx, unsigned desty,
+ struct pipe_surface *src,
+ unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ dest = trace_surface_unwrap(tr_ctx, dest);
+ src = trace_surface_unwrap(tr_ctx, src);
+
+ trace_dump_call_begin("pipe_context", "surface_copy");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(bool, do_flip);
+ trace_dump_arg(ptr, dest);
+ trace_dump_arg(uint, destx);
+ trace_dump_arg(uint, desty);
+ trace_dump_arg(ptr, src);
+ trace_dump_arg(uint, srcx);
+ trace_dump_arg(uint, srcy);
+ trace_dump_arg(uint, width);
+ trace_dump_arg(uint, height);
+
+ pipe->surface_copy(pipe, do_flip,
+ dest, destx, desty,
+ src, srcx, srcy, width, height);
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_surface_fill(struct pipe_context *_pipe,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height,
+ unsigned value)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ dst = trace_surface_unwrap(tr_ctx, dst);
+
+ trace_dump_call_begin("pipe_context", "surface_fill");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, dst);
+ trace_dump_arg(uint, dstx);
+ trace_dump_arg(uint, dsty);
+ trace_dump_arg(uint, width);
+ trace_dump_arg(uint, height);
+
+ pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_clear(struct pipe_context *_pipe,
+ struct pipe_surface *surface,
+ unsigned clearValue)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ surface = trace_surface_unwrap(tr_ctx, surface);
+
+ trace_dump_call_begin("pipe_context", "clear");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, surface);
+ trace_dump_arg(uint, clearValue);
+
+ pipe->clear(pipe, surface, clearValue);;
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_flush(struct pipe_context *_pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "flush");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, flags);
+
+ pipe->flush(pipe, flags, fence);;
+
+ if(fence)
+ trace_dump_ret(ptr, *fence);
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_destroy(struct pipe_context *_pipe)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "destroy");
+
+ trace_dump_arg(ptr, pipe);
+
+ pipe->destroy(pipe);
+
+ trace_dump_call_end();
+
+ FREE(tr_ctx);
+}
+
+
+struct pipe_context *
+trace_context_create(struct pipe_screen *screen,
+ struct pipe_context *pipe)
+{
+ struct trace_context *tr_ctx;
+
+ if(!pipe)
+ goto error1;
+
+ if(!trace_dump_enabled())
+ goto error1;
+
+ tr_ctx = CALLOC_STRUCT(trace_context);
+ if(!tr_ctx)
+ goto error1;
+
+ tr_ctx->base.winsys = screen->winsys;
+ tr_ctx->base.screen = screen;
+ tr_ctx->base.destroy = trace_context_destroy;
+ tr_ctx->base.set_edgeflags = trace_context_set_edgeflags;
+ tr_ctx->base.draw_arrays = trace_context_draw_arrays;
+ tr_ctx->base.draw_elements = trace_context_draw_elements;
+ tr_ctx->base.draw_range_elements = trace_context_draw_range_elements;
+ tr_ctx->base.create_query = trace_context_create_query;
+ tr_ctx->base.destroy_query = trace_context_destroy_query;
+ tr_ctx->base.begin_query = trace_context_begin_query;
+ tr_ctx->base.end_query = trace_context_end_query;
+ tr_ctx->base.get_query_result = trace_context_get_query_result;
+ tr_ctx->base.create_blend_state = trace_context_create_blend_state;
+ tr_ctx->base.bind_blend_state = trace_context_bind_blend_state;
+ tr_ctx->base.delete_blend_state = trace_context_delete_blend_state;
+ tr_ctx->base.create_sampler_state = trace_context_create_sampler_state;
+ tr_ctx->base.bind_sampler_states = trace_context_bind_sampler_states;
+ tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state;
+ tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state;
+ tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state;
+ tr_ctx->base.delete_rasterizer_state = trace_context_delete_rasterizer_state;
+ tr_ctx->base.create_depth_stencil_alpha_state = trace_context_create_depth_stencil_alpha_state;
+ tr_ctx->base.bind_depth_stencil_alpha_state = trace_context_bind_depth_stencil_alpha_state;
+ tr_ctx->base.delete_depth_stencil_alpha_state = trace_context_delete_depth_stencil_alpha_state;
+ tr_ctx->base.create_fs_state = trace_context_create_fs_state;
+ tr_ctx->base.bind_fs_state = trace_context_bind_fs_state;
+ tr_ctx->base.delete_fs_state = trace_context_delete_fs_state;
+ tr_ctx->base.create_vs_state = trace_context_create_vs_state;
+ tr_ctx->base.bind_vs_state = trace_context_bind_vs_state;
+ tr_ctx->base.delete_vs_state = trace_context_delete_vs_state;
+ tr_ctx->base.set_blend_color = trace_context_set_blend_color;
+ tr_ctx->base.set_clip_state = trace_context_set_clip_state;
+ tr_ctx->base.set_constant_buffer = trace_context_set_constant_buffer;
+ tr_ctx->base.set_framebuffer_state = trace_context_set_framebuffer_state;
+ tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple;
+ tr_ctx->base.set_scissor_state = trace_context_set_scissor_state;
+ tr_ctx->base.set_viewport_state = trace_context_set_viewport_state;
+ tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures;
+ tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers;
+ tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements;
+ tr_ctx->base.surface_copy = trace_context_surface_copy;
+ tr_ctx->base.surface_fill = trace_context_surface_fill;
+ tr_ctx->base.clear = trace_context_clear;
+ tr_ctx->base.flush = trace_context_flush;
+
+ tr_ctx->pipe = pipe;
+
+ trace_dump_call_begin("", "pipe_context_create");
+ trace_dump_arg_begin("screen");
+ trace_dump_ptr(pipe->screen);
+ trace_dump_arg_end();
+ trace_dump_ret(ptr, pipe);
+ trace_dump_call_end();
+
+ return &tr_ctx->base;
+
+error1:
+ return pipe;
+}
diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h
new file mode 100644
index 0000000000..7831900ec2
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_context.h
@@ -0,0 +1,68 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TR_CONTEXT_H_
+#define TR_CONTEXT_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_context.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct trace_context
+{
+ struct pipe_context base;
+
+ struct pipe_context *pipe;
+};
+
+
+static INLINE struct trace_context *
+trace_context(struct pipe_context *pipe)
+{
+ assert(pipe);
+ return (struct trace_context *)pipe;
+}
+
+
+
+struct pipe_context *
+trace_context_create(struct pipe_screen *screen,
+ struct pipe_context *pipe);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* TR_CONTEXT_H_ */
diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
new file mode 100644
index 0000000000..a0ead0ded3
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -0,0 +1,404 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Trace dumping functions.
+ *
+ * For now we just use standard XML for dumping the trace calls, as this is
+ * simple to write, parse, and visually inspect, but the actual representation
+ * is abstracted out of this file, so that we can switch to a binary
+ * representation if/when it becomes justified.
+ *
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_OS_LINUX)
+#include <stdlib.h>
+#endif
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "util/u_stream.h"
+
+#include "tr_dump.h"
+
+
+static struct util_stream *stream = NULL;
+static unsigned refcount = 0;
+
+
+static INLINE void
+trace_dump_write(const char *buf, size_t size)
+{
+ if(stream)
+ util_stream_write(stream, buf, size);
+}
+
+
+static INLINE void
+trace_dump_writes(const char *s)
+{
+ trace_dump_write(s, strlen(s));
+}
+
+
+static INLINE void
+trace_dump_writef(const char *format, ...)
+{
+ static char buf[1024];
+ unsigned len;
+ va_list ap;
+ va_start(ap, format);
+ len = util_vsnprintf(buf, sizeof(buf), format, ap);
+ va_end(ap);
+ trace_dump_write(buf, len);
+}
+
+
+static INLINE void
+trace_dump_escape(const char *str)
+{
+ const unsigned char *p = (const unsigned char *)str;
+ unsigned char c;
+ while((c = *p++) != 0) {
+ if(c == '<')
+ trace_dump_writes("&lt;");
+ else if(c == '>')
+ trace_dump_writes("&gt;");
+ else if(c == '&')
+ trace_dump_writes("&amp;");
+ else if(c == '\'')
+ trace_dump_writes("&apos;");
+ else if(c == '\"')
+ trace_dump_writes("&quot;");
+ else if(c >= 0x20 && c <= 0x7e)
+ trace_dump_writef("%c", c);
+ else
+ trace_dump_writef("&#%u;", c);
+ }
+}
+
+
+static INLINE void
+trace_dump_indent(unsigned level)
+{
+ unsigned i;
+ for(i = 0; i < level; ++i)
+ trace_dump_writes("\t");
+}
+
+
+static INLINE void
+trace_dump_newline(void)
+{
+ trace_dump_writes("\n");
+}
+
+
+static INLINE void
+trace_dump_tag(const char *name)
+{
+ trace_dump_writes("<");
+ trace_dump_writes(name);
+ trace_dump_writes("/>");
+}
+
+
+static INLINE void
+trace_dump_tag_begin(const char *name)
+{
+ trace_dump_writes("<");
+ trace_dump_writes(name);
+ trace_dump_writes(">");
+}
+
+static INLINE void
+trace_dump_tag_begin1(const char *name,
+ const char *attr1, const char *value1)
+{
+ trace_dump_writes("<");
+ trace_dump_writes(name);
+ trace_dump_writes(" ");
+ trace_dump_writes(attr1);
+ trace_dump_writes("='");
+ trace_dump_escape(value1);
+ trace_dump_writes("'>");
+}
+
+
+static INLINE void
+trace_dump_tag_begin2(const char *name,
+ const char *attr1, const char *value1,
+ const char *attr2, const char *value2)
+{
+ trace_dump_writes("<");
+ trace_dump_writes(name);
+ trace_dump_writes(" ");
+ trace_dump_writes(attr1);
+ trace_dump_writes("=\'");
+ trace_dump_escape(value1);
+ trace_dump_writes("\' ");
+ trace_dump_writes(attr2);
+ trace_dump_writes("=\'");
+ trace_dump_escape(value2);
+ trace_dump_writes("\'>");
+}
+
+
+static INLINE void
+trace_dump_tag_begin3(const char *name,
+ const char *attr1, const char *value1,
+ const char *attr2, const char *value2,
+ const char *attr3, const char *value3)
+{
+ trace_dump_writes("<");
+ trace_dump_writes(name);
+ trace_dump_writes(" ");
+ trace_dump_writes(attr1);
+ trace_dump_writes("=\'");
+ trace_dump_escape(value1);
+ trace_dump_writes("\' ");
+ trace_dump_writes(attr2);
+ trace_dump_writes("=\'");
+ trace_dump_escape(value2);
+ trace_dump_writes("\' ");
+ trace_dump_writes(attr3);
+ trace_dump_writes("=\'");
+ trace_dump_escape(value3);
+ trace_dump_writes("\'>");
+}
+
+
+static INLINE void
+trace_dump_tag_end(const char *name)
+{
+ trace_dump_writes("</");
+ trace_dump_writes(name);
+ trace_dump_writes(">");
+}
+
+static void
+trace_dump_trace_close(void)
+{
+ if(stream) {
+ trace_dump_writes("</trace>\n");
+ util_stream_close(stream);
+ stream = NULL;
+ refcount = 0;
+ }
+}
+
+boolean trace_dump_trace_begin()
+{
+ const char *filename;
+
+ filename = debug_get_option("GALLIUM_TRACE", NULL);
+ if(!filename)
+ return FALSE;
+
+ if(!stream) {
+
+ stream = util_stream_create(filename, 0);
+ if(!stream)
+ return FALSE;
+
+ trace_dump_writes("<?xml version='1.0' encoding='UTF-8'?>\n");
+ trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n");
+ trace_dump_writes("<trace version='0.1'>\n");
+
+#if defined(PIPE_OS_LINUX)
+ /* Linux applications rarely cleanup GL / Gallium resources so catch
+ * application exit here */
+ atexit(trace_dump_trace_close);
+#endif
+ }
+
+ ++refcount;
+
+ return TRUE;
+}
+
+boolean trace_dump_enabled(void)
+{
+ return stream ? TRUE : FALSE;
+}
+
+void trace_dump_trace_end(void)
+{
+ if(stream)
+ if(!--refcount)
+ trace_dump_trace_close();
+}
+
+void trace_dump_call_begin(const char *klass, const char *method)
+{
+ trace_dump_indent(1);
+ trace_dump_tag_begin2("call", "class", klass, "method", method);
+ trace_dump_newline();
+}
+
+void trace_dump_call_end(void)
+{
+ trace_dump_indent(1);
+ trace_dump_tag_end("call");
+ trace_dump_newline();
+ util_stream_flush(stream);
+}
+
+void trace_dump_arg_begin(const char *name)
+{
+ trace_dump_indent(2);
+ trace_dump_tag_begin1("arg", "name", name);
+}
+
+void trace_dump_arg_end(void)
+{
+ trace_dump_tag_end("arg");
+ trace_dump_newline();
+}
+
+void trace_dump_ret_begin(void)
+{
+ trace_dump_indent(2);
+ trace_dump_tag_begin("ret");
+}
+
+void trace_dump_ret_end(void)
+{
+ trace_dump_tag_end("ret");
+ trace_dump_newline();
+}
+
+void trace_dump_bool(int value)
+{
+ trace_dump_writef("<bool>%c</bool>", value ? '1' : '0');
+}
+
+void trace_dump_int(long long int value)
+{
+ trace_dump_writef("<int>%lli</int>", value);
+}
+
+void trace_dump_uint(long long unsigned value)
+{
+ trace_dump_writef("<uint>%llu</uint>", value);
+}
+
+void trace_dump_float(double value)
+{
+ trace_dump_writef("<float>%g</float>", value);
+}
+
+void trace_dump_bytes(const void *data,
+ long unsigned size)
+{
+ static const char hex_table[16] = "0123456789ABCDEF";
+ const uint8_t *p = data;
+ long unsigned i;
+ trace_dump_writes("<bytes>");
+ for(i = 0; i < size; ++i) {
+ uint8_t byte = *p++;
+ char hex[2];
+ hex[0] = hex_table[byte >> 4];
+ hex[1] = hex_table[byte & 0xf];
+ trace_dump_write(hex, 2);
+ }
+ trace_dump_writes("</bytes>");
+}
+
+void trace_dump_string(const char *str)
+{
+ trace_dump_writes("<string>");
+ trace_dump_escape(str);
+ trace_dump_writes("</string>");
+}
+
+void trace_dump_enum(const char *value)
+{
+ trace_dump_writes("<enum>");
+ trace_dump_escape(value);
+ trace_dump_writes("</enum>");
+}
+
+void trace_dump_array_begin(void)
+{
+ trace_dump_writes("<array>");
+}
+
+void trace_dump_array_end(void)
+{
+ trace_dump_writes("</array>");
+}
+
+void trace_dump_elem_begin(void)
+{
+ trace_dump_writes("<elem>");
+}
+
+void trace_dump_elem_end(void)
+{
+ trace_dump_writes("</elem>");
+}
+
+void trace_dump_struct_begin(const char *name)
+{
+ trace_dump_writef("<struct name='%s'>", name);
+}
+
+void trace_dump_struct_end(void)
+{
+ trace_dump_writes("</struct>");
+}
+
+void trace_dump_member_begin(const char *name)
+{
+ trace_dump_writef("<member name='%s'>", name);
+}
+
+void trace_dump_member_end(void)
+{
+ trace_dump_writes("</member>");
+}
+
+void trace_dump_null(void)
+{
+ trace_dump_writes("<null/>");
+}
+
+void trace_dump_ptr(const void *value)
+{
+ if(value)
+ trace_dump_writef("<ptr>0x%08lx</ptr>", (unsigned long)(uintptr_t)value);
+ else
+ trace_dump_null();
+}
diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h
new file mode 100644
index 0000000000..76a53731b3
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_dump.h
@@ -0,0 +1,132 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Trace data dumping primitives.
+ */
+
+#ifndef TR_DUMP_H
+#define TR_DUMP_H
+
+
+#include "pipe/p_compiler.h"
+
+
+boolean trace_dump_trace_begin(void);
+boolean trace_dump_enabled(void);
+void trace_dump_trace_end(void);
+void trace_dump_call_begin(const char *klass, const char *method);
+void trace_dump_call_end(void);
+void trace_dump_arg_begin(const char *name);
+void trace_dump_arg_end(void);
+void trace_dump_ret_begin(void);
+void trace_dump_ret_end(void);
+void trace_dump_bool(int value);
+void trace_dump_int(long long int value);
+void trace_dump_uint(long long unsigned value);
+void trace_dump_float(double value);
+void trace_dump_bytes(const void *data, long unsigned size);
+void trace_dump_string(const char *str);
+void trace_dump_enum(const char *value);
+void trace_dump_array_begin(void);
+void trace_dump_array_end(void);
+void trace_dump_elem_begin(void);
+void trace_dump_elem_end(void);
+void trace_dump_struct_begin(const char *name);
+void trace_dump_struct_end(void);
+void trace_dump_member_begin(const char *name);
+void trace_dump_member_end(void);
+void trace_dump_null(void);
+void trace_dump_ptr(const void *value);
+
+
+/*
+ * Code saving macros.
+ */
+
+#define trace_dump_arg(_type, _arg) \
+ do { \
+ trace_dump_arg_begin(#_arg); \
+ trace_dump_##_type(_arg); \
+ trace_dump_arg_end(); \
+ } while(0)
+
+#define trace_dump_ret(_type, _arg) \
+ do { \
+ trace_dump_ret_begin(); \
+ trace_dump_##_type(_arg); \
+ trace_dump_ret_end(); \
+ } while(0)
+
+#define trace_dump_array(_type, _obj, _size) \
+ do { \
+ unsigned long idx; \
+ trace_dump_array_begin(); \
+ for(idx = 0; idx < (_size); ++idx) { \
+ trace_dump_elem_begin(); \
+ trace_dump_##_type((_obj)[idx]); \
+ trace_dump_elem_end(); \
+ } \
+ trace_dump_array_end(); \
+ } while(0)
+
+#define trace_dump_struct_array(_type, _obj, _size) \
+ do { \
+ unsigned long idx; \
+ trace_dump_array_begin(); \
+ for(idx = 0; idx < (_size); ++idx) { \
+ trace_dump_elem_begin(); \
+ trace_dump_##_type(&(_obj)[idx]); \
+ trace_dump_elem_end(); \
+ } \
+ trace_dump_array_end(); \
+ } while(0)
+
+#define trace_dump_member(_type, _obj, _member) \
+ do { \
+ trace_dump_member_begin(#_member); \
+ trace_dump_##_type((_obj)->_member); \
+ trace_dump_member_end(); \
+ } while(0)
+
+#define trace_dump_arg_array(_type, _arg, _size) \
+ do { \
+ trace_dump_arg_begin(#_arg); \
+ trace_dump_array(_type, _arg, _size); \
+ trace_dump_arg_end(); \
+ } while(0)
+
+#define trace_dump_member_array(_type, _obj, _member) \
+ do { \
+ trace_dump_member_begin(#_member); \
+ trace_dump_array(_type, (_obj)->_member, sizeof((_obj)->_member)/sizeof((_obj)->_member[0])); \
+ trace_dump_member_end(); \
+ } while(0)
+
+
+#endif /* TR_DUMP_H */
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
new file mode 100644
index 0000000000..8789f86b1a
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -0,0 +1,469 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+
+#include "tr_dump.h"
+#include "tr_state.h"
+#include "tr_winsys.h"
+#include "tr_texture.h"
+#include "tr_screen.h"
+
+
+static const char *
+trace_screen_get_name(struct pipe_screen *_screen)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ const char *result;
+
+ trace_dump_call_begin("pipe_screen", "get_name");
+
+ trace_dump_arg(ptr, screen);
+
+ result = screen->get_name(screen);
+
+ trace_dump_ret(string, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static const char *
+trace_screen_get_vendor(struct pipe_screen *_screen)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ const char *result;
+
+ trace_dump_call_begin("pipe_screen", "get_vendor");
+
+ trace_dump_arg(ptr, screen);
+
+ result = screen->get_vendor(screen);
+
+ trace_dump_ret(string, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static int
+trace_screen_get_param(struct pipe_screen *_screen,
+ int param)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ int result;
+
+ trace_dump_call_begin("pipe_screen", "get_param");
+
+ trace_dump_arg(ptr, screen);
+ trace_dump_arg(int, param);
+
+ result = screen->get_param(screen, param);
+
+ trace_dump_ret(int, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static float
+trace_screen_get_paramf(struct pipe_screen *_screen,
+ int param)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ float result;
+
+ trace_dump_call_begin("pipe_screen", "get_paramf");
+
+ trace_dump_arg(ptr, screen);
+ trace_dump_arg(int, param);
+
+ result = screen->get_paramf(screen, param);
+
+ trace_dump_ret(float, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static boolean
+trace_screen_is_format_supported(struct pipe_screen *_screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage,
+ unsigned geom_flags)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ boolean result;
+
+ trace_dump_call_begin("pipe_screen", "is_format_supported");
+
+ trace_dump_arg(ptr, screen);
+ trace_dump_arg(format, format);
+ trace_dump_arg(int, target);
+ trace_dump_arg(uint, tex_usage);
+ trace_dump_arg(uint, geom_flags);
+
+ result = screen->is_format_supported(screen, format, target, tex_usage, geom_flags);
+
+ trace_dump_ret(bool, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static struct pipe_texture *
+trace_screen_texture_create(struct pipe_screen *_screen,
+ const struct pipe_texture *templat)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ struct pipe_texture *result;
+
+ trace_dump_call_begin("pipe_screen", "texture_create");
+
+ trace_dump_arg(ptr, screen);
+ trace_dump_arg(template, templat);
+
+ result = screen->texture_create(screen, templat);
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ result = trace_texture_create(tr_scr, result);
+
+ return result;
+}
+
+
+static struct pipe_texture *
+trace_screen_texture_blanket(struct pipe_screen *_screen,
+ const struct pipe_texture *templat,
+ const unsigned *ppitch,
+ struct pipe_buffer *buffer)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ unsigned pitch = *ppitch;
+ struct pipe_texture *result;
+
+ trace_dump_call_begin("pipe_screen", "texture_blanket");
+
+ trace_dump_arg(ptr, screen);
+ trace_dump_arg(template, templat);
+ trace_dump_arg(uint, pitch);
+ trace_dump_arg(ptr, buffer);
+
+ result = screen->texture_blanket(screen, templat, ppitch, buffer);
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ result = trace_texture_create(tr_scr, result);
+
+ return result;
+}
+
+
+static void
+trace_screen_texture_release(struct pipe_screen *_screen,
+ struct pipe_texture **ptexture)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ struct trace_texture *tr_tex;
+ struct pipe_texture *texture;
+
+ assert(ptexture);
+ if(*ptexture) {
+ tr_tex = trace_texture(tr_scr, *ptexture);
+ texture = tr_tex->texture;
+ assert(texture->screen == screen);
+ }
+ else
+ texture = NULL;
+
+ if (*ptexture) {
+ if (!--(*ptexture)->refcount) {
+ trace_dump_call_begin("pipe_screen", "texture_destroy");
+
+ trace_dump_arg(ptr, screen);
+ trace_dump_arg(ptr, texture);
+
+ trace_texture_destroy(tr_scr, *ptexture);
+
+ trace_dump_call_end();
+ }
+
+ *ptexture = NULL;
+ }
+}
+
+
+static struct pipe_surface *
+trace_screen_get_tex_surface(struct pipe_screen *_screen,
+ struct pipe_texture *texture,
+ unsigned face, unsigned level,
+ unsigned zslice,
+ unsigned usage)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ struct trace_texture *tr_tex;
+ struct pipe_surface *result;
+
+ assert(texture);
+ tr_tex = trace_texture(tr_scr, texture);
+ texture = tr_tex->texture;
+ assert(texture->screen == screen);
+
+ trace_dump_call_begin("pipe_screen", "get_tex_surface");
+
+ trace_dump_arg(ptr, screen);
+ trace_dump_arg(ptr, texture);
+ trace_dump_arg(uint, face);
+ trace_dump_arg(uint, level);
+ trace_dump_arg(uint, zslice);
+ trace_dump_arg(uint, usage);
+
+ result = screen->get_tex_surface(screen, texture, face, level, zslice, usage);
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ result = trace_surface_create(tr_tex, result);
+
+ return result;
+}
+
+
+static void
+trace_screen_tex_surface_release(struct pipe_screen *_screen,
+ struct pipe_surface **psurface)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ struct trace_texture *tr_tex;
+ struct trace_surface *tr_surf;
+ struct pipe_surface *surface;
+
+ assert(psurface);
+ if(*psurface) {
+ tr_tex = trace_texture(tr_scr, (*psurface)->texture);
+ tr_surf = trace_surface(tr_tex, *psurface);
+ surface = tr_surf->surface;
+ }
+ else
+ surface = NULL;
+
+ if (*psurface) {
+ if (!--(*psurface)->refcount) {
+ trace_dump_call_begin("pipe_screen", "tex_surface_destroy");
+
+ trace_dump_arg(ptr, screen);
+ trace_dump_arg(ptr, surface);
+
+ trace_surface_destroy(tr_tex, *psurface);
+
+ trace_dump_call_end();
+ }
+
+ *psurface = NULL;
+ }
+}
+
+
+static void *
+trace_screen_surface_map(struct pipe_screen *_screen,
+ struct pipe_surface *surface,
+ unsigned flags)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ struct trace_texture *tr_tex;
+ struct trace_surface *tr_surf;
+ void *map;
+
+ tr_tex = trace_texture(tr_scr, surface->texture);
+ tr_surf = trace_surface(tr_tex, surface);
+ surface = tr_surf->surface;
+
+ map = screen->surface_map(screen, surface, flags);
+ if(map) {
+ if(flags & PIPE_BUFFER_USAGE_CPU_WRITE) {
+ assert(!tr_surf->map);
+ tr_surf->map = map;
+ }
+ }
+
+ return map;
+}
+
+
+static void
+trace_screen_surface_unmap(struct pipe_screen *_screen,
+ struct pipe_surface *surface)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ struct trace_texture *tr_tex;
+ struct trace_surface *tr_surf;
+
+ tr_tex = trace_texture(tr_scr, surface->texture);
+ tr_surf = trace_surface(tr_tex, surface);
+ surface = tr_surf->surface;
+
+ if(tr_surf->map) {
+ size_t size = surface->nblocksy * surface->stride;
+
+ trace_dump_call_begin("pipe_winsys", "surface_write");
+
+ trace_dump_arg(ptr, screen);
+
+ trace_dump_arg(ptr, surface);
+
+ trace_dump_arg_begin("data");
+ trace_dump_bytes(tr_surf->map, size);
+ trace_dump_arg_end();
+
+ trace_dump_arg_begin("stride");
+ trace_dump_uint(surface->stride);
+ trace_dump_arg_end();
+
+ trace_dump_arg_begin("size");
+ trace_dump_uint(size);
+ trace_dump_arg_end();
+
+ trace_dump_call_end();
+
+ tr_surf->map = NULL;
+ }
+
+ screen->surface_unmap(screen, surface);
+}
+
+
+static void
+trace_screen_destroy(struct pipe_screen *_screen)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+
+ trace_dump_call_begin("pipe_screen", "destroy");
+
+ trace_dump_arg(ptr, screen);
+
+ screen->destroy(screen);
+
+ trace_dump_call_end();
+
+ trace_dump_trace_end();
+
+ FREE(tr_scr);
+}
+
+
+struct pipe_screen *
+trace_screen_create(struct pipe_screen *screen)
+{
+ struct trace_screen *tr_scr;
+ struct pipe_winsys *winsys;
+
+ if(!screen)
+ goto error1;
+
+ if(!trace_dump_trace_begin())
+ goto error1;
+
+ tr_scr = CALLOC_STRUCT(trace_screen);
+ if(!tr_scr)
+ goto error2;
+
+ winsys = trace_winsys_create(screen->winsys);
+ if(!winsys)
+ goto error3;
+
+ tr_scr->base.winsys = winsys;
+ tr_scr->base.destroy = trace_screen_destroy;
+ tr_scr->base.get_name = trace_screen_get_name;
+ tr_scr->base.get_vendor = trace_screen_get_vendor;
+ tr_scr->base.get_param = trace_screen_get_param;
+ tr_scr->base.get_paramf = trace_screen_get_paramf;
+ tr_scr->base.is_format_supported = trace_screen_is_format_supported;
+ tr_scr->base.texture_create = trace_screen_texture_create;
+ tr_scr->base.texture_blanket = trace_screen_texture_blanket;
+ tr_scr->base.texture_release = trace_screen_texture_release;
+ tr_scr->base.get_tex_surface = trace_screen_get_tex_surface;
+ tr_scr->base.tex_surface_release = trace_screen_tex_surface_release;
+ tr_scr->base.surface_map = trace_screen_surface_map;
+ tr_scr->base.surface_unmap = trace_screen_surface_unmap;
+
+ tr_scr->screen = screen;
+
+ trace_dump_call_begin("", "pipe_screen_create");
+ trace_dump_arg_begin("winsys");
+ trace_dump_ptr(screen->winsys);
+ trace_dump_arg_end();
+ trace_dump_ret(ptr, screen);
+ trace_dump_call_end();
+
+ return &tr_scr->base;
+
+error3:
+ FREE(tr_scr);
+error2:
+ trace_dump_trace_end();
+error1:
+ return screen;
+}
+
+
+struct trace_screen *
+trace_screen(struct pipe_screen *screen)
+{
+ assert(screen);
+ assert(screen->destroy == trace_screen_destroy);
+ return (struct trace_screen *)screen;
+}
diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h
new file mode 100644
index 0000000000..93fefdb9a5
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_screen.h
@@ -0,0 +1,60 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TR_SCREEN_H_
+#define TR_SCREEN_H_
+
+
+#include "pipe/p_screen.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct trace_screen
+{
+ struct pipe_screen base;
+
+ struct pipe_screen *screen;
+};
+
+
+struct trace_screen *
+trace_screen(struct pipe_screen *screen);
+
+
+struct pipe_screen *
+trace_screen_create(struct pipe_screen *screen);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* TR_SCREEN_H_ */
diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c
new file mode 100644
index 0000000000..986d939e0c
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_state.c
@@ -0,0 +1,464 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_compiler.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "tr_dump.h"
+#include "tr_state.h"
+
+
+void trace_dump_format(enum pipe_format format)
+{
+ trace_dump_enum(pf_name(format) );
+}
+
+
+void trace_dump_block(const struct pipe_format_block *block)
+{
+ trace_dump_struct_begin("pipe_format_block");
+ trace_dump_member(uint, block, size);
+ trace_dump_member(uint, block, width);
+ trace_dump_member(uint, block, height);
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_template(const struct pipe_texture *templat)
+{
+ if(!templat) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_texture");
+
+ trace_dump_member(int, templat, target);
+ trace_dump_member(format, templat, format);
+
+ trace_dump_member_begin("width");
+ trace_dump_array(uint, templat->width, 1);
+ trace_dump_member_end();
+
+ trace_dump_member_begin("height");
+ trace_dump_array(uint, templat->height, 1);
+ trace_dump_member_end();
+
+ trace_dump_member_begin("depth");
+ trace_dump_array(uint, templat->depth, 1);
+ trace_dump_member_end();
+
+ trace_dump_member_begin("block");
+ trace_dump_block(&templat->block);
+ trace_dump_member_end();
+
+ trace_dump_member(uint, templat, last_level);
+ trace_dump_member(uint, templat, tex_usage);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_rasterizer_state");
+
+ trace_dump_member(bool, state, flatshade);
+ trace_dump_member(bool, state, light_twoside);
+ trace_dump_member(uint, state, front_winding);
+ trace_dump_member(uint, state, cull_mode);
+ trace_dump_member(uint, state, fill_cw);
+ trace_dump_member(uint, state, fill_ccw);
+ trace_dump_member(bool, state, offset_cw);
+ trace_dump_member(bool, state, offset_ccw);
+ trace_dump_member(bool, state, scissor);
+ trace_dump_member(bool, state, poly_smooth);
+ trace_dump_member(bool, state, poly_stipple_enable);
+ trace_dump_member(bool, state, point_smooth);
+ trace_dump_member(bool, state, point_sprite);
+ trace_dump_member(bool, state, point_size_per_vertex);
+ trace_dump_member(bool, state, multisample);
+ trace_dump_member(bool, state, line_smooth);
+ trace_dump_member(bool, state, line_stipple_enable);
+ trace_dump_member(uint, state, line_stipple_factor);
+ trace_dump_member(uint, state, line_stipple_pattern);
+ trace_dump_member(bool, state, line_last_pixel);
+ trace_dump_member(bool, state, bypass_clipping);
+ trace_dump_member(bool, state, bypass_vs);
+ trace_dump_member(bool, state, origin_lower_left);
+ trace_dump_member(bool, state, flatshade_first);
+ trace_dump_member(bool, state, gl_rasterization_rules);
+
+ trace_dump_member(float, state, line_width);
+ trace_dump_member(float, state, point_size);
+ trace_dump_member(float, state, point_size_min);
+ trace_dump_member(float, state, point_size_max);
+ trace_dump_member(float, state, offset_units);
+ trace_dump_member(float, state, offset_scale);
+
+ trace_dump_member_array(uint, state, sprite_coord_mode);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_poly_stipple(const struct pipe_poly_stipple *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_poly_stipple");
+
+ trace_dump_member_begin("stipple");
+ trace_dump_array(uint,
+ state->stipple,
+ Elements(state->stipple));
+ trace_dump_member_end();
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_viewport_state(const struct pipe_viewport_state *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_viewport_state");
+
+ trace_dump_member_array(float, state, scale);
+ trace_dump_member_array(float, state, translate);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_scissor_state(const struct pipe_scissor_state *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_scissor_state");
+
+ trace_dump_member(uint, state, minx);
+ trace_dump_member(uint, state, miny);
+ trace_dump_member(uint, state, maxx);
+ trace_dump_member(uint, state, maxy);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_clip_state(const struct pipe_clip_state *state)
+{
+ unsigned i;
+
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_clip_state");
+
+ trace_dump_member_begin("ucp");
+ trace_dump_array_begin();
+ for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) {
+ trace_dump_elem_begin();
+ trace_dump_array(float, state->ucp[i], 4);
+ trace_dump_elem_end();
+ }
+ trace_dump_array_end();
+ trace_dump_member_end();
+
+ trace_dump_member(uint, state, nr);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_constant_buffer(const struct pipe_constant_buffer *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_constant_buffer");
+
+ trace_dump_member(ptr, state, buffer);
+ trace_dump_member(uint, state, size);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_shader_state(const struct pipe_shader_state *state)
+{
+ static char str[8192];
+
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ tgsi_dump_str(state->tokens, 0, str, sizeof(str));
+
+ trace_dump_struct_begin("pipe_shader_state");
+
+ trace_dump_member_begin("tokens");
+ trace_dump_string(str);
+ trace_dump_member_end();
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state)
+{
+ unsigned i;
+
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_depth_stencil_alpha_state");
+
+ trace_dump_member_begin("depth");
+ trace_dump_struct_begin("pipe_depth_state");
+ trace_dump_member(bool, &state->depth, enabled);
+ trace_dump_member(bool, &state->depth, writemask);
+ trace_dump_member(uint, &state->depth, func);
+ trace_dump_member(bool, &state->depth, occlusion_count);
+ trace_dump_struct_end();
+ trace_dump_member_end();
+
+ trace_dump_member_begin("stencil");
+ trace_dump_array_begin();
+ for(i = 0; i < Elements(state->stencil); ++i) {
+ trace_dump_elem_begin();
+ trace_dump_struct_begin("pipe_stencil_state");
+ trace_dump_member(bool, &state->stencil[i], enabled);
+ trace_dump_member(uint, &state->stencil[i], func);
+ trace_dump_member(uint, &state->stencil[i], fail_op);
+ trace_dump_member(uint, &state->stencil[i], zpass_op);
+ trace_dump_member(uint, &state->stencil[i], zfail_op);
+ trace_dump_member(uint, &state->stencil[i], ref_value);
+ trace_dump_member(uint, &state->stencil[i], value_mask);
+ trace_dump_member(uint, &state->stencil[i], write_mask);
+ trace_dump_struct_end();
+ trace_dump_elem_end();
+ }
+ trace_dump_array_end();
+ trace_dump_member_end();
+
+ trace_dump_member_begin("alpha");
+ trace_dump_struct_begin("pipe_alpha_state");
+ trace_dump_member(bool, &state->alpha, enabled);
+ trace_dump_member(uint, &state->alpha, func);
+ trace_dump_member(float, &state->alpha, ref);
+ trace_dump_struct_end();
+ trace_dump_member_end();
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_blend_state(const struct pipe_blend_state *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_blend_state");
+
+ trace_dump_member(bool, state, blend_enable);
+
+ trace_dump_member(uint, state, rgb_func);
+ trace_dump_member(uint, state, rgb_src_factor);
+ trace_dump_member(uint, state, rgb_dst_factor);
+
+ trace_dump_member(uint, state, alpha_func);
+ trace_dump_member(uint, state, alpha_src_factor);
+ trace_dump_member(uint, state, alpha_dst_factor);
+
+ trace_dump_member(bool, state, logicop_enable);
+ trace_dump_member(uint, state, logicop_func);
+
+ trace_dump_member(uint, state, colormask);
+ trace_dump_member(bool, state, dither);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_blend_color(const struct pipe_blend_color *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_blend_color");
+
+ trace_dump_member_array(float, state, color);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state)
+{
+ trace_dump_struct_begin("pipe_framebuffer_state");
+
+ trace_dump_member(uint, state, width);
+ trace_dump_member(uint, state, height);
+ trace_dump_member(uint, state, num_cbufs);
+ trace_dump_member_array(ptr, state, cbufs);
+ trace_dump_member(ptr, state, zsbuf);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_sampler_state(const struct pipe_sampler_state *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_sampler_state");
+
+ trace_dump_member(uint, state, wrap_s);
+ trace_dump_member(uint, state, wrap_t);
+ trace_dump_member(uint, state, wrap_r);
+ trace_dump_member(uint, state, min_img_filter);
+ trace_dump_member(uint, state, min_mip_filter);
+ trace_dump_member(uint, state, mag_img_filter);
+ trace_dump_member(bool, state, compare_mode);
+ trace_dump_member(uint, state, compare_func);
+ trace_dump_member(bool, state, normalized_coords);
+ trace_dump_member(uint, state, prefilter);
+ trace_dump_member(float, state, shadow_ambient);
+ trace_dump_member(float, state, lod_bias);
+ trace_dump_member(float, state, min_lod);
+ trace_dump_member(float, state, max_lod);
+ trace_dump_member_array(float, state, border_color);
+ trace_dump_member(float, state, max_anisotropy);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_surface(const struct pipe_surface *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_surface");
+
+ trace_dump_member(ptr, state, buffer);
+ trace_dump_member(format, state, format);
+ trace_dump_member(uint, state, status);
+ trace_dump_member(uint, state, clear_value);
+ trace_dump_member(uint, state, width);
+ trace_dump_member(uint, state, height);
+
+ trace_dump_member_begin("block");
+ trace_dump_block(&state->block);
+ trace_dump_member_end();
+
+ trace_dump_member(uint, state, nblocksx);
+ trace_dump_member(uint, state, nblocksy);
+ trace_dump_member(uint, state, stride);
+ trace_dump_member(uint, state, layout);
+ trace_dump_member(uint, state, offset);
+ trace_dump_member(uint, state, refcount);
+ trace_dump_member(uint, state, usage);
+
+ trace_dump_member(ptr, state, texture);
+ trace_dump_member(uint, state, face);
+ trace_dump_member(uint, state, level);
+ trace_dump_member(uint, state, zslice);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_vertex_buffer");
+
+ trace_dump_member(uint, state, pitch);
+ trace_dump_member(uint, state, max_index);
+ trace_dump_member(uint, state, buffer_offset);
+ trace_dump_member(ptr, state, buffer);
+
+ trace_dump_struct_end();
+}
+
+
+void trace_dump_vertex_element(const struct pipe_vertex_element *state)
+{
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_vertex_element");
+
+ trace_dump_member(uint, state, src_offset);
+
+ trace_dump_member(uint, state, vertex_buffer_index);
+ trace_dump_member(uint, state, nr_components);
+
+ trace_dump_member(format, state, src_format);
+
+ trace_dump_struct_end();
+}
diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h
new file mode 100644
index 0000000000..5ae533dc66
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_state.h
@@ -0,0 +1,76 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TR_STATE_H
+#define TR_STATE_H
+
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+
+
+void trace_dump_format(enum pipe_format format);
+
+void trace_dump_block(const struct pipe_format_block *block);
+
+void trace_dump_template(const struct pipe_texture *templat);
+
+
+void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state);
+
+void trace_dump_poly_stipple(const struct pipe_poly_stipple *state);
+
+void trace_dump_viewport_state(const struct pipe_viewport_state *state);
+
+void trace_dump_scissor_state(const struct pipe_scissor_state *state);
+
+void trace_dump_clip_state(const struct pipe_clip_state *state);
+
+void trace_dump_constant_buffer(const struct pipe_constant_buffer *state);
+
+void trace_dump_token(const struct tgsi_token *token);
+
+void trace_dump_shader_state(const struct pipe_shader_state *state);
+
+void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state);
+
+void trace_dump_blend_state(const struct pipe_blend_state *state);
+
+void trace_dump_blend_color(const struct pipe_blend_color *state);
+
+void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state);
+
+void trace_dump_sampler_state(const struct pipe_sampler_state *state);
+
+void trace_dump_surface(const struct pipe_surface *state);
+
+void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state);
+
+void trace_dump_vertex_element(const struct pipe_vertex_element *state);
+
+
+#endif /* TR_STATE_H */
diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c
new file mode 100644
index 0000000000..1cc4f0bd43
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_texture.c
@@ -0,0 +1,111 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_inlines.h"
+#include "util/u_hash_table.h"
+#include "util/u_memory.h"
+
+#include "tr_screen.h"
+#include "tr_texture.h"
+
+
+struct pipe_texture *
+trace_texture_create(struct trace_screen *tr_scr,
+ struct pipe_texture *texture)
+{
+ struct trace_texture *tr_tex;
+
+ if(!texture)
+ goto error;
+
+ assert(texture->screen == tr_scr->screen);
+
+ tr_tex = CALLOC_STRUCT(trace_texture);
+ if(!tr_tex)
+ goto error;
+
+ memcpy(&tr_tex->base, texture, sizeof(struct pipe_texture));
+ tr_tex->base.screen = &tr_scr->base;
+ tr_tex->texture = texture;
+
+ return &tr_tex->base;
+
+error:
+ pipe_texture_reference(&texture, NULL);
+ return NULL;
+}
+
+
+void
+trace_texture_destroy(struct trace_screen *tr_scr,
+ struct pipe_texture *texture)
+{
+ struct trace_texture *tr_tex = trace_texture(tr_scr, texture);
+ pipe_texture_reference(&tr_tex->texture, NULL);
+ FREE(tr_tex);
+}
+
+
+struct pipe_surface *
+trace_surface_create(struct trace_texture *tr_tex,
+ struct pipe_surface *surface)
+{
+ struct trace_surface *tr_surf;
+
+ if(!surface)
+ goto error;
+
+ assert(surface->texture == tr_tex->texture);
+
+ tr_surf = CALLOC_STRUCT(trace_surface);
+ if(!tr_surf)
+ goto error;
+
+ memcpy(&tr_surf->base, surface, sizeof(struct pipe_surface));
+
+ tr_surf->base.texture = NULL;
+ pipe_texture_reference(&tr_surf->base.texture, &tr_tex->base);
+ tr_surf->surface = surface;
+
+ return &tr_surf->base;
+
+error:
+ pipe_surface_reference(&surface, NULL);
+ return NULL;
+}
+
+
+void
+trace_surface_destroy(struct trace_texture *tr_tex,
+ struct pipe_surface *surface)
+{
+ struct trace_surface *tr_surf = trace_surface(tr_tex, surface);
+ pipe_texture_reference(&tr_surf->base.texture, NULL);
+ pipe_surface_reference(&tr_surf->surface, NULL);
+ FREE(tr_surf);
+}
+
diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h
new file mode 100644
index 0000000000..9e72edb8a3
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_texture.h
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TR_TEXTURE_H_
+#define TR_TEXTURE_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+#include "tr_screen.h"
+
+
+struct trace_texture
+{
+ struct pipe_texture base;
+
+ struct pipe_texture *texture;
+};
+
+
+struct trace_surface
+{
+ struct pipe_surface base;
+
+ struct pipe_surface *surface;
+
+ void *map;
+};
+
+
+static INLINE struct trace_texture *
+trace_texture(struct trace_screen *tr_scr,
+ struct pipe_texture *texture)
+{
+ if(!texture)
+ return NULL;
+ assert(texture->screen == &tr_scr->base);
+ return (struct trace_texture *)texture;
+}
+
+
+static INLINE struct trace_surface *
+trace_surface(struct trace_texture *tr_tex,
+ struct pipe_surface *surface)
+{
+ if(!surface)
+ return NULL;
+ assert(surface->texture == &tr_tex->base);
+ return (struct trace_surface *)surface;
+}
+
+
+struct pipe_texture *
+trace_texture_create(struct trace_screen *tr_scr,
+ struct pipe_texture *texture);
+
+void
+trace_texture_destroy(struct trace_screen *tr_scr,
+ struct pipe_texture *texture);
+
+struct pipe_surface *
+trace_surface_create(struct trace_texture *tr_tex,
+ struct pipe_surface *surface);
+
+void
+trace_surface_destroy(struct trace_texture *tr_tex,
+ struct pipe_surface *surface);
+
+
+#endif /* TR_TEXTURE_H_ */
diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c
new file mode 100644
index 0000000000..c4148fe810
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_winsys.c
@@ -0,0 +1,450 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+#include "util/u_hash_table.h"
+
+#include "tr_dump.h"
+#include "tr_state.h"
+#include "tr_screen.h"
+#include "tr_texture.h"
+#include "tr_winsys.h"
+
+
+static unsigned trace_buffer_hash(void *buffer)
+{
+ return (unsigned)(uintptr_t)buffer;
+}
+
+
+static int trace_buffer_compare(void *buffer1, void *buffer2)
+{
+ return (char *)buffer2 - (char *)buffer1;
+}
+
+
+static const char *
+trace_winsys_get_name(struct pipe_winsys *_winsys)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+ const char *result;
+
+ trace_dump_call_begin("pipe_winsys", "get_name");
+
+ trace_dump_arg(ptr, winsys);
+
+ result = winsys->get_name(winsys);
+
+ trace_dump_ret(string, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static void
+trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys,
+ struct pipe_surface *surface,
+ void *context_private)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+
+ assert(surface);
+ if(surface->texture) {
+ struct trace_screen *tr_scr = trace_screen(surface->texture->screen);
+ struct trace_texture *tr_tex = trace_texture(tr_scr, surface->texture);
+ struct trace_surface *tr_surf = trace_surface(tr_tex, surface);
+ surface = tr_surf->surface;
+ }
+
+ trace_dump_call_begin("pipe_winsys", "flush_frontbuffer");
+
+ trace_dump_arg(ptr, winsys);
+ trace_dump_arg(ptr, surface);
+ /* XXX: hide, as there is nothing we can do with this
+ trace_dump_arg(ptr, context_private);
+ */
+
+ winsys->flush_frontbuffer(winsys, surface, context_private);
+
+ trace_dump_call_end();
+}
+
+
+static struct pipe_buffer *
+trace_winsys_surface_buffer_create(struct pipe_winsys *_winsys,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned usage,
+ unsigned *pstride)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+ unsigned stride;
+ struct pipe_buffer *result;
+
+ trace_dump_call_begin("pipe_winsys", "surface_buffer_create");
+
+ trace_dump_arg(ptr, winsys);
+ trace_dump_arg(uint, width);
+ trace_dump_arg(uint, height);
+ trace_dump_arg(format, format);
+ trace_dump_arg(uint, usage);
+
+ result = winsys->surface_buffer_create(winsys,
+ width, height,
+ format,
+ usage,
+ pstride);
+
+ stride = *pstride;
+
+ trace_dump_arg(uint, stride);
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static struct pipe_buffer *
+trace_winsys_buffer_create(struct pipe_winsys *_winsys,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+ struct pipe_buffer *buffer;
+
+ trace_dump_call_begin("pipe_winsys", "buffer_create");
+
+ trace_dump_arg(ptr, winsys);
+ trace_dump_arg(uint, alignment);
+ trace_dump_arg(uint, usage);
+ trace_dump_arg(uint, size);
+
+ buffer = winsys->buffer_create(winsys, alignment, usage, size);
+
+ trace_dump_ret(ptr, buffer);
+
+ trace_dump_call_end();
+
+ /* Zero the buffer to avoid dumping uninitialized memory */
+ if(buffer->usage & PIPE_BUFFER_USAGE_CPU_WRITE) {
+ void *map;
+ map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+ if(map) {
+ memset(map, 0, buffer->size);
+ winsys->buffer_unmap(winsys, buffer);
+ }
+ }
+
+ return buffer;
+}
+
+
+static struct pipe_buffer *
+trace_winsys_user_buffer_create(struct pipe_winsys *_winsys,
+ void *data,
+ unsigned size)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+ struct pipe_buffer *result;
+
+ trace_dump_call_begin("pipe_winsys", "user_buffer_create");
+
+ trace_dump_arg(ptr, winsys);
+ trace_dump_arg_begin("data");
+ trace_dump_bytes(data, size);
+ trace_dump_arg_end();
+ trace_dump_arg(uint, size);
+
+ result = winsys->user_buffer_create(winsys, data, size);
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ /* XXX: Mark the user buffers. (we should wrap pipe_buffers, but is is
+ * impossible to do so while texture-less surfaces are still around */
+ if(result) {
+ assert(!(result->usage & TRACE_BUFFER_USAGE_USER));
+ result->usage |= TRACE_BUFFER_USAGE_USER;
+ }
+
+ return result;
+}
+
+
+void
+trace_winsys_user_buffer_update(struct pipe_winsys *_winsys,
+ struct pipe_buffer *buffer)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+ const void *map;
+
+ if(buffer && buffer->usage & TRACE_BUFFER_USAGE_USER) {
+ map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ if(map) {
+ trace_dump_call_begin("pipe_winsys", "buffer_write");
+
+ trace_dump_arg(ptr, winsys);
+
+ trace_dump_arg(ptr, buffer);
+
+ trace_dump_arg_begin("data");
+ trace_dump_bytes(map, buffer->size);
+ trace_dump_arg_end();
+
+ trace_dump_arg_begin("size");
+ trace_dump_uint(buffer->size);
+ trace_dump_arg_end();
+
+ trace_dump_call_end();
+
+ winsys->buffer_unmap(winsys, buffer);
+ }
+ }
+}
+
+
+static void *
+trace_winsys_buffer_map(struct pipe_winsys *_winsys,
+ struct pipe_buffer *buffer,
+ unsigned usage)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+ void *map;
+
+ map = winsys->buffer_map(winsys, buffer, usage);
+ if(map) {
+ if(usage & PIPE_BUFFER_USAGE_CPU_WRITE) {
+ assert(!hash_table_get(tr_ws->buffer_maps, buffer));
+ hash_table_set(tr_ws->buffer_maps, buffer, map);
+ }
+ }
+
+ return map;
+}
+
+
+static void
+trace_winsys_buffer_unmap(struct pipe_winsys *_winsys,
+ struct pipe_buffer *buffer)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+ const void *map;
+
+ map = hash_table_get(tr_ws->buffer_maps, buffer);
+ if(map) {
+ trace_dump_call_begin("pipe_winsys", "buffer_write");
+
+ trace_dump_arg(ptr, winsys);
+
+ trace_dump_arg(ptr, buffer);
+
+ trace_dump_arg_begin("data");
+ trace_dump_bytes(map, buffer->size);
+ trace_dump_arg_end();
+
+ trace_dump_arg_begin("size");
+ trace_dump_uint(buffer->size);
+ trace_dump_arg_end();
+
+ trace_dump_call_end();
+
+ hash_table_remove(tr_ws->buffer_maps, buffer);
+ }
+
+ winsys->buffer_unmap(winsys, buffer);
+}
+
+
+static void
+trace_winsys_buffer_destroy(struct pipe_winsys *_winsys,
+ struct pipe_buffer *buffer)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+
+ trace_dump_call_begin("pipe_winsys", "buffer_destroy");
+
+ trace_dump_arg(ptr, winsys);
+ trace_dump_arg(ptr, buffer);
+
+ winsys->buffer_destroy(winsys, buffer);
+
+ trace_dump_call_end();
+}
+
+
+static void
+trace_winsys_fence_reference(struct pipe_winsys *_winsys,
+ struct pipe_fence_handle **pdst,
+ struct pipe_fence_handle *src)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+ struct pipe_fence_handle *dst = *pdst;
+
+ trace_dump_call_begin("pipe_winsys", "fence_reference");
+
+ trace_dump_arg(ptr, winsys);
+ trace_dump_arg(ptr, dst);
+ trace_dump_arg(ptr, src);
+
+ winsys->fence_reference(winsys, pdst, src);
+
+ trace_dump_call_end();
+}
+
+
+static int
+trace_winsys_fence_signalled(struct pipe_winsys *_winsys,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+ int result;
+
+ trace_dump_call_begin("pipe_winsys", "fence_signalled");
+
+ trace_dump_arg(ptr, winsys);
+ trace_dump_arg(ptr, fence);
+ trace_dump_arg(uint, flag);
+
+ result = winsys->fence_signalled(winsys, fence, flag);
+
+ trace_dump_ret(int, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static int
+trace_winsys_fence_finish(struct pipe_winsys *_winsys,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+ int result;
+
+ trace_dump_call_begin("pipe_winsys", "fence_finish");
+
+ trace_dump_arg(ptr, winsys);
+ trace_dump_arg(ptr, fence);
+ trace_dump_arg(uint, flag);
+
+ result = winsys->fence_finish(winsys, fence, flag);
+
+ trace_dump_ret(int, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static void
+trace_winsys_destroy(struct pipe_winsys *_winsys)
+{
+ struct trace_winsys *tr_ws = trace_winsys(_winsys);
+ struct pipe_winsys *winsys = tr_ws->winsys;
+
+ trace_dump_call_begin("pipe_winsys", "destroy");
+
+ trace_dump_arg(ptr, winsys);
+
+ /*
+ winsys->destroy(winsys);
+ */
+
+ trace_dump_call_end();
+
+ hash_table_destroy(tr_ws->buffer_maps);
+
+ FREE(tr_ws);
+}
+
+
+struct pipe_winsys *
+trace_winsys_create(struct pipe_winsys *winsys)
+{
+ struct trace_winsys *tr_ws;
+
+ if(!winsys)
+ goto error1;
+
+ tr_ws = CALLOC_STRUCT(trace_winsys);
+ if(!tr_ws)
+ goto error1;
+
+ tr_ws->base.destroy = trace_winsys_destroy;
+ tr_ws->base.get_name = trace_winsys_get_name;
+ tr_ws->base.flush_frontbuffer = trace_winsys_flush_frontbuffer;
+ tr_ws->base.surface_buffer_create = trace_winsys_surface_buffer_create;
+ tr_ws->base.buffer_create = trace_winsys_buffer_create;
+ tr_ws->base.user_buffer_create = trace_winsys_user_buffer_create;
+ tr_ws->base.buffer_map = trace_winsys_buffer_map;
+ tr_ws->base.buffer_unmap = trace_winsys_buffer_unmap;
+ tr_ws->base.buffer_destroy = trace_winsys_buffer_destroy;
+ tr_ws->base.fence_reference = trace_winsys_fence_reference;
+ tr_ws->base.fence_signalled = trace_winsys_fence_signalled;
+ tr_ws->base.fence_finish = trace_winsys_fence_finish;
+
+ tr_ws->winsys = winsys;
+
+ tr_ws->buffer_maps = hash_table_create(trace_buffer_hash,
+ trace_buffer_compare);
+ if(!tr_ws->buffer_maps)
+ goto error2;
+
+ trace_dump_call_begin("", "pipe_winsys_create");
+ trace_dump_ret(ptr, winsys);
+ trace_dump_call_end();
+
+ return &tr_ws->base;
+
+error2:
+ FREE(tr_ws);
+error1:
+ return winsys;
+}
diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h
new file mode 100644
index 0000000000..062ddf66a0
--- /dev/null
+++ b/src/gallium/drivers/trace/tr_winsys.h
@@ -0,0 +1,76 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TR_WINSYS_H_
+#define TR_WINSYS_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_winsys.h"
+
+
+/**
+ * It often happens that new data is written directly to the user buffers
+ * without mapping/unmapping. This flag marks user buffers, so that their
+ * contents can be dumpped before being used by the pipe context.
+ */
+#define TRACE_BUFFER_USAGE_USER (1 << 31)
+
+
+struct hash_table;
+
+
+struct trace_winsys
+{
+ struct pipe_winsys base;
+
+ struct pipe_winsys *winsys;
+
+ struct hash_table *buffer_maps;
+};
+
+
+static INLINE struct trace_winsys *
+trace_winsys(struct pipe_winsys *winsys)
+{
+ assert(winsys);
+ return (struct trace_winsys *)winsys;
+}
+
+
+
+struct pipe_winsys *
+trace_winsys_create(struct pipe_winsys *winsys);
+
+
+void
+trace_winsys_user_buffer_update(struct pipe_winsys *winsys,
+ struct pipe_buffer *buffer);
+
+
+#endif /* TR_WINSYS_H_ */
diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl
new file mode 100644
index 0000000000..9cd621e7ab
--- /dev/null
+++ b/src/gallium/drivers/trace/trace.xsl
@@ -0,0 +1,185 @@
+<?xml version="1.0"?>
+
+<!--
+
+Copyright 2008 Tungsten Graphics, Inc.
+
+This program is free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+!-->
+
+<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+ <xsl:output method="html" />
+
+ <xsl:strip-space elements="*" />
+
+ <xsl:template match="/trace">
+ <html>
+ <head>
+ <title>Gallium Trace</title>
+ </head>
+ <style>
+ body {
+ font-family: verdana, sans-serif;
+ font-size: 11px;
+ font-weight: normal;
+ text-align : left;
+ }
+
+ .fun {
+ font-weight: bold;
+ }
+
+ .var {
+ font-style: italic;
+ }
+
+ .typ {
+ display: none;
+ }
+
+ .lit {
+ color: #0000ff;
+ }
+
+ .ptr {
+ color: #008000;
+ }
+ </style>
+ <body>
+ <ol class="calls">
+ <xsl:apply-templates/>
+ </ol>
+ </body>
+ </html>
+ </xsl:template>
+
+ <xsl:template match="call">
+ <li>
+ <span class="fun">
+ <xsl:value-of select="@class"/>
+ <xsl:text>::</xsl:text>
+ <xsl:value-of select="@method"/>
+ </span>
+ <xsl:text>(</xsl:text>
+ <xsl:apply-templates select="arg"/>
+ <xsl:text>)</xsl:text>
+ <xsl:apply-templates select="ret"/>
+ </li>
+ </xsl:template>
+
+ <xsl:template match="arg|member">
+ <xsl:apply-templates select="@name"/>
+ <xsl:text> = </xsl:text>
+ <xsl:apply-templates />
+ <xsl:if test="position() != last()">
+ <xsl:text>, </xsl:text>
+ </xsl:if>
+ </xsl:template>
+
+ <xsl:template match="ret">
+ <xsl:text> = </xsl:text>
+ <xsl:apply-templates />
+ </xsl:template>
+
+ <xsl:template match="bool|int|uint|float|enum">
+ <span class="lit">
+ <xsl:value-of select="text()"/>
+ </span>
+ </xsl:template>
+
+ <xsl:template match="bytes">
+ <span class="lit">
+ <xsl:text>...</xsl:text>
+ </span>
+ </xsl:template>
+
+ <xsl:template match="string">
+ <span class="lit">
+ <xsl:text>"</xsl:text>
+ <xsl:call-template name="break">
+ <xsl:with-param name="text" select="text()"/>
+ </xsl:call-template>
+ <xsl:text>"</xsl:text>
+ </span>
+ </xsl:template>
+
+ <xsl:template match="array|struct">
+ <xsl:text>{</xsl:text>
+ <xsl:apply-templates />
+ <xsl:text>}</xsl:text>
+ </xsl:template>
+
+ <xsl:template match="elem">
+ <xsl:apply-templates />
+ <xsl:if test="position() != last()">
+ <xsl:text>, </xsl:text>
+ </xsl:if>
+ </xsl:template>
+
+ <xsl:template match="null">
+ <span class="ptr">
+ <xsl:text>NULL</xsl:text>
+ </span>
+ </xsl:template>
+
+ <xsl:template match="ptr">
+ <span class="ptr">
+ <xsl:value-of select="text()"/>
+ </span>
+ </xsl:template>
+
+ <xsl:template match="@name">
+ <span class="var">
+ <xsl:value-of select="."/>
+ </span>
+ </xsl:template>
+
+ <xsl:template name="break">
+ <xsl:param name="text" select="."/>
+ <xsl:choose>
+ <xsl:when test="contains($text, '&#xa;')">
+ <xsl:value-of select="substring-before($text, '&#xa;')"/>
+ <br/>
+ <xsl:call-template name="break">
+ <xsl:with-param name="text" select="substring-after($text, '&#xa;')"/>
+ </xsl:call-template>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="$text"/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:template>
+
+ <xsl:template name="replace">
+ <xsl:param name="text"/>
+ <xsl:param name="from"/>
+ <xsl:param name="to"/>
+ <xsl:choose>
+ <xsl:when test="contains($text,$from)">
+ <xsl:value-of select="concat(substring-before($text,$from),$to)"/>
+ <xsl:call-template name="replace">
+ <xsl:with-param name="text" select="substring-after($text,$from)"/>
+ <xsl:with-param name="from" select="$from"/>
+ <xsl:with-param name="to" select="$to"/>
+ </xsl:call-template>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="$text"/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:template>
+
+</xsl:transform>