summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/cell/common.h3
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h8
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.c20
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_derived.c8
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c9
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c8
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.h4
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h22
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_fetch.c5
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_shader.c15
-rw-r--r--src/gallium/drivers/failover/fo_context.c42
-rw-r--r--src/gallium/drivers/failover/fo_winsys.h3
-rw-r--r--src/gallium/drivers/i915/i915_buffer.c1
-rw-r--r--src/gallium/drivers/i915/i915_context.c20
-rw-r--r--src/gallium/drivers/i915/i915_state.c9
-rw-r--r--src/gallium/drivers/i915/i915_state_derived.c10
-rw-r--r--src/gallium/drivers/i965/brw_clip.c12
-rw-r--r--src/gallium/drivers/i965/brw_context.h9
-rw-r--r--src/gallium/drivers/i965/brw_disasm.h2
-rw-r--r--src/gallium/drivers/i965/brw_draw.c20
-rw-r--r--src/gallium/drivers/i965/brw_eu_emit.c2
-rw-r--r--src/gallium/drivers/i965/brw_pipe_clear.c8
-rw-r--r--src/gallium/drivers/i965/brw_pipe_fb.c11
-rw-r--r--src/gallium/drivers/i965/brw_pipe_sampler.c2
-rw-r--r--src/gallium/drivers/i965/brw_pipe_shader.c7
-rw-r--r--src/gallium/drivers/i965/brw_vs_emit.c39
-rw-r--r--src/gallium/drivers/i965/brw_wm_emit.c2
-rw-r--r--src/gallium/drivers/identity/id_context.c42
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile1
-rw-r--r--src/gallium/drivers/llvmpipe/README39
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript29
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_flow.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_aos.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_misc.cpp14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c29
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_draw_arrays.c26
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c33
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_prim_vbuf.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_quad.h9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_blend.c18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c41
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c23
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_rasterizer.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_surface.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vs.c12
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c22
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_cache.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.h53
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample_c.c1713
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_winsys.h2
-rw-r--r--src/gallium/drivers/nouveau/nouveau_push.h93
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c24
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h3
-rw-r--r--src/gallium/drivers/nouveau/nouveau_stateobj.h291
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h3
-rw-r--r--src/gallium/drivers/nv04/nv04_context.c44
-rw-r--r--src/gallium/drivers/nv04/nv04_context.h8
-rw-r--r--src/gallium/drivers/nv04/nv04_fragtex.c16
-rw-r--r--src/gallium/drivers/nv04/nv04_prim_vbuf.c84
-rw-r--r--src/gallium/drivers/nv04/nv04_screen.c6
-rw-r--r--src/gallium/drivers/nv04/nv04_state.c60
-rw-r--r--src/gallium/drivers/nv04/nv04_state_emit.c72
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.c66
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.h4
-rw-r--r--src/gallium/drivers/nv04/nv04_transfer.c24
-rw-r--r--src/gallium/drivers/nv04/nv04_vbo.c12
-rw-r--r--src/gallium/drivers/nv10/nv10_context.c367
-rw-r--r--src/gallium/drivers/nv10/nv10_context.h8
-rw-r--r--src/gallium/drivers/nv10/nv10_fragtex.c28
-rw-r--r--src/gallium/drivers/nv10/nv10_prim_vbuf.c34
-rw-r--r--src/gallium/drivers/nv10/nv10_screen.c4
-rw-r--r--src/gallium/drivers/nv10/nv10_state_emit.c166
-rw-r--r--src/gallium/drivers/nv10/nv10_transfer.c24
-rw-r--r--src/gallium/drivers/nv10/nv10_vbo.c11
-rw-r--r--src/gallium/drivers/nv20/nv20_context.c530
-rw-r--r--src/gallium/drivers/nv20/nv20_context.h8
-rw-r--r--src/gallium/drivers/nv20/nv20_fragtex.c28
-rw-r--r--src/gallium/drivers/nv20/nv20_miptree.c22
-rw-r--r--src/gallium/drivers/nv20/nv20_prim_vbuf.c56
-rw-r--r--src/gallium/drivers/nv20/nv20_screen.c4
-rw-r--r--src/gallium/drivers/nv20/nv20_state_emit.c197
-rw-r--r--src/gallium/drivers/nv20/nv20_transfer.c26
-rw-r--r--src/gallium/drivers/nv20/nv20_vbo.c9
-rw-r--r--src/gallium/drivers/nv30/nv30_context.c21
-rw-r--r--src/gallium/drivers/nv30/nv30_context.h8
-rw-r--r--src/gallium/drivers/nv30/nv30_fragprog.c40
-rw-r--r--src/gallium/drivers/nv30/nv30_fragtex.c4
-rw-r--r--src/gallium/drivers/nv30/nv30_miptree.c22
-rw-r--r--src/gallium/drivers/nv30/nv30_query.c20
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.c12
-rw-r--r--src/gallium/drivers/nv30/nv30_state.c6
-rw-r--r--src/gallium/drivers/nv30/nv30_state_blend.c2
-rw-r--r--src/gallium/drivers/nv30/nv30_state_fb.c2
-rw-r--r--src/gallium/drivers/nv30/nv30_state_scissor.c2
-rw-r--r--src/gallium/drivers/nv30/nv30_state_stipple.c4
-rw-r--r--src/gallium/drivers/nv30/nv30_state_viewport.c2
-rw-r--r--src/gallium/drivers/nv30/nv30_transfer.c26
-rw-r--r--src/gallium/drivers/nv30/nv30_vbo.c128
-rw-r--r--src/gallium/drivers/nv30/nv30_vertprog.c20
-rw-r--r--src/gallium/drivers/nv40/nv40_context.c21
-rw-r--r--src/gallium/drivers/nv40/nv40_context.h10
-rw-r--r--src/gallium/drivers/nv40/nv40_draw.c73
-rw-r--r--src/gallium/drivers/nv40/nv40_fragprog.c48
-rw-r--r--src/gallium/drivers/nv40/nv40_fragtex.c4
-rw-r--r--src/gallium/drivers/nv40/nv40_miptree.c22
-rw-r--r--src/gallium/drivers/nv40/nv40_query.c20
-rw-r--r--src/gallium/drivers/nv40/nv40_screen.c10
-rw-r--r--src/gallium/drivers/nv40/nv40_state.c6
-rw-r--r--src/gallium/drivers/nv40/nv40_state_blend.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c11
-rw-r--r--src/gallium/drivers/nv40/nv40_state_fb.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_state_scissor.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_state_stipple.c4
-rw-r--r--src/gallium/drivers/nv40/nv40_state_viewport.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_transfer.c26
-rw-r--r--src/gallium/drivers/nv40/nv40_vbo.c135
-rw-r--r--src/gallium/drivers/nv40/nv40_vertprog.c20
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c33
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h4
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c4
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c1111
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h1
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c6
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c94
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c23
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c36
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c6
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c10
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c12
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c101
-rw-r--r--src/gallium/drivers/r300/SConscript7
-rw-r--r--src/gallium/drivers/r300/r300_blit.c6
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c10
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h9
-rw-r--r--src/gallium/drivers/r300/r300_context.c48
-rw-r--r--src/gallium/drivers/r300/r300_context.h73
-rw-r--r--src/gallium/drivers/r300/r300_cs.h2
-rw-r--r--src/gallium/drivers/r300/r300_emit.c282
-rw-r--r--src/gallium/drivers/r300/r300_emit.h22
-rw-r--r--src/gallium/drivers/r300/r300_flush.c10
-rw-r--r--src/gallium/drivers/r300/r300_fs.c21
-rw-r--r--src/gallium/drivers/r300/r300_reg.h34
-rw-r--r--src/gallium/drivers/r300/r300_render.c204
-rw-r--r--src/gallium/drivers/r300/r300_render.h60
-rw-r--r--src/gallium/drivers/r300/r300_screen.c9
-rw-r--r--src/gallium/drivers/r300/r300_shader_semantics.h2
-rw-r--r--src/gallium/drivers/r300/r300_state.c344
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c61
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h19
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.c12
-rw-r--r--src/gallium/drivers/r300/r300_texture.c62
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c2
-rw-r--r--src/gallium/drivers/r300/r300_vs.c126
-rw-r--r--src/gallium/drivers/r300/r300_vs.h6
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c22
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h6
-rw-r--r--src/gallium/drivers/softpipe/sp_draw_arrays.c215
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.c1
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_blend.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_query.c26
-rw-r--r--src/gallium/drivers/softpipe/sp_query.h4
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h42
-rw-r--r--src/gallium/drivers/softpipe/sp_state_blend.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c10
-rw-r--r--src/gallium/drivers/softpipe/sp_state_fs.c75
-rw-r--r--src/gallium/drivers/softpipe/sp_state_rasterizer.c9
-rw-r--r--src/gallium/drivers/softpipe/sp_state_surface.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c195
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.h7
-rw-r--r--src/gallium/drivers/svga/svga_context.c44
-rw-r--r--src/gallium/drivers/svga/svga_context.h9
-rw-r--r--src/gallium/drivers/svga/svga_draw.c3
-rw-r--r--src/gallium/drivers/svga/svga_pipe_draw.c28
-rw-r--r--src/gallium/drivers/svga/svga_pipe_fs.c10
-rw-r--r--src/gallium/drivers/svga/svga_pipe_sampler.c3
-rw-r--r--src/gallium/drivers/svga/svga_pipe_vs.c10
-rw-r--r--src/gallium/drivers/svga/svga_state_fs.c50
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c28
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_draw.c2
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_state.c4
-rw-r--r--src/gallium/drivers/svga/svga_tgsi.c2
-rw-r--r--src/gallium/drivers/svga/svga_tgsi.h29
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c2
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_dump.c606
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_dump.h3
-rwxr-xr-xsrc/gallium/drivers/svga/svgadump/svga_dump.py94
-rw-r--r--src/gallium/drivers/trace/README5
-rw-r--r--src/gallium/drivers/trace/tr_context.c41
-rw-r--r--src/gallium/drivers/trace/tr_dump.c4
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c2
-rw-r--r--src/gallium/drivers/trace/tr_rbug.c6
-rw-r--r--src/gallium/drivers/trace/tr_screen.c2
-rw-r--r--src/gallium/drivers/trace/tr_state.h2
210 files changed, 5200 insertions, 4862 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index d5f5c7bbba..aa29dcb394 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -358,6 +358,7 @@ struct cell_spu_function_info
/** This is the object passed to spe_create_thread() */
+PIPE_ALIGN_TYPE(16,
struct cell_init_info
{
unsigned id;
@@ -370,7 +371,7 @@ struct cell_init_info
uint *buffer_status; /**< points at cell_context->buffer_status */
struct cell_spu_function_info *spu_functions;
-} ALIGN16_ATTRIB;
+});
#endif /* CELL_COMMON_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 1498fb665a..e402ed2922 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -89,7 +89,7 @@ struct cell_buffer_node;
*/
struct cell_buffer_list
{
- struct cell_fence fence ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) struct cell_fence fence;
struct cell_buffer_node *head;
};
@@ -150,18 +150,18 @@ struct cell_context
/** Mapped constant buffers */
void *mapped_constants[PIPE_SHADER_TYPES];
- struct cell_spu_function_info spu_functions ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions;
uint num_cells, num_spus;
/** Buffers for command batches, vertex/index data */
uint buffer_size[CELL_NUM_BUFFERS];
- ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE];
int cur_batch; /**< which buffer is being filled w/ commands */
/** [4] to ensure 16-byte alignment for each status word */
- uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4];
/** Associated with each command/batch buffer is a list of pipe_buffers
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index 2016b21a40..0a4da8ecc8 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -59,7 +59,7 @@ cell_map_constant_buffers(struct cell_context *sp)
}
}
- draw_set_mapped_constant_buffer(sp->draw,
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX,
sp->mapped_constants[PIPE_SHADER_VERTEX],
sp->constants[PIPE_SHADER_VERTEX]->size);
}
@@ -85,7 +85,7 @@ cell_unmap_constant_buffers(struct cell_context *sp)
*
* XXX should the element buffer be specified/bound with a separate function?
*/
-static boolean
+static void
cell_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -145,29 +145,27 @@ cell_draw_range_elements(struct pipe_context *pipe,
/* Note: leave drawing surfaces mapped */
cell_unmap_constant_buffers(sp);
-
- return TRUE;
}
-static boolean
+static void
cell_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
- return cell_draw_range_elements( pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- mode, start, count );
+ cell_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ mode, start, count );
}
-static boolean
+static void
cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- return cell_draw_elements(pipe, NULL, 0, mode, start, count);
+ cell_draw_elements(pipe, NULL, 0, mode, start, count);
}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c
index efc4f78364..b723e794e7 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_derived.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c
@@ -66,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell )
vinfo->num_attribs = 0;
/* we always want to emit vertex pos */
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
@@ -82,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell )
break;
case TGSI_SEMANTIC_COLOR:
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR,
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR,
fs->info.input_semantic_index[i]);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
break;
case TGSI_SEMANTIC_FOG:
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
#if 1
if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */
src = 0;
@@ -100,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell )
case TGSI_SEMANTIC_GENERIC:
/* this includes texcoords and varying vars */
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC,
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC,
fs->info.input_semantic_index[i]);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index ab55a24bb6..f1e1dcb9eb 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -331,7 +331,7 @@ cell_emit_state(struct cell_context *cell)
const struct draw_context *const draw = cell->draw;
struct cell_shader_info info;
- info.num_outputs = draw_num_vs_outputs(draw);
+ info.num_outputs = draw_num_shader_outputs(draw);
info.declarations = (uintptr_t) draw->vs.machine.Declarations;
info.num_declarations = draw->vs.machine.NumDeclarations;
info.instructions = (uintptr_t) draw->vs.machine.Instructions;
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index 5c0179d954..55bd85bde2 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -53,8 +53,7 @@ struct spu_vs_context draw;
/**
* Buffers containing dynamically generated SPU code:
*/
-static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
- ALIGN16_ATTRIB;
+PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS];
@@ -405,8 +404,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler)
case PIPE_TEX_FILTER_LINEAR:
spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
break;
- case PIPE_TEX_FILTER_ANISO:
- /* fall-through, for now */
case PIPE_TEX_FILTER_NEAREST:
spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
break;
@@ -418,8 +415,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler)
case PIPE_TEX_FILTER_LINEAR:
spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
break;
- case PIPE_TEX_FILTER_ANISO:
- /* fall-through, for now */
case PIPE_TEX_FILTER_NEAREST:
spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
break;
@@ -547,7 +542,7 @@ cmd_batch(uint opcode)
{
const uint buf = (opcode >> 8) & 0xff;
uint size = (opcode >> 16);
- qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16];
const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]);
uint pos;
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 5ed330aa6e..d2166a4901 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -1681,7 +1681,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
@@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach )
/* execute declarations (interpolants) */
if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
for (i = 0; i < mach->NumDeclarations; i++) {
+ PIPE_ALIGN_VAR(16)
union {
struct tgsi_full_declaration decl;
qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
- } d ALIGN16_ATTRIB;
+ } d;
unsigned ea = (unsigned) (mach->Declarations + pc);
spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
@@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach )
/* execute instructions, until pc is set to -1 */
while (pc != -1) {
+ PIPE_ALIGN_VAR(16)
union {
struct tgsi_full_instruction inst;
qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
- } i ALIGN16_ATTRIB;
+ } i;
unsigned ea = (unsigned) (mach->Instructions + pc);
spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h
index 8605679940..0ca92af248 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.h
+++ b/src/gallium/drivers/cell/spu/spu_exec.h
@@ -98,9 +98,9 @@ struct spu_exec_machine
* 4 internal temporaries
* 1 address
*/
+ PIPE_ALIGN_VAR(16)
struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS
- + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]
- ALIGN16_ATTRIB;
+ + TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
struct spu_exec_vector *Addrs;
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c
index ff3d609d25..98919c43ff 100644
--- a/src/gallium/drivers/cell/spu/spu_funcs.c
+++ b/src/gallium/drivers/cell/spu/spu_funcs.c
@@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions,
void
return_function_info(void)
{
- struct cell_spu_function_info funcs ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs;
int tag = TAG_MISC;
ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 33767e7c51..b18f4c22ef 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -93,6 +93,7 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
vector float *constants);
+PIPE_ALIGN_TYPE(16,
struct spu_framebuffer
{
void *color_start; /**< addr of color surface in main memory */
@@ -107,10 +108,11 @@ struct spu_framebuffer
uint zsize; /**< 0, 2 or 4 bytes per Z */
float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */
-} ALIGN16_ATTRIB;
+});
/** per-texture level info */
+PIPE_ALIGN_TYPE(16,
struct spu_texture_level
{
void *start;
@@ -123,20 +125,22 @@ struct spu_texture_level
vector signed int mask_s, mask_t, mask_r;
/** texcoord clamp limits */
vector signed int max_s, max_t, max_r;
-} ALIGN16_ATTRIB;
+});
+PIPE_ALIGN_TYPE(16,
struct spu_texture
{
struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS];
uint max_level;
uint target; /**< PIPE_TEXTURE_x */
-} ALIGN16_ATTRIB;
+});
/**
* All SPU global/context state will be in a singleton object of this type:
*/
+PIPE_ALIGN_TYPE(16,
struct spu_global
{
/** One-time init/constant info */
@@ -155,8 +159,8 @@ struct spu_global
struct vertex_info vertex_info;
/** Current color and Z tiles */
- tile_t ctile ALIGN16_ATTRIB;
- tile_t ztile ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) tile_t ctile;
+ PIPE_ALIGN_VAR(16) tile_t ztile;
/** Read depth/stencil tiles? */
boolean read_depth_stencil;
@@ -165,8 +169,8 @@ struct spu_global
ubyte cur_ctile_status, cur_ztile_status;
/** Status of all tiles in framebuffer */
- ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
- ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE];
+ PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE];
/** Current fragment ops machine code, at 8-byte boundary */
uint *fragment_ops_code;
@@ -175,7 +179,7 @@ struct spu_global
spu_fragment_ops_func fragment_ops[2];
/** Current fragment program machine code, at 8-byte boundary */
- uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB;
+ PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
/** Current fragment ops function */
spu_fragment_program_func fragment_program;
@@ -187,7 +191,7 @@ struct spu_global
/** Fragment program constants */
vector float constants[4 * CELL_MAX_CONSTANTS];
-} ALIGN16_ATTRIB;
+});
extern struct spu_global spu;
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 5ffb7073ab..14987e3c3a 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -169,7 +169,7 @@ void
cmd_render(const struct cell_command_render *render, uint *pos_incr)
{
/* we'll DMA into these buffers */
- ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE];
const uint vertex_size = render->vertex_size; /* in bytes */
/*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
uint index_bytes;
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
index 03375d84a5..087963960d 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
@@ -43,7 +43,8 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in,
const qword *shuffle_data);
-static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = {
+PIPE_ALIGN_VAR(16) static const qword
+fetch_shuffle_data[5] = {
/* Shuffle used by CVT_64_FLOAT
*/
{
@@ -110,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
unsigned idx;
const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
- qword in[2 * 4] ALIGN16_ATTRIB;
+ PIPE_ALIGN_VAR(16) qword in[2 * 4];
/* Fetch four attributes for four vertices.
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
index fbe5b34d39..3e9804bf8e 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
@@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw,
struct spu_exec_machine *machine = &draw->machine;
unsigned int j;
- ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS);
+ PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS];
+ PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS];
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
@@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw,
ASSERT_ALIGN16(draw->constants);
machine->Consts = (float (*)[4]) draw->constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- machine->Outputs = ALIGN16_ASSIGN(outputs);
+ machine->Inputs = inputs;
+ machine->Outputs = outputs;
spu_vertex_fetch( draw, machine, elts, count );
@@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw,
for (j = 0; j < count; j++) {
unsigned slot;
float x, y, z, w;
+ PIPE_ALIGN_VAR(16)
unsigned char buffer[sizeof(struct vertex_header)
- + MAX_VERTEX_SIZE] ALIGN16_ATTRIB;
+ + MAX_VERTEX_SIZE];
struct vertex_header *const tmpOut =
(struct vertex_header *) buffer;
const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header)
@@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw,
}
-unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]
- ALIGN16_ATTRIB;
+PIPE_ALIGN_VAR(16) unsigned char
+immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]);
void
diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c
index 37184eac7b..46e4338d98 100644
--- a/src/gallium/drivers/failover/fo_context.c
+++ b/src/gallium/drivers/failover/fo_context.c
@@ -44,11 +44,19 @@ static void failover_destroy( struct pipe_context *pipe )
}
+void failover_fail_over( struct failover_context *failover )
+{
+ failover->dirty = TRUE;
+ failover->mode = FO_SW;
+}
+
-static boolean failover_draw_elements( struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned prim, unsigned start, unsigned count)
+static void failover_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim,
+ unsigned start,
+ unsigned count)
{
struct failover_context *failover = failover_context( pipe );
@@ -62,24 +70,22 @@ static boolean failover_draw_elements( struct pipe_context *pipe,
/* Try hardware:
*/
if (failover->mode == FO_HW) {
- if (!failover->hw->draw_elements( failover->hw,
- indexBuffer,
- indexSize,
- prim,
- start,
- count )) {
-
- failover->hw->flush( failover->hw, ~0, NULL );
- failover->mode = FO_SW;
- }
+ failover->hw->draw_elements( failover->hw,
+ indexBuffer,
+ indexSize,
+ prim,
+ start,
+ count );
}
/* Possibly try software:
*/
if (failover->mode == FO_SW) {
- if (failover->dirty)
+ if (failover->dirty) {
+ failover->hw->flush( failover->hw, ~0, NULL );
failover_state_emit( failover );
+ }
failover->sw->draw_elements( failover->sw,
indexBuffer,
@@ -94,15 +100,13 @@ static boolean failover_draw_elements( struct pipe_context *pipe,
*/
failover->sw->flush( failover->sw, ~0, NULL );
}
-
- return TRUE;
}
-static boolean failover_draw_arrays( struct pipe_context *pipe,
+static void failover_draw_arrays( struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return failover_draw_elements(pipe, NULL, 0, prim, start, count);
+ failover_draw_elements(pipe, NULL, 0, prim, start, count);
}
static unsigned int
diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h
index a8ce997a1f..533122b69d 100644
--- a/src/gallium/drivers/failover/fo_winsys.h
+++ b/src/gallium/drivers/failover/fo_winsys.h
@@ -36,10 +36,13 @@
struct pipe_context;
+struct failover_context;
struct pipe_context *failover_create( struct pipe_context *hw,
struct pipe_context *sw );
+void failover_fail_over( struct failover_context *failover );
+
#endif /* FO_WINSYS_H */
diff --git a/src/gallium/drivers/i915/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c
index effeba1297..669964770d 100644
--- a/src/gallium/drivers/i915/i915_buffer.c
+++ b/src/gallium/drivers/i915/i915_buffer.c
@@ -111,6 +111,7 @@ i915_buffer_unmap(struct pipe_screen *screen,
{
struct i915_buffer *buf = i915_buffer(buffer);
assert(!buf->ibuf);
+ (void) buf;
}
static void
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 94c8aee30f..89feeade75 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -45,7 +45,7 @@
*/
-static boolean
+static void
i915_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -84,7 +84,7 @@ i915_draw_range_elements(struct pipe_context *pipe,
}
- draw_set_mapped_constant_buffer(draw,
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
i915->current.constants[PIPE_SHADER_VERTEX],
(i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
4 * sizeof(float)));
@@ -106,27 +106,25 @@ i915_draw_range_elements(struct pipe_context *pipe,
pipe_buffer_unmap(pipe->screen, indexBuffer);
draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL);
}
-
- return TRUE;
}
-static boolean
+static void
i915_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
{
- return i915_draw_range_elements(pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- prim, start, count);
+ i915_draw_range_elements(pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ prim, start, count);
}
-static boolean
+static void
i915_draw_arrays(struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return i915_draw_elements(pipe, NULL, 0, prim, start, count);
+ i915_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index f7ebfadd59..0fab6e1bc3 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -58,10 +58,10 @@ translate_wrap_mode(unsigned wrap)
return TEXCOORDMODE_CLAMP_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
return TEXCOORDMODE_CLAMP_BORDER;
-/*
+ /*
case PIPE_TEX_WRAP_MIRRORED_REPEAT:
return TEXCOORDMODE_MIRROR;
-*/
+ */
default:
return TEXCOORDMODE_WRAP;
}
@@ -74,8 +74,6 @@ static unsigned translate_img_filter( unsigned filter )
return FILTER_NEAREST;
case PIPE_TEX_FILTER_LINEAR:
return FILTER_LINEAR;
- case PIPE_TEX_FILTER_ANISO:
- return FILTER_ANISOTROPIC;
default:
assert(0);
return FILTER_NEAREST;
@@ -221,6 +219,9 @@ i915_create_sampler_state(struct pipe_context *pipe,
minFilt = translate_img_filter( sampler->min_img_filter );
magFilt = translate_img_filter( sampler->mag_img_filter );
+ if (sampler->max_anisotropy > 1.0)
+ minFilt = magFilt = FILTER_ANISOTROPIC;
+
if (sampler->max_anisotropy > 2.0) {
cso->state[0] |= SS2_MAX_ANISO_4;
}
diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c
index 178d4e8781..03dd5091a6 100644
--- a/src/gallium/drivers/i915/i915_state_derived.c
+++ b/src/gallium/drivers/i915/i915_state_derived.c
@@ -84,7 +84,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
/* pos */
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
if (needW) {
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZW;
@@ -101,21 +101,21 @@ static void calculate_vertex_layout( struct i915_context *i915 )
/* primary color */
if (colors[0]) {
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
vinfo.hwfmt[0] |= S4_VFMT_COLOR;
}
/* secondary color */
if (colors[1]) {
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
}
/* fog coord, not fog blend factor */
if (fog) {
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
}
@@ -125,7 +125,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
uint hwtc;
if (texCoords[i]) {
hwtc = TEXCOORDFMT_4D;
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
}
else {
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index 58d9e56df2..d67a1a6263 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -83,19 +83,19 @@ compile_clip_prog( struct brw_context *brw,
c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE;
- if (c.key.output_color0)
+ if (c.key.output_color0 != BRW_OUTPUT_NOT_PRESENT)
c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE;
- if (c.key.output_color1)
+ if (c.key.output_color1 != BRW_OUTPUT_NOT_PRESENT)
c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE;
- if (c.key.output_bfc0)
+ if (c.key.output_bfc0 != BRW_OUTPUT_NOT_PRESENT)
c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE;
- if (c.key.output_bfc1)
+ if (c.key.output_bfc1 != BRW_OUTPUT_NOT_PRESENT)
c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE;
- if (c.key.output_edgeflag)
+ if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT)
c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE;
if (BRW_IS_IGDNG(brw))
@@ -182,7 +182,6 @@ upload_clip_prog(struct brw_context *brw)
*/
/* CACHE_NEW_VS_PROG */
key.nr_attrs = brw->vs.prog_data->nr_outputs;
- key.output_edgeflag = brw->vs.prog_data->output_edgeflag;
/* PIPE_NEW_VS */
key.output_hpos = vs->output_hpos;
@@ -190,6 +189,7 @@ upload_clip_prog(struct brw_context *brw)
key.output_color1 = vs->output_color1;
key.output_bfc0 = vs->output_bfc0;
key.output_bfc1 = vs->output_bfc1;
+ key.output_edgeflag = vs->output_edgeflag;
/* PIPE_NEW_CLIP */
key.nr_userclip = brw->curr.ucp.nr;
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 56e7807400..8c006bb95b 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -120,6 +120,13 @@
#define BRW_MAX_CURBE (32*16)
+
+/* Need a value to say a particular vertex shader output isn't
+ * present. Limits us to 63 outputs currently.
+ */
+#define BRW_OUTPUT_NOT_PRESENT ((1<<6)-1)
+
+
struct brw_context;
struct brw_depth_stencil_state {
@@ -335,8 +342,6 @@ struct brw_vs_prog_data {
GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */
- GLuint output_edgeflag;
-
GLboolean writes_psiz;
/* Used for calculating urb partitions:
diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h
index 77d402d35e..ba5b109c48 100644
--- a/src/gallium/drivers/i965/brw_disasm.h
+++ b/src/gallium/drivers/i965/brw_disasm.h
@@ -23,6 +23,8 @@
#ifndef BRW_DISASM_H
#define BRW_DISASM_H
+#include <stdio.h>
+
struct brw_instruction;
int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 852fd22982..ea8d39adaf 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -176,7 +176,7 @@ try_draw_range_elements(struct brw_context *brw,
}
-static boolean
+static void
brw_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *index_buffer,
unsigned index_size,
@@ -228,29 +228,27 @@ brw_draw_range_elements(struct pipe_context *pipe,
ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
assert(ret == 0);
}
-
- return TRUE;
}
-static boolean
+static void
brw_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *index_buffer,
unsigned index_size,
unsigned mode,
unsigned start, unsigned count)
{
- return brw_draw_range_elements( pipe, index_buffer,
- index_size,
- 0, 0xffffffff,
- mode,
- start, count );
+ brw_draw_range_elements( pipe, index_buffer,
+ index_size,
+ 0, 0xffffffff,
+ mode,
+ start, count );
}
-static boolean
+static void
brw_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- return brw_draw_elements(pipe, NULL, 0, mode, start, count);
+ brw_draw_elements(pipe, NULL, 0, mode, start, count);
}
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
index 4fe7b6acc1..00d8eaccbc 100644
--- a/src/gallium/drivers/i965/brw_eu_emit.c
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -860,7 +860,7 @@ void brw_land_fwd_jump(struct brw_compile *p,
jmpi = 2;
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
- assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
+ assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}
diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
index 211be88178..452e1e89f9 100644
--- a/src/gallium/drivers/i965/brw_pipe_clear.c
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -114,18 +114,18 @@ static void color_clear(struct brw_context *brw,
const float *rgba )
{
enum pipe_error ret;
- unsigned value;
+ union util_color value;
util_pack_color( rgba, bsurface->base.format, &value );
if (bsurface->cpp == 2)
- value |= value << 16;
+ value.ui |= value.ui << 16;
- ret = try_clear( brw, bsurface, value );
+ ret = try_clear( brw, bsurface, value.ui );
if (ret != 0) {
brw_context_flush( brw );
- ret = try_clear( brw, bsurface, value );
+ ret = try_clear( brw, bsurface, value.ui );
assert( ret == 0 );
}
}
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
index 6b03094f50..5d4e5025f9 100644
--- a/src/gallium/drivers/i965/brw_pipe_fb.c
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -3,6 +3,7 @@
#include "pipe/p_state.h"
#include "brw_context.h"
+#include "brw_debug.h"
/**
* called from intelDrawBuffer()
@@ -51,8 +52,14 @@ static void brw_set_viewport_state( struct pipe_context *pipe,
struct brw_context *brw = brw_context(pipe);
brw->curr.viewport = *viewport;
- brw->curr.ccv.min_depth = 0.0; /* XXX: near */
- brw->curr.ccv.max_depth = 1.0; /* XXX: far */
+ brw->curr.ccv.min_depth = viewport->scale[2] * -1.0 + viewport->translate[2];
+ brw->curr.ccv.max_depth = viewport->scale[2] * 1.0 + viewport->translate[2];
+
+ if (0)
+ debug_printf("%s depth range %f .. %f\n",
+ __FUNCTION__,
+ brw->curr.ccv.min_depth,
+ brw->curr.ccv.max_depth);
brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT;
}
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
index 5ddc63f57e..81712798a5 100644
--- a/src/gallium/drivers/i965/brw_pipe_sampler.c
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -48,8 +48,6 @@ static GLuint translate_img_filter( unsigned filter )
return BRW_MAPFILTER_NEAREST;
case PIPE_TEX_FILTER_LINEAR:
return BRW_MAPFILTER_LINEAR;
- case PIPE_TEX_FILTER_ANISO:
- return BRW_MAPFILTER_ANISOTROPIC;
default:
assert(0);
return BRW_MAPFILTER_NEAREST;
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 6c376e5e5d..e389587f3e 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -197,6 +197,13 @@ static void *brw_create_vs_state( struct pipe_context *pipe,
vs->id = brw->program_id++;
vs->has_flow_control = has_flow_control(&vs->info);
+ vs->output_hpos = BRW_OUTPUT_NOT_PRESENT;
+ vs->output_color0 = BRW_OUTPUT_NOT_PRESENT;
+ vs->output_color1 = BRW_OUTPUT_NOT_PRESENT;
+ vs->output_bfc0 = BRW_OUTPUT_NOT_PRESENT;
+ vs->output_bfc1 = BRW_OUTPUT_NOT_PRESENT;
+ vs->output_edgeflag = BRW_OUTPUT_NOT_PRESENT;
+
for (i = 0; i < vs->info.num_outputs; i++) {
int index = vs->info.output_semantic_index[i];
switch (vs->info.output_semantic_name[i]) {
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 714def5046..8a16205d2f 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -79,18 +79,12 @@ static void release_tmps( struct brw_vs_compile *c )
static boolean is_position_output( struct brw_vs_compile *c,
unsigned vs_output )
{
- struct brw_vertex_shader *vs = c->vp;
-
- if (vs_output == c->prog_data.output_edgeflag) {
- return FALSE;
- }
- else {
- unsigned semantic = vs->info.output_semantic_name[vs_output];
- unsigned index = vs->info.output_semantic_index[vs_output];
+ const struct brw_vertex_shader *vs = c->vp;
+ unsigned semantic = vs->info.output_semantic_name[vs_output];
+ unsigned index = vs->info.output_semantic_index[vs_output];
- return (semantic == TGSI_SEMANTIC_POSITION &&
- index == 0);
- }
+ return (semantic == TGSI_SEMANTIC_POSITION &&
+ index == 0);
}
@@ -98,23 +92,16 @@ static boolean find_output_slot( struct brw_vs_compile *c,
unsigned vs_output,
unsigned *fs_input_slot )
{
- struct brw_vertex_shader *vs = c->vp;
+ const struct brw_vertex_shader *vs = c->vp;
+ unsigned semantic = vs->info.output_semantic_name[vs_output];
+ unsigned index = vs->info.output_semantic_index[vs_output];
+ unsigned i;
- if (vs_output == c->prog_data.output_edgeflag) {
- *fs_input_slot = c->key.fs_signature.nr_inputs;
- return TRUE;
- }
- else {
- unsigned semantic = vs->info.output_semantic_name[vs_output];
- unsigned index = vs->info.output_semantic_index[vs_output];
- unsigned i;
-
- for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
- if (c->key.fs_signature.input[i].semantic == semantic &&
+ for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
+ if (c->key.fs_signature.input[i].semantic == semantic &&
c->key.fs_signature.input[i].semantic_index == index) {
- *fs_input_slot = i;
- return TRUE;
- }
+ *fs_input_slot = i;
+ return TRUE;
}
}
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 7e57d0306b..8f983a60ae 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -691,7 +691,7 @@ static void emit_xpd( struct brw_compile *p,
{
GLuint i;
- assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X);
+ assert((mask & BRW_WRITEMASK_W) != BRW_WRITEMASK_W);
for (i = 0 ; i < 3; i++) {
if (mask & (1<<i)) {
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index b975182e7b..f9063d90fb 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -45,7 +45,7 @@ identity_destroy(struct pipe_context *_pipe)
free(id_pipe);
}
-static boolean
+static void
identity_draw_arrays(struct pipe_context *_pipe,
unsigned prim,
unsigned start,
@@ -54,13 +54,13 @@ identity_draw_arrays(struct pipe_context *_pipe,
struct identity_context *id_pipe = identity_context(_pipe);
struct pipe_context *pipe = id_pipe->pipe;
- return pipe->draw_arrays(pipe,
- prim,
- start,
- count);
+ pipe->draw_arrays(pipe,
+ prim,
+ start,
+ count);
}
-static boolean
+static void
identity_draw_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -73,15 +73,15 @@ identity_draw_elements(struct pipe_context *_pipe,
struct pipe_context *pipe = id_pipe->pipe;
struct pipe_buffer *indexBuffer = id_buffer->buffer;
- return pipe->draw_elements(pipe,
- indexBuffer,
- indexSize,
- prim,
- start,
- count);
+ pipe->draw_elements(pipe,
+ indexBuffer,
+ indexSize,
+ prim,
+ start,
+ count);
}
-static boolean
+static void
identity_draw_range_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -96,14 +96,14 @@ identity_draw_range_elements(struct pipe_context *_pipe,
struct pipe_context *pipe = id_pipe->pipe;
struct pipe_buffer *indexBuffer = id_buffer->buffer;
- return pipe->draw_range_elements(pipe,
- indexBuffer,
- indexSize,
- minIndex,
- maxIndex,
- mode,
- start,
- count);
+ pipe->draw_range_elements(pipe,
+ indexBuffer,
+ indexSize,
+ minIndex,
+ maxIndex,
+ mode,
+ start,
+ count);
}
static struct pipe_query *
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index e038a5229e..7c6e46006b 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -50,7 +50,6 @@ C_SOURCES = \
lp_state_vs.c \
lp_surface.c \
lp_tex_cache.c \
- lp_tex_sample_c.c \
lp_tex_sample_llvm.c \
lp_texture.c \
lp_tile_cache.c \
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 0c3f00fd58..72d9f39658 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -59,27 +59,16 @@ Requirements
See /proc/cpuinfo to know what your CPU supports.
- - LLVM 2.5 or greater. LLVM 2.6 is preferred.
+ - LLVM 2.6.
- On Debian based distributions do:
+ For Linux, on a recent Debian based distribution do:
aptitude install llvm-dev
- There is a typo in one of the llvm 2.5 headers, that may cause compilation
- errors. To fix it apply the change:
-
- --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100
- +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100
- @@ -831,7 +831,7 @@
- template<typename T>
- inline T **unwrap(LLVMValueRef *Vals, unsigned Length) {
- #if DEBUG
- - for (LLVMValueRef *I = Vals, E = Vals + Length; I != E; ++I)
- + for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I)
- cast<T>(*I);
- #endif
- return reinterpret_cast<T**>(Vals);
-
+ For Windows download pre-built MSVC 9.0 or MinGW binaries from
+ http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment
+ variable to the extracted path.
+
- scons (optional)
- udis86, http://udis86.sourceforge.net/ (optional):
@@ -95,9 +84,9 @@ Requirements
Building
========
-To build everything invoke scons as:
+To build everything on Linux invoke scons as:
- scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false -k
+ scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=xlib dri=false
Alternatively, you can build it with GNU make, if you prefer, by invoking it as
@@ -105,12 +94,15 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as
but the rest of these instructions assume that scons is used.
+For windows is everything the except except the winsys:
+
+ scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=gdi dri=false
Using
=====
-Building will create a drop-in alternative for libGL.so. To use it set the
-environment variables:
+On Linux, building will create a drop-in alternative for libGL.so. To use it
+set the environment variables:
export LD_LIBRARY_PATH=$PWD/build/linux-x86_64-debug/lib:$LD_LIBRARY_PATH
@@ -121,6 +113,11 @@ or
For performance evaluation pass debug=no to scons, and use the corresponding
lib directory without the "-debug" suffix.
+On Windows, building will create a drop-in alternative for opengl32.dll. To use
+it put it in the same directory as the application. It can also be used by
+replacing the native ICD driver, but it's quite an advanced usage, so if you
+need to ask, don't even try it.
+
Unit testing
============
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 3bd2e70013..6bb545a501 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -9,6 +9,8 @@ if not env.has_key('LLVM_VERSION'):
env.Tool('udis86')
+env.Append(CPPPATH = ['.'])
+
env.CodeGenerate(
target = 'lp_tile_soa.c',
script = 'lp_tile_soa.py',
@@ -64,7 +66,6 @@ llvmpipe = env.ConvenienceLibrary(
'lp_state_vs.c',
'lp_surface.c',
'lp_tex_cache.c',
- 'lp_tex_sample_c.c',
'lp_tex_sample_llvm.c',
'lp_texture.c',
'lp_tile_cache.c',
@@ -74,21 +75,19 @@ llvmpipe = env.ConvenienceLibrary(
env = env.Clone()
-env.Prepend(LIBS = [llvmpipe] + auxiliaries)
+env.Prepend(LIBS = [llvmpipe] + gallium)
-env.Program(
- target = 'lp_test_format',
- source = ['lp_test_format.c', 'lp_test_main.c'],
-)
+tests = [
+ 'format',
+ 'blend',
+ 'conv',
+]
-env.Program(
- target = 'lp_test_blend',
- source = ['lp_test_blend.c', 'lp_test_main.c'],
-)
-
-env.Program(
- target = 'lp_test_conv',
- source = ['lp_test_conv.c', 'lp_test_main.c'],
-)
+for test in tests:
+ target = env.Program(
+ target = 'lp_test_' + test,
+ source = ['lp_test_' + test + '.c', 'lp_test_main.c'],
+ )
+ env.InstallProgram(target)
Export('llvmpipe')
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index d14f468ba9..ced7b9c11d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -142,7 +142,7 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
enum lp_build_blend_swizzle {
LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
- LP_BUILD_BLEND_SWIZZLE_AAAA = 1,
+ LP_BUILD_BLEND_SWIZZLE_AAAA = 1
};
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
index dcc25fbff8..25c10af29f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -47,7 +47,7 @@
*/
enum lp_build_flow_construct_kind {
lP_BUILD_FLOW_SCOPE,
- LP_BUILD_FLOW_SKIP,
+ LP_BUILD_FLOW_SKIP
};
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index 5836e0173f..10e82f120b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -130,7 +130,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
- // UIToFP can't be expressed in SSE2
+ /* UIToFP can't be expressed in SSE2 */
casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
if (normalized)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
index d3f78c06d9..6e79438ead 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
@@ -59,3 +59,17 @@ LLVMInitializeNativeTarget(void)
#endif
+
+
+/*
+ * Hack to allow the linking of release LLVM static libraries on a debug build.
+ *
+ * See also:
+ * - http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/7234ea2b-0042-42ed-b4e2-5d8644dfb57d
+ */
+#if defined(_MSC_VER) && defined(_DEBUG)
+#include <crtdefs.h>
+extern "C" {
+ _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {}
+}
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
index af70ddc6ab..9003e108c1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
@@ -69,8 +69,8 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->min_img_filter = sampler->min_img_filter;
state->min_mip_filter = sampler->min_mip_filter;
state->mag_img_filter = sampler->mag_img_filter;
- if(sampler->compare_mode) {
- state->compare_mode = sampler->compare_mode;
+ state->compare_mode = sampler->compare_mode;
+ if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
state->compare_func = sampler->compare_func;
}
state->normalized_coords = sampler->normalized_coords;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 47b68b71e2..5ee8d556a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -488,7 +488,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
LLVMValueRef res;
unsigned chan;
- if(!bld->static_state->compare_mode)
+ if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
return;
/* TODO: Compare before swizzling, to avoid redundant computations */
@@ -577,7 +577,6 @@ lp_build_sample_soa(LLVMBuilderRef builder,
lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
break;
case PIPE_TEX_FILTER_LINEAR:
- case PIPE_TEX_FILTER_ANISO:
if(lp_format_is_rgba8(bld.format_desc))
lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel);
else
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index a67c70ff25..fb1eda4423 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -321,7 +321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
{
const uint unit = inst->Src[1].Register.Index;
LLVMValueRef lodbias;
- LLVMValueRef oow;
+ LLVMValueRef oow = NULL;
LLVMValueRef coords[3];
unsigned num_coords;
unsigned i;
@@ -361,6 +361,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
if (projected)
coords[i] = lp_build_mul(&bld->base, coords[i], oow);
}
+ for (i = num_coords; i < 3; i++) {
+ coords[i] = bld->base.undef;
+ }
bld->sampler->emit_fetch_texel(bld->sampler,
bld->base.builder,
@@ -446,7 +449,12 @@ emit_instruction(
{
unsigned chan_index;
LLVMValueRef src0, src1, src2;
- LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ LLVMValueRef tmp0, tmp1, tmp2;
+ LLVMValueRef tmp3 = NULL;
+ LLVMValueRef tmp4 = NULL;
+ LLVMValueRef tmp5 = NULL;
+ LLVMValueRef tmp6 = NULL;
+ LLVMValueRef tmp7 = NULL;
LLVMValueRef res;
LLVMValueRef dst0[NUM_CHANNELS];
@@ -1310,7 +1318,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
/* deprecated? */
assert(0);
return 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index f381156d00..aaa675aec7 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -140,6 +140,7 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
unsigned i;
+ /* check if any of the bound drawing surfaces are this texture */
if(llvmpipe->dirty_render_cache) {
for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
if(llvmpipe->framebuffer.cbufs[i] &&
@@ -150,6 +151,13 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
llvmpipe->framebuffer.zsbuf->texture == texture)
return PIPE_REFERENCED_FOR_WRITE;
}
+
+ /* check if any of the tex_cache textures are this texture */
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ if (llvmpipe->tex_cache[i] &&
+ llvmpipe->tex_cache[i]->texture == texture)
+ return PIPE_REFERENCED_FOR_READ;
+ }
for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
if (llvmpipe->vertex_tex_cache[i] &&
llvmpipe->vertex_tex_cache[i]->texture == texture)
@@ -248,22 +256,6 @@ llvmpipe_create( struct pipe_screen *screen )
llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen);
- /* vertex shader samplers */
- for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
- llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples;
- llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX;
- llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->vertex_tex_cache[i];
- llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i];
- }
-
- /* fragment shader samplers */
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples;
- llvmpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT;
- llvmpipe->tgsi.frag_samplers[i].cache = llvmpipe->tex_cache[i];
- llvmpipe->tgsi.frag_samplers_list[i] = &llvmpipe->tgsi.frag_samplers[i];
- }
-
/*
* Create drawing context and plug our rendering stage into it.
*/
@@ -271,10 +263,7 @@ llvmpipe_create( struct pipe_screen *screen )
if (!llvmpipe->draw)
goto fail;
- draw_texture_samplers(llvmpipe->draw,
- PIPE_MAX_VERTEX_SAMPLERS,
- (struct tgsi_sampler **)
- llvmpipe->tgsi.vert_samplers_list);
+ /* FIXME: devise alternative to draw_texture_samplers */
if (debug_get_bool_option( "LP_NO_RAST", FALSE ))
llvmpipe->no_rast = TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index ca50585896..426d6eb4a1 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -115,14 +115,6 @@ struct llvmpipe_context {
unsigned line_stipple_counter;
- /** TGSI exec things */
- struct {
- struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS];
- struct lp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS];
- struct lp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS];
- struct lp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS];
- } tgsi;
-
/** The primitive drawing context */
struct draw_context *draw;
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 2299566c66..c152b4413f 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -45,11 +45,11 @@
-boolean
+void
llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- return llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count);
+ llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count);
}
@@ -58,7 +58,7 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
* Basically, map the vertex buffers (and drawing surfaces), then hand off
* the drawing to the 'draw' module.
*/
-boolean
+void
llvmpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -103,7 +103,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
draw_arrays(draw, mode, start, count);
/*
- * unmap vertex/index buffers - will cause draw module to flush
+ * unmap vertex/index buffers
*/
for (i = 0; i < lp->num_vertex_buffers; i++) {
draw_set_mapped_vertex_buffer(draw, i, NULL);
@@ -112,24 +112,28 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
+ /*
+ * TODO: Flush only when a user vertex/index buffer is present
+ * (or even better, modify draw module to do this
+ * internally when this condition is seen?)
+ */
+ draw_flush(draw);
/* Note: leave drawing surfaces mapped */
lp->dirty_render_cache = TRUE;
-
- return TRUE;
}
-boolean
+void
llvmpipe_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
- return llvmpipe_draw_range_elements( pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- mode, start, count );
+ llvmpipe_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ mode, start, count );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index bce3baec16..4ef0783f3e 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -79,25 +79,22 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
/* struct lp_jit_context */
{
- LLVMTypeRef elem_types[5];
+ LLVMTypeRef elem_types[4];
LLVMTypeRef context_type;
elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */
- elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */
- elem_types[2] = LLVMFloatType(); /* alpha_ref_value */
- elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */
- elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
+ elem_types[1] = LLVMFloatType(); /* alpha_ref_value */
+ elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */
+ elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
screen->target, context_type, 0);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, samplers,
- screen->target, context_type, 1);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value,
- screen->target, context_type, 2);
+ screen->target, context_type, 1);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
- screen->target, context_type, 3);
+ screen->target, context_type, 2);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
screen->target, context_type,
LP_JIT_CONTEXT_TEXTURES_INDEX);
@@ -109,24 +106,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
screen->context_ptr_type = LLVMPointerType(context_type, 0);
}
- /* fetch_texel
- */
- {
- LLVMTypeRef ret_type;
- LLVMTypeRef arg_types[3];
- LLVMValueRef fetch_texel;
-
- ret_type = LLVMVoidType();
- arg_types[0] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */
- arg_types[1] = LLVMInt32Type(); /* unit */
- arg_types[2] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* store */
-
- fetch_texel = lp_declare_intrinsic(screen->module, "fetch_texel",
- ret_type, arg_types, Elements(arg_types));
-
- LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_fetch_texel_soa);
- }
-
#ifdef DEBUG
LLVMDumpModule(screen->module);
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 58f716ede2..277b690c02 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -41,7 +41,6 @@
#include "pipe/p_state.h"
-struct tgsi_sampler;
struct llvmpipe_screen;
@@ -78,8 +77,6 @@ struct lp_jit_context
{
const float *constants;
- struct tgsi_sampler **samplers;
-
float alpha_ref_value;
/* FIXME: store (also?) in floats */
@@ -92,16 +89,13 @@ struct lp_jit_context
#define lp_jit_context_constants(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, 0, "constants")
-#define lp_jit_context_samplers(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, 1, "samplers")
-
#define lp_jit_context_alpha_ref_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, 2, "alpha_ref_value")
+ lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value")
#define lp_jit_context_blend_color(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, 3, "blend_color")
+ lp_build_struct_get(_builder, _ptr, 2, "blend_color")
-#define LP_JIT_CONTEXT_TEXTURES_INDEX 4
+#define LP_JIT_CONTEXT_TEXTURES_INDEX 3
#define lp_jit_context_textures(_builder, _ptr) \
lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures")
@@ -118,12 +112,6 @@ typedef void
void *color,
void *depth);
-void PIPE_CDECL
-lp_fetch_texel_soa( struct tgsi_sampler **samplers,
- uint32_t unit,
- float *store );
-
-
void
lp_jit_screen_cleanup(struct llvmpipe_screen *screen);
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
index 4abff4eccc..e8e2e2524a 100644
--- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
@@ -128,6 +128,7 @@ lp_vbuf_unmap_vertices(struct vbuf_render *vbr,
{
struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
+ (void) cvbr;
/* do nothing */
}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h
index 7eb05de77a..c3a48700a4 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad.h
+++ b/src/gallium/drivers/llvmpipe/lp_quad.h
@@ -31,6 +31,7 @@
#ifndef LP_QUAD_H
#define LP_QUAD_H
+#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_exec.h"
@@ -83,7 +84,7 @@ struct quad_header_inout
struct quad_header_output
{
/** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
- float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
+ PIPE_ALIGN_VAR(16) float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
};
@@ -92,9 +93,9 @@ struct quad_header_output
*/
struct quad_interp_coef
{
- float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
- float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
- float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ PIPE_ALIGN_VAR(16) float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ PIPE_ALIGN_VAR(16) float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ PIPE_ALIGN_VAR(16) float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
};
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index b18f17c0cd..0b2d3a2801 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -117,7 +117,7 @@ struct setup_context {
/**
* Execute fragment shader for the four fragments in the quad.
*/
-ALIGN_STACK
+PIPE_ALIGN_STACK
static void
shade_quads(struct llvmpipe_context *llvmpipe,
struct quad_header *quads[],
@@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe,
uint8_t *tile;
uint8_t *color;
void *depth;
- uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
+ PIPE_ALIGN_VAR(16) uint32_t mask[4][NUM_CHANNELS];
unsigned chan_index;
unsigned q;
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 8e0c773a63..e16793186b 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -56,7 +56,6 @@
#define LP_NEW_QUERY 0x4000
-struct tgsi_sampler;
struct vertex_info;
struct pipe_context;
struct llvmpipe_context;
@@ -197,14 +196,14 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp);
void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe );
-boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count);
-boolean llvmpipe_draw_elements(struct pipe_context *pipe,
+void llvmpipe_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count);
-boolean
+void
llvmpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c
index b2e75d3b14..a94cd05ef2 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c
@@ -34,6 +34,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_debug_dump.h"
+#include "draw/draw_context.h"
#include "lp_screen.h"
#include "lp_context.h"
#include "lp_state.h"
@@ -51,6 +52,11 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe,
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ if (llvmpipe->blend == blend)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
llvmpipe->blend = blend;
llvmpipe->dirty |= LP_NEW_BLEND;
@@ -69,6 +75,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe,
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
unsigned i, j;
+ if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color);
if(!llvmpipe->jit_context.blend_color)
@@ -99,7 +110,12 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe,
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil;
+ if (llvmpipe->depth_stencil == depth_stencil)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->depth_stencil = depth_stencil;
if(llvmpipe->depth_stencil)
llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index e703964aaa..6c1ef6bc42 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -66,7 +66,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
/* compute vertex layout now */
const struct lp_fragment_shader *lpfs = llvmpipe->fs;
struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
- const uint num = draw_num_vs_outputs(llvmpipe->draw);
+ const uint num = draw_current_shader_outputs(llvmpipe->draw);
uint i;
/* Tell draw_vbuf to simply emit the whole post-xform vertex
@@ -116,13 +116,13 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
}
/* this includes texcoords and varying vars */
- src = draw_find_vs_output(llvmpipe->draw,
+ src = draw_find_shader_output(llvmpipe->draw,
lpfs->info.input_semantic_name[i],
lpfs->info.input_semantic_index[i]);
draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
}
- llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw,
+ llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw,
TGSI_SEMANTIC_PSIZE, 0);
if (llvmpipe->psize_slot > 0) {
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
@@ -192,36 +192,6 @@ compute_cliprect(struct llvmpipe_context *lp)
}
-static void
-update_tgsi_samplers( struct llvmpipe_context *llvmpipe )
-{
- unsigned i;
-
- /* vertex shader samplers */
- for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
- llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->vertex_samplers[i];
- llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->vertex_textures[i];
- llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples;
- }
-
- for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
- lp_tex_tile_cache_validate_texture( llvmpipe->vertex_tex_cache[i] );
- }
-
- /* fragment shader samplers */
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- llvmpipe->tgsi.frag_samplers[i].sampler = llvmpipe->sampler[i];
- llvmpipe->tgsi.frag_samplers[i].texture = llvmpipe->texture[i];
- llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples;
- }
-
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- lp_tex_tile_cache_validate_texture( llvmpipe->tex_cache[i] );
- }
-
- llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
-}
-
/* Hopefully this will remain quite simple, otherwise need to pull in
* something like the state tracker mechanism.
*/
@@ -237,8 +207,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
}
if (llvmpipe->dirty & (LP_NEW_SAMPLER |
- LP_NEW_TEXTURE))
- update_tgsi_samplers( llvmpipe );
+ LP_NEW_TEXTURE)) {
+ /* TODO */
+ }
if (llvmpipe->dirty & (LP_NEW_RASTERIZER |
LP_NEW_FS |
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index d129918fe2..9f4bbef73f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -453,8 +453,8 @@ generate_fragment(struct llvmpipe_context *lp,
debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
debug_printf(" .mag_img_filter = %s\n",
debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
- if(key->sampler[i].compare_mode)
- debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+ if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
+ debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter);
}
@@ -550,13 +550,8 @@ generate_fragment(struct llvmpipe_context *lp,
a0_ptr, dadx_ptr, dady_ptr,
x0, y0, 2, 0);
-#if 0
- /* C texture sampling */
- sampler = lp_c_sampler_soa_create(context_ptr);
-#else
/* code generated texture sampling */
sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
-#endif
for(i = 0; i < num_fs; ++i) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
@@ -673,7 +668,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- llvmpipe->fs = (struct lp_fragment_shader *) fs;
+ if (llvmpipe->fs == fs)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->fs = fs;
llvmpipe->dirty |= LP_NEW_FS;
}
@@ -688,6 +688,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
struct lp_fragment_shader_variant *variant;
assert(fs != llvmpipe->fs);
+ (void) llvmpipe;
variant = shader->variants;
while(variant) {
@@ -722,8 +723,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
- if(shader == PIPE_SHADER_VERTEX)
- draw_flush(llvmpipe->draw);
+ draw_flush(llvmpipe->draw);
/* note: reference counting */
pipe_buffer_reference(&llvmpipe->constants[shader], constants);
@@ -733,7 +733,8 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
}
if(shader == PIPE_SHADER_VERTEX) {
- draw_set_mapped_constant_buffer(llvmpipe->draw, data, size);
+ draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX,
+ data, size);
}
llvmpipe->dirty |= LP_NEW_CONSTANTS;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
index 4561c6b845..aa3b5a3f91 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
@@ -41,14 +41,17 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe,
}
void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe,
- void *setup)
+ void *rasterizer)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ if (llvmpipe->rasterizer == rasterizer)
+ return;
+
/* pass-through to draw module */
- draw_set_rasterizer_state(llvmpipe->draw, setup);
+ draw_set_rasterizer_state(llvmpipe->draw, rasterizer);
- llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup;
+ llvmpipe->rasterizer = rasterizer;
llvmpipe->dirty |= LP_NEW_RASTERIZER;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index ba970cac98..e37ff04f3d 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -51,6 +51,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
struct llvmpipe_context *lp = llvmpipe_context(pipe);
uint i;
+ draw_flush(lp->draw);
+
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
/* check if changing cbuf */
if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c
index 8a761648e7..884e3878e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -70,14 +70,18 @@ fail:
void
-llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs)
+llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs;
- llvmpipe->vs = (const struct lp_vertex_shader *)vs;
+ if (llvmpipe->vs == vs)
+ return;
- draw_bind_vertex_shader(llvmpipe->draw,
- (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL));
+ draw_bind_vertex_shader(llvmpipe->draw,
+ vs ? vs->draw_data : NULL);
+
+ llvmpipe->vs = vs;
llvmpipe->dirty |= LP_NEW_VS;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 29fff91981..6c29e8d8ac 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -462,7 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend,
}
-ALIGN_STACK
+PIPE_ALIGN_STACK
static boolean
test_one(unsigned verbose,
FILE *fp,
@@ -531,11 +531,11 @@ test_one(unsigned verbose,
success = TRUE;
for(i = 0; i < n && success; ++i) {
if(mode == AoS) {
- ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
@@ -596,11 +596,11 @@ test_one(unsigned verbose,
if(mode == SoA) {
const unsigned stride = type.length*type.width/8;
- ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
- ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
boolean mismatch;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 968c7a2d4a..c1abee424c 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -142,7 +142,7 @@ add_conv_test(LLVMModuleRef module,
}
-ALIGN_STACK
+PIPE_ALIGN_STACK
static boolean
test_one(unsigned verbose,
FILE *fp,
@@ -230,8 +230,8 @@ test_one(unsigned verbose,
for(i = 0; i < n && success; ++i) {
unsigned src_stride = src_type.length*src_type.width/8;
unsigned dst_stride = dst_type.length*dst_type.width/8;
- ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
- ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
int64_t start_counter = 0;
@@ -330,7 +330,7 @@ test_one(unsigned verbose,
fprintf(stderr, "conv.bc written\n");
fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n");
firsttime = FALSE;
- //abort();
+ /* abort(); */
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 23ea9ebbe7..2b258f1052 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -199,7 +199,7 @@ add_store_rgba_test(LLVMModuleRef module,
}
-ALIGN_STACK
+PIPE_ALIGN_STACK
static boolean
test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h
index 9fa6c36812..05fded78e1 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.h
@@ -115,7 +115,7 @@ extern const struct llvmpipe_cached_tex_tile *
lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
union tex_tile_address addr );
-static INLINE const union tex_tile_address
+static INLINE union tex_tile_address
tex_tile_address( unsigned x,
unsigned y,
unsigned z,
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 9ad1bde956..cb59a94464 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -31,64 +31,11 @@
#include <llvm-c/Core.h>
-#include "tgsi/tgsi_exec.h"
-
-struct llvmpipe_tex_tile_cache;
struct lp_sampler_static_state;
/**
- * Subclass of tgsi_sampler
- */
-struct lp_shader_sampler
-{
- struct tgsi_sampler base; /**< base class */
-
- unsigned processor;
-
- /* For lp_get_samples_2d_linear_POT:
- */
- unsigned xpot;
- unsigned ypot;
- unsigned level;
-
- const struct pipe_texture *texture;
- const struct pipe_sampler_state *sampler;
-
- struct llvmpipe_tex_tile_cache *cache;
-};
-
-
-
-static INLINE struct lp_shader_sampler *
-lp_shader_sampler(const struct tgsi_sampler *sampler)
-{
- return (struct lp_shader_sampler *) sampler;
-}
-
-
-
-extern void
-lp_get_samples(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE]);
-
-
-/**
- * Texture sampling code generator that just calls lp_get_samples C function
- * for the actual sampling computation.
- *
- * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
- */
-struct lp_build_sampler_soa *
-lp_c_sampler_soa_create(LLVMValueRef context_ptr);
-
-
-/**
* Pure-LLVM texture sampling code generator.
*
* @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
deleted file mode 100644
index 0d01c07fb5..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ /dev/null
@@ -1,1713 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2008 VMware, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Texture sampling
- *
- * Authors:
- * Brian Paul
- */
-
-#include "lp_context.h"
-#include "lp_quad.h"
-#include "lp_surface.h"
-#include "lp_texture.h"
-#include "lp_tex_sample.h"
-#include "lp_tex_cache.h"
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-
-
-/*
- * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes
- * see 1-pixel bands of improperly weighted linear-filtered textures.
- * The tests/texwrap.c demo is a good test.
- * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
- * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
- */
-#define FRAC(f) ((f) - util_ifloor(f))
-
-
-/**
- * Linear interpolation macro
- */
-static INLINE float
-lerp(float a, float v0, float v1)
-{
- return v0 + a * (v1 - v0);
-}
-
-
-/**
- * Do 2D/biliner interpolation of float values.
- * v00, v10, v01 and v11 are typically four texture samples in a square/box.
- * a and b are the horizontal and vertical interpolants.
- * It's important that this function is inlined when compiled with
- * optimization! If we find that's not true on some systems, convert
- * to a macro.
- */
-static INLINE float
-lerp_2d(float a, float b,
- float v00, float v10, float v01, float v11)
-{
- const float temp0 = lerp(a, v00, v10);
- const float temp1 = lerp(a, v01, v11);
- return lerp(b, temp0, temp1);
-}
-
-
-/**
- * As above, but 3D interpolation of 8 values.
- */
-static INLINE float
-lerp_3d(float a, float b, float c,
- float v000, float v100, float v010, float v110,
- float v001, float v101, float v011, float v111)
-{
- const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
- const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
- return lerp(c, temp0, temp1);
-}
-
-
-
-/**
- * If A is a signed integer, A % B doesn't give the right value for A < 0
- * (in terms of texture repeat). Just casting to unsigned fixes that.
- */
-#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
-
-
-/**
- * Apply texture coord wrapping mode and return integer texture indexes
- * for a vector of four texcoords (S or T or P).
- * \param wrapMode PIPE_TEX_WRAP_x
- * \param s the incoming texcoords
- * \param size the texture image size
- * \param icoord returns the integer texcoords
- * \return integer texture index
- */
-static INLINE void
-nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
- int icoord[4])
-{
- uint ch;
- switch (wrapMode) {
- case PIPE_TEX_WRAP_REPEAT:
- /* s limited to [0,1) */
- /* i limited to [0,size-1] */
- for (ch = 0; ch < 4; ch++) {
- int i = util_ifloor(s[ch] * size);
- icoord[ch] = REMAINDER(i, size);
- }
- return;
- case PIPE_TEX_WRAP_CLAMP:
- /* s limited to [0,1] */
- /* i limited to [0,size-1] */
- for (ch = 0; ch < 4; ch++) {
- if (s[ch] <= 0.0F)
- icoord[ch] = 0;
- else if (s[ch] >= 1.0F)
- icoord[ch] = size - 1;
- else
- icoord[ch] = util_ifloor(s[ch] * size);
- }
- return;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- {
- /* s limited to [min,max] */
- /* i limited to [0, size-1] */
- const float min = 1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- if (s[ch] < min)
- icoord[ch] = 0;
- else if (s[ch] > max)
- icoord[ch] = size - 1;
- else
- icoord[ch] = util_ifloor(s[ch] * size);
- }
- }
- return;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- {
- /* s limited to [min,max] */
- /* i limited to [-1, size] */
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- if (s[ch] <= min)
- icoord[ch] = -1;
- else if (s[ch] >= max)
- icoord[ch] = size;
- else
- icoord[ch] = util_ifloor(s[ch] * size);
- }
- }
- return;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- {
- const float min = 1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- const int flr = util_ifloor(s[ch]);
- float u;
- if (flr & 1)
- u = 1.0F - (s[ch] - (float) flr);
- else
- u = s[ch] - (float) flr;
- if (u < min)
- icoord[ch] = 0;
- else if (u > max)
- icoord[ch] = size - 1;
- else
- icoord[ch] = util_ifloor(u * size);
- }
- }
- return;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- for (ch = 0; ch < 4; ch++) {
- /* s limited to [0,1] */
- /* i limited to [0,size-1] */
- const float u = fabsf(s[ch]);
- if (u <= 0.0F)
- icoord[ch] = 0;
- else if (u >= 1.0F)
- icoord[ch] = size - 1;
- else
- icoord[ch] = util_ifloor(u * size);
- }
- return;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- {
- /* s limited to [min,max] */
- /* i limited to [0, size-1] */
- const float min = 1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- const float u = fabsf(s[ch]);
- if (u < min)
- icoord[ch] = 0;
- else if (u > max)
- icoord[ch] = size - 1;
- else
- icoord[ch] = util_ifloor(u * size);
- }
- }
- return;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- {
- /* s limited to [min,max] */
- /* i limited to [0, size-1] */
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- const float u = fabsf(s[ch]);
- if (u < min)
- icoord[ch] = -1;
- else if (u > max)
- icoord[ch] = size;
- else
- icoord[ch] = util_ifloor(u * size);
- }
- }
- return;
- default:
- assert(0);
- }
-}
-
-
-/**
- * Used to compute texel locations for linear sampling for four texcoords.
- * \param wrapMode PIPE_TEX_WRAP_x
- * \param s the texcoords
- * \param size the texture image size
- * \param icoord0 returns first texture indexes
- * \param icoord1 returns second texture indexes (usually icoord0 + 1)
- * \param w returns blend factor/weight between texture indexes
- * \param icoord returns the computed integer texture coords
- */
-static INLINE void
-linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
- int icoord0[4], int icoord1[4], float w[4])
-{
- uint ch;
-
- switch (wrapMode) {
- case PIPE_TEX_WRAP_REPEAT:
- for (ch = 0; ch < 4; ch++) {
- float u = s[ch] * size - 0.5F;
- icoord0[ch] = REMAINDER(util_ifloor(u), size);
- icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_CLAMP:
- for (ch = 0; ch < 4; ch++) {
- float u = CLAMP(s[ch], 0.0F, 1.0F);
- u = u * size - 0.5f;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- for (ch = 0; ch < 4; ch++) {
- float u = CLAMP(s[ch], 0.0F, 1.0F);
- u = u * size - 0.5f;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- if (icoord0[ch] < 0)
- icoord0[ch] = 0;
- if (icoord1[ch] >= (int) size)
- icoord1[ch] = size - 1;
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- {
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- float u = CLAMP(s[ch], min, max);
- u = u * size - 0.5f;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- w[ch] = FRAC(u);
- }
- }
- break;;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- for (ch = 0; ch < 4; ch++) {
- const int flr = util_ifloor(s[ch]);
- float u;
- if (flr & 1)
- u = 1.0F - (s[ch] - (float) flr);
- else
- u = s[ch] - (float) flr;
- u = u * size - 0.5F;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- if (icoord0[ch] < 0)
- icoord0[ch] = 0;
- if (icoord1[ch] >= (int) size)
- icoord1[ch] = size - 1;
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- for (ch = 0; ch < 4; ch++) {
- float u = fabsf(s[ch]);
- if (u >= 1.0F)
- u = (float) size;
- else
- u *= size;
- u -= 0.5F;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- for (ch = 0; ch < 4; ch++) {
- float u = fabsf(s[ch]);
- if (u >= 1.0F)
- u = (float) size;
- else
- u *= size;
- u -= 0.5F;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- if (icoord0[ch] < 0)
- icoord0[ch] = 0;
- if (icoord1[ch] >= (int) size)
- icoord1[ch] = size - 1;
- w[ch] = FRAC(u);
- }
- break;;
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- {
- const float min = -1.0F / (2.0F * size);
- const float max = 1.0F - min;
- for (ch = 0; ch < 4; ch++) {
- float u = fabsf(s[ch]);
- if (u <= min)
- u = min * size;
- else if (u >= max)
- u = max * size;
- else
- u *= size;
- u -= 0.5F;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- w[ch] = FRAC(u);
- }
- }
- break;;
- default:
- assert(0);
- }
-}
-
-
-/**
- * For RECT textures / unnormalized texcoords
- * Only a subset of wrap modes supported.
- */
-static INLINE void
-nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
- int icoord[4])
-{
- uint ch;
- switch (wrapMode) {
- case PIPE_TEX_WRAP_CLAMP:
- for (ch = 0; ch < 4; ch++) {
- int i = util_ifloor(s[ch]);
- icoord[ch]= CLAMP(i, 0, (int) size-1);
- }
- return;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- /* fall-through */
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- for (ch = 0; ch < 4; ch++) {
- icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
- }
- return;
- default:
- assert(0);
- }
-}
-
-
-/**
- * For RECT textures / unnormalized texcoords.
- * Only a subset of wrap modes supported.
- */
-static INLINE void
-linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
- int icoord0[4], int icoord1[4], float w[4])
-{
- uint ch;
- switch (wrapMode) {
- case PIPE_TEX_WRAP_CLAMP:
- for (ch = 0; ch < 4; ch++) {
- /* Not exactly what the spec says, but it matches NVIDIA output */
- float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- w[ch] = FRAC(u);
- }
- return;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- /* fall-through */
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- for (ch = 0; ch < 4; ch++) {
- float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
- u -= 0.5F;
- icoord0[ch] = util_ifloor(u);
- icoord1[ch] = icoord0[ch] + 1;
- if (icoord1[ch] > (int) size - 1)
- icoord1[ch] = size - 1;
- w[ch] = FRAC(u);
- }
- break;
- default:
- assert(0);
- }
-}
-
-
-static unsigned
-choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
-{
- /*
- major axis
- direction target sc tc ma
- ---------- ------------------------------- --- --- ---
- +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
- -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
- +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
- -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
- +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
- -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
- */
- const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
- unsigned face;
- float sc, tc, ma;
-
- if (arx > ary && arx > arz) {
- if (rx >= 0.0F) {
- face = PIPE_TEX_FACE_POS_X;
- sc = -rz;
- tc = -ry;
- ma = arx;
- }
- else {
- face = PIPE_TEX_FACE_NEG_X;
- sc = rz;
- tc = -ry;
- ma = arx;
- }
- }
- else if (ary > arx && ary > arz) {
- if (ry >= 0.0F) {
- face = PIPE_TEX_FACE_POS_Y;
- sc = rx;
- tc = rz;
- ma = ary;
- }
- else {
- face = PIPE_TEX_FACE_NEG_Y;
- sc = rx;
- tc = -rz;
- ma = ary;
- }
- }
- else {
- if (rz > 0.0F) {
- face = PIPE_TEX_FACE_POS_Z;
- sc = rx;
- tc = -ry;
- ma = arz;
- }
- else {
- face = PIPE_TEX_FACE_NEG_Z;
- sc = -rx;
- tc = -ry;
- ma = arz;
- }
- }
-
- *newS = ( sc / ma + 1.0F ) * 0.5F;
- *newT = ( tc / ma + 1.0F ) * 0.5F;
-
- return face;
-}
-
-
-/**
- * Examine the quad's texture coordinates to compute the partial
- * derivatives w.r.t X and Y, then compute lambda (level of detail).
- *
- * This is only done for fragment shaders, not vertex shaders.
- */
-static float
-compute_lambda(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias)
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- const struct pipe_texture *texture = samp->texture;
- const struct pipe_sampler_state *sampler = samp->sampler;
- float rho, lambda;
-
- if (samp->processor == TGSI_PROCESSOR_VERTEX)
- return lodbias;
-
- assert(sampler->normalized_coords);
-
- assert(s);
- {
- float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
- float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT];
- dsdx = fabsf(dsdx);
- dsdy = fabsf(dsdy);
- rho = MAX2(dsdx, dsdy) * texture->width0;
- }
- if (t) {
- float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
- float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT];
- float max;
- dtdx = fabsf(dtdx);
- dtdy = fabsf(dtdy);
- max = MAX2(dtdx, dtdy) * texture->height0;
- rho = MAX2(rho, max);
- }
- if (p) {
- float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
- float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT];
- float max;
- dpdx = fabsf(dpdx);
- dpdy = fabsf(dpdy);
- max = MAX2(dpdx, dpdy) * texture->depth0;
- rho = MAX2(rho, max);
- }
-
- lambda = util_fast_log2(rho);
- lambda += lodbias + sampler->lod_bias;
- lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
-
- return lambda;
-}
-
-
-/**
- * Do several things here:
- * 1. Compute lambda from the texcoords, if needed
- * 2. Determine if we're minifying or magnifying
- * 3. If minifying, choose mipmap levels
- * 4. Return image filter to use within mipmap images
- * \param level0 Returns first mipmap level to sample from
- * \param level1 Returns second mipmap level to sample from
- * \param levelBlend Returns blend factor between levels, in [0,1]
- * \param imgFilter Returns either the min or mag filter, depending on lambda
- */
-static void
-choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- unsigned *level0, unsigned *level1, float *levelBlend,
- unsigned *imgFilter)
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- const struct pipe_texture *texture = samp->texture;
- const struct pipe_sampler_state *sampler = samp->sampler;
-
- if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
- /* no mipmap selection needed */
- *level0 = *level1 = CLAMP((int) sampler->min_lod,
- 0, (int) texture->last_level);
-
- if (sampler->min_img_filter != sampler->mag_img_filter) {
- /* non-mipmapped texture, but still need to determine if doing
- * minification or magnification.
- */
- float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
- if (lambda <= 0.0) {
- *imgFilter = sampler->mag_img_filter;
- }
- else {
- *imgFilter = sampler->min_img_filter;
- }
- }
- else {
- *imgFilter = sampler->mag_img_filter;
- }
- }
- else {
- float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
-
- if (lambda <= 0.0) { /* XXX threshold depends on the filter */
- /* magnifying */
- *imgFilter = sampler->mag_img_filter;
- *level0 = *level1 = 0;
- }
- else {
- /* minifying */
- *imgFilter = sampler->min_img_filter;
-
- /* choose mipmap level(s) and compute the blend factor between them */
- if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
- /* Nearest mipmap level */
- const int lvl = (int) (lambda + 0.5);
- *level0 =
- *level1 = CLAMP(lvl, 0, (int) texture->last_level);
- }
- else {
- /* Linear interpolation between mipmap levels */
- const int lvl = (int) lambda;
- *level0 = CLAMP(lvl, 0, (int) texture->last_level);
- *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
- *levelBlend = FRAC(lambda); /* blending weight between levels */
- }
- }
- }
-}
-
-
-/**
- * Get a texel from a texture, using the texture tile cache.
- *
- * \param face the cube face in 0..5
- * \param level the mipmap level
- * \param x the x coord of texel within 2D image
- * \param y the y coord of texel within 2D image
- * \param z which slice of a 3D texture
- * \param rgba the quad to put the texel/color into
- * \param j which element of the rgba quad to write to
- *
- * XXX maybe move this into lp_tile_cache.c and merge with the
- * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1...
- */
-static void
-get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler,
- unsigned face, unsigned level, int x, int y,
- const uint8_t *out[4])
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-
- const struct llvmpipe_cached_tex_tile *tile
- = lp_get_cached_tex_tile(samp->cache,
- tex_tile_address(x, y, 0, face, level));
-
- y %= TEX_TILE_SIZE;
- x %= TEX_TILE_SIZE;
-
- out[0] = &tile->color[y ][x ][0];
- out[1] = &tile->color[y ][x+1][0];
- out[2] = &tile->color[y+1][x ][0];
- out[3] = &tile->color[y+1][x+1][0];
-}
-
-static INLINE const uint8_t *
-get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler,
- unsigned face, unsigned level, int x, int y)
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-
- const struct llvmpipe_cached_tex_tile *tile
- = lp_get_cached_tex_tile(samp->cache,
- tex_tile_address(x, y, 0, face, level));
-
- y %= TEX_TILE_SIZE;
- x %= TEX_TILE_SIZE;
-
- return &tile->color[y][x][0];
-}
-
-
-static void
-get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler,
- unsigned face, unsigned level,
- int x0, int y0,
- int x1, int y1,
- const uint8_t *out[4])
-{
- unsigned i;
-
- for (i = 0; i < 4; i++) {
- unsigned tx = (i & 1) ? x1 : x0;
- unsigned ty = (i >> 1) ? y1 : y0;
-
- out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty );
- }
-}
-
-static void
-get_texel(const struct tgsi_sampler *tgsi_sampler,
- unsigned face, unsigned level, int x, int y, int z,
- float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- const struct pipe_texture *texture = samp->texture;
- const struct pipe_sampler_state *sampler = samp->sampler;
-
- if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
- y < 0 || y >= (int) u_minify(texture->height0, level) ||
- z < 0 || z >= (int) u_minify(texture->depth0, level)) {
- rgba[0][j] = sampler->border_color[0];
- rgba[1][j] = sampler->border_color[1];
- rgba[2][j] = sampler->border_color[2];
- rgba[3][j] = sampler->border_color[3];
- }
- else {
- const unsigned tx = x % TEX_TILE_SIZE;
- const unsigned ty = y % TEX_TILE_SIZE;
- const struct llvmpipe_cached_tex_tile *tile;
-
- tile = lp_get_cached_tex_tile(samp->cache,
- tex_tile_address(x, y, z, face, level));
-
- rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]);
- rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]);
- rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]);
- rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]);
- if (0)
- {
- debug_printf("Get texel %f %f %f %f from %s\n",
- rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
- pf_name(texture->format));
- }
- }
-}
-
-
-/**
- * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
- * When we sampled the depth texture, the depth value was put into all
- * RGBA channels. We look at the red channel here.
- * \param rgba quad of (depth) texel values
- * \param p texture 'P' components for four pixels in quad
- * \param j which pixel in the quad to test [0..3]
- */
-static INLINE void
-shadow_compare(const struct pipe_sampler_state *sampler,
- float rgba[NUM_CHANNELS][QUAD_SIZE],
- const float p[QUAD_SIZE],
- uint j)
-{
- int k;
- switch (sampler->compare_func) {
- case PIPE_FUNC_LESS:
- k = p[j] < rgba[0][j];
- break;
- case PIPE_FUNC_LEQUAL:
- k = p[j] <= rgba[0][j];
- break;
- case PIPE_FUNC_GREATER:
- k = p[j] > rgba[0][j];
- break;
- case PIPE_FUNC_GEQUAL:
- k = p[j] >= rgba[0][j];
- break;
- case PIPE_FUNC_EQUAL:
- k = p[j] == rgba[0][j];
- break;
- case PIPE_FUNC_NOTEQUAL:
- k = p[j] != rgba[0][j];
- break;
- case PIPE_FUNC_ALWAYS:
- k = 1;
- break;
- case PIPE_FUNC_NEVER:
- k = 0;
- break;
- default:
- k = 0;
- assert(0);
- break;
- }
-
- /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
- rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
- rgba[3][j] = 1.0F;
-}
-
-
-/**
- * As above, but do four z/texture comparisons.
- */
-static INLINE void
-shadow_compare4(const struct pipe_sampler_state *sampler,
- float rgba[NUM_CHANNELS][QUAD_SIZE],
- const float p[QUAD_SIZE])
-{
- int j, k0, k1, k2, k3;
- float val;
-
- /* compare four texcoords vs. four texture samples */
- switch (sampler->compare_func) {
- case PIPE_FUNC_LESS:
- k0 = p[0] < rgba[0][0];
- k1 = p[1] < rgba[0][1];
- k2 = p[2] < rgba[0][2];
- k3 = p[3] < rgba[0][3];
- break;
- case PIPE_FUNC_LEQUAL:
- k0 = p[0] <= rgba[0][0];
- k1 = p[1] <= rgba[0][1];
- k2 = p[2] <= rgba[0][2];
- k3 = p[3] <= rgba[0][3];
- break;
- case PIPE_FUNC_GREATER:
- k0 = p[0] > rgba[0][0];
- k1 = p[1] > rgba[0][1];
- k2 = p[2] > rgba[0][2];
- k3 = p[3] > rgba[0][3];
- break;
- case PIPE_FUNC_GEQUAL:
- k0 = p[0] >= rgba[0][0];
- k1 = p[1] >= rgba[0][1];
- k2 = p[2] >= rgba[0][2];
- k3 = p[3] >= rgba[0][3];
- break;
- case PIPE_FUNC_EQUAL:
- k0 = p[0] == rgba[0][0];
- k1 = p[1] == rgba[0][1];
- k2 = p[2] == rgba[0][2];
- k3 = p[3] == rgba[0][3];
- break;
- case PIPE_FUNC_NOTEQUAL:
- k0 = p[0] != rgba[0][0];
- k1 = p[1] != rgba[0][1];
- k2 = p[2] != rgba[0][2];
- k3 = p[3] != rgba[0][3];
- break;
- case PIPE_FUNC_ALWAYS:
- k0 = k1 = k2 = k3 = 1;
- break;
- case PIPE_FUNC_NEVER:
- k0 = k1 = k2 = k3 = 0;
- break;
- default:
- k0 = k1 = k2 = k3 = 0;
- assert(0);
- break;
- }
-
- /* convert four pass/fail values to an intensity in [0,1] */
- val = 0.25F * (k0 + k1 + k2 + k3);
-
- /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
- for (j = 0; j < 4; j++) {
- rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
- rgba[3][j] = 1.0F;
- }
-}
-
-
-
-static void
-lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- unsigned j;
- unsigned level = samp->level;
- unsigned xpot = 1 << (samp->xpot - level);
- unsigned ypot = 1 << (samp->ypot - level);
- unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
- unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
-
- for (j = 0; j < QUAD_SIZE; j++) {
- int c;
-
- float u = s[j] * xpot - 0.5F;
- float v = t[j] * ypot - 0.5F;
-
- int uflr = util_ifloor(u);
- int vflr = util_ifloor(v);
-
- float xw = u - (float)uflr;
- float yw = v - (float)vflr;
-
- int x0 = uflr & (xpot - 1);
- int y0 = vflr & (ypot - 1);
-
- const uint8_t *tx[4];
-
-
- /* Can we fetch all four at once:
- */
- if (x0 < xmax && y0 < ymax)
- {
- get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx);
- }
- else
- {
- unsigned x1 = (x0 + 1) & (xpot - 1);
- unsigned y1 = (y0 + 1) & (ypot - 1);
- get_texel_quad_2d_mt(tgsi_sampler, 0, level,
- x0, y0, x1, y1, tx);
- }
-
-
- /* interpolate R, G, B, A */
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_2d(xw, yw,
- ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]),
- ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c]));
- }
- }
-}
-
-
-static void
-lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- unsigned j;
- unsigned level = samp->level;
- unsigned xpot = 1 << (samp->xpot - level);
- unsigned ypot = 1 << (samp->ypot - level);
-
- for (j = 0; j < QUAD_SIZE; j++) {
- int c;
-
- float u = s[j] * xpot;
- float v = t[j] * ypot;
-
- int uflr = util_ifloor(u);
- int vflr = util_ifloor(v);
-
- int x0 = uflr & (xpot - 1);
- int y0 = vflr & (ypot - 1);
-
- const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
-
- for (c = 0; c < 4; c++) {
- rgba[c][j] = ubyte_to_float(out[c]);
- }
- }
-}
-
-
-static void
-lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- unsigned j;
- unsigned level = samp->level;
- unsigned xpot = 1 << (samp->xpot - level);
- unsigned ypot = 1 << (samp->ypot - level);
-
- for (j = 0; j < QUAD_SIZE; j++) {
- int c;
-
- float u = s[j] * xpot;
- float v = t[j] * ypot;
-
- int x0, y0;
- const uint8_t *out;
-
- x0 = util_ifloor(u);
- if (x0 < 0)
- x0 = 0;
- else if (x0 > xpot - 1)
- x0 = xpot - 1;
-
- y0 = util_ifloor(v);
- if (y0 < 0)
- y0 = 0;
- else if (y0 > ypot - 1)
- y0 = ypot - 1;
-
- out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
-
- for (c = 0; c < 4; c++) {
- rgba[c][j] = ubyte_to_float(out[c]);
- }
- }
-}
-
-
-static void
-lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- const struct pipe_texture *texture = samp->texture;
- int level0;
- float lambda;
-
- lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
- level0 = (int)lambda;
-
- if (lambda < 0.0) {
- samp->level = 0;
- lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
- s, t, p, 0, rgba );
- }
- else if (level0 >= texture->last_level) {
- samp->level = texture->last_level;
- lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
- s, t, p, 0, rgba );
- }
- else {
- float levelBlend = lambda - level0;
- float rgba0[4][4];
- float rgba1[4][4];
- int c,j;
-
- samp->level = level0;
- lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
- s, t, p, 0, rgba0 );
-
- samp->level = level0+1;
- lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
- s, t, p, 0, rgba1 );
-
- for (j = 0; j < QUAD_SIZE; j++) {
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
- }
- }
- }
-}
-
-/**
- * Common code for sampling 1D/2D/cube textures.
- * Could probably extend for 3D...
- */
-static void
-lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE],
- const unsigned faces[4])
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- const struct pipe_texture *texture = samp->texture;
- const struct pipe_sampler_state *sampler = samp->sampler;
- unsigned level0, level1, j, imgFilter;
- int width, height;
- float levelBlend;
-
- choose_mipmap_levels(tgsi_sampler, s, t, p,
- lodbias,
- &level0, &level1, &levelBlend, &imgFilter);
-
- assert(sampler->normalized_coords);
-
- width = u_minify(texture->width0, level0);
- height = u_minify(texture->height0, level0);
-
- assert(width > 0);
-
- switch (imgFilter) {
- case PIPE_TEX_FILTER_NEAREST:
- {
- int x[4], y[4];
- nearest_texcoord_4(sampler->wrap_s, s, width, x);
- nearest_texcoord_4(sampler->wrap_t, t, height, y);
-
- for (j = 0; j < QUAD_SIZE; j++) {
- get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(sampler, rgba, p, j);
- }
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- unsigned c;
- x[j] /= 2;
- y[j] /= 2;
- get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
- rgba2, j);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
- shadow_compare(sampler, rgba2, p, j);
- }
-
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
- }
- }
- }
- }
- break;
- case PIPE_TEX_FILTER_LINEAR:
- case PIPE_TEX_FILTER_ANISO:
- {
- int x0[4], y0[4], x1[4], y1[4];
- float xw[4], yw[4]; /* weights */
-
- linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
- linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
-
- for (j = 0; j < QUAD_SIZE; j++) {
- float tx[4][4]; /* texels */
- int c;
- get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
- get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
- get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
- get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare4(sampler, tx, p);
- }
-
- /* interpolate R, G, B, A */
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_2d(xw[j], yw[j],
- tx[c][0], tx[c][1],
- tx[c][2], tx[c][3]);
- }
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
-
- /* XXX: This is incorrect -- will often end up with (x0
- * == x1 && y0 == y1), meaning that we fetch the same
- * texel four times and linearly interpolate between
- * identical values. The correct approach would be to
- * call linear_texcoord again for the second level.
- */
- x0[j] /= 2;
- y0[j] /= 2;
- x1[j] /= 2;
- y1[j] /= 2;
- get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
- get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
- get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
- get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
- shadow_compare4(sampler, tx, p);
- }
-
- /* interpolate R, G, B, A */
- for (c = 0; c < 4; c++) {
- rgba2[c][j] = lerp_2d(xw[j], yw[j],
- tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
- }
-
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
- }
- }
- }
- }
- break;
- default:
- assert(0);
- }
-}
-
-
-static INLINE void
-lp_get_samples_1d(struct tgsi_sampler *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- static const unsigned faces[4] = {0, 0, 0, 0};
- static const float tzero[4] = {0, 0, 0, 0};
- lp_get_samples_2d_common(sampler, s, tzero, NULL,
- lodbias, rgba, faces);
-}
-
-
-static INLINE void
-lp_get_samples_2d(struct tgsi_sampler *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- static const unsigned faces[4] = {0, 0, 0, 0};
- lp_get_samples_2d_common(sampler, s, t, p,
- lodbias, rgba, faces);
-}
-
-
-static INLINE void
-lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- const struct pipe_texture *texture = samp->texture;
- const struct pipe_sampler_state *sampler = samp->sampler;
- /* get/map pipe_surfaces corresponding to 3D tex slices */
- unsigned level0, level1, j, imgFilter;
- int width, height, depth;
- float levelBlend;
- const uint face = 0;
-
- choose_mipmap_levels(tgsi_sampler, s, t, p,
- lodbias,
- &level0, &level1, &levelBlend, &imgFilter);
-
- assert(sampler->normalized_coords);
-
- width = u_minify(texture->width0, level0);
- height = u_minify(texture->height0, level0);
- depth = u_minify(texture->depth0, level0);
-
- assert(width > 0);
- assert(height > 0);
- assert(depth > 0);
-
- switch (imgFilter) {
- case PIPE_TEX_FILTER_NEAREST:
- {
- int x[4], y[4], z[4];
- nearest_texcoord_4(sampler->wrap_s, s, width, x);
- nearest_texcoord_4(sampler->wrap_t, t, height, y);
- nearest_texcoord_4(sampler->wrap_r, p, depth, z);
- for (j = 0; j < QUAD_SIZE; j++) {
- get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- unsigned c;
- x[j] /= 2;
- y[j] /= 2;
- z[j] /= 2;
- get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
- }
- }
- }
- }
- break;
- case PIPE_TEX_FILTER_LINEAR:
- case PIPE_TEX_FILTER_ANISO:
- {
- int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
- float xw[4], yw[4], zw[4]; /* interpolation weights */
- linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
- linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
- linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw);
-
- for (j = 0; j < QUAD_SIZE; j++) {
- int c;
- float tx0[4][4], tx1[4][4];
- get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
- get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
- get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
- get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
- get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
- get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
- get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
- get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
-
- /* interpolate R, G, B, A */
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
- tx0[c][0], tx0[c][1],
- tx0[c][2], tx0[c][3],
- tx1[c][0], tx1[c][1],
- tx1[c][2], tx1[c][3]);
- }
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- x0[j] /= 2;
- y0[j] /= 2;
- z0[j] /= 2;
- x1[j] /= 2;
- y1[j] /= 2;
- z1[j] /= 2;
- get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
- get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
- get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
- get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
- get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
- get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
- get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
- get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
-
- /* interpolate R, G, B, A */
- for (c = 0; c < 4; c++) {
- rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
- tx0[c][0], tx0[c][1],
- tx0[c][2], tx0[c][3],
- tx1[c][0], tx1[c][1],
- tx1[c][2], tx1[c][3]);
- }
-
- /* blend mipmap levels */
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
- }
- }
- }
- }
- break;
- default:
- assert(0);
- }
-}
-
-
-static void
-lp_get_samples_cube(struct tgsi_sampler *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- unsigned faces[QUAD_SIZE], j;
- float ssss[4], tttt[4];
- for (j = 0; j < QUAD_SIZE; j++) {
- faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
- }
- lp_get_samples_2d_common(sampler, ssss, tttt, NULL,
- lodbias, rgba, faces);
-}
-
-
-static void
-lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- const struct pipe_texture *texture = samp->texture;
- const struct pipe_sampler_state *sampler = samp->sampler;
- const uint face = 0;
- unsigned level0, level1, j, imgFilter;
- int width, height;
- float levelBlend;
-
- choose_mipmap_levels(tgsi_sampler, s, t, p,
- lodbias,
- &level0, &level1, &levelBlend, &imgFilter);
-
- /* texture RECTS cannot be mipmapped */
- assert(level0 == level1);
-
- width = u_minify(texture->width0, level0);
- height = u_minify(texture->height0, level0);
-
- assert(width > 0);
-
- switch (imgFilter) {
- case PIPE_TEX_FILTER_NEAREST:
- {
- int x[4], y[4];
- nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
- nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
- for (j = 0; j < QUAD_SIZE; j++) {
- get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(sampler, rgba, p, j);
- }
- }
- }
- break;
- case PIPE_TEX_FILTER_LINEAR:
- case PIPE_TEX_FILTER_ANISO:
- {
- int x0[4], y0[4], x1[4], y1[4];
- float xw[4], yw[4]; /* weights */
- linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw);
- linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
- for (j = 0; j < QUAD_SIZE; j++) {
- float tx[4][4]; /* texels */
- int c;
- get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
- get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
- get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
- get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare4(sampler, tx, p);
- }
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_2d(xw[j], yw[j],
- tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
- }
- }
- }
- break;
- default:
- assert(0);
- }
-}
-
-
-/**
- * Error condition handler
- */
-static INLINE void
-lp_get_samples_null(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- int i,j;
-
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++)
- rgba[i][j] = 1.0;
-}
-
-/**
- * Called via tgsi_sampler::get_samples() when using a sampler for the
- * first time. Determine the actual sampler function, link it in and
- * call it.
- */
-void
-lp_get_samples(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
- struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
- const struct pipe_texture *texture = samp->texture;
- const struct pipe_sampler_state *sampler = samp->sampler;
-
- /* Default to the 'undefined' case:
- */
- tgsi_sampler->get_samples = lp_get_samples_null;
-
- if (!texture) {
- assert(0); /* is this legal?? */
- goto out;
- }
-
- if (!sampler->normalized_coords) {
- assert (texture->target == PIPE_TEXTURE_2D);
- tgsi_sampler->get_samples = lp_get_samples_rect;
- goto out;
- }
-
- switch (texture->target) {
- case PIPE_TEXTURE_1D:
- tgsi_sampler->get_samples = lp_get_samples_1d;
- break;
- case PIPE_TEXTURE_2D:
- tgsi_sampler->get_samples = lp_get_samples_2d;
- break;
- case PIPE_TEXTURE_3D:
- tgsi_sampler->get_samples = lp_get_samples_3d;
- break;
- case PIPE_TEXTURE_CUBE:
- tgsi_sampler->get_samples = lp_get_samples_cube;
- break;
- default:
- assert(0);
- break;
- }
-
- /* Do this elsewhere:
- */
- samp->xpot = util_unsigned_logbase2( samp->texture->width0 );
- samp->ypot = util_unsigned_logbase2( samp->texture->height0 );
-
- /* Try to hook in a faster sampler. Ultimately we'll have to
- * code-generate these. Luckily most of this looks like it is
- * orthogonal state within the sampler.
- */
- if (texture->target == PIPE_TEXTURE_2D &&
- sampler->min_img_filter == sampler->mag_img_filter &&
- sampler->wrap_s == sampler->wrap_t &&
- sampler->compare_mode == FALSE &&
- sampler->normalized_coords)
- {
- if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
- samp->level = CLAMP((int) sampler->min_lod,
- 0, (int) texture->last_level);
-
- if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
- switch (sampler->min_img_filter) {
- case PIPE_TEX_FILTER_NEAREST:
- tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT;
- break;
- case PIPE_TEX_FILTER_LINEAR:
- tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT;
- break;
- default:
- break;
- }
- }
- else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) {
- switch (sampler->min_img_filter) {
- case PIPE_TEX_FILTER_NEAREST:
- tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT;
- break;
- default:
- break;
- }
- }
- }
- else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
- switch (sampler->min_img_filter) {
- case PIPE_TEX_FILTER_LINEAR:
- tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT;
- break;
- default:
- break;
- }
- }
- }
- }
- else if (0) {
- _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n",
- texture->target, PIPE_TEXTURE_2D,
- sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE,
- sampler->min_img_filter, sampler->mag_img_filter,
- sampler->wrap_s, sampler->wrap_t,
- sampler->compare_mode, FALSE,
- sampler->normalized_coords, TRUE);
- }
-
-out:
- tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
-}
-
-
-void PIPE_CDECL
-lp_fetch_texel_soa( struct tgsi_sampler **samplers,
- uint32_t unit,
- float *store )
-{
- struct tgsi_sampler *sampler = samplers[unit];
-
-#if 0
- uint j;
-
- debug_printf("%s sampler: %p (%p) store: %p\n",
- __FUNCTION__,
- sampler, *sampler,
- store );
-
- debug_printf("lodbias %f\n", store[12]);
-
- for (j = 0; j < 4; j++)
- debug_printf("sample %d texcoord %f %f\n",
- j,
- store[0+j],
- store[4+j]);
-#endif
-
- {
- float rgba[NUM_CHANNELS][QUAD_SIZE];
- sampler->get_samples(sampler,
- &store[0],
- &store[4],
- &store[8],
- 0.0f, /*store[12], lodbias */
- rgba);
- memcpy(store, rgba, sizeof rgba);
- }
-
-#if 0
- for (j = 0; j < 4; j++)
- debug_printf("sample %d result %f %f %f %f\n",
- j,
- store[0+j],
- store[4+j],
- store[8+j],
- store[12+j]);
-#endif
-}
-
-
-#include "lp_bld_type.h"
-#include "lp_bld_intr.h"
-#include "lp_bld_tgsi.h"
-
-
-struct lp_c_sampler_soa
-{
- struct lp_build_sampler_soa base;
-
- LLVMValueRef context_ptr;
-
- LLVMValueRef samplers_ptr;
-
- /** Coords/texels store */
- LLVMValueRef store_ptr;
-};
-
-
-static void
-lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
-{
- FREE(sampler);
-}
-
-
-static void
-lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
- LLVMBuilderRef builder,
- struct lp_type type,
- unsigned unit,
- unsigned num_coords,
- const LLVMValueRef *coords,
- LLVMValueRef lodbias,
- LLVMValueRef *texel)
-{
- struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
- LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
- LLVMValueRef args[3];
- unsigned i;
-
- if(!sampler->samplers_ptr)
- sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
-
- if(!sampler->store_ptr)
- sampler->store_ptr = LLVMBuildArrayAlloca(builder,
- vec_type,
- LLVMConstInt(LLVMInt32Type(), 4, 0),
- "texel_store");
-
- for (i = 0; i < num_coords; i++) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
- LLVMBuildStore(builder, coords[i], coord_ptr);
- }
-
- args[0] = sampler->samplers_ptr;
- args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
- args[2] = sampler->store_ptr;
-
- lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
-
- for (i = 0; i < NUM_CHANNELS; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
- texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
- }
-}
-
-
-struct lp_build_sampler_soa *
-lp_c_sampler_soa_create(LLVMValueRef context_ptr)
-{
- struct lp_c_sampler_soa *sampler;
-
- sampler = CALLOC_STRUCT(lp_c_sampler_soa);
- if(!sampler)
- return NULL;
-
- sampler->base.destroy = lp_c_sampler_soa_destroy;
- sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
- sampler->context_ptr = context_ptr;
-
- return &sampler->base;
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
index 040b01865d..19d00b58d3 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -29,7 +29,7 @@
#define LP_TILE_SOA_H
#include "pipe/p_compiler.h"
-#include "tgsi/tgsi_exec.h" // for NUM_CHANNELS
+#include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */
#ifdef __cplusplus
diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h
index 595481c2cb..74b472b653 100644
--- a/src/gallium/drivers/llvmpipe/lp_winsys.h
+++ b/src/gallium/drivers/llvmpipe/lp_winsys.h
@@ -35,7 +35,7 @@
#define LP_WINSYS_H
-#include "pipe/p_compiler.h" // for boolean
+#include "pipe/p_compiler.h" /* for boolean */
#include "pipe/p_format.h"
diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h
deleted file mode 100644
index 9c235080a5..0000000000
--- a/src/gallium/drivers/nouveau/nouveau_push.h
+++ /dev/null
@@ -1,93 +0,0 @@
-#ifndef __NOUVEAU_PUSH_H__
-#define __NOUVEAU_PUSH_H__
-
-#include "nouveau/nouveau_winsys.h"
-
-#ifndef NOUVEAU_PUSH_CONTEXT
-#error undefined push context
-#endif
-
-#define OUT_RING(data) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- (*pc->base.channel->pushbuf->cur++) = (data); \
-} while(0)
-
-#define OUT_RINGp(src,size) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- memcpy(pc->base.channel->pushbuf->cur, (src), (size) * 4); \
- pc->base.channel->pushbuf->cur += (size); \
-} while(0)
-
-#define OUT_RINGf(data) do { \
- union { float v; uint32_t u; } c; \
- c.v = (data); \
- OUT_RING(c.u); \
-} while(0)
-
-#define BEGIN_RING(obj,mthd,size) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- struct nouveau_channel *chan = pc->base.channel; \
- if (chan->pushbuf->remaining < ((size) + 1)) \
- nouveau_pushbuf_flush(chan, ((size) + 1)); \
- OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \
- chan->pushbuf->remaining -= ((size) + 1); \
-} while(0)
-
-#define BEGIN_RING_NI(obj,mthd,size) do { \
- BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \
-} while(0)
-
-static inline void
-DO_FIRE_RING(struct nouveau_channel *chan, struct pipe_fence_handle **fence)
-{
- nouveau_pushbuf_flush(chan, 0);
- if (fence)
- *fence = NULL;
-}
-
-#define FIRE_RING(fence) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- DO_FIRE_RING(pc->base.channel, fence); \
-} while(0)
-
-#define OUT_RELOC(bo,data,flags,vor,tor) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- struct nouveau_channel *chan = pc->base.channel; \
- nouveau_pushbuf_emit_reloc(chan, chan->pushbuf->cur++, nouveau_bo(bo), \
- (data), 0, (flags), (vor), (tor)); \
-} while(0)
-
-/* Raw data + flags depending on FB/TT buffer */
-#define OUT_RELOCd(bo,data,flags,vor,tor) do { \
- OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \
-} while(0)
-
-/* FB/TT object handle */
-#define OUT_RELOCo(bo,flags) do { \
- OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \
- pc->base.channel->vram->handle, \
- pc->base.channel->gart->handle); \
-} while(0)
-
-/* Low 32-bits of offset */
-#define OUT_RELOCl(bo,delta,flags) do { \
- OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \
-} while(0)
-
-/* High 32-bits of offset */
-#define OUT_RELOCh(bo,delta,flags) do { \
- OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \
-} while(0)
-
-/* A reloc which'll recombine into a NV_DMA_METHOD packet header */
-#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- struct nouveau_channel *chan = pc->base.channel; \
- if (chan->pushbuf->remaining < ((size) + 1)) \
- nouveau_pushbuf_flush(chan, ((size) + 1)); \
- OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \
- (flags), 0, 0); \
- chan->pushbuf->remaining -= ((size) + 1); \
-} while(0)
-
-#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index e4cf91c005..7ebc94ed6c 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -31,7 +31,7 @@ nouveau_screen_bo_skel(struct pipe_screen *pscreen, struct nouveau_bo *bo,
unsigned alignment, unsigned usage, unsigned size)
{
struct pipe_buffer *pb;
-
+
pb = CALLOC(1, sizeof(struct pipe_buffer)+sizeof(struct nouveau_bo *));
if (!pb) {
nouveau_bo_ref(NULL, &bo);
@@ -127,8 +127,18 @@ nouveau_screen_bo_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
unsigned usage)
{
struct nouveau_bo *bo = nouveau_bo(pb);
+ struct nouveau_screen *nscreen = nouveau_screen(pscreen);
int ret;
+ if (nscreen->pre_pipebuffer_map_callback) {
+ ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage);
+ if (ret) {
+ debug_printf("pre_pipebuffer_map_callback failed %d\n",
+ ret);
+ return NULL;
+ }
+ }
+
ret = nouveau_bo_map(bo, nouveau_screen_map_flags(usage));
if (ret) {
debug_printf("map failed: %d\n", ret);
@@ -143,11 +153,22 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb,
unsigned offset, unsigned length, unsigned usage)
{
struct nouveau_bo *bo = nouveau_bo(pb);
+ struct nouveau_screen *nscreen = nouveau_screen(pscreen);
uint32_t flags = nouveau_screen_map_flags(usage);
int ret;
+ if (nscreen->pre_pipebuffer_map_callback) {
+ ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage);
+ if (ret) {
+ debug_printf("pre_pipebuffer_map_callback failed %d\n",
+ ret);
+ return NULL;
+ }
+ }
+
ret = nouveau_bo_map_range(bo, offset, length, flags);
if (ret) {
+ nouveau_bo_unmap(bo);
if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY)
debug_printf("map_range failed: %d\n", ret);
return NULL;
@@ -239,5 +260,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
void
nouveau_screen_fini(struct nouveau_screen *screen)
{
+ nouveau_channel_free(&screen->channel);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index ebfc67ad1c..a7927d88df 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -5,6 +5,9 @@ struct nouveau_screen {
struct pipe_screen base;
struct nouveau_device *device;
struct nouveau_channel *channel;
+
+ int (*pre_pipebuffer_map_callback) (struct pipe_screen *pscreen,
+ struct pipe_buffer *pb, unsigned usage);
};
static inline struct nouveau_screen *
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
index 9aee9e4956..e844f6abb3 100644
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -3,41 +3,95 @@
#include "util/u_debug.h"
+#ifdef DEBUG
+#define DEBUG_NOUVEAU_STATEOBJ
+#endif /* DEBUG */
+
struct nouveau_stateobj_reloc {
struct nouveau_bo *bo;
- unsigned offset;
- unsigned packet;
+ struct nouveau_grobj *gr;
+ uint32_t push_offset;
+ uint32_t mthd;
- unsigned data;
+ uint32_t data;
unsigned flags;
unsigned vor;
unsigned tor;
};
+struct nouveau_stateobj_start {
+ struct nouveau_grobj *gr;
+ uint32_t mthd;
+ uint32_t size;
+ unsigned offset;
+};
+
struct nouveau_stateobj {
struct pipe_reference reference;
- unsigned *push;
+ struct nouveau_stateobj_start *start;
struct nouveau_stateobj_reloc *reloc;
- unsigned *cur;
- unsigned cur_packet;
+ /* Common memory pool for data. */
+ uint32_t *pool;
+ unsigned pool_cur;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ unsigned start_alloc;
+ unsigned reloc_alloc;
+ unsigned pool_alloc;
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ unsigned total; /* includes begin_ring */
+ unsigned cur; /* excludes begin_ring, offset from "cur_start" */
+ unsigned cur_start;
unsigned cur_reloc;
};
+static INLINE void
+so_dump(struct nouveau_stateobj *so)
+{
+ unsigned i, nr, total = 0;
+
+ for (i = 0; i < so->cur_start; i++) {
+ if (so->start[i].gr->subc > -1)
+ debug_printf("+0x%04x: 0x%08x\n", total++,
+ (so->start[i].size << 18) | (so->start[i].gr->subc << 13)
+ | so->start[i].mthd);
+ else
+ debug_printf("+0x%04x: 0x%08x\n", total++,
+ (so->start[i].size << 18) | so->start[i].mthd);
+ for (nr = 0; nr < so->start[i].size; nr++, total++)
+ debug_printf("+0x%04x: 0x%08x\n", total,
+ so->pool[so->start[i].offset + nr]);
+ }
+}
+
static INLINE struct nouveau_stateobj *
-so_new(unsigned push, unsigned reloc)
+so_new(unsigned start, unsigned push, unsigned reloc)
{
struct nouveau_stateobj *so;
so = MALLOC(sizeof(struct nouveau_stateobj));
pipe_reference_init(&so->reference, 1);
- so->push = MALLOC(sizeof(unsigned) * push);
- so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc);
+ so->total = so->cur = so->cur_start = so->cur_reloc = 0;
- so->cur = so->push;
- so->cur_reloc = so->cur_packet = 0;
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ so->start_alloc = start;
+ so->reloc_alloc = reloc;
+ so->pool_alloc = push;
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ so->start = MALLOC(start * sizeof(struct nouveau_stateobj_start));
+ so->reloc = MALLOC(reloc * sizeof(struct nouveau_stateobj_reloc));
+ so->pool = MALLOC(push * sizeof(uint32_t));
+ so->pool_cur = 0;
+
+ if (!so->start || !so->reloc || !so->pool) {
+ debug_printf("malloc failed\n");
+ assert(0);
+ }
return so;
}
@@ -48,63 +102,128 @@ so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso)
struct nouveau_stateobj *so = *pso;
int i;
- if (pipe_reference(&(*pso)->reference, &ref->reference)) {
- free(so->push);
+ if (pipe_reference(&(*pso)->reference, &ref->reference)) {
+ FREE(so->start);
for (i = 0; i < so->cur_reloc; i++)
nouveau_bo_ref(NULL, &so->reloc[i].bo);
- free(so->reloc);
- free(so);
+ FREE(so->reloc);
+ FREE(so->pool);
+ FREE(so);
}
*pso = ref;
}
static INLINE void
-so_data(struct nouveau_stateobj *so, unsigned data)
+so_data(struct nouveau_stateobj *so, uint32_t data)
{
- (*so->cur++) = (data);
- so->cur_packet += 4;
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->cur >= so->start[so->cur_start - 1].size) {
+ debug_printf("exceeding specified size\n");
+ assert(0);
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ so->pool[so->start[so->cur_start - 1].offset + so->cur++] = data;
}
static INLINE void
-so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size)
+so_datap(struct nouveau_stateobj *so, uint32_t *data, unsigned size)
{
- so->cur_packet += (4 * size);
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if ((so->cur + size) > so->start[so->cur_start - 1].size) {
+ debug_printf("exceeding specified size\n");
+ assert(0);
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
while (size--)
- (*so->cur++) = (*data++);
+ so->pool[so->start[so->cur_start - 1].offset + so->cur++] =
+ *data++;
}
static INLINE void
so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
unsigned mthd, unsigned size)
{
- so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4);
- so_data(so, (gr->subc << 13) | (size << 18) | mthd);
+ struct nouveau_stateobj_start *start;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->start_alloc <= so->cur_start) {
+ debug_printf("exceeding num_start size\n");
+ assert(0);
+ } else
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+ start = so->start;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) {
+ debug_printf("previous so_method was not filled\n");
+ assert(0);
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ so->start = start;
+ start[so->cur_start].gr = gr;
+ start[so->cur_start].mthd = mthd;
+ start[so->cur_start].size = size;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->pool_alloc < (size + so->pool_cur)) {
+ debug_printf("exceeding num_pool size\n");
+ assert(0);
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ start[so->cur_start].offset = so->pool_cur;
+ so->pool_cur += size;
+
+ so->cur_start++;
+ /* The 1 is for *this* begin_ring. */
+ so->total += so->cur + 1;
+ so->cur = 0;
}
static INLINE void
so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo,
unsigned data, unsigned flags, unsigned vor, unsigned tor)
{
- struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++];
-
- r->bo = NULL;
- nouveau_bo_ref(bo, &r->bo);
- r->offset = so->cur - so->push;
- r->packet = so->cur_packet;
- r->data = data;
- r->flags = flags;
- r->vor = vor;
- r->tor = tor;
+ struct nouveau_stateobj_reloc *r;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->reloc_alloc <= so->cur_reloc) {
+ debug_printf("exceeding num_reloc size\n");
+ assert(0);
+ } else
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+ r = so->reloc;
+
+ so->reloc = r;
+ r[so->cur_reloc].bo = NULL;
+ nouveau_bo_ref(bo, &(r[so->cur_reloc].bo));
+ r[so->cur_reloc].gr = so->start[so->cur_start-1].gr;
+ r[so->cur_reloc].push_offset = so->total + so->cur;
+ r[so->cur_reloc].data = data;
+ r[so->cur_reloc].flags = flags;
+ r[so->cur_reloc].mthd = so->start[so->cur_start-1].mthd +
+ (so->cur << 2);
+ r[so->cur_reloc].vor = vor;
+ r[so->cur_reloc].tor = tor;
+
so_data(so, data);
+ so->cur_reloc++;
}
-static INLINE void
-so_dump(struct nouveau_stateobj *so)
+/* Determine if this buffer object is referenced by this state object. */
+static INLINE boolean
+so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo)
{
- unsigned i, nr = so->cur - so->push;
+ int i;
+
+ for (i = 0; i < so->cur_reloc; i++)
+ if (so->reloc[i].bo == bo)
+ return true;
- for (i = 0; i < nr; i++)
- debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]);
+ return false;
}
static INLINE void
@@ -114,75 +233,93 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so)
unsigned nr, i;
int ret = 0;
- nr = so->cur - so->push;
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->start[so->cur_start - 1].size > so->cur) {
+ debug_printf("emit: previous so_method was not filled\n");
+ assert(0);
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ /* We cannot update total in case we so_emit again. */
+ nr = so->total + so->cur;
+
/* This will flush if we need space.
* We don't actually need the marker.
*/
if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) {
debug_printf("so_emit failed marker emit with error %d\n", ret);
- return;
+ assert(0);
+ }
+
+ /* Submit data. This will ensure proper binding of objects. */
+ for (i = 0; i < so->cur_start; i++) {
+ BEGIN_RING(chan, so->start[i].gr, so->start[i].mthd, so->start[i].size);
+ OUT_RINGp(chan, &(so->pool[so->start[i].offset]), so->start[i].size);
}
- pb->remaining -= nr;
- memcpy(pb->cur, so->push, nr * 4);
for (i = 0; i < so->cur_reloc; i++) {
struct nouveau_stateobj_reloc *r = &so->reloc[i];
- if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
- r->bo, r->data, 0, r->flags,
- r->vor, r->tor))) {
+ if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur - nr +
+ r->push_offset, r->bo, r->data,
+ 0, r->flags, r->vor, r->tor))) {
debug_printf("so_emit failed reloc with error %d\n", ret);
- goto out;
+ assert(0);
}
}
-out:
- pb->cur += nr;
}
static INLINE void
so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so)
{
struct nouveau_pushbuf *pb = chan->pushbuf;
+ struct nouveau_grobj *gr = NULL;
unsigned i;
int ret = 0;
if (!so)
return;
- i = so->cur_reloc << 1;
- /* This will flush if we need space.
- * We don't actually need the marker.
- */
- if ((ret = nouveau_pushbuf_marker_emit(chan, i, i))) {
- debug_printf("so_emit_reloc_markers failed marker emit with" \
- "error %d\n", ret);
- return;
- }
- pb->remaining -= i;
-
+ /* If we need to flush in flush notify, then we have a problem anyway. */
for (i = 0; i < so->cur_reloc; i++) {
struct nouveau_stateobj_reloc *r = &so->reloc[i];
- if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
- r->packet, 0,
- (r->flags & (NOUVEAU_BO_VRAM |
- NOUVEAU_BO_GART |
- NOUVEAU_BO_RDWR)) |
- NOUVEAU_BO_DUMMY, 0, 0))) {
- debug_printf("so_emit_reloc_markers failed reloc" \
- "with error %d\n", ret);
- pb->remaining += ((so->cur_reloc - i) << 1);
- return;
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (r->mthd & 0x40000000) {
+ debug_printf("error: NI mthd 0x%08X\n", r->mthd);
+ continue;
}
- if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
- r->data, 0,
- r->flags | NOUVEAU_BO_DUMMY,
- r->vor, r->tor))) {
- debug_printf("so_emit_reloc_markers failed reloc" \
- "with error %d\n", ret);
- pb->remaining += ((so->cur_reloc - i) << 1) - 1;
- return;
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ /* The object needs to be bound and the system must know the
+ * subchannel is being used. Otherwise it will discard it.
+ */
+ if (gr != r->gr) {
+ BEGIN_RING(chan, r->gr, 0x100, 1);
+ OUT_RING(chan, 0);
+ gr = r->gr;
+ }
+
+ /* Some relocs really don't like to be hammered,
+ * NOUVEAU_BO_DUMMY makes sure it only
+ * happens when needed.
+ */
+ ret = OUT_RELOC(chan, r->bo, (r->gr->subc << 13) | (1<< 18) |
+ r->mthd, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART
+ | NOUVEAU_BO_RDWR)) | NOUVEAU_BO_DUMMY, 0, 0);
+ if (ret) {
+ debug_printf("OUT_RELOC failed %d\n", ret);
+ assert(0);
}
+
+ ret = OUT_RELOC(chan, r->bo, r->data, r->flags |
+ NOUVEAU_BO_DUMMY, r->vor, r->tor);
+ if (ret) {
+ debug_printf("OUT_RELOC failed %d\n", ret);
+ assert(0);
+ }
+
+ pb->remaining -= 2;
}
}
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index 42c77e5e77..4c3e08a43f 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -23,6 +23,9 @@
#define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17)
#define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18)
+/* use along with GPU_WRITE for 2D-only writes */
+#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19)
+
extern struct pipe_screen *
nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c
index 4b33636b2e..edd96859cf 100644
--- a/src/gallium/drivers/nv04/nv04_context.c
+++ b/src/gallium/drivers/nv04/nv04_context.c
@@ -10,10 +10,14 @@ nv04_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv04_context *nv04 = nv04_context(pipe);
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
draw_flush(nv04->draw);
- FIRE_RING(fence);
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
}
static void
@@ -30,32 +34,36 @@ nv04_destroy(struct pipe_context *pipe)
static boolean
nv04_init_hwctx(struct nv04_context *nv04)
{
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
// requires a valid handle
-// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1);
+// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1);
// OUT_RING(0);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1);
- OUT_RING(0);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1);
+ OUT_RING(chan, 0);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
- OUT_RING(0x40182800);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(chan, 0x40182800);
// OUT_RING(1<<20/*no cull*/);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
// OUT_RING(0x24|(1<<6)|(1<<8));
- OUT_RING(0x120001a4);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1);
- OUT_RING(0x332213a1);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1);
- OUT_RING(0x11001010);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1);
- OUT_RING(0x0);
-// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1);
+ OUT_RING(chan, 0x120001a4);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1);
+ OUT_RING(chan, 0x332213a1);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1);
+ OUT_RING(chan, 0x11001010);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1);
+ OUT_RING(chan, 0x0);
+// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1);
// OUT_RING(SCREEN_OFFSET);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1);
- OUT_RING(0xff000000);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1);
+ OUT_RING(chan, 0xff000000);
- FIRE_RING (NULL);
+ FIRE_RING (chan);
return TRUE;
}
diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h
index 55326c787a..fe3b527423 100644
--- a/src/gallium/drivers/nv04/nv04_context.h
+++ b/src/gallium/drivers/nv04/nv04_context.h
@@ -15,10 +15,6 @@
#include "nouveau/nouveau_gldefs.h"
#include "nouveau/nouveau_context.h"
-#define NOUVEAU_PUSH_CONTEXT(ctx) \
- struct nv04_screen *ctx = nv04->screen
-#include "nouveau/nouveau_push.h"
-
#include "nv04_state.h"
#define NOUVEAU_ERR(fmt, args...) \
@@ -141,9 +137,9 @@ extern void nv04_emit_hw_state(struct nv04_context *nv04);
extern void nv04_state_tex_update(struct nv04_context *nv04);
/* nv04_vbo.c */
-extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv04_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv04_draw_elements( struct pipe_context *pipe,
+extern void nv04_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c
index 0cce71ad1d..c152b52119 100644
--- a/src/gallium/drivers/nv04/nv04_fragtex.c
+++ b/src/gallium/drivers/nv04/nv04_fragtex.c
@@ -4,7 +4,7 @@
#define _(m,tf) \
{ \
PIPE_FORMAT_##m, \
- NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \
+ NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \
}
struct nv04_texture_format {
@@ -53,14 +53,14 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit)
return;
}
- nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER
- | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
+ nv04->fragtex.format = NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER
+ | NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
| nv04_fragtex_format(pt->format)
- | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
- | ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
- | ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
- | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
- | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
+ | ( (pt->last_level + 1) << NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
+ | ( log2i(pt->width0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
+ | ( log2i(pt->height0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
+ | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
+ | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
;
}
diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
index f6458232ae..0b795ea243 100644
--- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c
+++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
@@ -93,33 +93,45 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render,
static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5)
{
- BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49);
- OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v3,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v4,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v5,8);
- OUT_RING(0xFEDCBA);
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA), 49);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v4,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v5,8);
+ OUT_RING(chan, 0xFEDCBA);
}
static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2)
{
- BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25);
- OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
- OUT_RING(0xFED);
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD), 25);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8);
+ OUT_RING(chan, 0xFED);
}
static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3)
{
- BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33);
- OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v3,8);
- OUT_RING(0xFECEDC);
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC), 33);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8);
+ OUT_RING(chan, 0xFECEDC);
}
static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices)
@@ -156,7 +168,10 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con
{
const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC};
unsigned char* buffer = render->buffer;
- struct nv04_context* nv04 = render->nv04;
+ struct nv04_context *nv04 = render->nv04;
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
int i,j;
for(i = 0; i<nr_indices; i+=14)
@@ -166,15 +181,15 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con
if (numvert<3)
break;
- BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 );
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8);
for(j = 0; j<numvert; j++)
- OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 );
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * indices [i+j], 8 );
- BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 );
+ BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 );
for(j = 0; j<numtri/2; j++ )
- OUT_RING(striptbl[j]);
+ OUT_RING(chan, striptbl[j]);
if (numtri%2)
- OUT_RING(striptbl[numtri/2]&0xFFF);
+ OUT_RING(chan, striptbl[numtri/2]&0xFFF);
}
}
@@ -182,11 +197,14 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const
{
const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0};
unsigned char* buffer = render->buffer;
- struct nv04_context* nv04 = render->nv04;
+ struct nv04_context *nv04 = render->nv04;
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
int i,j;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
- OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[0], 8);
for(i = 1; i<nr_indices; i+=14)
{
@@ -195,16 +213,16 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const
if (numvert < 3)
break;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
for(j=0;j<numvert;j++)
- OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 );
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[ i+j ], 8 );
- BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2);
+ BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2);
for(j = 0; j<numtri/2; j++)
- OUT_RING(fantbl[j]);
+ OUT_RING(chan, fantbl[j]);
if (numtri%2)
- OUT_RING(fantbl[numtri/2]&0xFFF);
+ OUT_RING(chan, fantbl[numtri/2]&0xFFF);
}
}
diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c
index 170ce3eb7e..7c5b6e8229 100644
--- a/src/gallium/drivers/nv04/nv04_screen.c
+++ b/src/gallium/drivers/nv04/nv04_screen.c
@@ -119,6 +119,8 @@ nv04_screen_destroy(struct pipe_screen *pscreen)
nouveau_grobj_free(&screen->fahrenheit);
nv04_surface_2d_takedown(&screen->eng2d);
+ nouveau_screen_fini(&screen->base);
+
FREE(pscreen);
}
@@ -163,10 +165,10 @@ nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
fahrenheit_class = 0;
sub3d_class = 0;
} else if (dev->chipset >= 0x10) {
- fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE;
+ fahrenheit_class = NV10_TEXTURED_TRIANGLE;
sub3d_class = NV10_CONTEXT_SURFACES_3D;
} else {
- fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE;
+ fahrenheit_class=NV04_TEXTURED_TRIANGLE;
sub3d_class = NV04_CONTEXT_SURFACES_3D;
}
diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c
index 871034ad0b..b67f1e16b1 100644
--- a/src/gallium/drivers/nv04/nv04_state.c
+++ b/src/gallium/drivers/nv04/nv04_state.c
@@ -50,28 +50,28 @@ wrap_mode(unsigned wrap) {
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
break;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
break;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
break;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
break;
case PIPE_TEX_WRAP_CLAMP:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
case PIPE_TEX_WRAP_MIRROR_CLAMP:
default:
NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
}
- return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
+ return ret >> NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
}
static void *
@@ -84,20 +84,20 @@ nv04_sampler_state_create(struct pipe_context *pipe,
ss = MALLOC(sizeof(struct nv04_sampler_state));
- ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
- (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
+ ss->format = ((wrap_mode(cso->wrap_s) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
if (cso->max_anisotropy > 1.0) {
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
}
switch (cso->mag_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
break;
case PIPE_TEX_FILTER_NEAREST:
default:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
break;
}
@@ -105,14 +105,14 @@ nv04_sampler_state_create(struct pipe_context *pipe,
case PIPE_TEX_FILTER_LINEAR:
switch (cso->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NEAREST:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
break;
case PIPE_TEX_MIPFILTER_LINEAR:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
break;
case PIPE_TEX_MIPFILTER_NONE:
default:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
break;
}
break;
@@ -120,14 +120,14 @@ nv04_sampler_state_create(struct pipe_context *pipe,
default:
switch (cso->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NEAREST:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
break;
case PIPE_TEX_MIPFILTER_LINEAR:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
break;
case PIPE_TEX_MIPFILTER_NONE:
default:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
break;
}
break;
@@ -181,7 +181,7 @@ nv04_rasterizer_state_create(struct pipe_context *pipe,
*/
rs = MALLOC(sizeof(struct nv04_rasterizer_state));
- rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
+ rs->blend = cso->flatshade ? NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
return (void *)rs;
}
@@ -229,16 +229,16 @@ nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe,
hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state));
hw->control = float_to_ubyte(cso->alpha.ref_value);
- hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
- hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0;
- hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
- hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0;
- hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
- hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
- hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
- hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
- hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0;
- hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
+ hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
+ hw->control |= cso->alpha.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE : 0;
+ hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
+ hw->control |= cso->depth.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE : 0;
+ hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
+ hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
+ hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
+ hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
+ hw->control |= cso->depth.writemask ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE : 0;
+ hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
return (void *)hw;
}
@@ -377,7 +377,7 @@ nv04_set_scissor_state(struct pipe_context *pipe,
/* struct nv04_context *nv04 = nv04_context(pipe);
// XXX
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
}
diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c
index eb2c1c57c6..b8d6dc560f 100644
--- a/src/gallium/drivers/nv04/nv04_state_emit.c
+++ b/src/gallium/drivers/nv04/nv04_state_emit.c
@@ -57,13 +57,19 @@ static uint32_t nv04_blend_func(uint32_t f)
static void nv04_emit_control(struct nv04_context* nv04)
{
uint32_t control = nv04->dsa->control;
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
- OUT_RING(control);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(chan, control);
}
static void nv04_emit_blend(struct nv04_context* nv04)
{
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
uint32_t blend;
blend=0x4; // texture MODULATE_ALPHA
@@ -75,19 +81,23 @@ static void nv04_emit_blend(struct nv04_context* nv04)
blend|=(nv04_blend_func(nv04->blend->b_src)<<24);
blend|=(nv04_blend_func(nv04->blend->b_dst)<<28);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
- OUT_RING(blend);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
+ OUT_RING(chan, blend);
}
static void nv04_emit_sampler(struct nv04_context *nv04, int unit)
{
struct nv04_miptree *nv04mt = nv04->tex_miptree[unit];
struct pipe_texture *pt = &nv04mt->base;
-
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3);
- OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
- OUT_RING(nv04->sampler[unit]->filter);
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+ struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer);
+
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3);
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING(chan, nv04->sampler[unit]->filter);
}
static void nv04_state_emit_framebuffer(struct nv04_context* nv04)
@@ -97,6 +107,10 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04)
uint32_t rt_format, w, h;
int colour_format = 0, zeta_format = 0;
struct nv04_miptree *nv04mt = 0;
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d;
+ struct nouveau_bo *bo;
w = fb->cbufs[0]->width;
h = fb->cbufs[0]->height;
@@ -128,24 +142,29 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04)
assert(0);
}
- BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1);
- OUT_RING(rt_format);
+ BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1);
+ OUT_RING(chan, rt_format);
nv04mt = (struct nv04_miptree *)rt->base.texture;
+ bo = nouveau_bo(nv04mt->buffer);
/* FIXME pitches have to be aligned ! */
- BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
- OUT_RING(rt->pitch|(zeta->pitch<<16));
- OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+ OUT_RING(chan, rt->pitch|(zeta->pitch<<16));
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
if (fb->zsbuf) {
nv04mt = (struct nv04_miptree *)zeta->base.texture;
- BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
- OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}
}
void
nv04_emit_hw_state(struct nv04_context *nv04)
{
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+ struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d;
int i;
if (nv04->dirty & NV04_NEW_VERTPROG) {
@@ -163,8 +182,8 @@ nv04_emit_hw_state(struct nv04_context *nv04)
if (nv04->dirty & NV04_NEW_CONTROL) {
nv04->dirty &= ~NV04_NEW_CONTROL;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
- OUT_RING(nv04->dsa->control);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(chan, nv04->dsa->control);
}
if (nv04->dirty & NV04_NEW_BLEND) {
@@ -205,12 +224,12 @@ nv04_emit_hw_state(struct nv04_context *nv04)
unsigned rt_pitch = ((struct nv04_surface *)nv04->rt)->pitch;
unsigned zeta_pitch = ((struct nv04_surface *)nv04->zeta)->pitch;
- BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
- OUT_RING(rt_pitch|(zeta_pitch<<16));
- OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+ OUT_RING(chan, rt_pitch|(zeta_pitch<<16));
+ OUT_RELOCl(chan, nouveau_bo(nv04->rt), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
if (nv04->zeta) {
- BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
- OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+ OUT_RELOCl(chan, nouveau_bo(nv04->zeta), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}
/* Texture images */
@@ -218,9 +237,10 @@ nv04_emit_hw_state(struct nv04_context *nv04)
if (!(nv04->fp_samplers & (1 << i)))
continue;
struct nv04_miptree *nv04mt = nv04->tex_miptree[i];
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2);
- OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2);
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
}
}
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 12df7fd199..b24a9cee5a 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -77,7 +77,7 @@ nv04_scaled_image_format(enum pipe_format format)
}
static INLINE unsigned
-nv04_swizzle_bits(unsigned x, unsigned y)
+nv04_swizzle_bits_square(unsigned x, unsigned y)
{
unsigned u = (x & 0x001) << 0 |
(x & 0x002) << 1 |
@@ -107,6 +107,15 @@ nv04_swizzle_bits(unsigned x, unsigned y)
return v | u;
}
+/* rectangular swizzled textures are linear concatenations of swizzled square tiles */
+static INLINE unsigned
+nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ unsigned s = MIN2(w, h);
+ unsigned m = s - 1;
+ return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m);
+}
+
static int
nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
struct pipe_surface *dst, int dx, int dy,
@@ -158,20 +167,19 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
for (x = 0; x < w; x += sub_w) {
sub_w = MIN2(sub_w, w - x);
- /* Must be 64-byte aligned */
- assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format)) & 63));
+ assert(!(dst->offset & 63));
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
- OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format),
+ OUT_RELOCl(chan, dst_bo, dst->offset,
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
OUT_RING (chan, nv04_scaled_image_format(src->format));
OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
- OUT_RING (chan, 0);
+ OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT));
OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w);
- OUT_RING (chan, 0);
+ OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT));
OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w);
OUT_RING (chan, 1 << 20);
OUT_RING (chan, 1 << 20);
@@ -491,3 +499,49 @@ nv04_surface_2d_init(struct nouveau_screen *screen)
ctx->fill = nv04_surface_fill;
return ctx;
}
+
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns)
+{
+ int temp_flags;
+
+ // printf("creating temp, flags is %i!\n", flags);
+
+ if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD)
+ {
+ temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ;
+ ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD;
+ }
+ else
+ {
+ temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE;
+ ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ;
+ }
+
+ struct nv40_screen* screen = (struct nv40_screen*)pscreen;
+ ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE;
+
+ struct pipe_texture templ;
+ memset(&templ, 0, sizeof(templ));
+ templ.format = ns->base.texture->format;
+ templ.target = PIPE_TEXTURE_2D;
+ templ.width0 = ns->base.width;
+ templ.height0 = ns->base.height;
+ templ.depth0 = 1;
+ templ.last_level = 0;
+
+ // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented
+ templ.nr_samples = ns->base.texture->nr_samples;
+
+ templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+ struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ);
+ struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags);
+ temp_ns->backing = ns;
+
+ if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ)
+ eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height);
+
+ return temp_ns;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h
index 02b3f56ba8..ce696a11a3 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.h
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.h
@@ -4,6 +4,7 @@
struct nv04_surface {
struct pipe_surface base;
unsigned pitch;
+ struct nv04_surface* backing;
};
struct nv04_surface_2d {
@@ -30,4 +31,7 @@ nv04_surface_2d_init(struct nouveau_screen *screen);
void
nv04_surface_2d_takedown(struct nv04_surface_2d **);
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns);
+
#endif
diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c
index 8446073ae8..2dd2e146a8 100644
--- a/src/gallium/drivers/nv04/nv04_transfer.c
+++ b/src/gallium/drivers/nv04/nv04_transfer.c
@@ -16,14 +16,14 @@ struct nv04_transfer {
};
static void
-nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width0 = u_minify(pt->width0, level);
- template->height0 = u_minify(pt->height0, level);
+ template->width0 = width;
+ template->height0 = height;
template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv04_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv04_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -80,6 +80,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv04_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
face, level, zslice,
pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -130,9 +132,9 @@ nv04_transfer_del(struct pipe_transfer *ptx)
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -151,8 +153,10 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c
index e3167814f2..3484771814 100644
--- a/src/gallium/drivers/nv04/nv04_vbo.c
+++ b/src/gallium/drivers/nv04/nv04_vbo.c
@@ -9,7 +9,7 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
-boolean nv04_draw_elements( struct pipe_context *pipe,
+void nv04_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
@@ -45,7 +45,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- draw_set_mapped_constant_buffer(draw,
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
nv04->constbuf[PIPE_SHADER_VERTEX],
nv04->constbuf_nr[PIPE_SHADER_VERTEX]);
@@ -65,15 +65,13 @@ boolean nv04_draw_elements( struct pipe_context *pipe,
pipe_buffer_unmap(pscreen, indexBuffer);
draw_set_mapped_element_buffer(draw, 0, NULL);
}
-
- return TRUE;
}
-boolean nv04_draw_arrays( struct pipe_context *pipe,
- unsigned prim, unsigned start, unsigned count)
+void nv04_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
{
printf("coucou in draw arrays\n");
- return nv04_draw_elements(pipe, NULL, 0, prim, start, count);
+ nv04_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c
index 0dadeb03dd..1ecb73d06e 100644
--- a/src/gallium/drivers/nv10/nv10_context.c
+++ b/src/gallium/drivers/nv10/nv10_context.c
@@ -10,10 +10,14 @@ nv10_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv10_context *nv10 = nv10_context(pipe);
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
draw_flush(nv10->draw);
- FIRE_RING(fence);
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
}
static void
@@ -31,225 +35,226 @@ static void nv10_init_hwctx(struct nv10_context *nv10)
{
struct nv10_screen *screen = nv10->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
int i;
float projectionmatrix[16];
- BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1);
- OUT_RING (screen->sync->handle);
- BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2);
- OUT_RING (chan->vram->handle);
- OUT_RING (chan->gart->handle);
- BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2);
- OUT_RING (chan->vram->handle);
- OUT_RING (chan->vram->handle);
+ BEGIN_RING(chan, celsius, NV10TCL_DMA_NOTIFY, 1);
+ OUT_RING (chan, screen->sync->handle);
+ BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY0, 2);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->gart->handle);
+ BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY2, 2);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle);
- BEGIN_RING(celsius, NV10TCL_NOP, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+ OUT_RING (chan, 0);
- BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2);
- OUT_RING (0);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
- OUT_RING ((0x7ff<<16)|0x800);
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
- OUT_RING ((0x7ff<<16)|0x800);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+ OUT_RING (chan, (0x7ff<<16)|0x800);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
+ OUT_RING (chan, (0x7ff<<16)|0x800);
for (i=1;i<8;i++) {
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(celsius, 0x290, 1);
- OUT_RING ((0x10<<16)|1);
- BEGIN_RING(celsius, 0x3f4, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, 0x290, 1);
+ OUT_RING (chan, (0x10<<16)|1);
+ BEGIN_RING(chan, celsius, 0x3f4, 1);
+ OUT_RING (chan, 0);
- BEGIN_RING(celsius, NV10TCL_NOP, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+ OUT_RING (chan, 0);
if (nv10->screen->celsius->grclass != NV10TCL) {
/* For nv11, nv17 */
- BEGIN_RING(celsius, 0x120, 3);
- OUT_RING (0);
- OUT_RING (1);
- OUT_RING (2);
+ BEGIN_RING(chan, celsius, 0x120, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 2);
- BEGIN_RING(celsius, NV10TCL_NOP, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(celsius, NV10TCL_NOP, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+ OUT_RING (chan, 0);
/* Set state */
- BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2);
- OUT_RING (0x207);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2);
- OUT_RING (0);
- OUT_RING (0);
-
- BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12);
- OUT_RING (0x30141010);
- OUT_RING (0);
- OUT_RING (0x20040000);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0x00000c00);
- OUT_RING (0);
- OUT_RING (0x00000c00);
- OUT_RING (0x18000000);
- OUT_RING (0x300e0300);
- OUT_RING (0x0c091c80);
-
- BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2);
- OUT_RING (1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4);
- OUT_RING (1);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0x8006);
- BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8);
- OUT_RING (0xff);
- OUT_RING (0x207);
- OUT_RING (0);
- OUT_RING (0xff);
- OUT_RING (0x1e00);
- OUT_RING (0x1e00);
- OUT_RING (0x1e00);
- OUT_RING (0x1d01);
- BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
- OUT_RING (0x201);
- BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
- OUT_RING (8);
- BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1);
- OUT_RING (8);
- BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
- OUT_RING (0x1b02);
- OUT_RING (0x1b02);
- BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
- OUT_RING (0x405);
- OUT_RING (0x901);
- BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8);
+ BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 2);
+ OUT_RING (chan, 0x207);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(0), 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, celsius, NV10TCL_RC_IN_ALPHA(0), 12);
+ OUT_RING (chan, 0x30141010);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0x20040000);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0x00000c00);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0x00000c00);
+ OUT_RING (chan, 0x18000000);
+ OUT_RING (chan, 0x300e0300);
+ OUT_RING (chan, 0x0c091c80);
+
+ BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 2);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_SRC, 4);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0x8006);
+ BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 8);
+ OUT_RING (chan, 0xff);
+ OUT_RING (chan, 0x207);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0xff);
+ OUT_RING (chan, 0x1e00);
+ OUT_RING (chan, 0x1e00);
+ OUT_RING (chan, 0x1e00);
+ OUT_RING (chan, 0x1d01);
+ BEGIN_RING(chan, celsius, NV10TCL_NORMALIZE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_LIGHT_MODEL, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_COLOR_CONTROL, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_ENABLED_LIGHTS, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1);
+ OUT_RING (chan, 0x201);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1);
+ OUT_RING (chan, 8);
+ BEGIN_RING(chan, celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_LINE_WIDTH, 1);
+ OUT_RING (chan, 8);
+ BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (chan, 0x1b02);
+ OUT_RING (chan, 0x1b02);
+ BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2);
+ OUT_RING (chan, 0x405);
+ OUT_RING (chan, 0x901);
+ BEGIN_RING(chan, celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_TX_GEN_S(0), 8);
for (i=0;i<8;i++) {
- OUT_RING (0);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
- OUT_RING (0x3fc00000); /* -1.50 */
- OUT_RING (0xbdb8aa0a); /* -0.09 */
- OUT_RING (0); /* 0.00 */
+ BEGIN_RING(chan, celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
+ OUT_RING (chan, 0x3fc00000); /* -1.50 */
+ OUT_RING (chan, 0xbdb8aa0a); /* -0.09 */
+ OUT_RING (chan, 0); /* 0.00 */
- BEGIN_RING(celsius, NV10TCL_NOP, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+ OUT_RING (chan, 0);
- BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2);
- OUT_RING (0x802);
- OUT_RING (2);
+ BEGIN_RING(chan, celsius, NV10TCL_FOG_MODE, 2);
+ OUT_RING (chan, 0x802);
+ OUT_RING (chan, 2);
/* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
* using texturing, except when using the texture matrix
*/
- BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
- OUT_RING (6);
- BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
- OUT_RING (0x01010101);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
+ OUT_RING (chan, 6);
+ BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1);
+ OUT_RING (chan, 0x01010101);
/* Set vertex component */
- BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4);
- OUT_RINGf (1.0);
- OUT_RINGf (1.0);
- OUT_RINGf (1.0);
- OUT_RINGf (1.0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RINGf (1.0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
- OUT_RINGf (1.0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
- OUT_RINGf (1.0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1);
- OUT_RINGf (0.0);
- BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL_4F_R, 4);
+ OUT_RINGf (chan, 1.0);
+ OUT_RINGf (chan, 1.0);
+ OUT_RINGf (chan, 1.0);
+ OUT_RINGf (chan, 1.0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL2_3F_R, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_NOR_3F_X, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RINGf (chan, 1.0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX0_4F_S, 4);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 1.0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX1_4F_S, 4);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 1.0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_FOG_1F, 1);
+ OUT_RINGf (chan, 0.0);
+ BEGIN_RING(chan, celsius, NV10TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (chan, 1);
memset(projectionmatrix, 0, sizeof(projectionmatrix));
- BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16);
+ BEGIN_RING(chan, celsius, NV10TCL_PROJECTION_MATRIX(0), 16);
projectionmatrix[0*4+0] = 1.0;
projectionmatrix[1*4+1] = 1.0;
projectionmatrix[2*4+2] = 1.0;
projectionmatrix[3*4+3] = 1.0;
for (i=0;i<16;i++) {
- OUT_RINGf (projectionmatrix[i]);
+ OUT_RINGf (chan, projectionmatrix[i]);
}
- BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
- OUT_RING (0.0);
- OUT_RINGf (16777216.0);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
+ OUT_RING (chan, 0.0);
+ OUT_RINGf (chan, 16777216.0);
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4);
- OUT_RINGf (-2048.0);
- OUT_RINGf (-2048.0);
- OUT_RINGf (16777215.0 * 0.5);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4);
+ OUT_RINGf (chan, -2048.0);
+ OUT_RINGf (chan, -2048.0);
+ OUT_RINGf (chan, 16777215.0 * 0.5);
+ OUT_RING (chan, 0);
- FIRE_RING (NULL);
+ FIRE_RING (chan);
}
struct pipe_context *
diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h
index 36a6aa7a74..ab4b825487 100644
--- a/src/gallium/drivers/nv10/nv10_context.h
+++ b/src/gallium/drivers/nv10/nv10_context.h
@@ -15,10 +15,6 @@
#include "nouveau/nouveau_gldefs.h"
#include "nouveau/nouveau_context.h"
-#define NOUVEAU_PUSH_CONTEXT(ctx) \
- struct nv10_screen *ctx = nv10->screen
-#include "nouveau/nouveau_push.h"
-
#include "nv10_state.h"
#define NOUVEAU_ERR(fmt, args...) \
@@ -144,9 +140,9 @@ extern void nv10_emit_hw_state(struct nv10_context *nv10);
extern void nv10_state_tex_update(struct nv10_context *nv10);
/* nv10_vbo.c */
-extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv10_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv10_draw_elements( struct pipe_context *pipe,
+extern void nv10_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c
index 906fdfeeb9..c1f7ccb9ab 100644
--- a/src/gallium/drivers/nv10/nv10_fragtex.c
+++ b/src/gallium/drivers/nv10/nv10_fragtex.c
@@ -52,6 +52,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
struct nv10_miptree *nv10mt = nv10->tex_miptree[unit];
struct pipe_texture *pt = &nv10mt->base;
struct nv10_texture_format *tf;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
uint32_t txf, txs, txp;
tf = nv10_fragtex_format(pt->format);
@@ -82,15 +85,15 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
return;
}
- BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8);
- OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
- OUT_RING (ps->wrap);
- OUT_RING (0x40000000); /* enable */
- OUT_RING (txs);
- OUT_RING (ps->filt | 0x2000 /* magic */);
- OUT_RING ((pt->width0 << 16) | pt->height0);
- OUT_RING (ps->bcol);
+ BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(unit), 8);
+ OUT_RELOCl(chan, nouveau_bo(nv10mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(chan, nouveau_bo(nv10mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING (chan, ps->wrap);
+ OUT_RING (chan, 0x40000000); /* enable */
+ OUT_RING (chan, txs);
+ OUT_RING (chan, ps->filt | 0x2000 /* magic */);
+ OUT_RING (chan, (pt->width0 << 16) | pt->height0);
+ OUT_RING (chan, ps->bcol);
#endif
}
@@ -99,6 +102,9 @@ nv10_fragtex_bind(struct nv10_context *nv10)
{
#if 0
struct nv10_fragment_program *fp = nv10->fragprog.active;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
unsigned samplers, unit;
samplers = nv10->fp_samplers & ~fp->samplers;
@@ -106,8 +112,8 @@ nv10_fragtex_bind(struct nv10_context *nv10)
unit = ffs(samplers) - 1;
samplers &= ~(1 << unit);
- BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(unit), 1);
+ OUT_RING (chan, 0);
}
samplers = nv10->dirty_samplers & fp->samplers;
diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
index 7ba9777a22..c5dbe43dbc 100644
--- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c
+++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
@@ -67,12 +67,15 @@ struct nv10_vbuf_render {
void nv10_vtxbuf_bind( struct nv10_context* nv10 )
{
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
int i;
for(i = 0; i < 8; i++) {
- BEGIN_RING(celsius, NV10TCL_VTXBUF_ADDRESS(i), 1);
- OUT_RING(0/*nv10->vtxbuf*/);
- BEGIN_RING(celsius, NV10TCL_VTXFMT(i), 1);
- OUT_RING(0/*XXX*/);
+ BEGIN_RING(chan, celsius, NV10TCL_VTXBUF_ADDRESS(i), 1);
+ OUT_RING(chan, 0/*nv10->vtxbuf*/);
+ BEGIN_RING(chan, celsius, NV10TCL_VTXFMT(i), 1);
+ OUT_RING(chan, 0/*XXX*/);
}
}
@@ -163,19 +166,22 @@ nv10_vbuf_render_draw( struct vbuf_render *render,
{
struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
struct nv10_context *nv10 = nv10_render->nv10;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
int push, i;
nv10_emit_hw_state(nv10);
- BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
- OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+ OUT_RELOCl(chan, nouveau_bo(nv10_render->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
- OUT_RING(nv10_render->hwprim);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING(chan, nv10_render->hwprim);
if (nr_indices & 1) {
- BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1);
- OUT_RING (indices[0]);
+ BEGIN_RING(chan, celsius, NV10TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, indices[0]);
indices++; nr_indices--;
}
@@ -183,16 +189,16 @@ nv10_vbuf_render_draw( struct vbuf_render *render,
// XXX too big/small ? check the size
push = MIN2(nr_indices, 1200 * 2);
- BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((indices[i+1] << 16) | indices[i]);
+ OUT_RING(chan, (indices[i+1] << 16) | indices[i]);
nr_indices -= push;
indices += push;
}
- BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING (chan, 0);
}
diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c
index ee5901e743..69a6dab866 100644
--- a/src/gallium/drivers/nv10/nv10_screen.c
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -115,6 +115,9 @@ nv10_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->celsius);
+ nv04_surface_2d_takedown(&screen->eng2d);
+
+ nouveau_screen_fini(&screen->base);
FREE(pscreen);
}
@@ -177,7 +180,6 @@ nv10_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
return FALSE;
}
- BIND_RING(chan, screen->celsius, 7);
/* 2D engine setup */
screen->eng2d = nv04_surface_2d_init(&screen->base);
diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c
index 2577ab73b5..30a596ca60 100644
--- a/src/gallium/drivers/nv10/nv10_state_emit.c
+++ b/src/gallium/drivers/nv10/nv10_state_emit.c
@@ -4,25 +4,32 @@
static void nv10_state_emit_blend(struct nv10_context* nv10)
{
struct nv10_blend_state *b = nv10->blend;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
- BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1);
- OUT_RING (b->d_enable);
+ BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 1);
+ OUT_RING (chan, b->d_enable);
- BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3);
- OUT_RING (b->b_enable);
- OUT_RING (b->b_srcfunc);
- OUT_RING (b->b_dstfunc);
+ BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 3);
+ OUT_RING (chan, b->b_enable);
+ OUT_RING (chan, b->b_srcfunc);
+ OUT_RING (chan, b->b_dstfunc);
- BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
- OUT_RING (b->c_mask);
+ BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1);
+ OUT_RING (chan, b->c_mask);
}
static void nv10_state_emit_blend_color(struct nv10_context* nv10)
{
struct pipe_blend_color *c = nv10->blend_color;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
- BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1);
- OUT_RING ((float_to_ubyte(c->color[3]) << 24)|
+ BEGIN_RING(chan, celsius, NV10TCL_BLEND_COLOR, 1);
+ OUT_RING (chan,
+ (float_to_ubyte(c->color[3]) << 24)|
(float_to_ubyte(c->color[0]) << 16)|
(float_to_ubyte(c->color[1]) << 8) |
(float_to_ubyte(c->color[2]) << 0));
@@ -31,60 +38,66 @@ static void nv10_state_emit_blend_color(struct nv10_context* nv10)
static void nv10_state_emit_rast(struct nv10_context* nv10)
{
struct nv10_rasterizer_state *r = nv10->rast;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
- BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2);
- OUT_RING (r->shade_model);
- OUT_RING (r->line_width);
+ BEGIN_RING(chan, celsius, NV10TCL_SHADE_MODEL, 2);
+ OUT_RING (chan, r->shade_model);
+ OUT_RING (chan, r->line_width);
- BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
- OUT_RING (r->point_size);
+ BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1);
+ OUT_RING (chan, r->point_size);
- BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
- OUT_RING (r->poly_mode_front);
- OUT_RING (r->poly_mode_back);
+ BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (chan, r->poly_mode_front);
+ OUT_RING (chan, r->poly_mode_back);
- BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
- OUT_RING (r->cull_face);
- OUT_RING (r->front_face);
+ BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2);
+ OUT_RING (chan, r->cull_face);
+ OUT_RING (chan, r->front_face);
- BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2);
- OUT_RING (r->line_smooth_en);
- OUT_RING (r->poly_smooth_en);
+ BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2);
+ OUT_RING (chan, r->line_smooth_en);
+ OUT_RING (chan, r->poly_smooth_en);
- BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
- OUT_RING (r->cull_face_en);
+ BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (chan, r->cull_face_en);
}
static void nv10_state_emit_dsa(struct nv10_context* nv10)
{
struct nv10_depth_stencil_alpha_state *d = nv10->dsa;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
- BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
- OUT_RING (d->depth.func);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1);
+ OUT_RING (chan, d->depth.func);
- BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
- OUT_RING (d->depth.write_enable);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (chan, d->depth.write_enable);
- BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
- OUT_RING (d->depth.test_enable);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (chan, d->depth.test_enable);
#if 0
- BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1);
- OUT_RING (d->stencil.enable);
- BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7);
- OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+ BEGIN_RING(chan, celsius, NV10TCL_STENCIL_ENABLE, 1);
+ OUT_RING (chan, d->stencil.enable);
+ BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 7);
+ OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7);
#endif
- BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
- OUT_RING (d->alpha.enabled);
+ BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (chan, d->alpha.enabled);
- BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1);
- OUT_RING (d->alpha.func);
+ BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 1);
+ OUT_RING (chan, d->alpha.func);
- BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1);
- OUT_RING (d->alpha.ref);
+ BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_REF, 1);
+ OUT_RING (chan, d->alpha.ref);
}
static void nv10_state_emit_viewport(struct nv10_context* nv10)
@@ -108,6 +121,10 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
int colour_format = 0, zeta_format = 0;
struct nv10_miptree *nv10mt = 0;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
+
w = fb->cbufs[0]->width;
h = fb->cbufs[0]->height;
colour_format = fb->cbufs[0]->format;
@@ -144,11 +161,11 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
}
if (zeta) {
- BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
- OUT_RING (rt->pitch | (zeta->pitch << 16));
+ BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1);
+ OUT_RING (chan, rt->pitch | (zeta->pitch << 16));
} else {
- BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
- OUT_RING (rt->pitch | (rt->pitch << 16));
+ BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1);
+ OUT_RING (chan, rt->pitch | (rt->pitch << 16));
}
nv10mt = (struct nv10_miptree *)rt->base.texture;
@@ -160,13 +177,13 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
nv10->zeta = nv10mt->buffer;
}
- BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3);
- OUT_RING ((w << 16) | 0);
- OUT_RING ((h << 16) | 0);
- OUT_RING (rt_format);
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2);
- OUT_RING (((w - 1) << 16) | 0 | 0x08000800);
- OUT_RING (((h - 1) << 16) | 0 | 0x08000800);
+ BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 3);
+ OUT_RING (chan, (w << 16) | 0);
+ OUT_RING (chan, (h << 16) | 0);
+ OUT_RING (chan, rt_format);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ OUT_RING (chan, ((w - 1) << 16) | 0 | 0x08000800);
+ OUT_RING (chan, ((h - 1) << 16) | 0 | 0x08000800);
}
static void nv10_vertex_layout(struct nv10_context *nv10)
@@ -201,6 +218,10 @@ static void nv10_vertex_layout(struct nv10_context *nv10)
void
nv10_emit_hw_state(struct nv10_context *nv10)
{
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
+ struct nouveau_bo *rt_bo;
int i;
if (nv10->dirty & NV10_NEW_VERTPROG) {
@@ -269,38 +290,41 @@ nv10_emit_hw_state(struct nv10_context *nv10)
*/
/* Render target */
+ rt_bo = nouveau_bo(nv10->rt[0]);
// XXX figre out who's who for NV10TCL_DMA_* and fill accordingly
-// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1);
-// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
- OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+// BEGIN_RING(chan, celsius, NV10TCL_DMA_COLOR0, 1);
+// OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
if (nv10->zeta) {
+ struct nouveau_bo *zeta_bo = nouveau_bo(nv10->zeta);
// XXX
-// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1);
-// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1);
- OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+// BEGIN_RING(chan, celsius, NV10TCL_DMA_ZETA, 1);
+// OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, celsius, NV10TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
/* XXX for when we allocate LMA on nv17 */
-/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
- OUT_RELOCl(nv10->zeta + lma_offset);*/
+/* BEGIN_RING(chan, celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+ OUT_RELOCl(chan, nouveau_bo(nv10->zeta + lma_offset));*/
}
/* Vertex buffer */
- BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1);
- OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
- OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, celsius, NV10TCL_DMA_VTXBUF0, 1);
+ OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
/* Texture images */
for (i = 0; i < 2; i++) {
if (!(nv10->fp_samplers & (1 << i)))
continue;
- BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1);
- OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+ struct nouveau_bo *bo = nouveau_bo(nv10->tex[i].buffer);
+ BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(i), 1);
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1);
- OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format,
+ BEGIN_RING(chan, celsius, NV10TCL_TX_FORMAT(i), 1);
+ OUT_RELOCd(chan, bo, nv10->tex[i].format,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0,
NV10TCL_TX_FORMAT_DMA1);
diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c
index c664973e90..eb04af9782 100644
--- a/src/gallium/drivers/nv10/nv10_transfer.c
+++ b/src/gallium/drivers/nv10/nv10_transfer.c
@@ -16,14 +16,14 @@ struct nv10_transfer {
};
static void
-nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width0 = u_minify(pt->width0, level);
- template->height0 = u_minify(pt->height0, level);
+ template->width0 = width;
+ template->height0 = height;
template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv10_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv10_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -80,6 +80,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv10_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
face, level, zslice,
pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -130,9 +132,9 @@ nv10_transfer_del(struct pipe_transfer *ptx)
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -151,8 +153,10 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c
index 441a4f75f3..9180c72c9b 100644
--- a/src/gallium/drivers/nv10/nv10_vbo.c
+++ b/src/gallium/drivers/nv10/nv10_vbo.c
@@ -9,7 +9,7 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
-boolean nv10_draw_elements( struct pipe_context *pipe,
+void nv10_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
@@ -45,6 +45,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe,
}
draw_set_mapped_constant_buffer(draw,
+ PIPE_SHADER_VERTEX,
nv10->constbuf[PIPE_SHADER_VERTEX],
nv10->constbuf_nr[PIPE_SHADER_VERTEX]);
@@ -64,14 +65,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe,
pipe_buffer_unmap(pscreen, indexBuffer);
draw_set_mapped_element_buffer(draw, 0, NULL);
}
-
- return TRUE;
}
-boolean nv10_draw_arrays( struct pipe_context *pipe,
- unsigned prim, unsigned start, unsigned count)
+void nv10_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
{
- return nv10_draw_elements(pipe, NULL, 0, prim, start, count);
+ nv10_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
index 6a147a4159..5b80af2d22 100644
--- a/src/gallium/drivers/nv20/nv20_context.c
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -10,10 +10,14 @@ nv20_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv20_context *nv20 = nv20_context(pipe);
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
draw_flush(nv20->draw);
- FIRE_RING(fence);
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
}
static void
@@ -31,348 +35,352 @@ static void nv20_init_hwctx(struct nv20_context *nv20)
{
struct nv20_screen *screen = nv20->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
int i;
float projectionmatrix[16];
- const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL);
+ const boolean is_nv25tcl = (kelvin->grclass == NV25TCL);
- BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1);
- OUT_RING (screen->sync->handle);
- BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2);
- OUT_RING (chan->vram->handle);
- OUT_RING (chan->gart->handle); /* TEXTURE1 */
- BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2);
- OUT_RING (chan->vram->handle);
- OUT_RING (chan->vram->handle); /* ZETA */
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_NOTIFY, 1);
+ OUT_RING (chan, screen->sync->handle);
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_TEXTURE0, 2);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->gart->handle); /* TEXTURE1 */
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 2);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle); /* ZETA */
- BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1);
- OUT_RING (0); /* renouveau: beef0351, unique */
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_QUERY, 1);
+ OUT_RING (chan, 0); /* renouveau: beef0351, unique */
- BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
- OUT_RING (0);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
- OUT_RING ((0xfff << 16) | 0x0);
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
- OUT_RING ((0xfff << 16) | 0x0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+ OUT_RING (chan, (0xfff << 16) | 0x0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
+ OUT_RING (chan, (0xfff << 16) | 0x0);
for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) {
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1);
+ OUT_RING (chan, 0);
- BEGIN_RING(kelvin, 0x17e0, 3);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
- OUT_RINGf (1.0);
+ BEGIN_RING(chan, kelvin, 0x17e0, 3);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 1.0);
if (is_nv25tcl) {
- BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
- OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1);
+ OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL | 0xdb0);
} else {
- BEGIN_RING(kelvin, 0x1e68, 1);
- OUT_RING (0x4b800000); /* 16777216.000000 */
- BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
- OUT_RING (NV20TCL_TX_RCOMP_LEQUAL);
+ BEGIN_RING(chan, kelvin, 0x1e68, 1);
+ OUT_RING (chan, 0x4b800000); /* 16777216.000000 */
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1);
+ OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL);
}
- BEGIN_RING(kelvin, 0x290, 1);
- OUT_RING ((0x10 << 16) | 1);
- BEGIN_RING(kelvin, 0x9fc, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, 0x1d80, 1);
- OUT_RING (1);
- BEGIN_RING(kelvin, 0x9f8, 1);
- OUT_RING (4);
- BEGIN_RING(kelvin, 0x17ec, 3);
- OUT_RINGf (0.0);
- OUT_RINGf (1.0);
- OUT_RINGf (0.0);
+ BEGIN_RING(chan, kelvin, 0x290, 1);
+ OUT_RING (chan, (0x10 << 16) | 1);
+ BEGIN_RING(chan, kelvin, 0x9fc, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, 0x1d80, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, kelvin, 0x9f8, 1);
+ OUT_RING (chan, 4);
+ BEGIN_RING(chan, kelvin, 0x17ec, 3);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 1.0);
+ OUT_RINGf (chan, 0.0);
if (is_nv25tcl) {
- BEGIN_RING(kelvin, 0x1d88, 1);
- OUT_RING (3);
+ BEGIN_RING(chan, kelvin, 0x1d88, 1);
+ OUT_RING (chan, 3);
- BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1);
- OUT_RING (chan->vram->handle);
- BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1);
- OUT_RING (chan->vram->handle);
+ BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY9, 1);
+ OUT_RING (chan, chan->vram->handle);
+ BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY8, 1);
+ OUT_RING (chan, chan->vram->handle);
}
- BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1);
- OUT_RING (0); /* renouveau: beef1e10 */
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_FENCE, 1);
+ OUT_RING (chan, 0); /* renouveau: beef1e10 */
- BEGIN_RING(kelvin, 0x1e98, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, 0x1e98, 1);
+ OUT_RING (chan, 0);
#if 0
if (is_nv25tcl) {
- BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2);
- OUT_RING (NvDmaTT); /* renouveau: beef0202 */
- OUT_RING (NvDmaFB); /* renouveau: beef0201 */
+ BEGIN_RING(chan, NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2);
+ OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */
+ OUT_RING (chan, NvDmaFB); /* renouveau: beef0201 */
- BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1);
- OUT_RING (NvDmaTT); /* renouveau: beef0202 */
+ BEGIN_RING(chan, NvSub3D, NV20TCL_DMA_TEXTURE1, 1);
+ OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */
}
#endif
- BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_NOTIFY, 1);
+ OUT_RING (chan, 0);
- BEGIN_RING(kelvin, 0x120, 3);
- OUT_RING (0);
- OUT_RING (1);
- OUT_RING (2);
+ BEGIN_RING(chan, kelvin, 0x120, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 2);
/* error: ILLEGAL_MTHD, PROTECTION_FAULT
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
- OUT_RINGf (0.0);
- OUT_RINGf (512.0);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 512.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
*/
if (is_nv25tcl) {
- BEGIN_RING(kelvin, 0x022c, 2);
- OUT_RING (0x280);
- OUT_RING (0x07d28000);
+ BEGIN_RING(chan, kelvin, 0x022c, 2);
+ OUT_RING (chan, 0x280);
+ OUT_RING (chan, 0x07d28000);
}
/* * illegal method, protection fault
- BEGIN_RING(NvSub3D, 0x1c2c, 1);
- OUT_RING (0); */
+ BEGIN_RING(chan, NvSub3D, 0x1c2c, 1);
+ OUT_RING (chan, 0); */
if (is_nv25tcl) {
- BEGIN_RING(kelvin, 0x1da4, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, 0x1da4, 1);
+ OUT_RING (chan, 0);
}
/* * crashes with illegal method, protection fault
- BEGIN_RING(NvSub3D, 0x1c18, 1);
- OUT_RING (0x200); */
+ BEGIN_RING(chan, NvSub3D, 0x1c18, 1);
+ OUT_RING (chan, 0x200); */
- BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
- OUT_RING ((0 << 16) | 0);
- OUT_RING ((0 << 16) | 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2);
+ OUT_RING (chan, (0 << 16) | 0);
+ OUT_RING (chan, (0 << 16) | 0);
/* *** Set state *** */
- BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2);
- OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS);
- OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */
+ BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2);
+ OUT_RING (chan, NV20TCL_ALPHA_FUNC_FUNC_ALWAYS);
+ OUT_RING (chan, 0); /* NV20TCL_ALPHA_FUNC_REF */
for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) {
- BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4);
- OUT_RING (0x30d410d0);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4);
- OUT_RING (0x00000c00);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1);
- OUT_RING (0x00011101);
- BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2);
- OUT_RING (0x130e0300);
- OUT_RING (0x0c091c80);
- BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4);
- OUT_RING (0x00000c00);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4);
- OUT_RING (0x20c400c0);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4);
- OUT_RING (0x035125a0);
- OUT_RING (0);
- OUT_RING (0x40002000);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1);
- OUT_RING (0xffff0000);
-
- BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4);
- OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE);
- OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO);
- OUT_RING (0); /* NV20TCL_BLEND_COLOR */
- OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD);
- BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
- OUT_RING (0xff);
- OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS);
- OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */
- OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */
- OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP);
- OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP);
- OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP);
-
- BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY);
- BEGIN_RING(kelvin, 0x17cc, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_OP, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(0), 4);
+ OUT_RING (chan, 0x30d410d0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(0), 4);
+ OUT_RING (chan, 0x00000c00);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1);
+ OUT_RING (chan, 0x00011101);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2);
+ OUT_RING (chan, 0x130e0300);
+ OUT_RING (chan, 0x0c091c80);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(0), 4);
+ OUT_RING (chan, 0x00000c00);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(0), 4);
+ OUT_RING (chan, 0x20c400c0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_COLOR0, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4);
+ OUT_RING (chan, 0x035125a0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0x40002000);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1);
+ OUT_RING (chan, 0xffff0000);
+
+ BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 4);
+ OUT_RING (chan, NV20TCL_BLEND_FUNC_SRC_ONE);
+ OUT_RING (chan, NV20TCL_BLEND_FUNC_DST_ZERO);
+ OUT_RING (chan, 0); /* NV20TCL_BLEND_COLOR */
+ OUT_RING (chan, NV20TCL_BLEND_EQUATION_FUNC_ADD);
+ BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7);
+ OUT_RING (chan, 0xff);
+ OUT_RING (chan, NV20TCL_STENCIL_FUNC_FUNC_ALWAYS);
+ OUT_RING (chan, 0); /* NV20TCL_STENCIL_FUNC_REF */
+ OUT_RING (chan, 0xff); /* NV20TCL_STENCIL_FUNC_MASK */
+ OUT_RING (chan, NV20TCL_STENCIL_OP_FAIL_KEEP);
+ OUT_RING (chan, NV20TCL_STENCIL_OP_ZFAIL_KEEP);
+ OUT_RING (chan, NV20TCL_STENCIL_OP_ZPASS_KEEP);
+
+ BEGIN_RING(chan, kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, NV20TCL_COLOR_LOGIC_OP_OP_COPY);
+ BEGIN_RING(chan, kelvin, 0x17cc, 1);
+ OUT_RING (chan, 0);
if (is_nv25tcl) {
- BEGIN_RING(kelvin, 0x1d84, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, kelvin, 0x1d84, 1);
+ OUT_RING (chan, 1);
}
- BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1);
- OUT_RING (0x00020000);
- BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0),
+ BEGIN_RING(chan, kelvin, NV20TCL_LIGHTING_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_CONTROL, 1);
+ OUT_RING (chan, 0x00020000);
+ BEGIN_RING(chan, kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_ENABLED_LIGHTS, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_NORMALIZE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0),
NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE);
for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) {
- OUT_RING(0xffffffff);
+ OUT_RING(chan, 0xffffffff);
}
- BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
- OUT_RING (0);
- OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */
- OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */
- BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
- OUT_RING (NV20TCL_DEPTH_FUNC_LESS);
- BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */
- BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */
+ OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1);
+ OUT_RING (chan, NV20TCL_DEPTH_FUNC_LESS);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+ OUT_RING (chan, 1);
if (!is_nv25tcl) {
- BEGIN_RING(kelvin, 0x1d84, 1);
- OUT_RING (3);
+ BEGIN_RING(chan, kelvin, 0x1d84, 1);
+ OUT_RING (chan, 3);
}
- BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
+ BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1);
if (!is_nv25tcl) {
- OUT_RING (8);
+ OUT_RING (chan, 8);
} else {
- OUT_RINGf (1.0);
+ OUT_RINGf (chan, 1.0);
}
if (!is_nv25tcl) {
- BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */
+ BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0); /* NV20TCL.POINT_SMOOTH_ENABLE */
} else {
- BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, 0x0a1c, 1);
- OUT_RING (0x800);
+ BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, 0x0a1c, 1);
+ OUT_RING (chan, 0x800);
}
- BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1);
- OUT_RING (8);
- BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
- OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL);
- OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL);
- BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
- OUT_RING (NV20TCL_CULL_FACE_BACK);
- OUT_RING (NV20TCL_FRONT_FACE_CCW);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1);
- OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE);
+ BEGIN_RING(chan, kelvin, NV20TCL_LINE_WIDTH, 1);
+ OUT_RING (chan, 8);
+ BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (chan, NV20TCL_POLYGON_MODE_FRONT_FILL);
+ OUT_RING (chan, NV20TCL_POLYGON_MODE_BACK_FILL);
+ BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2);
+ OUT_RING (chan, NV20TCL_CULL_FACE_BACK);
+ OUT_RING (chan, NV20TCL_FRONT_FACE_CCW);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 1);
+ OUT_RING (chan, NV20TCL_SHADE_MODEL_SMOOTH);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE);
for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) {
- OUT_RING(0);
+ OUT_RING(chan, 0);
}
- BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3);
- OUT_RINGf (1.5);
- OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */
- OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */
- BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2);
- OUT_RING (NV20TCL_FOG_MODE_EXP_2);
- OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG);
- BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (0); /* NV20TCL.FOG_COLOR */
- BEGIN_RING(kelvin, NV20TCL_ENGINE, 1);
- OUT_RING (NV20TCL_ENGINE_FIXED);
+ BEGIN_RING(chan, kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3);
+ OUT_RINGf (chan, 1.5);
+ OUT_RINGf (chan, -0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */
+ OUT_RINGf (chan, 0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */
+ BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2);
+ OUT_RING (chan, NV20TCL_FOG_MODE_EXP_SIGNED);
+ OUT_RING (chan, NV20TCL_FOG_COORD_FOG);
+ BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0); /* NV20TCL.FOG_COLOR */
+ BEGIN_RING(chan, kelvin, NV20TCL_ENGINE, 1);
+ OUT_RING (chan, NV20TCL_ENGINE_FIXED);
for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) {
- BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15);
- OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
- OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
- OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15);
+ OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0);
+ OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0);
+ OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0);
for (i = 4; i < 16; ++i) {
- OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
+ OUT_RINGf(chan, 0.0);
+ OUT_RINGf(chan, 0.0);
+ OUT_RINGf(chan, 0.0);
+ OUT_RINGf(chan, 1.0);
}
- BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1);
- OUT_RING (1);
- BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
- OUT_RING (0x00010101);
- BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1);
+ OUT_RING (chan, 0x00010101);
+ BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1);
+ OUT_RING (chan, 0);
memset(projectionmatrix, 0, sizeof(projectionmatrix));
projectionmatrix[0*4+0] = 1.0;
projectionmatrix[1*4+1] = 1.0;
projectionmatrix[2*4+2] = 16777215.0;
projectionmatrix[3*4+3] = 1.0;
- BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16);
+ BEGIN_RING(chan, kelvin, NV20TCL_PROJECTION_MATRIX(0), 16);
for (i = 0; i < 16; i++) {
- OUT_RINGf (projectionmatrix[i]);
+ OUT_RINGf (chan, projectionmatrix[i]);
}
- BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2);
- OUT_RINGf (0.0);
- OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 16777216.0); /* [0, 1] scaled approx to [0, 2^24] */
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
- OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */
- OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */
- OUT_RINGf (0.0);
- OUT_RINGf (16777215.0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+ OUT_RINGf (chan, 0.0); /* x-offset, w/2 + 1.031250 */
+ OUT_RINGf (chan, 0.0); /* y-offset, h/2 + 0.030762 */
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 16777215.0);
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE_X, 4);
- OUT_RINGf (0.0); /* no effect?, w/2 */
- OUT_RINGf (0.0); /* no effect?, h/2 */
- OUT_RINGf (16777215.0 * 0.5);
- OUT_RINGf (65535.0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_SCALE_X, 4);
+ OUT_RINGf (chan, 0.0); /* no effect?, w/2 */
+ OUT_RINGf (chan, 0.0); /* no effect?, h/2 */
+ OUT_RINGf (chan, 16777215.0 * 0.5);
+ OUT_RINGf (chan, 65535.0);
- FIRE_RING (NULL);
+ FIRE_RING (chan);
}
struct pipe_context *
diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h
index a4eaa95660..c7dfadaa31 100644
--- a/src/gallium/drivers/nv20/nv20_context.h
+++ b/src/gallium/drivers/nv20/nv20_context.h
@@ -15,10 +15,6 @@
#include "nouveau/nouveau_gldefs.h"
#include "nouveau/nouveau_context.h"
-#define NOUVEAU_PUSH_CONTEXT(ctx) \
- struct nv20_screen *ctx = nv20->screen
-#include "nouveau/nouveau_push.h"
-
#include "nv20_state.h"
#define NOUVEAU_ERR(fmt, args...) \
@@ -143,9 +139,9 @@ extern void nv20_emit_hw_state(struct nv20_context *nv20);
extern void nv20_state_tex_update(struct nv20_context *nv20);
/* nv20_vbo.c */
-extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv20_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv20_draw_elements( struct pipe_context *pipe,
+extern void nv20_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c
index 2db4a4015a..dedbec73f3 100644
--- a/src/gallium/drivers/nv20/nv20_fragtex.c
+++ b/src/gallium/drivers/nv20/nv20_fragtex.c
@@ -52,6 +52,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
struct nv20_miptree *nv20mt = nv20->tex_miptree[unit];
struct pipe_texture *pt = &nv20mt->base;
struct nv20_texture_format *tf;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
uint32_t txf, txs, txp;
tf = nv20_fragtex_format(pt->format);
@@ -82,15 +85,15 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
return;
}
- BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8);
- OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
- OUT_RING (ps->wrap);
- OUT_RING (0x40000000); /* enable */
- OUT_RING (txs);
- OUT_RING (ps->filt | 0x2000 /* magic */);
- OUT_RING ((pt->width0 << 16) | pt->height0);
- OUT_RING (ps->bcol);
+ BEGIN_RING(chan, kelvin, NV10TCL_TX_OFFSET(unit), 8);
+ OUT_RELOCl(chan, nouveau_bo(nv20mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(chan, nouveau_bo(nv20mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING (chan, ps->wrap);
+ OUT_RING (chan, 0x40000000); /* enable */
+ OUT_RING (chan, txs);
+ OUT_RING (chan, ps->filt | 0x2000 /* magic */);
+ OUT_RING (chan, (pt->width0 << 16) | pt->height0);
+ OUT_RING (chan, ps->bcol);
#endif
}
@@ -99,6 +102,9 @@ nv20_fragtex_bind(struct nv20_context *nv20)
{
#if 0
struct nv20_fragment_program *fp = nv20->fragprog.active;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
unsigned samplers, unit;
samplers = nv20->fp_samplers & ~fp->samplers;
@@ -106,8 +112,8 @@ nv20_fragtex_bind(struct nv20_context *nv20)
unit = ffs(samplers) - 1;
samplers &= ~(1 << unit);
- BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV10TCL_TX_ENABLE(unit), 1);
+ OUT_RING (chan, 0);
}
samplers = nv20->dirty_samplers & fp->samplers;
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
index d1291a92e0..8f7538e7f5 100644
--- a/src/gallium/drivers/nv20/nv20_miptree.c
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -6,6 +6,7 @@
#include "nv20_context.h"
#include "nv20_screen.h"
+#include "../nv04/nv04_surface_2d.h"
static void
nv20_miptree_layout(struct nv20_miptree *nv20mt)
@@ -127,6 +128,12 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+ * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+ * This also happens for small mipmaps of large textures. */
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
nv20_miptree_layout(mt);
mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size);
@@ -183,12 +190,27 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
ns->base.offset = nv20mt->level[level].image_offset[0];
}
+ /* create a linear temporary that we can render into if necessary.
+ * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+ * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+ return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base;
+
return &ns->base;
}
static void
nv20_miptree_surface_destroy(struct pipe_surface *ps)
{
+ struct nv04_surface* ns = (struct nv04_surface*)ps;
+ if(ns->backing)
+ {
+ struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen;
+ if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
+ screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+ nv20_miptree_surface_destroy(&ns->backing->base);
+ }
+
pipe_texture_reference(&ps->texture, NULL);
FREE(ps);
}
diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
index ddfcdb8057..2e145672da 100644
--- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c
+++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
@@ -81,12 +81,15 @@ nv20_vbuf_render(struct vbuf_render *render)
void nv20_vtxbuf_bind( struct nv20_context* nv20 )
{
#if 0
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
int i;
for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) {
- BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1);
- OUT_RING(0/*nv20->vtxbuf*/);
- BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1);
- OUT_RING(0/*XXX*/);
+ BEGIN_RING(chan, kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1);
+ OUT_RING(chan, 0/*nv20->vtxbuf*/);
+ BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(i) ,1);
+ OUT_RING(chan, 0/*XXX*/);
}
#endif
}
@@ -202,6 +205,9 @@ nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type)
static unsigned
nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr)
{
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
uint32_t hwfmt = 0;
unsigned fields;
@@ -231,8 +237,8 @@ nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr)
return 0;
}
- BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1);
- OUT_RING(hwfmt);
+ BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(hwattr), 1);
+ OUT_RING(chan, hwfmt);
return fields;
}
@@ -262,6 +268,9 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render,
uint nr_indices)
{
struct nv20_context *nv20 = nv20_render->nv20;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
struct vertex_info *vinfo = &nv20->vertex_info;
unsigned nr_fields;
int max_push;
@@ -270,29 +279,29 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render,
nr_fields = nv20__emit_vertex_array_format(nv20);
- BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
- OUT_RING(nv20_render->hwprim);
+ BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING(chan, nv20_render->hwprim);
max_push = 1200 / nr_fields;
while (nr_indices) {
int i;
int push = MIN2(nr_indices, max_push);
- BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields);
+ BEGIN_RING_NI(chan, kelvin, NV20TCL_VERTEX_DATA, push * nr_fields);
for (i = 0; i < push; i++) {
/* XXX: fixme to handle other than floats? */
int f = nr_fields;
float *attrv = (float*)&data[indices[i] * vsz];
while (f-- > 0)
- OUT_RINGf(*attrv++);
+ OUT_RINGf(chan, *attrv++);
}
nr_indices -= push;
indices += push;
}
- BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
- OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP);
+ BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING(chan, NV20TCL_VERTEX_BEGIN_END_STOP);
}
static void
@@ -301,20 +310,23 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render,
uint nr_indices)
{
struct nv20_context *nv20 = nv20_render->nv20;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
int push, i;
NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n");
- BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
- OUT_RELOCl(nv20_render->pbuffer, 0,
+ BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+ OUT_RELOCl(chan, nouveau_bo(nv20_render->pbuffer), 0,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
- OUT_RING(nv20_render->hwprim);
+ BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING(chan, nv20_render->hwprim);
if (nr_indices & 1) {
- BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1);
- OUT_RING (indices[0]);
+ BEGIN_RING(chan, kelvin, NV10TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, indices[0]);
indices++; nr_indices--;
}
@@ -322,16 +334,16 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render,
// XXX too big/small ? check the size
push = MIN2(nr_indices, 1200 * 2);
- BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((indices[i+1] << 16) | indices[i]);
+ OUT_RING(chan, (indices[i+1] << 16) | indices[i]);
nr_indices -= push;
indices += push;
}
- BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING (chan, 0);
}
static void
diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c
index 4eeacd1afd..d091335063 100644
--- a/src/gallium/drivers/nv20/nv20_screen.c
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -115,6 +115,9 @@ nv20_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->kelvin);
+ nv04_surface_2d_takedown(&screen->eng2d);
+
+ nouveau_screen_fini(&screen->base);
FREE(pscreen);
}
@@ -173,7 +176,6 @@ nv20_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
return FALSE;
}
- BIND_RING(chan, screen->kelvin, 7);
/* 2D engine setup */
screen->eng2d = nv04_surface_2d_init(&screen->base);
diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c
index 0122b1c2cd..6bbd1fdae9 100644
--- a/src/gallium/drivers/nv20/nv20_state_emit.c
+++ b/src/gallium/drivers/nv20/nv20_state_emit.c
@@ -5,27 +5,34 @@
static void nv20_state_emit_blend(struct nv20_context* nv20)
{
struct nv20_blend_state *b = nv20->blend;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
- BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
- OUT_RING (b->d_enable);
+ BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1);
+ OUT_RING (chan, b->d_enable);
- BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
- OUT_RING (b->b_enable);
+ BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (chan, b->b_enable);
- BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2);
- OUT_RING (b->b_srcfunc);
- OUT_RING (b->b_dstfunc);
+ BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 2);
+ OUT_RING (chan, b->b_srcfunc);
+ OUT_RING (chan, b->b_dstfunc);
- BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
- OUT_RING (b->c_mask);
+ BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1);
+ OUT_RING (chan, b->c_mask);
}
static void nv20_state_emit_blend_color(struct nv20_context* nv20)
{
struct pipe_blend_color *c = nv20->blend_color;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
- BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1);
- OUT_RING ((float_to_ubyte(c->color[3]) << 24)|
+ BEGIN_RING(chan, kelvin, NV20TCL_BLEND_COLOR, 1);
+ OUT_RING (chan,
+ (float_to_ubyte(c->color[3]) << 24)|
(float_to_ubyte(c->color[0]) << 16)|
(float_to_ubyte(c->color[1]) << 8) |
(float_to_ubyte(c->color[2]) << 0));
@@ -34,63 +41,69 @@ static void nv20_state_emit_blend_color(struct nv20_context* nv20)
static void nv20_state_emit_rast(struct nv20_context* nv20)
{
struct nv20_rasterizer_state *r = nv20->rast;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
- BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2);
- OUT_RING (r->shade_model);
- OUT_RING (r->line_width);
+ BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 2);
+ OUT_RING (chan, r->shade_model);
+ OUT_RING (chan, r->line_width);
- BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
- OUT_RING (r->point_size);
+ BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1);
+ OUT_RING (chan, r->point_size);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
- OUT_RING (r->poly_mode_front);
- OUT_RING (r->poly_mode_back);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (chan, r->poly_mode_front);
+ OUT_RING (chan, r->poly_mode_back);
- BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
- OUT_RING (r->cull_face);
- OUT_RING (r->front_face);
+ BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2);
+ OUT_RING (chan, r->cull_face);
+ OUT_RING (chan, r->front_face);
- BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2);
- OUT_RING (r->line_smooth_en);
- OUT_RING (r->poly_smooth_en);
+ BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2);
+ OUT_RING (chan, r->line_smooth_en);
+ OUT_RING (chan, r->poly_smooth_en);
- BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
- OUT_RING (r->cull_face_en);
+ BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (chan, r->cull_face_en);
}
static void nv20_state_emit_dsa(struct nv20_context* nv20)
{
struct nv20_depth_stencil_alpha_state *d = nv20->dsa;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
- BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
- OUT_RING (d->depth.func);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1);
+ OUT_RING (chan, d->depth.func);
- BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
- OUT_RING (d->depth.write_enable);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (chan, d->depth.write_enable);
- BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
- OUT_RING (d->depth.test_enable);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (chan, d->depth.test_enable);
- BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+ OUT_RING (chan, 1);
#if 0
- BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
- OUT_RING (d->stencil.enable);
- BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
- OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+ BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1);
+ OUT_RING (chan, d->stencil.enable);
+ BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7);
+ OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7);
#endif
- BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
- OUT_RING (d->alpha.enabled);
+ BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (chan, d->alpha.enabled);
- BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1);
- OUT_RING (d->alpha.func);
+ BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1);
+ OUT_RING (chan, d->alpha.func);
- BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1);
- OUT_RING (d->alpha.ref);
+ BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_REF, 1);
+ OUT_RING (chan, d->alpha.ref);
}
static void nv20_state_emit_viewport(struct nv20_context* nv20)
@@ -101,9 +114,13 @@ static void nv20_state_emit_scissor(struct nv20_context* nv20)
{
/* NV20TCL_SCISSOR_* is probably a software method */
/* struct pipe_scissor_state *s = nv20->scissor;
- BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2);
- OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
- OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
+
+ BEGIN_RING(chan, kelvin, NV20TCL_SCISSOR_HORIZ, 2);
+ OUT_RING (chan, ((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (chan, ((s->maxy - s->miny) << 16) | s->miny);*/
}
static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
@@ -113,6 +130,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
uint32_t rt_format, w, h;
int colour_format = 0, zeta_format = 0;
struct nv20_miptree *nv20mt = 0;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
w = fb->cbufs[0]->width;
h = fb->cbufs[0]->height;
@@ -150,11 +170,11 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
}
if (zeta) {
- BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
- OUT_RING (rt->pitch | (zeta->pitch << 16));
+ BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1);
+ OUT_RING (chan, rt->pitch | (zeta->pitch << 16));
} else {
- BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
- OUT_RING (rt->pitch | (rt->pitch << 16));
+ BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1);
+ OUT_RING (chan, rt->pitch | (rt->pitch << 16));
}
nv20mt = (struct nv20_miptree *)rt->base.texture;
@@ -166,13 +186,13 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
nv20->zeta = nv20mt->buffer;
}
- BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3);
- OUT_RING ((w << 16) | 0);
- OUT_RING ((h << 16) | 0); /*NV20TCL_RT_VERT */
- OUT_RING (rt_format); /* NV20TCL_RT_FORMAT */
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2);
- OUT_RING (((w - 1) << 16) | 0);
- OUT_RING (((h - 1) << 16) | 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 3);
+ OUT_RING (chan, (w << 16) | 0);
+ OUT_RING (chan, (h << 16) | 0); /*NV20TCL_RT_VERT */
+ OUT_RING (chan, rt_format); /* NV20TCL_RT_FORMAT */
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ OUT_RING (chan, ((w - 1) << 16) | 0);
+ OUT_RING (chan, ((h - 1) << 16) | 0);
}
static void nv20_vertex_layout(struct nv20_context *nv20)
@@ -228,7 +248,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
}
/* always do position */ {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_POSITION, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo->hwfmt[0] |= (1 << 0);
}
@@ -237,19 +257,19 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 4; i < 6; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i - 3));
}
if (colors[0]) {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
vinfo->hwfmt[0] |= (1 << 3);
}
if (colors[1]) {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 1);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
vinfo->hwfmt[0] |= (1 << 4);
}
@@ -258,7 +278,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 6; i < 10; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i - 1));
}
@@ -267,7 +287,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 0; i < 4; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i + 9));
}
@@ -276,13 +296,13 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 10; i < 12; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i + 3));
}
if (fog) {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_FOG, 0);
draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << 15);
}
@@ -293,6 +313,10 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
void
nv20_emit_hw_state(struct nv20_context *nv20)
{
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
+ struct nouveau_bo *rt_bo;
int i;
if (nv20->dirty & NV20_NEW_VERTPROG) {
@@ -361,36 +385,39 @@ nv20_emit_hw_state(struct nv20_context *nv20)
*/
/* Render target */
- BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1);
- OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
- OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ rt_bo = nouveau_bo(nv20->rt[0]);
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 1);
+ OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
if (nv20->zeta) {
- BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1);
- OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1);
- OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ struct nouveau_bo *zeta_bo = nouveau_bo(nv20->zeta);
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_ZETA, 1);
+ OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, kelvin, NV20TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
/* XXX for when we allocate LMA on nv17 */
-/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
- OUT_RELOCl(nv20->zeta + lma_offset);*/
+/* BEGIN_RING(chan, kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+ OUT_RELOCl(chan, nouveau_bo(nv20->zeta + lma_offset));*/
}
/* Vertex buffer */
- BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1);
- OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
- OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_VTXBUF0, 1);
+ OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
/* Texture images */
for (i = 0; i < 2; i++) {
if (!(nv20->fp_samplers & (1 << i)))
continue;
- BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1);
- OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+ struct nouveau_bo *bo = nouveau_bo(nv20->tex[i].buffer);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_OFFSET(i), 1);
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1);
- OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format,
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_FORMAT(i), 1);
+ OUT_RELOCd(chan, bo, nv20->tex[i].format,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0,
NV20TCL_TX_FORMAT_DMA1);
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index 69b79c809f..699773e8e6 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -16,14 +16,14 @@ struct nv20_transfer {
};
static void
-nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width0 = u_minify(pt->width0, level);
- template->height0 = u_minify(pt->height0, level);
+ template->width0 = width;
+ template->height0 = height;
template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv20_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv20_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -80,6 +80,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv20_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
face, level, zslice,
pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -126,13 +128,13 @@ nv20_transfer_del(struct pipe_transfer *ptx)
dst = pscreen->get_tex_surface(pscreen, ptx->texture,
ptx->face, ptx->level, ptx->zslice,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -151,8 +153,10 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c
index 84d7db6c5e..52991a0d85 100644
--- a/src/gallium/drivers/nv20/nv20_vbo.c
+++ b/src/gallium/drivers/nv20/nv20_vbo.c
@@ -9,7 +9,7 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
-boolean nv20_draw_elements( struct pipe_context *pipe,
+void nv20_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
@@ -45,7 +45,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- draw_set_mapped_constant_buffer(draw,
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
nv20->constbuf[PIPE_SHADER_VERTEX],
nv20->constbuf_nr[PIPE_SHADER_VERTEX]);
@@ -67,13 +67,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe,
}
draw_flush(nv20->draw);
- return TRUE;
}
-boolean nv20_draw_arrays( struct pipe_context *pipe,
+void nv20_draw_arrays( struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return nv20_draw_elements(pipe, NULL, 0, prim, start, count);
+ nv20_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index 46a821a48b..54572e9ab3 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -10,21 +10,32 @@ nv30_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
- BEGIN_RING(rankine, 0x1fd8, 1);
- OUT_RING (2);
- BEGIN_RING(rankine, 0x1fd8, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, rankine, 0x1fd8, 1);
+ OUT_RING (chan, 2);
+ BEGIN_RING(chan, rankine, 0x1fd8, 1);
+ OUT_RING (chan, 1);
}
- FIRE_RING(fence);
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
}
static void
nv30_destroy(struct pipe_context *pipe)
{
struct nv30_context *nv30 = nv30_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < NV30_STATE_MAX; i++) {
+ if (nv30->state.hw[i])
+ so_ref(NULL, &nv30->state.hw[i]);
+ }
if (nv30->draw)
draw_destroy(nv30->draw);
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index 864ddaeb59..e59449287b 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -14,10 +14,6 @@
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
#include "nouveau/nouveau_context.h"
-
-#define NOUVEAU_PUSH_CONTEXT(ctx) \
- struct nv30_screen *ctx = nv30->screen
-#include "nouveau/nouveau_push.h"
#include "nouveau/nouveau_stateobj.h"
#include "nv30_state.h"
@@ -198,9 +194,9 @@ extern struct nv30_state_entry nv30_state_fragtex;
extern struct nv30_state_entry nv30_state_vbo;
/* nv30_vbo.c */
-extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv30_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv30_draw_elements(struct pipe_context *pipe,
+extern void nv30_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index 40965a9772..2d565cb631 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -435,10 +435,11 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_CMP:
- tmp = temp(fpc);
- arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ tmp = nv30_sr(NV30SR_NONE, 0);
tmp.cc_update = 1;
arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NV30_VP_INST_COND_GE;
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
dst.cc_test = NV30_VP_INST_COND_LT;
arith(fpc, sat, MOV, dst, mask, src[1], none, none);
break;
@@ -517,13 +518,28 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
break;
case TGSI_OPCODE_SCS:
- if (mask & MASK_X) {
- arith(fpc, sat, COS, dst, MASK_X,
- swz(src[0], X, X, X, X), none, none);
+ /* avoid overwriting the source */
+ if(src[0].swz[SWZ_X] != SWZ_X)
+ {
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
- if (mask & MASK_Y) {
- arith(fpc, sat, SIN, dst, MASK_Y,
- swz(src[0], X, X, X, X), none, none);
+ else
+ {
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
break;
case TGSI_OPCODE_SIN:
@@ -821,7 +837,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
nv30_fragprog_upload(nv30, fp);
- so = so_new(8, 1);
+ so = so_new(4, 4, 1);
so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
@@ -870,6 +886,12 @@ void
nv30_fragprog_destroy(struct nv30_context *nv30,
struct nv30_fragment_program *fp)
{
+ if (fp->buffer)
+ pipe_buffer_reference(&fp->buffer, NULL);
+
+ if (fp->so)
+ so_ref(NULL, &fp->so);
+
if (fp->insn_len)
FREE(fp->insn);
}
diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
index b3293ee700..9893567891 100644
--- a/src/gallium/drivers/nv30/nv30_fragtex.c
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -106,7 +106,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
txs = tf->swizzle;
- so = so_new(16, 2);
+ so = so_new(1, 8, 2);
so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8);
so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
@@ -135,7 +135,7 @@ nv30_fragtex_validate(struct nv30_context *nv30)
unit = ffs(samplers) - 1;
samplers &= ~(1 << unit);
- so = so_new(2, 0);
+ so = so_new(1, 1, 0);
so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1);
so_data (so, 0);
so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]);
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index ce95d9700f..8fbba38e78 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -5,6 +5,7 @@
#include "util/u_math.h"
#include "nv30_context.h"
+#include "../nv04/nv04_surface_2d.h"
static void
nv30_miptree_layout(struct nv30_miptree *nv30mt)
@@ -108,6 +109,12 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+ * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+ * This also happens for small mipmaps of large textures. */
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
nv30_miptree_layout(mt);
mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage,
@@ -196,12 +203,27 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ns->base.offset = nv30mt->level[level].image_offset[0];
}
+ /* create a linear temporary that we can render into if necessary.
+ * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+ * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+ return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base;
+
return &ns->base;
}
static void
nv30_miptree_surface_del(struct pipe_surface *ps)
{
+ struct nv04_surface* ns = (struct nv04_surface*)ps;
+ if(ns->backing)
+ {
+ struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen;
+ if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
+ screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+ nv30_miptree_surface_del(&ns->backing->base);
+ }
+
pipe_texture_reference(&ps->texture, NULL);
FREE(ps);
}
diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c
index 1d1c8a484e..e27e9ccbf6 100644
--- a/src/gallium/drivers/nv30/nv30_query.c
+++ b/src/gallium/drivers/nv30/nv30_query.c
@@ -41,6 +41,9 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_query *q = nv30_query(pq);
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
@@ -57,10 +60,10 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
assert(0);
nouveau_notifier_reset(nv30->screen->query, q->object->start);
- BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1);
- OUT_RING (1);
- BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1);
+ OUT_RING (chan, 1);
q->ready = FALSE;
}
@@ -69,12 +72,15 @@ static void
nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
struct nv30_query *q = nv30_query(pq);
- BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1);
- OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
+ BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1);
+ OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT));
- FIRE_RING(NULL);
+ FIRE_RING(chan);
}
static boolean
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index 7cd36902eb..9ed48178dc 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -156,6 +156,12 @@ static void
nv30_screen_destroy(struct pipe_screen *pscreen)
{
struct nv30_screen *screen = nv30_screen(pscreen);
+ unsigned i;
+
+ for (i = 0; i < NV30_STATE_MAX; i++) {
+ if (screen->state[i])
+ so_ref(NULL, &screen->state[i]);
+ }
nouveau_resource_free(&screen->vp_exec_heap);
nouveau_resource_free(&screen->vp_data_heap);
@@ -163,6 +169,9 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->query);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->rankine);
+ nv04_surface_2d_takedown(&screen->eng2d);
+
+ nouveau_screen_fini(&screen->base);
FREE(pscreen);
}
@@ -224,7 +233,6 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
return FALSE;
}
- BIND_RING(chan, screen->rankine, 7);
/* 2D engine setup */
screen->eng2d = nv04_surface_2d_init(&screen->base);
@@ -261,7 +269,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
/* Static rankine initialisation */
- so = so_new(128, 0);
+ so = so_new(36, 60, 0);
so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1);
so_data (so, screen->sync->handle);
so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2);
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
index 268d3a8ab8..065c927a10 100644
--- a/src/gallium/drivers/nv30/nv30_state.c
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -14,7 +14,7 @@ nv30_blend_state_create(struct pipe_context *pipe,
struct nv30_context *nv30 = nv30_context(pipe);
struct nouveau_grobj *rankine = nv30->screen->rankine;
struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso));
- struct nouveau_stateobj *so = so_new(16, 0);
+ struct nouveau_stateobj *so = so_new(5, 8, 0);
if (cso->blend_enable) {
so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3);
@@ -300,7 +300,7 @@ nv30_rasterizer_state_create(struct pipe_context *pipe,
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
- struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_stateobj *so = so_new(9, 19, 0);
struct nouveau_grobj *rankine = nv30->screen->rankine;
/*XXX: ignored:
@@ -435,7 +435,7 @@ nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe,
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
- struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_stateobj *so = so_new(5, 21, 0);
struct nouveau_grobj *rankine = nv30->screen->rankine;
so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3);
diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c
index 64cf9ae93a..c36d58c040 100644
--- a/src/gallium/drivers/nv30/nv30_state_blend.c
+++ b/src/gallium/drivers/nv30/nv30_state_blend.c
@@ -18,7 +18,7 @@ struct nv30_state_entry nv30_state_blend = {
static boolean
nv30_state_blend_colour_validate(struct nv30_context *nv30)
{
- struct nouveau_stateobj *so = so_new(2, 0);
+ struct nouveau_stateobj *so = so_new(1, 1, 0);
struct pipe_blend_color *bcol = &nv30->blend_colour;
so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1);
diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
index 6f6d1740d6..2ed2ea55e8 100644
--- a/src/gallium/drivers/nv30/nv30_state_fb.c
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -10,7 +10,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
struct nv04_surface *rt[2], *zeta = NULL;
uint32_t rt_enable = 0, rt_format = 0;
int i, colour_format = 0, zeta_format = 0, depth_only = 0;
- struct nouveau_stateobj *so = so_new(64, 10);
+ struct nouveau_stateobj *so = so_new(12, 18, 10);
unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
unsigned w = fb->width;
unsigned h = fb->height;
diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c
index 3ac7a8471e..ba61a9e24a 100644
--- a/src/gallium/drivers/nv30/nv30_state_scissor.c
+++ b/src/gallium/drivers/nv30/nv30_state_scissor.c
@@ -12,7 +12,7 @@ nv30_state_scissor_validate(struct nv30_context *nv30)
return FALSE;
nv30->state.scissor_enabled = rast->scissor;
- so = so_new(3, 0);
+ so = so_new(1, 2, 0);
so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2);
if (nv30->state.scissor_enabled) {
so_data (so, ((s->maxx - s->minx) << 16) | s->minx);
diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c
index d0c791ac08..ed520a4f43 100644
--- a/src/gallium/drivers/nv30/nv30_state_stipple.c
+++ b/src/gallium/drivers/nv30/nv30_state_stipple.c
@@ -14,14 +14,14 @@ nv30_state_stipple_validate(struct nv30_context *nv30)
if (rast->poly_stipple_enable) {
unsigned i;
- so = so_new(35, 0);
+ so = so_new(2, 33, 0);
so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, 1);
so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32);
for (i = 0; i < 32; i++)
so_data(so, nv30->stipple[i]);
} else {
- so = so_new(2, 0);
+ so = so_new(1, 1, 0);
so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, 0);
}
diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c
index c3eb413dac..2d7781292b 100644
--- a/src/gallium/drivers/nv30/nv30_state_viewport.c
+++ b/src/gallium/drivers/nv30/nv30_state_viewport.c
@@ -19,7 +19,7 @@ nv30_state_viewport_validate(struct nv30_context *nv30)
return FALSE;
nv30->state.viewport_bypass = bypass;
- so = so_new(11, 0);
+ so = so_new(3, 10, 0);
if (!bypass) {
so_method(so, nv30->screen->rankine,
NV34TCL_VIEWPORT_TRANSLATE_X, 8);
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index 2255a02cae..65598991c6 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -16,14 +16,14 @@ struct nv30_transfer {
};
static void
-nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width0 = u_minify(pt->width0, level);
- template->height0 = u_minify(pt->height0, level);
+ template->width0 = width;
+ template->height0 = height;
template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv30_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -80,6 +80,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
0, 0, 0,
pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -126,13 +128,13 @@ nv30_transfer_del(struct pipe_transfer *ptx)
dst = pscreen->get_tex_surface(pscreen, ptx->texture,
ptx->face, ptx->level, ptx->zslice,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -151,8 +153,10 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
index e32b8141af..1c5db03ea2 100644
--- a/src/gallium/drivers/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -163,19 +163,21 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so,
return TRUE;
}
-boolean
+void
nv30_draw_arrays(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_context *nv30 = nv30_context(pipe);
- struct nouveau_channel *chan = nv30->screen->base.channel;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
unsigned restart = 0;
nv30_vbo_set_idxbuf(nv30, NULL, 0);
if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
/*return nv30_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);*/
- return FALSE;
+ return;
}
while (count) {
@@ -186,17 +188,17 @@ nv30_draw_arrays(struct pipe_context *pipe,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
mode, start, count, &restart);
if (!vc) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
nr = (vc & 0xff);
if (nr) {
- BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1);
- OUT_RING (((nr - 1) << 24) | start);
+ BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1);
+ OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -206,15 +208,15 @@ nv30_draw_arrays(struct pipe_context *pipe,
nr -= push;
- BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push);
+ BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push);
while (push--) {
- OUT_RING(((0x100 - 1) << 24) | start);
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, 0);
count -= vc;
start = restart;
@@ -228,7 +230,9 @@ static INLINE void
nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv30->screen->base.channel;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
while (count) {
uint8_t *elts = (uint8_t *)ib + start;
@@ -239,17 +243,17 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
if (vc & 1) {
- BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
- OUT_RING (elts[0]);
+ BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -258,16 +262,16 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
push = MIN2(vc, 2047 * 2);
- BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((elts[i+1] << 16) | elts[i]);
+ OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
vc -= push;
elts += push;
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
@@ -277,7 +281,9 @@ static INLINE void
nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv30->screen->base.channel;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
while (count) {
uint16_t *elts = (uint16_t *)ib + start;
@@ -288,17 +294,17 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
if (vc & 1) {
- BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
- OUT_RING (elts[0]);
+ BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -307,16 +313,16 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
push = MIN2(vc, 2047 * 2);
- BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((elts[i+1] << 16) | elts[i]);
+ OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
vc -= push;
elts += push;
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
@@ -326,7 +332,9 @@ static INLINE void
nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv30->screen->base.channel;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
while (count) {
uint32_t *elts = (uint32_t *)ib + start;
@@ -337,32 +345,32 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
while (vc) {
push = MIN2(vc, 2047);
- BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push);
- OUT_RINGp (elts, push);
+ BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push);
+ OUT_RINGp (chan, elts, push);
vc -= push;
elts += push;
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
}
-static boolean
+static void
nv30_draw_elements_inline(struct pipe_context *pipe,
struct pipe_buffer *ib, unsigned ib_size,
unsigned mode, unsigned start, unsigned count)
@@ -393,15 +401,16 @@ nv30_draw_elements_inline(struct pipe_context *pipe,
}
pipe_buffer_unmap(pscreen, ib);
- return TRUE;
}
-static boolean
+static void
nv30_draw_elements_vbo(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_context *nv30 = nv30_context(pipe);
- struct nouveau_channel *chan = nv30->screen->base.channel;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
unsigned restart = 0;
while (count) {
@@ -412,17 +421,17 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
mode, start, count, &restart);
if (!vc) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
nr = (vc & 0xff);
if (nr) {
- BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1);
- OUT_RING (((nr - 1) << 24) | start);
+ BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1);
+ OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -432,24 +441,22 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
nr -= push;
- BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push);
+ BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push);
while (push--) {
- OUT_RING(((0x100 - 1) << 24) | start);
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, 0);
count -= vc;
start = restart;
}
-
- return TRUE;
}
-boolean
+void
nv30_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
@@ -461,7 +468,7 @@ nv30_draw_elements(struct pipe_context *pipe,
if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
/*return nv30_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);*/
- return FALSE;
+ return;
}
if (idxbuf) {
@@ -472,7 +479,6 @@ nv30_draw_elements(struct pipe_context *pipe,
}
pipe->flush(pipe, 0, NULL);
- return TRUE;
}
static boolean
@@ -485,9 +491,9 @@ nv30_vbo_validate(struct nv30_context *nv30)
unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
int hw;
- vtxbuf = so_new(20, 18);
+ vtxbuf = so_new(3, 17, 18);
so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
- vtxfmt = so_new(17, 0);
+ vtxfmt = so_new(1, 16, 0);
so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr);
for (hw = 0; hw < nv30->vtxelt_nr; hw++) {
@@ -500,7 +506,7 @@ nv30_vbo_validate(struct nv30_context *nv30)
if (!vb->stride) {
if (!sattr)
- sattr = so_new(16 * 5, 0);
+ sattr = so_new(16, 16 * 4, 0);
if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) {
so_data(vtxbuf, 0);
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index 5d60984622..e77a5be3f2 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -650,7 +650,9 @@ static boolean
nv30_vertprog_validate(struct nv30_context *nv30)
{
struct pipe_screen *pscreen = nv30->pipe.screen;
- struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
struct nv30_vertex_program *vp;
struct pipe_buffer *constbuf;
boolean upload_code = FALSE, upload_data = FALSE;
@@ -684,7 +686,7 @@ nv30_vertprog_validate(struct nv30_context *nv30)
assert(0);
}
- so = so_new(2, 0);
+ so = so_new(1, 1, 0);
so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
so_data (so, vp->exec->start);
so_ref(so, &vp->so);
@@ -770,9 +772,9 @@ nv30_vertprog_validate(struct nv30_context *nv30)
4 * sizeof(float));
}
- BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
- OUT_RING (i + vp->data->start);
- OUT_RINGp ((uint32_t *)vpd->value, 4);
+ BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (chan, i + vp->data->start);
+ OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
}
if (constbuf)
@@ -788,11 +790,11 @@ nv30_vertprog_validate(struct nv30_context *nv30)
vp->insns[i].data[2], vp->insns[i].data[3]);
}
#endif
- BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
- OUT_RING (vp->exec->start);
+ BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (chan, vp->exec->start);
for (i = 0; i < vp->nr_insns; i++) {
- BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
- OUT_RINGp (vp->insns[i].data, 4);
+ BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (chan, vp->insns[i].data, 4);
}
}
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index eb9cce4c78..f79ae4db84 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -10,21 +10,32 @@ nv40_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
- BEGIN_RING(curie, 0x1fd8, 1);
- OUT_RING (2);
- BEGIN_RING(curie, 0x1fd8, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, curie, 0x1fd8, 1);
+ OUT_RING (chan, 2);
+ BEGIN_RING(chan, curie, 0x1fd8, 1);
+ OUT_RING (chan, 1);
}
- FIRE_RING(fence);
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
}
static void
nv40_destroy(struct pipe_context *pipe)
{
struct nv40_context *nv40 = nv40_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < NV40_STATE_MAX; i++) {
+ if (nv40->state.hw[i])
+ so_ref(NULL, &nv40->state.hw[i]);
+ }
if (nv40->draw)
draw_destroy(nv40->draw);
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index 83fcf1785d..e219bb537a 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -14,10 +14,6 @@
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
#include "nouveau/nouveau_context.h"
-
-#define NOUVEAU_PUSH_CONTEXT(ctx) \
- struct nv40_screen *ctx = nv40->screen
-#include "nouveau/nouveau_push.h"
#include "nouveau/nouveau_stateobj.h"
#include "nv40_state.h"
@@ -183,7 +179,7 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);
/* nv40_draw.c */
extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
-extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe,
+extern void nv40_draw_elements_swtnl(struct pipe_context *pipe,
struct pipe_buffer *idxbuf,
unsigned ib_size, unsigned mode,
unsigned start, unsigned count);
@@ -219,9 +215,9 @@ extern struct nv40_state_entry nv40_state_vbo;
extern struct nv40_state_entry nv40_state_vtxfmt;
/* nv40_vbo.c */
-extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv40_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv40_draw_elements(struct pipe_context *pipe,
+extern void nv40_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
index b2f19ecb69..d826f8c2f5 100644
--- a/src/gallium/drivers/nv40/nv40_draw.c
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -31,6 +31,9 @@ nv40_render_stage(struct draw_stage *stage)
static INLINE void
nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
{
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
unsigned i;
for (i = 0; i < nv40->swtnl.nr_attribs; i++) {
@@ -41,30 +44,30 @@ nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
case EMIT_OMIT:
break;
case EMIT_1F:
- BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1);
- OUT_RING (fui(v->data[idx][0]));
+ BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1);
+ OUT_RING (chan, fui(v->data[idx][0]));
break;
case EMIT_2F:
- BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
- OUT_RING (fui(v->data[idx][0]));
- OUT_RING (fui(v->data[idx][1]));
+ BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
+ OUT_RING (chan, fui(v->data[idx][0]));
+ OUT_RING (chan, fui(v->data[idx][1]));
break;
case EMIT_3F:
- BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
- OUT_RING (fui(v->data[idx][0]));
- OUT_RING (fui(v->data[idx][1]));
- OUT_RING (fui(v->data[idx][2]));
+ BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
+ OUT_RING (chan, fui(v->data[idx][0]));
+ OUT_RING (chan, fui(v->data[idx][1]));
+ OUT_RING (chan, fui(v->data[idx][2]));
break;
case EMIT_4F:
- BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
- OUT_RING (fui(v->data[idx][0]));
- OUT_RING (fui(v->data[idx][1]));
- OUT_RING (fui(v->data[idx][2]));
- OUT_RING (fui(v->data[idx][3]));
+ BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
+ OUT_RING (chan, fui(v->data[idx][0]));
+ OUT_RING (chan, fui(v->data[idx][1]));
+ OUT_RING (chan, fui(v->data[idx][2]));
+ OUT_RING (chan, fui(v->data[idx][3]));
break;
case EMIT_4UB:
- BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
- OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]),
+ BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
+ OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]),
float_to_ubyte(v->data[idx][1]),
float_to_ubyte(v->data[idx][2]),
float_to_ubyte(v->data[idx][3])));
@@ -82,7 +85,11 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
{
struct nv40_render_stage *rs = nv40_render_stage(stage);
struct nv40_context *nv40 = rs->nv40;
- struct nouveau_pushbuf *pb = nv40->screen->base.channel->pushbuf;
+
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_pushbuf *pb = chan->pushbuf;
+ struct nouveau_grobj *curie = screen->curie;
unsigned i;
/* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
@@ -91,19 +98,19 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
NOUVEAU_ERR("AIII, missed flush\n");
assert(0);
}
- FIRE_RING(NULL);
+ FIRE_RING(chan);
nv40_state_emit(nv40);
}
/* Switch primitive modes if necessary */
if (rs->prim != mode) {
if (rs->prim != NV40TCL_BEGIN_END_STOP) {
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (NV40TCL_BEGIN_END_STOP);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, NV40TCL_BEGIN_END_STOP);
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (mode);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, mode);
rs->prim = mode;
}
@@ -115,8 +122,8 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
* off the primitive now.
*/
if (pb->remaining < ((count * 20) + 6)) {
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (NV40TCL_BEGIN_END_STOP);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, NV40TCL_BEGIN_END_STOP);
rs->prim = NV40TCL_BEGIN_END_STOP;
}
}
@@ -144,10 +151,13 @@ nv40_render_flush(struct draw_stage *draw, unsigned flags)
{
struct nv40_render_stage *rs = nv40_render_stage(draw);
struct nv40_context *nv40 = rs->nv40;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
if (rs->prim != NV40TCL_BEGIN_END_STOP) {
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (NV40TCL_BEGIN_END_STOP);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, NV40TCL_BEGIN_END_STOP);
rs->prim = NV40TCL_BEGIN_END_STOP;
}
}
@@ -226,7 +236,7 @@ nv40_draw_render_stage(struct nv40_context *nv40)
return &render->stage;
}
-boolean
+void
nv40_draw_elements_swtnl(struct pipe_context *pipe,
struct pipe_buffer *idxbuf, unsigned idxbuf_size,
unsigned mode, unsigned start, unsigned count)
@@ -237,7 +247,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
void *map;
if (!nv40_state_validate_swtnl(nv40))
- return FALSE;
+ return;
nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
nv40_state_emit(nv40);
@@ -261,7 +271,8 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
map = pipe_buffer_map(pscreen,
nv40->constbuf[PIPE_SHADER_VERTEX],
PIPE_BUFFER_USAGE_CPU_READ);
- draw_set_mapped_constant_buffer(nv40->draw, map, nr);
+ draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX,
+ map, nr);
}
draw_arrays(nv40->draw, mode, start, count);
@@ -277,15 +288,13 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
draw_flush(nv40->draw);
pipe->flush(pipe, 0, NULL);
-
- return TRUE;
}
static INLINE void
emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
unsigned semantic, unsigned index)
{
- unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index);
+ unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index);
unsigned a = nv40->swtnl.nr_attribs++;
nv40->swtnl.hw[a] = hw;
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 1bf16726d1..1237066c39 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -149,7 +149,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
sizeof(uint32_t) * 4);
}
- sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
+ sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
break;
case NV40SR_NONE:
sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
@@ -445,10 +445,11 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_CMP:
- tmp = temp(fpc);
- arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ tmp = nv40_sr(NV40SR_NONE, 0);
tmp.cc_update = 1;
arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NV40_VP_INST_COND_GE;
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
dst.cc_test = NV40_VP_INST_COND_LT;
arith(fpc, sat, MOV, dst, mask, src[1], none, none);
break;
@@ -573,13 +574,28 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
neg(swz(tmp, X, X, X, X)), none, none);
break;
case TGSI_OPCODE_SCS:
- if (mask & MASK_X) {
- arith(fpc, sat, COS, dst, MASK_X,
- swz(src[0], X, X, X, X), none, none);
+ /* avoid overwriting the source */
+ if(src[0].swz[SWZ_X] != SWZ_X)
+ {
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
- if (mask & MASK_Y) {
- arith(fpc, sat, SIN, dst, MASK_Y,
- swz(src[0], X, X, X, X), none, none);
+ else
+ {
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
break;
case TGSI_OPCODE_SEQ:
@@ -752,7 +768,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
{
struct tgsi_full_immediate *imm;
float vals[4];
-
+
imm = &p.FullToken.FullImmediate;
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
assert(fpc->nr_imm < MAX_IMM);
@@ -836,7 +852,7 @@ nv40_fragprog_translate(struct nv40_context *nv40,
fp->insn[fpc->inst_offset + 1] = 0x00000000;
fp->insn[fpc->inst_offset + 2] = 0x00000000;
fp->insn[fpc->inst_offset + 3] = 0x00000000;
-
+
fp->translated = TRUE;
out_err:
tgsi_parse_free(&parse);
@@ -903,7 +919,7 @@ nv40_fragprog_validate(struct nv40_context *nv40)
fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
nv40_fragprog_upload(nv40, fp);
- so = so_new(4, 1);
+ so = so_new(2, 2, 1);
so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1);
so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
@@ -917,7 +933,7 @@ nv40_fragprog_validate(struct nv40_context *nv40)
update_constants:
if (fp->nr_consts) {
float *map;
-
+
map = pipe_buffer_map(pscreen, constbuf,
PIPE_BUFFER_USAGE_CPU_READ);
for (i = 0; i < fp->nr_consts; i++) {
@@ -948,6 +964,12 @@ void
nv40_fragprog_destroy(struct nv40_context *nv40,
struct nv40_fragment_program *fp)
{
+ if (fp->buffer)
+ pipe_buffer_reference(&fp->buffer, NULL);
+
+ if (fp->so)
+ so_ref(NULL, &fp->so);
+
if (fp->insn_len)
FREE(fp->insn);
}
diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c
index 44abc84596..aad9198210 100644
--- a/src/gallium/drivers/nv40/nv40_fragtex.c
+++ b/src/gallium/drivers/nv40/nv40_fragtex.c
@@ -108,7 +108,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
txs = tf->swizzle;
- so = so_new(16, 2);
+ so = so_new(2, 9, 2);
so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8);
so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
@@ -139,7 +139,7 @@ nv40_fragtex_validate(struct nv40_context *nv40)
unit = ffs(samplers) - 1;
samplers &= ~(1 << unit);
- so = so_new(2, 0);
+ so = so_new(1, 1, 0);
so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1);
so_data (so, 0);
so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]);
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index b974e68a07..89bd155ff4 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -5,6 +5,7 @@
#include "util/u_math.h"
#include "nv40_context.h"
+#include "../nv04/nv04_surface_2d.h"
@@ -105,6 +106,12 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+ * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+ * This also happens for small mipmaps of large textures. */
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
nv40_miptree_layout(mt);
mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size);
@@ -191,12 +198,27 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ns->base.offset = mt->level[level].image_offset[0];
}
+ /* create a linear temporary that we can render into if necessary.
+ * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+ * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+ return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base;
+
return &ns->base;
}
static void
nv40_miptree_surface_del(struct pipe_surface *ps)
{
+ struct nv04_surface* ns = (struct nv04_surface*)ps;
+ if(ns->backing)
+ {
+ struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen;
+ if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
+ screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+ nv40_miptree_surface_del(&ns->backing->base);
+ }
+
pipe_texture_reference(&ps->texture, NULL);
FREE(ps);
}
diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c
index 7874aedd42..8ed4a67dd0 100644
--- a/src/gallium/drivers/nv40/nv40_query.c
+++ b/src/gallium/drivers/nv40/nv40_query.c
@@ -41,6 +41,9 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_query *q = nv40_query(pq);
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
@@ -57,10 +60,10 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
assert(0);
nouveau_notifier_reset(nv40->screen->query, q->object->start);
- BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1);
- OUT_RING (1);
- BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1);
+ OUT_RING (chan, 1);
q->ready = FALSE;
}
@@ -70,11 +73,14 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_query *q = nv40_query(pq);
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
- BEGIN_RING(curie, NV40TCL_QUERY_GET, 1);
- OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
+ BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1);
+ OUT_RING (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT));
- FIRE_RING(NULL);
+ FIRE_RING(chan);
}
static boolean
diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
index bd13dfddd1..9e55e5a089 100644
--- a/src/gallium/drivers/nv40/nv40_screen.c
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -140,6 +140,12 @@ static void
nv40_screen_destroy(struct pipe_screen *pscreen)
{
struct nv40_screen *screen = nv40_screen(pscreen);
+ unsigned i;
+
+ for (i = 0; i < NV40_STATE_MAX; i++) {
+ if (screen->state[i])
+ so_ref(NULL, &screen->state[i]);
+ }
nouveau_resource_free(&screen->vp_exec_heap);
nouveau_resource_free(&screen->vp_data_heap);
@@ -147,6 +153,7 @@ nv40_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->query);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->curie);
+ nv04_surface_2d_takedown(&screen->eng2d);
nouveau_screen_fini(&screen->base);
@@ -208,7 +215,6 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
return FALSE;
}
- BIND_RING(chan, screen->curie, 7);
/* 2D engine setup */
screen->eng2d = nv04_surface_2d_init(&screen->base);
@@ -245,7 +251,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
/* Static curie initialisation */
- so = so_new(128, 0);
+ so = so_new(16, 25, 0);
so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1);
so_data (so, screen->sync->handle);
so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2);
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index f6e5dee814..7d990f7d56 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -16,7 +16,7 @@ nv40_blend_state_create(struct pipe_context *pipe,
struct nv40_context *nv40 = nv40_context(pipe);
struct nouveau_grobj *curie = nv40->screen->curie;
struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso));
- struct nouveau_stateobj *so = so_new(16, 0);
+ struct nouveau_stateobj *so = so_new(5, 8, 0);
if (cso->blend_enable) {
so_method(so, curie, NV40TCL_BLEND_ENABLE, 3);
@@ -310,7 +310,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe,
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
- struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_stateobj *so = so_new(8, 18, 0);
struct nouveau_grobj *curie = nv40->screen->curie;
/*XXX: ignored:
@@ -445,7 +445,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe,
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
- struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_stateobj *so = so_new(4, 21, 0);
struct nouveau_grobj *curie = nv40->screen->curie;
so_method(so, curie, NV40TCL_DEPTH_FUNC, 3);
diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c
index 8cd05ce66e..3ff00a37f6 100644
--- a/src/gallium/drivers/nv40/nv40_state_blend.c
+++ b/src/gallium/drivers/nv40/nv40_state_blend.c
@@ -18,7 +18,7 @@ struct nv40_state_entry nv40_state_blend = {
static boolean
nv40_state_blend_colour_validate(struct nv40_context *nv40)
{
- struct nouveau_stateobj *so = so_new(2, 0);
+ struct nouveau_stateobj *so = so_new(1, 1, 0);
struct pipe_blend_color *bcol = &nv40->blend_colour;
so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1);
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 789ed16126..13fe854915 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -54,9 +54,10 @@ nv40_state_do_validate(struct nv40_context *nv40,
void
nv40_state_emit(struct nv40_context *nv40)
{
- struct nouveau_channel *chan = nv40->screen->base.channel;
struct nv40_state *state = &nv40->state;
struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
unsigned i;
uint64_t states;
@@ -80,10 +81,10 @@ nv40_state_emit(struct nv40_context *nv40)
if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) |
(1ULL << NV40_STATE_FRAGTEX0))) {
- BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
- OUT_RING (2);
- BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (chan, 2);
+ BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (chan, 1);
}
state->dirty = 0;
diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c
index 1c7a7cd64f..a58fe9ddb1 100644
--- a/src/gallium/drivers/nv40/nv40_state_fb.c
+++ b/src/gallium/drivers/nv40/nv40_state_fb.c
@@ -19,7 +19,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40)
struct nv04_surface *rt[4], *zeta;
uint32_t rt_enable, rt_format;
int i, colour_format = 0, zeta_format = 0;
- struct nouveau_stateobj *so = so_new(64, 10);
+ struct nouveau_stateobj *so = so_new(18, 24, 10);
unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
unsigned w = fb->width;
unsigned h = fb->height;
diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c
index cf58d33906..753a505e93 100644
--- a/src/gallium/drivers/nv40/nv40_state_scissor.c
+++ b/src/gallium/drivers/nv40/nv40_state_scissor.c
@@ -12,7 +12,7 @@ nv40_state_scissor_validate(struct nv40_context *nv40)
return FALSE;
nv40->state.scissor_enabled = rast->scissor;
- so = so_new(3, 0);
+ so = so_new(1, 2, 0);
so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2);
if (nv40->state.scissor_enabled) {
so_data (so, ((s->maxx - s->minx) << 16) | s->minx);
diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c
index b51024ad9b..2b371ebfec 100644
--- a/src/gallium/drivers/nv40/nv40_state_stipple.c
+++ b/src/gallium/drivers/nv40/nv40_state_stipple.c
@@ -14,14 +14,14 @@ nv40_state_stipple_validate(struct nv40_context *nv40)
if (rast->poly_stipple_enable) {
unsigned i;
- so = so_new(35, 0);
+ so = so_new(2, 33, 0);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, 1);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32);
for (i = 0; i < 32; i++)
so_data(so, nv40->stipple[i]);
} else {
- so = so_new(2, 0);
+ so = so_new(1, 1, 0);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, 0);
}
diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c
index 665d2d5fca..9919ba1d0b 100644
--- a/src/gallium/drivers/nv40/nv40_state_viewport.c
+++ b/src/gallium/drivers/nv40/nv40_state_viewport.c
@@ -19,7 +19,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40)
return FALSE;
nv40->state.viewport_bypass = bypass;
- so = so_new(11, 0);
+ so = so_new(2, 9, 0);
if (!bypass) {
so_method(so, nv40->screen->curie,
NV40TCL_VIEWPORT_TRANSLATE_X, 8);
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index b084a38b48..791ee6823d 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -16,14 +16,14 @@ struct nv40_transfer {
};
static void
-nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width0 = u_minify(pt->width0, level);
- template->height0 = u_minify(pt->height0, level);
+ template->width0 = width;
+ template->height0 = height;
template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv40_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -80,6 +80,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
0, 0, 0,
pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -126,13 +128,13 @@ nv40_transfer_del(struct pipe_transfer *ptx)
dst = pscreen->get_tex_surface(pscreen, ptx->texture,
ptx->face, ptx->level, ptx->zslice,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -151,8 +153,10 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index af3fcf6a34..a777898f68 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -164,18 +164,21 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so,
return TRUE;
}
-boolean
+void
nv40_draw_arrays(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_context *nv40 = nv40_context(pipe);
- struct nouveau_channel *chan = nv40->screen->base.channel;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
unsigned restart;
nv40_vbo_set_idxbuf(nv40, NULL, 0);
if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
- return nv40_draw_elements_swtnl(pipe, NULL, 0,
- mode, start, count);
+ nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ return;
}
while (count) {
@@ -186,17 +189,17 @@ nv40_draw_arrays(struct pipe_context *pipe,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
mode, start, count, &restart);
if (!vc) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
nr = (vc & 0xff);
if (nr) {
- BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1);
- OUT_RING (((nr - 1) << 24) | start);
+ BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1);
+ OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -206,29 +209,30 @@ nv40_draw_arrays(struct pipe_context *pipe,
nr -= push;
- BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push);
+ BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push);
while (push--) {
- OUT_RING(((0x100 - 1) << 24) | start);
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, 0);
count -= vc;
start = restart;
}
pipe->flush(pipe, 0, NULL);
- return TRUE;
}
static INLINE void
nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv40->screen->base.channel;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
while (count) {
uint8_t *elts = (uint8_t *)ib + start;
@@ -239,17 +243,17 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
if (vc & 1) {
- BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
- OUT_RING (elts[0]);
+ BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -258,16 +262,16 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
push = MIN2(vc, 2047 * 2);
- BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((elts[i+1] << 16) | elts[i]);
+ OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
vc -= push;
elts += push;
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
@@ -277,7 +281,9 @@ static INLINE void
nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv40->screen->base.channel;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
while (count) {
uint16_t *elts = (uint16_t *)ib + start;
@@ -288,17 +294,17 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
if (vc & 1) {
- BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
- OUT_RING (elts[0]);
+ BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -307,16 +313,16 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
push = MIN2(vc, 2047 * 2);
- BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((elts[i+1] << 16) | elts[i]);
+ OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
vc -= push;
elts += push;
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
@@ -326,7 +332,9 @@ static INLINE void
nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv40->screen->base.channel;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
while (count) {
uint32_t *elts = (uint32_t *)ib + start;
@@ -337,32 +345,32 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
while (vc) {
push = MIN2(vc, 2047);
- BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push);
- OUT_RINGp (elts, push);
+ BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push);
+ OUT_RINGp (chan, elts, push);
vc -= push;
elts += push;
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
}
-static boolean
+static void
nv40_draw_elements_inline(struct pipe_context *pipe,
struct pipe_buffer *ib, unsigned ib_size,
unsigned mode, unsigned start, unsigned count)
@@ -393,15 +401,16 @@ nv40_draw_elements_inline(struct pipe_context *pipe,
}
pipe_buffer_unmap(pscreen, ib);
- return TRUE;
}
-static boolean
+static void
nv40_draw_elements_vbo(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_context *nv40 = nv40_context(pipe);
- struct nouveau_channel *chan = nv40->screen->base.channel;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
unsigned restart;
while (count) {
@@ -412,17 +421,17 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
mode, start, count, &restart);
if (!vc) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
nr = (vc & 0xff);
if (nr) {
- BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1);
- OUT_RING (((nr - 1) << 24) | start);
+ BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1);
+ OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -432,24 +441,22 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
nr -= push;
- BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push);
+ BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push);
while (push--) {
- OUT_RING(((0x100 - 1) << 24) | start);
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, 0);
count -= vc;
start = restart;
}
-
- return TRUE;
}
-boolean
+void
nv40_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
@@ -459,8 +466,9 @@ nv40_draw_elements(struct pipe_context *pipe,
idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
- return nv40_draw_elements_swtnl(pipe, NULL, 0,
- mode, start, count);
+ nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ return;
}
if (idxbuf) {
@@ -471,7 +479,6 @@ nv40_draw_elements(struct pipe_context *pipe,
}
pipe->flush(pipe, 0, NULL);
- return TRUE;
}
static boolean
@@ -484,9 +491,9 @@ nv40_vbo_validate(struct nv40_context *nv40)
unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
int hw;
- vtxbuf = so_new(20, 18);
+ vtxbuf = so_new(3, 17, 18);
so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
- vtxfmt = so_new(17, 0);
+ vtxfmt = so_new(1, 16, 0);
so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
@@ -499,7 +506,7 @@ nv40_vbo_validate(struct nv40_context *nv40)
if (!vb->stride) {
if (!sattr)
- sattr = so_new(16 * 5, 0);
+ sattr = so_new(16, 16 * 4, 0);
if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) {
so_data(vtxbuf, 0);
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index d9fc31006f..8d80fcad38 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -834,7 +834,9 @@ static boolean
nv40_vertprog_validate(struct nv40_context *nv40)
{
struct pipe_screen *pscreen = nv40->pipe.screen;
- struct nouveau_grobj *curie = nv40->screen->curie;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
struct nv40_vertex_program *vp;
struct pipe_buffer *constbuf;
boolean upload_code = FALSE, upload_data = FALSE;
@@ -884,7 +886,7 @@ check_gpu_resources:
assert(0);
}
- so = so_new(7, 0);
+ so = so_new(3, 4, 0);
so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
so_data (so, vp->exec->start);
so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
@@ -974,9 +976,9 @@ check_gpu_resources:
4 * sizeof(float));
}
- BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
- OUT_RING (i + vp->data->start);
- OUT_RINGp ((uint32_t *)vpd->value, 4);
+ BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (chan, i + vp->data->start);
+ OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
}
if (constbuf)
@@ -993,11 +995,11 @@ check_gpu_resources:
NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
}
#endif
- BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
- OUT_RING (vp->exec->start);
+ BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (chan, vp->exec->start);
for (i = 0; i < vp->nr_insns; i++) {
- BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4);
- OUT_RINGp (vp->insns[i].data, 4);
+ BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (chan, vp->insns[i].data, 4);
}
}
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index d21b80eab8..5997456e4c 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -43,6 +43,39 @@ nv50_destroy(struct pipe_context *pipe)
{
struct nv50_context *nv50 = nv50_context(pipe);
+ if (nv50->state.fb)
+ so_ref(NULL, &nv50->state.fb);
+ if (nv50->state.blend)
+ so_ref(NULL, &nv50->state.blend);
+ if (nv50->state.blend_colour)
+ so_ref(NULL, &nv50->state.blend_colour);
+ if (nv50->state.zsa)
+ so_ref(NULL, &nv50->state.zsa);
+ if (nv50->state.rast)
+ so_ref(NULL, &nv50->state.rast);
+ if (nv50->state.stipple)
+ so_ref(NULL, &nv50->state.stipple);
+ if (nv50->state.scissor)
+ so_ref(NULL, &nv50->state.scissor);
+ if (nv50->state.viewport)
+ so_ref(NULL, &nv50->state.viewport);
+ if (nv50->state.tsc_upload)
+ so_ref(NULL, &nv50->state.tsc_upload);
+ if (nv50->state.tic_upload)
+ so_ref(NULL, &nv50->state.tic_upload);
+ if (nv50->state.vertprog)
+ so_ref(NULL, &nv50->state.vertprog);
+ if (nv50->state.fragprog)
+ so_ref(NULL, &nv50->state.fragprog);
+ if (nv50->state.programs)
+ so_ref(NULL, &nv50->state.programs);
+ if (nv50->state.vtxfmt)
+ so_ref(NULL, &nv50->state.vtxfmt);
+ if (nv50->state.vtxbuf)
+ so_ref(NULL, &nv50->state.vtxbuf);
+ if (nv50->state.vtxattr)
+ so_ref(NULL, &nv50->state.vtxattr);
+
draw_destroy(nv50->draw);
FREE(nv50);
}
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 5578a5838f..cbd4c3ff86 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -191,9 +191,9 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst,
extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50);
/* nv50_vbo.c */
-extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv50_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv50_draw_elements(struct pipe_context *pipe,
+extern void nv50_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 3f1edf0a13..cecb1efc90 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -145,7 +145,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
mt->level[0].tile_mode, tile_flags,
&mt->base.bo);
if (ret) {
- for (l = 0; l < pt->last_level; ++l)
+ for (l = 0; l <= pt->last_level; ++l)
FREE(mt->level[l].image_offset);
FREE(mt);
return NULL;
@@ -188,7 +188,7 @@ nv50_miptree_destroy(struct pipe_texture *pt)
struct nv50_miptree *mt = nv50_miptree(pt);
unsigned l;
- for (l = 0; l < pt->last_level; ++l)
+ for (l = 0; l <= pt->last_level; ++l)
FREE(mt->level[l].image_offset);
nouveau_bo_ref(NULL, &mt->base.bo);
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 679c28ce4b..069f815938 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -96,7 +96,11 @@ struct nv50_reg {
#define NV50_MOD_NEG 1
#define NV50_MOD_ABS 2
+#define NV50_MOD_NEG_ABS (NV50_MOD_NEG | NV50_MOD_ABS)
#define NV50_MOD_SAT 4
+#define NV50_MOD_I32 8
+
+/* NV50_MOD_I32 is used to indicate integer mode for neg/abs */
/* STACK: Conditionals and loops have to use the (per warp) stack.
* Stack entries consist of an entry type (divergent path, join at),
@@ -134,6 +138,7 @@ struct nv50_pc {
uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */
struct nv50_reg *temp_temp[16];
+ struct nv50_program_exec *temp_temp_exec[16];
unsigned temp_temp_nr;
/* broadcast and destination replacement regs */
@@ -154,26 +159,17 @@ struct nv50_pc {
int if_lvl, loop_lvl;
unsigned loop_pos[NV50_MAX_LOOP_NESTING];
+ unsigned *insn_pos; /* actual program offset of each TGSI insn */
+ boolean in_subroutine;
+
/* current instruction and total number of insns */
unsigned insn_cur;
unsigned insn_nr;
boolean allow32;
-};
-
-static INLINE struct nv50_reg *
-reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
-{
- struct nv50_reg *ri;
- assert(pc->reg_instance_nr < 16);
- ri = &pc->reg_instances[pc->reg_instance_nr++];
- if (reg) {
- *ri = *reg;
- reg->mod = 0;
- }
- return ri;
-}
+ uint8_t edgeflag_out;
+};
static INLINE void
ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
@@ -250,7 +246,23 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
}
}
- assert(0);
+ NOUVEAU_ERR("out of registers\n");
+ abort();
+}
+
+static INLINE struct nv50_reg *
+reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+ struct nv50_reg *ri;
+
+ assert(pc->reg_instance_nr < 16);
+ ri = &pc->reg_instances[pc->reg_instance_nr++];
+ if (reg) {
+ alloc_reg(pc, reg);
+ *ri = *reg;
+ reg->mod = 0;
+ }
+ return ri;
}
/* XXX: For shaders that aren't executed linearly (e.g. shaders that
@@ -275,26 +287,11 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
}
}
- assert(0);
+ NOUVEAU_ERR("out of registers\n");
+ abort();
return NULL;
}
-/* Assign the hw of the discarded temporary register src
- * to the tgsi register dst and free src.
- */
-static void
-assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- assert(src->index == -1 && src->hw != -1);
-
- if (dst->hw != -1)
- pc->r_temp[dst->hw] = NULL;
- pc->r_temp[src->hw] = dst;
- dst->hw = src->hw;
-
- FREE(src);
-}
-
/* release the hardware resource held by r */
static void
release_hw(struct nv50_pc *pc, struct nv50_reg *r)
@@ -353,23 +350,29 @@ free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
}
static struct nv50_reg *
-temp_temp(struct nv50_pc *pc)
+temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e)
{
if (pc->temp_temp_nr >= 16)
assert(0);
pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
+ pc->temp_temp_exec[pc->temp_temp_nr] = e;
return pc->temp_temp[pc->temp_temp_nr++];
}
+/* This *must* be called for all nv50_program_exec that have been
+ * given as argument to temp_temp, or the temps will be leaked !
+ */
static void
-kill_temp_temp(struct nv50_pc *pc)
+kill_temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e)
{
int i;
for (i = 0; i < pc->temp_temp_nr; i++)
- free_temp(pc, pc->temp_temp[i]);
- pc->temp_temp_nr = 0;
+ if (pc->temp_temp_exec[i] == e)
+ free_temp(pc, pc->temp_temp[i]);
+ if (!e)
+ pc->temp_temp_nr = 0;
}
static int
@@ -431,6 +434,8 @@ emit(struct nv50_pc *pc, struct nv50_program_exec *e)
p->exec_head = e;
p->exec_tail = e;
p->exec_size += (e->inst[0] & 1) ? 2 : 1;
+
+ kill_temp_temp(pc, e);
}
static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
@@ -451,10 +456,19 @@ is_immd(struct nv50_program_exec *e)
return FALSE;
}
+static boolean
+is_join(struct nv50_program_exec *e)
+{
+ if (is_long(e) && (e->inst[1] & 3) == 2)
+ return TRUE;
+ return FALSE;
+}
+
static INLINE void
set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
struct nv50_program_exec *e)
{
+ assert(!is_immd(e));
set_long(pc, e);
e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
e->inst[1] |= (pred << 7) | (idx << 12);
@@ -497,15 +511,6 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
static INLINE void
set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
{
- union {
- float f;
- uint32_t ui;
- } u;
- u.ui = pc->immd_buf[imm->hw];
-
- u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f;
- u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f;
-
set_long(pc, e);
/* XXX: can't be predicated - bits overlap; cases where both
* are required should be avoided by using pc->allow32 */
@@ -513,8 +518,8 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
set_pred_wr(pc, 0, 0, e);
e->inst[1] |= 0x00000002 | 0x00000001;
- e->inst[0] |= (u.ui & 0x3f) << 16;
- e->inst[1] |= (u.ui >> 6) << 2;
+ e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16;
+ e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2;
}
static INLINE void
@@ -663,6 +668,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
}
+/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */
static void
emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
@@ -715,6 +721,34 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
FREE(imm);
}
+/* Assign the hw of the discarded temporary register src
+ * to the tgsi register dst and free src.
+ */
+static void
+assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ assert(src->index == -1 && src->hw != -1);
+
+ if (pc->if_lvl || pc->loop_lvl ||
+ (dst->type != P_TEMP) ||
+ (src->hw < pc->result_nr * 4 &&
+ pc->p->type == PIPE_SHADER_FRAGMENT) ||
+ pc->p->info.opcode_count[TGSI_OPCODE_CAL] ||
+ pc->p->info.opcode_count[TGSI_OPCODE_BRA]) {
+
+ emit_mov(pc, dst, src);
+ free_temp(pc, src);
+ return;
+ }
+
+ if (dst->hw != -1)
+ pc->r_temp[dst->hw] = NULL;
+ pc->r_temp[src->hw] = dst;
+ dst->hw = src->hw;
+
+ FREE(src);
+}
+
static void
emit_nop(struct nv50_pc *pc)
{
@@ -757,7 +791,7 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src,
struct nv50_reg *temp;
if (src->type != P_TEMP) {
- temp = temp_temp(pc);
+ temp = temp_temp(pc, e);
emit_mov(pc, temp, src);
src = temp;
}
@@ -776,7 +810,7 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
e->inst[1] |= 0x00200000;
} else
if (src->type == P_CONST || src->type == P_IMMD) {
- struct nv50_reg *temp = temp_temp(pc);
+ struct nv50_reg *temp = temp_temp(pc, e);
emit_mov(pc, temp, src);
src = temp;
@@ -792,7 +826,7 @@ static void
set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
{
if (src->type == P_ATTR) {
- struct nv50_reg *temp = temp_temp(pc);
+ struct nv50_reg *temp = temp_temp(pc, e);
emit_mov(pc, temp, src);
src = temp;
@@ -800,7 +834,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
if (src->type == P_CONST || src->type == P_IMMD) {
assert(!(e->inst[0] & 0x00800000));
if (e->inst[0] & 0x01000000) {
- struct nv50_reg *temp = temp_temp(pc);
+ struct nv50_reg *temp = temp_temp(pc, e);
emit_mov(pc, temp, src);
src = temp;
@@ -822,7 +856,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
set_long(pc, e);
if (src->type == P_ATTR) {
- struct nv50_reg *temp = temp_temp(pc);
+ struct nv50_reg *temp = temp_temp(pc, e);
emit_mov(pc, temp, src);
src = temp;
@@ -830,7 +864,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
if (src->type == P_CONST || src->type == P_IMMD) {
assert(!(e->inst[0] & 0x01000000));
if (e->inst[0] & 0x00800000) {
- struct nv50_reg *temp = temp_temp(pc);
+ struct nv50_reg *temp = temp_temp(pc, e);
emit_mov(pc, temp, src);
src = temp;
@@ -845,6 +879,26 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
}
static void
+set_half_src(struct nv50_pc *pc, struct nv50_reg *src, int lh,
+ struct nv50_program_exec *e, int pos)
+{
+ struct nv50_reg *r = src;
+
+ alloc_reg(pc, r);
+ if (r->type != P_TEMP) {
+ r = temp_temp(pc, e);
+ emit_mov(pc, r, src);
+ }
+
+ if (r->hw > (NV50_SU_MAX_TEMP / 2)) {
+ NOUVEAU_ERR("out of low GPRs\n");
+ abort();
+ }
+
+ e->inst[pos / 32] |= ((src->hw * 2) + lh) << (pos % 32);
+}
+
+static void
emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred)
{
struct nv50_program_exec *e = exec(pc);
@@ -886,7 +940,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
if (src1->type == P_IMMD && !is_long(e)) {
- if (src0->mod & NV50_MOD_NEG)
+ if (src0->mod ^ src1->mod)
e->inst[0] |= 0x00008000;
set_immd(pc, src1, e);
} else {
@@ -948,6 +1002,13 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
emit(pc, e);
}
+#define NV50_MAX_F32 0x880
+#define NV50_MAX_S32 0x08c
+#define NV50_MAX_U32 0x084
+#define NV50_MIN_F32 0x8a0
+#define NV50_MIN_S32 0x0ac
+#define NV50_MIN_U32 0x0a4
+
static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
struct nv50_reg *src0, struct nv50_reg *src1)
@@ -955,8 +1016,8 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
struct nv50_program_exec *e = exec(pc);
set_long(pc, e);
- e->inst[0] |= 0xb0000000;
- e->inst[1] |= (sub << 29);
+ e->inst[0] |= 0x30000000 | ((sub & 0x800) << 20);
+ e->inst[1] |= (sub << 24);
check_swap_src_0_1(pc, &src0, &src1);
set_dst(pc, dst, e);
@@ -997,6 +1058,8 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
op != TGSI_OPCODE_XOR)
assert(!"invalid bit op");
+ assert(!(src0->mod | src1->mod));
+
if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) {
set_immd(pc, src1, e);
if (op == TGSI_OPCODE_OR)
@@ -1018,6 +1081,69 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
}
static void
+emit_not(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xd0000000;
+ e->inst[1] = 0x0402c000;
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+ set_src_1(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_shift(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x30000000;
+ e->inst[1] = 0xc4000000;
+
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+
+ if (src1->type == P_IMMD) {
+ e->inst[1] |= (1 << 20);
+ e->inst[0] |= (pc->immd_buf[src1->hw] & 0x7f) << 16;
+ } else
+ set_src_1(pc, src1, e);
+
+ if (dir != TGSI_OPCODE_SHL)
+ e->inst[1] |= (1 << 29);
+
+ if (dir == TGSI_OPCODE_ISHR)
+ e->inst[1] |= (1 << 27);
+
+ emit(pc, e);
+}
+
+static void
+emit_shl_imm(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src, int s)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x30000000;
+ e->inst[1] = 0xc4100000;
+ if (s < 0) {
+ e->inst[1] |= 1 << 29;
+ s = -s;
+ }
+ e->inst[1] |= ((s & 0x7f) << 16);
+
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1, struct nv50_reg *src2)
{
@@ -1048,6 +1174,14 @@ emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
src2->mod ^= NV50_MOD_NEG;
}
+#define NV50_FLOP_RCP 0
+#define NV50_FLOP_RSQ 2
+#define NV50_FLOP_LG2 3
+#define NV50_FLOP_SIN 4
+#define NV50_FLOP_COS 5
+#define NV50_FLOP_EX2 6
+
+/* rcp, rsqrt, lg2 support neg and abs */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
struct nv50_reg *dst, struct nv50_reg *src)
@@ -1055,17 +1189,20 @@ emit_flop(struct nv50_pc *pc, unsigned sub,
struct nv50_program_exec *e = exec(pc);
e->inst[0] |= 0x90000000;
- if (sub) {
+ if (sub || src->mod) {
set_long(pc, e);
e->inst[1] |= (sub << 29);
}
set_dst(pc, dst, e);
+ set_src_0_restricted(pc, src, e);
- if (sub == 0 || sub == 2)
- set_src_0_restricted(pc, src, e);
- else
- set_src_0(pc, src, e);
+ assert(!src->mod || sub < 4);
+
+ if (src->mod & NV50_MOD_NEG)
+ e->inst[1] |= 0x04000000;
+ if (src->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00100000;
emit(pc, e);
}
@@ -1082,6 +1219,11 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
set_long(pc, e);
e->inst[1] |= (6 << 29) | 0x00004000;
+ if (src->mod & NV50_MOD_NEG)
+ e->inst[1] |= 0x04000000;
+ if (src->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00100000;
+
emit(pc, e);
}
@@ -1097,39 +1239,49 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
set_long(pc, e);
e->inst[1] |= (6 << 29);
+ if (src->mod & NV50_MOD_NEG)
+ e->inst[1] |= 0x04000000;
+ if (src->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00100000;
+
emit(pc, e);
}
-#define CVTOP_RN 0x01
-#define CVTOP_FLOOR 0x03
-#define CVTOP_CEIL 0x05
-#define CVTOP_TRUNC 0x07
-#define CVTOP_SAT 0x08
-#define CVTOP_ABS 0x10
-
-/* 0x04 == 32 bit dst */
-/* 0x40 == dst is float */
-/* 0x80 == src is float */
-#define CVT_F32_F32 0xc4
-#define CVT_F32_S32 0x44
-#define CVT_S32_F32 0x8c
-#define CVT_S32_S32 0x0c
-#define CVT_NEG 0x20
-#define CVT_RI 0x08
+#define CVT_RN (0x00 << 16)
+#define CVT_FLOOR (0x02 << 16)
+#define CVT_CEIL (0x04 << 16)
+#define CVT_TRUNC (0x06 << 16)
+#define CVT_SAT (0x08 << 16)
+#define CVT_ABS (0x10 << 16)
+
+#define CVT_X32_X32 0x04004000
+#define CVT_X32_S32 0x04014000
+#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32)
+#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32)
+#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32)
+#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32)
+#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32)
+#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32)
+#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32)
+#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32)
+
+#define CVT_NEG 0x20000000
+#define CVT_RI 0x08000000
static void
emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
- int wp, unsigned cvn, unsigned fmt)
+ int wp, uint32_t cvn)
{
struct nv50_program_exec *e;
e = exec(pc);
- set_long(pc, e);
- e->inst[0] |= 0xa0000000;
- e->inst[1] |= 0x00004000; /* 32 bit src */
- e->inst[1] |= (cvn << 16);
- e->inst[1] |= (fmt << 24);
+ if (src->mod & NV50_MOD_NEG) cvn |= CVT_NEG;
+ if (src->mod & NV50_MOD_ABS) cvn |= CVT_ABS;
+
+ e->inst[0] = 0xa0000000;
+ e->inst[1] = cvn;
+ set_long(pc, e);
set_src_0(pc, src, e);
if (wp >= 0)
@@ -1154,10 +1306,12 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
* 0x6 = GE
* 0x7 = set condition code ? (used before bra.lt/le/gt/ge)
* 0x8 = unordered bit (allows NaN)
+ *
+ * mode = 0x04 (u32), 0x0c (s32), 0x80 (f32)
*/
static void
emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
- struct nv50_reg *src0, struct nv50_reg *src1)
+ struct nv50_reg *src0, struct nv50_reg *src1, uint8_t mode)
{
static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
@@ -1172,16 +1326,10 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
if (dst && dst->type != P_TEMP)
dst = alloc_temp(pc, NULL);
- /* set.u32 */
set_long(pc, e);
- e->inst[0] |= 0xb0000000;
+ e->inst[0] |= 0x30000000 | (mode << 24);
e->inst[1] |= 0x60000000 | (ccode << 14);
- /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but
- * that doesn't seem to match what the hw actually does
- e->inst[1] |= 0x04000000; << breaks things, u32 by default ?
- */
-
if (wp >= 0)
set_pred_wr(pc, 1, wp, e);
if (dst)
@@ -1196,33 +1344,146 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
emit(pc, e);
- /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */
- if (rdst)
- emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32);
+ if (rdst && mode == 0x80) /* convert to float ? */
+ emit_cvt(pc, rdst, dst, -1, CVT_ABS | CVT_F32_S32);
if (rdst && rdst != dst)
free_temp(pc, dst);
}
-static INLINE unsigned
-map_tgsi_setop_cc(unsigned op)
+static INLINE void
+map_tgsi_setop_hw(unsigned op, uint8_t *cc, uint8_t *ty)
{
switch (op) {
- case TGSI_OPCODE_SLT: return 0x1;
- case TGSI_OPCODE_SGE: return 0x6;
- case TGSI_OPCODE_SEQ: return 0x2;
- case TGSI_OPCODE_SGT: return 0x4;
- case TGSI_OPCODE_SLE: return 0x3;
- case TGSI_OPCODE_SNE: return 0xd;
+ case TGSI_OPCODE_SLT: *cc = 0x1; *ty = 0x80; break;
+ case TGSI_OPCODE_SGE: *cc = 0x6; *ty = 0x80; break;
+ case TGSI_OPCODE_SEQ: *cc = 0x2; *ty = 0x80; break;
+ case TGSI_OPCODE_SGT: *cc = 0x4; *ty = 0x80; break;
+ case TGSI_OPCODE_SLE: *cc = 0x3; *ty = 0x80; break;
+ case TGSI_OPCODE_SNE: *cc = 0xd; *ty = 0x80; break;
+
+ case TGSI_OPCODE_ISLT: *cc = 0x1; *ty = 0x0c; break;
+ case TGSI_OPCODE_ISGE: *cc = 0x6; *ty = 0x0c; break;
+ case TGSI_OPCODE_USEQ: *cc = 0x2; *ty = 0x04; break;
+ case TGSI_OPCODE_USGE: *cc = 0x6; *ty = 0x04; break;
+ case TGSI_OPCODE_USLT: *cc = 0x1; *ty = 0x04; break;
+ case TGSI_OPCODE_USNE: *cc = 0x5; *ty = 0x04; break;
default:
assert(0);
- return 0;
+ return;
}
}
+static void
+emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *rsrc1)
+{
+ struct nv50_program_exec *e = exec(pc);
+ struct nv50_reg *src1;
+
+ e->inst[0] = 0x20000000;
+
+ alloc_reg(pc, rsrc1);
+ check_swap_src_0_1(pc, &src0, &rsrc1);
+
+ src1 = rsrc1;
+ if (src0->mod & rsrc1->mod & NV50_MOD_NEG) {
+ src1 = temp_temp(pc, e);
+ emit_cvt(pc, src1, rsrc1, -1, CVT_S32_S32);
+ }
+
+ if (!pc->allow32 || src1->hw > 63 ||
+ (src1->type != P_TEMP && src1->type != P_IMMD))
+ set_long(pc, e);
+
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+
+ if (is_long(e)) {
+ e->inst[1] |= 1 << 26;
+ set_src_2(pc, src1, e);
+ } else {
+ e->inst[0] |= 0x8000;
+ if (src1->type == P_IMMD)
+ set_immd(pc, src1, e);
+ else
+ set_src_1(pc, src1, e);
+ }
+
+ if (src0->mod & NV50_MOD_NEG)
+ e->inst[0] |= 1 << 28;
+ else
+ if (src1->mod & NV50_MOD_NEG)
+ e->inst[0] |= 1 << 22;
+
+ emit(pc, e);
+}
+
+static void
+emit_mad_u16(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1,
+ struct nv50_reg *src2)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x60000000;
+ if (!pc->allow32)
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+
+ set_half_src(pc, src0, lh_0, e, 9);
+ set_half_src(pc, src1, lh_1, e, 16);
+ alloc_reg(pc, src2);
+ if (is_long(e) || (src2->type != P_TEMP) || (src2->hw != dst->hw))
+ set_src_2(pc, src2, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_mul_u16(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x40000000;
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+
+ set_half_src(pc, src0, lh_0, e, 9);
+ set_half_src(pc, src1, lh_1, e, 16);
+
+ emit(pc, e);
+}
+
+static void
+emit_sad(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x50000000;
+ if (!pc->allow32)
+ set_long(pc, e);
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+ alloc_reg(pc, src2);
+ if (is_long(e) || (src2->type != dst->type) || (src2->hw != dst->hw))
+ set_src_2(pc, src2, e);
+
+ if (is_long(e))
+ e->inst[1] |= 0x0c << 24;
+ else
+ e->inst[0] |= 0x81 << 8;
+
+ emit(pc, e);
+}
+
static INLINE void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI);
+ emit_cvt(pc, dst, src, -1, CVT_FLOOR | CVT_F32_F32 | CVT_RI);
}
static void
@@ -1231,24 +1492,18 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
{
struct nv50_reg *temp = alloc_temp(pc, NULL);
- emit_flop(pc, 3, temp, v);
+ emit_flop(pc, NV50_FLOP_LG2, temp, v);
emit_mul(pc, temp, temp, e);
emit_preex2(pc, temp, temp);
- emit_flop(pc, 6, dst, temp);
+ emit_flop(pc, NV50_FLOP_EX2, dst, temp);
free_temp(pc, temp);
}
static INLINE void
-emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);
-}
-
-static INLINE void
emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32);
+ emit_cvt(pc, dst, src, -1, CVT_SAT | CVT_F32_F32);
}
static void
@@ -1266,18 +1521,18 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
if (mask & (3 << 1)) {
tmp[0] = alloc_temp(pc, NULL);
- emit_minmax(pc, 4, tmp[0], src[0], zero);
+ emit_minmax(pc, NV50_MAX_F32, tmp[0], src[0], zero);
}
if (mask & (1 << 2)) {
set_pred_wr(pc, 1, 0, pc->p->exec_tail);
- tmp[1] = temp_temp(pc);
- emit_minmax(pc, 4, tmp[1], src[1], zero);
+ tmp[1] = temp_temp(pc, NULL);
+ emit_minmax(pc, NV50_MAX_F32, tmp[1], src[1], zero);
- tmp[3] = temp_temp(pc);
- emit_minmax(pc, 4, tmp[3], src[3], neg128);
- emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
+ tmp[3] = temp_temp(pc, NULL);
+ emit_minmax(pc, NV50_MAX_F32, tmp[3], src[3], neg128);
+ emit_minmax(pc, NV50_MIN_F32, tmp[3], tmp[3], pos128);
emit_pow(pc, dst[2], tmp[1], tmp[3]);
emit_mov(pc, dst[2], zero);
@@ -1305,12 +1560,6 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
FREE(one);
}
-static INLINE void
-emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- emit_cvt(pc, dst, src, -1, CVTOP_RN, CVT_F32_F32 | CVT_NEG);
-}
-
static void
emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
{
@@ -1322,80 +1571,62 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
set_long(pc, e); /* sets cond code to ALWAYS */
if (src) {
- unsigned cvn = CVT_F32_F32;
-
set_pred(pc, 0x1 /* cc = LT */, r_pred, e);
-
- if (src->mod & NV50_MOD_NEG)
- cvn |= CVT_NEG;
- /* write predicate reg */
- emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn);
+ /* write to predicate reg */
+ emit_cvt(pc, NULL, src, r_pred, CVT_F32_F32);
}
emit(pc, e);
}
static struct nv50_program_exec *
-emit_breakaddr(struct nv50_pc *pc)
+emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc)
{
struct nv50_program_exec *e = exec(pc);
- e->inst[0] = 0x40000002;
+ e->inst[0] = (op << 28) | 2;
set_long(pc, e);
+ if (pred >= 0)
+ set_pred(pc, cc, pred, e);
emit(pc, e);
return e;
}
-static void
-emit_break(struct nv50_pc *pc, int pred, unsigned cc)
+static INLINE struct nv50_program_exec *
+emit_breakaddr(struct nv50_pc *pc)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0x50000002;
- set_long(pc, e);
- if (pred >= 0)
- set_pred(pc, cc, pred, e);
+ return emit_control_flow(pc, 0x4, -1, 0);
+}
- emit(pc, e);
+static INLINE void
+emit_break(struct nv50_pc *pc, int pred, unsigned cc)
+{
+ emit_control_flow(pc, 0x5, pred, cc);
}
-static struct nv50_program_exec *
+static INLINE struct nv50_program_exec *
emit_joinat(struct nv50_pc *pc)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xa0000002;
- set_long(pc, e);
-
- emit(pc, e);
- return e;
+ return emit_control_flow(pc, 0xa, -1, 0);
}
-static struct nv50_program_exec *
+static INLINE struct nv50_program_exec *
emit_branch(struct nv50_pc *pc, int pred, unsigned cc)
{
- struct nv50_program_exec *e = exec(pc);
+ return emit_control_flow(pc, 0x1, pred, cc);
+}
- e->inst[0] = 0x10000002;
- set_long(pc, e);
- if (pred >= 0)
- set_pred(pc, cc, pred, e);
- emit(pc, e);
- return pc->p->exec_tail;
+static INLINE struct nv50_program_exec *
+emit_call(struct nv50_pc *pc, int pred, unsigned cc)
+{
+ return emit_control_flow(pc, 0x2, pred, cc);
}
-static void
+static INLINE void
emit_ret(struct nv50_pc *pc, int pred, unsigned cc)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0x30000002;
- set_long(pc, e);
- if (pred >= 0)
- set_pred(pc, cc, pred, e);
-
- emit(pc, e);
+ emit_control_flow(pc, 0x3, pred, cc);
}
#define QOP_ADD 0
@@ -1445,8 +1676,8 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
src[1]->mod |= NV50_MOD_ABS;
src[2]->mod |= NV50_MOD_ABS;
- emit_minmax(pc, 4, t[2], src[0], src[1]);
- emit_minmax(pc, 4, t[2], src[2], t[2]);
+ emit_minmax(pc, NV50_MAX_F32, t[2], src[0], src[1]);
+ emit_minmax(pc, NV50_MAX_F32, t[2], src[2], t[2]);
src[0]->mod = mod[0];
src[1]->mod = mod[1];
@@ -1458,7 +1689,7 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
if (arg == 4) /* there is no textureProj(samplerCubeShadow) */
emit_mov(pc, t[3], src[3]);
- emit_flop(pc, 0, t[2], t[2]);
+ emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]);
emit_mul(pc, t[0], src[0], t[2]);
emit_mul(pc, t[1], src[1], t[2]);
@@ -1476,7 +1707,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
t[3]->rhw = src[3]->rhw;
emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
- emit_flop(pc, 0, t[3], t[3]);
+ emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]);
for (c = 0; c < dim; ++c) {
t[c]->rhw = src[c]->rhw;
@@ -1490,7 +1721,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
/* XXX: for some reason the blob sometimes uses MAD
* (mad f32 $rX $rY $rZ neg $r63)
*/
- emit_flop(pc, 0, t[3], src[3]);
+ emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]);
for (c = 0; c < dim; ++c)
emit_mul(pc, t[c], src[c], t[3]);
if (arg != dim) /* depth reference value */
@@ -1537,7 +1768,13 @@ emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
struct nv50_reg *src, struct nv50_program_exec *tex)
{
struct nv50_program_exec *join_at;
- unsigned i, target = pc->p->exec_size + 7 * 2;
+ unsigned i, target = pc->p->exec_size + 9 * 2;
+
+ if (pc->p->type != PIPE_SHADER_FRAGMENT) {
+ emit(pc, tex);
+ return;
+ }
+ pc->allow32 = FALSE;
/* Subtract lod of each pixel from lod of top left pixel, jump
* texlod insn if result is 0, then repeat for 2 other pixels.
@@ -1663,6 +1900,7 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
emit(pc, e);
} else
if (bias_lod < 0) {
+ assert(pc->p->type == PIPE_SHADER_FRAGMENT);
e->inst[0] |= arg << 22;
e->inst[1] |= 0x20000000; /* texbias */
emit_mov(pc, t[arg], src[3]);
@@ -1742,6 +1980,21 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
q = 0x0403c000;
m = 0xffff7fff;
break;
+ case 0x2:
+ case 0x3:
+ /* ADD, SUB, SUBR b32 */
+ m = ~(0x8000 | (127 << 16));
+ q = ((e->inst[0] & (~m)) >> 2) | (1 << 26);
+ break;
+ case 0x5:
+ /* SAD */
+ m = ~(0x81 << 8);
+ q = (0x0c << 24) | ((e->inst[0] & (0x7f << 2)) << 12);
+ break;
+ case 0x6:
+ /* MAD u16 */
+ q = (e->inst[0] & (0x7f << 2)) << 12;
+ break;
case 0x8:
/* INTERP (move centroid, perspective and flat bits) */
m = ~0x03000100;
@@ -1778,26 +2031,57 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
}
/* Some operations support an optional negation flag. */
-static boolean
-negate_supported(const struct tgsi_full_instruction *insn, int i)
+static int
+get_supported_mods(const struct tgsi_full_instruction *insn, int i)
{
switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_COS:
case TGSI_OPCODE_DDX:
case TGSI_OPCODE_DDY:
case TGSI_OPCODE_DP3:
case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_EX2:
case TGSI_OPCODE_KIL:
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_LG2:
case TGSI_OPCODE_MAD:
- return TRUE;
+ case TGSI_OPCODE_MUL:
case TGSI_OPCODE_POW:
- if (i == 1)
- return TRUE;
- return FALSE;
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */
+ case TGSI_OPCODE_SCS:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_SUB:
+ return NV50_MOD_NEG;
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_INEG: /* tgsi src sign toggle/set would be stupid */
+ return NV50_MOD_ABS;
+ case TGSI_OPCODE_CEIL:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_TRUNC:
+ return NV50_MOD_NEG | NV50_MOD_ABS;
+ case TGSI_OPCODE_F2I:
+ case TGSI_OPCODE_F2U:
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_U2F:
+ return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32;
+ case TGSI_OPCODE_UADD:
+ return NV50_MOD_NEG | NV50_MOD_I32;
+ case TGSI_OPCODE_SAD:
+ case TGSI_OPCODE_SHL:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_NOT:
+ case TGSI_OPCODE_UMAD:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_USHR:
+ return NV50_MOD_I32;
default:
- return FALSE;
+ return 0;
}
}
@@ -1820,7 +2104,9 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
case TGSI_OPCODE_DST:
return mask & (c ? 0xa : 0x6);
case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_EXP:
case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_LOG:
case TGSI_OPCODE_POW:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
@@ -1902,11 +2188,11 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
static struct nv50_reg *
tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
- boolean neg)
+ int mod)
{
struct nv50_reg *r = NULL;
- struct nv50_reg *temp;
- unsigned sgn, c, swz;
+ struct nv50_reg *temp = NULL;
+ unsigned sgn, c, swz, cvn;
if (src->Register.File != TGSI_FILE_CONSTANT)
assert(!src->Register.Indirect);
@@ -1946,7 +2232,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
r = &pc->immd[src->Register.Index * 4 + c];
break;
case TGSI_FILE_SAMPLER:
- break;
+ return NULL;
case TGSI_FILE_ADDRESS:
r = pc->addr[src->Register.Index * 4 + c];
assert(r);
@@ -1961,35 +2247,34 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
break;
}
+ cvn = (mod & NV50_MOD_I32) ? CVT_S32_S32 : CVT_F32_F32;
+
switch (sgn) {
- case TGSI_UTIL_SIGN_KEEP:
- break;
case TGSI_UTIL_SIGN_CLEAR:
- temp = temp_temp(pc);
- emit_abs(pc, temp, r);
- r = temp;
- break;
- case TGSI_UTIL_SIGN_TOGGLE:
- if (neg)
- r->mod = NV50_MOD_NEG;
- else {
- temp = temp_temp(pc);
- emit_neg(pc, temp, r);
- r = temp;
- }
+ r->mod = NV50_MOD_ABS;
break;
case TGSI_UTIL_SIGN_SET:
- temp = temp_temp(pc);
- emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG);
- r = temp;
+ r->mod = NV50_MOD_NEG_ABS;
+ break;
+ case TGSI_UTIL_SIGN_TOGGLE:
+ r->mod = NV50_MOD_NEG;
break;
default:
- assert(0);
+ assert(!r->mod && sgn == TGSI_UTIL_SIGN_KEEP);
break;
}
- if (r && r->acc >= 0 && r != temp)
- return reg_instance(pc, r);
+ if ((r->mod & mod) != r->mod) {
+ temp = temp_temp(pc, NULL);
+ emit_cvt(pc, temp, r, -1, cvn);
+ r->mod = 0;
+ r = temp;
+ } else
+ r->mod |= mod & NV50_MOD_I32;
+
+ assert(r);
+ if (r->acc >= 0 && r != temp)
+ return reg_instance(pc, r); /* will clear r->mod */
return r;
}
@@ -2042,6 +2327,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c)
assert(0);
return 0x0;
}
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LOG:
case TGSI_OPCODE_LIT:
case TGSI_OPCODE_SCS:
case TGSI_OPCODE_TEX:
@@ -2082,6 +2369,8 @@ nv50_kill_branch(struct nv50_pc *pc)
if (pc->if_insn[lvl]->next != pc->p->exec_tail)
return FALSE;
+ if (is_immd(pc->p->exec_tail))
+ return FALSE;
/* if ccode == 'true', the BRA is from an ELSE and the predicate
* reg may no longer be valid, since we currently always use $p0
@@ -2149,22 +2438,22 @@ nv50_program_tx_insn(struct nv50_pc *pc,
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fs = &inst->Src[i];
unsigned src_mask;
- boolean neg_supp;
+ int mod_supp;
src_mask = nv50_tgsi_src_mask(inst, i);
- neg_supp = negate_supported(inst, i);
+ mod_supp = get_supported_mods(inst, i);
if (fs->Register.File == TGSI_FILE_SAMPLER)
unit = fs->Register.Index;
for (c = 0; c < 4; c++)
if (src_mask & (1 << c))
- src[i][c] = tgsi_src(pc, c, fs, neg_supp);
+ src[i][c] = tgsi_src(pc, c, fs, mod_supp);
}
brdc = temp = pc->r_brdc;
if (brdc && brdc->type != P_TEMP) {
- temp = temp_temp(pc);
+ temp = temp_temp(pc, NULL);
if (sat)
brdc = temp;
} else
@@ -2173,7 +2462,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
if (!(mask & (1 << c)) || dst[c]->type == P_TEMP)
continue;
/* rdst[c] = dst[c]; */ /* done above */
- dst[c] = temp_temp(pc);
+ dst[c] = temp_temp(pc, NULL);
}
}
@@ -2184,7 +2473,8 @@ nv50_program_tx_insn(struct nv50_pc *pc,
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_abs(pc, dst[c], src[0][c]);
+ emit_cvt(pc, dst[c], src[0][c], -1,
+ CVT_ABS | CVT_F32_F32);
}
break;
case TGSI_OPCODE_ADD:
@@ -2206,8 +2496,8 @@ nv50_program_tx_insn(struct nv50_pc *pc,
break;
case TGSI_OPCODE_ARL:
assert(src[0][0]);
- temp = temp_temp(pc);
- emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32);
+ temp = temp_temp(pc, NULL);
+ emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32);
emit_arl(pc, dst[0], temp, 4);
break;
case TGSI_OPCODE_BGNLOOP:
@@ -2215,16 +2505,28 @@ nv50_program_tx_insn(struct nv50_pc *pc,
pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
terminate_mbb(pc);
break;
+ case TGSI_OPCODE_BGNSUB:
+ assert(!pc->in_subroutine);
+ pc->in_subroutine = TRUE;
+ /* probably not necessary, but align to 8 byte boundary */
+ if (!is_long(pc->p->exec_tail))
+ convert_to_long(pc, pc->p->exec_tail);
+ break;
case TGSI_OPCODE_BRK:
assert(pc->loop_lvl > 0);
emit_break(pc, -1, 0);
break;
+ case TGSI_OPCODE_CAL:
+ assert(inst->Label.Label < pc->insn_nr);
+ emit_call(pc, -1, 0)->param.index = inst->Label.Label;
+ /* replaced by actual offset in nv50_program_fixup_insns */
+ break;
case TGSI_OPCODE_CEIL:
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
emit_cvt(pc, dst[c], src[0][c], -1,
- CVTOP_CEIL, CVT_F32_F32 | CVT_RI);
+ CVT_CEIL | CVT_F32_F32 | CVT_RI);
}
break;
case TGSI_OPCODE_CMP:
@@ -2232,24 +2534,29 @@ nv50_program_tx_insn(struct nv50_pc *pc,
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_cvt(pc, NULL, src[0][c], 1, CVTOP_RN, CVT_F32_F32);
+ emit_cvt(pc, NULL, src[0][c], 1, CVT_F32_F32);
emit_mov(pc, dst[c], src[1][c]);
set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */
emit_mov(pc, dst[c], src[2][c]);
set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */
}
break;
+ case TGSI_OPCODE_CONT:
+ assert(pc->loop_lvl > 0);
+ emit_branch(pc, -1, 0)->param.index =
+ pc->loop_pos[pc->loop_lvl - 1];
+ break;
case TGSI_OPCODE_COS:
if (mask & 8) {
emit_precossin(pc, temp, src[0][3]);
- emit_flop(pc, 5, dst[3], temp);
+ emit_flop(pc, NV50_FLOP_COS, dst[3], temp);
if (!(mask &= 7))
break;
if (temp == dst[3])
- temp = brdc = temp_temp(pc);
+ temp = brdc = temp_temp(pc, NULL);
}
emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 5, brdc, temp);
+ emit_flop(pc, NV50_FLOP_COS, brdc, temp);
break;
case TGSI_OPCODE_DDX:
for (c = 0; c < 4; c++) {
@@ -2321,9 +2628,56 @@ nv50_program_tx_insn(struct nv50_pc *pc,
pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size;
terminate_mbb(pc);
break;
+ case TGSI_OPCODE_ENDSUB:
+ assert(pc->in_subroutine);
+ pc->in_subroutine = FALSE;
+ break;
case TGSI_OPCODE_EX2:
emit_preex2(pc, temp, src[0][0]);
- emit_flop(pc, 6, brdc, temp);
+ emit_flop(pc, NV50_FLOP_EX2, brdc, temp);
+ break;
+ case TGSI_OPCODE_EXP:
+ {
+ struct nv50_reg *t[2];
+
+ assert(!temp);
+ t[0] = temp_temp(pc, NULL);
+ t[1] = temp_temp(pc, NULL);
+
+ if (mask & 0x6)
+ emit_mov(pc, t[0], src[0][0]);
+ if (mask & 0x3)
+ emit_flr(pc, t[1], src[0][0]);
+
+ if (mask & (1 << 1))
+ emit_sub(pc, dst[1], t[0], t[1]);
+ if (mask & (1 << 0)) {
+ emit_preex2(pc, t[1], t[1]);
+ emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]);
+ }
+ if (mask & (1 << 2)) {
+ emit_preex2(pc, t[0], t[0]);
+ emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]);
+ }
+ if (mask & (1 << 3))
+ emit_mov_immdval(pc, dst[3], 1.0f);
+ }
+ break;
+ case TGSI_OPCODE_F2I:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1,
+ CVT_TRUNC | CVT_S32_F32);
+ }
+ break;
+ case TGSI_OPCODE_F2U:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1,
+ CVT_TRUNC | CVT_U32_F32);
+ }
break;
case TGSI_OPCODE_FLR:
for (c = 0; c < 4; c++) {
@@ -2333,7 +2687,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
}
break;
case TGSI_OPCODE_FRC:
- temp = temp_temp(pc);
+ temp = temp_temp(pc, NULL);
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
@@ -2341,14 +2695,42 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_sub(pc, dst[c], src[0][c], temp);
}
break;
+ case TGSI_OPCODE_I2F:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_S32);
+ }
+ break;
case TGSI_OPCODE_IF:
assert(pc->if_lvl < NV50_MAX_COND_NESTING);
- emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN,
- CVT_F32_F32);
+ emit_cvt(pc, NULL, src[0][0], 0, CVT_ABS | CVT_F32_F32);
pc->if_join[pc->if_lvl] = emit_joinat(pc);
pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);;
terminate_mbb(pc);
break;
+ case TGSI_OPCODE_IMAX:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 0x08c, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_IMIN:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 0x0ac, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_INEG:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1,
+ CVT_S32_S32 | CVT_NEG);
+ }
+ break;
case TGSI_OPCODE_KIL:
assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]);
emit_kil(pc, src[0][0]);
@@ -2363,10 +2745,38 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_lit(pc, &dst[0], mask, &src[0][0]);
break;
case TGSI_OPCODE_LG2:
- emit_flop(pc, 3, brdc, src[0][0]);
+ emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ {
+ struct nv50_reg *t[2];
+
+ t[0] = temp_temp(pc, NULL);
+ if (mask & (1 << 1))
+ t[1] = temp_temp(pc, NULL);
+ else
+ t[1] = t[0];
+
+ emit_cvt(pc, t[0], src[0][0], -1, CVT_ABS | CVT_F32_F32);
+ emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]);
+ if (mask & (1 << 2))
+ emit_mov(pc, dst[2], t[1]);
+ emit_flr(pc, t[1], t[1]);
+ if (mask & (1 << 0))
+ emit_mov(pc, dst[0], t[1]);
+ if (mask & (1 << 1)) {
+ t[1]->mod = NV50_MOD_NEG;
+ emit_preex2(pc, t[1], t[1]);
+ t[1]->mod = 0;
+ emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]);
+ emit_mul(pc, dst[1], t[0], t[1]);
+ }
+ if (mask & (1 << 3))
+ emit_mov_immdval(pc, dst[3], 1.0f);
+ }
break;
case TGSI_OPCODE_LRP:
- temp = temp_temp(pc);
+ temp = temp_temp(pc, NULL);
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
@@ -2385,14 +2795,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
+ emit_minmax(pc, 0x880, dst[c], src[0][c], src[1][c]);
}
break;
case TGSI_OPCODE_MIN:
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
+ emit_minmax(pc, 0x8a0, dst[c], src[0][c], src[1][c]);
}
break;
case TGSI_OPCODE_MOV:
@@ -2409,44 +2819,73 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_mul(pc, dst[c], src[0][c], src[1][c]);
}
break;
+ case TGSI_OPCODE_NOT:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_not(pc, dst[c], src[0][c]);
+ }
+ break;
case TGSI_OPCODE_POW:
emit_pow(pc, brdc, src[0][0], src[1][0]);
break;
case TGSI_OPCODE_RCP:
- emit_flop(pc, 0, brdc, src[0][0]);
+ if (!sat && popcnt4(mask) == 1)
+ brdc = dst[ffs(mask) - 1];
+ emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]);
break;
case TGSI_OPCODE_RET:
- if (pc->p->type == PIPE_SHADER_FRAGMENT)
+ if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine)
nv50_fp_move_results(pc);
emit_ret(pc, -1, 0);
break;
case TGSI_OPCODE_RSQ:
- emit_flop(pc, 2, brdc, src[0][0]);
+ if (!sat && popcnt4(mask) == 1)
+ brdc = dst[ffs(mask) - 1];
+ src[0][0]->mod |= NV50_MOD_ABS;
+ emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]);
+ break;
+ case TGSI_OPCODE_SAD:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_sad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
+ }
break;
case TGSI_OPCODE_SCS:
- temp = temp_temp(pc);
+ temp = temp_temp(pc, NULL);
if (mask & 3)
emit_precossin(pc, temp, src[0][0]);
if (mask & (1 << 0))
- emit_flop(pc, 5, dst[0], temp);
+ emit_flop(pc, NV50_FLOP_COS, dst[0], temp);
if (mask & (1 << 1))
- emit_flop(pc, 4, dst[1], temp);
+ emit_flop(pc, NV50_FLOP_SIN, dst[1], temp);
if (mask & (1 << 2))
emit_mov_immdval(pc, dst[2], 0.0);
if (mask & (1 << 3))
emit_mov_immdval(pc, dst[3], 1.0);
break;
+ case TGSI_OPCODE_SHL:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_USHR:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_shift(pc, dst[c], src[0][c], src[1][c],
+ inst->Instruction.Opcode);
+ }
+ break;
case TGSI_OPCODE_SIN:
if (mask & 8) {
emit_precossin(pc, temp, src[0][3]);
- emit_flop(pc, 4, dst[3], temp);
+ emit_flop(pc, NV50_FLOP_SIN, dst[3], temp);
if (!(mask &= 7))
break;
if (temp == dst[3])
- temp = brdc = temp_temp(pc);
+ temp = brdc = temp_temp(pc, NULL);
}
emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 4, brdc, temp);
+ emit_flop(pc, NV50_FLOP_SIN, brdc, temp);
break;
case TGSI_OPCODE_SLT:
case TGSI_OPCODE_SGE:
@@ -2454,12 +2893,23 @@ nv50_program_tx_insn(struct nv50_pc *pc,
case TGSI_OPCODE_SGT:
case TGSI_OPCODE_SLE:
case TGSI_OPCODE_SNE:
- i = map_tgsi_setop_cc(inst->Instruction.Opcode);
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ {
+ uint8_t cc, ty;
+
+ map_tgsi_setop_hw(inst->Instruction.Opcode, &cc, &ty);
+
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]);
+ emit_set(pc, cc, dst[c], -1, src[0][c], src[1][c], ty);
}
+ }
break;
case TGSI_OPCODE_SUB:
for (c = 0; c < 4; c++) {
@@ -2489,11 +2939,72 @@ nv50_program_tx_insn(struct nv50_pc *pc,
if (!(mask & (1 << c)))
continue;
emit_cvt(pc, dst[c], src[0][c], -1,
- CVTOP_TRUNC, CVT_F32_F32 | CVT_RI);
+ CVT_TRUNC | CVT_F32_F32 | CVT_RI);
+ }
+ break;
+ case TGSI_OPCODE_U2F:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_U32);
+ }
+ break;
+ case TGSI_OPCODE_UADD:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_add_b32(pc, dst[c], src[0][c], src[1][c]);
}
break;
+ case TGSI_OPCODE_UMAX:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 0x084, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_UMIN:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_UMAD:
+ {
+ assert(!temp);
+ temp = temp_temp(pc, NULL);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1);
+ emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0,
+ temp);
+ emit_shl_imm(pc, temp, temp, 16);
+ emit_mad_u16(pc, temp, src[0][c], 0, src[1][c], 0,
+ temp);
+ emit_add_b32(pc, dst[c], temp, src[2][c]);
+ }
+ }
+ break;
+ case TGSI_OPCODE_UMUL:
+ {
+ assert(!temp);
+ temp = temp_temp(pc, NULL);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1);
+ emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0,
+ temp);
+ emit_shl_imm(pc, temp, temp, 16);
+ emit_mad_u16(pc, dst[c], src[0][c], 0, src[1][c], 0,
+ temp);
+ }
+ }
+ break;
case TGSI_OPCODE_XPD:
- temp = temp_temp(pc);
+ temp = temp_temp(pc, NULL);
if (mask & (1 << 0)) {
emit_mul(pc, temp, src[0][2], src[1][1]);
emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
@@ -2510,6 +3021,17 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_mov_immdval(pc, dst[3], 1.0);
break;
case TGSI_OPCODE_END:
+ if (pc->p->type == PIPE_SHADER_FRAGMENT)
+ nv50_fp_move_results(pc);
+
+ /* last insn must be long so it can have the exit bit set */
+ if (!is_long(pc->p->exec_tail))
+ convert_to_long(pc, pc->p->exec_tail);
+ else
+ if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail))
+ emit_nop(pc);
+
+ pc->p->exec_tail->inst[1] |= 1; /* set exit bit */
break;
default:
NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
@@ -2536,7 +3058,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
}
}
- kill_temp_temp(pc);
+ kill_temp_temp(pc, NULL);
pc->reg_instance_nr = 0;
return TRUE;
@@ -2545,7 +3067,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
static void
prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
{
- struct nv50_reg *reg = NULL;
+ struct nv50_reg *r, *reg = NULL;
const struct tgsi_full_src_register *src;
const struct tgsi_dst_register *dst;
unsigned i, c, k, mask;
@@ -2554,10 +3076,16 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
mask = dst->WriteMask;
if (dst->File == TGSI_FILE_TEMPORARY)
- reg = pc->temp;
+ reg = pc->temp;
else
- if (dst->File == TGSI_FILE_OUTPUT)
- reg = pc->result;
+ if (dst->File == TGSI_FILE_OUTPUT) {
+ reg = pc->result;
+
+ if (insn->Instruction.Opcode == TGSI_OPCODE_MOV &&
+ dst->Index == pc->edgeflag_out &&
+ insn->Src[0].Register.File == TGSI_FILE_INPUT)
+ pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index;
+ }
if (reg) {
for (c = 0; c < 4; c++) {
@@ -2585,7 +3113,15 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
continue;
k = tgsi_util_get_full_src_register_swizzle(src, c);
- reg[src->Register.Index * 4 + k].acc = pc->insn_nr;
+ r = &reg[src->Register.Index * 4 + k];
+
+ /* If used before written, pre-allocate the reg,
+ * lest we overwrite results from a subroutine.
+ */
+ if (!r->acc && r->type == P_TEMP)
+ alloc_reg(pc, r);
+
+ r->acc = pc->insn_nr;
}
}
}
@@ -2674,7 +3210,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
unsigned chn, mask = nv50_tgsi_src_mask(insn, i);
- boolean neg_supp = negate_supported(insn, i);
+ int ms = get_supported_mods(insn, i);
fs = &insn->Src[i];
if (fs->Register.File != fd->Register.File ||
@@ -2692,10 +3228,12 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
if (!(fd->Register.WriteMask & (1 << c)))
continue;
- /* no danger if src is copied to TEMP first */
- if ((s != TGSI_UTIL_SIGN_KEEP) &&
- (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp))
- continue;
+ if (s == TGSI_UTIL_SIGN_TOGGLE && !(ms & NV50_MOD_NEG))
+ continue;
+ if (s == TGSI_UTIL_SIGN_CLEAR && !(ms & NV50_MOD_ABS))
+ continue;
+ if ((s == TGSI_UTIL_SIGN_SET) && ((ms & 3) != 3))
+ continue;
rdep[c] |= nv50_tgsi_dst_revdep(
insn->Instruction.Opcode, i, chn);
@@ -2719,12 +3257,12 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
if (is_scalar_op(insn.Instruction.Opcode)) {
pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs);
if (!pc->r_brdc)
- pc->r_brdc = temp_temp(pc);
+ pc->r_brdc = temp_temp(pc, NULL);
return nv50_program_tx_insn(pc, &insn);
}
pc->r_brdc = NULL;
- if (!deqs)
+ if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3]))
return nv50_program_tx_insn(pc, &insn);
deqs = nv50_revdep_reorder(m, rdep);
@@ -2775,7 +3313,7 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg)
iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1;
emit_interp(pc, iv, NULL, mode & INTERP_CENTROID);
- emit_flop(pc, 0, iv, iv);
+ emit_flop(pc, NV50_FLOP_RCP, iv, iv);
/* XXX: when loading interpolants dynamically, move these
* to the program head, or make sure it can't be skipped.
@@ -2856,6 +3394,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
if (p->cfg.io_nr > first)
p->cfg.io_nr = first;
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ pc->edgeflag_out = first;
+ break;
/*
case TGSI_SEMANTIC_CLIP_DISTANCE:
p->cfg.clpd = MIN2(p->cfg.clpd, first);
@@ -3081,6 +3622,8 @@ free_nv50_pc(struct nv50_pc *pc)
FREE(pc->attr);
if (pc->temp)
FREE(pc->temp);
+ if (pc->insn_pos)
+ FREE(pc->insn_pos);
FREE(pc);
}
@@ -3104,6 +3647,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
p->cfg.two_side[0].hw = 0x40;
p->cfg.two_side[1].hw = 0x40;
+ p->cfg.edgeflag_in = pc->edgeflag_out = 0xff;
+
switch (p->type) {
case PIPE_SHADER_VERTEX:
p->cfg.psiz = 0x40;
@@ -3192,16 +3737,6 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
if (e->param.index >= 0 && !e->param.mask)
bra_list[n++] = e;
- /* last instruction must be long so it can have the exit bit set */
- if (!is_long(pc->p->exec_tail))
- convert_to_long(pc, pc->p->exec_tail);
- /* set exit bit */
- pc->p->exec_tail->inst[1] |= 1;
-
- /* !immd on exit insn simultaneously means !join */
- assert(!is_immd(pc->p->exec_head));
- assert(!is_immd(pc->p->exec_tail));
-
/* Make sure we don't have any single 32 bit instructions. */
for (e = pc->p->exec_head, pos = 0; e; e = e->next) {
pos += is_long(e) ? 2 : 1;
@@ -3210,12 +3745,24 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
for (i = 0; i < n; ++i)
if (bra_list[i]->param.index >= pos)
bra_list[i]->param.index += 1;
+ for (i = 0; i < pc->insn_nr; ++i)
+ if (pc->insn_pos[i] >= pos)
+ pc->insn_pos[i] += 1;
convert_to_long(pc, e);
++pos;
}
}
FREE(bra_list);
+
+ if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL])
+ return;
+
+ /* fill in CALL offsets */
+ for (e = pc->p->exec_head; e; e = e->next) {
+ if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2)
+ e->param.index = pc->insn_pos[e->param.index];
+ }
}
static boolean
@@ -3237,19 +3784,20 @@ nv50_program_tx(struct nv50_program *p)
if (ret == FALSE)
goto out_cleanup;
+ pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned));
+
tgsi_parse_init(&parse, pc->p->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&parse)) {
const union tgsi_full_token *tok = &parse.FullToken;
- /* don't allow half insn/immd on first and last instruction */
+ /* previously allow32 was FALSE for first & last instruction */
pc->allow32 = TRUE;
- if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr)
- pc->allow32 = FALSE;
tgsi_parse_token(&parse);
switch (tok->Token.Type) {
case TGSI_TOKEN_TYPE_INSTRUCTION:
+ pc->insn_pos[pc->insn_cur] = pc->p->exec_size;
++pc->insn_cur;
ret = nv50_tgsi_insn(pc, tok);
if (ret == FALSE)
@@ -3260,9 +3808,6 @@ nv50_program_tx(struct nv50_program *p)
}
}
- if (pc->p->type == PIPE_SHADER_FRAGMENT)
- nv50_fp_move_results(pc);
-
nv50_program_fixup_insns(pc);
p->param_nr = pc->param_nr * 4;
@@ -3434,7 +3979,7 @@ nv50_vertprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(13, 2);
+ so = so_new(5, 8, 2);
so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
NOUVEAU_BO_HIGH, 0, 0);
@@ -3470,7 +4015,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(64, 2);
+ so = so_new(6, 7, 2);
so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
NOUVEAU_BO_HIGH, 0, 0);
@@ -3480,7 +4025,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
so_data (so, p->cfg.high_temp);
so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
so_data (so, p->cfg.high_result);
- so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1);
+ so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
so_data (so, p->cfg.regs[2]);
so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
so_data (so, p->cfg.regs[3]);
@@ -3490,12 +4035,13 @@ nv50_fragprog_validate(struct nv50_context *nv50)
so_ref(NULL, &so);
}
-static void
+static uint32_t
nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
{
struct nv50_program *fp = nv50->fragprog;
struct nv50_program *vp = nv50->vertprog;
unsigned i, c, m = base;
+ uint32_t origin = 0x00000010;
/* XXX: this might not work correctly in all cases yet - we'll
* just assume that an FP generic input that is not written in
@@ -3529,7 +4075,9 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
if (mode == PIPE_SPRITE_COORD_NONE) {
m += n;
continue;
- }
+ } else
+ if (mode == PIPE_SPRITE_COORD_LOWER_LEFT)
+ origin = 0;
}
/* this is either PointCoord or replaced by sprite coords */
@@ -3540,6 +4088,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
++m;
}
}
+ return origin;
}
static int
@@ -3638,7 +4187,7 @@ nv50_linkage_validate(struct nv50_context *nv50)
}
/* now fill the stateobj */
- so = so_new(64, 0);
+ so = so_new(7, 57, 0);
n = (m + 3) / 4;
so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
@@ -3652,11 +4201,13 @@ nv50_linkage_validate(struct nv50_context *nv50)
so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
so_data (so, reg[4]);
- so_method(so, tesla, 0x1540, 4);
+ so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
so_datap (so, lin, 4);
if (nv50->rasterizer->pipe.point_sprite) {
- nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff);
+ so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1);
+ so_data (so,
+ nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff));
so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
so_datap (so, pcrd, 8);
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 4a90c372ce..461fec1d89 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -58,6 +58,7 @@ struct nv50_program {
/* VP only */
uint8_t clpd, clpd_nr;
uint8_t psiz;
+ uint8_t edgeflag_in;
} cfg;
};
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 268c9823f7..5a4ab3508b 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -77,9 +77,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_query *q = nv50_query(pq);
- BEGIN_RING(chan, tesla, 0x1530, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1);
OUT_RING (chan, 1);
- BEGIN_RING(chan, tesla, 0x1514, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1);
OUT_RING (chan, 1);
q->ready = FALSE;
@@ -111,7 +111,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
if (!q->ready) {
ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD |
- wait ? 0 : NOUVEAU_BO_NOWAIT);
+ (wait ? 0 : NOUVEAU_BO_NOWAIT));
if (ret)
return false;
q->result = ((uint32_t *)q->bo->map)[1];
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index d443ca3ad0..28e2b35dea 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -128,7 +128,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
return 1;
case PIPE_CAP_TGSI_CONT_SUPPORTED:
- return 0;
+ return 1;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 1;
case NOUVEAU_CAP_HW_VTXBUF:
@@ -165,6 +165,21 @@ static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
struct nv50_screen *screen = nv50_screen(pscreen);
+ unsigned i;
+
+ for (i = 0; i < 2; i++) {
+ if (screen->constbuf_parm[i])
+ nouveau_bo_ref(NULL, &screen->constbuf_parm[i]);
+ }
+
+ if (screen->constbuf_misc[0])
+ nouveau_bo_ref(NULL, &screen->constbuf_misc[0]);
+ if (screen->tic)
+ nouveau_bo_ref(NULL, &screen->tic);
+ if (screen->tsc)
+ nouveau_bo_ref(NULL, &screen->tsc);
+ if (screen->static_init)
+ so_ref(NULL, &screen->static_init);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->tesla);
@@ -174,6 +189,28 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
FREE(screen);
}
+static int
+nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
+ unsigned usage)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nv50_context *ctx = screen->cur_ctx;
+
+ if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX))
+ return 0;
+
+ /* Our vtxbuf got mapped, it can no longer be considered part of current
+ * state, remove it to avoid emitting reloc markers.
+ */
+ if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf,
+ nouveau_bo(pb))) {
+ so_ref(NULL, &ctx->state.vtxbuf);
+ ctx->dirty |= NV50_NEW_ARRAYS;
+ }
+
+ return 0;
+}
+
struct pipe_screen *
nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
{
@@ -201,6 +238,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
pscreen->get_param = nv50_screen_get_param;
pscreen->get_paramf = nv50_screen_get_paramf;
pscreen->is_format_supported = nv50_screen_is_format_supported;
+ screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map;
nv50_screen_init_miptree_functions(pscreen);
nv50_transfer_init_screen_functions(pscreen);
@@ -213,7 +251,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
nv50_screen_destroy(pscreen);
return NULL;
}
- BIND_RING(chan, screen->m2mf, 1);
/* 2D object */
ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d);
@@ -222,7 +259,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
nv50_screen_destroy(pscreen);
return NULL;
}
- BIND_RING(chan, screen->eng2d, 2);
/* 3D object */
switch (chipset & 0xf0) {
@@ -231,8 +267,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
break;
case 0x80:
case 0x90:
- /* this stupid name should be corrected. */
- tesla_class = NV54TCL;
+ tesla_class = NV84TCL;
break;
case 0xa0:
switch (chipset) {
@@ -242,7 +277,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
tesla_class = NVA0TCL;
break;
default:
- tesla_class = 0x8597;
+ tesla_class = NVA8TCL;
break;
}
break;
@@ -259,7 +294,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
nv50_screen_destroy(pscreen);
return NULL;
}
- BIND_RING(chan, screen->tesla, 3);
/* Sync notifier */
ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
@@ -270,7 +304,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
/* Static M2MF init */
- so = so_new(32, 0);
+ so = so_new(1, 3, 0);
so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
so_data (so, screen->sync->handle);
so_data (so, chan->vram->handle);
@@ -279,7 +313,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_ref (NULL, &so);
/* Static 2D init */
- so = so_new(64, 0);
+ so = so_new(4, 7, 0);
so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4);
so_data (so, screen->sync->handle);
so_data (so, chan->vram->handle);
@@ -287,7 +321,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_data (so, chan->vram->handle);
so_method(so, screen->eng2d, NV50_2D_OPERATION, 1);
so_data (so, NV50_2D_OPERATION_SRCCOPY);
- so_method(so, screen->eng2d, 0x0290, 1);
+ so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1);
so_data (so, 0);
so_method(so, screen->eng2d, 0x0888, 1);
so_data (so, 1);
@@ -295,36 +329,35 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_ref(NULL, &so);
/* Static tesla init */
- so = so_new(256, 20);
+ so = so_new(40, 84, 20);
- so_method(so, screen->tesla, 0x1558, 1);
- so_data (so, 1);
+ so_method(so, screen->tesla, NV50TCL_COND_MODE, 1);
+ so_data (so, NV50TCL_COND_MODE_ALWAYS);
so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
so_data (so, screen->sync->handle);
- so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0),
- NV50TCL_DMA_UNK0__SIZE);
- for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++)
+ so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11);
+ for (i = 0; i < 11; i++)
so_data(so, chan->vram->handle);
- so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0),
- NV50TCL_DMA_UNK1__SIZE);
- for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++)
+ so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0),
+ NV50TCL_DMA_COLOR__SIZE);
+ for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++)
so_data(so, chan->vram->handle);
- so_method(so, screen->tesla, 0x121c, 1);
+ so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1);
so_data (so, 1);
/* activate all 32 lanes (threads) in a warp */
- so_method(so, screen->tesla, 0x19a0, 1);
+ so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1);
so_data (so, 0x2);
so_method(so, screen->tesla, 0x1400, 1);
so_data (so, 0xf);
/* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
- so_method(so, screen->tesla, 0x13b4, 1);
+ so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1);
so_data (so, 0x54);
- so_method(so, screen->tesla, 0x13bc, 1);
+ so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1);
so_data (so, 0x54);
/* origin is top left (set to 1 for bottom left) */
- so_method(so, screen->tesla, 0x13ac, 1);
+ so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1);
so_data (so, 0);
so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
so_data (so, 8);
@@ -360,7 +393,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
// B = buffer ID (maybe more than 1 byte)
// N = CB index used in shader instruction
// P = program type (0 = VP, 2 = GP, 3 = FP)
- so_method(so, screen->tesla, 0x1694, 1);
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x000BBNP1);
*/
@@ -424,23 +457,26 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
/* Vertex array limits - max them out */
for (i = 0; i < 16; i++) {
- so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2);
+ so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
so_data (so, 0x000000ff);
so_data (so, 0xffffffff);
}
- so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2);
+ so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2);
so_data (so, fui(0.0));
so_data (so, fui(1.0));
/* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
- so_method(so, screen->tesla, 0x1234, 1);
+ so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1);
so_data (so, 1);
/* activate first scissor rectangle */
- so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1);
+ so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
so_data (so, 1);
+ so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+ so_data (so, 1); /* default edgeflag to TRUE */
+
so_emit(chan, so);
so_ref (so, &screen->static_init);
so_ref (NULL, &so);
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 61e24a5b57..a038a4e3c2 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -2,6 +2,7 @@
#define __NV50_SCREEN_H__
#include "nouveau/nouveau_screen.h"
+#include "nv50_context.h"
struct nv50_screen {
struct nouveau_screen base;
@@ -9,6 +10,7 @@ struct nv50_screen {
struct nouveau_winsys *nvws;
unsigned cur_pctx;
+ struct nv50_context *cur_ctx;
struct nouveau_grobj *tesla;
struct nouveau_grobj *eng2d;
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index d609b4cbc6..1bbbbdd5f0 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -35,7 +35,7 @@ static void *
nv50_blend_state_create(struct pipe_context *pipe,
const struct pipe_blend_state *cso)
{
- struct nouveau_stateobj *so = so_new(64, 0);
+ struct nouveau_stateobj *so = so_new(5, 24, 0);
struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj);
unsigned cmask = 0, i;
@@ -146,7 +146,6 @@ nv50_sampler_state_create(struct pipe_context *pipe,
(wrap_mode(cso->wrap_r) << 6));
switch (cso->mag_img_filter) {
- case PIPE_TEX_FILTER_ANISO:
case PIPE_TEX_FILTER_LINEAR:
tsc[1] |= NV50TSC_1_1_MAGF_LINEAR;
break;
@@ -157,7 +156,6 @@ nv50_sampler_state_create(struct pipe_context *pipe,
}
switch (cso->min_img_filter) {
- case PIPE_TEX_FILTER_ANISO:
case PIPE_TEX_FILTER_LINEAR:
tsc[1] |= NV50TSC_1_1_MINF_LINEAR;
break;
@@ -280,7 +278,7 @@ static void *
nv50_rasterizer_state_create(struct pipe_context *pipe,
const struct pipe_rasterizer_state *cso)
{
- struct nouveau_stateobj *so = so_new(64, 0);
+ struct nouveau_stateobj *so = so_new(15, 21, 0);
struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
struct nv50_rasterizer_stateobj *rso =
CALLOC_STRUCT(nv50_rasterizer_stateobj);
@@ -295,7 +293,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
NV50TCL_SHADE_MODEL_SMOOTH);
- so_method(so, tesla, 0x1684, 1);
+ so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1);
so_data (so, cso->flatshade_first ? 0 : 1);
so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1);
@@ -392,7 +390,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1);
so_data (so, fui(cso->offset_scale));
so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1);
- so_data (so, fui(cso->offset_units));
+ so_data (so, fui(cso->offset_units * 2.0f));
}
rso->pipe = *cso;
@@ -425,7 +423,7 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
{
struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj);
- struct nouveau_stateobj *so = so_new(64, 0);
+ struct nouveau_stateobj *so = so_new(8, 22, 0);
so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1);
so_data (so, cso->depth.writemask ? 1 : 0);
@@ -439,9 +437,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
so_data (so, 0);
}
- /* XXX: keep hex values until header is updated (names reversed) */
if (cso->stencil[0].enabled) {
- so_method(so, tesla, 0x1380, 8);
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8);
so_data (so, 1);
so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
@@ -451,23 +448,23 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
so_data (so, cso->stencil[0].writemask);
so_data (so, cso->stencil[0].valuemask);
} else {
- so_method(so, tesla, 0x1380, 1);
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1);
so_data (so, 0);
}
if (cso->stencil[1].enabled) {
- so_method(so, tesla, 0x1594, 5);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5);
so_data (so, 1);
so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
so_data (so, nvgl_comparison_op(cso->stencil[1].func));
- so_method(so, tesla, 0x0f54, 3);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3);
so_data (so, cso->stencil[1].ref_value);
so_data (so, cso->stencil[1].writemask);
so_data (so, cso->stencil[1].valuemask);
} else {
- so_method(so, tesla, 0x1594, 1);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1);
so_data (so, 0);
}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 871e8097b6..f83232f43c 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -33,7 +33,7 @@ static void
nv50_state_validate_fb(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(128, 18);
+ struct nouveau_stateobj *so = so_new(32, 79, 18);
struct pipe_framebuffer_state *fb = &nv50->framebuffer;
unsigned i, w, h, gw = 0;
@@ -41,7 +41,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
* FP result 0 always goes to RT[0], bits 4 - 6 are ignored.
* Ambiguous assignment results in no rendering (no DATA_ERROR).
*/
- so_method(so, tesla, 0x121c, 1);
+ so_method(so, tesla, NV50TCL_RT_CONTROL, 1);
so_data (so, fb->nr_cbufs |
(0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) |
(4 << 16) | (5 << 19) | (6 << 22) | (7 << 25));
@@ -87,7 +87,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
level[fb->cbufs[i]->level].tile_mode << 4);
so_data(so, 0x00000000);
- so_method(so, tesla, 0x1224, 1);
+ so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1);
so_data (so, 1);
}
@@ -124,22 +124,22 @@ nv50_state_validate_fb(struct nv50_context *nv50)
level[fb->zsbuf->level].tile_mode << 4);
so_data(so, 0x00000000);
- so_method(so, tesla, 0x1538, 1);
+ so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
so_data (so, 1);
so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3);
so_data (so, fb->zsbuf->width);
so_data (so, fb->zsbuf->height);
so_data (so, 0x00010001);
} else {
- so_method(so, tesla, 0x1538, 1);
+ so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
so_data (so, 0);
}
- so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2);
+ so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2);
so_data (so, w << 16);
so_data (so, h << 16);
/* set window lower left corner */
- so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2);
+ so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2);
so_data (so, 0);
so_data (so, 0);
/* set screen scissor rectangle */
@@ -185,6 +185,9 @@ nv50_state_emit(struct nv50_context *nv50)
struct nv50_screen *screen = nv50->screen;
struct nouveau_channel *chan = screen->base.channel;
+ /* I don't want to copy headers from the winsys. */
+ screen->cur_ctx = nv50;
+
if (nv50->pctx_id != screen->cur_pctx) {
if (nv50->state.fb)
nv50->state.dirty |= NV50_NEW_FRAMEBUFFER;
@@ -296,7 +299,7 @@ nv50_state_validate(struct nv50_context *nv50)
so_ref(nv50->rasterizer->so, &nv50->state.rast);
if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
- so = so_new(5, 0);
+ so = so_new(1, 4, 0);
so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
so_data (so, fui(nv50->blend_colour.color[0]));
so_data (so, fui(nv50->blend_colour.color[1]));
@@ -307,7 +310,7 @@ nv50_state_validate(struct nv50_context *nv50)
}
if (nv50->dirty & NV50_NEW_STIPPLE) {
- so = so_new(33, 0);
+ so = so_new(1, 32, 0);
so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
for (i = 0; i < 32; i++)
so_data(so, util_bswap32(nv50->stipple.stipple[i]));
@@ -324,8 +327,8 @@ nv50_state_validate(struct nv50_context *nv50)
goto scissor_uptodate;
nv50->state.scissor_enabled = rast->scissor;
- so = so_new(3, 0);
- so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2);
+ so = so_new(1, 2, 0);
+ so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
if (nv50->state.scissor_enabled) {
so_data(so, (s->maxx << 16) | s->minx);
so_data(so, (s->maxy << 16) | s->miny);
@@ -353,13 +356,13 @@ scissor_uptodate:
goto viewport_uptodate;
nv50->state.viewport_bypass = bypass;
- so = so_new(14, 0);
+ so = so_new(5, 9, 0);
if (!bypass) {
- so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3);
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
so_data (so, fui(nv50->viewport.translate[0]));
so_data (so, fui(nv50->viewport.translate[1]));
so_data (so, fui(nv50->viewport.translate[2]));
- so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3);
+ so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
so_data (so, fui(nv50->viewport.scale[0]));
so_data (so, fui(nv50->viewport.scale[1]));
so_data (so, fui(nv50->viewport.scale[2]));
@@ -397,7 +400,8 @@ viewport_uptodate:
for (i = 0; i < PIPE_SHADER_TYPES; ++i)
nr += nv50->sampler_nr[i];
- so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4);
+ so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES
+ + nr * 8, PIPE_SHADER_TYPES * 2);
nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
@@ -440,7 +444,7 @@ void nv50_so_init_sifc(struct nv50_context *nv50,
so_data (so, 1);
so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0);
- so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2);
+ so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
so_data (so, 0);
so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 79655fc08d..6378132979 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -176,11 +176,11 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
if (ret)
return;
- BEGIN_RING(chan, eng2d, 0x0580, 3);
- OUT_RING (chan, 4);
+ BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3);
+ OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES);
OUT_RING (chan, format);
OUT_RING (chan, value);
- BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4);
+ BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4);
OUT_RING (chan, destx);
OUT_RING (chan, desty);
OUT_RING (chan, width);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index c4ca096d6a..bef548b728 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -199,16 +199,18 @@ nv50_tex_validate(struct nv50_context *nv50)
{
struct nouveau_stateobj *so;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- unsigned p, push, nrlc;
+ unsigned p, start, push, nrlc;
- for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
+ for (nrlc = 0, start = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
+ start += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
nrlc += nv50->miptree_nr[p];
}
- push = push * 11 + 23 * PIPE_SHADER_TYPES + 4;
+ start = start * 2 + 4 * PIPE_SHADER_TYPES + 2;
+ push = push * 9 + 19 * PIPE_SHADER_TYPES + 2;
nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES;
- so = so_new(push, nrlc);
+ so = so_new(start, push, nrlc);
if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE ||
nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) {
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 4d9afa6fed..a2f1db2914 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -47,7 +47,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
+ NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
OUT_RING (chan, src_pitch);
src_offset += (sy * src_pitch) + (sx * cpp);
} else {
@@ -66,7 +66,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
+ NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
OUT_RING (chan, dst_pitch);
dst_offset += (dy * dst_pitch) + (dx * cpp);
} else {
@@ -89,7 +89,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc);
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
+ NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
if (src_bo->tile_flags) {
@@ -107,7 +107,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
dst_offset += (line_count * dst_pitch);
}
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
+ NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
OUT_RING (chan, width * cpp);
OUT_RING (chan, line_count);
OUT_RING (chan, 0x00000101);
@@ -291,7 +291,7 @@ nv50_upload_sifc(struct nv50_context *nv50,
/* NV50_2D_OPERATION_SRCCOPY assumed already set */
- BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2);
+ BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
OUT_RING (chan, 0);
OUT_RING (chan, src_format);
BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
@@ -334,6 +334,6 @@ nv50_upload_sifc(struct nv50_context *nv50,
src += src_pitch;
}
- BEGIN_RING(chan, tesla, 0x1440, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
OUT_RING (chan, 0);
}
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index f7fa0659e8..f2e510fba6 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -99,19 +99,19 @@ nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
{
static const uint32_t hw_values[] = {
0, 0, 0, 0,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
0, 0, 0, 0,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32_32 };
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
/* we'd also have R11G11B10 and R10G10B10A2 */
@@ -152,7 +152,7 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
return (hw_type | hw_size);
}
-boolean
+void
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned count)
{
@@ -182,7 +182,9 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
- return ret;
+ /* XXX: not sure what to do if ret != TRUE: flush and retry?
+ */
+ assert(ret);
}
static INLINE boolean
@@ -198,7 +200,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
return nv50_push_elements_u08(nv50, map, count);
if (count & 1) {
- BEGIN_RING(chan, tesla, 0x15e8, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, map[0]);
map++;
count--;
@@ -208,7 +210,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
unsigned nr = count > 2046 ? 2046 : count;
int i;
- BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
for (i = 0; i < nr; i += 2)
OUT_RING (chan, (map[i + 1] << 16) | map[i]);
@@ -231,7 +233,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
return nv50_push_elements_u16(nv50, map, count);
if (count & 1) {
- BEGIN_RING(chan, tesla, 0x15e8, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, map[0]);
map++;
count--;
@@ -241,7 +243,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
unsigned nr = count > 2046 ? 2046 : count;
int i;
- BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
for (i = 0; i < nr; i += 2)
OUT_RING (chan, (map[i + 1] << 16) | map[i]);
@@ -266,7 +268,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
while (count) {
unsigned nr = count > 2047 ? 2047 : count;
- BEGIN_RING(chan, tesla, 0x400015e8, nr);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr);
OUT_RINGp (chan, map, nr);
count -= nr;
@@ -275,7 +277,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
return TRUE;
}
-boolean
+void
nv50_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
@@ -317,8 +319,10 @@ nv50_draw_elements(struct pipe_context *pipe,
OUT_RING (chan, 0);
pipe_buffer_unmap(pscreen, indexBuffer);
-
- return ret;
+
+ /* XXX: what to do if ret != TRUE? Flush and retry?
+ */
+ assert(ret);
}
static INLINE boolean
@@ -350,7 +354,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
so = *pso;
if (!so)
- *pso = so = so_new(nv50->vtxelt_nr * 5, 0);
+ *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
switch (ve->nr_components) {
case 4:
@@ -372,6 +376,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
so_data (so, fui(v[1]));
break;
case 1:
+ if (attrib == nv50->vertprog->cfg.edgeflag_in) {
+ so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+ so_data (so, v[0] ? 1 : 0);
+ }
so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
so_data (so, fui(v[0]));
break;
@@ -401,11 +409,14 @@ nv50_vbo_validate(struct nv50_context *nv50)
!(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
nv50->vbo_fifo = 0xffff;
+ if (nv50->vertprog->cfg.edgeflag_in < 16)
+ nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
+
n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
vtxattr = NULL;
- vtxbuf = so_new(n_ve * 7, nv50->vtxelt_nr * 4);
- vtxfmt = so_new(n_ve + 1, 0);
+ vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
+ vtxfmt = so_new(1, n_ve, 0);
so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
for (i = 0; i < nv50->vtxelt_nr; i++) {
@@ -445,7 +456,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
/* vertex array limits */
- so_method(vtxbuf, tesla, 0x1080 + (i * 8), 2);
+ so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
so_reloc (vtxbuf, bo, vb->buffer->size - 1,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_HIGH, 0, 0);
@@ -479,6 +490,9 @@ struct nv50_vbo_emitctx
unsigned nr_ve;
unsigned vtx_dwords;
unsigned vtx_max;
+
+ float edgeflag;
+ unsigned ve_edgeflag;
};
static INLINE void
@@ -622,6 +636,9 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
if (nv50_map_vbufs(nv50) == FALSE)
return FALSE;
+ emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
+
+ emit->edgeflag = 0.5f;
emit->nr_ve = 0;
emit->vtx_dwords = 0;
@@ -644,7 +661,8 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
desc = util_format_description(ve->src_format);
assert(desc);
- size = util_format_get_component_bits(ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
+ size = util_format_get_component_bits(
+ ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
assert(ve->nr_components > 0 && ve->nr_components <= 4);
@@ -686,10 +704,31 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
}
emit->vtx_max = 512 / emit->vtx_dwords;
+ if (emit->ve_edgeflag < 16)
+ emit->vtx_max = 1;
return TRUE;
}
+static INLINE void
+set_edgeflag(struct nouveau_channel *chan,
+ struct nouveau_grobj *tesla,
+ struct nv50_vbo_emitctx *emit, uint32_t index)
+{
+ unsigned i = emit->ve_edgeflag;
+
+ if (i < 16) {
+ float f = *((float *)(emit->map[i] + index * emit->stride[i]));
+
+ if (emit->edgeflag != f) {
+ emit->edgeflag = f;
+
+ BEGIN_RING(chan, tesla, 0x15e4, 1);
+ OUT_RING (chan, f ? 1 : 0);
+ }
+ }
+}
+
static boolean
nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
{
@@ -704,6 +743,8 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
unsigned i, dw, nr = MIN2(count, emit.vtx_max);
dw = nr * emit.vtx_dwords;
+ set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
+
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
for (i = 0; i < nr; ++i)
emit_vtx_next(chan, &emit);
@@ -729,6 +770,8 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
unsigned i, dw, nr = MIN2(count, emit.vtx_max);
dw = nr * emit.vtx_dwords;
+ set_edgeflag(chan, tesla, &emit, *map);
+
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
@@ -754,6 +797,8 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
unsigned i, dw, nr = MIN2(count, emit.vtx_max);
dw = nr * emit.vtx_dwords;
+ set_edgeflag(chan, tesla, &emit, *map);
+
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
@@ -779,6 +824,8 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
unsigned i, dw, nr = MIN2(count, emit.vtx_max);
dw = nr * emit.vtx_dwords;
+ set_edgeflag(chan, tesla, &emit, *map);
+
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index 0d2de17be9..183aa17f9b 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -4,7 +4,12 @@ r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript')
env = env.Clone()
# add the paths for r300compiler
-env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa'])
+env.Append(CPPPATH = [
+ '#/src/mesa/drivers/dri/r300/compiler',
+ '#/src/gallium/winsys/drm/radeon/core',
+ '#/include',
+ '#/src/mesa',
+])
r300 = env.ConvenienceLibrary(
target = 'r300',
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index ffe066d536..c14414fff6 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -27,9 +27,9 @@
static void r300_blitter_save_states(struct r300_context* r300)
{
- util_blitter_save_blend(r300->blitter, r300->blend_state);
- util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state);
- util_blitter_save_rasterizer(r300->blitter, r300->rs_state);
+ util_blitter_save_blend(r300->blitter, r300->blend_state.state);
+ util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state);
+ util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state);
util_blitter_save_fragment_shader(r300->blitter, r300->fs);
util_blitter_save_vertex_shader(r300->blitter, r300->vs);
}
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 51fdb82ff3..92de297ef1 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
/* Reasonable defaults */
caps->num_vert_fpus = 4;
caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE;
+ caps->is_r400 = FALSE;
caps->is_r500 = FALSE;
caps->high_second_pipe = FALSE;
@@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x4A54:
caps->family = CHIP_FAMILY_R420;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x5548:
@@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5D57:
caps->family = CHIP_FAMILY_R423;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x554C:
@@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5D4A:
caps->family = CHIP_FAMILY_R430;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x5D4C:
@@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5D52:
caps->family = CHIP_FAMILY_R480;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x4B48:
@@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x4B4C:
caps->family = CHIP_FAMILY_R481;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x5E4C:
@@ -182,6 +188,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5E4D:
caps->family = CHIP_FAMILY_RV410;
caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
break;
case 0x5954:
@@ -212,6 +219,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x791F:
caps->family = CHIP_FAMILY_RS690;
caps->has_tcl = FALSE;
+ caps->is_r400 = TRUE;
break;
case 0x793F:
@@ -219,6 +227,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x7942:
caps->family = CHIP_FAMILY_RS600;
caps->has_tcl = FALSE;
+ caps->is_r400 = TRUE;
break;
case 0x796C:
@@ -227,6 +236,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x796F:
caps->family = CHIP_FAMILY_RS740;
caps->has_tcl = FALSE;
+ caps->is_r400 = TRUE;
break;
case 0x7100:
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index 0633a8b8a7..2808486492 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -40,11 +40,18 @@ struct r300_capabilities {
unsigned num_z_pipes;
/* Whether or not TCL is physically present */
boolean has_tcl;
+ /* Whether or not this is R400. The differences compared to their R3xx
+ * cousins are:
+ * - Extended fragment shader registers
+ * - Blend LTE/GTE thresholds */
+ boolean is_r400;
/* Whether or not this is an RV515 or newer; R500s have many differences
* that require extra consideration, compared to their R3xx cousins:
* - Extra bit of width and height on texture sizes
* - Blend color is split across two registers
- * - Universal Shader (US) block used for fragment shaders */
+ * - Blend LTE/GTE thresholds
+ * - Universal Shader (US) block used for fragment shaders
+ * - FP16 blending and multisampling */
boolean is_r500;
/* Whether or not the second pixel pipe is accessed with the high bit */
boolean high_second_pipe;
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index d5c2d63d39..5e4f6552c3 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -30,6 +30,7 @@
#include "r300_blit.h"
#include "r300_context.h"
+#include "r300_emit.h"
#include "r300_flush.h"
#include "r300_query.h"
#include "r300_render.h"
@@ -69,11 +70,13 @@ static void r300_destroy_context(struct pipe_context* context)
FREE(query);
}
- FREE(r300->blend_color_state);
+ FREE(r300->blend_color_state.state);
+ FREE(r300->clip_state.state);
FREE(r300->rs_block);
- FREE(r300->scissor_state);
+ FREE(r300->scissor_state.state);
FREE(r300->vertex_info);
- FREE(r300->viewport_state);
+ FREE(r300->viewport_state.state);
+ FREE(r300->ztop_state.state);
FREE(r300);
}
@@ -107,6 +110,35 @@ static void r300_flush_cb(void *data)
cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL);
}
+#define R300_INIT_ATOM(atomname, atomsize) \
+ r300->atomname##_state.name = #atomname; \
+ r300->atomname##_state.state = NULL; \
+ r300->atomname##_state.size = atomsize; \
+ r300->atomname##_state.emit = r300_emit_##atomname##_state; \
+ r300->atomname##_state.dirty = FALSE; \
+ insert_at_tail(&r300->atom_list, &r300->atomname##_state);
+
+static void r300_setup_atoms(struct r300_context* r300)
+{
+ /* Create the actual atom list.
+ *
+ * Each atom is examined and emitted in the order it appears here, which
+ * can affect performance and conformance if not handled with care.
+ *
+ * Some atoms never change size, others change every emit. This is just
+ * an upper bound on each atom, to keep the emission machinery from
+ * underallocating space. */
+ make_empty_list(&r300->atom_list);
+ R300_INIT_ATOM(ztop, 2);
+ R300_INIT_ATOM(blend, 8);
+ R300_INIT_ATOM(blend_color, 3);
+ R300_INIT_ATOM(clip, 29);
+ R300_INIT_ATOM(dsa, 8);
+ R300_INIT_ATOM(rs, 22);
+ R300_INIT_ATOM(scissor, 3);
+ R300_INIT_ATOM(viewport, 9);
+}
+
struct pipe_context* r300_create_context(struct pipe_screen* screen,
struct radeon_winsys* radeon_winsys)
{
@@ -155,11 +187,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash,
r300_shader_key_compare);
- r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state);
+ r300_setup_atoms(r300);
+
+ r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state);
+ r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state);
r300->rs_block = CALLOC_STRUCT(r300_rs_block);
- r300->scissor_state = CALLOC_STRUCT(r300_scissor_state);
+ r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
r300->vertex_info = CALLOC_STRUCT(r300_vertex_info);
- r300->viewport_state = CALLOC_STRUCT(r300_viewport_state);
+ r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
+ r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
/* Open up the OQ BO. */
r300->oqbo = screen->buffer_create(screen, 4096,
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 232530b7dc..682b9179c8 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -30,9 +30,28 @@
#include "pipe/p_context.h"
#include "pipe/p_inlines.h"
+struct r300_context;
+
struct r300_fragment_shader;
struct r300_vertex_shader;
+struct r300_atom {
+ /* List pointers. */
+ struct r300_atom *prev, *next;
+ /* Name, for debugging. */
+ const char* name;
+ /* Opaque state. */
+ void* state;
+ /* Emit the state to the context. */
+ void (*emit)(struct r300_context*, void*);
+ /* Upper bound on number of dwords to emit. */
+ unsigned size;
+ /* Whether this atom should be emitted. */
+ boolean dirty;
+ /* Another dirty flag that is never automatically cleared. */
+ boolean always_dirty;
+};
+
struct r300_blend_state {
uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */
uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */
@@ -62,11 +81,6 @@ struct r300_rs_state {
/* Draw-specific rasterizer state */
struct pipe_rasterizer_state rs;
- /* Whether or not to enable the VTE. This is referenced at the very
- * last moment during emission of VTE state, to decide whether or not
- * the VTE should be used for transformation. */
- boolean enable_vte;
-
uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */
uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */
uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */
@@ -102,19 +116,6 @@ struct r300_sampler_state {
unsigned min_lod, max_lod;
};
-struct r300_scissor_regs {
- uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */
- uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
-
- /* Whether everything is culled by scissoring. */
- boolean empty_area;
-};
-
-struct r300_scissor_state {
- struct r300_scissor_regs framebuffer;
- struct r300_scissor_regs scissor;
-};
-
struct r300_texture_state {
uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */
uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */
@@ -135,24 +136,17 @@ struct r300_ztop_state {
uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */
};
-#define R300_NEW_BLEND 0x00000001
-#define R300_NEW_BLEND_COLOR 0x00000002
-#define R300_NEW_CLIP 0x00000004
-#define R300_NEW_DSA 0x00000008
#define R300_NEW_FRAMEBUFFERS 0x00000010
#define R300_NEW_FRAGMENT_SHADER 0x00000020
#define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040
-#define R300_NEW_RASTERIZER 0x00000080
#define R300_NEW_RS_BLOCK 0x00000100
#define R300_NEW_SAMPLER 0x00000200
#define R300_ANY_NEW_SAMPLERS 0x0001fe00
-#define R300_NEW_SCISSOR 0x00020000
#define R300_NEW_TEXTURE 0x00040000
#define R300_ANY_NEW_TEXTURES 0x03fc0000
#define R300_NEW_VERTEX_FORMAT 0x04000000
#define R300_NEW_VERTEX_SHADER 0x08000000
#define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000
-#define R300_NEW_VIEWPORT 0x20000000
#define R300_NEW_QUERY 0x40000000
#define R300_NEW_KITCHEN_SINK 0x7fffffff
@@ -194,6 +188,12 @@ struct r300_query {
struct r300_query* next;
};
+enum r300_buffer_tiling {
+ R300_BUFFER_LINEAR = 0,
+ R300_BUFFER_TILED,
+ R300_BUFFER_SQUARETILED
+};
+
struct r300_texture {
/* Parent class */
struct pipe_texture tex;
@@ -230,6 +230,9 @@ struct r300_texture {
/* Registers carrying texture format data. */
struct r300_texture_state state;
+
+ /* Buffer tiling */
+ enum r300_buffer_tiling microtile, macrotile;
};
struct r300_vertex_info {
@@ -273,38 +276,40 @@ struct r300_context {
struct r300_vertex_info* vertex_info;
/* Various CSO state objects. */
+ /* Beginning of atom list. */
+ struct r300_atom atom_list;
/* Blend state. */
- struct r300_blend_state* blend_state;
+ struct r300_atom blend_state;
/* Blend color state. */
- struct r300_blend_color_state* blend_color_state;
+ struct r300_atom blend_color_state;
/* User clip planes. */
- struct pipe_clip_state clip_state;
+ struct r300_atom clip_state;
/* Shader constants. */
struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES];
/* Depth, stencil, and alpha state. */
- struct r300_dsa_state* dsa_state;
+ struct r300_atom dsa_state;
/* Fragment shader. */
struct r300_fragment_shader* fs;
/* Framebuffer state. We currently don't need our own version of this. */
struct pipe_framebuffer_state framebuffer_state;
/* Rasterizer state. */
- struct r300_rs_state* rs_state;
+ struct r300_atom rs_state;
/* RS block state. */
struct r300_rs_block* rs_block;
/* Sampler states. */
struct r300_sampler_state* sampler_states[8];
int sampler_count;
/* Scissor state. */
- struct r300_scissor_state* scissor_state;
+ struct r300_atom scissor_state;
/* Texture states. */
struct r300_texture* textures[8];
int texture_count;
/* Vertex shader. */
struct r300_vertex_shader* vs;
/* Viewport state. */
- struct r300_viewport_state* viewport_state;
+ struct r300_atom viewport_state;
/* ZTOP state. */
- struct r300_ztop_state ztop_state;
+ struct r300_atom ztop_state;
/* Vertex buffers for Gallium. */
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -317,6 +322,8 @@ struct r300_context {
uint32_t dirty_state;
/* Flag indicating whether or not the HW is dirty. */
uint32_t dirty_hw;
+ /* Whether the TCL engine should be in bypass mode. */
+ boolean tcl_bypass;
/** Combination of DBG_xxx flags */
unsigned debug;
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index d142fee050..151f72b0fe 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -52,7 +52,7 @@
#define CS_LOCALS(context) \
struct r300_context* const cs_context_copy = (context); \
struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \
- int cs_count = 0;
+ int cs_count = 0; (void) cs_count;
#define CHECK_CS(size) \
assert(cs_winsys->check_cs(cs_winsys, (size)))
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 199ce3a945..9f93327e59 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -25,6 +25,7 @@
#include "util/u_format.h"
#include "util/u_math.h"
+#include "util/u_simple_list.h"
#include "r300_context.h"
#include "r300_cs.h"
@@ -36,11 +37,13 @@
#include "r300_texture.h"
#include "r300_vs.h"
-void r300_emit_blend_state(struct r300_context* r300,
- struct r300_blend_state* blend)
+void r300_emit_blend_state(struct r300_context* r300, void* state)
{
+ struct r300_blend_state* blend = (struct r300_blend_state*)state;
CS_LOCALS(r300);
+
BEGIN_CS(8);
+ OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3);
if (r300->framebuffer_state.nr_cbufs) {
OUT_CS(blend->blend_control);
@@ -52,14 +55,13 @@ void r300_emit_blend_state(struct r300_context* r300,
OUT_CS(0);
/* XXX also disable fastfill here once it's supported */
}
- OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither);
END_CS;
}
-void r300_emit_blend_color_state(struct r300_context* r300,
- struct r300_blend_color_state* bc)
+void r300_emit_blend_color_state(struct r300_context* r300, void* state)
{
+ struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state;
struct r300_screen* r300screen = r300_screen(r300->context.screen);
CS_LOCALS(r300);
@@ -76,9 +78,9 @@ void r300_emit_blend_color_state(struct r300_context* r300,
}
}
-void r300_emit_clip_state(struct r300_context* r300,
- struct pipe_clip_state* clip)
+void r300_emit_clip_state(struct r300_context* r300, void* state)
{
+ struct pipe_clip_state* clip = (struct pipe_clip_state*)state;
int i;
struct r300_screen* r300screen = r300_screen(r300->context.screen);
CS_LOCALS(r300);
@@ -106,13 +108,13 @@ void r300_emit_clip_state(struct r300_context* r300,
}
-void r300_emit_dsa_state(struct r300_context* r300,
- struct r300_dsa_state* dsa)
+void r300_emit_dsa_state(struct r300_context* r300, void* state)
{
+ struct r300_dsa_state* dsa = (struct r300_dsa_state*)state;
struct r300_screen* r300screen = r300_screen(r300->context.screen);
CS_LOCALS(r300);
- BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8);
+ BEGIN_CS(r300screen->caps->is_r500 ? 8 : 6);
OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
/* not needed since we use the 8bit alpha ref */
@@ -121,10 +123,16 @@ void r300_emit_dsa_state(struct r300_context* r300,
}*/
OUT_CS_REG_SEQ(R300_ZB_CNTL, 3);
- OUT_CS(dsa->z_buffer_control);
- OUT_CS(dsa->z_stencil_control);
+
+ if (r300->framebuffer_state.zsbuf) {
+ OUT_CS(dsa->z_buffer_control);
+ OUT_CS(dsa->z_stencil_control);
+ } else {
+ OUT_CS(0);
+ OUT_CS(0);
+ }
+
OUT_CS(dsa->stencil_ref_mask);
- OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top);
/* XXX it seems r3xx doesn't support STENCILREFMASK_BF */
if (r300screen->caps->is_r500) {
@@ -138,6 +146,8 @@ static const float * get_shader_constant(
struct rc_constant * constant,
struct r300_constant_buffer * externals)
{
+ struct r300_viewport_state* viewport =
+ (struct r300_viewport_state*)r300->viewport_state.state;
static float vec[4] = { 0.0, 0.0, 0.0, 1.0 };
struct pipe_texture *tex;
@@ -160,11 +170,31 @@ static const float * get_shader_constant(
/* Texture compare-fail value. */
/* XXX Since Gallium doesn't support GL_ARB_shadow_ambient,
- * this is always (0,0,0,0). */
+ * this is always (0,0,0,0), right? */
case RC_STATE_SHADOW_AMBIENT:
vec[3] = 0;
break;
+ case RC_STATE_R300_VIEWPORT_SCALE:
+ if (r300->tcl_bypass) {
+ vec[0] = 1;
+ vec[1] = 1;
+ vec[2] = 1;
+ } else {
+ vec[0] = viewport->xscale;
+ vec[1] = viewport->yscale;
+ vec[2] = viewport->zscale;
+ }
+ break;
+
+ case RC_STATE_R300_VIEWPORT_OFFSET:
+ if (!r300->tcl_bypass) {
+ vec[0] = viewport->xoffset;
+ vec[1] = viewport->yoffset;
+ vec[2] = viewport->zoffset;
+ }
+ break;
+
default:
debug_printf("r300: Implementation error: "
"Unknown RC_CONSTANT type %d\n", constant->u.State[0]);
@@ -283,6 +313,22 @@ void r300_emit_fs_constant_buffer(struct r300_context* r300,
END_CS;
}
+static void r300_emit_fragment_depth_config(struct r300_context* r300,
+ struct r300_fragment_shader* fs)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(4);
+ if (r300_fragment_shader_writes_depth(fs)) {
+ OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SHADER);
+ OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W24 | R300_W_SRC_US);
+ } else {
+ OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SCAN);
+ OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0 | R300_W_SRC_US);
+ }
+ END_CS;
+}
+
void r500_emit_fragment_program_code(struct r300_context* r300,
struct rX00_fragment_program_code* generic_code)
{
@@ -374,8 +420,10 @@ void r300_emit_fb_state(struct r300_context* r300,
OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] |
- r300_translate_colorformat(tex->tex.format), 0,
- RADEON_GEM_DOMAIN_VRAM, 0);
+ r300_translate_colorformat(tex->tex.format) |
+ R300_COLOR_TILE(tex->macrotile) |
+ R300_COLOR_MICROTILE(tex->microtile),
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i),
r300_translate_out_fmt(surf->format));
@@ -398,8 +446,10 @@ void r300_emit_fb_state(struct r300_context* r300,
OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format));
OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0,
- RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] |
+ R300_DEPTHMACROTILE(tex->macrotile) |
+ R300_DEPTHMICROTILE(tex->microtile),
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
}
END_CS;
@@ -531,8 +581,9 @@ void r300_emit_query_end(struct r300_context* r300)
r300_emit_query_finish(r300, query);
}
-void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs)
+void r300_emit_rs_state(struct r300_context* r300, void* state)
{
+ struct r300_rs_state* rs = (struct r300_rs_state*)state;
CS_LOCALS(r300);
BEGIN_CS(22);
@@ -595,26 +646,47 @@ void r300_emit_rs_block_state(struct r300_context* r300,
END_CS;
}
-static void r300_emit_scissor_regs(struct r300_context* r300,
- struct r300_scissor_regs* scissor)
+void r300_emit_scissor_state(struct r300_context* r300, void* state)
{
+ unsigned minx, miny, maxx, maxy;
+ uint32_t top_left, bottom_right;
+ struct r300_screen* r300screen = r300_screen(r300->context.screen);
+ struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state;
CS_LOCALS(r300);
- BEGIN_CS(3);
- OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
- OUT_CS(scissor->top_left);
- OUT_CS(scissor->bottom_right);
- END_CS;
-}
+ minx = miny = 0;
+ maxx = r300->framebuffer_state.width;
+ maxy = r300->framebuffer_state.height;
-void r300_emit_scissor_state(struct r300_context* r300,
- struct r300_scissor_state* scissor)
-{
- if (r300->rs_state->rs.scissor) {
- r300_emit_scissor_regs(r300, &scissor->scissor);
+ if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) {
+ minx = MAX2(minx, scissor->minx);
+ miny = MAX2(miny, scissor->miny);
+ maxx = MIN2(maxx, scissor->maxx);
+ maxy = MIN2(maxy, scissor->maxy);
+ }
+
+ if (r300screen->caps->is_r500) {
+ top_left =
+ (minx << R300_SCISSORS_X_SHIFT) |
+ (miny << R300_SCISSORS_Y_SHIFT);
+ bottom_right =
+ ((maxx - 1) << R300_SCISSORS_X_SHIFT) |
+ ((maxy - 1) << R300_SCISSORS_Y_SHIFT);
} else {
- r300_emit_scissor_regs(r300, &scissor->framebuffer);
+ /* Offset of 1440 in non-R500 chipsets. */
+ top_left =
+ ((minx + 1440) << R300_SCISSORS_X_SHIFT) |
+ ((miny + 1440) << R300_SCISSORS_Y_SHIFT);
+ bottom_right =
+ (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
+ (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
}
+
+ BEGIN_CS(3);
+ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_CS(top_left);
+ OUT_CS(bottom_right);
+ END_CS;
}
void r300_emit_texture(struct r300_context* r300,
@@ -650,8 +722,10 @@ void r300_emit_texture(struct r300_context* r300,
OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1);
OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2);
OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1);
- OUT_CS_RELOC(tex->buffer, 0,
- RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ OUT_CS_RELOC(tex->buffer,
+ R300_TXO_MACRO_TILE(tex->macrotile) |
+ R300_TXO_MICRO_TILE(tex->microtile),
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0);
END_CS;
}
@@ -717,32 +791,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
END_CS;
}
-#if 0
-void r300_emit_draw_packet(struct r300_context* r300)
-{
- CS_LOCALS(r300);
-
- DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, "
- "vertex size %d\n", r300->vbo,
- r300->vertex_info->vinfo.size);
- /* Set the pointer to our vertex buffer. The emitted values are this:
- * PACKET3 [3D_LOAD_VBPNTR]
- * COUNT [1]
- * FORMAT [size | stride << 8]
- * OFFSET [offset into BO]
- * VBPNTR [relocated BO]
- */
- BEGIN_CS(7);
- OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);
- OUT_CS(1);
- OUT_CS(r300->vertex_info->vinfo.size |
- (r300->vertex_info->vinfo.size << 8));
- OUT_CS(r300->vbo_offset);
- OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
- END_CS;
-}
-#endif
-
void r300_emit_vertex_format_state(struct r300_context* r300)
{
int i;
@@ -867,26 +915,27 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300,
END_CS;
}
-void r300_emit_viewport_state(struct r300_context* r300,
- struct r300_viewport_state* viewport)
+void r300_emit_viewport_state(struct r300_context* r300, void* state)
{
+ struct r300_viewport_state* viewport = (struct r300_viewport_state*)state;
CS_LOCALS(r300);
- BEGIN_CS(9);
- OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
- OUT_CS_32F(viewport->xscale);
- OUT_CS_32F(viewport->xoffset);
- OUT_CS_32F(viewport->yscale);
- OUT_CS_32F(viewport->yoffset);
- OUT_CS_32F(viewport->zscale);
- OUT_CS_32F(viewport->zoffset);
-
- if (r300->rs_state->enable_vte) {
- OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control);
- } else {
+ if (r300->tcl_bypass) {
+ BEGIN_CS(2);
OUT_CS_REG(R300_VAP_VTE_CNTL, 0);
+ END_CS;
+ } else {
+ BEGIN_CS(9);
+ OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
+ OUT_CS_32F(viewport->xscale);
+ OUT_CS_32F(viewport->xoffset);
+ OUT_CS_32F(viewport->yscale);
+ OUT_CS_32F(viewport->yoffset);
+ OUT_CS_32F(viewport->zscale);
+ OUT_CS_32F(viewport->zoffset);
+ OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control);
+ END_CS;
}
- END_CS;
}
void r300_emit_texture_count(struct r300_context* r300)
@@ -910,6 +959,16 @@ void r300_emit_texture_count(struct r300_context* r300)
}
+void r300_emit_ztop_state(struct r300_context* r300, void* state)
+{
+ struct r300_ztop_state* ztop = (struct r300_ztop_state*)state;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(2);
+ OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top);
+ END_CS;
+}
+
void r300_flush_textures(struct r300_context* r300)
{
CS_LOCALS(r300);
@@ -933,18 +992,24 @@ void r300_emit_dirty_state(struct r300_context* r300)
{
struct r300_screen* r300screen = r300_screen(r300->context.screen);
struct r300_texture* tex;
- int i, dirty_tex = 0;
+ struct r300_atom* atom;
+ unsigned i, dwords = 1024;
+ int dirty_tex = 0;
boolean invalid = FALSE;
- if (!(r300->dirty_state)) {
- return;
+ /* Check the required number of dwords against the space remaining in the
+ * current CS object. If we need more, then flush. */
+
+ foreach(atom, &r300->atom_list) {
+ if (atom->dirty || atom->always_dirty) {
+ dwords += atom->size;
+ }
}
- /* Check size of CS. */
- /* Make sure we have at least 8*1024 spare dwords. */
+ /* Make sure we have at least 2*1024 spare dwords. */
/* XXX It would be nice to know the number of dwords we really need to
* XXX emit. */
- if (!r300->winsys->check_cs(r300->winsys, 8*1024)) {
+ if (!r300->winsys->check_cs(r300->winsys, dwords)) {
r300->context.flush(&r300->context, 0, NULL);
}
@@ -984,10 +1049,12 @@ validate:
}
}
/* ...occlusion query buffer... */
- if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo,
- 0, RADEON_GEM_DOMAIN_GTT)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
+ if (r300->dirty_state & R300_NEW_QUERY) {
+ if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo,
+ 0, RADEON_GEM_DOMAIN_GTT)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
}
/* ...and vertex buffer. */
if (r300->vbo) {
@@ -997,7 +1064,7 @@ validate:
goto validate;
}
} else {
- // debug_printf("No VBO while emitting dirty state!\n");
+ /* debug_printf("No VBO while emitting dirty state!\n"); */
}
if (!r300->winsys->validate(r300->winsys)) {
r300->context.flush(&r300->context, 0, NULL);
@@ -1015,27 +1082,15 @@ validate:
r300->dirty_state &= ~R300_NEW_QUERY;
}
- if (r300->dirty_state & R300_NEW_BLEND) {
- r300_emit_blend_state(r300, r300->blend_state);
- r300->dirty_state &= ~R300_NEW_BLEND;
- }
-
- if (r300->dirty_state & R300_NEW_BLEND_COLOR) {
- r300_emit_blend_color_state(r300, r300->blend_color_state);
- r300->dirty_state &= ~R300_NEW_BLEND_COLOR;
- }
-
- if (r300->dirty_state & R300_NEW_CLIP) {
- r300_emit_clip_state(r300, &r300->clip_state);
- r300->dirty_state &= ~R300_NEW_CLIP;
- }
-
- if (r300->dirty_state & R300_NEW_DSA) {
- r300_emit_dsa_state(r300, r300->dsa_state);
- r300->dirty_state &= ~R300_NEW_DSA;
+ foreach(atom, &r300->atom_list) {
+ if (atom->dirty || atom->always_dirty) {
+ atom->emit(r300, atom->state);
+ atom->dirty = FALSE;
+ }
}
if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) {
+ r300_emit_fragment_depth_config(r300, r300->fs);
if (r300screen->caps->is_r500) {
r500_emit_fragment_program_code(r300, &r300->fs->shader->code);
} else {
@@ -1060,21 +1115,11 @@ validate:
r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS;
}
- if (r300->dirty_state & R300_NEW_RASTERIZER) {
- r300_emit_rs_state(r300, r300->rs_state);
- r300->dirty_state &= ~R300_NEW_RASTERIZER;
- }
-
if (r300->dirty_state & R300_NEW_RS_BLOCK) {
r300_emit_rs_block_state(r300, r300->rs_block);
r300->dirty_state &= ~R300_NEW_RS_BLOCK;
}
- if (r300->dirty_state & R300_NEW_SCISSOR) {
- r300_emit_scissor_state(r300, r300->scissor_state);
- r300->dirty_state &= ~R300_NEW_SCISSOR;
- }
-
/* Samplers and textures are tracked separately but emitted together. */
if (r300->dirty_state &
(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) {
@@ -1096,11 +1141,6 @@ validate:
r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES);
}
- if (r300->dirty_state & R300_NEW_VIEWPORT) {
- r300_emit_viewport_state(r300, r300->viewport_state);
- r300->dirty_state &= ~R300_NEW_VIEWPORT;
- }
-
if (dirty_tex) {
r300_flush_textures(r300);
}
@@ -1129,7 +1169,7 @@ validate:
*/
/* Finally, emit the VBO. */
- //r300_emit_vertex_buffer(r300);
+ /* r300_emit_vertex_buffer(r300); */
r300->dirty_hw++;
}
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 3797d3d332..05a6bfeae8 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -31,17 +31,13 @@ struct r300_vertex_program_code;
void r300_emit_aos(struct r300_context* r300, unsigned offset);
-void r300_emit_blend_state(struct r300_context* r300,
- struct r300_blend_state* blend);
+void r300_emit_blend_state(struct r300_context* r300, void* state);
-void r300_emit_blend_color_state(struct r300_context* r300,
- struct r300_blend_color_state* bc);
+void r300_emit_blend_color_state(struct r300_context* r300, void* state);
-void r300_emit_clip_state(struct r300_context* r300,
- struct pipe_clip_state* clip);
+void r300_emit_clip_state(struct r300_context* r300, void* state);
-void r300_emit_dsa_state(struct r300_context* r300,
- struct r300_dsa_state* dsa);
+void r300_emit_dsa_state(struct r300_context* r300, void* state);
void r300_emit_fragment_program_code(struct r300_context* r300,
struct rX00_fragment_program_code* generic_code);
@@ -63,13 +59,12 @@ void r300_emit_query_begin(struct r300_context* r300,
void r300_emit_query_end(struct r300_context* r300);
-void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs);
+void r300_emit_rs_state(struct r300_context* r300, void* state);
void r300_emit_rs_block_state(struct r300_context* r300,
struct r300_rs_block* rs);
-void r300_emit_scissor_state(struct r300_context* r300,
- struct r300_scissor_state* scissor);
+void r300_emit_scissor_state(struct r300_context* r300, void* state);
void r300_emit_texture(struct r300_context* r300,
struct r300_sampler_state* sampler,
@@ -89,11 +84,12 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300,
void r300_emit_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs);
-void r300_emit_viewport_state(struct r300_context* r300,
- struct r300_viewport_state* viewport);
+void r300_emit_viewport_state(struct r300_context* r300, void* state);
void r300_emit_texture_count(struct r300_context* r300);
+void r300_emit_ztop_state(struct r300_context* r300, void* state);
+
void r300_flush_textures(struct r300_context* r300);
/* Emit all dirty state. */
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 14a08241fc..59819cb106 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -37,8 +37,10 @@ static void r300_flush(struct pipe_context* pipe,
{
struct r300_context *r300 = r300_context(pipe);
struct r300_query *query;
+ struct r300_atom *atom;
CS_LOCALS(r300);
+ (void) cs_count;
/* We probably need to flush Draw, but we may have been called from
* within Draw. This feels kludgy, but it might be the best thing.
*
@@ -54,7 +56,15 @@ static void r300_flush(struct pipe_context* pipe,
r300_emit_invariant_state(r300);
r300->dirty_state = R300_NEW_KITCHEN_SINK;
r300->dirty_hw = 0;
+
+ /* New kitchen sink, baby. */
+ foreach(atom, &r300->atom_list) {
+ if (atom->state) {
+ atom->dirty = TRUE;
+ }
+ }
}
+
/* reset flushed query */
foreach(query, &r300->query_list) {
query->flushed = TRUE;
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 4e1b61ca40..60ea9c171d 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -63,6 +63,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
fs_inputs->fog = i;
break;
+ case TGSI_SEMANTIC_POSITION:
+ assert(index == 0);
+ fs_inputs->wpos = i;
+ break;
+
default:
assert(0);
}
@@ -114,6 +119,9 @@ static void allocate_hardware_inputs(
if (inputs->fog != ATTR_UNUSED) {
allocate(mydata, inputs->fog, reg++);
}
+ if (inputs->wpos != ATTR_UNUSED) {
+ allocate(mydata, inputs->wpos, reg++);
+ }
}
static void get_compare_state(
@@ -144,6 +152,7 @@ static void r300_translate_fragment_shader(
struct r300_fragment_shader* fs = r300->fs;
struct r300_fragment_program_compiler compiler;
struct tgsi_to_rc ttr;
+ int wpos = fs->inputs.wpos;
/* Setup the compiler. */
memset(&compiler, 0, sizeof(compiler));
@@ -171,6 +180,18 @@ static void r300_translate_fragment_shader(
fs->shadow_samplers = compiler.Base.Program.ShadowSamplers;
+ /**
+ * Transform the program to support WPOS.
+ *
+ * Introduce a small fragment at the start of the program that will be
+ * the only code that directly reads the WPOS input.
+ * All other code pieces that reference that input will be rewritten
+ * to read from a newly allocated temporary. */
+ if (wpos != ATTR_UNUSED) {
+ /* Moving the input to some other reg is not really necessary. */
+ rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE);
+ }
+
/* Invoke the compiler */
r3xx_compile_fragment_program(&compiler);
if (compiler.Base.Error) {
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index d8d08fbe26..361813891f 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_TX_OFFSET_5 0x4554
#define R300_TX_OFFSET_6 0x4558
#define R300_TX_OFFSET_7 0x455C
- /* BEGIN: Guess from R200 */
+
# define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0)
# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0)
# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
-# define R300_TXO_MACRO_TILE (1 << 2)
+# define R300_TXO_MACRO_TILE_LINEAR (0 << 2)
+# define R300_TXO_MACRO_TILE_TILED (1 << 2)
+# define R300_TXO_MACRO_TILE(x) ((x) << 2)
# define R300_TXO_MICRO_TILE_LINEAR (0 << 3)
-# define R300_TXO_MICRO_TILE (1 << 3)
-# define R300_TXO_MICRO_TILE_SQUARE (2 << 3)
+# define R300_TXO_MICRO_TILE_TILED (1 << 3)
+# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3)
+# define R300_TXO_MICRO_TILE(x) ((x) << 3)
# define R300_TXO_OFFSET_MASK 0xffffffe0
# define R300_TXO_OFFSET_SHIFT 5
- /* END: Guess from R200 */
/* 32 bit chroma key */
#define R300_TX_CHROMA_KEY_0 0x4580
@@ -2186,6 +2188,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3)
# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3)
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3)
+# define R500_SRC_ALPHA_0_NO_READ (1 << 30)
+# define R500_SRC_ALPHA_1_NO_READ (1 << 31)
/* the following are shared between CBLEND and ABLEND */
# define R300_FCN_MASK (3 << 12)
@@ -2281,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_COLORPITCH_MASK 0x00003FFE
# define R300_COLOR_TILE_DISABLE (0 << 16)
# define R300_COLOR_TILE_ENABLE (1 << 16)
+# define R300_COLOR_TILE(x) ((x) << 16)
# define R300_COLOR_MICROTILE_DISABLE (0 << 17)
# define R300_COLOR_MICROTILE_ENABLE (1 << 17)
# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */
+# define R300_COLOR_MICROTILE(x) ((x) << 17)
# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19)
# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19)
# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19)
@@ -2542,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_DEPTHPITCH_MASK 0x00003FFC
# define R300_DEPTHMACROTILE_DISABLE (0 << 16)
# define R300_DEPTHMACROTILE_ENABLE (1 << 16)
+# define R300_DEPTHMACROTILE(x) ((x) << 16)
# define R300_DEPTHMICROTILE_LINEAR (0 << 17)
# define R300_DEPTHMICROTILE_TILED (1 << 17)
# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17)
+# define R300_DEPTHMICROTILE(x) ((x) << 17)
# define R300_DEPTHENDIAN_NO_SWAP (0 << 18)
# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18)
# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18)
@@ -2638,7 +2646,7 @@ enum {
VE_COND_MUX_GTE = 25,
VE_SET_GREATER_THAN = 26,
VE_SET_EQUAL = 27,
- VE_SET_NOT_EQUAL = 28,
+ VE_SET_NOT_EQUAL = 28
};
enum {
@@ -2672,20 +2680,20 @@ enum {
ME_PRED_SET_CLR = 25,
ME_PRED_SET_INV = 26,
ME_PRED_SET_POP = 27,
- ME_PRED_SET_RESTORE = 28,
+ ME_PRED_SET_RESTORE = 28
};
enum {
/* R3XX */
PVS_MACRO_OP_2CLK_MADD = 0,
- PVS_MACRO_OP_2CLK_M2X_ADD = 1,
+ PVS_MACRO_OP_2CLK_M2X_ADD = 1
};
enum {
PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */
PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */
PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */
- PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */
+ PVS_SRC_REG_ALT_TEMPORARY = 3 /* Alternate Intermediate Storage */
};
enum {
@@ -2694,7 +2702,7 @@ enum {
PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */
PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */
PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */
- PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */
+ PVS_DST_REG_INPUT = 5 /* Output Memory & Replicate X to all channels */
};
enum {
@@ -2703,7 +2711,7 @@ enum {
PVS_SRC_SELECT_Z = 2, /* Select Z Component */
PVS_SRC_SELECT_W = 3, /* Select W Component */
PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */
- PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */
+ PVS_SRC_SELECT_FORCE_1 = 5 /* Force Component to 1.0 */
};
/* PVS Opcode & Destination Operand Description */
@@ -2742,7 +2750,7 @@ enum {
PVS_DST_ADDR_SEL_MASK = 0x3,
PVS_DST_ADDR_SEL_SHIFT = 29,
PVS_DST_ADDR_MODE_0_MASK = 0x1,
- PVS_DST_ADDR_MODE_0_SHIFT = 31,
+ PVS_DST_ADDR_MODE_0_SHIFT = 31
};
/* PVS Source Operand Description */
@@ -2777,7 +2785,7 @@ enum {
PVS_SRC_ADDR_SEL_MASK = 0x3,
PVS_SRC_ADDR_SEL_SHIFT = 29,
PVS_SRC_ADDR_MODE_1_MASK = 0x0,
- PVS_SRC_ADDR_MODE_1_SHIFT = 32,
+ PVS_SRC_ADDR_MODE_1_SHIFT = 32
};
/*\}*/
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 2d70ec2ac9..710d850163 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -26,6 +26,8 @@
#include "draw/draw_context.h"
#include "draw/draw_vbuf.h"
+#include "indices/u_indices.h"
+
#include "pipe/p_inlines.h"
#include "util/u_memory.h"
@@ -69,16 +71,11 @@ uint32_t r300_translate_primitive(unsigned prim)
}
}
-static boolean r300_nothing_to_draw(struct r300_context *r300)
-{
- return r300->rs_state->rs.scissor &&
- r300->scissor_state->scissor.empty_area;
-}
-
static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
unsigned mode)
{
- uint32_t color_control = r300->rs_state->color_control;
+ struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state;
+ uint32_t color_control = rs->color_control;
/* By default (see r300_state.c:r300_create_rs_state) color_control is
* initialized to provoking the first vertex.
@@ -98,7 +95,7 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
* ~ C.
*/
- if (r300->rs_state->rs.flatshade_first) {
+ if (rs->rs.flatshade_first) {
switch (mode) {
case PIPE_PRIM_TRIANGLE_FAN:
color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND;
@@ -119,6 +116,44 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
return color_control;
}
+static void r300_emit_draw_immediate(struct r300_context *r300,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer;
+ unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float);
+ unsigned i;
+ uint32_t* map;
+ CS_LOCALS(r300);
+
+ map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo,
+ start * vertex_size, count * vertex_size,
+ PIPE_BUFFER_USAGE_CPU_READ);
+
+ BEGIN_CS(10 + count * vertex_size);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, mode));
+ OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
+ OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
+ r300_translate_primitive(mode));
+ //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size);
+ for (i = 0; i < count * vertex_size; i++) {
+ if (i % vertex_size == 0) {
+ //debug_printf("r300: -- vert --\n");
+ }
+ //debug_printf("r300: 0x%08x\n", *map);
+ OUT_CS(*map);
+ map++;
+ }
+ END_CS;
+
+ pipe_buffer_unmap(r300->context.screen, vbo);
+}
+
static void r300_emit_draw_arrays(struct r300_context *r300,
unsigned mode,
unsigned count)
@@ -212,43 +247,84 @@ validate:
return TRUE;
}
+static struct pipe_buffer* r300_translate_elts(struct r300_context* r300,
+ struct pipe_buffer* elts,
+ unsigned* size,
+ unsigned* mode,
+ unsigned* count)
+{
+ struct pipe_screen* screen = r300->context.screen;
+ struct pipe_buffer* new_elts;
+ void *in_map, *out_map;
+ unsigned out_prim, out_index_size, out_nr;
+ u_translate_func out_translate;
+
+ (void)u_index_translator(~0, *mode, *size, *count, PV_LAST, PV_LAST,
+ &out_prim, &out_index_size, &out_nr, &out_translate);
+
+ new_elts = screen->buffer_create(screen, 32,
+ PIPE_BUFFER_USAGE_INDEX |
+ PIPE_BUFFER_USAGE_CPU_WRITE |
+ PIPE_BUFFER_USAGE_GPU_READ,
+ out_index_size * out_nr);
+
+ in_map = pipe_buffer_map(screen, elts, PIPE_BUFFER_USAGE_CPU_READ);
+ out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+ out_translate(in_map, *count, out_map);
+
+ pipe_buffer_unmap(screen, elts);
+ pipe_buffer_unmap(screen, new_elts);
+
+ *size = out_index_size;
+ *mode = out_prim;
+ *count = out_nr;
+
+ return new_elts;
+}
+
/* This is the fast-path drawing & emission for HW TCL. */
-boolean r300_draw_range_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count)
+void r300_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
{
struct r300_context* r300 = r300_context(pipe);
+ struct pipe_buffer* orgIndexBuffer = indexBuffer;
if (!u_trim_pipe_prim(mode, &count)) {
- return FALSE;
+ return;
}
if (count > 65535) {
- return FALSE;
- }
-
- if (r300_nothing_to_draw(r300)) {
- return TRUE;
+ /* XXX: use aux/indices functions to split this into smaller
+ * primitives.
+ */
+ return;
}
r300_update_derived_state(r300);
if (!r300_setup_vertex_buffers(r300)) {
- return FALSE;
+ return;
+ }
+
+ if (indexSize == 1) {
+ indexBuffer = r300_translate_elts(r300, indexBuffer,
+ &indexSize, &mode, &count);
}
if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
RADEON_GEM_DOMAIN_GTT, 0)) {
- return FALSE;
+ goto cleanup;
}
if (!r300->winsys->validate(r300->winsys)) {
- return FALSE;
+ goto cleanup;
}
r300_emit_dirty_state(r300);
@@ -258,49 +334,52 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex,
mode, start, count);
- return TRUE;
+cleanup:
+ if (indexBuffer != orgIndexBuffer) {
+ pipe->screen->buffer_destroy(indexBuffer);
+ }
}
/* Simple helpers for context setup. Should probably be moved to util. */
-boolean r300_draw_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize, unsigned mode,
- unsigned start, unsigned count)
+void r300_draw_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize, unsigned mode,
+ unsigned start, unsigned count)
{
- return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
- mode, start, count);
+ pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
+ mode, start, count);
}
-boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
- unsigned start, unsigned count)
+void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+ unsigned start, unsigned count)
{
struct r300_context* r300 = r300_context(pipe);
if (!u_trim_pipe_prim(mode, &count)) {
- return FALSE;
+ return;
}
if (count > 65535) {
- return FALSE;
- }
-
- if (r300_nothing_to_draw(r300)) {
- return TRUE;
+ /* XXX: driver needs to handle this -- use the functions in
+ * aux/indices to split this into several smaller primitives.
+ */
+ return;
}
r300_update_derived_state(r300);
if (!r300_setup_vertex_buffers(r300)) {
- return FALSE;
+ return;
}
r300_emit_dirty_state(r300);
- r300_emit_aos(r300, start);
-
- r300_emit_draw_arrays(r300, mode, count);
-
- return TRUE;
+ if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) {
+ r300_emit_draw_immediate(r300, mode, start, count);
+ } else {
+ r300_emit_aos(r300, start);
+ r300_emit_draw_arrays(r300, mode, count);
+ }
}
/****************************************************************************
@@ -309,7 +388,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
***************************************************************************/
/* SW TCL arrays, using Draw. */
-boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
+void r300_swtcl_draw_arrays(struct pipe_context* pipe,
unsigned mode,
unsigned start,
unsigned count)
@@ -318,11 +397,7 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
int i;
if (!u_trim_pipe_prim(mode, &count)) {
- return FALSE;
- }
-
- if (r300_nothing_to_draw(r300)) {
- return TRUE;
+ return;
}
for (i = 0; i < r300->vertex_buffer_count; i++) {
@@ -335,8 +410,9 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
draw_set_mapped_element_buffer(r300->draw, 0, NULL);
draw_set_mapped_constant_buffer(r300->draw,
- r300->shader_constants[PIPE_SHADER_VERTEX].constants,
- r300->shader_constants[PIPE_SHADER_VERTEX].count *
+ PIPE_SHADER_VERTEX,
+ r300->shader_constants[PIPE_SHADER_VERTEX].constants,
+ r300->shader_constants[PIPE_SHADER_VERTEX].count *
(sizeof(float) * 4));
draw_arrays(r300->draw, mode, start, count);
@@ -345,12 +421,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer);
draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
}
-
- return TRUE;
}
/* SW TCL elements, using Draw. */
-boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
struct pipe_buffer* indexBuffer,
unsigned indexSize,
unsigned minIndex,
@@ -361,13 +435,10 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
int i;
+ void* indices;
if (!u_trim_pipe_prim(mode, &count)) {
- return FALSE;
- }
-
- if (r300_nothing_to_draw(r300)) {
- return TRUE;
+ return;
}
for (i = 0; i < r300->vertex_buffer_count; i++) {
@@ -377,12 +448,13 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
draw_set_mapped_vertex_buffer(r300->draw, i, buf);
}
- void* indices = pipe_buffer_map(pipe->screen, indexBuffer,
- PIPE_BUFFER_USAGE_CPU_READ);
+ indices = pipe_buffer_map(pipe->screen, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_element_buffer_range(r300->draw, indexSize,
minIndex, maxIndex, indices);
draw_set_mapped_constant_buffer(r300->draw,
+ PIPE_SHADER_VERTEX,
r300->shader_constants[PIPE_SHADER_VERTEX].constants,
r300->shader_constants[PIPE_SHADER_VERTEX].count *
(sizeof(float) * 4));
@@ -397,8 +469,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
pipe_buffer_unmap(pipe->screen, indexBuffer);
draw_set_mapped_element_buffer_range(r300->draw, 0, start,
start + count - 1, NULL);
-
- return TRUE;
}
/* Object for rendering using Draw. */
@@ -474,7 +544,7 @@ static void* r300_render_map_vertices(struct vbuf_render* render)
r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo,
PIPE_BUFFER_USAGE_CPU_WRITE);
- return (r300render->vbo_ptr + r300render->vbo_offset);
+ return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset);
}
static void r300_render_unmap_vertices(struct vbuf_render* render,
diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h
index da83069083..27b5e6a963 100644
--- a/src/gallium/drivers/r300/r300_render.h
+++ b/src/gallium/drivers/r300/r300_render.h
@@ -25,35 +25,35 @@
uint32_t r300_translate_primitive(unsigned prim);
-boolean r300_draw_range_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-boolean r300_draw_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize, unsigned mode,
- unsigned start, unsigned count);
-
-boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
- unsigned start, unsigned count);
-
-boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
- unsigned mode,
- unsigned start,
- unsigned count);
-
-boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count);
+void r300_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count);
+
+void r300_draw_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize, unsigned mode,
+ unsigned start, unsigned count);
+
+void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+ unsigned start, unsigned count);
+
+void r300_swtcl_draw_arrays(struct pipe_context* pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count);
+
+void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count);
#endif /* R300_RENDER_H */
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 2a8667d483..287664b1d2 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -83,6 +83,7 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
switch (param) {
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
/* XXX I'm told this goes up to 16 */
return 8;
case PIPE_CAP_NPOT_TEXTURES:
@@ -143,9 +144,11 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 1;
case PIPE_CAP_SM3:
- return 1;
- case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return 8;
+ if (r300screen->caps->is_r500) {
+ return 1;
+ } else {
+ return 0;
+ }
default:
debug_printf("r300: Implementation error: Bad param %d\n",
param);
diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h
index 85184e2cfd..6796841b29 100644
--- a/src/gallium/drivers/r300/r300_shader_semantics.h
+++ b/src/gallium/drivers/r300/r300_shader_semantics.h
@@ -40,6 +40,7 @@ struct r300_shader_semantics {
int bcolor[ATTR_COLOR_COUNT];
int generic[ATTR_GENERIC_COUNT];
int fog;
+ int wpos;
};
static INLINE void r300_shader_semantics_reset(
@@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset(
info->pos = ATTR_UNUSED;
info->psize = ATTR_UNUSED;
info->fog = ATTR_UNUSED;
+ info->wpos = ATTR_UNUSED;
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
info->color[i] = ATTR_UNUSED;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 541b0abc9c..60ad763cf4 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1,5 +1,6 @@
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -41,6 +42,120 @@
/* r300_state: Functions used to intialize state context by translating
* Gallium state objects into semi-native r300 state objects. */
+static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA == 0, and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA == 1, and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set,
+ * the colorbuffer will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set,
+ * the colorbuffer will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
/* Create a new blend state based on the CSO blend state.
*
* This encompasses alpha blending, logic/raster ops, and blend dithering. */
@@ -66,7 +181,11 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
- /* optimization: some operations do not require the destination color */
+ /* Optimization: some operations do not require the destination color.
+ *
+ * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled,
+ * otherwise blending gives incorrect results. It seems to be
+ * a hardware bug. */
if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN ||
eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX ||
dstRGB != PIPE_BLENDFACTOR_ZERO ||
@@ -78,11 +197,81 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
srcA == PIPE_BLENDFACTOR_DST_COLOR ||
srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
srcA == PIPE_BLENDFACTOR_INV_DST_COLOR ||
- srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA)
+ srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) {
+ /* Enable reading from the colorbuffer. */
blend->blend_control |= R300_READ_ENABLE;
- /* XXX implement the optimization with DISCARD_SRC_PIXELS*/
- /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */
+ if (r300_screen(r300_context(pipe)->context.screen)->caps->is_r500) {
+ /* Optimization: Depending on incoming pixels, we can
+ * conditionally disable the reading in hardware... */
+ if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN &&
+ eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) {
+ /* Disable reading if SRC_ALPHA == 0. */
+ if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ZERO)) {
+ blend->blend_control |= R500_SRC_ALPHA_0_NO_READ;
+ }
+
+ /* Disable reading if SRC_ALPHA == 1. */
+ if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ZERO)) {
+ blend->blend_control |= R500_SRC_ALPHA_1_NO_READ;
+ }
+ }
+ }
+ }
+
+ /* Optimization: discard pixels which don't change the colorbuffer.
+ *
+ * The code below is non-trivial and some math is involved.
+ *
+ * Discarding pixels must be disabled when FP16 AA is enabled.
+ * This is a hardware bug. Also, this implementation wouldn't work
+ * with FP blending enabled and equation clamping disabled.
+ *
+ * Equations other than ADD are rarely used and therefore won't be
+ * optimized. */
+ if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) &&
+ (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) {
+ /* ADD: X+Y
+ * REVERSE_SUBTRACT: Y-X
+ *
+ * The idea is:
+ * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1,
+ * then CB will not be changed.
+ *
+ * Given the srcFactor and dstFactor variables, we can derive
+ * what src and dst should be equal to and discard appropriate
+ * pixels.
+ */
+ if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) {
+ blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
+ } else if (blend_discard_if_src_alpha_1(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1;
+ } else if (blend_discard_if_src_color_0(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0;
+ } else if (blend_discard_if_src_color_1(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1;
+ } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend->blend_control |=
+ R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0;
+ } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend->blend_control |=
+ R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1;
+ }
+ }
/* separate alpha */
if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
@@ -128,8 +317,8 @@ static void r300_bind_blend_state(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
- r300->blend_state = (struct r300_blend_state*)state;
- r300->dirty_state |= R300_NEW_BLEND;
+ r300->blend_state.state = state;
+ r300->blend_state.dirty = TRUE;
}
/* Free blend state. */
@@ -151,20 +340,24 @@ static void r300_set_blend_color(struct pipe_context* pipe,
const struct pipe_blend_color* color)
{
struct r300_context* r300 = r300_context(pipe);
+ struct r300_screen* r300screen = r300_screen(pipe->screen);
+ struct r300_blend_color_state* state =
+ (struct r300_blend_color_state*)r300->blend_color_state.state;
union util_color uc;
util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc);
- r300->blend_color_state->blend_color = uc.ui;
+ state->blend_color = uc.ui;
/* XXX if FP16 blending is enabled, we should use the FP16 format */
- r300->blend_color_state->blend_color_red_alpha =
+ state->blend_color_red_alpha =
float_to_fixed10(color->color[0]) |
(float_to_fixed10(color->color[3]) << 16);
- r300->blend_color_state->blend_color_green_blue =
+ state->blend_color_green_blue =
float_to_fixed10(color->color[2]) |
(float_to_fixed10(color->color[1]) << 16);
- r300->dirty_state |= R300_NEW_BLEND_COLOR;
+ r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2;
+ r300->blend_color_state.dirty = TRUE;
}
static void r300_set_clip_state(struct pipe_context* pipe,
@@ -173,12 +366,15 @@ static void r300_set_clip_state(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
if (r300_screen(pipe->screen)->caps->has_tcl) {
- r300->clip_state = *state;
- r300->dirty_state |= R300_NEW_CLIP;
+ memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state));
+ r300->clip_state.size = 29;
} else {
draw_flush(r300->draw);
draw_set_clip_state(r300->draw, state);
+ r300->clip_state.size = 2;
}
+
+ r300->clip_state.dirty = TRUE;
}
/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
@@ -271,9 +467,11 @@ static void r300_bind_dsa_state(struct pipe_context* pipe,
void* state)
{
struct r300_context* r300 = r300_context(pipe);
+ struct r300_screen* r300screen = r300_screen(pipe->screen);
- r300->dsa_state = (struct r300_dsa_state*)state;
- r300->dirty_state |= R300_NEW_DSA;
+ r300->dsa_state.state = state;
+ r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6;
+ r300->dsa_state.dirty = TRUE;
}
/* Free DSA state. */
@@ -283,37 +481,11 @@ static void r300_delete_dsa_state(struct pipe_context* pipe,
FREE(state);
}
-static void r300_set_scissor_regs(const struct pipe_scissor_state* state,
- struct r300_scissor_regs *scissor,
- boolean is_r500)
-{
- if (is_r500) {
- scissor->top_left =
- (state->minx << R300_SCISSORS_X_SHIFT) |
- (state->miny << R300_SCISSORS_Y_SHIFT);
- scissor->bottom_right =
- ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) |
- ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT);
- } else {
- /* Offset of 1440 in non-R500 chipsets. */
- scissor->top_left =
- ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) |
- ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT);
- scissor->bottom_right =
- (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
- (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
- }
-
- scissor->empty_area = state->minx >= state->maxx ||
- state->miny >= state->maxy;
-}
-
static void
r300_set_framebuffer_state(struct pipe_context* pipe,
const struct pipe_framebuffer_state* state)
{
struct r300_context* r300 = r300_context(pipe);
- struct pipe_scissor_state scissor;
if (r300->draw) {
draw_flush(r300->draw);
@@ -321,18 +493,12 @@ static void
r300->framebuffer_state = *state;
- scissor.minx = scissor.miny = 0;
- scissor.maxx = state->width;
- scissor.maxy = state->height;
- r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer,
- r300_screen(r300->context.screen)->caps->is_r500);
-
/* Don't rely on the order of states being set for the first time. */
- if (!r300->rs_state || !r300->rs_state->rs.scissor) {
- r300->dirty_state |= R300_NEW_SCISSOR;
- }
r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
- r300->dirty_state |= R300_NEW_BLEND;
+
+ r300->blend_state.dirty = TRUE;
+ r300->dsa_state.dirty = TRUE;
+ r300->scissor_state.dirty = TRUE;
}
/* Create fragment shader state. */
@@ -367,6 +533,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
r300->fs = fs;
r300_pick_fragment_shader(r300);
+ if (r300->vs && r300_vertex_shader_setup_wpos(r300)) {
+ r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
+ }
+
r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS;
}
@@ -407,8 +577,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
/* Copy rasterizer state for Draw. */
rs->rs = *state;
- rs->enable_vte = !state->bypass_vs_clip_and_viewport;
-
#ifdef PIPE_ARCH_LITTLE_ENDIAN
rs->vap_control_status = R300_VC_NO_SWAP;
#else
@@ -524,12 +692,23 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
draw_set_rasterizer_state(r300->draw, &rs->rs);
}
- r300->rs_state = rs;
+ if (rs) {
+ r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport;
+ } else {
+ r300->tcl_bypass = FALSE;
+ }
+
+ r300->rs_state.state = rs;
+ r300->rs_state.dirty = TRUE;
+ /* XXX Why is this still needed, dammit!? */
+ r300->scissor_state.dirty = TRUE;
+ r300->viewport_state.dirty = TRUE;
+
/* XXX Clean these up when we move to atom emits */
- r300->dirty_state |= R300_NEW_RASTERIZER;
r300->dirty_state |= R300_NEW_RS_BLOCK;
- r300->dirty_state |= R300_NEW_SCISSOR;
- r300->dirty_state |= R300_NEW_VIEWPORT;
+ if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) {
+ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+ }
}
/* Free rasterizer state. */
@@ -556,7 +735,8 @@ static void*
sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter,
state->mag_img_filter,
- state->min_mip_filter);
+ state->min_mip_filter,
+ state->max_anisotropy > 1.0);
/* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
/* We must pass these to the emit function to clamp them properly. */
@@ -664,49 +844,51 @@ static void r300_set_scissor_state(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
- r300_set_scissor_regs(state, &r300->scissor_state->scissor,
- r300_screen(r300->context.screen)->caps->is_r500);
+ memcpy(r300->scissor_state.state, state,
+ sizeof(struct pipe_scissor_state));
- /* Don't rely on the order of states being set for the first time. */
- if (!r300->rs_state || r300->rs_state->rs.scissor) {
- r300->dirty_state |= R300_NEW_SCISSOR;
- }
+ r300->scissor_state.dirty = TRUE;
}
static void r300_set_viewport_state(struct pipe_context* pipe,
const struct pipe_viewport_state* state)
{
struct r300_context* r300 = r300_context(pipe);
+ struct r300_viewport_state* viewport =
+ (struct r300_viewport_state*)r300->viewport_state.state;
/* Do the transform in HW. */
- r300->viewport_state->vte_control = R300_VTX_W0_FMT;
+ viewport->vte_control = R300_VTX_W0_FMT;
if (state->scale[0] != 1.0f) {
- r300->viewport_state->xscale = state->scale[0];
- r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA;
+ viewport->xscale = state->scale[0];
+ viewport->vte_control |= R300_VPORT_X_SCALE_ENA;
}
if (state->scale[1] != 1.0f) {
- r300->viewport_state->yscale = state->scale[1];
- r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA;
+ viewport->yscale = state->scale[1];
+ viewport->vte_control |= R300_VPORT_Y_SCALE_ENA;
}
if (state->scale[2] != 1.0f) {
- r300->viewport_state->zscale = state->scale[2];
- r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA;
+ viewport->zscale = state->scale[2];
+ viewport->vte_control |= R300_VPORT_Z_SCALE_ENA;
}
if (state->translate[0] != 0.0f) {
- r300->viewport_state->xoffset = state->translate[0];
- r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA;
+ viewport->xoffset = state->translate[0];
+ viewport->vte_control |= R300_VPORT_X_OFFSET_ENA;
}
if (state->translate[1] != 0.0f) {
- r300->viewport_state->yoffset = state->translate[1];
- r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA;
+ viewport->yoffset = state->translate[1];
+ viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA;
}
if (state->translate[2] != 0.0f) {
- r300->viewport_state->zoffset = state->translate[2];
- r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA;
+ viewport->zoffset = state->translate[2];
+ viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA;
}
- r300->dirty_state |= R300_NEW_VIEWPORT;
+ r300->viewport_state.dirty = TRUE;
+ if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) {
+ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+ }
}
static void r300_set_vertex_buffers(struct pipe_context* pipe,
@@ -778,7 +960,13 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
}
r300->vs = vs;
- r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS;
+ if (r300->fs) {
+ r300_vertex_shader_setup_wpos(r300);
+ }
+
+ r300->dirty_state |=
+ R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS |
+ R300_NEW_VERTEX_FORMAT;
} else {
draw_flush(r300->draw);
draw_bind_vertex_shader(r300->draw,
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 29bc701a86..192846411b 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -71,9 +71,9 @@ static void r300_draw_emit_attrib(struct r300_context* r300,
struct tgsi_shader_info* info = &r300->vs->info;
int output;
- output = draw_find_vs_output(r300->draw,
- info->output_semantic_name[index],
- info->output_semantic_index[index]);
+ output = draw_find_shader_output(r300->draw,
+ info->output_semantic_name[index],
+ info->output_semantic_index[index]);
draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output);
}
@@ -139,10 +139,10 @@ static void r300_vertex_psc(struct r300_context* r300)
/* If TCL is bypassed, map vertex streams to equivalent VS output
* locations. */
- if (r300->rs_state->enable_vte) {
- stream_tab = identity;
- } else {
+ if (r300->tcl_bypass) {
stream_tab = r300->vs->stream_loc_notcl;
+ } else {
+ stream_tab = identity;
}
/* Vertex shaders have no semantics on their inputs,
@@ -333,6 +333,8 @@ static void r300_update_rs_block(struct r300_context* r300,
void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean);
void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
+ boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+ vs_outputs->bcolor[1] != ATTR_UNUSED;
if (r300_screen(r300->context.screen)->caps->is_r500) {
rX00_rs_col = r500_rs_col;
@@ -348,7 +350,7 @@ static void r300_update_rs_block(struct r300_context* r300,
/* Rasterize colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- if (vs_outputs->color[i] != ATTR_UNUSED) {
+ if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
/* Always rasterize if it's written by the VS,
* otherwise it locks up. */
rX00_rs_col(rs, col_count, i, FALSE);
@@ -410,6 +412,16 @@ static void r300_update_rs_block(struct r300_context* r300,
}
}
+ /* Rasterize WPOS. */
+ /* If the FS doesn't need it, it's not written by the VS. */
+ if (fs_inputs->wpos != ATTR_UNUSED) {
+ rX00_rs_tex(rs, tex_count, tex_count, FALSE);
+ rX00_rs_tex_write(rs, tex_count, fp_offset);
+
+ fp_offset++;
+ tex_count++;
+ }
+
/* Rasterize at least one color, or bad things happen. */
if (col_count == 0 && tex_count == 0) {
rX00_rs_col(rs, 0, 0, TRUE);
@@ -496,7 +508,8 @@ static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa)
static void r300_update_ztop(struct r300_context* r300)
{
- r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE;
+ struct r300_ztop_state* ztop_state =
+ (struct r300_ztop_state*)r300->ztop_state.state;
/* This is important enough that I felt it warranted a comment.
*
@@ -518,31 +531,37 @@ static void r300_update_ztop(struct r300_context* r300)
* 5) Depth writes in fragment shader
* 6) Outstanding occlusion queries
*
+ * This register causes stalls all the way from SC to CB when changed,
+ * but it is buffered on-chip so it does not hurt to write it if it has
+ * not changed.
+ *
* ~C.
*/
/* ZS writes */
- if (r300_dsa_writes_depth_stencil(r300->dsa_state) &&
- (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */
- r300->fs->info.uses_kill)) { /* (2) */
- r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
- } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */
- r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
- } else if (r300->query_current) { /* (6) */
- r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
+ if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) &&
+ (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */
+ r300->fs->info.uses_kill)) { /* (2) */
+ ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+ } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */
+ ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+ } else if (r300->query_current) { /* (6) */
+ ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+ } else {
+ ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
}
+
+ r300->ztop_state.dirty = TRUE;
}
void r300_update_derived_state(struct r300_context* r300)
{
+ /* XXX */
if (r300->dirty_state &
(R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER |
- R300_NEW_VERTEX_FORMAT)) {
+ R300_NEW_VERTEX_FORMAT) || r300->rs_state.dirty) {
r300_update_derived_shader_state(r300);
}
- if (r300->dirty_state &
- (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) {
- r300_update_ztop(r300);
- }
+ r300_update_ztop(r300);
}
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index dbe42edd91..35be00e1b0 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -257,38 +257,37 @@ static INLINE uint32_t r300_translate_wrap(int wrap)
}
}
-static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip)
+static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
+ int is_anisotropic)
{
uint32_t retval = 0;
- switch (min) {
+ if (is_anisotropic)
+ retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO;
+ else {
+ switch (min) {
case PIPE_TEX_FILTER_NEAREST:
retval |= R300_TX_MIN_FILTER_NEAREST;
break;
case PIPE_TEX_FILTER_LINEAR:
retval |= R300_TX_MIN_FILTER_LINEAR;
break;
- case PIPE_TEX_FILTER_ANISO:
- retval |= R300_TX_MIN_FILTER_ANISO;
- break;
default:
debug_printf("r300: Unknown texture filter %d\n", min);
assert(0);
break;
- }
- switch (mag) {
+ }
+ switch (mag) {
case PIPE_TEX_FILTER_NEAREST:
retval |= R300_TX_MAG_FILTER_NEAREST;
break;
case PIPE_TEX_FILTER_LINEAR:
retval |= R300_TX_MAG_FILTER_LINEAR;
break;
- case PIPE_TEX_FILTER_ANISO:
- retval |= R300_TX_MAG_FILTER_ANISO;
- break;
default:
debug_printf("r300: Unknown texture filter %d\n", mag);
assert(0);
break;
+ }
}
switch (mip) {
case PIPE_TEX_MIPFILTER_NONE:
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index bcd4c030f9..b0f309695c 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
CS_LOCALS(r300);
- BEGIN_CS(20 + (caps->has_tcl ? 2: 0));
+ BEGIN_CS(16 + (caps->has_tcl ? 2: 0));
/*** Graphics Backend (GB) ***/
/* Various GB enables */
@@ -66,8 +66,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0);
OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0);
OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0);
- OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0);
- OUT_CS_REG(R300_US_W_FMT, 0x0);
/*** VAP ***/
/* Sign/normalize control */
@@ -117,10 +115,12 @@ void r300_emit_invariant_state(struct r300_context* r300)
OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
- if (caps->is_r500) {
- OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000);
- OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF);
+
+ if (caps->family >= CHIP_FAMILY_RV350) {
+ OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
+ OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
}
+
OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 9a96206a4d..a9bbdd56d8 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -30,6 +30,18 @@
#include "r300_texture.h"
#include "r300_screen.h"
+#define TILE_WIDTH 0
+#define TILE_HEIGHT 1
+
+static const unsigned microblock_table[5][3][2] = {
+ /*linear tiled square-tiled */
+ {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */
+ {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */
+ {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */
+ {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */
+ {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */
+};
+
static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
{
struct r300_texture_state* state = &tex->state;
@@ -92,33 +104,67 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
}
/**
+ * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile
+ * of the given texture.
+ */
+static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim)
+{
+ unsigned pixsize, tile_size;
+
+ pixsize = util_format_get_blocksize(tex->tex.format);
+ tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] *
+ (tex->macrotile == R300_BUFFER_TILED ? 8 : 1);
+
+ assert(tile_size);
+ return tile_size;
+}
+
+/**
* Return the stride, in bytes, of the texture images of the given texture
* at the given level.
*/
unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
{
+ unsigned tile_width, width;
+
if (tex->stride_override)
return tex->stride_override;
+ /* Check the level. */
if (level > tex->tex.last_level) {
debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__,
level, tex->tex.last_level);
return 0;
}
- return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32);
+ tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH);
+ width = align(u_minify(tex->tex.width0, level), tile_width);
+
+ /* Should already be aligned except for S3TC. */
+ return align(util_format_get_stride(tex->tex.format, width), 32);
+}
+
+static unsigned r300_texture_get_nblocksy(struct r300_texture* tex,
+ unsigned level)
+{
+ unsigned height, tile_height;
+
+ tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT);
+ height = align(u_minify(tex->tex.height0, level), tile_height);
+
+ return util_format_get_nblocksy(tex->tex.format, height);
}
static void r300_setup_miptree(struct r300_texture* tex)
{
struct pipe_texture* base = &tex->tex;
- int stride, size, layer_size;
- int i;
+ unsigned stride, size, layer_size, nblocksy, i;
- for (i = 0; i <= base->last_level; i++) {
- unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i));
+ debug_printf("r300: Making miptree for texture, format %s\n", pf_name(base->format));
+ for (i = 0; i <= base->last_level; i++) {
stride = r300_texture_get_stride(tex, i);
+ nblocksy = r300_texture_get_nblocksy(tex, i);
layer_size = stride * nblocksy;
if (base->target == PIPE_TEXTURE_CUBE)
@@ -132,9 +178,9 @@ static void r300_setup_miptree(struct r300_texture* tex)
tex->pitch[i] = stride / util_format_get_blocksize(base->format);
debug_printf("r300: Texture miptree: Level %d "
- "(%dx%dx%d px, pitch %d bytes)\n",
+ "(%dx%dx%d px, pitch %d bytes) %d bytes total\n",
i, u_minify(base->width0, i), u_minify(base->height0, i),
- u_minify(base->depth0, i), stride);
+ u_minify(base->depth0, i), stride, tex->size);
}
}
@@ -163,7 +209,7 @@ static struct pipe_texture*
r300_setup_miptree(tex);
r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
- tex->buffer = screen->buffer_create(screen, 1024,
+ tex->buffer = screen->buffer_create(screen, 2048,
PIPE_BUFFER_USAGE_PIXEL,
tex->size);
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 096cdb20bb..a792c2cf98 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -120,7 +120,7 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */
/* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */
/* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */
- /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */
+ /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */
/* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */
/* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */
/* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index c4ed0d712f..68aef70872 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -22,6 +22,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "r300_vs.h"
+#include "r300_fs.h"
#include "r300_context.h"
#include "r300_screen.h"
@@ -33,6 +34,8 @@
#include "radeon_compiler.h"
+#include "util/u_math.h"
+
/* Convert info about VS output semantics into r300_shader_semantics. */
static void r300_shader_read_vs_outputs(
struct tgsi_shader_info* info,
@@ -88,11 +91,13 @@ static void r300_shader_read_vs_outputs(
}
}
-static void r300_shader_vap_output_fmt(
- struct r300_shader_semantics* vs_outputs,
- uint* hwfmt)
+static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs)
{
+ struct r300_shader_semantics* vs_outputs = &vs->outputs;
+ uint32_t* hwfmt = vs->hwfmt;
int i, gen_count;
+ boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+ vs_outputs->bcolor[1] != ATTR_UNUSED;
/* Do the actual vertex_info setup.
*
@@ -119,13 +124,19 @@ static void r300_shader_vap_output_fmt(
/* Colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- if (vs_outputs->color[i] != ATTR_UNUSED) {
+ if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
hwfmt[1] |= R300_INPUT_CNTL_COLOR;
hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
}
}
- /* XXX Back-face colors. */
+ /* Back-face colors. */
+ if (any_bcolor_used) {
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ hwfmt[1] |= R300_INPUT_CNTL_COLOR;
+ hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i);
+ }
+ }
/* Texture coordinates. */
gen_count = 0;
@@ -146,6 +157,9 @@ static void r300_shader_vap_output_fmt(
/* XXX magic */
assert(gen_count <= 8);
+
+ /* WPOS. */
+ vs->wpos_tex_output = gen_count;
}
/* Sets up stream mapping to equivalent VS outputs if TCL is bypassed
@@ -155,6 +169,8 @@ static void r300_stream_locations_notcl(
int* stream_loc)
{
int i, tabi = 0, gen_count;
+ boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+ vs_outputs->bcolor[1] != ATTR_UNUSED;
/* Position. */
stream_loc[tabi++] = 0;
@@ -166,14 +182,14 @@ static void r300_stream_locations_notcl(
/* Colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- if (vs_outputs->color[i] != ATTR_UNUSED) {
+ if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
stream_loc[tabi++] = 2 + i;
}
}
/* Back-face colors. */
- for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
+ if (any_bcolor_used) {
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
stream_loc[tabi++] = 4 + i;
}
}
@@ -181,7 +197,7 @@ static void r300_stream_locations_notcl(
/* Texture coordinates. */
gen_count = 0;
for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
- if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
+ if (vs_outputs->generic[i] != ATTR_UNUSED) {
assert(tabi < 16);
stream_loc[tabi++] = 6 + gen_count;
gen_count++;
@@ -195,8 +211,12 @@ static void r300_stream_locations_notcl(
gen_count++;
}
- /* XXX magic */
- assert(gen_count <= 8);
+ /* WPOS. */
+ if (vs_outputs->wpos != ATTR_UNUSED) {
+ assert(tabi < 16);
+ stream_loc[tabi++] = 6 + gen_count;
+ gen_count++;
+ }
for (; tabi < 16;) {
stream_loc[tabi++] = -1;
@@ -209,6 +229,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
struct r300_shader_semantics* outputs = &vs->outputs;
struct tgsi_shader_info* info = &vs->info;
int i, reg = 0;
+ boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED ||
+ outputs->bcolor[1] != ATTR_UNUSED;
/* Fill in the input mapping */
for (i = 0; i < info->num_inputs; i++)
@@ -226,14 +248,30 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
c->code->outputs[outputs->psize] = reg++;
}
+ /* If we're writing back facing colors we need to send
+ * four colors to make front/back face colors selection work.
+ * If the vertex program doesn't write all 4 colors, lets
+ * pretend it does by skipping output index reg so the colors
+ * get written into appropriate output vectors.
+ */
+
/* Colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
if (outputs->color[i] != ATTR_UNUSED) {
c->code->outputs[outputs->color[i]] = reg++;
+ } else if (any_bcolor_used) {
+ reg++;
}
}
- /* XXX Back-face colors. */
+ /* Back-face colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (outputs->bcolor[i] != ATTR_UNUSED) {
+ c->code->outputs[outputs->bcolor[i]] = reg++;
+ } else if (any_bcolor_used) {
+ reg++;
+ }
+ }
/* Texture coordinates. */
for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
@@ -246,6 +284,33 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
if (outputs->fog != ATTR_UNUSED) {
c->code->outputs[outputs->fog] = reg++;
}
+
+ /* WPOS. */
+ if (outputs->wpos != ATTR_UNUSED) {
+ c->code->outputs[outputs->wpos] = reg++;
+ }
+}
+
+static void r300_insert_wpos(struct r300_vertex_program_compiler* c,
+ struct r300_shader_semantics* outputs)
+{
+ int i, lastOutput = 0;
+
+ /* Find the max output index. */
+ lastOutput = MAX2(lastOutput, outputs->psize);
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ lastOutput = MAX2(lastOutput, outputs->color[i]);
+ lastOutput = MAX2(lastOutput, outputs->bcolor[i]);
+ }
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ lastOutput = MAX2(lastOutput, outputs->generic[i]);
+ }
+ lastOutput = MAX2(lastOutput, outputs->fog);
+
+ /* Set WPOS after the last output. */
+ lastOutput++;
+ rc_copy_output(&c->Base, 0, lastOutput); /* out[lastOutput] = out[0]; */
+ outputs->wpos = lastOutput;
}
void r300_translate_vertex_shader(struct r300_context* r300,
@@ -256,8 +321,6 @@ void r300_translate_vertex_shader(struct r300_context* r300,
/* Initialize. */
r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
- r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt);
- r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl);
/* Setup the compiler */
rc_init(&compiler.Base);
@@ -277,9 +340,15 @@ void r300_translate_vertex_shader(struct r300_context* r300,
r300_tgsi_to_rc(&ttr, vs->state.tokens);
- compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs);
+ compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs+1));
compiler.SetHwInputOutput = &set_vertex_inputs_outputs;
+ /* Insert the WPOS output. */
+ r300_insert_wpos(&compiler, &vs->outputs);
+
+ r300_shader_vap_output_fmt(vs);
+ r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl);
+
/* Invoke the compiler */
r3xx_compile_vertex_program(&compiler);
if (compiler.Base.Error) {
@@ -292,3 +361,30 @@ void r300_translate_vertex_shader(struct r300_context* r300,
rc_destroy(&compiler.Base);
vs->translated = TRUE;
}
+
+boolean r300_vertex_shader_setup_wpos(struct r300_context* r300)
+{
+ struct r300_vertex_shader* vs = r300->vs;
+ int tex_output = r300->vs->wpos_tex_output;
+ uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output;
+ uint32_t* hwfmt = vs->hwfmt;
+
+ if (r300->fs->inputs.wpos != ATTR_UNUSED) {
+ /* Enable WPOS in VAP. */
+ if (!(hwfmt[1] & tex_fmt)) {
+ hwfmt[1] |= tex_fmt;
+ hwfmt[3] |= (4 << (3 * tex_output));
+
+ assert(tex_output < 8);
+ return TRUE;
+ }
+ } else {
+ /* Disable WPOS in VAP. */
+ if (hwfmt[1] & tex_fmt) {
+ hwfmt[1] &= ~tex_fmt;
+ hwfmt[3] &= ~(4 << (3 * tex_output));
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 67e9db5366..18cfeee3cd 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -43,6 +43,9 @@ struct r300_vertex_shader {
/* Stream locations for SWTCL or if TCL is bypassed. */
int stream_loc_notcl[16];
+ /* Output stream location for WPOS. */
+ int wpos_tex_output;
+
/* Has this shader been translated yet? */
boolean translated;
@@ -53,4 +56,7 @@ struct r300_vertex_shader {
void r300_translate_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs);
+/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */
+boolean r300_vertex_shader_setup_wpos(struct r300_context* r300);
+
#endif /* R300_VS_H */
diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index f98087deb8..5f130453c3 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -36,6 +36,7 @@
#include "util/u_pack_color.h"
#include "sp_clear.h"
#include "sp_context.h"
+#include "sp_query.h"
#include "sp_tile_cache.h"
@@ -55,6 +56,9 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
if (softpipe->no_rast)
return;
+ if (!softpipe_check_render_cond(softpipe))
+ return;
+
#if 0
softpipe_update_derived(softpipe); /* not needed?? */
#endif
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 0862e9f24b..8e01793940 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -176,6 +176,19 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe,
}
+static void
+softpipe_render_condition( struct pipe_context *pipe,
+ struct pipe_query *query,
+ uint mode )
+{
+ struct softpipe_context *softpipe = softpipe_context( pipe );
+
+ softpipe->render_cond_query = query;
+ softpipe->render_cond_mode = mode;
+}
+
+
+
struct pipe_context *
softpipe_create( struct pipe_screen *screen )
{
@@ -191,6 +204,7 @@ softpipe_create( struct pipe_screen *screen )
#endif
softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE );
+ softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
softpipe->pipe.winsys = screen->winsys;
softpipe->pipe.screen = screen;
@@ -222,6 +236,10 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.bind_vs_state = softpipe_bind_vs_state;
softpipe->pipe.delete_vs_state = softpipe_delete_vs_state;
+ softpipe->pipe.create_gs_state = softpipe_create_gs_state;
+ softpipe->pipe.bind_gs_state = softpipe_bind_gs_state;
+ softpipe->pipe.delete_gs_state = softpipe_delete_gs_state;
+
softpipe->pipe.set_blend_color = softpipe_set_blend_color;
softpipe->pipe.set_clip_state = softpipe_set_clip_state;
softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer;
@@ -238,6 +256,8 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.draw_arrays = softpipe_draw_arrays;
softpipe->pipe.draw_elements = softpipe_draw_elements;
softpipe->pipe.draw_range_elements = softpipe_draw_range_elements;
+ softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced;
+ softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced;
softpipe->pipe.clear = softpipe_clear;
softpipe->pipe.flush = softpipe_flush;
@@ -247,6 +267,8 @@ softpipe_create( struct pipe_screen *screen )
softpipe_init_query_funcs( softpipe );
+ softpipe->pipe.render_condition = softpipe_render_condition;
+
/*
* Alloc caches for accessing drawing surfaces and textures.
* Must be before quad stage setup!
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index ac24fccb4c..da673c57ad 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -58,6 +58,7 @@ struct softpipe_context {
struct pipe_rasterizer_state *rasterizer;
struct sp_fragment_shader *fs;
struct sp_vertex_shader *vs;
+ struct sp_geometry_shader *gs;
/** Other rendering state */
struct pipe_blend_color blend_color;
@@ -115,6 +116,10 @@ struct softpipe_context {
unsigned line_stipple_counter;
+ /** Conditional query object and mode */
+ struct pipe_query *render_cond_query;
+ uint render_cond_mode;
+
/** Software quad rendering pipeline */
struct {
struct quad_stage *shade;
@@ -147,6 +152,7 @@ struct softpipe_context {
unsigned use_sse : 1;
unsigned dump_fs : 1;
+ unsigned dump_gs : 1;
unsigned no_rast : 1;
};
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 96e1c5d815..03b58d2fb7 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -38,6 +38,7 @@
#include "util/u_prim.h"
#include "sp_context.h"
+#include "sp_query.h"
#include "sp_state.h"
#include "draw/draw_context.h"
@@ -48,7 +49,7 @@ static void
softpipe_map_constant_buffers(struct softpipe_context *sp)
{
struct pipe_winsys *ws = sp->pipe.winsys;
- uint i, size;
+ uint i, vssize, gssize;
for (i = 0; i < PIPE_SHADER_TYPES; i++) {
if (sp->constants[i] && sp->constants[i]->size)
@@ -57,13 +58,21 @@ softpipe_map_constant_buffers(struct softpipe_context *sp)
}
if (sp->constants[PIPE_SHADER_VERTEX])
- size = sp->constants[PIPE_SHADER_VERTEX]->size;
+ vssize = sp->constants[PIPE_SHADER_VERTEX]->size;
else
- size = 0;
+ vssize = 0;
- draw_set_mapped_constant_buffer(sp->draw,
+ if (sp->constants[PIPE_SHADER_GEOMETRY])
+ gssize = sp->constants[PIPE_SHADER_GEOMETRY]->size;
+ else
+ gssize = 0;
+
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX,
sp->mapped_constants[PIPE_SHADER_VERTEX],
- size);
+ vssize);
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY,
+ sp->mapped_constants[PIPE_SHADER_GEOMETRY],
+ gssize);
}
@@ -78,9 +87,10 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
*/
draw_flush(sp->draw);
- draw_set_mapped_constant_buffer(sp->draw, NULL, 0);
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, NULL, 0);
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0);
- for (i = 0; i < 2; i++) {
+ for (i = 0; i < PIPE_SHADER_TYPES; i++) {
if (sp->constants[i] && sp->constants[i]->size)
ws->buffer_unmap(ws, sp->constants[i]);
sp->mapped_constants[i] = NULL;
@@ -88,20 +98,42 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
}
-boolean
+/**
+ * Draw vertex arrays, with optional indexing.
+ * Basically, map the vertex buffers (and drawing surfaces), then hand off
+ * the drawing to the 'draw' module.
+ */
+static void
+softpipe_draw_range_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
+
+
+void
softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- return softpipe_draw_elements(pipe, NULL, 0, mode, start, count);
+ softpipe_draw_range_elements_instanced(pipe,
+ NULL,
+ 0,
+ 0,
+ 0xffffffff,
+ mode,
+ start,
+ count,
+ 0,
+ 1);
}
-/**
- * Draw vertex arrays, with optional indexing.
- * Basically, map the vertex buffers (and drawing surfaces), then hand off
- * the drawing to the 'draw' module.
- */
-boolean
+void
softpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -109,51 +141,142 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
unsigned max_index,
unsigned mode, unsigned start, unsigned count)
{
+ softpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize,
+ min_index,
+ max_index,
+ mode,
+ start,
+ count,
+ 0,
+ 1);
+}
+
+
+void
+softpipe_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ softpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize,
+ 0,
+ 0xffffffff,
+ mode,
+ start,
+ count,
+ 0,
+ 1);
+}
+
+void
+softpipe_draw_arrays_instanced(struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
+ softpipe_draw_range_elements_instanced(pipe,
+ NULL,
+ 0,
+ 0,
+ 0xffffffff,
+ mode,
+ start,
+ count,
+ startInstance,
+ instanceCount);
+}
+
+void
+softpipe_draw_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
+ softpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize,
+ 0,
+ 0xffffffff,
+ mode,
+ start,
+ count,
+ startInstance,
+ instanceCount);
+}
+
+static void
+softpipe_draw_range_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
struct softpipe_context *sp = softpipe_context(pipe);
struct draw_context *draw = sp->draw;
unsigned i;
+ if (!softpipe_check_render_cond(sp))
+ return;
+
sp->reduced_api_prim = u_reduced_prim(mode);
- if (sp->dirty)
- softpipe_update_derived( sp );
+ if (sp->dirty) {
+ softpipe_update_derived(sp);
+ }
softpipe_map_transfers(sp);
softpipe_map_constant_buffers(sp);
- /*
- * Map vertex buffers
- */
+ /* Map vertex buffers */
for (i = 0; i < sp->num_vertex_buffers; i++) {
- void *buf
- = pipe_buffer_map(pipe->screen,
- sp->vertex_buffer[i].buffer,
- PIPE_BUFFER_USAGE_CPU_READ);
+ void *buf;
+
+ buf = pipe_buffer_map(pipe->screen,
+ sp->vertex_buffer[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_vertex_buffer(draw, i, buf);
}
/* Map index buffer, if present */
if (indexBuffer) {
- void *mapped_indexes
- = pipe_buffer_map(pipe->screen, indexBuffer,
- PIPE_BUFFER_USAGE_CPU_READ);
- draw_set_mapped_element_buffer_range(draw, indexSize,
- min_index,
- max_index,
+ void *mapped_indexes;
+
+ mapped_indexes = pipe_buffer_map(pipe->screen,
+ indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer_range(draw,
+ indexSize,
+ minIndex,
+ maxIndex,
mapped_indexes);
- }
- else {
+ } else {
/* no index/element buffer */
- draw_set_mapped_element_buffer_range(draw, 0, start,
- start + count - 1, NULL);
+ draw_set_mapped_element_buffer_range(draw,
+ 0,
+ start,
+ start + count - 1,
+ NULL);
}
/* draw! */
- draw_arrays(draw, mode, start, count);
+ draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount);
- /*
- * unmap vertex/index buffers - will cause draw module to flush
- */
+ /* unmap vertex/index buffers - will cause draw module to flush */
for (i = 0; i < sp->num_vertex_buffers; i++) {
draw_set_mapped_vertex_buffer(draw, i, NULL);
pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer);
@@ -163,24 +286,8 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
pipe_buffer_unmap(pipe->screen, indexBuffer);
}
-
/* Note: leave drawing surfaces mapped */
softpipe_unmap_constant_buffers(sp);
sp->dirty_render_cache = TRUE;
-
- return TRUE;
-}
-
-
-boolean
-softpipe_draw_elements(struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned mode, unsigned start, unsigned count)
-{
- return softpipe_draw_range_elements( pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- mode, start, count );
}
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 5fbac06a53..7f573aef3c 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -128,6 +128,7 @@ sp_vbuf_unmap_vertices(struct vbuf_render *vbr,
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
+ (void) cvbr;
/* do nothing */
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index fe6b6cec35..d9babe81da 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -229,7 +229,7 @@ blend_quad(struct quad_stage *qs,
static const float zero[4] = { 0, 0, 0, 0 };
static const float one[4] = { 1, 1, 1, 1 };
struct softpipe_context *softpipe = qs->softpipe;
- float source[4][QUAD_SIZE];
+ float source[4][QUAD_SIZE] = { { 0 } };
/*
* Compute src/first term RGB
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index 379cf4ad06..4ef5d9f7b1 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -99,6 +99,32 @@ softpipe_get_query_result(struct pipe_context *pipe,
}
+/**
+ * Called by rendering function to check rendering is conditional.
+ * \return TRUE if we should render, FALSE if we should skip rendering
+ */
+boolean
+softpipe_check_render_cond(struct softpipe_context *sp)
+{
+ struct pipe_context *pipe = &sp->pipe;
+ boolean b, wait;
+ uint64_t result;
+
+ if (!sp->render_cond_query) {
+ return TRUE; /* no query predicate, draw normally */
+ }
+
+ wait = (sp->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+ sp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT);
+
+ b = pipe->get_query_result(pipe, sp->render_cond_query, wait, &result);
+ if (b)
+ return result > 0;
+ else
+ return TRUE;
+}
+
+
void softpipe_init_query_funcs(struct softpipe_context *softpipe )
{
softpipe->pipe.create_query = softpipe_create_query;
diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h
index 05060a4575..736c033897 100644
--- a/src/gallium/drivers/softpipe/sp_query.h
+++ b/src/gallium/drivers/softpipe/sp_query.h
@@ -32,6 +32,10 @@
#ifndef SP_QUERY_H
#define SP_QUERY_H
+extern boolean
+softpipe_check_render_cond(struct softpipe_context *sp);
+
+
struct softpipe_context;
extern void softpipe_init_query_funcs(struct softpipe_context * );
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 615581b95f..3da75364c5 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -1268,7 +1268,7 @@ void sp_setup_prepare( struct setup_context *setup )
}
/* Note: nr_attrs is only used for debugging (vertex printing) */
- setup->nr_vertex_attrs = draw_num_vs_outputs(sp->draw);
+ setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw);
sp->quad.first->begin( sp->quad.first );
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index c41e718488..7f244c4fd4 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -50,6 +50,7 @@
#define SP_NEW_VERTEX 0x1000
#define SP_NEW_VS 0x2000
#define SP_NEW_QUERY 0x4000
+#define SP_NEW_GS 0x8000
struct tgsi_sampler;
@@ -90,6 +91,11 @@ struct sp_vertex_shader {
int max_sampler; /* -1 if no samplers */
};
+/** Subclass of pipe_shader_state */
+struct sp_geometry_shader {
+ struct pipe_shader_state shader;
+ struct draw_geometry_shader *draw_data;
+};
void *
@@ -143,6 +149,10 @@ void *softpipe_create_vs_state(struct pipe_context *,
const struct pipe_shader_state *);
void softpipe_bind_vs_state(struct pipe_context *, void *);
void softpipe_delete_vs_state(struct pipe_context *, void *);
+void *softpipe_create_gs_state(struct pipe_context *,
+ const struct pipe_shader_state *);
+void softpipe_bind_gs_state(struct pipe_context *, void *);
+void softpipe_delete_gs_state(struct pipe_context *, void *);
void softpipe_set_polygon_stipple( struct pipe_context *,
const struct pipe_poly_stipple * );
@@ -174,14 +184,14 @@ void softpipe_set_vertex_buffers(struct pipe_context *,
void softpipe_update_derived( struct softpipe_context *softpipe );
-boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
- unsigned start, unsigned count);
+void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count);
-boolean softpipe_draw_elements(struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned mode, unsigned start, unsigned count);
-boolean
+void softpipe_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count);
+void
softpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -190,6 +200,24 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count);
void
+softpipe_draw_arrays_instanced(struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
+
+void
+softpipe_draw_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount);
+
+void
softpipe_map_transfers(struct softpipe_context *sp);
void
diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c
index efed082f82..95ab323433 100644
--- a/src/gallium/drivers/softpipe/sp_state_blend.c
+++ b/src/gallium/drivers/softpipe/sp_state_blend.c
@@ -29,6 +29,7 @@
*/
#include "util/u_memory.h"
+#include "draw/draw_context.h"
#include "sp_context.h"
#include "sp_state.h"
@@ -45,6 +46,8 @@ void softpipe_bind_blend_state( struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ draw_flush(softpipe->draw);
+
softpipe->blend = (struct pipe_blend_state *)blend;
softpipe->dirty |= SP_NEW_BLEND;
@@ -62,6 +65,8 @@ void softpipe_set_blend_color( struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ draw_flush(softpipe->draw);
+
softpipe->blend_color = *blend_color;
softpipe->dirty |= SP_NEW_BLEND;
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index c24a737d07..f6856a5f69 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -67,7 +67,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
/* compute vertex layout now */
const struct sp_fragment_shader *spfs = softpipe->fs;
struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
- const uint num = draw_num_vs_outputs(softpipe->draw);
+ const uint num = draw_current_shader_outputs(softpipe->draw);
uint i;
/* Tell draw_vbuf to simply emit the whole post-xform vertex
@@ -117,13 +117,13 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
}
/* this includes texcoords and varying vars */
- src = draw_find_vs_output(softpipe->draw,
- spfs->info.input_semantic_name[i],
- spfs->info.input_semantic_index[i]);
+ src = draw_find_shader_output(softpipe->draw,
+ spfs->info.input_semantic_name[i],
+ spfs->info.input_semantic_index[i]);
draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
}
- softpipe->psize_slot = draw_find_vs_output(softpipe->draw,
+ softpipe->psize_slot = draw_find_shader_output(softpipe->draw,
TGSI_SEMANTIC_PSIZE, 0);
if (softpipe->psize_slot > 0) {
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index eba0563c62..b7ed4441b4 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -69,7 +69,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
- softpipe->fs = (struct sp_fragment_shader *) fs;
+ draw_flush(softpipe->draw);
+
+ if (softpipe->fs == fs)
+ return;
+
+ draw_flush(softpipe->draw);
+
+ softpipe->fs = fs;
softpipe->dirty |= SP_NEW_FS;
}
@@ -159,8 +166,74 @@ softpipe_set_constant_buffer(struct pipe_context *pipe,
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
+ draw_flush(softpipe->draw);
+
/* note: reference counting */
pipe_buffer_reference(&softpipe->constants[shader], buf);
softpipe->dirty |= SP_NEW_CONSTANTS;
}
+
+void *
+softpipe_create_gs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ struct sp_geometry_shader *state;
+
+ state = CALLOC_STRUCT(sp_geometry_shader);
+ if (state == NULL )
+ goto fail;
+
+ /* debug */
+ if (softpipe->dump_gs)
+ tgsi_dump(templ->tokens, 0);
+
+ /* copy shader tokens, the ones passed in will go away.
+ */
+ state->shader.tokens = tgsi_dup_tokens(templ->tokens);
+ if (state->shader.tokens == NULL)
+ goto fail;
+
+ state->draw_data = draw_create_geometry_shader(softpipe->draw, templ);
+ if (state->draw_data == NULL)
+ goto fail;
+
+ return state;
+
+fail:
+ if (state) {
+ FREE( (void *)state->shader.tokens );
+ FREE( state->draw_data );
+ FREE( state );
+ }
+ return NULL;
+}
+
+
+void
+softpipe_bind_gs_state(struct pipe_context *pipe, void *gs)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ softpipe->gs = (struct sp_geometry_shader *)gs;
+
+ draw_bind_geometry_shader(softpipe->draw,
+ (softpipe->gs ? softpipe->gs->draw_data : NULL));
+
+ softpipe->dirty |= SP_NEW_GS;
+}
+
+
+void
+softpipe_delete_gs_state(struct pipe_context *pipe, void *gs)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ struct sp_geometry_shader *state =
+ (struct sp_geometry_shader *)gs;
+
+ draw_delete_geometry_shader(softpipe->draw,
+ (state) ? state->draw_data : 0);
+ FREE(state);
+}
diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
index 87b7219683..a5b00336d4 100644
--- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c
+++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
@@ -41,14 +41,17 @@ softpipe_create_rasterizer_state(struct pipe_context *pipe,
}
void softpipe_bind_rasterizer_state(struct pipe_context *pipe,
- void *setup)
+ void *rasterizer)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ if (softpipe->rasterizer == rasterizer)
+ return;
+
/* pass-through to draw module */
- draw_set_rasterizer_state(softpipe->draw, setup);
+ draw_set_rasterizer_state(softpipe->draw, rasterizer);
- softpipe->rasterizer = (struct pipe_rasterizer_state *)setup;
+ softpipe->rasterizer = rasterizer;
softpipe->dirty |= SP_NEW_RASTERIZER;
}
diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c
index a518248bb1..f6154109ea 100644
--- a/src/gallium/drivers/softpipe/sp_state_surface.c
+++ b/src/gallium/drivers/softpipe/sp_state_surface.c
@@ -51,6 +51,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
struct softpipe_context *sp = softpipe_context(pipe);
uint i;
+ draw_flush(sp->draw);
+
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
/* check if changing cbuf */
if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) {
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index e26153b1d9..1ae8fecacf 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2,7 +2,7 @@
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
- * Copyright 2008 VMware, Inc. All rights reserved.
+ * Copyright 2008-2010 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -514,21 +514,15 @@ static float
compute_lambda_1d(const struct sp_sampler_varient *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias)
+ const float p[QUAD_SIZE])
{
const struct pipe_texture *texture = samp->texture;
const struct pipe_sampler_state *sampler = samp->sampler;
float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
float rho = MAX2(dsdx, dsdy) * texture->width0;
- float lambda;
-
- lambda = util_fast_log2(rho);
- lambda += lodbias + sampler->lod_bias;
- lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
- return lambda;
+ return util_fast_log2(rho);
}
@@ -536,8 +530,7 @@ static float
compute_lambda_2d(const struct sp_sampler_varient *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias)
+ const float p[QUAD_SIZE])
{
const struct pipe_texture *texture = samp->texture;
const struct pipe_sampler_state *sampler = samp->sampler;
@@ -548,13 +541,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp,
float maxx = MAX2(dsdx, dsdy) * texture->width0;
float maxy = MAX2(dtdx, dtdy) * texture->height0;
float rho = MAX2(maxx, maxy);
- float lambda;
- lambda = util_fast_log2(rho);
- lambda += lodbias + sampler->lod_bias;
- lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
-
- return lambda;
+ return util_fast_log2(rho);
}
@@ -562,8 +550,7 @@ static float
compute_lambda_3d(const struct sp_sampler_varient *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias)
+ const float p[QUAD_SIZE])
{
const struct pipe_texture *texture = samp->texture;
const struct pipe_sampler_state *sampler = samp->sampler;
@@ -576,31 +563,26 @@ compute_lambda_3d(const struct sp_sampler_varient *samp,
float maxx = MAX2(dsdx, dsdy) * texture->width0;
float maxy = MAX2(dtdx, dtdy) * texture->height0;
float maxz = MAX2(dpdx, dpdy) * texture->depth0;
- float rho, lambda;
+ float rho;
rho = MAX2(maxx, maxy);
rho = MAX2(rho, maxz);
- lambda = util_fast_log2(rho);
- lambda += lodbias + sampler->lod_bias;
- lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
-
- return lambda;
+ return util_fast_log2(rho);
}
/**
* Compute lambda for a vertex texture sampler.
- * Since there aren't derivatives to use, just return the LOD bias.
+ * Since there aren't derivatives to use, just return 0.
*/
static float
compute_lambda_vert(const struct sp_sampler_varient *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias)
+ const float p[QUAD_SIZE])
{
- return lodbias;
+ return 0.0f;
}
@@ -769,7 +751,8 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -827,7 +810,8 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -866,7 +850,8 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -914,7 +899,8 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -949,7 +935,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -996,7 +983,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1035,7 +1023,8 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1076,7 +1065,8 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1115,7 +1105,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1161,7 +1152,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1209,7 +1201,8 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1261,29 +1254,60 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
}
+/* Calculate level of detail for every fragment.
+ * Note that lambda has already been biased by global LOD bias.
+ */
+static INLINE void
+compute_lod(const struct pipe_sampler_state *sampler,
+ const float biased_lambda,
+ const float lodbias[QUAD_SIZE],
+ float lod[QUAD_SIZE])
+{
+ uint i;
+
+ for (i = 0; i < QUAD_SIZE; i++) {
+ lod[i] = biased_lambda + lodbias[i];
+ lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
+ }
+}
+
+
static void
mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
const struct pipe_texture *texture = samp->texture;
int level0;
float lambda;
+ float lod[QUAD_SIZE];
+
+ if (control == tgsi_sampler_lod_bias) {
+ lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+ compute_lod(samp->sampler, lambda, c0, lod);
+ } else {
+ assert(control == tgsi_sampler_lod_explicit);
- lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+ memcpy(lod, c0, sizeof(lod));
+ }
+
+ /* XXX: Take into account all lod values.
+ */
+ lambda = lod[0];
level0 = (int)lambda;
if (lambda < 0.0) {
samp->level = 0;
- samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
else if (level0 >= texture->last_level) {
samp->level = texture->last_level;
- samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
else {
float levelBlend = lambda - level0;
@@ -1292,10 +1316,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
int c,j;
samp->level = level0;
- samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 );
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0);
samp->level = level0+1;
- samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 );
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1);
for (j = 0; j < QUAD_SIZE; j++) {
for (c = 0; c < 4; c++) {
@@ -1311,23 +1335,36 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
const struct pipe_texture *texture = samp->texture;
float lambda;
+ float lod[QUAD_SIZE];
- lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+ if (control == tgsi_sampler_lod_bias) {
+ lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+ compute_lod(samp->sampler, lambda, c0, lod);
+ } else {
+ assert(control == tgsi_sampler_lod_explicit);
+
+ memcpy(lod, c0, sizeof(lod));
+ }
+
+ /* XXX: Take into account all lod values.
+ */
+ lambda = lod[0];
if (lambda < 0.0) {
samp->level = 0;
- samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
else {
samp->level = (int)(lambda + 0.5) ;
samp->level = MIN2(samp->level, (int)texture->last_level);
- samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
#if 0
@@ -1345,17 +1382,32 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- float lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+ float lambda;
+ float lod[QUAD_SIZE];
+
+ if (control == tgsi_sampler_lod_bias) {
+ lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+ compute_lod(samp->sampler, lambda, c0, lod);
+ } else {
+ assert(control == tgsi_sampler_lod_explicit);
+
+ memcpy(lod, c0, sizeof(lod));
+ }
+
+ /* XXX: Take into account all lod values.
+ */
+ lambda = lod[0];
if (lambda < 0.0) {
- samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
else {
- samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
}
@@ -1371,15 +1423,28 @@ mip_filter_linear_2d_linear_repeat_POT(
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
const struct pipe_texture *texture = samp->texture;
int level0;
float lambda;
+ float lod[QUAD_SIZE];
- lambda = compute_lambda_2d(samp, s, t, p, lodbias);
+ if (control == tgsi_sampler_lod_bias) {
+ lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+ compute_lod(samp->sampler, lambda, c0, lod);
+ } else {
+ assert(control == tgsi_sampler_lod_explicit);
+
+ memcpy(lod, c0, sizeof(lod));
+ }
+
+ /* XXX: Take into account all lod values.
+ */
+ lambda = lod[0];
level0 = (int)lambda;
/* Catches both negative and large values of level0:
@@ -1390,7 +1455,7 @@ mip_filter_linear_2d_linear_repeat_POT(
else
samp->level = texture->last_level;
- img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba );
+ img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
else {
float levelBlend = lambda - level0;
@@ -1399,10 +1464,10 @@ mip_filter_linear_2d_linear_repeat_POT(
int c,j;
samp->level = level0;
- img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 );
+ img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0);
samp->level = level0+1;
- img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 );
+ img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1);
for (j = 0; j < QUAD_SIZE; j++) {
for (c = 0; c < 4; c++) {
@@ -1422,7 +1487,8 @@ sample_compare(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1430,7 +1496,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler,
int j, k0, k1, k2, k3;
float val;
- samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba );
+ samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba);
/**
* Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
@@ -1508,7 +1574,8 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1589,7 +1656,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
* is not active, this will point somewhere deeper into the
* pipeline, eg. to mip_filter or even img_filter.
*/
- samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba);
+ samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba);
}
@@ -1862,7 +1929,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
break;
}
- if (sampler->compare_mode != FALSE) {
+ if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
samp->compare = sample_compare;
}
else {
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index b0797711d3..b6e66c998a 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -2,6 +2,7 @@
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2010 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -46,14 +47,14 @@ typedef void (*wrap_linear_func)(const float s[4],
typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias);
+ const float p[QUAD_SIZE]);
typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE]);
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index c3de12b4a3..af99c9de37 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -29,6 +29,7 @@
#include "pipe/p_inlines.h"
#include "pipe/p_screen.h"
#include "util/u_memory.h"
+#include "util/u_bitmask.h"
#include "util/u_upload_mgr.h"
#include "svga_context.h"
@@ -61,6 +62,9 @@ static void svga_destroy( struct pipe_context *pipe )
u_upload_destroy( svga->upload_vb );
u_upload_destroy( svga->upload_ib );
+ util_bitmask_destroy( svga->vs_bm );
+ util_bitmask_destroy( svga->fs_bm );
+
for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader)
pipe_buffer_reference( &svga->curr.cb[shader], NULL );
@@ -130,7 +134,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
svga = CALLOC_STRUCT(svga_context);
if (svga == NULL)
- goto error1;
+ goto no_svga;
svga->pipe.winsys = screen->winsys;
svga->pipe.screen = screen;
@@ -142,7 +146,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
svga->swc = svgascreen->sws->context_create(svgascreen->sws);
if(!svga->swc)
- goto error2;
+ goto no_swc;
svga_init_blend_functions(svga);
svga_init_blit_functions(svga);
@@ -165,32 +169,40 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0);
if (!svga_init_swtnl(svga))
- goto error3;
+ goto no_swtnl;
+
+ svga->fs_bm = util_bitmask_create();
+ if (svga->fs_bm == NULL)
+ goto no_fs_bm;
+
+ svga->vs_bm = util_bitmask_create();
+ if (svga->vs_bm == NULL)
+ goto no_vs_bm;
svga->upload_ib = u_upload_create( svga->pipe.screen,
32 * 1024,
16,
PIPE_BUFFER_USAGE_INDEX );
if (svga->upload_ib == NULL)
- goto error4;
+ goto no_upload_ib;
svga->upload_vb = u_upload_create( svga->pipe.screen,
128 * 1024,
16,
PIPE_BUFFER_USAGE_VERTEX );
if (svga->upload_vb == NULL)
- goto error5;
+ goto no_upload_vb;
svga->hwtnl = svga_hwtnl_create( svga,
svga->upload_ib,
svga->swc );
if (svga->hwtnl == NULL)
- goto error6;
+ goto no_hwtnl;
ret = svga_emit_initial_state( svga );
if (ret)
- goto error7;
+ goto no_state;
/* Avoid shortcircuiting state with initial value of zero.
*/
@@ -209,19 +221,23 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
return &svga->pipe;
-error7:
+no_state:
svga_hwtnl_destroy( svga->hwtnl );
-error6:
+no_hwtnl:
u_upload_destroy( svga->upload_vb );
-error5:
+no_upload_vb:
u_upload_destroy( svga->upload_ib );
-error4:
+no_upload_ib:
+ util_bitmask_destroy( svga->vs_bm );
+no_vs_bm:
+ util_bitmask_destroy( svga->fs_bm );
+no_fs_bm:
svga_destroy_swtnl(svga);
-error3:
+no_swtnl:
svga->swc->destroy(svga->swc);
-error2:
+no_swc:
FREE(svga);
-error1:
+no_svga:
return NULL;
}
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 0885d9ca74..66259fd010 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -41,6 +41,7 @@
struct draw_vertex_shader;
struct svga_shader_result;
struct SVGACmdMemory;
+struct util_bitmask;
struct u_upload_mgr;
@@ -265,8 +266,6 @@ struct svga_hw_draw_state
unsigned ts[16][TS_MAX];
float cb[PIPE_SHADER_TYPES][CB_MAX][4];
- unsigned shader_id[PIPE_SHADER_TYPES];
-
struct svga_shader_result *fs;
struct svga_shader_result *vs;
struct svga_hw_view_state views[PIPE_MAX_SAMPLERS];
@@ -319,12 +318,14 @@ struct svga_context
boolean new_vdecl;
} swtnl;
+ /* Bitmask of used shader IDs */
+ struct util_bitmask *fs_bm;
+ struct util_bitmask *vs_bm;
+
struct {
unsigned dirty[4];
unsigned texture_timestamp;
- unsigned next_fs_id;
- unsigned next_vs_id;
/* Internally generated shaders:
*/
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 8db40d0fd5..ca73cf9d5a 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -164,7 +164,8 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
}
SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
- svga_surface(svga->curr.framebuffer.cbufs[0])->handle,
+ svga->curr.framebuffer.cbufs[0] ?
+ svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
hwtnl->cmd.prim_count);
ret = SVGA3D_BeginDrawPrimitives(swc,
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index 71a552862e..0f24ef4ee8 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -149,7 +149,7 @@ retry:
-static boolean
+static void
svga_draw_range_elements( struct pipe_context *pipe,
struct pipe_buffer *index_buffer,
unsigned index_size,
@@ -162,7 +162,7 @@ svga_draw_range_elements( struct pipe_context *pipe,
enum pipe_error ret = 0;
if (!u_trim_pipe_prim( prim, &count ))
- return TRUE;
+ return;
/*
* Mark currently bound target surfaces as dirty
@@ -183,7 +183,7 @@ svga_draw_range_elements( struct pipe_context *pipe,
#ifdef DEBUG
if (svga->curr.vs->base.id == svga->debug.disable_shader ||
svga->curr.fs->base.id == svga->debug.disable_shader)
- return 0;
+ return;
#endif
if (svga->state.sw.need_swtnl)
@@ -225,31 +225,29 @@ svga_draw_range_elements( struct pipe_context *pipe,
svga_hwtnl_flush_retry( svga );
svga_context_flush(svga, NULL);
}
-
- return ret == PIPE_OK;
}
-static boolean
+static void
svga_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *index_buffer,
unsigned index_size,
unsigned prim, unsigned start, unsigned count)
{
- return svga_draw_range_elements( pipe, index_buffer,
- index_size,
- 0, 0xffffffff,
- prim, start, count );
+ svga_draw_range_elements( pipe, index_buffer,
+ index_size,
+ 0, 0xffffffff,
+ prim, start, count );
}
-static boolean
+static void
svga_draw_arrays( struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return svga_draw_range_elements(pipe, NULL, 0,
- start, start + count - 1,
- prim,
- start, count);
+ svga_draw_range_elements(pipe, NULL, 0,
+ start, start + count - 1,
+ prim,
+ start, count);
}
diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c
index e3be840d92..5f1213e46a 100644
--- a/src/gallium/drivers/svga/svga_pipe_fs.c
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@@ -26,6 +26,7 @@
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_bitmask.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_text.h"
@@ -107,7 +108,16 @@ void svga_delete_fs_state(struct pipe_context *pipe, void *shader)
assert(ret == PIPE_OK);
}
+ util_bitmask_clear( svga->fs_bm, result->id );
+
svga_destroy_shader_result( result );
+
+ /*
+ * Remove stale references to this result to ensure a new result on the
+ * same address will be detected as a change.
+ */
+ if(result == svga->state.hw_draw.fs)
+ svga->state.hw_draw.fs = NULL;
}
FREE((void *)fs->base.tokens);
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index 78053e755e..460a101f8c 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -76,7 +76,6 @@ static INLINE unsigned translate_img_filter( unsigned filter )
switch (filter) {
case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST;
case PIPE_TEX_FILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR;
- case PIPE_TEX_FILTER_ANISO: return SVGA3D_TEX_FILTER_ANISOTROPIC;
default:
assert(0);
return SVGA3D_TEX_FILTER_NEAREST;
@@ -107,6 +106,8 @@ svga_create_sampler_state(struct pipe_context *pipe,
cso->magfilter = translate_img_filter( sampler->mag_img_filter );
cso->minfilter = translate_img_filter( sampler->min_img_filter );
cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 );
+ if(cso->aniso_level != 1)
+ cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC;
cso->lod_bias = sampler->lod_bias;
cso->addressu = translate_wrap_mode(sampler->wrap_s);
cso->addressv = translate_wrap_mode(sampler->wrap_t);
diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c
index c104c41f5f..7e6ab576ad 100644
--- a/src/gallium/drivers/svga/svga_pipe_vs.c
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -27,6 +27,7 @@
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_bitmask.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_text.h"
@@ -172,7 +173,16 @@ static void svga_delete_vs_state(struct pipe_context *pipe, void *shader)
assert(ret == PIPE_OK);
}
+ util_bitmask_clear( svga->vs_bm, result->id );
+
svga_destroy_shader_result( result );
+
+ /*
+ * Remove stale references to this result to ensure a new result on the
+ * same address will be detected as a change.
+ */
+ if(result == svga->state.hw_draw.vs)
+ svga->state.hw_draw.vs = NULL;
}
FREE((void *)vs->base.tokens);
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index 6ec38ed3e4..ec2886348b 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -26,6 +26,7 @@
#include "pipe/p_inlines.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
+#include "util/u_bitmask.h"
#include "svga_context.h"
#include "svga_state.h"
@@ -39,8 +40,13 @@
static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a,
const struct svga_fs_compile_key *b )
{
- unsigned keysize = svga_fs_key_size( a );
- return memcmp( a, b, keysize );
+ unsigned keysize_a = svga_fs_key_size( a );
+ unsigned keysize_b = svga_fs_key_size( b );
+
+ if (keysize_a != keysize_b) {
+ return (int)(keysize_a - keysize_b);
+ }
+ return memcmp( a, b, keysize_a );
}
@@ -66,7 +72,7 @@ static enum pipe_error compile_fs( struct svga_context *svga,
struct svga_shader_result **out_result )
{
struct svga_shader_result *result;
- enum pipe_error ret;
+ enum pipe_error ret = PIPE_ERROR;
result = svga_translate_fragment_program( fs, key );
if (result == NULL) {
@@ -74,9 +80,14 @@ static enum pipe_error compile_fs( struct svga_context *svga,
goto fail;
}
+ result->id = util_bitmask_add(svga->fs_bm);
+ if(result->id == UTIL_BITMASK_INVALID_INDEX) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto fail;
+ }
ret = SVGA3D_DefineShader(svga->swc,
- svga->state.next_fs_id,
+ result->id,
SVGA3D_SHADERTYPE_PS,
result->tokens,
result->nr_tokens * sizeof result->tokens[0]);
@@ -84,14 +95,16 @@ static enum pipe_error compile_fs( struct svga_context *svga,
goto fail;
*out_result = result;
- result->id = svga->state.next_fs_id++;
result->next = fs->base.results;
fs->base.results = result;
return PIPE_OK;
fail:
- if (result)
+ if (result) {
+ if (result->id != UTIL_BITMASK_INVALID_INDEX)
+ util_bitmask_clear( svga->fs_bm, result->id );
svga_destroy_shader_result( result );
+ }
return ret;
}
@@ -116,7 +129,7 @@ fail:
*/
static int emit_white_fs( struct svga_context *svga )
{
- int ret;
+ int ret = PIPE_ERROR;
/* ps_3_0
* def c0, 1.000000, 0.000000, 0.000000, 1.000000
@@ -137,16 +150,26 @@ static int emit_white_fs( struct svga_context *svga )
0x0000ffff,
};
+ assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX);
+ svga->state.white_fs_id = util_bitmask_add(svga->fs_bm);
+ if(svga->state.white_fs_id == SVGA3D_INVALID_ID)
+ goto no_fs_id;
+
ret = SVGA3D_DefineShader(svga->swc,
- svga->state.next_fs_id,
+ svga->state.white_fs_id,
SVGA3D_SHADERTYPE_PS,
white_tokens,
sizeof(white_tokens));
if (ret)
- return ret;
+ goto no_definition;
- svga->state.white_fs_id = svga->state.next_fs_id++;
return 0;
+
+no_definition:
+ util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id);
+ svga->state.white_fs_id = SVGA3D_INVALID_ID;
+no_fs_id:
+ return ret;
}
@@ -251,15 +274,14 @@ static int emit_hw_fs( struct svga_context *svga,
assert(id != SVGA3D_INVALID_ID);
- if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) {
- ret = SVGA3D_SetShader(svga->swc,
- SVGA3D_SHADERTYPE_PS,
+ if (result != svga->state.hw_draw.fs) {
+ ret = SVGA3D_SetShader(svga->swc,
+ SVGA3D_SHADERTYPE_PS,
id );
if (ret)
return ret;
svga->dirty |= SVGA_NEW_FS_RESULT;
- svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id;
svga->state.hw_draw.fs = result;
}
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 44b7ceb4fa..e7e6c08432 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -27,6 +27,7 @@
#include "pipe/p_defines.h"
#include "util/u_format.h"
#include "util/u_math.h"
+#include "util/u_bitmask.h"
#include "translate/translate.h"
#include "svga_context.h"
@@ -70,7 +71,7 @@ static enum pipe_error compile_vs( struct svga_context *svga,
struct svga_shader_result **out_result )
{
struct svga_shader_result *result;
- enum pipe_error ret = PIPE_OK;
+ enum pipe_error ret = PIPE_ERROR;
result = svga_translate_vertex_program( vs, key );
if (result == NULL) {
@@ -78,8 +79,14 @@ static enum pipe_error compile_vs( struct svga_context *svga,
goto fail;
}
+ result->id = util_bitmask_add(svga->vs_bm);
+ if(result->id == UTIL_BITMASK_INVALID_INDEX) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto fail;
+ }
+
ret = SVGA3D_DefineShader(svga->swc,
- svga->state.next_vs_id,
+ result->id,
SVGA3D_SHADERTYPE_VS,
result->tokens,
result->nr_tokens * sizeof result->tokens[0]);
@@ -87,14 +94,16 @@ static enum pipe_error compile_vs( struct svga_context *svga,
goto fail;
*out_result = result;
- result->id = svga->state.next_vs_id++;
result->next = vs->base.results;
vs->base.results = result;
return PIPE_OK;
fail:
- if (result)
+ if (result) {
+ if (result->id != UTIL_BITMASK_INVALID_INDEX)
+ util_bitmask_clear( svga->vs_bm, result->id );
svga_destroy_shader_result( result );
+ }
return ret;
}
@@ -142,15 +151,14 @@ static int emit_hw_vs( struct svga_context *svga,
id = result->id;
}
- if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) {
- ret = SVGA3D_SetShader(svga->swc,
- SVGA3D_SHADERTYPE_VS,
+ if (result != svga->state.hw_draw.vs) {
+ ret = SVGA3D_SetShader(svga->swc,
+ SVGA3D_SHADERTYPE_VS,
id );
if (ret)
return ret;
svga->dirty |= SVGA_NEW_VS_RESULT;
- svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id;
svga->state.hw_draw.vs = result;
}
@@ -194,10 +202,12 @@ static int update_zero_stride( struct svga_context *svga,
key.output_stride = 4 * sizeof(float);
key.nr_elements = 1;
+ key.element[0].type = TRANSLATE_ELEMENT_NORMAL;
key.element[0].input_format = vel->src_format;
key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
key.element[0].input_buffer = vel->vertex_buffer_index;
key.element[0].input_offset = vel->src_offset;
+ key.element[0].instance_divisor = vel->instance_divisor;
key.element[0].output_offset = const_idx * 4 * sizeof(float);
translate_key_sanitize(&key);
@@ -216,7 +226,7 @@ static int update_zero_stride( struct svga_context *svga,
translate->set_buffer(translate, vel->vertex_buffer_index,
mapped_buffer,
vbuffer->stride);
- translate->run(translate, 0, 1,
+ translate->run(translate, 0, 1, 0,
svga->curr.zero_stride_constants);
pipe_buffer_unmap(svga->pipe.screen,
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
index 8b14c913f7..7655121bec 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -90,7 +90,7 @@ svga_swtnl_draw_range_elements(struct svga_context *svga,
PIPE_BUFFER_USAGE_CPU_READ);
assert(map);
draw_set_mapped_constant_buffer(
- draw,
+ draw, PIPE_SHADER_VERTEX,
map,
svga->curr.cb[PIPE_SHADER_VERTEX]->size);
}
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index 25b8c2af3a..94b6ccc62d 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -156,7 +156,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga )
memset(vdecl, 0, sizeof(vdecl));
/* always add position */
- src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo->attrib[0].emit = EMIT_4F;
vdecl[0].array.offset = offset;
@@ -169,7 +169,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga )
for (i = 0; i < fs->base.info.num_inputs; i++) {
unsigned name = fs->base.info.input_semantic_name[i];
unsigned index = fs->base.info.input_semantic_index[i];
- src = draw_find_vs_output(draw, name, index);
+ src = draw_find_shader_output(draw, name, index);
vdecl[nr_decls].array.offset = offset;
vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i];
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index b8ef137c01..0cd620189b 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -31,6 +31,7 @@
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
#include "util/u_memory.h"
+#include "util/u_bitmask.h"
#include "svgadump/svga_shader_dump.h"
@@ -221,6 +222,7 @@ svga_tgsi_translate( const struct svga_shader *shader,
result->tokens = (const unsigned *)emit.buf;
result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned);
memcpy(&result->key, &key, sizeof key);
+ result->id = UTIL_BITMASK_INVALID_INDEX;
if (SVGA_DEBUG & DEBUG_TGSI)
{
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index 896c90a89a..737a2213af 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -39,26 +39,24 @@ struct tgsi_token;
struct svga_vs_compile_key
{
- ubyte need_prescale:1;
- ubyte allow_psiz:1;
unsigned zero_stride_vertex_elements;
- ubyte num_zero_stride_vertex_elements:6;
+ unsigned need_prescale:1;
+ unsigned allow_psiz:1;
+ unsigned num_zero_stride_vertex_elements:6;
};
struct svga_fs_compile_key
{
- boolean light_twoside:1;
- boolean front_cw:1;
- ubyte num_textures;
- ubyte num_unnormalized_coords;
+ unsigned light_twoside:1;
+ unsigned front_cw:1;
+ unsigned num_textures:8;
+ unsigned num_unnormalized_coords:8;
struct {
- ubyte compare_mode : 1;
- ubyte compare_func : 3;
- ubyte unnormalized : 1;
-
- ubyte width_height_idx : 7;
-
- ubyte texture_target;
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned unnormalized:1;
+ unsigned width_height_idx:7;
+ unsigned texture_target:8;
} tex[PIPE_MAX_SAMPLERS];
};
@@ -121,8 +119,7 @@ static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
{
- return (const char *)&key->tex[key->num_textures].texture_target -
- (const char *)key;
+ return (const char *)&key->tex[key->num_textures] - (const char *)key;
}
struct svga_shader_result *
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 1670da8bfa..dc5eb8fc60 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -2109,7 +2109,7 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_NOT:
case TGSI_OPCODE_SHL:
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
case TGSI_OPCODE_XOR:
return FALSE;
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
index e6d4a74e86..d59fb89a58 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -1444,6 +1444,312 @@ dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd)
void
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size)
+{
+ const uint8_t *body = (const uint8_t *)data;
+ const uint8_t *next = body + size;
+
+ switch(cmd_id) {
+ case SVGA_3D_CMD_SURFACE_DEFINE:
+ _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
+ {
+ const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body;
+ dump_SVGA3dCmdDefineSurface(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dSize) <= next) {
+ dump_SVGA3dSize((const SVGA3dSize *)body);
+ body += sizeof(SVGA3dSize);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SURFACE_DESTROY:
+ _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
+ {
+ const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body;
+ dump_SVGA3dCmdDestroySurface(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SURFACE_COPY:
+ _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
+ {
+ const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body;
+ dump_SVGA3dCmdSurfaceCopy(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dCopyBox) <= next) {
+ dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+ body += sizeof(SVGA3dCopyBox);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SURFACE_STRETCHBLT:
+ _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
+ {
+ const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body;
+ dump_SVGA3dCmdSurfaceStretchBlt(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SURFACE_DMA:
+ _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
+ {
+ const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body;
+ dump_SVGA3dCmdSurfaceDMA(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dCopyBox) <= next) {
+ dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+ body += sizeof(SVGA3dCopyBox);
+ }
+ while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) {
+ dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body);
+ body += sizeof(SVGA3dCmdSurfaceDMASuffix);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_CONTEXT_DEFINE:
+ _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
+ {
+ const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body;
+ dump_SVGA3dCmdDefineContext(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_CONTEXT_DESTROY:
+ _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
+ {
+ const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body;
+ dump_SVGA3dCmdDestroyContext(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETTRANSFORM:
+ _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
+ {
+ const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body;
+ dump_SVGA3dCmdSetTransform(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETZRANGE:
+ _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
+ {
+ const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body;
+ dump_SVGA3dCmdSetZRange(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETRENDERSTATE:
+ _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
+ {
+ const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body;
+ dump_SVGA3dCmdSetRenderState(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dRenderState) <= next) {
+ dump_SVGA3dRenderState((const SVGA3dRenderState *)body);
+ body += sizeof(SVGA3dRenderState);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SETRENDERTARGET:
+ _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
+ {
+ const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body;
+ dump_SVGA3dCmdSetRenderTarget(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETTEXTURESTATE:
+ _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
+ {
+ const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body;
+ dump_SVGA3dCmdSetTextureState(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dTextureState) <= next) {
+ dump_SVGA3dTextureState((const SVGA3dTextureState *)body);
+ body += sizeof(SVGA3dTextureState);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SETMATERIAL:
+ _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
+ {
+ const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body;
+ dump_SVGA3dCmdSetMaterial(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETLIGHTDATA:
+ _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
+ {
+ const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body;
+ dump_SVGA3dCmdSetLightData(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETLIGHTENABLED:
+ _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
+ {
+ const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body;
+ dump_SVGA3dCmdSetLightEnabled(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETVIEWPORT:
+ _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
+ {
+ const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body;
+ dump_SVGA3dCmdSetViewport(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETCLIPPLANE:
+ _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
+ {
+ const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body;
+ dump_SVGA3dCmdSetClipPlane(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_CLEAR:
+ _debug_printf("\tSVGA_3D_CMD_CLEAR\n");
+ {
+ const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body;
+ dump_SVGA3dCmdClear(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dRect) <= next) {
+ dump_SVGA3dRect((const SVGA3dRect *)body);
+ body += sizeof(SVGA3dRect);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_PRESENT:
+ _debug_printf("\tSVGA_3D_CMD_PRESENT\n");
+ {
+ const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body;
+ dump_SVGA3dCmdPresent(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dCopyRect) <= next) {
+ dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body);
+ body += sizeof(SVGA3dCopyRect);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SHADER_DEFINE:
+ _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
+ {
+ const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
+ dump_SVGA3dCmdDefineShader(cmd);
+ body = (const uint8_t *)&cmd[1];
+ svga_shader_dump((const uint32_t *)body,
+ (unsigned)(next - body)/sizeof(uint32_t),
+ FALSE );
+ body = next;
+ }
+ break;
+ case SVGA_3D_CMD_SHADER_DESTROY:
+ _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
+ {
+ const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body;
+ dump_SVGA3dCmdDestroyShader(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SET_SHADER:
+ _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
+ {
+ const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body;
+ dump_SVGA3dCmdSetShader(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SET_SHADER_CONST:
+ _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
+ {
+ const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body;
+ dump_SVGA3dCmdSetShaderConst(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_DRAW_PRIMITIVES:
+ _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
+ {
+ const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body;
+ unsigned i, j;
+ dump_SVGA3dCmdDrawPrimitives(cmd);
+ body = (const uint8_t *)&cmd[1];
+ for(i = 0; i < cmd->numVertexDecls; ++i) {
+ dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body);
+ body += sizeof(SVGA3dVertexDecl);
+ }
+ for(j = 0; j < cmd->numRanges; ++j) {
+ dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body);
+ body += sizeof(SVGA3dPrimitiveRange);
+ }
+ while(body + sizeof(SVGA3dVertexDivisor) <= next) {
+ dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body);
+ body += sizeof(SVGA3dVertexDivisor);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SETSCISSORRECT:
+ _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
+ {
+ const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body;
+ dump_SVGA3dCmdSetScissorRect(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_BEGIN_QUERY:
+ _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
+ {
+ const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body;
+ dump_SVGA3dCmdBeginQuery(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_END_QUERY:
+ _debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
+ {
+ const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body;
+ dump_SVGA3dCmdEndQuery(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_WAIT_FOR_QUERY:
+ _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
+ {
+ const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body;
+ dump_SVGA3dCmdWaitForQuery(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN:
+ _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n");
+ {
+ const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body;
+ dump_SVGA3dCmdBlitSurfaceToScreen(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGASignedRect) <= next) {
+ dump_SVGASignedRect((const SVGASignedRect *)body);
+ body += sizeof(SVGASignedRect);
+ }
+ }
+ break;
+ default:
+ _debug_printf("\t0x%08x\n", cmd_id);
+ break;
+ }
+
+ while(body + sizeof(uint32_t) <= next) {
+ _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+ body += sizeof(uint32_t);
+ }
+ while(body + sizeof(uint32_t) <= next)
+ _debug_printf("\t\t0x%02x\n", *body++);
+}
+
+
+void
svga_dump_commands(const void *commands, uint32_t size)
{
const uint8_t *next = commands;
@@ -1458,307 +1764,11 @@ svga_dump_commands(const void *commands, uint32_t size)
const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
const uint8_t *body = (const uint8_t *)&header[1];
- next = (const uint8_t *)body + header->size;
+ next = body + header->size;
if(next > last)
break;
- switch(cmd_id) {
- case SVGA_3D_CMD_SURFACE_DEFINE:
- _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
- {
- const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body;
- dump_SVGA3dCmdDefineSurface(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dSize) <= next) {
- dump_SVGA3dSize((const SVGA3dSize *)body);
- body += sizeof(SVGA3dSize);
- }
- }
- break;
- case SVGA_3D_CMD_SURFACE_DESTROY:
- _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
- {
- const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body;
- dump_SVGA3dCmdDestroySurface(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SURFACE_COPY:
- _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
- {
- const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body;
- dump_SVGA3dCmdSurfaceCopy(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dCopyBox) <= next) {
- dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
- body += sizeof(SVGA3dCopyBox);
- }
- }
- break;
- case SVGA_3D_CMD_SURFACE_STRETCHBLT:
- _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
- {
- const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body;
- dump_SVGA3dCmdSurfaceStretchBlt(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SURFACE_DMA:
- _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
- {
- const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body;
- dump_SVGA3dCmdSurfaceDMA(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dCopyBox) <= next) {
- dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
- body += sizeof(SVGA3dCopyBox);
- }
- while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) {
- dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body);
- body += sizeof(SVGA3dCmdSurfaceDMASuffix);
- }
- }
- break;
- case SVGA_3D_CMD_CONTEXT_DEFINE:
- _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
- {
- const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body;
- dump_SVGA3dCmdDefineContext(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_CONTEXT_DESTROY:
- _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
- {
- const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body;
- dump_SVGA3dCmdDestroyContext(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETTRANSFORM:
- _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
- {
- const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body;
- dump_SVGA3dCmdSetTransform(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETZRANGE:
- _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
- {
- const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body;
- dump_SVGA3dCmdSetZRange(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETRENDERSTATE:
- _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
- {
- const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body;
- dump_SVGA3dCmdSetRenderState(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dRenderState) <= next) {
- dump_SVGA3dRenderState((const SVGA3dRenderState *)body);
- body += sizeof(SVGA3dRenderState);
- }
- }
- break;
- case SVGA_3D_CMD_SETRENDERTARGET:
- _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
- {
- const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body;
- dump_SVGA3dCmdSetRenderTarget(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETTEXTURESTATE:
- _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
- {
- const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body;
- dump_SVGA3dCmdSetTextureState(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dTextureState) <= next) {
- dump_SVGA3dTextureState((const SVGA3dTextureState *)body);
- body += sizeof(SVGA3dTextureState);
- }
- }
- break;
- case SVGA_3D_CMD_SETMATERIAL:
- _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
- {
- const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body;
- dump_SVGA3dCmdSetMaterial(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETLIGHTDATA:
- _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
- {
- const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body;
- dump_SVGA3dCmdSetLightData(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETLIGHTENABLED:
- _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
- {
- const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body;
- dump_SVGA3dCmdSetLightEnabled(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETVIEWPORT:
- _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
- {
- const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body;
- dump_SVGA3dCmdSetViewport(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SETCLIPPLANE:
- _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
- {
- const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body;
- dump_SVGA3dCmdSetClipPlane(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_CLEAR:
- _debug_printf("\tSVGA_3D_CMD_CLEAR\n");
- {
- const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body;
- dump_SVGA3dCmdClear(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dRect) <= next) {
- dump_SVGA3dRect((const SVGA3dRect *)body);
- body += sizeof(SVGA3dRect);
- }
- }
- break;
- case SVGA_3D_CMD_PRESENT:
- _debug_printf("\tSVGA_3D_CMD_PRESENT\n");
- {
- const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body;
- dump_SVGA3dCmdPresent(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGA3dCopyRect) <= next) {
- dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body);
- body += sizeof(SVGA3dCopyRect);
- }
- }
- break;
- case SVGA_3D_CMD_SHADER_DEFINE:
- _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
- {
- const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
- dump_SVGA3dCmdDefineShader(cmd);
- body = (const uint8_t *)&cmd[1];
- svga_shader_dump((const uint32_t *)body,
- (unsigned)(next - body)/sizeof(uint32_t),
- FALSE );
- body = next;
- }
- break;
- case SVGA_3D_CMD_SHADER_DESTROY:
- _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
- {
- const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body;
- dump_SVGA3dCmdDestroyShader(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SET_SHADER:
- _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
- {
- const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body;
- dump_SVGA3dCmdSetShader(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_SET_SHADER_CONST:
- _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
- {
- const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body;
- dump_SVGA3dCmdSetShaderConst(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_DRAW_PRIMITIVES:
- _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
- {
- const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body;
- unsigned i, j;
- dump_SVGA3dCmdDrawPrimitives(cmd);
- body = (const uint8_t *)&cmd[1];
- for(i = 0; i < cmd->numVertexDecls; ++i) {
- dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body);
- body += sizeof(SVGA3dVertexDecl);
- }
- for(j = 0; j < cmd->numRanges; ++j) {
- dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body);
- body += sizeof(SVGA3dPrimitiveRange);
- }
- while(body + sizeof(SVGA3dVertexDivisor) <= next) {
- dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body);
- body += sizeof(SVGA3dVertexDivisor);
- }
- }
- break;
- case SVGA_3D_CMD_SETSCISSORRECT:
- _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
- {
- const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body;
- dump_SVGA3dCmdSetScissorRect(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_BEGIN_QUERY:
- _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
- {
- const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body;
- dump_SVGA3dCmdBeginQuery(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_END_QUERY:
- _debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
- {
- const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body;
- dump_SVGA3dCmdEndQuery(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_WAIT_FOR_QUERY:
- _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
- {
- const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body;
- dump_SVGA3dCmdWaitForQuery(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
- case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN:
- _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n");
- {
- const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body;
- dump_SVGA3dCmdBlitSurfaceToScreen(cmd);
- body = (const uint8_t *)&cmd[1];
- while(body + sizeof(SVGASignedRect) <= next) {
- dump_SVGASignedRect((const SVGASignedRect *)body);
- body += sizeof(SVGASignedRect);
- }
- }
- break;
- default:
- _debug_printf("\t0x%08x\n", cmd_id);
- break;
- }
-
- while(body + sizeof(uint32_t) <= next) {
- _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
- body += sizeof(uint32_t);
- }
- while(body + sizeof(uint32_t) <= next)
- _debug_printf("\t\t0x%02x\n", *body++);
+ svga_dump_command(cmd_id, body, header->size);
}
else if(cmd_id == SVGA_CMD_FENCE) {
_debug_printf("\tSVGA_CMD_FENCE\n");
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h
index 69a8702087..ca0154361c 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.h
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.h
@@ -28,6 +28,9 @@
#include "pipe/p_compiler.h"
+void
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size);
+
void
svga_dump_commands(const void *commands, uint32_t size);
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py
index a1ada29ef8..0bc0b3ae31 100755
--- a/src/gallium/drivers/svga/svgadump/svga_dump.py
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.py
@@ -208,6 +208,56 @@ cmds = [
def dump_cmds():
print r'''
void
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size)
+{
+ const uint8_t *body = (const uint8_t *)data;
+ const uint8_t *next = body + size;
+'''
+ print ' switch(cmd_id) {'
+ indexes = 'ijklmn'
+ for id, header, body, footer in cmds:
+ print ' case %s:' % id
+ print ' _debug_printf("\\t%s\\n");' % id
+ print ' {'
+ print ' const %s *cmd = (const %s *)body;' % (header, header)
+ if len(body):
+ print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';'
+ print ' dump_%s(cmd);' % header
+ print ' body = (const uint8_t *)&cmd[1];'
+ for i in range(len(body)):
+ struct, count = body[i]
+ idx = indexes[i]
+ print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx)
+ print ' dump_%s((const %s *)body);' % (struct, struct)
+ print ' body += sizeof(%s);' % struct
+ print ' }'
+ if footer is not None:
+ print ' while(body + sizeof(%s) <= next) {' % footer
+ print ' dump_%s((const %s *)body);' % (footer, footer)
+ print ' body += sizeof(%s);' % footer
+ print ' }'
+ if id == 'SVGA_3D_CMD_SHADER_DEFINE':
+ print ' svga_shader_dump((const uint32_t *)body,'
+ print ' (unsigned)(next - body)/sizeof(uint32_t),'
+ print ' FALSE);'
+ print ' body = next;'
+ print ' }'
+ print ' break;'
+ print ' default:'
+ print ' _debug_printf("\\t0x%08x\\n", cmd_id);'
+ print ' break;'
+ print ' }'
+ print r'''
+ while(body + sizeof(uint32_t) <= next) {
+ _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+ body += sizeof(uint32_t);
+ }
+ while(body + sizeof(uint32_t) <= next)
+ _debug_printf("\t\t0x%02x\n", *body++);
+}
+'''
+ print r'''
+void
svga_dump_commands(const void *commands, uint32_t size)
{
const uint8_t *next = commands;
@@ -222,51 +272,11 @@ svga_dump_commands(const void *commands, uint32_t size)
const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
const uint8_t *body = (const uint8_t *)&header[1];
- next = (const uint8_t *)body + header->size;
+ next = body + header->size;
if(next > last)
break;
-'''
- print ' switch(cmd_id) {'
- indexes = 'ijklmn'
- for id, header, body, footer in cmds:
- print ' case %s:' % id
- print ' _debug_printf("\\t%s\\n");' % id
- print ' {'
- print ' const %s *cmd = (const %s *)body;' % (header, header)
- if len(body):
- print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';'
- print ' dump_%s(cmd);' % header
- print ' body = (const uint8_t *)&cmd[1];'
- for i in range(len(body)):
- struct, count = body[i]
- idx = indexes[i]
- print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx)
- print ' dump_%s((const %s *)body);' % (struct, struct)
- print ' body += sizeof(%s);' % struct
- print ' }'
- if footer is not None:
- print ' while(body + sizeof(%s) <= next) {' % footer
- print ' dump_%s((const %s *)body);' % (footer, footer)
- print ' body += sizeof(%s);' % footer
- print ' }'
- if id == 'SVGA_3D_CMD_SHADER_DEFINE':
- print ' sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));'
- print ' body = next;'
- print ' }'
- print ' break;'
- print ' default:'
- print ' _debug_printf("\\t0x%08x\\n", cmd_id);'
- print ' break;'
- print ' }'
-
- print r'''
- while(body + sizeof(uint32_t) <= next) {
- _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
- body += sizeof(uint32_t);
- }
- while(body + sizeof(uint32_t) <= next)
- _debug_printf("\t\t0x%02x\n", *body++);
+ svga_dump_command(cmd_id, body, header->size);
}
else if(cmd_id == SVGA_CMD_FENCE) {
_debug_printf("\tSVGA_CMD_FENCE\n");
diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README
index 1000c31e49..203c3851bc 100644
--- a/src/gallium/drivers/trace/README
+++ b/src/gallium/drivers/trace/README
@@ -24,11 +24,10 @@ ensure the right libGL.so is being picked by doing
ldd progs/trivial/tri
-== Traceing ==
+== Tracing ==
-For traceing then do
+For tracing then do
- export XMESA_TRACE=y
GALLIUM_TRACE=tri.trace progs/trivial/tri
which should create a tri.trace file, which is an XML file. You can view copying
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 8008b596c3..5a9f0fc690 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -52,6 +52,7 @@ trace_buffer_unwrap(struct trace_context *tr_ctx,
assert(tr_buf->buffer);
assert(tr_buf->buffer->screen == tr_scr->screen);
+ (void) tr_scr;
return tr_buf->buffer;
}
@@ -90,6 +91,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
assert(tr_surf->surface);
assert(tr_surf->surface->texture->screen == tr_scr->screen);
+ (void) tr_scr;
return tr_surf->surface;
}
@@ -159,16 +161,15 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag)
pipe_mutex_unlock(tr_ctx->draw_mutex);
}
-static INLINE boolean
+static INLINE void
trace_context_draw_arrays(struct pipe_context *_pipe,
unsigned mode, unsigned start, unsigned count)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct pipe_context *pipe = tr_ctx->pipe;
- boolean result;
if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
- return 0;
+ return;
trace_context_draw_block(tr_ctx, 1);
@@ -179,19 +180,15 @@ trace_context_draw_arrays(struct pipe_context *_pipe,
trace_dump_arg(uint, start);
trace_dump_arg(uint, count);
- result = pipe->draw_arrays(pipe, mode, start, count);
-
- trace_dump_ret(bool, result);
+ pipe->draw_arrays(pipe, mode, start, count);
trace_dump_call_end();
trace_context_draw_block(tr_ctx, 2);
-
- return result;
}
-static INLINE boolean
+static INLINE void
trace_context_draw_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -201,10 +198,9 @@ trace_context_draw_elements(struct pipe_context *_pipe,
struct trace_buffer *tr_buf = trace_buffer(_indexBuffer);
struct pipe_context *pipe = tr_ctx->pipe;
struct pipe_buffer *indexBuffer = tr_buf->buffer;
- boolean result;
if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
- return 0;
+ return;
trace_context_draw_block(tr_ctx, 1);
@@ -219,19 +215,15 @@ trace_context_draw_elements(struct pipe_context *_pipe,
trace_dump_arg(uint, start);
trace_dump_arg(uint, count);
- result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);
-
- trace_dump_ret(bool, result);
+ pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);
trace_dump_call_end();
trace_context_draw_block(tr_ctx, 2);
-
- return result;
}
-static INLINE boolean
+static INLINE void
trace_context_draw_range_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -245,10 +237,9 @@ trace_context_draw_range_elements(struct pipe_context *_pipe,
struct trace_buffer *tr_buf = trace_buffer(_indexBuffer);
struct pipe_context *pipe = tr_ctx->pipe;
struct pipe_buffer *indexBuffer = tr_buf->buffer;
- boolean result;
if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
- return 0;
+ return;
trace_context_draw_block(tr_ctx, 1);
@@ -265,18 +256,14 @@ trace_context_draw_range_elements(struct pipe_context *_pipe,
trace_dump_arg(uint, start);
trace_dump_arg(uint, count);
- result = pipe->draw_range_elements(pipe,
- indexBuffer,
- indexSize, minIndex, maxIndex,
- mode, start, count);
-
- trace_dump_ret(bool, result);
+ pipe->draw_range_elements(pipe,
+ indexBuffer,
+ indexSize, minIndex, maxIndex,
+ mode, start, count);
trace_dump_call_end();
trace_context_draw_block(tr_ctx, 2);
-
- return result;
}
diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
index 7e2ccbcfdc..0f45e211a3 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -40,7 +40,7 @@
#include "pipe/p_config.h"
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
#include <stdlib.h>
#endif
@@ -258,7 +258,7 @@ boolean trace_dump_trace_begin()
trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n");
trace_dump_writes("<trace version='0.1'>\n");
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
/* Linux applications rarely cleanup GL / Gallium resources so catch
* application exit here */
atexit(trace_dump_trace_close);
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 5794c90298..32f61f8c94 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -409,7 +409,7 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state)
trace_dump_member(uint, state, min_img_filter);
trace_dump_member(uint, state, min_mip_filter);
trace_dump_member(uint, state, mag_img_filter);
- trace_dump_member(bool, state, compare_mode);
+ trace_dump_member(uint, state, compare_mode);
trace_dump_member(uint, state, compare_func);
trace_dump_member(bool, state, normalized_coords);
trace_dump_member(uint, state, prefilter);
diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c
index c31b1d8698..0546aad9b5 100644
--- a/src/gallium/drivers/trace/tr_rbug.c
+++ b/src/gallium/drivers/trace/tr_rbug.c
@@ -45,7 +45,7 @@
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
# define sleep Sleep
-#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD)
+#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_APPLE)
void usleep(int);
# define sleep usleep
#else
@@ -180,7 +180,7 @@ static int
trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, uint32_t serial)
{
struct trace_screen *tr_scr = tr_rbug->tr_scr;
- struct trace_texture *tr_tex;
+ struct trace_texture *tr_tex = NULL;
struct rbug_proto_texture_info *gpti = (struct rbug_proto_texture_info *)header;
struct tr_list *ptr;
struct pipe_texture *t;
@@ -223,7 +223,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header,
struct rbug_proto_texture_read *gptr = (struct rbug_proto_texture_read *)header;
struct trace_screen *tr_scr = tr_rbug->tr_scr;
- struct trace_texture *tr_tex;
+ struct trace_texture *tr_tex = NULL;
struct tr_list *ptr;
struct pipe_screen *screen = tr_scr->screen;
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index ac20a47af1..117503aaff 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -426,7 +426,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen,
struct pipe_transfer *transfer = tr_trans->transfer;
if(tr_trans->map) {
- size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->width) * transfer->stride;
+ size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride;
trace_dump_call_begin("pipe_screen", "transfer_write");
diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h
index 1c16042ee5..e2f981d051 100644
--- a/src/gallium/drivers/trace/tr_state.h
+++ b/src/gallium/drivers/trace/tr_state.h
@@ -32,7 +32,7 @@ struct tgsi_token;
enum trace_shader_type {
TRACE_SHADER_FRAGMENT = 0,
TRACE_SHADER_VERTEX = 1,
- TRACE_SHADER_GEOMETRY = 2,
+ TRACE_SHADER_GEOMETRY = 2
};
struct trace_shader