summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r300')
-rw-r--r-- src/gallium/drivers/r300/Makefile | 1
-rw-r--r-- src/gallium/drivers/r300/SConscript | 1
-rw-r--r-- src/gallium/drivers/r300/r300_context.c | 16
-rw-r--r-- src/gallium/drivers/r300/r300_context.h | 26
-rw-r--r-- src/gallium/drivers/r300/r300_cs.h | 10
-rw-r--r-- src/gallium/drivers/r300/r300_debug.c | 2
-rw-r--r-- src/gallium/drivers/r300/r300_emit.c | 106
-rw-r--r-- src/gallium/drivers/r300/r300_emit.h | 5
-rw-r--r-- src/gallium/drivers/r300/r300_fs.c | 8
-rw-r--r-- src/gallium/drivers/r300/r300_hyperz.c | 108
-rw-r--r-- src/gallium/drivers/r300/r300_hyperz.h | 30
-rw-r--r-- src/gallium/drivers/r300/r300_reg.h | 2
-rw-r--r-- src/gallium/drivers/r300/r300_render.c | 98
-rw-r--r-- src/gallium/drivers/r300/r300_screen.c | 3
-rw-r--r-- src/gallium/drivers/r300/r300_screen.h | 2
-rw-r--r-- src/gallium/drivers/r300/r300_state.c | 137
-rw-r--r-- src/gallium/drivers/r300/r300_state_derived.c | 241
-rw-r--r-- src/gallium/drivers/r300/r300_state_inlines.h | 23
-rw-r--r-- src/gallium/drivers/r300/r300_state_invariant.c | 14
-rw-r--r-- src/gallium/drivers/r300/r300_texture.c | 53
-rw-r--r-- src/gallium/drivers/r300/r300_texture.h | 2
-rw-r--r-- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 2
-rw-r--r-- src/gallium/drivers/r300/r300_vs.c | 131
-rw-r--r-- src/gallium/drivers/r300/r300_vs.h | 10
-rw-r--r-- src/gallium/drivers/r300/r300_winsys.h | 6
25 files changed, 550 insertions, 487 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index 5a8e00f15a..d3cd6bef96 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -11,6 +11,7 @@ C_SOURCES = \
r300_emit.c \
r300_flush.c \
r300_fs.c \
+ r300_hyperz.c \
r300_query.c \
r300_render.c \
r300_resource.c \
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index 08aec427a1..3921085d76 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -21,6 +21,7 @@ r300 = env.ConvenienceLibrary(
'r300_emit.c',
'r300_flush.c',
'r300_fs.c',
+ 'r300_hyperz.c',
'r300_query.c',
'r300_render.c',
'r300_resource.c',
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index deaa03e1f6..e84bce0010 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -37,14 +37,27 @@
#include "r300_state_invariant.h"
#include "r300_winsys.h"
+#include <inttypes.h>
+
static void r300_destroy_context(struct pipe_context* context)
{
struct r300_context* r300 = r300_context(context);
struct r300_query* query, * temp;
+ struct r300_atom *atom;
util_blitter_destroy(r300->blitter);
draw_destroy(r300->draw);
+ /* Print stats, if enabled. */
+ if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
+ fprintf(stderr, "r300: Stats for context %p:\n", r300);
+ fprintf(stderr, " : Flushes: %" PRIu64 "\n", r300->flush_counter);
+ foreach(atom, &r300->atom_list) {
+ fprintf(stderr, " : %s: %" PRIu64 " emits\n",
+ atom->name, atom->counter);
+ }
+ }
+
/* Free the OQ BO. */
context->screen->resource_destroy(context->screen, r300->oqbo);
@@ -63,7 +76,6 @@ static void r300_destroy_context(struct pipe_context* context)
FREE(r300->rs_block_state.state);
FREE(r300->scissor_state.state);
FREE(r300->textures_state.state);
- FREE(r300->vap_output_state.state);
FREE(r300->viewport_state.state);
FREE(r300->ztop_state.state);
FREE(r300->fs_constants.state);
@@ -112,7 +124,6 @@ static void r300_setup_atoms(struct r300_context* r300)
R300_INIT_ATOM(viewport_state, 9);
R300_INIT_ATOM(rs_block_state, 0);
R300_INIT_ATOM(vertex_stream_state, 0);
- R300_INIT_ATOM(vap_output_state, 6);
R300_INIT_ATOM(pvs_flush, 2);
R300_INIT_ATOM(vs_state, 0);
R300_INIT_ATOM(vs_constants, 0);
@@ -136,7 +147,6 @@ static void r300_setup_atoms(struct r300_context* r300)
r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block);
r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
r300->textures_state.state = CALLOC_STRUCT(r300_textures_state);
- r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state);
r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 1e4fd9e5ed..e9c8fcdc15 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -45,6 +45,8 @@ struct r300_atom {
struct r300_atom *prev, *next;
/* Name, for debugging. */
const char* name;
+ /* Stat counter. */
+ uint64_t counter;
/* Opaque state. */
void* state;
/* Emit the state to the context. */
@@ -117,6 +119,10 @@ struct r300_rs_state {
};
struct r300_rs_block {
+ uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */
+ uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */
+ uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */
+
uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */
uint32_t count; /* R300_RS_COUNT */
uint32_t inst_count; /* R300_RS_INST_COUNT */
@@ -188,12 +194,6 @@ struct r300_vertex_stream_state {
unsigned count;
};
-struct r300_vap_output_state {
- uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */
- uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */
- uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */
-};
-
struct r300_viewport_state {
float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */
float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */
@@ -255,6 +255,10 @@ struct r300_texture {
/* A pitch for each mip-level */
unsigned pitch[R300_MAX_TEXTURE_LEVELS];
+ /* A pitch multiplied by blockwidth as hardware wants
+ * the number of pixels instead of the number of blocks. */
+ unsigned hwpitch[R300_MAX_TEXTURE_LEVELS];
+
/* Size of one zslice or face based on the texture target */
unsigned layer_size[R300_MAX_TEXTURE_LEVELS];
@@ -375,7 +379,7 @@ struct r300_context {
struct r300_atom query_start;
/* Rasterizer state. */
struct r300_atom rs_state;
- /* RS block state. */
+ /* RS block state + VAP (vertex shader) output mapping state. */
struct r300_atom rs_block_state;
/* Scissor state. */
struct r300_atom scissor_state;
@@ -383,8 +387,6 @@ struct r300_context {
struct r300_atom textures_state;
/* Vertex stream formatting state. */
struct r300_atom vertex_stream_state;
- /* VAP (vertex shader) output mapping state. */
- struct r300_atom vap_output_state;
/* Vertex shader. */
struct r300_atom vs_state;
/* Vertex shader constant buffer. */
@@ -418,6 +420,9 @@ struct r300_context {
struct pipe_viewport_state viewport;
+ /* Stream locations for SWTCL. */
+ int stream_loc_notcl[16];
+
/* Flag indicating whether or not the HW is dirty. */
uint32_t dirty_hw;
/* Whether polygon offset is enabled. */
@@ -435,6 +440,9 @@ struct r300_context {
/* upload managers */
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
+
+ /* Stat counter. */
+ uint64_t flush_counter;
};
/* Convenience cast wrapper. */
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 456b2ec7b9..996a4f491e 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -104,6 +104,13 @@
cs_count--; \
} while (0)
+#define OUT_CS_TABLE(values, count) do { \
+ if (VERY_VERBOSE_REGISTERS) \
+ DBG(cs_context_copy, DBG_CS, "r300: writing table of %d dwords\n", count); \
+ cs_winsys->write_cs_table(cs_winsys, values, count); \
+ cs_count -= count; \
+} while (0)
+
#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \
DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \
"domains (%d, %d, %d)\n", \
@@ -150,6 +157,9 @@
DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \
__FILE__, __LINE__); \
} \
+ if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { \
+ r300->flush_counter++; \
+ } \
cs_winsys->flush_cs(cs_winsys); \
} while (0)
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 6e84bf8246..4c2836f36a 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -38,9 +38,11 @@ static struct debug_option debug_options[] = {
{ "draw", DBG_DRAW, "Draw and emit (for debugging)" },
{ "tex", DBG_TEX, "Textures (for debugging)" },
{ "fall", DBG_FALL, "Fallbacks (for debugging)" },
+ { "rs", DBG_RS, "Rasterizer (for debugging)" },
{ "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" },
{ "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" },
{ "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" },
+ { "stats", DBG_STATS, "Gather statistics (for lulz)" },
{ "all", ~0, "Convenience option that enables all debug flags" },
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 19acdaba62..23bbc6a99c 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -83,7 +83,6 @@ void r300_emit_clip_state(struct r300_context* r300,
unsigned size, void* state)
{
struct pipe_clip_state* clip = (struct pipe_clip_state*)state;
- int i;
CS_LOCALS(r300);
if (r300->screen->caps.has_tcl) {
@@ -92,12 +91,7 @@ void r300_emit_clip_state(struct r300_context* r300,
(r300->screen->caps.is_r500 ?
R500_PVS_UCP_START : R300_PVS_UCP_START));
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4);
- for (i = 0; i < 6; i++) {
- OUT_CS_32F(clip->ucp[i][0]);
- OUT_CS_32F(clip->ucp[i][1]);
- OUT_CS_32F(clip->ucp[i][2]);
- OUT_CS_32F(clip->ucp[i][3]);
- }
+ OUT_CS_TABLE(clip->ucp, 6 * 4);
OUT_CS_REG(R300_VAP_CLIP_CNTL, ((1 << clip->nr) - 1) |
R300_PS_UCP_MODE_CLIP_AS_TRIFAN);
END_CS;
@@ -106,7 +100,6 @@ void r300_emit_clip_state(struct r300_context* r300,
OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
END_CS;
}
-
}
void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state)
@@ -244,8 +237,7 @@ void r300_emit_fs(struct r300_context* r300, unsigned size, void *state)
OUT_CS_REG(R300_US_CODE_OFFSET, code->code_offset);
OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4);
- for(i = 0; i < 4; ++i)
- OUT_CS(code->code_addr[i]);
+ OUT_CS_TABLE(code->code_addr, 4);
OUT_CS_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
for (i = 0; i < code->alu.length; i++)
@@ -265,8 +257,7 @@ void r300_emit_fs(struct r300_context* r300, unsigned size, void *state)
if (code->tex.length) {
OUT_CS_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
- for(i = 0; i < code->tex.length; ++i)
- OUT_CS(code->tex.inst[i]);
+ OUT_CS_TABLE(code->tex.inst, code->tex.length);
}
/* Emit immediates. */
@@ -396,10 +387,7 @@ void r500_emit_fs(struct r300_context* r300, unsigned size, void *state)
R500_GA_US_VECTOR_INDEX_TYPE_CONST |
(i & R500_GA_US_VECTOR_INDEX_MASK));
OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4);
- OUT_CS_32F(data[0]);
- OUT_CS_32F(data[1]);
- OUT_CS_32F(data[2]);
- OUT_CS_32F(data[3]);
+ OUT_CS_TABLE(data, 4);
}
}
}
@@ -424,15 +412,9 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4);
for(i = 0; i < count; ++i) {
- const float *data;
assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL);
- data = buf->constants[i];
-
- OUT_CS_32F(data[0]);
- OUT_CS_32F(data[1]);
- OUT_CS_32F(data[2]);
- OUT_CS_32F(data[3]);
}
+ OUT_CS_TABLE(buf->constants, count * 4);
END_CS;
}
@@ -459,10 +441,7 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo
R500_GA_US_VECTOR_INDEX_TYPE_CONST |
(i & R500_GA_US_VECTOR_INDEX_MASK));
OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4);
- OUT_CS_32F(data[0]);
- OUT_CS_32F(data[1]);
- OUT_CS_32F(data[2]);
- OUT_CS_32F(data[3]);
+ OUT_CS_TABLE(data, 4);
}
}
END_CS;
@@ -738,13 +717,20 @@ void r300_emit_rs_block_state(struct r300_context* r300,
DBG(r300, DBG_DRAW, "r300: RS emit:\n");
BEGIN_CS(size);
+ OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
+ OUT_CS(rs->vap_vtx_state_cntl);
+ OUT_CS(rs->vap_vsm_vtx_assm);
+ OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+ OUT_CS(rs->vap_out_vtx_fmt[0]);
+ OUT_CS(rs->vap_out_vtx_fmt[1]);
+
if (r300->screen->caps.is_r500) {
OUT_CS_REG_SEQ(R500_RS_IP_0, count);
} else {
OUT_CS_REG_SEQ(R300_RS_IP_0, count);
}
+ OUT_CS_TABLE(rs->ip, count);
for (i = 0; i < count; i++) {
- OUT_CS(rs->ip[i]);
DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]);
}
@@ -757,8 +743,8 @@ void r300_emit_rs_block_state(struct r300_context* r300,
} else {
OUT_CS_REG_SEQ(R300_RS_INST_0, count);
}
+ OUT_CS_TABLE(rs->inst, count);
for (i = 0; i < count; i++) {
- OUT_CS(rs->inst[i]);
DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]);
}
@@ -823,7 +809,7 @@ void r300_emit_textures_state(struct r300_context *r300,
END_CS;
}
-void r300_emit_aos(struct r300_context* r300, unsigned offset)
+void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed)
{
struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
struct pipe_vertex_element *velem = r300->velems->velem;
@@ -832,9 +818,18 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
unsigned packet_size = (aos_count * 3 + 1) / 2;
CS_LOCALS(r300);
+ for (i = 0; i < aos_count; i++) {
+ if ((vbuf[velem[i].vertex_buffer_index].buffer_offset + velem[i].src_offset) % 4 != 0) {
+ /* XXX We must align the buffer. */
+ assert(0);
+ fprintf(stderr, "r300: Unaligned vertex buffer offsets aren't supported, aborting..\n");
+ abort();
+ }
+ }
+
BEGIN_CS(2 + packet_size + aos_count * 2);
OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
- OUT_CS(aos_count);
+ OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
for (i = 0; i < aos_count - 1; i += 2) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
@@ -899,39 +894,20 @@ void r300_emit_vertex_stream_state(struct r300_context* r300,
BEGIN_CS(size);
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count);
+ OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count);
for (i = 0; i < streams->count; i++) {
- OUT_CS(streams->vap_prog_stream_cntl[i]);
DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i,
streams->vap_prog_stream_cntl[i]);
}
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count);
+ OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count);
for (i = 0; i < streams->count; i++) {
- OUT_CS(streams->vap_prog_stream_cntl_ext[i]);
DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i,
streams->vap_prog_stream_cntl_ext[i]);
}
END_CS;
}
-void r300_emit_vap_output_state(struct r300_context* r300,
- unsigned size, void* state)
-{
- struct r300_vap_output_state *vap_out_state =
- (struct r300_vap_output_state*)state;
- CS_LOCALS(r300);
-
- DBG(r300, DBG_DRAW, "r300: VAP emit:\n");
-
- BEGIN_CS(size);
- OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
- OUT_CS(vap_out_state->vap_vtx_state_cntl);
- OUT_CS(vap_out_state->vap_vsm_vtx_assm);
- OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
- OUT_CS(vap_out_state->vap_out_vtx_fmt[0]);
- OUT_CS(vap_out_state->vap_out_vtx_fmt[1]);
- END_CS;
-}
-
void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state)
{
CS_LOCALS(r300);
@@ -978,9 +954,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length);
- for (i = 0; i < code->length; i++) {
- OUT_CS(code->body.d[i]);
- }
+ OUT_CS_TABLE(code->body.d, code->length);
OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) |
R300_PVS_NUM_CNTLRS(pvs_num_controllers) |
@@ -997,10 +971,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4);
for (i = imm_first; i < imm_end; i++) {
const float *data = vs->code.constants.Constants[i].u.Immediate;
- OUT_CS_32F(data[0]);
- OUT_CS_32F(data[1]);
- OUT_CS_32F(data[2]);
- OUT_CS_32F(data[3]);
+ OUT_CS_TABLE(data, 4);
}
}
END_CS;
@@ -1009,7 +980,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
void r300_emit_vs_constants(struct r300_context* r300,
unsigned size, void *state)
{
- unsigned i;
unsigned count =
((struct r300_vertex_shader*)r300->vs_state.state)->externals_count;
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
@@ -1023,13 +993,7 @@ void r300_emit_vs_constants(struct r300_context* r300,
(r300->screen->caps.is_r500 ?
R500_PVS_CONST_START : R300_PVS_CONST_START));
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);
- for (i = 0; i < count; i++) {
- const float *data = buf->constants[i];
- OUT_CS_32F(data[0]);
- OUT_CS_32F(data[1]);
- OUT_CS_32F(data[2]);
- OUT_CS_32F(data[3]);
- }
+ OUT_CS_TABLE(buf->constants, count * 4);
END_CS;
}
@@ -1188,6 +1152,11 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300)
}
}
+ /* emit_query_end is not atomized. */
+ dwords += 26;
+ /* let's reserve some more, just in case */
+ dwords += 32;
+
return dwords;
}
@@ -1200,6 +1169,9 @@ void r300_emit_dirty_state(struct r300_context* r300)
foreach(atom, &r300->atom_list) {
if (atom->dirty) {
atom->emit(r300, atom->size, atom->state);
+ if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
+ atom->counter++;
+ }
atom->dirty = FALSE;
}
}
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 56f7318cdb..3c0edf6fdc 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -29,7 +29,7 @@
struct rX00_fragment_program_code;
struct r300_vertex_program_code;
-void r300_emit_aos(struct r300_context* r300, unsigned offset);
+void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed);
void r300_emit_blend_state(struct r300_context* r300,
unsigned size, void* state);
@@ -81,9 +81,6 @@ void r300_emit_vertex_buffer(struct r300_context* r300);
void r300_emit_vertex_stream_state(struct r300_context* r300,
unsigned size, void* state);
-void r300_emit_vap_output_state(struct r300_context* r300,
- unsigned size, void* state);
-
void r300_emit_vs_constants(struct r300_context* r300,
unsigned size, void *state);
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 4d61f63853..88303f074c 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -275,6 +275,14 @@ static void r300_translate_fragment_shader(
/* Invoke the compiler */
r3xx_compile_fragment_program(&compiler);
+ /* Shaders with zero instructions are invalid,
+ * use the dummy shader instead. */
+ if (shader->code.code.r500.inst_end == -1) {
+ rc_destroy(&compiler.Base);
+ r300_dummy_fragment_shader(r300, shader);
+ return;
+ }
+
if (compiler.Base.Error) {
fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
" instead.\n", compiler.Base.ErrorMsg);
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
new file mode 100644
index 0000000000..b41b6b1508
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+
+#include "r300_hyperz.h"
+#include "r300_context.h"
+#include "r300_reg.h"
+#include "r300_fs.h"
+
+/*****************************************************************************/
+/* The ZTOP state */
+/*****************************************************************************/
+
+static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
+{
+ /* We are interested only in the cases when a new depth or stencil value
+ * can be written and changed. */
+
+ /* We might optionally check for [Z func: never] and inspect the stencil
+ * state in a similar fashion, but it's not terribly important. */
+ return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) ||
+ (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) ||
+ ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) &&
+ (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK));
+}
+
+static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa)
+{
+ /* We are interested only in the cases when alpha testing can kill
+ * a fragment. */
+ uint32_t af = dsa->alpha_function;
+
+ return (af & R300_FG_ALPHA_FUNC_ENABLE) &&
+ (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS;
+}
+
+static void r300_update_ztop(struct r300_context* r300)
+{
+ struct r300_ztop_state* ztop_state =
+ (struct r300_ztop_state*)r300->ztop_state.state;
+
+ /* This is important enough that I felt it warranted a comment.
+ *
+ * According to the docs, these are the conditions where ZTOP must be
+ * disabled:
+ * 1) Alpha testing enabled
+ * 2) Texture kill instructions in fragment shader
+ * 3) Chroma key culling enabled
+ * 4) W-buffering enabled
+ *
+ * The docs claim that for the first three cases, if no ZS writes happen,
+ * then ZTOP can be used.
+ *
+ * (3) will never apply since we do not support chroma-keyed operations.
+ * (4) will need to be re-examined (and this comment updated) if/when
+ * Hyper-Z becomes supported.
+ *
+ * Additionally, the following conditions require disabled ZTOP:
+ * 5) Depth writes in fragment shader
+ * 6) Outstanding occlusion queries
+ *
+ * This register causes stalls all the way from SC to CB when changed,
+ * but it is buffered on-chip so it does not hurt to write it if it has
+ * not changed.
+ *
+ * ~C.
+ */
+
+ /* ZS writes */
+ if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) &&
+ (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */
+ r300_fs(r300)->shader->info.uses_kill)) { /* (2) */
+ ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+ } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */
+ ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+ } else if (r300->query_current) { /* (6) */
+ ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+ } else {
+ ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
+ }
+
+ r300->ztop_state.dirty = TRUE;
+}
+
+void r300_update_hyperz_state(struct r300_context* r300)
+{
+ r300_update_ztop(r300);
+}
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
new file mode 100644
index 0000000000..3df5053b89
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_hyperz.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_HYPERZ_H
+#define R300_HYPERZ_H
+
+struct r300_context;
+
+void r300_update_hyperz_state(struct r300_context* r300);
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 239f91443f..675a9317f9 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -3377,7 +3377,7 @@ enum {
* the last block is omitted.
*/
#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00
-
+# define R300_VC_FORCE_PREFETCH (1 << 5)
# define R300_VBPNTR_SIZE0(x) ((x) >> 2)
# define R300_VBPNTR_STRIDE0(x) (((x) >> 2) << 8)
# define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 23b61df89c..7c3a7902a4 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -41,9 +41,6 @@
#include "r300_render.h"
#include "r300_state_derived.h"
-/* XXX The DRM rejects VAP_ALT_NUM_VERTICES.. */
-//#define ENABLE_ALT_NUM_VERTS
-
static uint32_t r300_translate_primitive(unsigned prim)
{
switch (prim) {
@@ -169,6 +166,24 @@ static boolean immd_is_good_idea(struct r300_context *r300,
* after resolving fallback issues (e.g. stencil ref two-sided). *
****************************************************************************/
+static boolean r500_emit_index_offset(struct r300_context *r300, int indexBias)
+{
+ CS_LOCALS(r300);
+
+ if (r300->screen->caps.is_r500 &&
+ r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
+ BEGIN_CS(2);
+ OUT_CS_REG(R500_VAP_INDEX_OFFSET,
+ (indexBias & 0xFFFFFF) | (indexBias < 0 ? 1<<24 : 0));
+ END_CS;
+ } else {
+ if (indexBias)
+ return FALSE; /* Can't do anything :( */
+ }
+
+ return TRUE;
+}
+
void r500_emit_draw_arrays_immediate(struct r300_context *r300,
unsigned mode,
unsigned start,
@@ -220,10 +235,12 @@ void r500_emit_draw_arrays_immediate(struct r300_context *r300,
dwords = 9 + count * vertex_size;
- r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords);
+ r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 2 + dwords);
r300_emit_buffer_validate(r300, FALSE, NULL);
r300_emit_dirty_state(r300);
+ r500_emit_index_offset(r300, 0);
+
BEGIN_CS(dwords);
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, mode));
@@ -265,23 +282,20 @@ void r500_emit_draw_arrays(struct r300_context *r300,
unsigned mode,
unsigned count)
{
-#if defined(ENABLE_ALT_NUM_VERTS)
boolean alt_num_verts = count > 65535;
-#else
- boolean alt_num_verts = FALSE;
-#endif
CS_LOCALS(r300);
+ if (count >= (1 << 24)) {
+ fprintf(stderr, "r300: Got a huge number of vertices: %i, "
+ "refusing to render.\n", count);
+ return;
+ }
+
+ r500_emit_index_offset(r300, 0);
+
+ BEGIN_CS(7 + (alt_num_verts ? 2 : 0));
if (alt_num_verts) {
- if (count >= (1 << 24)) {
- fprintf(stderr, "r300: Got a huge number of vertices: %i, "
- "refusing to render.\n", count);
- return;
- }
- BEGIN_CS(9);
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
- } else {
- BEGIN_CS(7);
}
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, mode));
@@ -307,11 +321,7 @@ void r500_emit_draw_elements(struct r300_context *r300,
{
uint32_t count_dwords;
uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
-#if defined(ENABLE_ALT_NUM_VERTS)
boolean alt_num_verts = count > 65535;
-#else
- boolean alt_num_verts = FALSE;
-#endif
CS_LOCALS(r300);
if (count >= (1 << 24)) {
@@ -320,18 +330,20 @@ void r500_emit_draw_elements(struct r300_context *r300,
return;
}
- assert(indexBias == 0);
-
maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);
DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n",
count, minIndex, maxIndex);
+ if (!r500_emit_index_offset(r300, indexBias)) {
+ fprintf(stderr, "r300: Got a non-zero index bias, "
+ "refusing to render.\n");
+ return;
+ }
+
+ BEGIN_CS(13 + (alt_num_verts ? 2 : 0));
if (alt_num_verts) {
- BEGIN_CS(15);
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
- } else {
- BEGIN_CS(13);
}
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, mode));
@@ -541,12 +553,9 @@ void r300_draw_range_elements(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
struct pipe_resource* orgIndexBuffer = indexBuffer;
-#if defined(ENABLE_ALT_NUM_VERTS)
boolean alt_num_verts = r300->screen->caps.is_r500 &&
- count > 65536;
-#else
- boolean alt_num_verts = FALSE;
-#endif
+ count > 65536 &&
+ r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
unsigned short_count;
if (r300->skip_rendering) {
@@ -574,7 +583,7 @@ void r300_draw_range_elements(struct pipe_context* pipe,
r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128);
r300_emit_buffer_validate(r300, TRUE, indexBuffer);
r300_emit_dirty_state(r300);
- r300_emit_aos(r300, 0);
+ r300_emit_aos(r300, 0, TRUE);
u_upload_flush(r300->upload_vb);
u_upload_flush(r300->upload_ib);
@@ -591,11 +600,12 @@ void r300_draw_range_elements(struct pipe_context* pipe,
start += short_count;
count -= short_count;
- /* 16 spare dwords are enough for emit_draw_elements. */
- if (count && r300_reserve_cs_space(r300, 16)) {
+ /* 16 spare dwords are enough for emit_draw_elements.
+ * Also reserve some space for emit_query_end. */
+ if (count && r300_reserve_cs_space(r300, 74)) {
r300_emit_buffer_validate(r300, TRUE, indexBuffer);
r300_emit_dirty_state(r300);
- r300_emit_aos(r300, 0);
+ r300_emit_aos(r300, 0, TRUE);
}
} while (count);
}
@@ -622,12 +632,9 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
unsigned start, unsigned count)
{
struct r300_context* r300 = r300_context(pipe);
-#if defined(ENABLE_ALT_NUM_VERTS)
boolean alt_num_verts = r300->screen->caps.is_r500 &&
- count > 65536;
-#else
- boolean alt_num_verts = FALSE;
-#endif
+ count > 65536 &&
+ r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
unsigned short_count;
if (r300->skip_rendering) {
@@ -650,20 +657,21 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
r300_emit_dirty_state(r300);
if (alt_num_verts || count <= 65535) {
- r300_emit_aos(r300, start);
+ r300_emit_aos(r300, start, FALSE);
r300->emit_draw_arrays(r300, mode, count);
} else {
do {
short_count = MIN2(count, 65535);
- r300_emit_aos(r300, start);
+ r300_emit_aos(r300, start, FALSE);
r300->emit_draw_arrays(r300, mode, short_count);
start += short_count;
count -= short_count;
/* Again, we emit both AOS and draw_arrays so there should be
- * at least 128 spare dwords. */
- if (count && r300_reserve_cs_space(r300, 128)) {
+ * at least 128 spare dwords.
+ * Also reserve some space for emit_query_end. */
+ if (count && r300_reserve_cs_space(r300, 186)) {
r300_emit_buffer_validate(r300, TRUE, NULL);
r300_emit_dirty_state(r300);
}
@@ -896,6 +904,8 @@ static void r500_render_draw_arrays(struct vbuf_render* render,
DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);
+ r500_emit_index_offset(r300, 0);
+
BEGIN_CS(2);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
@@ -918,6 +928,8 @@ static void r500_render_draw(struct vbuf_render* render,
r300_emit_buffer_validate(r300, FALSE, NULL);
r300_emit_dirty_state(r300);
+ r500_emit_index_offset(r300, 0);
+
BEGIN_CS(dwords);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 8fc1d5aa00..c039126703 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -22,6 +22,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
#include "util/u_memory.h"
#include "r300_context.h"
@@ -319,6 +320,8 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws)
r300_init_screen_resource_functions(r300screen);
+ util_format_s3tc_init();
+
return &r300screen->screen;
}
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 330bd9b36b..735c233c9e 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -71,6 +71,8 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
#define DBG_ANISOHQ 0x0000080
#define DBG_NO_TILING 0x0000100
#define DBG_NO_IMMD 0x0000200
+#define DBG_STATS 0x0000400
+#define DBG_RS 0x0000800
/*@}*/
static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 9eb8539a65..d31e7c53f7 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -538,46 +538,12 @@ static void r300_set_stencil_ref(struct pipe_context* pipe,
}
/* This switcheroo is needed just because of goddamned MACRO_SWITCH. */
-static void r300_fb_update_tiling_flags(struct r300_context *r300,
+static void r300_fb_set_tiling_flags(struct r300_context *r300,
const struct pipe_framebuffer_state *old_state,
const struct pipe_framebuffer_state *new_state)
{
struct r300_texture *tex;
- unsigned i, j, level;
-
- /* Reset tiling flags for old surfaces to default values. */
- for (i = 0; i < old_state->nr_cbufs; i++) {
- for (j = 0; j < new_state->nr_cbufs; j++) {
- if (old_state->cbufs[i]->texture == new_state->cbufs[j]->texture) {
- break;
- }
- }
- /* If not binding the surface again... */
- if (j != new_state->nr_cbufs) {
- continue;
- }
-
- tex = r300_texture(old_state->cbufs[i]->texture);
-
- if (tex) {
- r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->pitch[0],
- tex->microtile,
- tex->macrotile);
- }
- }
- if (old_state->zsbuf &&
- (!new_state->zsbuf ||
- old_state->zsbuf->texture != new_state->zsbuf->texture)) {
- tex = r300_texture(old_state->zsbuf->texture);
-
- if (tex) {
- r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->pitch[0],
- tex->microtile,
- tex->macrotile);
- }
- }
+ unsigned i, level;
/* Set tiling flags for new surfaces. */
for (i = 0; i < new_state->nr_cbufs; i++) {
@@ -585,7 +551,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300,
level = new_state->cbufs[i]->level;
r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->pitch[level],
+ tex->pitch[0],
tex->microtile,
tex->mip_macrotile[level]);
}
@@ -594,7 +560,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300,
level = new_state->zsbuf->level;
r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->pitch[level],
+ tex->pitch[0],
tex->microtile,
tex->mip_macrotile[level]);
}
@@ -644,7 +610,8 @@ static void
r300->dsa_state.dirty = TRUE;
}
- r300_fb_update_tiling_flags(r300, r300->fb_state.state, state);
+ /* The tiling flags are dependent on the surface miplevel, unfortunately. */
+ r300_fb_set_tiling_flags(r300, r300->fb_state.state, state);
memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state));
@@ -719,10 +686,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
r300_mark_fs_code_dirty(r300);
r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
-
- if (r300->vs_state.state && r300_vertex_shader_setup_wpos(r300)) {
- r300->vap_output_state.dirty = TRUE;
- }
}
/* Delete fragment shader state. */
@@ -1072,11 +1035,9 @@ r300_create_sampler_view(struct pipe_context *pipe,
swizzle[2] = templ->swizzle_b;
swizzle[3] = templ->swizzle_a;
- /* XXX Enable swizzles when they become supported. Now we get RGBA
- * everywhere. And do testing! */
view->format = tex->tx_format;
view->format.format1 |= r300_translate_texformat(templ->format,
- 0); /*swizzle);*/
+ swizzle);
if (r300_screen(pipe->screen)->caps.is_r500) {
view->format.format2 |= r500_tx_format_msb_bit(templ->format);
}
@@ -1271,6 +1232,7 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
{
struct r300_vertex_element_state *velems;
unsigned i, size;
+ enum pipe_format *format;
assert(count <= PIPE_MAX_ATTRIBS);
velems = CALLOC_STRUCT(r300_vertex_element_state);
@@ -1279,21 +1241,88 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
if (r300_screen(pipe->screen)->caps.has_tcl) {
- /* Check if the format is aligned to the size of DWORD. */
+ r300_vertex_psc(velems);
+
+ /* Check if the format is aligned to the size of DWORD.
+ * We only care about the blocksizes of the formats since
+ * swizzles are already set up. */
for (i = 0; i < count; i++) {
- size = util_format_get_blocksize(attribs[i].src_format);
+ format = &velems->velem[i].src_format;
+
+ /* Replace some formats with their aligned counterparts.
+ * This is OK because we check for aligned strides too. */
+ switch (*format) {
+ /* Align to RGBA8. */
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ *format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ continue;
+ case PIPE_FORMAT_R8_SNORM:
+ case PIPE_FORMAT_R8G8_SNORM:
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ *format = PIPE_FORMAT_R8G8B8A8_SNORM;
+ continue;
+ case PIPE_FORMAT_R8_USCALED:
+ case PIPE_FORMAT_R8G8_USCALED:
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ *format = PIPE_FORMAT_R8G8B8A8_USCALED;
+ continue;
+ case PIPE_FORMAT_R8_SSCALED:
+ case PIPE_FORMAT_R8G8_SSCALED:
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ *format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+ continue;
+
+ /* Align to RG16. */
+ case PIPE_FORMAT_R16_UNORM:
+ *format = PIPE_FORMAT_R16G16_UNORM;
+ continue;
+ case PIPE_FORMAT_R16_SNORM:
+ *format = PIPE_FORMAT_R16G16_SNORM;
+ continue;
+ case PIPE_FORMAT_R16_USCALED:
+ *format = PIPE_FORMAT_R16G16_USCALED;
+ continue;
+ case PIPE_FORMAT_R16_SSCALED:
+ *format = PIPE_FORMAT_R16G16_SSCALED;
+ continue;
+ case PIPE_FORMAT_R16_FLOAT:
+ *format = PIPE_FORMAT_R16G16_FLOAT;
+ continue;
+
+ /* Align to RGBA16. */
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ *format = PIPE_FORMAT_R16G16B16A16_UNORM;
+ continue;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ *format = PIPE_FORMAT_R16G16B16A16_SNORM;
+ continue;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ *format = PIPE_FORMAT_R16G16B16A16_USCALED;
+ continue;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ *format = PIPE_FORMAT_R16G16B16A16_SSCALED;
+ continue;
+ case PIPE_FORMAT_R16G16B16_FLOAT:
+ *format = PIPE_FORMAT_R16G16B16A16_FLOAT;
+ continue;
+
+ default:;
+ }
+
+ size = util_format_get_blocksize(*format);
if (size % 4 != 0) {
/* XXX Shouldn't we align the format? */
fprintf(stderr, "r300_create_vertex_elements_state: "
"Unaligned format %s:%i isn't supported\n",
- util_format_name(attribs[i].src_format), size);
+ util_format_name(*format), size);
assert(0);
abort();
}
}
- r300_vertex_psc(velems);
}
}
return velems;
@@ -1359,14 +1388,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
}
r300->vs_state.state = vs;
- // VS output mapping for HWTCL or stream mapping for SWTCL to the RS block
- if (r300->fs.state) {
- r300_vertex_shader_setup_wpos(r300);
- }
- memcpy(r300->vap_output_state.state, &vs->vap_out,
- sizeof(struct r300_vap_output_state));
- r300->vap_output_state.dirty = TRUE;
-
/* The majority of the RS block bits is dependent on the vertex shader. */
r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 46c192eae1..e3adace0fa 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -28,6 +28,7 @@
#include "r300_context.h"
#include "r300_fs.h"
+#include "r300_hyperz.h"
#include "r300_screen.h"
#include "r300_shader_semantics.h"
#include "r300_state.h"
@@ -42,6 +43,7 @@ enum r300_rs_swizzle {
SWIZ_XYZW = 0,
SWIZ_X001,
SWIZ_XY01,
+ SWIZ_0001,
};
static void r300_draw_emit_attrib(struct r300_context* r300,
@@ -113,12 +115,11 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
static void r300_swtcl_vertex_psc(struct r300_context *r300)
{
struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state;
- struct r300_vertex_shader* vs = r300->vs_state.state;
- struct vertex_info* vinfo = &r300->vertex_info;
+ struct vertex_info* vinfo = &r300->vertex_info;
uint16_t type, swizzle;
enum pipe_format format;
unsigned i, attrib_count;
- int* vs_output_tab = vs->stream_loc_notcl;
+ int* vs_output_tab = r300->stream_loc_notcl;
/* XXX hax */
memset(vstream, 0, sizeof(struct r300_vertex_stream_state));
@@ -169,10 +170,10 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300)
}
static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
- boolean swizzle_0001)
+ enum r300_rs_swizzle swiz)
{
rs->ip[id] |= R300_RS_COL_PTR(ptr);
- if (swizzle_0001) {
+ if (swiz == SWIZ_0001) {
rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
} else {
rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
@@ -218,10 +219,10 @@ static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
}
static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
- boolean swizzle_0001)
+ enum r300_rs_swizzle swiz)
{
rs->ip[id] |= R500_RS_COL_PTR(ptr);
- if (swizzle_0001) {
+ if (swiz == SWIZ_0001) {
rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
} else {
rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
@@ -267,21 +268,29 @@ static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
/* Set up the RS block.
*
- * This is the part of the chipset that actually does the rasterization
- * of vertices into fragments. This is also the part of the chipset that
- * locks up if any part of it is even slightly wrong. */
-static void r300_update_rs_block(struct r300_context* r300,
- struct r300_shader_semantics* vs_outputs,
- struct r300_shader_semantics* fs_inputs)
+ * This is the part of the chipset that is responsible for linking vertex
+ * and fragment shaders and stuffed texture coordinates.
+ *
+ * The rasterizer reads data from VAP, which produces vertex shader outputs,
+ * and GA, which produces stuffed texture coordinates. VAP outputs have
+ * precedence over GA. All outputs must be rasterized; otherwise it locks up.
+ * If more outputs are rasterized than are set up in VAP/GA, it locks up
+ * too. The funky part is that this info has been obtained pretty much by trial
+ * and error. */
+static void r300_update_rs_block(struct r300_context *r300)
{
- struct r300_rs_block rs = { { 0 } };
- int i, col_count = 0, tex_count = 0, fp_offset = 0, count;
- void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean);
+ struct r300_vertex_shader *vs = r300->vs_state.state;
+ struct r300_shader_semantics *vs_outputs = &vs->outputs;
+ struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs;
+ struct r300_rs_block rs = {0};
+ int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0;
+ void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
vs_outputs->bcolor[1] != ATTR_UNUSED;
+ int *stream_loc_notcl = r300->stream_loc_notcl;
if (r300->screen->caps.is_r500) {
rX00_rs_col = r500_rs_col;
@@ -295,18 +304,39 @@ static void r300_update_rs_block(struct r300_context* r300,
rX00_rs_tex_write = r300_rs_tex_write;
}
- /* Rasterize colors. */
+ /* The position is always present in VAP. */
+ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS;
+ rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+ stream_loc_notcl[loc++] = 0;
+
+ /* Set up the point size in VAP. */
+ if (vs_outputs->psize != ATTR_UNUSED) {
+ rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+ stream_loc_notcl[loc++] = 1;
+ }
+
+ /* Set up and rasterize colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
vs_outputs->color[1] != ATTR_UNUSED) {
- /* Always rasterize if it's written by the VS,
- * otherwise it locks up. */
- rX00_rs_col(&rs, col_count, i, FALSE);
+ /* Set up the color in VAP. */
+ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
+ rs.vap_out_vtx_fmt[0] |=
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
+ stream_loc_notcl[loc++] = 2 + i;
+
+ /* Rasterize it. */
+ rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW);
- /* Write it to the FS input register if it's used by the FS. */
+ /* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->color[i] != ATTR_UNUSED) {
rX00_rs_col_write(&rs, col_count, fp_offset);
fp_offset++;
+
+ DBG(r300, DBG_RS,
+ "r300: Rasterized color %i written to FS.\n", i);
+ } else {
+ DBG(r300, DBG_RS, "r300: Rasterized color %i unused.\n", i);
}
col_count++;
} else {
@@ -314,26 +344,51 @@ static void r300_update_rs_block(struct r300_context* r300,
/* If we try to set it to (0,0,0,1), it will lock up. */
if (fs_inputs->color[i] != ATTR_UNUSED) {
fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: FS input color %i unassigned.\n",
+ i);
}
}
}
+ /* Set up back-face colors. The rasterizer will do the color selection
+ * automatically. */
+ if (any_bcolor_used) {
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
+ rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i);
+ stream_loc_notcl[loc++] = 4 + i;
+ }
+ }
+
/* Rasterize texture coordinates. */
- for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) {
bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i));
if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) {
- /* Always rasterize if it's written by the VS,
- * otherwise it locks up. */
+ if (!sprite_coord) {
+ /* Set up the texture coordinates in VAP. */
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
+ }
+
+ /* Rasterize it. */
rX00_rs_tex(&rs, tex_count, tex_count,
sprite_coord ? SWIZ_XY01 : SWIZ_XYZW);
- /* Write it to the FS input register if it's used by the FS. */
+ /* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->generic[i] != ATTR_UNUSED) {
rX00_rs_tex_write(&rs, tex_count, fp_offset);
- if (sprite_coord)
- debug_printf("r300: SpriteCoord (generic index %i) is being written to reg %i\n", i, fp_offset);
fp_offset++;
+
+ DBG(r300, DBG_RS,
+ "r300: Rasterized generic %i written to FS%s.\n",
+ i, sprite_coord ? " (sprite coord)" : "");
+ } else {
+ DBG(r300, DBG_RS,
+ "r300: Rasterized generic %i unused%s.\n",
+ i, sprite_coord ? " (sprite coord)" : "");
}
tex_count++;
} else {
@@ -341,20 +396,31 @@ static void r300_update_rs_block(struct r300_context* r300,
/* If we try to set it to (0,0,0,1), it will lock up. */
if (fs_inputs->generic[i] != ATTR_UNUSED) {
fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n",
+ i, sprite_coord ? " (sprite coord)" : "");
}
}
}
/* Rasterize fog coordinates. */
- if (vs_outputs->fog != ATTR_UNUSED) {
- /* Always rasterize if it's written by the VS,
- * otherwise it locks up. */
+ if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) {
+ /* Set up the fog coordinates in VAP. */
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
+
+ /* Rasterize it. */
rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001);
- /* Write it to the FS input register if it's used by the FS. */
+ /* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->fog != ATTR_UNUSED) {
rX00_rs_tex_write(&rs, tex_count, fp_offset);
fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: Rasterized fog written to FS.\n");
+ } else {
+ DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n");
}
tex_count++;
} else {
@@ -362,25 +428,47 @@ static void r300_update_rs_block(struct r300_context* r300,
/* If we try to set it to (0,0,0,1), it will lock up. */
if (fs_inputs->fog != ATTR_UNUSED) {
fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n");
}
}
/* Rasterize WPOS. */
- /* If the FS doesn't need it, it's not written by the VS. */
- if (vs_outputs->wpos != ATTR_UNUSED && fs_inputs->wpos != ATTR_UNUSED) {
+ /* Don't set it in VAP if the FS doesn't need it. */
+ if (fs_inputs->wpos != ATTR_UNUSED && tex_count < 8) {
+ /* Set up the WPOS coordinates in VAP. */
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
+
+ /* Rasterize it. */
rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW);
+
+ /* Write it to the FS input register. */
rX00_rs_tex_write(&rs, tex_count, fp_offset);
+ DBG(r300, DBG_RS, "r300: Rasterized WPOS written to FS.\n");
+
fp_offset++;
tex_count++;
}
+ /* Invalidate the rest of the no-TCL (GA) stream locations. */
+ for (; loc < 16;) {
+ stream_loc_notcl[loc++] = -1;
+ }
+
/* Rasterize at least one color, or bad things happen. */
if (col_count == 0 && tex_count == 0) {
- rX00_rs_col(&rs, 0, 0, TRUE);
+ rX00_rs_col(&rs, 0, 0, SWIZ_0001);
col_count++;
+
+ DBG(r300, DBG_RS, "r300: Rasterized color 0 to prevent lockups.\n");
}
+ DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, "
+ "generics: %i.\n", col_count, tex_count);
+
rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) |
R300_HIRES_EN;
@@ -390,87 +478,8 @@ static void r300_update_rs_block(struct r300_context* r300,
/* Now, after all that, see if we actually need to update the state. */
if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) {
memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block));
- r300->rs_block_state.size = 5 + count*2;
- }
-}
-
-/* Update the shader-dependant states. */
-static void r300_update_derived_shader_state(struct r300_context* r300)
-{
- struct r300_vertex_shader* vs = r300->vs_state.state;
-
- r300_update_rs_block(r300, &vs->outputs, &r300_fs(r300)->shader->inputs);
-}
-
-static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
-{
- /* We are interested only in the cases when a new depth or stencil value
- * can be written and changed. */
-
- /* We might optionally check for [Z func: never] and inspect the stencil
- * state in a similar fashion, but it's not terribly important. */
- return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) ||
- (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) ||
- ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) &&
- (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK));
-}
-
-static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa)
-{
- /* We are interested only in the cases when alpha testing can kill
- * a fragment. */
- uint32_t af = dsa->alpha_function;
-
- return (af & R300_FG_ALPHA_FUNC_ENABLE) &&
- (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS;
-}
-
-static void r300_update_ztop(struct r300_context* r300)
-{
- struct r300_ztop_state* ztop_state =
- (struct r300_ztop_state*)r300->ztop_state.state;
-
- /* This is important enough that I felt it warranted a comment.
- *
- * According to the docs, these are the conditions where ZTOP must be
- * disabled:
- * 1) Alpha testing enabled
- * 2) Texture kill instructions in fragment shader
- * 3) Chroma key culling enabled
- * 4) W-buffering enabled
- *
- * The docs claim that for the first three cases, if no ZS writes happen,
- * then ZTOP can be used.
- *
- * (3) will never apply since we do not support chroma-keyed operations.
- * (4) will need to be re-examined (and this comment updated) if/when
- * Hyper-Z becomes supported.
- *
- * Additionally, the following conditions require disabled ZTOP:
- * 5) Depth writes in fragment shader
- * 6) Outstanding occlusion queries
- *
- * This register causes stalls all the way from SC to CB when changed,
- * but it is buffered on-chip so it does not hurt to write it if it has
- * not changed.
- *
- * ~C.
- */
-
- /* ZS writes */
- if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) &&
- (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */
- r300_fs(r300)->shader->info.uses_kill)) { /* (2) */
- ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
- } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */
- ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
- } else if (r300->query_current) { /* (6) */
- ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
- } else {
- ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
+ r300->rs_block_state.size = 11 + count*2;
}
-
- r300->ztop_state.dirty = TRUE;
}
static void r300_merge_textures_and_samplers(struct r300_context* r300)
@@ -568,7 +577,7 @@ void r300_update_derived_state(struct r300_context* r300)
}
if (r300->rs_block_state.dirty) {
- r300_update_derived_shader_state(r300);
+ r300_update_rs_block(r300);
}
if (r300->draw) {
@@ -578,5 +587,5 @@ void r300_update_derived_state(struct r300_context* r300)
r300_swtcl_vertex_psc(r300);
}
- r300_update_ztop(r300);
+ r300_update_hyperz_state(r300);
}
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 480d0f7c4a..fcbdb91b67 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -374,6 +374,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format),
__FUNCTION__, __LINE__);
assert(0);
+ abort();
}
switch (desc->channel[0].type) {
@@ -395,6 +396,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
fprintf(stderr, "r300: Bad format %s in %s:%d\n",
util_format_name(format), __FUNCTION__, __LINE__);
assert(0);
+ abort();
}
break;
/* Unsigned ints */
@@ -418,12 +420,14 @@ r300_translate_vertex_data_type(enum pipe_format format) {
fprintf(stderr, "r300: desc->channel[0].size == %d\n",
desc->channel[0].size);
assert(0);
+ abort();
}
break;
default:
fprintf(stderr, "r300: Bad format %s in %s:%d\n",
util_format_name(format), __FUNCTION__, __LINE__);
assert(0);
+ abort();
}
if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
@@ -439,6 +443,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
static INLINE uint16_t
r300_translate_vertex_data_swizzle(enum pipe_format format) {
const struct util_format_description *desc = util_format_description(format);
+ unsigned i, swizzle = 0;
assert(format);
@@ -448,11 +453,19 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) {
return 0;
}
- return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |
- (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
- (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
- (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) |
- (0xf << R300_WRITE_ENA_SHIFT));
+ for (i = 0; i < desc->nr_channels; i++) {
+ swizzle |=
+ MIN2(desc->swizzle[i], R300_SWIZZLE_SELECT_FP_ONE) << (3*i);
+ }
+ /* Set (0,0,0,1) in unused components. */
+ for (; i < 3; i++) {
+ swizzle |= R300_SWIZZLE_SELECT_FP_ZERO << (3*i);
+ }
+ for (; i < 4; i++) {
+ swizzle |= R300_SWIZZLE_SELECT_FP_ONE << (3*i);
+ }
+
+ return swizzle | (0xf << R300_WRITE_ENA_SHIFT);
}
#endif /* R300_STATE_INLINES_H */
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 64d1d18b45..cd9443fa26 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -43,15 +43,17 @@ void r300_emit_invariant_state(struct r300_context* r300,
{
CS_LOCALS(r300);
+ if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
+ /* Subpixel multisampling for AA. */
+ BEGIN_CS(4);
+ OUT_CS_REG(R300_GB_MSPOS0, 0x66666666);
+ OUT_CS_REG(R300_GB_MSPOS1, 0x6666666);
+ END_CS;
+ }
+
BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 2 : 0));
/*** Graphics Backend (GB) ***/
- /* Subpixel multisampling for AA
- * These are commented out because glisse's CS checker doesn't like them.
- * I presume these will be re-enabled later.
- * OUT_CS_REG(R300_GB_MSPOS0, 0x66666666);
- * OUT_CS_REG(R300_GB_MSPOS1, 0x6666666);
- */
/* Source of fog depth */
OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W);
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 8bebeacf86..69e6a12445 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -34,9 +34,6 @@
#include "r300_screen.h"
#include "r300_winsys.h"
-/* XXX Enable float textures here. */
-/*#define ENABLE_FLOAT_TEXTURES*/
-
#define TILE_WIDTH 0
#define TILE_HEIGHT 1
@@ -74,7 +71,7 @@ static boolean r300_format_is_plain(enum pipe_format format)
* The FORMAT specifies how the texture sampler will treat the texture, and
* makes available X, Y, Z, W, ZERO, and ONE for swizzling. */
uint32_t r300_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle)
+ const unsigned char *swizzle_view)
{
uint32_t result = 0;
const struct util_format_description *desc;
@@ -98,6 +95,7 @@ uint32_t r300_translate_texformat(enum pipe_format format,
R300_TX_FORMAT_SIGNED_Z,
R300_TX_FORMAT_SIGNED_W,
};
+ unsigned char swizzle[4];
desc = util_format_description(format);
@@ -144,25 +142,18 @@ uint32_t r300_translate_texformat(enum pipe_format format,
}
}
- /* Add swizzle. */
- if (!swizzle) {
- swizzle = desc->swizzle;
- } /*else {
- if (swizzle[0] != desc->swizzle[0] ||
- swizzle[1] != desc->swizzle[1] ||
- swizzle[2] != desc->swizzle[2] ||
- swizzle[3] != desc->swizzle[3])
- {
- const char n[6] = "RGBA01";
- fprintf(stderr, "Got different swizzling! Format: %c%c%c%c, "
- "View: %c%c%c%c\n",
- n[desc->swizzle[0]], n[desc->swizzle[1]],
- n[desc->swizzle[2]], n[desc->swizzle[3]],
- n[swizzle[0]], n[swizzle[1]], n[swizzle[2]],
- n[swizzle[3]]);
+ /* Get swizzle. */
+ if (swizzle_view) {
+ /* Compose two sets of swizzles. */
+ for (i = 0; i < 4; i++) {
+ swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ?
+ desc->swizzle[swizzle_view[i]] : swizzle_view[i];
}
- }*/
+ } else {
+ memcpy(swizzle, desc->swizzle, sizeof(swizzle));
+ }
+ /* Add swizzle. */
for (i = 0; i < 4; i++) {
switch (swizzle[i]) {
case UTIL_FORMAT_SWIZZLE_X:
@@ -316,7 +307,6 @@ uint32_t r300_translate_texformat(enum pipe_format format,
}
return ~0;
-#if defined(ENABLE_FLOAT_TEXTURES)
case UTIL_FORMAT_TYPE_FLOAT:
switch (desc->channel[0].size) {
case 16:
@@ -340,7 +330,6 @@ uint32_t r300_translate_texformat(enum pipe_format format,
return R300_TX_FORMAT_32F_32F_32F_32F | result;
}
}
-#endif
}
return ~0; /* Unsupported/unknown. */
@@ -405,16 +394,12 @@ static uint32_t r300_translate_colorformat(enum pipe_format format)
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
-#if defined(ENABLE_FLOAT_TEXTURES)
case PIPE_FORMAT_R16G16B16A16_FLOAT:
-#endif
return R300_COLOR_FORMAT_ARGB16161616;
/* 128-bit buffers. */
-#if defined(ENABLE_FLOAT_TEXTURES)
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return R300_COLOR_FORMAT_ARGB32323232;
-#endif
/* YUV buffers. */
case PIPE_FORMAT_UYVY:
@@ -532,7 +517,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
- //case PIPE_FORMAT_R16G16B16A16_FLOAT: /* not in pipe_format */
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return modifier |
R300_C0_SEL_R | R300_C1_SEL_G |
@@ -573,7 +558,7 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen,
if (tex->uses_pitch) {
/* rectangles love this */
f->format0 |= R300_TX_PITCH_EN;
- f->format2 = (tex->pitch[0] - 1) & 0x1fff;
+ f->format2 = (tex->hwpitch[0] - 1) & 0x1fff;
} else {
/* power of two textures (3D, mipmaps, and no pitch) */
f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf);
@@ -614,7 +599,7 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen,
if (util_format_is_depth_or_stencil(tex->b.b.format)) {
for (i = 0; i <= tex->b.b.last_level; i++) {
tex->fb_state.depthpitch[i] =
- tex->pitch[i] |
+ tex->hwpitch[i] |
R300_DEPTHMACROTILE(tex->mip_macrotile[i]) |
R300_DEPTHMICROTILE(tex->microtile);
}
@@ -622,7 +607,7 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen,
} else {
for (i = 0; i <= tex->b.b.last_level; i++) {
tex->fb_state.colorpitch[i] =
- tex->pitch[i] |
+ tex->hwpitch[i] |
r300_translate_colorformat(tex->b.b.format) |
R300_COLOR_TILE(tex->mip_macrotile[i]) |
R300_COLOR_MICROTILE(tex->microtile);
@@ -762,12 +747,12 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture* tex,
static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen,
struct r300_texture *tex)
{
- /* The kernels <= 2.6.34-rc3 compute the size of mipmapped 3D textures
+ /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures
* incorrectly. This is a workaround to prevent CS from being rejected. */
unsigned i, size;
- if (screen->rws->get_value(screen->rws, R300_VID_TEX3D_MIP_BUG) &&
+ if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) &&
tex->b.b.target == PIPE_TEXTURE_3D &&
tex->b.b.last_level > 0) {
size = 0;
@@ -813,6 +798,8 @@ static void r300_setup_miptree(struct r300_screen* screen,
tex->size = tex->offset[i] + size;
tex->layer_size[i] = layer_size;
tex->pitch[i] = stride / util_format_get_blocksize(base->format);
+ tex->hwpitch[i] =
+ tex->pitch[i] * util_format_get_blockwidth(base->format);
SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d "
"(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n",
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 453d42b188..ba79ec068a 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -28,7 +28,7 @@
struct r300_texture;
uint32_t r300_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle);
+ const unsigned char *swizzle_view);
uint32_t r500_tx_format_msb_bit(enum pipe_format format);
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index f6428ed760..89f39af976 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -108,11 +108,9 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */
case TGSI_OPCODE_IF: return RC_OPCODE_IF;
/* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */
- /* case TGSI_OPCODE_REP: return RC_OPCODE_REP; */
case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE;
case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF;
/* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */
- /* case TGSI_OPCODE_ENDREP: return RC_OPCODE_ENDREP; */
/* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */
/* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */
case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index bfab9c3b01..b7609bad81 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -94,94 +94,6 @@ static void r300_shader_read_vs_outputs(
vs_outputs->wpos = i;
}
-/* This function sets up:
- * - VAP mapping, which maps VS registers to output semantics and
- * at the same time it indicates which attributes are enabled and should
- * be rasterized.
- * - Stream mapping to VS outputs if TCL is not present. */
-static void r300_init_vs_output_mapping(struct r300_vertex_shader* vs)
-{
- struct r300_shader_semantics* vs_outputs = &vs->outputs;
- struct r300_vap_output_state *vap_out = &vs->vap_out;
- int *stream_loc = vs->stream_loc_notcl;
- int i, gen_count, tabi = 0;
- boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
- vs_outputs->bcolor[1] != ATTR_UNUSED;
-
- vap_out->vap_vtx_state_cntl = 0x5555; /* XXX this is classic Mesa bonghits */
-
- /* Position. */
- if (vs_outputs->pos != ATTR_UNUSED) {
- vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS;
- vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
-
- stream_loc[tabi++] = 0;
- } else {
- assert(0);
- }
-
- /* Point size. */
- if (vs_outputs->psize != ATTR_UNUSED) {
- vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
-
- stream_loc[tabi++] = 1;
- }
-
- /* Colors. */
- for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
- vs_outputs->color[1] != ATTR_UNUSED) {
- vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
- vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
-
- stream_loc[tabi++] = 2 + i;
- }
- }
-
- /* Back-face colors. */
- if (any_bcolor_used) {
- for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
- vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i);
-
- stream_loc[tabi++] = 4 + i;
- }
- }
-
- /* Texture coordinates. */
- gen_count = 0;
- for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) {
- if (vs_outputs->generic[i] != ATTR_UNUSED) {
- vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count);
- vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count));
-
- stream_loc[tabi++] = 6 + gen_count;
- gen_count++;
- }
- }
-
- /* Fog coordinates. */
- if (gen_count < 8 && vs_outputs->fog != ATTR_UNUSED) {
- vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count);
- vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count));
-
- stream_loc[tabi++] = 6 + gen_count;
- gen_count++;
- }
-
- /* WPOS. */
- if (gen_count < 8) {
- vs->wpos_tex_output = gen_count;
- stream_loc[tabi++] = 6 + gen_count;
- } else {
- vs_outputs->wpos = ATTR_UNUSED;
- }
-
- for (; tabi < 16;) {
- stream_loc[tabi++] = -1;
- }
-}
-
static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
{
struct r300_vertex_shader * vs = c->UserData;
@@ -246,9 +158,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
}
/* WPOS. */
- if (outputs->wpos != ATTR_UNUSED) {
- c->code->outputs[outputs->wpos] = reg++;
- }
+ c->code->outputs[outputs->wpos] = reg++;
}
static void r300_dummy_vertex_shader(
@@ -286,7 +196,6 @@ void r300_translate_vertex_shader(struct r300_context* r300,
tgsi_scan_shader(tokens, &vs->info);
r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
- r300_init_vs_output_mapping(vs);
/* Setup the compiler */
rc_init(&compiler.Base);
@@ -307,16 +216,11 @@ void r300_translate_vertex_shader(struct r300_context* r300,
r300_tgsi_to_rc(&ttr, tokens);
- compiler.RequiredOutputs =
- ~(~0 << (vs->info.num_outputs +
- (vs->outputs.wpos != ATTR_UNUSED ? 1 : 0)));
-
+ compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs + 1));
compiler.SetHwInputOutput = &set_vertex_inputs_outputs;
/* Insert the WPOS output. */
- if (vs->outputs.wpos != ATTR_UNUSED) {
- rc_copy_output(&compiler.Base, 0, vs->outputs.wpos);
- }
+ rc_copy_output(&compiler.Base, 0, vs->outputs.wpos);
/* Invoke the compiler */
r3xx_compile_vertex_program(&compiler);
@@ -343,32 +247,3 @@ void r300_translate_vertex_shader(struct r300_context* r300,
/* And, finally... */
rc_destroy(&compiler.Base);
}
-
-boolean r300_vertex_shader_setup_wpos(struct r300_context* r300)
-{
- struct r300_vertex_shader* vs = r300->vs_state.state;
- struct r300_vap_output_state *vap_out = &vs->vap_out;
- int tex_output = vs->wpos_tex_output;
- uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output;
-
- if (vs->outputs.wpos == ATTR_UNUSED) {
- return FALSE;
- }
-
- if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) {
- /* Enable WPOS in VAP. */
- if (!(vap_out->vap_vsm_vtx_assm & tex_fmt)) {
- vap_out->vap_vsm_vtx_assm |= tex_fmt;
- vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * tex_output));
- return TRUE;
- }
- } else {
- /* Disable WPOS in VAP. */
- if (vap_out->vap_vsm_vtx_assm & tex_fmt) {
- vap_out->vap_vsm_vtx_assm &= ~tex_fmt;
- vap_out->vap_out_vtx_fmt[1] &= ~(4 << (3 * tex_output));
- return TRUE;
- }
- }
- return FALSE;
-}
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 56bcc3b70b..57b3fbca0b 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -39,7 +39,6 @@ struct r300_vertex_shader {
struct tgsi_shader_info info;
struct r300_shader_semantics outputs;
- struct r300_vap_output_state vap_out;
/* Whether the shader was replaced by a dummy one due to a shader
* compilation failure. */
@@ -49,12 +48,6 @@ struct r300_vertex_shader {
unsigned externals_count;
unsigned immediates_count;
- /* Stream locations for SWTCL or if TCL is bypassed. */
- int stream_loc_notcl[16];
-
- /* Output stream location for WPOS. */
- int wpos_tex_output;
-
/* HWTCL-specific. */
/* Machine code (if translated) */
struct r300_vertex_program_code code;
@@ -67,7 +60,4 @@ void r300_translate_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs,
const struct tgsi_token *tokens);
-/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */
-boolean r300_vertex_shader_setup_wpos(struct r300_context* r300);
-
#endif /* R300_VS_H */
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index 2bd40176d1..1642981eaa 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -39,7 +39,7 @@ enum r300_value_id {
R300_VID_GB_PIPES,
R300_VID_Z_PIPES,
R300_VID_SQUARE_TILING_SUPPORT,
- R300_VID_TEX3D_MIP_BUG,
+ R300_VID_DRM_2_3_0,
};
enum r300_reference_domain { /* bitfield */
@@ -119,6 +119,10 @@ struct r300_winsys_screen {
/* Write a dword to the command buffer. */
void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword);
+ /* Write a table of dwords to the command buffer. */
+ void (*write_cs_table)(struct r300_winsys_screen* winsys,
+ const void *dwords, unsigned count);
+
/* Write a relocated dword to the command buffer. */
void (*write_cs_reloc)(struct r300_winsys_screen *winsys,
struct r300_winsys_buffer *buf,