diff options
Diffstat (limited to 'src/gallium/drivers/r300')
25 files changed, 550 insertions, 487 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 5a8e00f15a..d3cd6bef96 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ r300_emit.c \ r300_flush.c \ r300_fs.c \ + r300_hyperz.c \ r300_query.c \ r300_render.c \ r300_resource.c \ diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 08aec427a1..3921085d76 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -21,6 +21,7 @@ r300 = env.ConvenienceLibrary( 'r300_emit.c', 'r300_flush.c', 'r300_fs.c', + 'r300_hyperz.c', 'r300_query.c', 'r300_render.c', 'r300_resource.c', diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index deaa03e1f6..e84bce0010 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -37,14 +37,27 @@ #include "r300_state_invariant.h" #include "r300_winsys.h" +#include <inttypes.h> + static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); struct r300_query* query, * temp; + struct r300_atom *atom; util_blitter_destroy(r300->blitter); draw_destroy(r300->draw); + /* Print stats, if enabled. */ + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { + fprintf(stderr, "r300: Stats for context %p:\n", r300); + fprintf(stderr, " : Flushes: %" PRIu64 "\n", r300->flush_counter); + foreach(atom, &r300->atom_list) { + fprintf(stderr, " : %s: %" PRIu64 " emits\n", + atom->name, atom->counter); + } + } + /* Free the OQ BO. */ context->screen->resource_destroy(context->screen, r300->oqbo); @@ -63,7 +76,6 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); FREE(r300->textures_state.state); - FREE(r300->vap_output_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300->fs_constants.state); @@ -112,7 +124,6 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(rs_block_state, 0); R300_INIT_ATOM(vertex_stream_state, 0); - R300_INIT_ATOM(vap_output_state, 6); R300_INIT_ATOM(pvs_flush, 2); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); @@ -136,7 +147,6 @@ static void r300_setup_atoms(struct r300_context* r300) r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); - r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 1e4fd9e5ed..e9c8fcdc15 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -45,6 +45,8 @@ struct r300_atom { struct r300_atom *prev, *next; /* Name, for debugging. */ const char* name; + /* Stat counter. */ + uint64_t counter; /* Opaque state. */ void* state; /* Emit the state to the context. */ @@ -117,6 +119,10 @@ struct r300_rs_state { }; struct r300_rs_block { + uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ + uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ + uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ + uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */ uint32_t count; /* R300_RS_COUNT */ uint32_t inst_count; /* R300_RS_INST_COUNT */ @@ -188,12 +194,6 @@ struct r300_vertex_stream_state { unsigned count; }; -struct r300_vap_output_state { - uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ - uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ - uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ -}; - struct r300_viewport_state { float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ @@ -255,6 +255,10 @@ struct r300_texture { /* A pitch for each mip-level */ unsigned pitch[R300_MAX_TEXTURE_LEVELS]; + /* A pitch multiplied by blockwidth as hardware wants + * the number of pixels instead of the number of blocks. */ + unsigned hwpitch[R300_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face based on the texture target */ unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; @@ -375,7 +379,7 @@ struct r300_context { struct r300_atom query_start; /* Rasterizer state. */ struct r300_atom rs_state; - /* RS block state. */ + /* RS block state + VAP (vertex shader) output mapping state. */ struct r300_atom rs_block_state; /* Scissor state. */ struct r300_atom scissor_state; @@ -383,8 +387,6 @@ struct r300_context { struct r300_atom textures_state; /* Vertex stream formatting state. */ struct r300_atom vertex_stream_state; - /* VAP (vertex shader) output mapping state. */ - struct r300_atom vap_output_state; /* Vertex shader. */ struct r300_atom vs_state; /* Vertex shader constant buffer. */ @@ -418,6 +420,9 @@ struct r300_context { struct pipe_viewport_state viewport; + /* Stream locations for SWTCL. */ + int stream_loc_notcl[16]; + /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; /* Whether polygon offset is enabled. */ @@ -435,6 +440,9 @@ struct r300_context { /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; + + /* Stat counter. */ + uint64_t flush_counter; }; /* Convenience cast wrapper. */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 456b2ec7b9..996a4f491e 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -104,6 +104,13 @@ cs_count--; \ } while (0) +#define OUT_CS_TABLE(values, count) do { \ + if (VERY_VERBOSE_REGISTERS) \ + DBG(cs_context_copy, DBG_CS, "r300: writing table of %d dwords\n", count); \ + cs_winsys->write_cs_table(cs_winsys, values, count); \ + cs_count -= count; \ +} while (0) + #define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ "domains (%d, %d, %d)\n", \ @@ -150,6 +157,9 @@ DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { \ + r300->flush_counter++; \ + } \ cs_winsys->flush_cs(cs_winsys); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 6e84bf8246..4c2836f36a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -38,9 +38,11 @@ static struct debug_option debug_options[] = { { "draw", DBG_DRAW, "Draw and emit (for debugging)" }, { "tex", DBG_TEX, "Textures (for debugging)" }, { "fall", DBG_FALL, "Fallbacks (for debugging)" }, + { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, + { "stats", DBG_STATS, "Gather statistics (for lulz)" }, { "all", ~0, "Convenience option that enables all debug flags" }, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 19acdaba62..23bbc6a99c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -83,7 +83,6 @@ void r300_emit_clip_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_clip_state* clip = (struct pipe_clip_state*)state; - int i; CS_LOCALS(r300); if (r300->screen->caps.has_tcl) { @@ -92,12 +91,7 @@ void r300_emit_clip_state(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_UCP_START : R300_PVS_UCP_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); - for (i = 0; i < 6; i++) { - OUT_CS_32F(clip->ucp[i][0]); - OUT_CS_32F(clip->ucp[i][1]); - OUT_CS_32F(clip->ucp[i][2]); - OUT_CS_32F(clip->ucp[i][3]); - } + OUT_CS_TABLE(clip->ucp, 6 * 4); OUT_CS_REG(R300_VAP_CLIP_CNTL, ((1 << clip->nr) - 1) | R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CS; @@ -106,7 +100,6 @@ void r300_emit_clip_state(struct r300_context* r300, OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); END_CS; } - } void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) @@ -244,8 +237,7 @@ void r300_emit_fs(struct r300_context* r300, unsigned size, void *state) OUT_CS_REG(R300_US_CODE_OFFSET, code->code_offset); OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4); - for(i = 0; i < 4; ++i) - OUT_CS(code->code_addr[i]); + OUT_CS_TABLE(code->code_addr, 4); OUT_CS_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); for (i = 0; i < code->alu.length; i++) @@ -265,8 +257,7 @@ void r300_emit_fs(struct r300_context* r300, unsigned size, void *state) if (code->tex.length) { OUT_CS_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); - for(i = 0; i < code->tex.length; ++i) - OUT_CS(code->tex.inst[i]); + OUT_CS_TABLE(code->tex.inst, code->tex.length); } /* Emit immediates. */ @@ -396,10 +387,7 @@ void r500_emit_fs(struct r300_context* r300, unsigned size, void *state) R500_GA_US_VECTOR_INDEX_TYPE_CONST | (i & R500_GA_US_VECTOR_INDEX_MASK)); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4); - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); + OUT_CS_TABLE(data, 4); } } } @@ -424,15 +412,9 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4); for(i = 0; i < count; ++i) { - const float *data; assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL); - data = buf->constants[i]; - - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); } + OUT_CS_TABLE(buf->constants, count * 4); END_CS; } @@ -459,10 +441,7 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo R500_GA_US_VECTOR_INDEX_TYPE_CONST | (i & R500_GA_US_VECTOR_INDEX_MASK)); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4); - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); + OUT_CS_TABLE(data, 4); } } END_CS; @@ -738,13 +717,20 @@ void r300_emit_rs_block_state(struct r300_context* r300, DBG(r300, DBG_DRAW, "r300: RS emit:\n"); BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_CS(rs->vap_vtx_state_cntl); + OUT_CS(rs->vap_vsm_vtx_assm); + OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_CS(rs->vap_out_vtx_fmt[0]); + OUT_CS(rs->vap_out_vtx_fmt[1]); + if (r300->screen->caps.is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); } else { OUT_CS_REG_SEQ(R300_RS_IP_0, count); } + OUT_CS_TABLE(rs->ip, count); for (i = 0; i < count; i++) { - OUT_CS(rs->ip[i]); DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]); } @@ -757,8 +743,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, } else { OUT_CS_REG_SEQ(R300_RS_INST_0, count); } + OUT_CS_TABLE(rs->inst, count); for (i = 0; i < count; i++) { - OUT_CS(rs->inst[i]); DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]); } @@ -823,7 +809,7 @@ void r300_emit_textures_state(struct r300_context *r300, END_CS; } -void r300_emit_aos(struct r300_context* r300, unsigned offset) +void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; @@ -832,9 +818,18 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) unsigned packet_size = (aos_count * 3 + 1) / 2; CS_LOCALS(r300); + for (i = 0; i < aos_count; i++) { + if ((vbuf[velem[i].vertex_buffer_index].buffer_offset + velem[i].src_offset) % 4 != 0) { + /* XXX We must align the buffer. */ + assert(0); + fprintf(stderr, "r300: Unaligned vertex buffer offsets aren't supported, aborting..\n"); + abort(); + } + } + BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(aos_count); + OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); for (i = 0; i < aos_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; @@ -899,39 +894,20 @@ void r300_emit_vertex_stream_state(struct r300_context* r300, BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); + OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count); for (i = 0; i < streams->count; i++) { - OUT_CS(streams->vap_prog_stream_cntl[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, streams->vap_prog_stream_cntl[i]); } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); + OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count); for (i = 0; i < streams->count; i++) { - OUT_CS(streams->vap_prog_stream_cntl_ext[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, streams->vap_prog_stream_cntl_ext[i]); } END_CS; } -void r300_emit_vap_output_state(struct r300_context* r300, - unsigned size, void* state) -{ - struct r300_vap_output_state *vap_out_state = - (struct r300_vap_output_state*)state; - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: VAP emit:\n"); - - BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(vap_out_state->vap_vtx_state_cntl); - OUT_CS(vap_out_state->vap_vsm_vtx_assm); - OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(vap_out_state->vap_out_vtx_fmt[0]); - OUT_CS(vap_out_state->vap_out_vtx_fmt[1]); - END_CS; -} - void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) { CS_LOCALS(r300); @@ -978,9 +954,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length); - for (i = 0; i < code->length; i++) { - OUT_CS(code->body.d[i]); - } + OUT_CS_TABLE(code->body.d, code->length); OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | R300_PVS_NUM_CNTLRS(pvs_num_controllers) | @@ -997,10 +971,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4); for (i = imm_first; i < imm_end; i++) { const float *data = vs->code.constants.Constants[i].u.Immediate; - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); + OUT_CS_TABLE(data, 4); } } END_CS; @@ -1009,7 +980,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) void r300_emit_vs_constants(struct r300_context* r300, unsigned size, void *state) { - unsigned i; unsigned count = ((struct r300_vertex_shader*)r300->vs_state.state)->externals_count; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; @@ -1023,13 +993,7 @@ void r300_emit_vs_constants(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_CONST_START : R300_PVS_CONST_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); - for (i = 0; i < count; i++) { - const float *data = buf->constants[i]; - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); - } + OUT_CS_TABLE(buf->constants, count * 4); END_CS; } @@ -1188,6 +1152,11 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) } } + /* emit_query_end is not atomized. */ + dwords += 26; + /* let's reserve some more, just in case */ + dwords += 32; + return dwords; } @@ -1200,6 +1169,9 @@ void r300_emit_dirty_state(struct r300_context* r300) foreach(atom, &r300->atom_list) { if (atom->dirty) { atom->emit(r300, atom->size, atom->state); + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { + atom->counter++; + } atom->dirty = FALSE; } } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 56f7318cdb..3c0edf6fdc 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -29,7 +29,7 @@ struct rX00_fragment_program_code; struct r300_vertex_program_code; -void r300_emit_aos(struct r300_context* r300, unsigned offset); +void r300_emit_aos(struct r300_context* r300, unsigned offset, boolean indexed); void r300_emit_blend_state(struct r300_context* r300, unsigned size, void* state); @@ -81,9 +81,6 @@ void r300_emit_vertex_buffer(struct r300_context* r300); void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_vap_output_state(struct r300_context* r300, - unsigned size, void* state); - void r300_emit_vs_constants(struct r300_context* r300, unsigned size, void *state); diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4d61f63853..88303f074c 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -275,6 +275,14 @@ static void r300_translate_fragment_shader( /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); + /* Shaders with zero instructions are invalid, + * use the dummy shader instead. */ + if (shader->code.code.r500.inst_end == -1) { + rc_destroy(&compiler.Base); + r300_dummy_fragment_shader(r300, shader); + return; + } + if (compiler.Base.Error) { fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader" " instead.\n", compiler.Base.ErrorMsg); diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c new file mode 100644 index 0000000000..b41b6b1508 --- /dev/null +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -0,0 +1,108 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + + +#include "r300_hyperz.h" +#include "r300_context.h" +#include "r300_reg.h" +#include "r300_fs.h" + +/*****************************************************************************/ +/* The ZTOP state */ +/*****************************************************************************/ + +static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when a new depth or stencil value + * can be written and changed. */ + + /* We might optionally check for [Z func: never] and inspect the stencil + * state in a similar fashion, but it's not terribly important. */ + return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || + (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || + ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && + (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); +} + +static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when alpha testing can kill + * a fragment. */ + uint32_t af = dsa->alpha_function; + + return (af & R300_FG_ALPHA_FUNC_ENABLE) && + (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; +} + +static void r300_update_ztop(struct r300_context* r300) +{ + struct r300_ztop_state* ztop_state = + (struct r300_ztop_state*)r300->ztop_state.state; + + /* This is important enough that I felt it warranted a comment. + * + * According to the docs, these are the conditions where ZTOP must be + * disabled: + * 1) Alpha testing enabled + * 2) Texture kill instructions in fragment shader + * 3) Chroma key culling enabled + * 4) W-buffering enabled + * + * The docs claim that for the first three cases, if no ZS writes happen, + * then ZTOP can be used. + * + * (3) will never apply since we do not support chroma-keyed operations. + * (4) will need to be re-examined (and this comment updated) if/when + * Hyper-Z becomes supported. + * + * Additionally, the following conditions require disabled ZTOP: + * 5) Depth writes in fragment shader + * 6) Outstanding occlusion queries + * + * This register causes stalls all the way from SC to CB when changed, + * but it is buffered on-chip so it does not hurt to write it if it has + * not changed. + * + * ~C. + */ + + /* ZS writes */ + if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */ + r300_fs(r300)->shader->info.uses_kill)) { /* (2) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { /* (6) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else { + ztop_state->z_buffer_top = R300_ZTOP_ENABLE; + } + + r300->ztop_state.dirty = TRUE; +} + +void r300_update_hyperz_state(struct r300_context* r300) +{ + r300_update_ztop(r300); +} diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h new file mode 100644 index 0000000000..3df5053b89 --- /dev/null +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -0,0 +1,30 @@ +/* + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_HYPERZ_H +#define R300_HYPERZ_H + +struct r300_context; + +void r300_update_hyperz_state(struct r300_context* r300); + +#endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 239f91443f..675a9317f9 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -3377,7 +3377,7 @@ enum { * the last block is omitted. */ #define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 - +# define R300_VC_FORCE_PREFETCH (1 << 5) # define R300_VBPNTR_SIZE0(x) ((x) >> 2) # define R300_VBPNTR_STRIDE0(x) (((x) >> 2) << 8) # define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 23b61df89c..7c3a7902a4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -41,9 +41,6 @@ #include "r300_render.h" #include "r300_state_derived.h" -/* XXX The DRM rejects VAP_ALT_NUM_VERTICES.. */ -//#define ENABLE_ALT_NUM_VERTS - static uint32_t r300_translate_primitive(unsigned prim) { switch (prim) { @@ -169,6 +166,24 @@ static boolean immd_is_good_idea(struct r300_context *r300, * after resolving fallback issues (e.g. stencil ref two-sided). * ****************************************************************************/ +static boolean r500_emit_index_offset(struct r300_context *r300, int indexBias) +{ + CS_LOCALS(r300); + + if (r300->screen->caps.is_r500 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + BEGIN_CS(2); + OUT_CS_REG(R500_VAP_INDEX_OFFSET, + (indexBias & 0xFFFFFF) | (indexBias < 0 ? 1<<24 : 0)); + END_CS; + } else { + if (indexBias) + return FALSE; /* Can't do anything :( */ + } + + return TRUE; +} + void r500_emit_draw_arrays_immediate(struct r300_context *r300, unsigned mode, unsigned start, @@ -220,10 +235,12 @@ void r500_emit_draw_arrays_immediate(struct r300_context *r300, dwords = 9 + count * vertex_size; - r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords); + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 2 + dwords); r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); + r500_emit_index_offset(r300, 0); + BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); @@ -265,23 +282,20 @@ void r500_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) { -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = count > 65535; -#else - boolean alt_num_verts = FALSE; -#endif CS_LOCALS(r300); + if (count >= (1 << 24)) { + fprintf(stderr, "r300: Got a huge number of vertices: %i, " + "refusing to render.\n", count); + return; + } + + r500_emit_index_offset(r300, 0); + + BEGIN_CS(7 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { - if (count >= (1 << 24)) { - fprintf(stderr, "r300: Got a huge number of vertices: %i, " - "refusing to render.\n", count); - return; - } - BEGIN_CS(9); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); - } else { - BEGIN_CS(7); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); @@ -307,11 +321,7 @@ void r500_emit_draw_elements(struct r300_context *r300, { uint32_t count_dwords; uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = count > 65535; -#else - boolean alt_num_verts = FALSE; -#endif CS_LOCALS(r300); if (count >= (1 << 24)) { @@ -320,18 +330,20 @@ void r500_emit_draw_elements(struct r300_context *r300, return; } - assert(indexBias == 0); - maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index); DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", count, minIndex, maxIndex); + if (!r500_emit_index_offset(r300, indexBias)) { + fprintf(stderr, "r300: Got a non-zero index bias, " + "refusing to render.\n"); + return; + } + + BEGIN_CS(13 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { - BEGIN_CS(15); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); - } else { - BEGIN_CS(13); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); @@ -541,12 +553,9 @@ void r300_draw_range_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct pipe_resource* orgIndexBuffer = indexBuffer; -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536; -#else - boolean alt_num_verts = FALSE; -#endif + count > 65536 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; if (r300->skip_rendering) { @@ -574,7 +583,7 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128); r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); - r300_emit_aos(r300, 0); + r300_emit_aos(r300, 0, TRUE); u_upload_flush(r300->upload_vb); u_upload_flush(r300->upload_ib); @@ -591,11 +600,12 @@ void r300_draw_range_elements(struct pipe_context* pipe, start += short_count; count -= short_count; - /* 16 spare dwords are enough for emit_draw_elements. */ - if (count && r300_reserve_cs_space(r300, 16)) { + /* 16 spare dwords are enough for emit_draw_elements. + * Also reserve some space for emit_query_end. */ + if (count && r300_reserve_cs_space(r300, 74)) { r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); - r300_emit_aos(r300, 0); + r300_emit_aos(r300, 0, TRUE); } } while (count); } @@ -622,12 +632,9 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); -#if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536; -#else - boolean alt_num_verts = FALSE; -#endif + count > 65536 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; if (r300->skip_rendering) { @@ -650,20 +657,21 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_dirty_state(r300); if (alt_num_verts || count <= 65535) { - r300_emit_aos(r300, start); + r300_emit_aos(r300, start, FALSE); r300->emit_draw_arrays(r300, mode, count); } else { do { short_count = MIN2(count, 65535); - r300_emit_aos(r300, start); + r300_emit_aos(r300, start, FALSE); r300->emit_draw_arrays(r300, mode, short_count); start += short_count; count -= short_count; /* Again, we emit both AOS and draw_arrays so there should be - * at least 128 spare dwords. */ - if (count && r300_reserve_cs_space(r300, 128)) { + * at least 128 spare dwords. + * Also reserve some space for emit_query_end. */ + if (count && r300_reserve_cs_space(r300, 186)) { r300_emit_buffer_validate(r300, TRUE, NULL); r300_emit_dirty_state(r300); } @@ -896,6 +904,8 @@ static void r500_render_draw_arrays(struct vbuf_render* render, DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); + r500_emit_index_offset(r300, 0); + BEGIN_CS(2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | @@ -918,6 +928,8 @@ static void r500_render_draw(struct vbuf_render* render, r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); + r500_emit_index_offset(r300, 0); + BEGIN_CS(dwords); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8fc1d5aa00..c039126703 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "util/u_format.h" +#include "util/u_format_s3tc.h" #include "util/u_memory.h" #include "r300_context.h" @@ -319,6 +320,8 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) r300_init_screen_resource_functions(r300screen); + util_format_s3tc_init(); + return &r300screen->screen; } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 330bd9b36b..735c233c9e 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -71,6 +71,8 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { #define DBG_ANISOHQ 0x0000080 #define DBG_NO_TILING 0x0000100 #define DBG_NO_IMMD 0x0000200 +#define DBG_STATS 0x0000400 +#define DBG_RS 0x0000800 /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 9eb8539a65..d31e7c53f7 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -538,46 +538,12 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, } /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */ -static void r300_fb_update_tiling_flags(struct r300_context *r300, +static void r300_fb_set_tiling_flags(struct r300_context *r300, const struct pipe_framebuffer_state *old_state, const struct pipe_framebuffer_state *new_state) { struct r300_texture *tex; - unsigned i, j, level; - - /* Reset tiling flags for old surfaces to default values. */ - for (i = 0; i < old_state->nr_cbufs; i++) { - for (j = 0; j < new_state->nr_cbufs; j++) { - if (old_state->cbufs[i]->texture == new_state->cbufs[j]->texture) { - break; - } - } - /* If not binding the surface again... */ - if (j != new_state->nr_cbufs) { - continue; - } - - tex = r300_texture(old_state->cbufs[i]->texture); - - if (tex) { - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->macrotile); - } - } - if (old_state->zsbuf && - (!new_state->zsbuf || - old_state->zsbuf->texture != new_state->zsbuf->texture)) { - tex = r300_texture(old_state->zsbuf->texture); - - if (tex) { - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->macrotile); - } - } + unsigned i, level; /* Set tiling flags for new surfaces. */ for (i = 0; i < new_state->nr_cbufs; i++) { @@ -585,7 +551,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, level = new_state->cbufs[i]->level; r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[level], + tex->pitch[0], tex->microtile, tex->mip_macrotile[level]); } @@ -594,7 +560,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, level = new_state->zsbuf->level; r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[level], + tex->pitch[0], tex->microtile, tex->mip_macrotile[level]); } @@ -644,7 +610,8 @@ static void r300->dsa_state.dirty = TRUE; } - r300_fb_update_tiling_flags(r300, r300->fb_state.state, state); + /* The tiling flags are dependent on the surface miplevel, unfortunately. */ + r300_fb_set_tiling_flags(r300, r300->fb_state.state, state); memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); @@ -719,10 +686,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300_mark_fs_code_dirty(r300); r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ - - if (r300->vs_state.state && r300_vertex_shader_setup_wpos(r300)) { - r300->vap_output_state.dirty = TRUE; - } } /* Delete fragment shader state. */ @@ -1072,11 +1035,9 @@ r300_create_sampler_view(struct pipe_context *pipe, swizzle[2] = templ->swizzle_b; swizzle[3] = templ->swizzle_a; - /* XXX Enable swizzles when they become supported. Now we get RGBA - * everywhere. And do testing! */ view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, - 0); /*swizzle);*/ + swizzle); if (r300_screen(pipe->screen)->caps.is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } @@ -1271,6 +1232,7 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, { struct r300_vertex_element_state *velems; unsigned i, size; + enum pipe_format *format; assert(count <= PIPE_MAX_ATTRIBS); velems = CALLOC_STRUCT(r300_vertex_element_state); @@ -1279,21 +1241,88 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); if (r300_screen(pipe->screen)->caps.has_tcl) { - /* Check if the format is aligned to the size of DWORD. */ + r300_vertex_psc(velems); + + /* Check if the format is aligned to the size of DWORD. + * We only care about the blocksizes of the formats since + * swizzles are already set up. */ for (i = 0; i < count; i++) { - size = util_format_get_blocksize(attribs[i].src_format); + format = &velems->velem[i].src_format; + + /* Replace some formats with their aligned counterparts, + * this is OK because we check for aligned strides too. */ + switch (*format) { + /* Align to RGBA8. */ + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + *format = PIPE_FORMAT_R8G8B8A8_UNORM; + continue; + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + *format = PIPE_FORMAT_R8G8B8A8_SNORM; + continue; + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8G8B8_USCALED: + *format = PIPE_FORMAT_R8G8B8A8_USCALED; + continue; + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8G8B8_SSCALED: + *format = PIPE_FORMAT_R8G8B8A8_SSCALED; + continue; + + /* Align to RG16. */ + case PIPE_FORMAT_R16_UNORM: + *format = PIPE_FORMAT_R16G16_UNORM; + continue; + case PIPE_FORMAT_R16_SNORM: + *format = PIPE_FORMAT_R16G16_SNORM; + continue; + case PIPE_FORMAT_R16_USCALED: + *format = PIPE_FORMAT_R16G16_USCALED; + continue; + case PIPE_FORMAT_R16_SSCALED: + *format = PIPE_FORMAT_R16G16_SSCALED; + continue; + case PIPE_FORMAT_R16_FLOAT: + *format = PIPE_FORMAT_R16G16_FLOAT; + continue; + + /* Align to RGBA16. */ + case PIPE_FORMAT_R16G16B16_UNORM: + *format = PIPE_FORMAT_R16G16B16A16_UNORM; + continue; + case PIPE_FORMAT_R16G16B16_SNORM: + *format = PIPE_FORMAT_R16G16B16A16_SNORM; + continue; + case PIPE_FORMAT_R16G16B16_USCALED: + *format = PIPE_FORMAT_R16G16B16A16_USCALED; + continue; + case PIPE_FORMAT_R16G16B16_SSCALED: + *format = PIPE_FORMAT_R16G16B16A16_SSCALED; + continue; + case PIPE_FORMAT_R16G16B16_FLOAT: + *format = PIPE_FORMAT_R16G16B16A16_FLOAT; + continue; + + default:; + } + + size = util_format_get_blocksize(*format); if (size % 4 != 0) { /* XXX Shouldn't we align the format? */ fprintf(stderr, "r300_create_vertex_elements_state: " "Unaligned format %s:%i isn't supported\n", - util_format_name(attribs[i].src_format), size); + util_format_name(*format), size); assert(0); abort(); } } - r300_vertex_psc(velems); } } return velems; @@ -1359,14 +1388,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) } r300->vs_state.state = vs; - // VS output mapping for HWTCL or stream mapping for SWTCL to the RS block - if (r300->fs.state) { - r300_vertex_shader_setup_wpos(r300); - } - memcpy(r300->vap_output_state.state, &vs->vap_out, - sizeof(struct r300_vap_output_state)); - r300->vap_output_state.dirty = TRUE; - /* The majority of the RS block bits is dependent on the vertex shader. */ r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 46c192eae1..e3adace0fa 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -28,6 +28,7 @@ #include "r300_context.h" #include "r300_fs.h" +#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" #include "r300_state.h" @@ -42,6 +43,7 @@ enum r300_rs_swizzle { SWIZ_XYZW = 0, SWIZ_X001, SWIZ_XY01, + SWIZ_0001, }; static void r300_draw_emit_attrib(struct r300_context* r300, @@ -113,12 +115,11 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) static void r300_swtcl_vertex_psc(struct r300_context *r300) { struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state; - struct r300_vertex_shader* vs = r300->vs_state.state; - struct vertex_info* vinfo = &r300->vertex_info; + struct vertex_info* vinfo = &r300->vertex_info; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; - int* vs_output_tab = vs->stream_loc_notcl; + int* vs_output_tab = r300->stream_loc_notcl; /* XXX hax */ memset(vstream, 0, sizeof(struct r300_vertex_stream_state)); @@ -169,10 +170,10 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300) } static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, - boolean swizzle_0001) + enum r300_rs_swizzle swiz) { rs->ip[id] |= R300_RS_COL_PTR(ptr); - if (swizzle_0001) { + if (swiz == SWIZ_0001) { rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); } else { rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); @@ -218,10 +219,10 @@ static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) } static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, - boolean swizzle_0001) + enum r300_rs_swizzle swiz) { rs->ip[id] |= R500_RS_COL_PTR(ptr); - if (swizzle_0001) { + if (swiz == SWIZ_0001) { rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); } else { rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); @@ -267,21 +268,29 @@ static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) /* Set up the RS block. * - * This is the part of the chipset that actually does the rasterization - * of vertices into fragments. This is also the part of the chipset that - * locks up if any part of it is even slightly wrong. */ -static void r300_update_rs_block(struct r300_context* r300, - struct r300_shader_semantics* vs_outputs, - struct r300_shader_semantics* fs_inputs) + * This is the part of the chipset that is responsible for linking vertex + * and fragment shaders and stuffed texture coordinates. + * + * The rasterizer reads data from VAP, which produces vertex shader outputs, + * and GA, which produces stuffed texture coordinates. VAP outputs have + * precedence over GA. All outputs must be rasterized otherwise it locks up. + * If there are more outputs rasterized than is set in VAP/GA, it locks up + * too. The funky part is that this info has been pretty much obtained by trial + * and error. */ +static void r300_update_rs_block(struct r300_context *r300) { - struct r300_rs_block rs = { { 0 } }; - int i, col_count = 0, tex_count = 0, fp_offset = 0, count; - void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean); + struct r300_vertex_shader *vs = r300->vs_state.state; + struct r300_shader_semantics *vs_outputs = &vs->outputs; + struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs; + struct r300_rs_block rs = {0}; + int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0; + void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || vs_outputs->bcolor[1] != ATTR_UNUSED; + int *stream_loc_notcl = r300->stream_loc_notcl; if (r300->screen->caps.is_r500) { rX00_rs_col = r500_rs_col; @@ -295,18 +304,39 @@ static void r300_update_rs_block(struct r300_context* r300, rX00_rs_tex_write = r300_rs_tex_write; } - /* Rasterize colors. */ + /* The position is always present in VAP. */ + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + stream_loc_notcl[loc++] = 0; + + /* Set up the point size in VAP. */ + if (vs_outputs->psize != ATTR_UNUSED) { + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + stream_loc_notcl[loc++] = 1; + } + + /* Set up and rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || vs_outputs->color[1] != ATTR_UNUSED) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ - rX00_rs_col(&rs, col_count, i, FALSE); + /* Set up the color in VAP. */ + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + rs.vap_out_vtx_fmt[0] |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; + stream_loc_notcl[loc++] = 2 + i; + + /* Rasterize it. */ + rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->color[i] != ATTR_UNUSED) { rX00_rs_col_write(&rs, col_count, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, + "r300: Rasterized color %i written to FS.\n", i); + } else { + DBG(r300, DBG_RS, "r300: Rasterized color %i unused.\n", i); } col_count++; } else { @@ -314,26 +344,51 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->color[i] != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input color %i unassigned%s.\n", + i); } } } + /* Set up back-face colors. The rasterizer will do the color selection + * automatically. */ + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + stream_loc_notcl[loc++] = 4 + i; + } + } + /* Rasterize texture coordinates. */ - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ + if (!sprite_coord) { + /* Set up the texture coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + } + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, sprite_coord ? SWIZ_XY01 : SWIZ_XYZW); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->generic[i] != ATTR_UNUSED) { rX00_rs_tex_write(&rs, tex_count, fp_offset); - if (sprite_coord) - debug_printf("r300: SpriteCoord (generic index %i) is being written to reg %i\n", i, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, + "r300: Rasterized generic %i written to FS%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); + } else { + DBG(r300, DBG_RS, + "r300: Rasterized generic %i unused%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); } tex_count++; } else { @@ -341,20 +396,31 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->generic[i] != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); } } } /* Rasterize fog coordinates. */ - if (vs_outputs->fog != ATTR_UNUSED) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ + if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) { + /* Set up the fog coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->fog != ATTR_UNUSED) { rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, "r300: Rasterized fog written to FS.\n"); + } else { + DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n"); } tex_count++; } else { @@ -362,25 +428,47 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->fog != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n"); } } /* Rasterize WPOS. */ - /* If the FS doesn't need it, it's not written by the VS. */ - if (vs_outputs->wpos != ATTR_UNUSED && fs_inputs->wpos != ATTR_UNUSED) { + /* Don't set it in VAP if the FS doesn't need it. */ + if (fs_inputs->wpos != ATTR_UNUSED && tex_count < 8) { + /* Set up the WPOS coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + + /* Write it to the FS input register. */ rX00_rs_tex_write(&rs, tex_count, fp_offset); + DBG(r300, DBG_RS, "r300: Rasterized WPOS written to FS.\n"); + fp_offset++; tex_count++; } + /* Invalidate the rest of the no-TCL (GA) stream locations. */ + for (; loc < 16;) { + stream_loc_notcl[loc++] = -1; + } + /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { - rX00_rs_col(&rs, 0, 0, TRUE); + rX00_rs_col(&rs, 0, 0, SWIZ_0001); col_count++; + + DBG(r300, DBG_RS, "r300: Rasterized color 0 to prevent lockups.\n"); } + DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, " + "generics: %i.\n", col_count, tex_count); + rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; @@ -390,87 +478,8 @@ static void r300_update_rs_block(struct r300_context* r300, /* Now, after all that, see if we actually need to update the state. */ if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); - r300->rs_block_state.size = 5 + count*2; - } -} - -/* Update the shader-dependant states. */ -static void r300_update_derived_shader_state(struct r300_context* r300) -{ - struct r300_vertex_shader* vs = r300->vs_state.state; - - r300_update_rs_block(r300, &vs->outputs, &r300_fs(r300)->shader->inputs); -} - -static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) -{ - /* We are interested only in the cases when a new depth or stencil value - * can be written and changed. */ - - /* We might optionally check for [Z func: never] and inspect the stencil - * state in a similar fashion, but it's not terribly important. */ - return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || - (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || - ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && - (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); -} - -static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) -{ - /* We are interested only in the cases when alpha testing can kill - * a fragment. */ - uint32_t af = dsa->alpha_function; - - return (af & R300_FG_ALPHA_FUNC_ENABLE) && - (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; -} - -static void r300_update_ztop(struct r300_context* r300) -{ - struct r300_ztop_state* ztop_state = - (struct r300_ztop_state*)r300->ztop_state.state; - - /* This is important enough that I felt it warranted a comment. - * - * According to the docs, these are the conditions where ZTOP must be - * disabled: - * 1) Alpha testing enabled - * 2) Texture kill instructions in fragment shader - * 3) Chroma key culling enabled - * 4) W-buffering enabled - * - * The docs claim that for the first three cases, if no ZS writes happen, - * then ZTOP can be used. - * - * (3) will never apply since we do not support chroma-keyed operations. - * (4) will need to be re-examined (and this comment updated) if/when - * Hyper-Z becomes supported. - * - * Additionally, the following conditions require disabled ZTOP: - * 5) Depth writes in fragment shader - * 6) Outstanding occlusion queries - * - * This register causes stalls all the way from SC to CB when changed, - * but it is buffered on-chip so it does not hurt to write it if it has - * not changed. - * - * ~C. - */ - - /* ZS writes */ - if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && - (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */ - r300_fs(r300)->shader->info.uses_kill)) { /* (2) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { /* (6) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else { - ztop_state->z_buffer_top = R300_ZTOP_ENABLE; + r300->rs_block_state.size = 11 + count*2; } - - r300->ztop_state.dirty = TRUE; } static void r300_merge_textures_and_samplers(struct r300_context* r300) @@ -568,7 +577,7 @@ void r300_update_derived_state(struct r300_context* r300) } if (r300->rs_block_state.dirty) { - r300_update_derived_shader_state(r300); + r300_update_rs_block(r300); } if (r300->draw) { @@ -578,5 +587,5 @@ void r300_update_derived_state(struct r300_context* r300) r300_swtcl_vertex_psc(r300); } - r300_update_ztop(r300); + r300_update_hyperz_state(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 480d0f7c4a..fcbdb91b67 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -374,6 +374,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); + abort(); } switch (desc->channel[0].type) { @@ -395,6 +396,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); + abort(); } break; /* Unsigned ints */ @@ -418,12 +420,14 @@ r300_translate_vertex_data_type(enum pipe_format format) { fprintf(stderr, "r300: desc->channel[0].size == %d\n", desc->channel[0].size); assert(0); + abort(); } break; default: fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); + abort(); } if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { @@ -439,6 +443,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); + unsigned i, swizzle = 0; assert(format); @@ -448,11 +453,19 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { return 0; } - return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | - (0xf << R300_WRITE_ENA_SHIFT)); + for (i = 0; i < desc->nr_channels; i++) { + swizzle |= + MIN2(desc->swizzle[i], R300_SWIZZLE_SELECT_FP_ONE) << (3*i); + } + /* Set (0,0,0,1) in unused components. */ + for (; i < 3; i++) { + swizzle |= R300_SWIZZLE_SELECT_FP_ZERO << (3*i); + } + for (; i < 4; i++) { + swizzle |= R300_SWIZZLE_SELECT_FP_ONE << (3*i); + } + + return swizzle | (0xf << R300_WRITE_ENA_SHIFT); } #endif /* R300_STATE_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 64d1d18b45..cd9443fa26 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -43,15 +43,17 @@ void r300_emit_invariant_state(struct r300_context* r300, { CS_LOCALS(r300); + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + /* Subpixel multisampling for AA. */ + BEGIN_CS(4); + OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); + OUT_CS_REG(R300_GB_MSPOS1, 0x6666666); + END_CS; + } + BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 2 : 0)); /*** Graphics Backend (GB) ***/ - /* Subpixel multisampling for AA - * These are commented out because glisse's CS checker doesn't like them. - * I presume these will be re-enabled later. - * OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); - * OUT_CS_REG(R300_GB_MSPOS1, 0x6666666); - */ /* Source of fog depth */ OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 8bebeacf86..69e6a12445 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -34,9 +34,6 @@ #include "r300_screen.h" #include "r300_winsys.h" -/* XXX Enable float textures here. */ -/*#define ENABLE_FLOAT_TEXTURES*/ - #define TILE_WIDTH 0 #define TILE_HEIGHT 1 @@ -74,7 +71,7 @@ static boolean r300_format_is_plain(enum pipe_format format) * The FORMAT specifies how the texture sampler will treat the texture, and * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle) + const unsigned char *swizzle_view) { uint32_t result = 0; const struct util_format_description *desc; @@ -98,6 +95,7 @@ uint32_t r300_translate_texformat(enum pipe_format format, R300_TX_FORMAT_SIGNED_Z, R300_TX_FORMAT_SIGNED_W, }; + unsigned char swizzle[4]; desc = util_format_description(format); @@ -144,25 +142,18 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - /* Add swizzle. */ - if (!swizzle) { - swizzle = desc->swizzle; - } /*else { - if (swizzle[0] != desc->swizzle[0] || - swizzle[1] != desc->swizzle[1] || - swizzle[2] != desc->swizzle[2] || - swizzle[3] != desc->swizzle[3]) - { - const char n[6] = "RGBA01"; - fprintf(stderr, "Got different swizzling! Format: %c%c%c%c, " - "View: %c%c%c%c\n", - n[desc->swizzle[0]], n[desc->swizzle[1]], - n[desc->swizzle[2]], n[desc->swizzle[3]], - n[swizzle[0]], n[swizzle[1]], n[swizzle[2]], - n[swizzle[3]]); + /* Get swizzle. */ + if (swizzle_view) { + /* Compose two sets of swizzles. */ + for (i = 0; i < 4; i++) { + swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? + desc->swizzle[swizzle_view[i]] : swizzle_view[i]; } - }*/ + } else { + memcpy(swizzle, desc->swizzle, sizeof(swizzle)); + } + /* Add swizzle. */ for (i = 0; i < 4; i++) { switch (swizzle[i]) { case UTIL_FORMAT_SWIZZLE_X: @@ -316,7 +307,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, } return ~0; -#if defined(ENABLE_FLOAT_TEXTURES) case UTIL_FORMAT_TYPE_FLOAT: switch (desc->channel[0].size) { case 16: @@ -340,7 +330,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_32F_32F_32F_32F | result; } } -#endif } return ~0; /* Unsupported/unknown. */ @@ -405,16 +394,12 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: -#if defined(ENABLE_FLOAT_TEXTURES) case PIPE_FORMAT_R16G16B16A16_FLOAT: -#endif return R300_COLOR_FORMAT_ARGB16161616; /* 128-bit buffers. */ -#if defined(ENABLE_FLOAT_TEXTURES) case PIPE_FORMAT_R32G32B32A32_FLOAT: return R300_COLOR_FORMAT_ARGB32323232; -#endif /* YUV buffers. */ case PIPE_FORMAT_UYVY: @@ -532,7 +517,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_R10SG10SB10SA2U_NORM: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - //case PIPE_FORMAT_R16G16B16A16_FLOAT: /* not in pipe_format */ + case PIPE_FORMAT_R16G16B16A16_FLOAT: case PIPE_FORMAT_R32G32B32A32_FLOAT: return modifier | R300_C0_SEL_R | R300_C1_SEL_G | @@ -573,7 +558,7 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, if (tex->uses_pitch) { /* rectangles love this */ f->format0 |= R300_TX_PITCH_EN; - f->format2 = (tex->pitch[0] - 1) & 0x1fff; + f->format2 = (tex->hwpitch[0] - 1) & 0x1fff; } else { /* power of two textures (3D, mipmaps, and no pitch) */ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); @@ -614,7 +599,7 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, if (util_format_is_depth_or_stencil(tex->b.b.format)) { for (i = 0; i <= tex->b.b.last_level; i++) { tex->fb_state.depthpitch[i] = - tex->pitch[i] | + tex->hwpitch[i] | R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | R300_DEPTHMICROTILE(tex->microtile); } @@ -622,7 +607,7 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, } else { for (i = 0; i <= tex->b.b.last_level; i++) { tex->fb_state.colorpitch[i] = - tex->pitch[i] | + tex->hwpitch[i] | r300_translate_colorformat(tex->b.b.format) | R300_COLOR_TILE(tex->mip_macrotile[i]) | R300_COLOR_MICROTILE(tex->microtile); @@ -762,12 +747,12 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, struct r300_texture *tex) { - /* The kernels <= 2.6.34-rc3 compute the size of mipmapped 3D textures + /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures * incorrectly. This is a workaround to prevent CS from being rejected. */ unsigned i, size; - if (screen->rws->get_value(screen->rws, R300_VID_TEX3D_MIP_BUG) && + if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && tex->b.b.target == PIPE_TEXTURE_3D && tex->b.b.last_level > 0) { size = 0; @@ -813,6 +798,8 @@ static void r300_setup_miptree(struct r300_screen* screen, tex->size = tex->offset[i] + size; tex->layer_size[i] = layer_size; tex->pitch[i] = stride / util_format_get_blocksize(base->format); + tex->hwpitch[i] = + tex->pitch[i] * util_format_get_blockwidth(base->format); SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 453d42b188..ba79ec068a 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -28,7 +28,7 @@ struct r300_texture; uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle); + const unsigned char *swizzle_view); uint32_t r500_tx_format_msb_bit(enum pipe_format format); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index f6428ed760..89f39af976 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -108,11 +108,9 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */ case TGSI_OPCODE_IF: return RC_OPCODE_IF; /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */ - /* case TGSI_OPCODE_REP: return RC_OPCODE_REP; */ case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */ - /* case TGSI_OPCODE_ENDREP: return RC_OPCODE_ENDREP; */ /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */ /* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */ case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL; diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index bfab9c3b01..b7609bad81 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -94,94 +94,6 @@ static void r300_shader_read_vs_outputs( vs_outputs->wpos = i; } -/* This function sets up: - * - VAP mapping, which maps VS registers to output semantics and - * at the same time it indicates which attributes are enabled and should - * be rasterized. - * - Stream mapping to VS outputs if TCL is not present. */ -static void r300_init_vs_output_mapping(struct r300_vertex_shader* vs) -{ - struct r300_shader_semantics* vs_outputs = &vs->outputs; - struct r300_vap_output_state *vap_out = &vs->vap_out; - int *stream_loc = vs->stream_loc_notcl; - int i, gen_count, tabi = 0; - boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || - vs_outputs->bcolor[1] != ATTR_UNUSED; - - vap_out->vap_vtx_state_cntl = 0x5555; /* XXX this is classic Mesa bonghits */ - - /* Position. */ - if (vs_outputs->pos != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - - stream_loc[tabi++] = 0; - } else { - assert(0); - } - - /* Point size. */ - if (vs_outputs->psize != ATTR_UNUSED) { - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - - stream_loc[tabi++] = 1; - } - - /* Colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || - vs_outputs->color[1] != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; - - stream_loc[tabi++] = 2 + i; - } - } - - /* Back-face colors. */ - if (any_bcolor_used) { - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); - - stream_loc[tabi++] = 4 + i; - } - } - - /* Texture coordinates. */ - gen_count = 0; - for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); - - stream_loc[tabi++] = 6 + gen_count; - gen_count++; - } - } - - /* Fog coordinates. */ - if (gen_count < 8 && vs_outputs->fog != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); - - stream_loc[tabi++] = 6 + gen_count; - gen_count++; - } - - /* WPOS. */ - if (gen_count < 8) { - vs->wpos_tex_output = gen_count; - stream_loc[tabi++] = 6 + gen_count; - } else { - vs_outputs->wpos = ATTR_UNUSED; - } - - for (; tabi < 16;) { - stream_loc[tabi++] = -1; - } -} - static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) { struct r300_vertex_shader * vs = c->UserData; @@ -246,9 +158,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) } /* WPOS. */ - if (outputs->wpos != ATTR_UNUSED) { - c->code->outputs[outputs->wpos] = reg++; - } + c->code->outputs[outputs->wpos] = reg++; } static void r300_dummy_vertex_shader( @@ -286,7 +196,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, tgsi_scan_shader(tokens, &vs->info); r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - r300_init_vs_output_mapping(vs); /* Setup the compiler */ rc_init(&compiler.Base); @@ -307,16 +216,11 @@ void r300_translate_vertex_shader(struct r300_context* r300, r300_tgsi_to_rc(&ttr, tokens); - compiler.RequiredOutputs = - ~(~0 << (vs->info.num_outputs + - (vs->outputs.wpos != ATTR_UNUSED ? 1 : 0))); - + compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs + 1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; /* Insert the WPOS output. */ - if (vs->outputs.wpos != ATTR_UNUSED) { - rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); - } + rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); @@ -343,32 +247,3 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* And, finally... */ rc_destroy(&compiler.Base); } - -boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) -{ - struct r300_vertex_shader* vs = r300->vs_state.state; - struct r300_vap_output_state *vap_out = &vs->vap_out; - int tex_output = vs->wpos_tex_output; - uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; - - if (vs->outputs.wpos == ATTR_UNUSED) { - return FALSE; - } - - if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) { - /* Enable WPOS in VAP. */ - if (!(vap_out->vap_vsm_vtx_assm & tex_fmt)) { - vap_out->vap_vsm_vtx_assm |= tex_fmt; - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * tex_output)); - return TRUE; - } - } else { - /* Disable WPOS in VAP. */ - if (vap_out->vap_vsm_vtx_assm & tex_fmt) { - vap_out->vap_vsm_vtx_assm &= ~tex_fmt; - vap_out->vap_out_vtx_fmt[1] &= ~(4 << (3 * tex_output)); - return TRUE; - } - } - return FALSE; -} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 56bcc3b70b..57b3fbca0b 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -39,7 +39,6 @@ struct r300_vertex_shader { struct tgsi_shader_info info; struct r300_shader_semantics outputs; - struct r300_vap_output_state vap_out; /* Whether the shader was replaced by a dummy one due to a shader * compilation failure. */ @@ -49,12 +48,6 @@ struct r300_vertex_shader { unsigned externals_count; unsigned immediates_count; - /* Stream locations for SWTCL or if TCL is bypassed. */ - int stream_loc_notcl[16]; - - /* Output stream location for WPOS. */ - int wpos_tex_output; - /* HWTCL-specific. */ /* Machine code (if translated) */ struct r300_vertex_program_code code; @@ -67,7 +60,4 @@ void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs, const struct tgsi_token *tokens); -/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */ -boolean r300_vertex_shader_setup_wpos(struct r300_context* r300); - #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 2bd40176d1..1642981eaa 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -39,7 +39,7 @@ enum r300_value_id { R300_VID_GB_PIPES, R300_VID_Z_PIPES, R300_VID_SQUARE_TILING_SUPPORT, - R300_VID_TEX3D_MIP_BUG, + R300_VID_DRM_2_3_0, }; enum r300_reference_domain { /* bitfield */ @@ -119,6 +119,10 @@ struct r300_winsys_screen { /* Write a dword to the command buffer. */ void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); + /* Write a table of dwords to the command buffer. */ + void (*write_cs_table)(struct r300_winsys_screen* winsys, + const void *dwords, unsigned count); + /* Write a relocated dword to the command buffer. */ void (*write_cs_reloc)(struct r300_winsys_screen *winsys, struct r300_winsys_buffer *buf, |