diff options
Diffstat (limited to 'src/gallium/drivers/r300')
22 files changed, 966 insertions, 734 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 121b65063f..9c9fc6f64b 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -23,7 +23,8 @@ C_SOURCES = \ r300_tgsi_to_rc.c LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler + -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ + -I$(TOP)/src/gallium/winsys/drm/radeon/core COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index ae23329b83..5b337f03ac 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -36,6 +36,7 @@ #include "r300_screen.h" #include "r300_state_derived.h" #include "r300_state_invariant.h" +#include "r300_texture.h" #include "r300_winsys.h" static enum pipe_error r300_clear_hash_table(void* key, void* value, @@ -69,6 +70,7 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->blend_color_state); FREE(r300->rs_block); FREE(r300->scissor_state); + FREE(r300->vertex_info); FREE(r300->viewport_state); FREE(r300); } @@ -104,7 +106,7 @@ static void r300_flush_cb(void *data) } struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct r300_winsys* r300_winsys) + struct radeon_winsys* radeon_winsys) { struct r300_context* r300 = CALLOC_STRUCT(r300_context); struct r300_screen* r300screen = r300_screen(screen); @@ -112,9 +114,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300) return NULL; - r300->winsys = r300_winsys; + r300->winsys = radeon_winsys; - r300->context.winsys = (struct pipe_winsys*)r300_winsys; + r300->context.winsys = (struct pipe_winsys*)radeon_winsys; r300->context.screen = screen; r300_init_debug(r300); @@ -123,15 +125,24 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.clear = r300_clear; - if (r300screen->caps->has_tcl) - { + if (r300screen->caps->has_tcl) { r300->context.draw_arrays = r300_draw_arrays; r300->context.draw_elements = r300_draw_elements; r300->context.draw_range_elements = r300_draw_range_elements; - } - else - { - assert(0); + } else { + r300->context.draw_arrays = r300_swtcl_draw_arrays; + r300->context.draw_elements = r300_draw_elements; + r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + + /* Create a Draw. This is used for SW TCL. */ + r300->draw = draw_create(); + /* Enable our renderer. */ + draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); + /* Enable Draw's clipping. */ + draw_set_driver_clipping(r300->draw, FALSE); + /* Force Draw to never do viewport transform, since we can do + * transform in hardware, always. */ + draw_set_viewport_state(r300->draw, &r300_viewport_identity); } r300->context.is_texture_referenced = r300_is_texture_referenced; @@ -143,18 +154,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); - /* Create a Draw. This is used for vert collation and SW TCL. */ - r300->draw = draw_create(); - /* Enable our renderer. */ - draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); - /* Disable Draw's clipping if TCL is present. */ - draw_set_driver_clipping(r300->draw, r300_screen(screen)->caps->has_tcl); - /* Force Draw to never do viewport transform, since (again) we can do - * transform in hardware, always. */ - draw_set_viewport_state(r300->draw, &r300_viewport_identity); - /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, PIPE_BUFFER_USAGE_VERTEX, 4096); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index f954ba7f9a..0be190392a 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -92,11 +92,23 @@ struct r300_sampler_state { uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */ uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ + + /* Min/max LOD must be clamped to [0, last_level], thus + * it's dependent on a currently bound texture */ + unsigned min_lod, max_lod; +}; + +struct r300_scissor_regs { + uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ + uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ + + /* Whether everything is culled by scissoring. */ + boolean empty_area; }; struct r300_scissor_state { - uint32_t scissor_top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ + struct r300_scissor_regs framebuffer; + struct r300_scissor_regs scissor; }; struct r300_texture_state { @@ -219,11 +231,6 @@ struct r300_texture { struct r300_vertex_info { /* Parent class */ struct vertex_info vinfo; - /* Map of vertex attributes into PVS memory for HW TCL, - * or GA memory for SW TCL. */ - int vs_tab[16]; - /* Map of rasterizer attributes from GB through RS to US. */ - int fs_tab[16]; /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ uint32_t vap_prog_stream_cntl[8]; @@ -238,7 +245,7 @@ struct r300_context { struct pipe_context context; /* The interface to the windowing system, etc. */ - struct r300_winsys* winsys; + struct radeon_winsys* winsys; /* Draw module. Used mostly for SW TCL. */ struct draw_context* draw; diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 86ba91db52..9fcf3ab538 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -26,7 +26,8 @@ #include "util/u_math.h" #include "r300_reg.h" -#include "r300_winsys.h" + +#include "radeon_winsys.h" /* Yes, I know macros are ugly. However, they are much prettier than the code * that they neatly hide away, and don't have the cost of function setup,so @@ -50,11 +51,11 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ - struct r300_winsys* cs_winsys = cs_context_copy->winsys; \ + struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ int cs_count = 0; #define CHECK_CS(size) \ - cs_winsys->check_cs(cs_winsys, (size)) + assert(cs_winsys->check_cs(cs_winsys, (size))) #define BEGIN_CS(size) do { \ CHECK_CS(size); \ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index eeb97a2d37..dbf316a9b5 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -129,7 +129,9 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { - static const float zero[4] = { 0.0, 0.0, 0.0, 0.0 }; + static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; + struct pipe_texture *tex; + switch(constant->Type) { case RC_CONSTANT_EXTERNAL: return externals->constants[constant->u.External]; @@ -137,11 +139,31 @@ static const float * get_shader_constant( case RC_CONSTANT_IMMEDIATE: return constant->u.Immediate; + case RC_CONSTANT_STATE: + switch (constant->u.State[0]) { + /* Factor for converting rectangle coords to + * normalized coords. Should only show up on non-r500. */ + case RC_STATE_R300_TEXRECT_FACTOR: + tex = &r300->textures[constant->u.State[1]]->tex; + vec[0] = 1.0 / tex->width0; + vec[1] = 1.0 / tex->height0; + break; + + default: + debug_printf("r300: Implementation error: " + "Unknown RC_CONSTANT type %d\n", constant->u.State[0]); + } + break; + default: - debug_printf("r300: Implementation error: Unhandled constant type %i\n", - constant->Type); - return zero; + debug_printf("r300: Implementation error: " + "Unhandled constant type %d\n", constant->Type); } + + /* This should either be (0, 0, 0, 1), which should be a relatively safe + * RGBA or STRQ value, or it could be one of the RC_CONSTANT_STATE + * state factors. */ + return vec; } /* Convert a normal single-precision float into the 7.16 format @@ -360,8 +382,6 @@ static void r300_emit_query_start(struct r300_context *r300) if (!query) return; - /* XXX This will almost certainly not return good results - * for overlapping queries. */ BEGIN_CS(4); if (caps->family == CHIP_FAMILY_RV530) { OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); @@ -543,24 +563,36 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor) +static void r300_emit_scissor_regs(struct r300_context* r300, + struct r300_scissor_regs* scissor) { CS_LOCALS(r300); BEGIN_CS(3); OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->scissor_top_left); - OUT_CS(scissor->scissor_bottom_right); + OUT_CS(scissor->top_left); + OUT_CS(scissor->bottom_right); END_CS; } +void r300_emit_scissor_state(struct r300_context* r300, + struct r300_scissor_state* scissor) +{ + if (r300->rs_state->rs.scissor) { + r300_emit_scissor_regs(r300, &scissor->scissor); + } else { + r300_emit_scissor_regs(r300, &scissor->framebuffer); + } +} + void r300_emit_texture(struct r300_context* r300, struct r300_sampler_state* sampler, struct r300_texture* tex, unsigned offset) { uint32_t filter0 = sampler->filter0; + uint32_t format0 = tex->state.format0; + unsigned min_level, max_level; CS_LOCALS(r300); /* to emulate 1D textures through 2D ones correctly */ @@ -569,13 +601,20 @@ void r300_emit_texture(struct r300_context* r300, filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + format0 |= R300_TX_NUM_LEVELS(max_level); + filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + BEGIN_CS(16); OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | (offset << 28)); OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); - OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), tex->state.format0); + OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), format0); OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); @@ -600,10 +639,10 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) for (i = 0; i < aos_count - 1; i += 2) { int buf_num1 = velem[i].vertex_buffer_index; int buf_num2 = velem[i+1].vertex_buffer_index; - assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); - assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_size(velem[i+1].src_format) % 4 == 0); - OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) | - (pf_get_size(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22)); + assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_blocksize(velem[i].src_format) % 4 == 0); + assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_blocksize(velem[i+1].src_format) % 4 == 0); + OUT_CS((pf_get_blocksize(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) | + (pf_get_blocksize(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22)); OUT_CS(vbuf[buf_num1].buffer_offset + velem[i].src_offset + offset * vbuf[buf_num1].stride); OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset + @@ -611,8 +650,8 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) } if (aos_count & 1) { int buf_num = velem[i].vertex_buffer_index; - assert(vbuf[buf_num].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); - OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6)); + assert(vbuf[buf_num].stride % 4 == 0 && pf_get_blocksize(velem[i].src_format) % 4 == 0); + OUT_CS((pf_get_blocksize(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6)); OUT_CS(vbuf[buf_num].buffer_offset + velem[i].src_offset + offset * vbuf[buf_num].stride); } @@ -690,12 +729,22 @@ void r300_emit_vertex_format_state(struct r300_context* r300) END_CS; } + void r300_emit_vertex_program_code(struct r300_context* r300, struct r300_vertex_program_code* code) { int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); unsigned instruction_count = code->length / 4; + + int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72; + int input_count = MAX2(util_bitcount(code->InputsRead), 1); + int output_count = MAX2(util_bitcount(code->OutputsWritten), 1); + int temp_count = MAX2(code->num_temporaries, 1); + int pvs_num_slots = MIN3(vtx_mem_size / input_count, + vtx_mem_size / output_count, 10); + int pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); + CS_LOCALS(r300); if (!r300screen->caps->has_tcl) { @@ -708,8 +757,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300, /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 - * See the r5xx docs for instructions on how to use these. - * XXX these could be optimized to select better values... */ + * See the r5xx docs for instructions on how to use these. */ OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3); OUT_CS(R300_PVS_FIRST_INST(0) | R300_PVS_XYZW_VALID_INST(instruction_count - 1) | @@ -722,10 +770,11 @@ void r300_emit_vertex_program_code(struct r300_context* r300, for (i = 0; i < code->length; i++) OUT_CS(code->body.d[i]); - OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) | - R300_PVS_NUM_CNTLRS(5) | + OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | + R300_PVS_NUM_CNTLRS(pvs_num_controllers) | R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) | - R300_PVS_VF_MAX_VTX_NUM(12)); + R300_PVS_VF_MAX_VTX_NUM(12) | + (r300screen->caps->is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0)); END_CS; } @@ -790,13 +839,22 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +void r300_emit_texture_count(struct r300_context* r300) +{ + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1); + END_CS; + +} + void r300_flush_textures(struct r300_context* r300) { CS_LOCALS(r300); - BEGIN_CS(4); + BEGIN_CS(2); OUT_CS_REG(R300_TX_INVALTAGS, 0); - OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1); END_CS; } @@ -821,10 +879,17 @@ void r300_emit_dirty_state(struct r300_context* r300) return; } + /* Check size of CS. */ + /* Make sure we have at least 8*1024 spare dwords. */ + /* XXX It would be nice to know the number of dwords we really need to + * XXX emit. */ + if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { + r300->context.flush(&r300->context, 0, NULL); + } + /* Clean out BOs. */ r300->winsys->reset_bos(r300->winsys); - /* XXX check size */ validate: /* Color buffers... */ for (i = 0; i < r300->framebuffer_state.nr_cbufs; i++) { @@ -950,6 +1015,8 @@ validate: /* Samplers and textures are tracked separately but emitted together. */ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { + r300_emit_texture_count(r300); + for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { if (r300->dirty_state & ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 7c83c5166d..3797d3d332 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -92,6 +92,8 @@ void r300_emit_vertex_shader(struct r300_context* r300, void r300_emit_viewport_state(struct r300_context* r300, struct r300_viewport_state* viewport); +void r300_emit_texture_count(struct r300_context* r300); + void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 29ddc84c41..79b01bb4dc 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -1,6 +1,7 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> * Joakim Sindholt <opensource@zhasha.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,6 +32,41 @@ #include "radeon_code.h" #include "radeon_compiler.h" +/* Convert info about FS input semantics to r300_shader_semantics. */ +static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, + struct r300_shader_semantics* fs_inputs) +{ + int i; + unsigned index; + + r300_shader_semantics_reset(fs_inputs); + + for (i = 0; i < info->num_inputs; i++) { + index = info->input_semantic_index[i]; + + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + assert(index <= ATTR_COLOR_COUNT); + fs_inputs->color[index] = i; + break; + + case TGSI_SEMANTIC_GENERIC: + assert(index <= ATTR_GENERIC_COUNT); + fs_inputs->generic[index] = i; + break; + + case TGSI_SEMANTIC_FOG: + assert(index == 0); + fs_inputs->fog = i; + break; + + default: + assert(0); + } + } +} + + static void find_output_registers(struct r300_fragment_program_compiler * compiler, struct r300_fragment_shader * fs) { @@ -58,38 +94,24 @@ static void allocate_hardware_inputs( void (*allocate)(void * data, unsigned input, unsigned hwreg), void * mydata) { - struct tgsi_shader_info* info = &((struct r300_fragment_shader*)c->UserData)->info; - int total_colors = 0; - int colors = 0; - int total_generic = 0; - int generic = 0; - int i; - - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - total_colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - total_generic++; - break; + struct r300_shader_semantics* inputs = + &((struct r300_fragment_shader*)c->UserData)->inputs; + int i, reg = 0; + + /* Allocate input registers. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (inputs->color[i] != ATTR_UNUSED) { + allocate(mydata, inputs->color[i], reg++); } } - - for(i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - allocate(mydata, i, colors); - colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - allocate(mydata, i, total_colors + generic); - generic++; - break; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (inputs->generic[i] != ATTR_UNUSED) { + allocate(mydata, inputs->generic[i], reg++); } } + if (inputs->fog != ATTR_UNUSED) { + allocate(mydata, inputs->fog, reg++); + } } void r300_translate_fragment_shader(struct r300_context* r300, @@ -98,6 +120,10 @@ void r300_translate_fragment_shader(struct r300_context* r300, struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; + /* Initialize. */ + r300_shader_read_fs_inputs(&fs->info, &fs->inputs); + + /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); compiler.Base.Debug = DBG_ON(r300, DBG_FP); @@ -107,7 +133,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, compiler.AllocateHwInputs = &allocate_hardware_inputs; compiler.UserData = fs; - /* TODO: Program compilation depends on texture compare modes, + /* XXX: Program compilation depends on texture compare modes, * which are sampler state. Therefore, programs need to be recompiled * depending on this state as in the classic Mesa driver. * @@ -133,6 +159,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, /* XXX failover maybe? */ DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n", compiler.Base.ErrorMsg); + assert(0); } /* And, finally... */ diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index e831c30301..630e2d0c8a 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -1,6 +1,7 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> * Joakim Sindholt <opensource@zhasha.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,15 +26,16 @@ #define R300_FS_H #include "pipe/p_state.h" - #include "tgsi/tgsi_scan.h" - #include "radeon_code.h" +#include "r300_shader_semantics.h" struct r300_fragment_shader { /* Parent class */ struct pipe_shader_state state; + struct tgsi_shader_info info; + struct r300_shader_semantics inputs; /* Has this shader been translated yet? */ boolean translated; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 8ca785cb58..85b1ea568a 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -661,20 +661,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_GB_SUPER_TILE_B (1 << 15) # define R300_GB_SUBPIXEL_1_12 (0 << 16) # define R300_GB_SUBPIXEL_1_16 (1 << 16) -# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) -# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) -# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) -# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) -# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) -# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21) -# define GB_TILE_CONFIG_SUBPRECISION (0 << 22) -# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) -# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) -# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) -# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) +# define R300_GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) +# define R300_GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) +# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) +# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) +# define R300_GB_TILE_CONFIG_ALT_OFFSET (0 << 21) +# define R300_GB_TILE_CONFIG_SUBPRECISION (0 << 22) +# define R300_GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) +# define R300_GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) +# define R300_GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) +# define R300_GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) /* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */ #define R300_GB_FIFO_SIZE 0x4024 @@ -700,9 +700,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ # define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 -#define GB_Z_PEQ_CONFIG 0x4028 -# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) -# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) +#define R300_GB_Z_PEQ_CONFIG 0x4028 +# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) +# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) /* Specifies various polygon specific selects (fog, depth, perspective). */ #define R300_GB_SELECT 0x401c @@ -725,39 +725,39 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Specifies the graphics pipeline configuration for antialiasing. */ #define R300_GB_AA_CONFIG 0x4020 -# define GB_AA_CONFIG_AA_DISABLE (0 << 0) -# define GB_AA_CONFIG_AA_ENABLE (1 << 0) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) +# define R300_GB_AA_CONFIG_AA_DISABLE (0 << 0) +# define R300_GB_AA_CONFIG_AA_ENABLE (1 << 0) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) /* Selects which of 4 pipes are active. */ -#define GB_PIPE_SELECT 0x402c -# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 -# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 -# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 -# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 -# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 -# define GB_PIPE_SELECT_MAX_PIPE 12 -# define GB_PIPE_SELECT_BAD_PIPES 14 -# define GB_PIPE_SELECT_CONFIG_PIPES 18 +#define R300_GB_PIPE_SELECT 0x402c +# define R300_GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 +# define R300_GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 +# define R300_GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 +# define R300_GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 +# define R300_GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 +# define R300_GB_PIPE_SELECT_MAX_PIPE 12 +# define R300_GB_PIPE_SELECT_BAD_PIPES 14 +# define R300_GB_PIPE_SELECT_CONFIG_PIPES 18 /* Specifies the sizes of the various FIFO`s in the sc/rs. */ -#define GB_FIFO_SIZE1 0x4070 +#define R300_GB_FIFO_SIZE1 0x4070 /* High water mark for SC input fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 -# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f /* High water mark for SC input fifo (B) */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 -# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 /* High water mark for RS colors' fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 -# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 /* High water mark for RS textures' fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 -# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 /* This table specifies the source location and format for up to 16 texture * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) @@ -1293,7 +1293,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_RS_INST_TEX_ID(x) ((x) << 0) #define R500_RS_INST_TEX_CN_WRITE (1 << 4) #define R500_RS_INST_TEX_ADDR_SHIFT 5 -# define R500_RS_INST_TEX_ADDR(x) ((x) << 0) +# define R500_RS_INST_TEX_ADDR(x) ((x) << 5) #define R500_RS_INST_COL_ID_SHIFT 12 # define R500_RS_INST_COL_ID(x) ((x) << 12) #define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) @@ -1463,6 +1463,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) # define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) # define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) +# define R300_TX_MAX_MIP_LEVEL_SHIFT 17 +# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 17) # define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) # define R300_TX_MAX_ANISO_2_TO_1 (1 << 21) # define R300_TX_MAX_ANISO_4_TO_1 (2 << 21) @@ -1471,6 +1473,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_MAX_ANISO_MASK (7 << 21) # define R300_TX_WRAP_S(x) ((x) << 0) # define R300_TX_WRAP_T(x) ((x) << 3) +# define R300_TX_MAX_MIP_LEVEL(x) ((x) << 17) #define R300_TX_FILTER1_0 0x4440 # define R300_CHROMA_KEY_MODE_DISABLE 0 @@ -1500,8 +1503,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_HEIGHTMASK_MASK (2047 << 11) # define R300_TX_DEPTHMASK_SHIFT 22 # define R300_TX_DEPTHMASK_MASK (0xf << 22) -# define R300_TX_MAX_MIP_LEVEL_SHIFT 26 -# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) # define R300_TX_SIZE_PROJECTED (1 << 30) # define R300_TX_PITCH_EN (1 << 31) # define R300_TX_WIDTH(x) ((x) << 0) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 62e1456ed3..35b335df6a 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -70,6 +70,12 @@ uint32_t r300_translate_primitive(unsigned prim) } } +static boolean r300_nothing_to_draw(struct r300_context *r300) +{ + return r300->rs_state->rs.scissor && + r300->scissor_state->scissor.empty_area; +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -173,18 +179,21 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, return FALSE; } + if (count > 65535) { return FALSE; } + if (r300_nothing_to_draw(r300)) { + return TRUE; + } + r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { return FALSE; } - setup_vertex_attributes(r300); - setup_index_buffer(r300, indexBuffer, indexSize); r300_emit_dirty_state(r300); @@ -220,14 +229,16 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, return FALSE; } + if (r300_nothing_to_draw(r300)) { + return TRUE; + } + r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { return FALSE; } - setup_vertex_attributes(r300); - r300_emit_dirty_state(r300); r300_emit_aos(r300, start); @@ -255,6 +266,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, return FALSE; } + if (r300_nothing_to_draw(r300)) { + return TRUE; + } + for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, r300->vertex_buffer[i].buffer, @@ -296,6 +311,10 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, return FALSE; } + if (r300_nothing_to_draw(r300)) { + return TRUE; + } + for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, r300->vertex_buffer[i].buffer, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 390b63007e..c0d9797020 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -27,7 +27,8 @@ #include "r300_context.h" #include "r300_screen.h" #include "r300_texture.h" -#include "r300_winsys.h" + +#include "radeon_winsys.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. @@ -311,14 +312,10 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans = CALLOC_STRUCT(r300_transfer); if (trans) { pipe_texture_reference(&trans->transfer.texture, texture); - trans->transfer.format = texture->format; trans->transfer.x = x; trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; - trans->transfer.block = texture->block; - trans->transfer.nblocksx = texture->nblocksx[level]; - trans->transfer.nblocksy = texture->nblocksy[level]; trans->transfer.stride = r300_texture_get_stride(tex, level); trans->transfer.usage = usage; @@ -344,6 +341,7 @@ static void* r300_transfer_map(struct pipe_screen* screen, { struct r300_texture* tex = (struct r300_texture*)transfer->texture; char* map; + enum pipe_format format = tex->tex.format; map = pipe_buffer_map(screen, tex->buffer, pipe_transfer_buffer_flags(transfer)); @@ -353,8 +351,8 @@ static void* r300_transfer_map(struct pipe_screen* screen, } return map + r300_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; + transfer->y / pf_get_blockheight(format) * transfer->stride + + transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format); } static void r300_transfer_unmap(struct pipe_screen* screen, @@ -372,7 +370,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) FREE(r300screen); } -struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) +struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) { struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); @@ -380,14 +378,14 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) if (!r300screen || !caps) return NULL; - caps->pci_id = r300_winsys->pci_id; - caps->num_frag_pipes = r300_winsys->gb_pipes; - caps->num_z_pipes = r300_winsys->z_pipes; + caps->pci_id = radeon_winsys->pci_id; + caps->num_frag_pipes = radeon_winsys->gb_pipes; + caps->num_z_pipes = radeon_winsys->z_pipes; r300_parse_chipset(caps); r300screen->caps = caps; - r300screen->screen.winsys = (struct pipe_winsys*)r300_winsys; + r300screen->screen.winsys = (struct pipe_winsys*)radeon_winsys; r300screen->screen.destroy = r300_destroy_screen; r300screen->screen.get_name = r300_get_name; r300screen->screen.get_vendor = r300_get_vendor; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 41df31f670..2217988add 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -27,6 +27,8 @@ #include "r300_chipset.h" +struct radeon_winsys; + struct r300_screen { /* Parent class */ struct pipe_screen screen; @@ -56,6 +58,6 @@ r300_transfer(struct pipe_transfer* transfer) } /* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys); +struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); #endif /* R300_SCREEN_H */ diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h new file mode 100644 index 0000000000..85184e2cfd --- /dev/null +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -0,0 +1,64 @@ +/* + * Copyright 2009 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_SHADER_SEMANTICS_H +#define R300_SHADER_SEMANTICS_H + +#define ATTR_UNUSED (-1) +#define ATTR_COLOR_COUNT 2 +#define ATTR_GENERIC_COUNT 16 + +/* This structure contains information about what attributes are written by VS + * or read by FS. (but not both) It's much easier to work with than + * tgsi_shader_info. + * + * The variables contain indices to tgsi_shader_info semantics and those + * indices are nothing else than input/output register numbers. */ +struct r300_shader_semantics { + int pos; + int psize; + int color[ATTR_COLOR_COUNT]; + int bcolor[ATTR_COLOR_COUNT]; + int generic[ATTR_GENERIC_COUNT]; + int fog; +}; + +static INLINE void r300_shader_semantics_reset( + struct r300_shader_semantics* info) +{ + int i; + + info->pos = ATTR_UNUSED; + info->psize = ATTR_UNUSED; + info->fog = ATTR_UNUSED; + + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + info->color[i] = ATTR_UNUSED; + info->bcolor[i] = ATTR_UNUSED; + } + + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + info->generic[i] = ATTR_UNUSED; + } +} + +#endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index d1eced61db..8ef0b3b268 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -289,11 +289,37 @@ static void r300_set_edgeflags(struct pipe_context* pipe, /* XXX and even worse, I have no idea WTF the bitfield is */ } +static void r300_set_scissor_regs(const struct pipe_scissor_state* state, + struct r300_scissor_regs *scissor, + boolean is_r500) +{ + if (is_r500) { + scissor->top_left = + (state->minx << R300_SCISSORS_X_SHIFT) | + (state->miny << R300_SCISSORS_Y_SHIFT); + scissor->bottom_right = + ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); + } else { + /* Offset of 1440 in non-R500 chipsets. */ + scissor->top_left = + ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); + scissor->bottom_right = + (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); + } + + scissor->empty_area = state->minx >= state->maxx || + state->miny >= state->maxy; +} + static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); + struct pipe_scissor_state scissor; if (r300->draw) { draw_flush(r300->draw); @@ -301,6 +327,16 @@ static void r300->framebuffer_state = *state; + scissor.minx = scissor.miny = 0; + scissor.maxx = state->width; + scissor.maxy = state->height; + r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer, + r300_screen(r300->context.screen)->caps->is_r500); + + /* Don't rely on the order of states being set for the first time. */ + if (!r300->rs_state || !r300->rs_state->rs.scissor) { + r300->dirty_state |= R300_NEW_SCISSOR; + } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; } @@ -523,6 +559,11 @@ static void* state->mag_img_filter, state->min_mip_filter); + /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ + /* We must pass these to the emit function to clamp them properly. */ + sampler->min_lod = MAX2((unsigned)state->min_lod, 0); + sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0); + lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1); sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; @@ -561,6 +602,12 @@ static void r300_bind_sampler_states(struct pipe_context* pipe, r300->sampler_count = count; } +static void r300_lacks_vertex_textures(struct pipe_context* pipe, + unsigned count, + void** states) +{ +} + static void r300_delete_sampler_state(struct pipe_context* pipe, void* state) { FREE(state); @@ -571,6 +618,7 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, struct pipe_texture** texture) { struct r300_context* r300 = r300_context(pipe); + boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; int i; /* XXX magic num */ @@ -585,6 +633,13 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, pipe_texture_reference((struct pipe_texture**)&r300->textures[i], texture[i]); r300->dirty_state |= (R300_NEW_TEXTURE << i); + + /* R300-specific - set the texrect factor in a fragment shader */ + if (!is_r500 && r300->textures[i]->is_npot) { + /* XXX It would be nice to re-emit just 1 constant, + * XXX not all of them */ + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } } @@ -604,24 +659,13 @@ static void r300_set_scissor_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - if (r300_screen(r300->context.screen)->caps->is_r500) { - r300->scissor_state->scissor_top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->scissor_bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - r300->scissor_state->scissor_top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->scissor_bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } + r300_set_scissor_regs(state, &r300->scissor_state->scissor, + r300_screen(r300->context.screen)->caps->is_r500); - r300->dirty_state |= R300_NEW_SCISSOR; + /* Don't rely on the order of states being set for the first time. */ + if (!r300->rs_state || r300->rs_state->rs.scissor) { + r300->dirty_state |= R300_NEW_SCISSOR; + } } static void r300_set_viewport_state(struct pipe_context* pipe, @@ -674,6 +718,8 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); } + + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; } static void r300_set_vertex_elements(struct pipe_context* pipe, @@ -706,9 +752,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe, tgsi_scan_shader(shader->tokens, &vs->info); - /* Appease Draw. */ - vs->draw = draw_create_vertex_shader(r300->draw, shader); - return (void*)vs; } else { return draw_create_vertex_shader(r300->draw, shader); @@ -719,8 +762,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); - draw_flush(r300->draw); - if (r300_screen(pipe->screen)->caps->has_tcl) { struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; @@ -731,10 +772,10 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300_translate_vertex_shader(r300, vs); } - draw_bind_vertex_shader(r300->draw, vs->draw); r300->vs = vs; r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; } else { + draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, (struct draw_vertex_shader*)shader); } @@ -748,7 +789,6 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; rc_constants_destroy(&vs->code.constants); - draw_delete_vertex_shader(r300->draw, vs->draw); FREE((void*)vs->state.tokens); FREE(shader); } else { @@ -813,10 +853,11 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.delete_rasterizer_state = r300_delete_rs_state; r300->context.create_sampler_state = r300_create_sampler_state; - r300->context.bind_sampler_states = r300_bind_sampler_states; + r300->context.bind_fragment_sampler_states = r300_bind_sampler_states; + r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures; r300->context.delete_sampler_state = r300_delete_sampler_state; - r300->context.set_sampler_textures = r300_set_sampler_textures; + r300->context.set_fragment_sampler_textures = r300_set_sampler_textures; r300->context.set_scissor_state = r300_set_scissor_state; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 7166694edf..6af49888b9 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,6 +29,7 @@ #include "r300_context.h" #include "r300_fs.h" #include "r300_screen.h" +#include "r300_shader_semantics.h" #include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_vs.h" @@ -47,8 +49,8 @@ struct r300_shader_derived_value { unsigned r300_shader_key_hash(void* key) { struct r300_shader_key* shader_key = (struct r300_shader_key*)key; - unsigned vs = (unsigned)shader_key->vs; - unsigned fs = (unsigned)shader_key->fs; + unsigned vs = (intptr_t)shader_key->vs; + unsigned fs = (intptr_t)shader_key->fs; return (vs << 16) | (fs & 0xffff); } @@ -61,209 +63,143 @@ int r300_shader_key_compare(void* key1, void* key2) { (shader_key1->fs == shader_key2->fs); } -/* Set up the vs_tab and routes. */ -static void r300_vs_tab_routes(struct r300_context* r300, - struct r300_vertex_info* vformat) +static void r300_draw_emit_attrib(struct r300_context* r300, + enum attrib_emit emit, + enum interp_mode interp, + int index) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct vertex_info* vinfo = &vformat->vinfo; - int* tab = vformat->vs_tab; - boolean pos = FALSE, psize = FALSE, fog = FALSE; - int i, texs = 0, cols = 0; - struct tgsi_shader_info* info; + struct tgsi_shader_info* info = &r300->vs->info; + int output; - if (r300screen->caps->has_tcl) { - /* Use vertex shader to determine required routes. */ - info = &r300->vs->info; + output = draw_find_vs_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); + draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); +} + +static void r300_draw_emit_all_attribs(struct r300_context* r300) +{ + struct r300_shader_semantics* vs_outputs = &r300->vs->outputs; + int i, gen_count; + + /* Position. */ + if (vs_outputs->pos != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->pos); } else { - /* Use fragment shader to determine required routes. */ - info = &r300->fs->info; + assert(0); } - assert(info->num_inputs <= 16); - - if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte) - { - for (i = 0; i < info->num_inputs; i++) { - switch (r300->vs->code.inputs[i]) { - case TGSI_SEMANTIC_POSITION: - pos = TRUE; - tab[i] = 0; - break; - case TGSI_SEMANTIC_COLOR: - tab[i] = 2 + cols; - cols++; - break; - case TGSI_SEMANTIC_PSIZE: - assert(psize == FALSE); - psize = TRUE; - tab[i] = 15; - break; - case TGSI_SEMANTIC_FOG: - assert(fog == FALSE); - fog = TRUE; - /* Fall through */ - case TGSI_SEMANTIC_GENERIC: - tab[i] = 6 + texs; - texs++; - break; - default: - debug_printf("r300: Unknown vertex input %d\n", - info->input_semantic_name[i]); - break; - } - } + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, + vs_outputs->psize); } - else - { - /* Just copy vert attribs over as-is. */ - for (i = 0; i < info->num_inputs; i++) { - tab[i] = i; - } - for (i = 0; i < info->num_outputs; i++) { - switch (info->output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - pos = TRUE; - break; - case TGSI_SEMANTIC_COLOR: - cols++; - break; - case TGSI_SEMANTIC_PSIZE: - psize = TRUE; - break; - case TGSI_SEMANTIC_FOG: - fog = TRUE; - /* Fall through */ - case TGSI_SEMANTIC_GENERIC: - texs++; - break; - default: - debug_printf("r300: Unknown vertex output %d\n", - info->output_semantic_name[i]); - break; - } + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, + vs_outputs->color[i]); } } - /* XXX magic */ - assert(texs <= 8); + /* XXX Back-face colors. */ - /* Do the actual vertex_info setup. - * - * vertex_info has four uints of hardware-specific data in it. - * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL - * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM - * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 - * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ - - vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ - - /* We need to add vertex position attribute only for SW TCL case, - * for HW TCL case it could be generated by vertex shader */ - if (!pos && !r300screen->caps->has_tcl) { - debug_printf("r300: Forcing vertex position attribute emit...\n"); - /* Make room for the position attribute - * at the beginning of the tab. */ - for (i = 15; i > 0; i--) { - tab[i] = tab[i-1]; + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->generic[i]); + gen_count++; } - tab[0] = 0; } - /* Position. */ - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0)); + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->fog); + gen_count++; } - vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS; - vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - /* Point size. */ - if (psize) { - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0)); - } - vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - } + /* XXX magic */ + assert(gen_count <= 8); +} - /* Colors. */ - for (i = 0; i < cols; i++) { - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i)); - } - vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR; - vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i); - } +/* Update the PSC tables. */ +static void r300_vertex_psc(struct r300_context* r300) +{ + struct r300_vertex_info *vformat = r300->vertex_info; + uint16_t type, swizzle; + enum pipe_format format; + unsigned i; - /* Init i right here, increment it if fog is enabled. - * This gets around a double-increment problem. */ - i = 0; + /* Vertex shaders have no semantics on their inputs, + * so PSC should just route stuff based on the vertex elements, + * and not on attrib information. */ + DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements" + " in psc\n", + r300->vs->info.num_inputs, + r300->vertex_element_count); - /* Fog. This is a special-cased texcoord. */ - if (fog) { - i++; - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); - } - vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); - vinfo->hwfmt[3] |= (4 << (3 * i)); - } + for (i = 0; i < r300->vertex_element_count; i++) { + format = r300->vertex_element[i].src_format; + + type = r300_translate_vertex_data_type(format) | + (i << R300_DST_VEC_LOC_SHIFT); + swizzle = r300_translate_vertex_data_swizzle(format); - /* Texcoords. */ - for (; i < texs; i++) { - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); + if (i % 2) { + vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; + } else { + vformat->vap_prog_stream_cntl[i >> 1] |= type; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; } - vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); - vinfo->hwfmt[3] |= (4 << (3 * i)); } - draw_compute_vertex_size(vinfo); + + assert(i <= 15); + + /* Set the last vector in the PSC. */ + if (i) { + i -= 1; + } + vformat->vap_prog_stream_cntl[i >> 1] |= + (R300_LAST_VEC << (i & 1 ? 16 : 0)); } -/* Update the PSC tables. */ -static void r300_vertex_psc(struct r300_context* r300, - struct r300_vertex_info* vformat) +/* Update the PSC tables for SW TCL, using Draw. */ +static void r300_swtcl_vertex_psc(struct r300_context* r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_vertex_info *vformat = r300->vertex_info; struct vertex_info* vinfo = &vformat->vinfo; - int* tab = vformat->vs_tab; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; + int* vs_output_tab = r300->vs->output_stream_loc_swtcl; - /* Vertex shaders have no semantics on their inputs, - * so PSC should just route stuff based on their info, - * and not on attrib information. */ - if (r300screen->caps->has_tcl) { - attrib_count = r300->vs->info.num_inputs; - DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n", - attrib_count); - } else { - attrib_count = vinfo->num_attribs; - DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); - for (i = 0; i < attrib_count; i++) { - DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," - " tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - tab[i]); - } + /* For each Draw attribute, route it to the fragment shader according + * to the vs_output_tab. */ + attrib_count = vinfo->num_attribs; + DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); + for (i = 0; i < attrib_count; i++) { + DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," + " vs_output_tab %d\n", vinfo->attrib[i].src_index, + vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, + vs_output_tab[i]); } for (i = 0; i < attrib_count; i++) { /* Make sure we have a proper destination for our attribute. */ - assert(tab[i] != -1); + assert(vs_output_tab[i] != -1); format = draw_translate_vinfo_format(vinfo->attrib[i].emit); /* Obtain the type of data in this attribute. */ type = r300_translate_vertex_data_type(format) | - tab[i] << R300_DST_VEC_LOC_SHIFT; + vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; /* Obtain the swizzle for this attribute. Note that the default * swizzle in the hardware is not XYZW! */ @@ -272,12 +208,10 @@ static void r300_vertex_psc(struct r300_context* r300, /* Add the attribute to the PSC table. */ if (i & 1) { vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; } else { - vformat->vap_prog_stream_cntl[i >> 1] |= type << 0; - - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 0; + vformat->vap_prog_stream_cntl[i >> 1] |= type; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; } } @@ -289,185 +223,200 @@ static void r300_vertex_psc(struct r300_context* r300, (R300_LAST_VEC << (i & 1 ? 16 : 0)); } -/* Set up the mappings from GB to US, for RS block. */ -static void r300_update_fs_tab(struct r300_context* r300, - struct r300_vertex_info* vformat) +static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_0001) +{ + rs->ip[id] |= R300_RS_COL_PTR(ptr); + if (swizzle_0001) { + rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + } else { + rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + } + rs->inst[id] |= R300_RS_INST_COL_ID(id); +} + +static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) { - struct tgsi_shader_info* info = &r300->fs->info; - int i, cols = 0, texs = 0, cols_emitted = 0; - int* tab = vformat->fs_tab; + rs->inst[id] |= R300_RS_INST_COL_CN_WRITE | + R300_RS_INST_COL_ADDR(fp_offset); +} - for (i = 0; i < 16; i++) { - tab[i] = -1; +static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_X001) +{ + if (swizzle_X001) { + rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_K0) | + R300_RS_SEL_R(R300_RS_SEL_K0) | + R300_RS_SEL_Q(R300_RS_SEL_K1); + } else { + rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_C1) | + R300_RS_SEL_R(R300_RS_SEL_C2) | + R300_RS_SEL_Q(R300_RS_SEL_C3); } + rs->inst[id] |= R300_RS_INST_TEX_ID(id); +} - assert(info->num_inputs <= 16); - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - tab[i] = INTERP_LINEAR; - cols++; - break; - case TGSI_SEMANTIC_POSITION: - case TGSI_SEMANTIC_PSIZE: - debug_printf("r300: Implementation error: Can't use " - "pos attribs in fragshader yet!\n"); - /* Pass through for now */ - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - tab[i] = INTERP_PERSPECTIVE; - break; - default: - debug_printf("r300: Unknown vertex input %d\n", - info->input_semantic_name[i]); - break; - } +static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE | + R300_RS_INST_TEX_ADDR(fp_offset); +} + +static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_0001) +{ + rs->ip[id] |= R500_RS_COL_PTR(ptr); + if (swizzle_0001) { + rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + } else { + rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); } + rs->inst[id] |= R500_RS_INST_COL_ID(id); +} - /* Now that we know where everything is... */ - DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs); - for (i = 0; i < info->num_inputs; i++) { - switch (tab[i]) { - case INTERP_LINEAR: - DBG(r300, DBG_DRAW, "r300: attrib: " - "stack offset %d, color, tab %d\n", - i, cols_emitted); - tab[i] = cols_emitted; - cols_emitted++; - break; - case INTERP_PERSPECTIVE: - DBG(r300, DBG_DRAW, "r300: attrib: " - "stack offset %d, texcoord, tab %d\n", - i, cols + texs); - tab[i] = cols + texs; - texs++; - break; - case -1: - debug_printf("r300: Implementation error: Bad fp interp!\n"); - default: - break; - } +static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | + R500_RS_INST_COL_ADDR(fp_offset); +} + +static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_X001) +{ + int rs_tex_comp = ptr*4; + + if (swizzle_X001) { + rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + R500_RS_SEL_T(R500_RS_IP_PTR_K0) | + R500_RS_SEL_R(R500_RS_IP_PTR_K0) | + R500_RS_SEL_Q(R500_RS_IP_PTR_K1); + } else { + rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + R500_RS_SEL_T(rs_tex_comp + 1) | + R500_RS_SEL_R(rs_tex_comp + 2) | + R500_RS_SEL_Q(rs_tex_comp + 3); } + rs->inst[id] |= R500_RS_INST_TEX_ID(id); +} +static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE | + R500_RS_INST_TEX_ADDR(fp_offset); } -/* Set up the RS block. This is the part of the chipset that actually does - * the rasterization of vertices into fragments. This is also the part of the - * chipset that locks up if any part of it is even slightly wrong. */ +/* Set up the RS block. + * + * This is the part of the chipset that actually does the rasterization + * of vertices into fragments. This is also the part of the chipset that + * locks up if any part of it is even slightly wrong. */ static void r300_update_rs_block(struct r300_context* r300, - struct r300_rs_block* rs) + struct r300_shader_semantics* vs_outputs, + struct r300_shader_semantics* fs_inputs) { - struct tgsi_shader_info* info = &r300->fs->info; - int col_count = 0, fp_offset = 0, i, tex_count = 0; - int rs_tex_comp = 0; + struct r300_rs_block* rs = r300->rs_block; + int i, col_count = 0, tex_count = 0, fp_offset = 0; + void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean); + void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); + void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean); + void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); if (r300_screen(r300->context.screen)->caps->is_r500) { - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - rs->ip[col_count] |= - R500_RS_COL_PTR(col_count) | - R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); - col_count++; - break; - case TGSI_SEMANTIC_GENERIC: - rs->ip[tex_count] |= - R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | - R500_RS_SEL_R(rs_tex_comp + 2) | - R500_RS_SEL_Q(rs_tex_comp + 3); - tex_count++; - rs_tex_comp += 4; - break; - default: - break; - } - } + rX00_rs_col = r500_rs_col; + rX00_rs_col_write = r500_rs_col_write; + rX00_rs_tex = r500_rs_tex; + rX00_rs_tex_write = r500_rs_tex_write; + } else { + rX00_rs_col = r300_rs_col; + rX00_rs_col_write = r300_rs_col_write; + rX00_rs_tex = r300_rs_tex; + rX00_rs_tex_write = r300_rs_tex_write; + } - /* Rasterize at least one color, or bad things happen. */ - if ((col_count == 0) && (tex_count == 0)) { - rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + /* Rasterize colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_col(rs, col_count, i, FALSE); + + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->color[i] != ATTR_UNUSED) { + rX00_rs_col_write(rs, col_count, fp_offset); + fp_offset++; + } col_count++; - } - - for (i = 0; i < tex_count; i++) { - rs->inst[i] |= R500_RS_INST_TEX_ID(i) | - R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_offset); - fp_offset++; - } - - for (i = 0; i < col_count; i++) { - rs->inst[i] |= R500_RS_INST_COL_ID(i) | - R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset); - fp_offset++; - } - } else { - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - rs->ip[col_count] |= - R300_RS_COL_PTR(col_count) | - R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); - col_count++; - break; - case TGSI_SEMANTIC_GENERIC: - rs->ip[tex_count] |= - R300_RS_TEX_PTR(rs_tex_comp) | - R300_RS_SEL_S(R300_RS_SEL_C0) | - R300_RS_SEL_T(R300_RS_SEL_C1) | - R300_RS_SEL_R(R300_RS_SEL_C2) | - R300_RS_SEL_Q(R300_RS_SEL_C3); - tex_count++; - rs_tex_comp+=4; - break; - default: - break; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->color[i] != ATTR_UNUSED) { + fp_offset++; } } + } - if (col_count == 0) { - rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); - } - - if (tex_count == 0) { - rs->ip[0] |= - R300_RS_SEL_S(R300_RS_SEL_K0) | - R300_RS_SEL_T(R300_RS_SEL_K0) | - R300_RS_SEL_R(R300_RS_SEL_K0) | - R300_RS_SEL_Q(R300_RS_SEL_K1); + /* Rasterize texture coordinates. */ + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_tex(rs, tex_count, tex_count, FALSE); + + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->generic[i] != ATTR_UNUSED) { + rX00_rs_tex_write(rs, tex_count, fp_offset); + fp_offset++; + } + tex_count++; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->generic[i] != ATTR_UNUSED) { + fp_offset++; + } } + } - /* Rasterize at least one color, or bad things happen. */ - if ((col_count == 0) && (tex_count == 0)) { - col_count++; - } + /* Rasterize fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_tex(rs, tex_count, tex_count, TRUE); - for (i = 0; i < tex_count; i++) { - rs->inst[i] |= R300_RS_INST_TEX_ID(i) | - R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_offset); + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->fog != ATTR_UNUSED) { + rX00_rs_tex_write(rs, tex_count, fp_offset); fp_offset++; } - - for (i = 0; i < col_count; i++) { - rs->inst[i] |= R300_RS_INST_COL_ID(i) | - R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset); + tex_count++; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->fog != ATTR_UNUSED) { fp_offset++; } } - rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) | + /* Rasterize at least one color, or bad things happen. */ + if (col_count == 0 && tex_count == 0) { + rX00_rs_col(rs, 0, 0, TRUE); + col_count++; + } + + rs->count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; - rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0); + rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0); } /* Update the vertex format. */ static void r300_update_derived_shader_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct r300_vertex_info* vformat; - struct r300_rs_block* rs_block; - int i; /* struct r300_shader_key* key; @@ -495,27 +444,45 @@ static void r300_update_derived_shader_state(struct r300_context* r300) (void*)key, (void*)value); } */ - /* XXX This will be refactored ASAP. */ - vformat = CALLOC_STRUCT(r300_vertex_info); - rs_block = CALLOC_STRUCT(r300_rs_block); + /* Reset structures */ + memset(r300->rs_block, 0, sizeof(struct r300_rs_block)); + memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info)); + memcpy(r300->vertex_info->vinfo.hwfmt, r300->vs->hwfmt, sizeof(uint)*4); - for (i = 0; i < 16; i++) { - vformat->vs_tab[i] = -1; - vformat->fs_tab[i] = -1; + r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs); + + if (r300screen->caps->has_tcl) { + r300_vertex_psc(r300); + } else { + r300_draw_emit_all_attribs(r300); + draw_compute_vertex_size(&r300->vertex_info->vinfo); + r300_swtcl_vertex_psc(r300); } - r300_vs_tab_routes(r300, vformat); - r300_vertex_psc(r300, vformat); - r300_update_fs_tab(r300, vformat); + r300->dirty_state |= R300_NEW_RS_BLOCK; +} - r300_update_rs_block(r300, rs_block); +static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when a new depth or stencil value + * can be written and changed. */ + + /* We might optionally check for [Z func: never] and inspect the stencil + * state in a similar fashion, but it's not terribly important. */ + return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || + (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || + ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && + (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); +} - FREE(r300->vertex_info); - FREE(r300->rs_block); +static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when alpha testing can kill + * a fragment. */ + uint32_t af = dsa->alpha_function; - r300->vertex_info = vformat; - r300->rs_block = rs_block; - r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK); + return (af & R300_FG_ALPHA_FUNC_ENABLE) && + (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; } static void r300_update_ztop(struct r300_context* r300) @@ -534,28 +501,34 @@ static void r300_update_ztop(struct r300_context* r300) * The docs claim that for the first three cases, if no ZS writes happen, * then ZTOP can be used. * + * (3) will never apply since we do not support chroma-keyed operations. + * (4) will need to be re-examined (and this comment updated) if/when + * Hyper-Z becomes supported. + * * Additionally, the following conditions require disabled ZTOP: - * ~) Depth writes in fragment shader - * ~) Outstanding occlusion queries + * 5) Depth writes in fragment shader + * 6) Outstanding occlusion queries * * ~C. */ - if (r300->dsa_state->alpha_function) { - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->fs->info.uses_kill) { + + /* ZS writes */ + if (r300_dsa_writes_depth_stencil(r300->dsa_state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */ + r300->fs->info.uses_kill)) { /* (2) */ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300->fs)) { + } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { + } else if (r300->query_current) { /* (6) */ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; } } void r300_update_derived_state(struct r300_context* r300) { - /* XXX */ - if (TRUE || r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) { + if (r300->dirty_state & + (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | + R300_NEW_VERTEX_FORMAT)) { r300_update_derived_shader_state(r300); } diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index c07e6ae676..46d1cb39b5 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -84,7 +84,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(60 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(56 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -135,8 +135,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); - OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1); - OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405); OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); /* XXX */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index aea25cf71d..63fc6a235a 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -34,8 +34,8 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; - state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | - R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff); + state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | + R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); if (tex->is_npot) { /* rectangles love this */ @@ -43,8 +43,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) state->format2 = (tex->pitch[0] - 1) & 0x1fff; } else { /* power of two textures (3D, mipmaps, and no pitch) */ - state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | - R300_TX_NUM_LEVELS(pt->last_level & 0xf); + state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); } state->format1 = r300_translate_texformat(pt->format); @@ -58,17 +57,17 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) /* large textures on r500 */ if (is_r500) { - if (pt->width[0] > 2048) { + if (pt->width0 > 2048) { state->format2 |= R500_TXWIDTH_BIT11; } - if (pt->height[0] > 2048) { + if (pt->height0 > 2048) { state->format2 |= R500_TXHEIGHT_BIT11; } } - assert(is_r500 || (pt->width[0] <= 2048 && pt->height[0] <= 2048)); + assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048)); debug_printf("r300: Set texture state (%dx%d, %d levels)\n", - pt->width[0], pt->height[0], pt->last_level); + pt->width0, pt->height0, pt->last_level); } unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, @@ -106,7 +105,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) return 0; } - return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32); + return align(pf_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); } static void r300_setup_miptree(struct r300_texture* tex) @@ -116,39 +115,32 @@ static void r300_setup_miptree(struct r300_texture* tex) int i; for (i = 0; i <= base->last_level; i++) { - if (i > 0) { - base->width[i] = minify(base->width[i-1]); - base->height[i] = minify(base->height[i-1]); - base->depth[i] = minify(base->depth[i-1]); - } - - base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]); - base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]); + unsigned nblocksy = pf_get_nblocksy(base->format, u_minify(base->height0, i)); stride = r300_texture_get_stride(tex, i); - layer_size = stride * base->nblocksy[i]; + layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else - size = layer_size * base->depth[i]; + size = layer_size * u_minify(base->depth0, i); tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; tex->layer_size[i] = layer_size; - tex->pitch[i] = stride / base->block.size; + tex->pitch[i] = stride / pf_get_blocksize(base->format); debug_printf("r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes)\n", - i, base->width[i], base->height[i], base->depth[i], - stride); + i, u_minify(base->width0, i), u_minify(base->height0, i), + u_minify(base->depth0, i), stride); } } static void r300_setup_flags(struct r300_texture* tex) { - tex->is_npot = !util_is_power_of_two(tex->tex.width[0]) || - !util_is_power_of_two(tex->tex.height[0]); + tex->is_npot = !util_is_power_of_two(tex->tex.width0) || + !util_is_power_of_two(tex->tex.height0); } /* Create a new texture. */ @@ -208,8 +200,8 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, pipe_reference_init(&surface->reference, 1); pipe_texture_reference(&surface->texture, texture); surface->format = texture->format; - surface->width = texture->width[level]; - surface->height = texture->height[level]; + surface->width = u_minify(texture->width0, level); + surface->height = u_minify(texture->height0, level); surface->offset = offset; surface->usage = flags; surface->zslice = zslice; @@ -237,7 +229,7 @@ static struct pipe_texture* /* Support only 2D textures without mipmaps */ if (base->target != PIPE_TEXTURE_2D || - base->depth[0] != 1 || + base->depth0 != 1 || base->last_level != 0) { return NULL; } @@ -252,7 +244,7 @@ static struct pipe_texture* tex->tex.screen = screen; tex->stride_override = *stride; - tex->pitch[0] = *stride / base->block.size; + tex->pitch[0] = *stride / pf_get_blocksize(base->format); r300_setup_flags(tex); r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); @@ -287,10 +279,9 @@ r300_video_surface_create(struct pipe_screen *screen, template.target = PIPE_TEXTURE_2D; template.format = PIPE_FORMAT_X8R8G8B8_UNORM; template.last_level = 0; - template.width[0] = util_next_power_of_two(width); - template.height[0] = util_next_power_of_two(height); - template.depth[0] = 1; - pf_get_block(template.format, &template.block); + template.width0 = util_next_power_of_two(width); + template.height0 = util_next_power_of_two(height); + template.depth0 = 1; template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 589f1984ee..9fb2de2403 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -190,10 +190,10 @@ static void transform_dstreg( struct rc_dst_register * dst, struct tgsi_full_dst_register * src) { - dst->File = translate_register_file(src->DstRegister.File); - dst->Index = translate_register_index(ttr, src->DstRegister.File, src->DstRegister.Index); - dst->WriteMask = src->DstRegister.WriteMask; - dst->RelAddr = src->DstRegister.Indirect; + dst->File = translate_register_file(src->Register.File); + dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); + dst->WriteMask = src->Register.WriteMask; + dst->RelAddr = src->Register.Indirect; } static void transform_srcreg( @@ -201,18 +201,18 @@ static void transform_srcreg( struct rc_src_register * dst, struct tgsi_full_src_register * src) { - dst->File = translate_register_file(src->SrcRegister.File); - dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index); - dst->RelAddr = src->SrcRegister.Indirect; + dst->File = translate_register_file(src->Register.File); + dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); + dst->RelAddr = src->Register.Indirect; dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0); dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; - dst->Abs = src->SrcRegisterExtMod.Absolute; - dst->Negate = src->SrcRegister.Negate ? RC_MASK_XYZW : 0; + dst->Abs = src->Register.Absolute; + dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0; } -static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src) +static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src) { switch(src.Texture) { case TGSI_TEXTURE_1D: @@ -258,17 +258,18 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate); if (src->Instruction.NumDstRegs) - transform_dstreg(ttr, &dst->U.I.DstReg, &src->FullDstRegisters[0]); + transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]); for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { - if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER) - dst->U.I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; + if (src->Src[i].Register.File == TGSI_FILE_SAMPLER) + dst->U.I.TexSrcUnit = src->Src[i].Register.Index; else - transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->FullSrcRegisters[i]); + transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]); } /* Texturing. */ - transform_texture(dst, src->InstructionExtTexture); + if (src->Instruction.Texture) + transform_texture(dst, src->Texture); } static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index a6a159667a..d8610dadfa 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -32,53 +32,8 @@ #include "r300_context.h" #include "r300_state_inlines.h" #include "r300_reg.h" -#include "r300_winsys.h" -static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo, - struct pipe_vertex_element *vert_elem, - unsigned attr_num) -{ - uint16_t hw_fmt1, hw_fmt2; - - hw_fmt1 = r300_translate_vertex_data_type(vert_elem->src_format) | - (attr_num << R300_DST_VEC_LOC_SHIFT); - hw_fmt2 = r300_translate_vertex_data_swizzle(vert_elem->src_format); - - if (attr_num % 2 == 0) - { - vinfo->vap_prog_stream_cntl[attr_num >> 1] = hw_fmt1; - vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] = hw_fmt2; - } - else - { - vinfo->vap_prog_stream_cntl[attr_num >> 1] |= hw_fmt1 << 16; - vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] |= hw_fmt2 << 16; - } -} - -static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo, - unsigned attribs_num) -{ - uint32_t last_vec_bit = (attribs_num % 2 == 0) ? - (R300_LAST_VEC << 16) : R300_LAST_VEC; - - assert(attribs_num > 0 && attribs_num <= 16); - vinfo->vap_prog_stream_cntl[(attribs_num - 1) >> 1] |= last_vec_bit; -} - -void setup_vertex_attributes(struct r300_context *r300) -{ - struct pipe_vertex_element *vert_elem; - int i; - - for (i = 0; i < r300->vertex_element_count; i++) { - vert_elem = &r300->vertex_element[i]; - setup_vertex_attribute(r300->vertex_info, vert_elem, i); - } - - finish_vertex_attribs_setup(r300->vertex_info, - r300->vertex_element_count); -} +#include "radeon_winsys.h" static INLINE int get_buffer_offset(struct r300_context *r300, unsigned int buf_nr, diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 74ef416dc1..31248346bc 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -1,5 +1,6 @@ /* * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,89 +24,226 @@ #include "r300_vs.h" #include "r300_context.h" +#include "r300_screen.h" #include "r300_tgsi_to_rc.h" +#include "r300_reg.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "radeon_compiler.h" - -static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) +/* Convert info about VS output semantics into r300_shader_semantics. */ +static void r300_shader_read_vs_outputs( + struct tgsi_shader_info* info, + struct r300_shader_semantics* vs_outputs) { - struct r300_vertex_shader * vs = c->UserData; - struct tgsi_shader_info* info = &vs->info; - struct tgsi_parse_context parser; - struct tgsi_full_declaration * decl; - boolean pointsize = FALSE; - int out_colors = 0; - int colors = 0; - int out_generic = 0; - int generic = 0; int i; + unsigned index; - /* Fill in the input mapping */ - for (i = 0; i < info->num_inputs; i++) - c->code->inputs[i] = i; + r300_shader_semantics_reset(vs_outputs); - /* Fill in the output mapping */ for (i = 0; i < info->num_outputs; i++) { + index = info->output_semantic_index[i]; + switch (info->output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + assert(index == 0); + vs_outputs->pos = i; + break; + case TGSI_SEMANTIC_PSIZE: - pointsize = TRUE; + assert(index == 0); + vs_outputs->psize = i; break; + case TGSI_SEMANTIC_COLOR: - out_colors++; + assert(index <= ATTR_COLOR_COUNT); + vs_outputs->color[index] = i; break; - case TGSI_SEMANTIC_FOG: + + case TGSI_SEMANTIC_BCOLOR: + assert(index <= ATTR_COLOR_COUNT); + vs_outputs->bcolor[index] = i; + break; + case TGSI_SEMANTIC_GENERIC: - out_generic++; + assert(index <= ATTR_GENERIC_COUNT); + vs_outputs->generic[index] = i; break; + + case TGSI_SEMANTIC_FOG: + assert(index == 0); + vs_outputs->fog = i; + break; + + default: + assert(0); } } +} - tgsi_parse_init(&parser, vs->state.tokens); +static void r300_shader_vap_output_fmt( + struct r300_shader_semantics* vs_outputs, + uint* hwfmt) +{ + int i, gen_count; - while (!tgsi_parse_end_of_tokens(&parser)) { - tgsi_parse_token(&parser); + /* Do the actual vertex_info setup. + * + * vertex_info has four uints of hardware-specific data in it. + * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL + * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM + * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 + * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ - if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) - continue; + hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ - decl = &parser.FullToken.FullDeclaration; + /* Position. */ + if (vs_outputs->pos != ATTR_UNUSED) { + hwfmt[1] |= R300_INPUT_CNTL_POS; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + } else { + assert(0); + } - if (decl->Declaration.File != TGSI_FILE_OUTPUT) - continue; + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + } - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - c->code->outputs[decl->DeclarationRange.First] = 0; - break; - case TGSI_SEMANTIC_PSIZE: - c->code->outputs[decl->DeclarationRange.First] = 1; - break; - case TGSI_SEMANTIC_COLOR: - c->code->outputs[decl->DeclarationRange.First] = 1 + - (pointsize ? 1 : 0) + - colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - c->code->outputs[decl->DeclarationRange.First] = 1 + - (pointsize ? 1 : 0) + - out_colors + - generic++; - break; - default: - debug_printf("r300: vs: Bad semantic declaration %d\n", - decl->Semantic.SemanticName); - break; + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + hwfmt[1] |= R300_INPUT_CNTL_COLOR; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; + } + } + + /* XXX Back-face colors. */ + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); + hwfmt[3] |= (4 << (3 * gen_count)); + gen_count++; + } + } + + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); + hwfmt[3] |= (4 << (3 * gen_count)); + gen_count++; + } + + /* XXX magic */ + assert(gen_count <= 8); +} + +/* Set VS output stream locations for SWTCL. */ +static void r300_stream_locations_swtcl( + struct r300_shader_semantics* vs_outputs, + int* output_stream_loc) +{ + int i, tabi = 0, gen_count; + + /* XXX Check whether the numbers (0, 1, 2+i, etc.) are correct. + * These should go to VAP_PROG_STREAM_CNTL/DST_VEC_LOC. */ + + /* Position. */ + output_stream_loc[tabi++] = 0; + + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + output_stream_loc[tabi++] = 1; + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + output_stream_loc[tabi++] = 2 + i; + } + } + + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + output_stream_loc[tabi++] = 4 + i; + } + } + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + assert(tabi < 16); + output_stream_loc[tabi++] = 6 + gen_count; + gen_count++; } } - tgsi_parse_free(&parser); + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + assert(tabi < 16); + output_stream_loc[tabi++] = 6 + gen_count; + gen_count++; + } + + /* XXX magic */ + assert(gen_count <= 8); + + for (; tabi < 16;) { + output_stream_loc[tabi++] = -1; + } } +static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) +{ + struct r300_vertex_shader * vs = c->UserData; + struct r300_shader_semantics* outputs = &vs->outputs; + struct tgsi_shader_info* info = &vs->info; + int i, reg = 0; + + /* Fill in the input mapping */ + for (i = 0; i < info->num_inputs; i++) + c->code->inputs[i] = i; + + /* Position. */ + if (outputs->pos != ATTR_UNUSED) { + c->code->outputs[outputs->pos] = reg++; + } else { + assert(0); + } + + /* Point size. */ + if (outputs->psize != ATTR_UNUSED) { + c->code->outputs[outputs->psize] = reg++; + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (outputs->color[i] != ATTR_UNUSED) { + c->code->outputs[outputs->color[i]] = reg++; + } + } + + /* XXX Back-face colors. */ + + /* Texture coordinates. */ + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (outputs->generic[i] != ATTR_UNUSED) { + c->code->outputs[outputs->generic[i]] = reg++; + } + } + + /* Fog coordinates. */ + if (outputs->fog != ATTR_UNUSED) { + c->code->outputs[outputs->fog] = reg++; + } +} void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs) @@ -113,6 +251,14 @@ void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_program_compiler compiler; struct tgsi_to_rc ttr; + /* Initialize. */ + r300_shader_read_vs_outputs(&vs->info, &vs->outputs); + r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt); + + if (!r300_screen(r300->context.screen)->caps->has_tcl) { + r300_stream_locations_swtcl(&vs->outputs, vs->output_stream_loc_swtcl); + } + /* Setup the compiler */ rc_init(&compiler.Base); @@ -137,7 +283,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { - /* Todo: Fail gracefully */ + /* XXX Fail gracefully */ fprintf(stderr, "r300 VP: Compiler error\n"); abort(); } diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 2a4ce315e3..283dd5a9e8 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -1,5 +1,6 @@ /* * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,18 +26,20 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" - #include "radeon_code.h" +#include "r300_shader_semantics.h" + struct r300_context; struct r300_vertex_shader { /* Parent class */ struct pipe_shader_state state; - struct tgsi_shader_info info; - /* Fallback shader, because Draw has issues */ - struct draw_vertex_shader* draw; + struct tgsi_shader_info info; + struct r300_shader_semantics outputs; + int output_stream_loc_swtcl[16]; + uint hwfmt[4]; /* Has this shader been translated yet? */ boolean translated; @@ -45,9 +48,6 @@ struct r300_vertex_shader { struct r300_vertex_program_code code; }; - -extern struct r300_vertex_program_code r300_passthrough_vertex_shader; - void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 864a6146b2..1ae6de70fe 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -35,76 +35,10 @@ extern "C" { #include "pipe/p_state.h" #include "pipe/internal/p_winsys_screen.h" -struct r300_winsys { - /* Parent class */ - struct pipe_winsys base; - - /* Opaque Radeon-specific winsys object. */ - void* radeon_winsys; - - /* PCI ID */ - uint32_t pci_id; - - /* GB pipe count */ - uint32_t gb_pipes; - - /* Z pipe count (rv530 only) */ - uint32_t z_pipes; - - /* GART size. */ - uint32_t gart_size; - - /* VRAM size. */ - uint32_t vram_size; - - /* Add a pipe_buffer to the list of buffer objects to validate. */ - boolean (*add_buffer)(struct r300_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd); - - /* Revalidate all currently setup pipe_buffers. - * Returns TRUE if a flush is required. */ - boolean (*validate)(struct r300_winsys* winsys); - - /* Check to see if there's room for commands. */ - boolean (*check_cs)(struct r300_winsys* winsys, int size); - - /* Start a command emit. */ - void (*begin_cs)(struct r300_winsys* winsys, - int size, - const char* file, - const char* function, - int line); - - /* Write a dword to the command buffer. */ - void (*write_cs_dword)(struct r300_winsys* winsys, uint32_t dword); - - /* Write a relocated dword to the command buffer. */ - void (*write_cs_reloc)(struct r300_winsys* winsys, - struct pipe_buffer* bo, - uint32_t rd, - uint32_t wd, - uint32_t flags); - - /* Finish a command emit. */ - void (*end_cs)(struct r300_winsys* winsys, - const char* file, - const char* function, - int line); - - /* Flush the CS. */ - void (*flush_cs)(struct r300_winsys* winsys); - - /* winsys flush - callback from winsys when flush required */ - void (*set_flush_cb)(struct r300_winsys *winsys, - void (*flush_cb)(void *), void *data); - - void (*reset_bos)(struct r300_winsys *winsys); -}; +#include "radeon_winsys.h" struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct r300_winsys* r300_winsys); + struct radeon_winsys* radeon_winsys); boolean r300_get_texture_buffer(struct pipe_texture* texture, struct pipe_buffer** buffer, |