diff options
Diffstat (limited to 'src/gallium/drivers/r300')
26 files changed, 984 insertions, 584 deletions
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index c14414fff6..cdedb30220 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -23,8 +23,6 @@ #include "r300_blit.h" #include "r300_context.h" -#include "util/u_rect.h" - static void r300_blitter_save_states(struct r300_context* r300) { util_blitter_save_blend(r300->blitter, r300->blend_state.state); @@ -75,13 +73,15 @@ void r300_clear(struct pipe_context* pipe, */ struct r300_context* r300 = r300_context(pipe); + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; r300_blitter_save_states(r300); util_blitter_clear(r300->blitter, - r300->framebuffer_state.width, - r300->framebuffer_state.height, - r300->framebuffer_state.nr_cbufs, + fb->width, + fb->height, + fb->nr_cbufs, buffers, rgba, depth, stencil); } @@ -99,7 +99,7 @@ void r300_surface_copy(struct pipe_context* pipe, * is really transparent. The states will be restored by the blitter once * copying is done. */ r300_blitter_save_states(r300); - util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state); + util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); util_blitter_save_fragment_sampler_states( r300->blitter, r300->sampler_count, (void**)r300->sampler_states); @@ -123,7 +123,7 @@ void r300_surface_fill(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300_blitter_save_states(r300); - util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state); + util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); util_blitter_fill(r300->blitter, dst, dstx, dsty, width, height, value); diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 51fdb82ff3..92de297ef1 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) /* Reasonable defaults */ caps->num_vert_fpus = 4; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4A54: caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5548: @@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D57: caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x554C: @@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D4A: caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5D4C: @@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D52: caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x4B48: @@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4B4C: caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5E4C: @@ -182,6 +188,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5E4D: caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5954: @@ -212,6 +219,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x791F: caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x793F: @@ -219,6 +227,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x7942: caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x796C: @@ -227,6 +236,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x796F: caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0633a8b8a7..2808486492 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -40,11 +40,18 @@ struct r300_capabilities { unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Whether or not this is R400. The differences compared to their R3xx + * cousins are: + * - Extended fragment shader registers + * - Blend LTE/GTE thresholds */ + boolean is_r400; /* Whether or not this is an RV515 or newer; R500s have many differences * that require extra consideration, compared to their R3xx cousins: * - Extra bit of width and height on texture sizes * - Blend color is split across two registers - * - Universal Shader (US) block used for fragment shaders */ + * - Blend LTE/GTE thresholds + * - Universal Shader (US) block used for fragment shaders + * - FP16 blending and multisampling */ boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index af95bbe789..14820ca854 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -22,9 +22,6 @@ #include "draw/draw_context.h" -#include "tgsi/tgsi_scan.h" - -#include "util/u_hash_table.h" #include "util/u_memory.h" #include "util/u_simple_list.h" @@ -35,30 +32,16 @@ #include "r300_query.h" #include "r300_render.h" #include "r300_screen.h" -#include "r300_state_derived.h" #include "r300_state_invariant.h" #include "r300_texture.h" #include "r300_winsys.h" -static enum pipe_error r300_clear_hash_table(void* key, void* value, - void* data) -{ - FREE(key); - FREE(value); - return PIPE_OK; -} - static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); struct r300_query* query, * temp; util_blitter_destroy(r300->blitter); - - util_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table, - NULL); - util_hash_table_destroy(r300->shader_hash_table); - draw_destroy(r300->draw); /* Free the OQ BO. */ @@ -72,9 +55,10 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->blend_color_state.state); FREE(r300->clip_state.state); - FREE(r300->rs_block); + FREE(r300->fb_state.state); + FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); - FREE(r300->vertex_info); + FREE(r300->vertex_format_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300); @@ -87,7 +71,7 @@ r300_is_texture_referenced(struct pipe_context *pipe, { struct pipe_buffer* buf = 0; - r300_get_texture_buffer(texture, &buf, NULL); + r300_get_texture_buffer(pipe->screen, texture, &buf, NULL); return pipe->is_buffer_referenced(pipe, buf); } @@ -110,30 +94,48 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } -#define R300_INIT_ATOM(name) \ - r300->name##_state.state = NULL; \ - r300->name##_state.emit = r300_emit_##name##_state; \ - r300->name##_state.dirty = FALSE; \ - insert_at_tail(&r300->atom_list, &r300->name##_state); +#define R300_INIT_ATOM(atomname, atomsize) \ + r300->atomname##_state.name = #atomname; \ + r300->atomname##_state.state = NULL; \ + r300->atomname##_state.size = atomsize; \ + r300->atomname##_state.emit = r300_emit_##atomname##_state; \ + r300->atomname##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname##_state); static void r300_setup_atoms(struct r300_context* r300) { + /* Create the actual atom list. + * + * Each atom is examined and emitted in the order it appears here, which + * can affect performance and conformance if not handled with care. + * + * Some atoms never change size, others change every emit. This is just + * an upper bound on each atom, to keep the emission machinery from + * underallocating space. */ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(ztop); - R300_INIT_ATOM(blend); - R300_INIT_ATOM(blend_color); - R300_INIT_ATOM(clip); - R300_INIT_ATOM(dsa); - R300_INIT_ATOM(rs); - R300_INIT_ATOM(scissor); - R300_INIT_ATOM(viewport); + R300_INIT_ATOM(invariant, 71); + R300_INIT_ATOM(ztop, 2); + R300_INIT_ATOM(blend, 8); + R300_INIT_ATOM(blend_color, 3); + R300_INIT_ATOM(clip, 29); + R300_INIT_ATOM(dsa, 8); + R300_INIT_ATOM(fb, 56); + R300_INIT_ATOM(rs, 25); + R300_INIT_ATOM(scissor, 3); + R300_INIT_ATOM(viewport, 9); + R300_INIT_ATOM(rs_block, 21); + R300_INIT_ATOM(vertex_format, 26); + + /* Some non-CSO atoms need explicit space to store the state locally. */ + r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); } struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct radeon_winsys* radeon_winsys) + void *priv) { struct r300_context* r300 = CALLOC_STRUCT(r300_context); struct r300_screen* r300screen = r300_screen(screen); + struct radeon_winsys* radeon_winsys = r300screen->radeon_winsys; if (!r300) return NULL; @@ -142,8 +144,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.winsys = (struct pipe_winsys*)radeon_winsys; r300->context.screen = screen; - - r300_init_debug(r300); + r300->context.priv = priv; r300->context.destroy = r300_destroy_context; @@ -174,16 +175,13 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.is_texture_referenced = r300_is_texture_referenced; r300->context.is_buffer_referenced = r300_is_buffer_referenced; - r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, - r300_shader_key_compare); - r300_setup_atoms(r300); r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); - r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state.state = CALLOC_STRUCT(r300_scissor_state); - r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); + r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); + r300->vertex_format_state.state = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); @@ -200,7 +198,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_state_functions(r300); - r300_emit_invariant_state(r300); + r300->invariant_state.dirty = TRUE; r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 5937f0e2cc..8461757812 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -28,7 +28,9 @@ #include "util/u_blitter.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" + +#include "r300_screen.h" struct r300_context; @@ -36,10 +38,20 @@ struct r300_fragment_shader; struct r300_vertex_shader; struct r300_atom { + /* List pointers. */ struct r300_atom *prev, *next; + /* Name, for debugging. */ + const char* name; + /* Opaque state. */ void* state; + /* Emit the state to the context. */ void (*emit)(struct r300_context*, void*); + /* Upper bound on number of dwords to emit. */ + unsigned size; + /* Whether this atom should be emitted. */ boolean dirty; + /* Another dirty flag that is never automatically cleared. */ + boolean always_dirty; }; struct r300_blend_state { @@ -72,13 +84,14 @@ struct r300_rs_state { struct pipe_rasterizer_state rs; uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ + uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ - uint32_t depth_scale_front; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ - uint32_t depth_offset_front;/* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ - uint32_t depth_scale_back; /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ - uint32_t depth_offset_back; /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ + float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ + /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ + float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ + /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ @@ -106,16 +119,6 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_scissor_regs { - uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ -}; - -struct r300_scissor_state { - struct r300_scissor_regs framebuffer; - struct r300_scissor_regs scissor; -}; - struct r300_texture_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ @@ -136,15 +139,12 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_FRAMEBUFFERS 0x00000010 #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 #define R300_ANY_NEW_SAMPLERS 0x0001fe00 #define R300_NEW_TEXTURE 0x00040000 #define R300_ANY_NEW_TEXTURES 0x03fc0000 -#define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 #define R300_NEW_QUERY 0x40000000 @@ -188,6 +188,12 @@ struct r300_query { struct r300_query* next; }; +enum r300_buffer_tiling { + R300_BUFFER_LINEAR = 0, + R300_BUFFER_TILED, + R300_BUFFER_SQUARETILED +}; + struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -224,6 +230,9 @@ struct r300_texture { /* Registers carrying texture format data. */ struct r300_texture_state state; + + /* Buffer tiling */ + enum r300_buffer_tiling microtile, macrotile; }; struct r300_vertex_info { @@ -260,11 +269,8 @@ struct r300_context { struct r300_query *query_current; struct r300_query query_list; - /* Shader hash table. Used to store vertex formatting information, which - * depends on the combination of both currently loaded shaders. */ - struct util_hash_table* shader_hash_table; /* Vertex formatting information. */ - struct r300_vertex_info* vertex_info; + struct r300_atom vertex_format_state; /* Various CSO state objects. */ /* Beginning of atom list. */ @@ -281,12 +287,12 @@ struct r300_context { struct r300_atom dsa_state; /* Fragment shader. */ struct r300_fragment_shader* fs; - /* Framebuffer state. We currently don't need our own version of this. */ - struct pipe_framebuffer_state framebuffer_state; + /* Framebuffer state. */ + struct r300_atom fb_state; /* Rasterizer state. */ struct r300_atom rs_state; /* RS block state. */ - struct r300_rs_block* rs_block; + struct r300_atom rs_block_state; /* Sampler states. */ struct r300_sampler_state* sampler_states[8]; int sampler_count; @@ -302,6 +308,9 @@ struct r300_context { /* ZTOP state. */ struct r300_atom ztop_state; + /* Invariant state. This must be emitted to get the engine started. */ + struct r300_atom invariant_state; + /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; @@ -315,9 +324,10 @@ struct r300_context { uint32_t dirty_hw; /* Whether the TCL engine should be in bypass mode. */ boolean tcl_bypass; - - /** Combination of DBG_xxx flags */ - unsigned debug; + /* Whether polygon offset is enabled. */ + boolean polygon_offset_enabled; + /* Z buffer bit depth. */ + uint32_t zbuffer_bpp; }; /* Convenience cast wrapper. */ @@ -326,40 +336,24 @@ static INLINE struct r300_context* r300_context(struct pipe_context* context) return (struct r300_context*)context; } + +struct pipe_context* r300_create_context(struct pipe_screen* screen, + void *priv); + /* Context initialization. */ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); -/* Debug functionality. */ - -/** - * Debug flags to disable/enable certain groups of debugging outputs. - * - * \note These may be rather coarse, and the grouping may be impractical. - * If you find, while debugging the driver, that a different grouping - * of these flags would be beneficial, just feel free to change them - * but make sure to update the documentation in r300_debug.c to reflect - * those changes. - */ -/*@{*/ -#define DBG_HELP 0x0000001 -#define DBG_FP 0x0000002 -#define DBG_VP 0x0000004 -#define DBG_CS 0x0000008 -#define DBG_DRAW 0x0000010 -#define DBG_TEX 0x0000020 -#define DBG_FALL 0x0000040 -/*@}*/ - -static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) +static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags) { - return (ctx->debug & flags) ? TRUE : FALSE; + return SCREEN_DBG_ON(r300_screen(ctx->context.screen), flags); } -static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...) +static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags, + const char * fmt, ...) { - if (DBG_ON(ctx, flags)) { + if (CTX_DBG_ON(ctx, flags)) { va_list va; va_start(va, fmt); debug_vprintf(fmt, va); @@ -367,6 +361,8 @@ static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * f } } -void r300_init_debug(struct r300_context * ctx); +#define DBG_ON CTX_DBG_ON +#define DBG CTX_DBG #endif /* R300_CONTEXT_H */ + diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d142fee050..151f72b0fe 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -52,7 +52,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ - int cs_count = 0; + int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ assert(cs_winsys->check_cs(cs_winsys, (size))) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 2a6ed54ac9..b881730848 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -22,8 +22,6 @@ #include "r300_context.h" -#include <ctype.h> - struct debug_option { const char * name; @@ -46,7 +44,7 @@ static struct debug_option debug_options[] = { { 0, 0, 0 } }; -void r300_init_debug(struct r300_context * ctx) +void r300_init_debug(struct r300_screen * screen) { const char * options = debug_get_option("RADEON_DEBUG", 0); boolean printhint = FALSE; @@ -64,7 +62,7 @@ void r300_init_debug(struct r300_context * ctx) for(opt = debug_options; opt->name; ++opt) { if (!strncmp(options, opt->name, length)) { - ctx->debug |= opt->flag; + screen->debug |= opt->flag; break; } } @@ -77,11 +75,11 @@ void r300_init_debug(struct r300_context * ctx) options += length; } - if (!ctx->debug) + if (!screen->debug) printhint = TRUE; } - if (printhint || ctx->debug & DBG_HELP) { + if (printhint || screen->debug & DBG_HELP) { debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n" "to a comma-separated list of debug options. Available options are:\n"); for(opt = debug_options; opt->name; ++opt) { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 0e5533c790..ae83511a85 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -32,19 +32,20 @@ #include "r300_emit.h" #include "r300_fs.h" #include "r300_screen.h" -#include "r300_state_derived.h" #include "r300_state_inlines.h" -#include "r300_texture.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, void* state) { struct r300_blend_state* blend = (struct r300_blend_state*)state; + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); + BEGIN_CS(8); OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); - if (r300->framebuffer_state.nr_cbufs) { + if (fb->nr_cbufs) { OUT_CS(blend->blend_control); OUT_CS(blend->alpha_blend_control); OUT_CS(blend->color_channel_mask); @@ -111,19 +112,15 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) { struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); BEGIN_CS(r300screen->caps->is_r500 ? 8 : 6); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); - - /* not needed since we use the 8bit alpha ref */ - /*if (r300screen->caps->is_r500) { - OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); - }*/ - OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - if (r300->framebuffer_state.zsbuf) { + if (fb->zsbuf) { OUT_CS(dsa->z_buffer_control); OUT_CS(dsa->z_stencil_control); } else { @@ -133,7 +130,6 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) OUT_CS(dsa->stencil_ref_mask); - /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); } @@ -167,9 +163,8 @@ static const float * get_shader_constant( vec[1] = 1.0 / tex->height0; break; - /* Texture compare-fail value. */ - /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient, - * this is always (0,0,0,0), right? */ + /* Texture compare-fail value. Shouldn't ever show up, but if + * it does, we'll be ready. */ case RC_STATE_SHADOW_AMBIENT: vec[3] = 0; break; @@ -383,17 +378,14 @@ void r500_emit_fs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_fb_state(struct r300_context* r300, - struct pipe_framebuffer_state* fb) +void r300_emit_fb_state(struct r300_context* r300, void* state) { + struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_texture* tex; struct pipe_surface* surf; int i; CS_LOCALS(r300); - /* Shouldn't fail unless there is a bug in the state tracker. */ - assert(fb->nr_cbufs <= 4); - BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) + (fb->zsbuf ? 10 : 0) + 6); @@ -406,7 +398,14 @@ void r300_emit_fb_state(struct r300_context* r300, R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); /* Set the number of colorbuffers. */ - OUT_CS_REG(R300_RB3D_CCTL, R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs)); + if (fb->nr_cbufs > 1) { + OUT_CS_REG(R300_RB3D_CCTL, + R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs) | + R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE | + R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE); + } else { + OUT_CS_REG(R300_RB3D_CCTL, 0x0); + } /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { @@ -419,8 +418,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | - r300_translate_colorformat(tex->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); + r300_translate_colorformat(tex->tex.format) | + R300_COLOR_TILE(tex->macrotile) | + R300_COLOR_MICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(surf->format)); @@ -443,8 +444,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, - RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | + R300_DEPTHMACROTILE(tex->macrotile) | + R300_DEPTHMICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); } END_CS; @@ -579,32 +582,52 @@ void r300_emit_query_end(struct r300_context* r300) void r300_emit_rs_state(struct r300_context* r300, void* state) { struct r300_rs_state* rs = (struct r300_rs_state*)state; + float scale, offset; CS_LOCALS(r300); - BEGIN_CS(22); + BEGIN_CS(18 + (rs->polygon_offset_enable ? 5 : 0)); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); + + OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config); + OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); OUT_CS(rs->point_minmax); OUT_CS(rs->line_control); - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 6); - OUT_CS(rs->depth_scale_front); - OUT_CS(rs->depth_offset_front); - OUT_CS(rs->depth_scale_back); - OUT_CS(rs->depth_offset_back); + + if (rs->polygon_offset_enable) { + scale = rs->depth_scale * 12; + offset = rs->depth_offset; + + switch (r300->zbuffer_bpp) { + case 16: + offset *= 4; + break; + case 24: + offset *= 2; + break; + } + + OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + OUT_CS_32F(scale); + OUT_CS_32F(offset); + OUT_CS_32F(scale); + OUT_CS_32F(offset); + } + + OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); OUT_CS(rs->polygon_offset_enable); OUT_CS(rs->cull_mode); OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value); - OUT_CS_REG(R300_GA_COLOR_CONTROL, rs->color_control); OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode); END_CS; } -void r300_emit_rs_block_state(struct r300_context* r300, - struct r300_rs_block* rs) +void r300_emit_rs_block_state(struct r300_context* r300, void* state) { - int i; + struct r300_rs_block* rs = (struct r300_rs_block*)state; + unsigned i; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -641,27 +664,65 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -static void r300_emit_scissor_regs(struct r300_context* r300, - struct r300_scissor_regs* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { + unsigned minx, miny, maxx, maxy; + uint32_t top_left, bottom_right; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); - BEGIN_CS(3); - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->top_left); - OUT_CS(scissor->bottom_right); - END_CS; -} + minx = miny = 0; + maxx = fb->width; + maxy = fb->height; -void r300_emit_scissor_state(struct r300_context* r300, void* state) -{ - struct r300_scissor_state* scissor = (struct r300_scissor_state*)state; - /* XXX argfl! */ if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { - r300_emit_scissor_regs(r300, &scissor->scissor); + minx = MAX2(minx, scissor->minx); + miny = MAX2(miny, scissor->miny); + maxx = MIN2(maxx, scissor->maxx); + maxy = MIN2(maxy, scissor->maxy); + } + + /* Special case for zero-area scissor. + * + * We can't allow the variables maxx and maxy to be zero because they are + * subtracted from later in the code, which would cause emitting ~0 and + * making the kernel checker angry. + * + * Let's consider we change maxx and maxy to 1, which is effectively + * a one-pixel area. We must then change minx and miny to a number which is + * greater than 1 to get the zero area back. */ + if (!maxx || !maxy) { + minx = 2; + miny = 2; + maxx = 1; + maxy = 1; + } + + if (r300screen->caps->is_r500) { + top_left = + (minx << R300_SCISSORS_X_SHIFT) | + (miny << R300_SCISSORS_Y_SHIFT); + bottom_right = + ((maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((maxy - 1) << R300_SCISSORS_Y_SHIFT); } else { - r300_emit_scissor_regs(r300, &scissor->framebuffer); + /* Offset of 1440 in non-R500 chipsets. */ + top_left = + ((minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((miny + 1440) << R300_SCISSORS_Y_SHIFT); + bottom_right = + (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } + + BEGIN_CS(3); + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(top_left); + OUT_CS(bottom_right); + END_CS; } void r300_emit_texture(struct r300_context* r300, @@ -680,12 +741,18 @@ void r300_emit_texture(struct r300_context* r300, filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - /* determine min/max levels */ - /* the MAX_MIP level is the largest (finest) one */ - max_level = MIN2(sampler->max_lod, tex->tex.last_level); - min_level = MIN2(sampler->min_lod, max_level); - format0 |= R300_TX_NUM_LEVELS(max_level); - filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + if (tex->is_npot) { + /* NPOT textures don't support mip filter, unfortunately. + * This prevents incorrect rendering. */ + filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; + } else { + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + format0 |= R300_TX_NUM_LEVELS(max_level); + filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + } BEGIN_CS(16); OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | @@ -697,27 +764,13 @@ void r300_emit_texture(struct r300_context* r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_RELOC(tex->buffer, + R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile), + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); END_CS; } -static boolean r300_validate_aos(struct r300_context *r300) -{ - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->vertex_element; - int i; - - /* Check if formats and strides are aligned to the size of DWORD. */ - for (i = 0; i < r300->vertex_element_count; i++) { - if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || - util_format_get_blocksize(velem[i].src_format) % 4 != 0) { - return FALSE; - } - } - return TRUE; -} - void r300_emit_aos(struct r300_context* r300, unsigned offset) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; @@ -727,12 +780,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) unsigned packet_size = (aos_count * 3 + 1) / 2; CS_LOCALS(r300); - /* XXX Move this checking to a more approriate place. */ - if (!r300_validate_aos(r300)) { - /* XXX We should fallback using Draw. */ - assert(0); - } - BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(aos_count); @@ -764,64 +811,39 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) END_CS; } -#if 0 -void r300_emit_draw_packet(struct r300_context* r300) +void r300_emit_vertex_format_state(struct r300_context* r300, void* state) { - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " - "vertex size %d\n", r300->vbo, - r300->vertex_info->vinfo.size); - /* Set the pointer to our vertex buffer. The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [offset into BO] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info->vinfo.size | - (r300->vertex_info->vinfo.size << 8)); - OUT_CS(r300->vbo_offset); - OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; -} -#endif - -void r300_emit_vertex_format_state(struct r300_context* r300) -{ - int i; + struct r300_vertex_info* vertex_info = (struct r300_vertex_info*)state; + unsigned i; CS_LOCALS(r300); DBG(r300, DBG_DRAW, "r300: VAP/PSC emit:\n"); BEGIN_CS(26); - OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info->vinfo.size); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_info->vinfo.size); OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(r300->vertex_info->vinfo.hwfmt[0]); - OUT_CS(r300->vertex_info->vinfo.hwfmt[1]); + OUT_CS(vertex_info->vinfo.hwfmt[0]); + OUT_CS(vertex_info->vinfo.hwfmt[1]); OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(r300->vertex_info->vinfo.hwfmt[2]); - OUT_CS(r300->vertex_info->vinfo.hwfmt[3]); + OUT_CS(vertex_info->vinfo.hwfmt[2]); + OUT_CS(vertex_info->vinfo.hwfmt[3]); for (i = 0; i < 4; i++) { DBG(r300, DBG_DRAW, " : hwfmt%d: 0x%08x\n", i, - r300->vertex_info->vinfo.hwfmt[i]); + vertex_info->vinfo.hwfmt[i]); } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); for (i = 0; i < 8; i++) { - OUT_CS(r300->vertex_info->vap_prog_stream_cntl[i]); + OUT_CS(vertex_info->vap_prog_stream_cntl[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, - r300->vertex_info->vap_prog_stream_cntl[i]); + vertex_info->vap_prog_stream_cntl[i]); } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); for (i = 0; i < 8; i++) { - OUT_CS(r300->vertex_info->vap_prog_stream_cntl_ext[i]); + OUT_CS(vertex_info->vap_prog_stream_cntl_ext[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, - r300->vertex_info->vap_prog_stream_cntl_ext[i]); + vertex_info->vap_prog_stream_cntl_ext[i]); } END_CS; } @@ -986,30 +1008,21 @@ static void r300_flush_pvs(struct r300_context* r300) END_CS; } -/* Emit all dirty state. */ -void r300_emit_dirty_state(struct r300_context* r300) +void r300_emit_buffer_validate(struct r300_context *r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_texture* tex; - struct r300_atom* atom; - int i, dirty_tex = 0; + unsigned i; boolean invalid = FALSE; - /* Check size of CS. */ - /* Make sure we have at least 8*1024 spare dwords. */ - /* XXX It would be nice to know the number of dwords we really need to - * XXX emit. */ - if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { - r300->context.flush(&r300->context, 0, NULL); - } - /* Clean out BOs. */ r300->winsys->reset_bos(r300->winsys); validate: /* Color buffers... */ - for (i = 0; i < r300->framebuffer_state.nr_cbufs; i++) { - tex = (struct r300_texture*)r300->framebuffer_state.cbufs[i]->texture; + for (i = 0; i < fb->nr_cbufs; i++) { + tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, 0, RADEON_GEM_DOMAIN_VRAM)) { @@ -1018,8 +1031,8 @@ validate: } } /* ...depth buffer... */ - if (r300->framebuffer_state.zsbuf) { - tex = (struct r300_texture*)r300->framebuffer_state.zsbuf->texture; + if (fb->zsbuf) { + tex = (struct r300_texture*)fb->zsbuf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, 0, RADEON_GEM_DOMAIN_VRAM)) { @@ -1039,10 +1052,12 @@ validate: } } /* ...occlusion query buffer... */ - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; + if (r300->dirty_state & R300_NEW_QUERY) { + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } } /* ...and vertex buffer. */ if (r300->vbo) { @@ -1064,6 +1079,31 @@ validate: invalid = TRUE; goto validate; } +} + +/* Emit all dirty state. */ +void r300_emit_dirty_state(struct r300_context* r300) +{ + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_atom* atom; + unsigned i, dwords = 1024; + int dirty_tex = 0; + + /* Check the required number of dwords against the space remaining in the + * current CS object. If we need more, then flush. */ + + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + dwords += atom->size; + } + } + + /* Make sure we have at least 2*1024 spare dwords. */ + /* XXX It would be nice to know the number of dwords we really need to + * XXX emit. */ + while (!r300->winsys->check_cs(r300->winsys, dwords)) { + r300->context.flush(&r300->context, 0, NULL); + } if (r300->dirty_state & R300_NEW_QUERY) { r300_emit_query_start(r300); @@ -1071,7 +1111,7 @@ validate: } foreach(atom, &r300->atom_list) { - if (atom->dirty) { + if (atom->dirty || atom->always_dirty) { atom->emit(r300, atom->state); atom->dirty = FALSE; } @@ -1098,16 +1138,6 @@ validate: r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS; } - if (r300->dirty_state & R300_NEW_FRAMEBUFFERS) { - r300_emit_fb_state(r300, &r300->framebuffer_state); - r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; - } - - if (r300->dirty_state & R300_NEW_RS_BLOCK) { - r300_emit_rs_block_state(r300, r300->rs_block); - r300->dirty_state &= ~R300_NEW_RS_BLOCK; - } - /* Samplers and textures are tracked separately but emitted together. */ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { @@ -1133,11 +1163,6 @@ validate: r300_flush_textures(r300); } - if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) { - r300_emit_vertex_format_state(r300); - r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT; - } - if (r300->dirty_state & (R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS)) { r300_flush_pvs(r300); } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 05a6bfeae8..6b96d9b57c 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -51,8 +51,7 @@ void r500_emit_fragment_program_code(struct r300_context* r300, void r500_emit_fs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants); -void r300_emit_fb_state(struct r300_context* r300, - struct pipe_framebuffer_state* fb); +void r300_emit_fb_state(struct r300_context* r300, void* state); void r300_emit_query_begin(struct r300_context* r300, struct r300_query* query); @@ -61,8 +60,7 @@ void r300_emit_query_end(struct r300_context* r300); void r300_emit_rs_state(struct r300_context* r300, void* state); -void r300_emit_rs_block_state(struct r300_context* r300, - struct r300_rs_block* rs); +void r300_emit_rs_block_state(struct r300_context* r300, void* state); void r300_emit_scissor_state(struct r300_context* r300, void* state); @@ -73,7 +71,7 @@ void r300_emit_texture(struct r300_context* r300, void r300_emit_vertex_buffer(struct r300_context* r300); -void r300_emit_vertex_format_state(struct r300_context* r300); +void r300_emit_vertex_format_state(struct r300_context* r300, void* state); void r300_emit_vertex_program_code(struct r300_context* r300, struct r300_vertex_program_code* code); @@ -95,4 +93,6 @@ void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300); +void r300_emit_buffer_validate(struct r300_context *r300); + #endif /* R300_EMIT_H */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 14a08241fc..e37d309270 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -29,7 +29,6 @@ #include "r300_cs.h" #include "r300_emit.h" #include "r300_flush.h" -#include "r300_state_invariant.h" static void r300_flush(struct pipe_context* pipe, unsigned flags, @@ -37,8 +36,10 @@ static void r300_flush(struct pipe_context* pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_query *query; + struct r300_atom *atom; CS_LOCALS(r300); + (void) cs_count; /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * @@ -51,10 +52,17 @@ static void r300_flush(struct pipe_context* pipe, if (r300->dirty_hw) { FLUSH_CS; - r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; + + /* New kitchen sink, baby. */ + foreach(atom, &r300->atom_list) { + if (atom->state) { + atom->dirty = TRUE; + } + } } + /* reset flushed query */ foreach(query, &r300->query_list) { query->flushed = TRUE; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 60ea9c171d..75a05498eb 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -49,12 +49,12 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, switch (info->input_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: - assert(index <= ATTR_COLOR_COUNT); + assert(index < ATTR_COLOR_COUNT); fs_inputs->color[index] = i; break; case TGSI_SEMANTIC_GENERIC: - assert(index <= ATTR_GENERIC_COUNT); + assert(index < ATTR_GENERIC_COUNT); fs_inputs->generic[index] = i; break; @@ -77,17 +77,21 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, static void find_output_registers(struct r300_fragment_program_compiler * compiler, struct r300_fragment_shader * fs) { - unsigned i; + unsigned i, colorbuf_count = 0; /* Mark the outputs as not present initially */ - compiler->OutputColor = fs->info.num_outputs; + compiler->OutputColor[0] = fs->info.num_outputs; + compiler->OutputColor[1] = fs->info.num_outputs; + compiler->OutputColor[2] = fs->info.num_outputs; + compiler->OutputColor[3] = fs->info.num_outputs; compiler->OutputDepth = fs->info.num_outputs; /* Now see where they really are. */ for(i = 0; i < fs->info.num_outputs; ++i) { switch(fs->info.output_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: - compiler->OutputColor = i; + compiler->OutputColor[colorbuf_count] = i; + colorbuf_count++; break; case TGSI_SEMANTIC_POSITION: compiler->OutputDepth = i; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 034bfc15cf..361813891f 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_TX_OFFSET_5 0x4554 #define R300_TX_OFFSET_6 0x4558 #define R300_TX_OFFSET_7 0x455C - /* BEGIN: Guess from R200 */ + # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) -# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MACRO_TILE_LINEAR (0 << 2) +# define R300_TXO_MACRO_TILE_TILED (1 << 2) +# define R300_TXO_MACRO_TILE(x) ((x) << 2) # define R300_TXO_MICRO_TILE_LINEAR (0 << 3) -# define R300_TXO_MICRO_TILE (1 << 3) -# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE_TILED (1 << 3) +# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE(x) ((x) << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 - /* END: Guess from R200 */ /* 32 bit chroma key */ #define R300_TX_CHROMA_KEY_0 0x4580 @@ -2283,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_TILE(x) ((x) << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) # define R300_COLOR_MICROTILE_ENABLE (1 << 17) # define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_MICROTILE(x) ((x) << 17) # define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) # define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) @@ -2544,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DEPTHPITCH_MASK 0x00003FFC # define R300_DEPTHMACROTILE_DISABLE (0 << 16) # define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMACROTILE(x) ((x) << 16) # define R300_DEPTHMICROTILE_LINEAR (0 << 17) # define R300_DEPTHMICROTILE_TILED (1 << 17) # define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHMICROTILE(x) ((x) << 17) # define R300_DEPTHENDIAN_NO_SWAP (0 << 18) # define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) # define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index ee43421cdb..cd4971ae13 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,8 +26,9 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_prim.h" @@ -114,6 +115,97 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +static boolean immd_is_good_idea(struct r300_context *r300, + unsigned count) +{ + return count <= 4; +} + +static void r300_emit_draw_arrays_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) +{ + struct pipe_vertex_element* velem; + struct pipe_vertex_buffer* vbuf; + unsigned vertex_element_count = r300->vertex_element_count; + unsigned i, v, vbi, dw, elem_offset; + + /* Size of the vertex, in dwords. */ + unsigned vertex_size = 0; + + /* Offsets of the attribute, in dwords, from the start of the vertex. */ + unsigned offset[PIPE_MAX_ATTRIBS]; + + /* Size of the vertex element, in dwords. */ + unsigned size[PIPE_MAX_ATTRIBS]; + + /* Stride to the same attrib in the next vertex in the vertex buffer, + * in dwords. */ + unsigned stride[PIPE_MAX_ATTRIBS] = {0}; + + /* Mapped vertex buffers. */ + uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; + + CS_LOCALS(r300); + + /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->vertex_element[i]; + offset[i] = velem->src_offset / 4; + size[i] = util_format_get_blocksize(velem->src_format) / 4; + vertex_size += size[i]; + vbi = velem->vertex_buffer_index; + + /* Map the buffer. */ + if (!map[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + map[vbi] = (uint32_t*)pipe_buffer_map(r300->context.screen, + vbuf->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + map[vbi] += vbuf->buffer_offset / 4; + stride[vbi] = vbuf->stride / 4; + } + } + + r300_emit_dirty_state(r300); + + BEGIN_CS(10 + count * vertex_size); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + r300_translate_primitive(mode)); + + /* Emit vertices. */ + for (v = 0; v < count; v++) { + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->vertex_element[i]; + vbi = velem->vertex_buffer_index; + elem_offset = offset[i] + stride[vbi] * (v + start); + + for (dw = 0; dw < size[i]; dw++) { + OUT_CS(map[vbi][elem_offset + dw]); + } + } + } + END_CS; + + /* Unmap buffers. */ + for (i = 0; i < vertex_element_count; i++) { + vbi = r300->vertex_element[i].vertex_buffer_index; + + if (map[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + pipe_buffer_unmap(r300->context.screen, vbuf->buffer); + map[vbi] = NULL; + } + } +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -183,17 +275,18 @@ static void r300_emit_draw_elements(struct r300_context *r300, END_CS; } - static boolean r300_setup_vertex_buffers(struct r300_context *r300) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->vertex_element; + struct pipe_buffer *pbuf; validate: for (int i = 0; i < r300->vertex_element_count; i++) { - if (!r300->winsys->add_buffer(r300->winsys, - vbuf[velem[i].vertex_buffer_index].buffer, - RADEON_GEM_DOMAIN_GTT, 0)) { + pbuf = vbuf[velem[i].vertex_buffer_index].buffer; + + if (!r300->winsys->add_buffer(r300->winsys, pbuf, + RADEON_GEM_DOMAIN_GTT, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -207,17 +300,49 @@ validate: return TRUE; } +static void r300_shorten_ubyte_elts(struct r300_context* r300, + struct pipe_buffer** elts, + unsigned count) +{ + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* new_elts; + unsigned char *in_map; + unsigned short *out_map; + unsigned i; + + new_elts = screen->buffer_create(screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + 2 * count); + + in_map = pipe_buffer_map(screen, *elts, PIPE_BUFFER_USAGE_CPU_READ); + out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE); + + for (i = 0; i < count; i++) { + *out_map = (unsigned short)*in_map; + in_map++; + out_map++; + } + + pipe_buffer_unmap(screen, *elts); + pipe_buffer_unmap(screen, new_elts); + + *elts = new_elts; +} + /* This is the fast-path drawing & emission for HW TCL. */ void r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) { struct r300_context* r300 = r300_context(pipe); + struct pipe_buffer* orgIndexBuffer = indexBuffer; if (!u_trim_pipe_prim(mode, &count)) { return; @@ -232,17 +357,24 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_update_derived_state(r300); + r300_emit_buffer_validate(r300); + if (!r300_setup_vertex_buffers(r300)) { return; } + if (indexSize == 1) { + r300_shorten_ubyte_elts(r300, &indexBuffer, count); + indexSize = 2; + } + if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { - return; + goto cleanup; } if (!r300->winsys->validate(r300->winsys)) { - return; + goto cleanup; } r300_emit_dirty_state(r300); @@ -251,6 +383,11 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); + +cleanup: + if (indexBuffer != orgIndexBuffer) { + pipe->screen->buffer_destroy(indexBuffer); + } } /* Simple helpers for context setup. Should probably be moved to util. */ @@ -264,7 +401,7 @@ void r300_draw_elements(struct pipe_context* pipe, } void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) + unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); @@ -281,15 +418,19 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_update_derived_state(r300); - if (!r300_setup_vertex_buffers(r300)) { - return; - } - - r300_emit_dirty_state(r300); + r300_emit_buffer_validate(r300); - r300_emit_aos(r300, start); + if (immd_is_good_idea(r300, count)) { + r300_emit_draw_arrays_immediate(r300, mode, start, count); + } else { + if (!r300_setup_vertex_buffers(r300)) { + return; + } - r300_emit_draw_arrays(r300, mode, count); + r300_emit_dirty_state(r300); + r300_emit_aos(r300, start); + r300_emit_draw_arrays(r300, mode, count); + } } /**************************************************************************** @@ -321,6 +462,7 @@ void r300_swtcl_draw_arrays(struct pipe_context* pipe, draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX, + 0, r300->shader_constants[PIPE_SHADER_VERTEX].constants, r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); @@ -365,6 +507,7 @@ void r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX, + 0, r300->shader_constants[PIPE_SHADER_VERTEX].constants, r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); @@ -416,7 +559,7 @@ r300_render_get_vertex_info(struct vbuf_render* render) r300_update_derived_state(r300); - return &r300->vertex_info->vinfo; + return (struct vertex_info*)r300->vertex_format_state.state; } static boolean r300_render_allocate_vertices(struct vbuf_render* render, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 287664b1d2..da4ec542ad 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -20,7 +20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_simple_screen.h" @@ -30,6 +30,7 @@ #include "r300_texture.h" #include "radeon_winsys.h" +#include "r300_winsys.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. @@ -113,6 +114,8 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) * ~ C. */ return 1; + case PIPE_CAP_DUAL_SOURCE_BLEND: + return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: @@ -149,6 +152,20 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) } else { return 0; } + case PIPE_CAP_INDEP_BLEND_ENABLE: + if (r300screen->caps->is_r500) { + return 1; + } else { + return 0; + } + case PIPE_CAP_INDEP_BLEND_FUNC: + return 0; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; default: debug_printf("r300: Implementation error: Bad param %d\n", param); @@ -183,10 +200,20 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param) } } -static boolean check_tex_format(enum pipe_format format, uint32_t usage, - boolean is_r500) +static boolean r300_is_format_supported(struct pipe_screen* screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned usage, + unsigned geom_flags) { uint32_t retval = 0; + boolean is_r500 = r300_screen(screen)->caps->is_r500; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + debug_printf("r300: Implementation error: Received bogus texture " + "target %d in %s\n", target, __FUNCTION__); + return FALSE; + } switch (format) { /* Supported formats. */ @@ -194,6 +221,8 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: retval = usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | @@ -208,7 +237,8 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_YCBCR: - case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_A8L8_SRGB: case PIPE_FORMAT_A8L8_UNORM: retval = usage & PIPE_TEXTURE_USAGE_SAMPLER; break; @@ -247,28 +277,13 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: - debug_printf("r300: Note: Got unsupported format: %s in %s\n", - pf_name(format), __FUNCTION__); + SCREEN_DBG(r300_screen(screen), DBG_TEX, + "r300: Note: Got unsupported format: %s in %s\n", + pf_name(format), __FUNCTION__); return FALSE; - /* XXX These don't even exist - case PIPE_FORMAT_A32R32G32B32: - case PIPE_FORMAT_A16R16G16B16: */ - /* XXX What the deuce is UV88? (r3xx accel page 14) - debug_printf("r300: Warning: Got unimplemented format: %s in %s\n", - pf_name(format), __FUNCTION__); - return FALSE; */ - - /* XXX Supported yet unimplemented r5xx formats: */ - /* XXX Again, what is UV1010 this time? (r5xx accel page 148) */ - /* XXX Even more that don't exist - case PIPE_FORMAT_A10R10G10B10_UNORM: - case PIPE_FORMAT_A2R10G10B10_UNORM: - case PIPE_FORMAT_I10_UNORM: - debug_printf( - "r300: Warning: Got unimplemented r500 format: %s in %s\n", - pf_name(format), __FUNCTION__); - return FALSE; */ + /* XXX Add all remaining gallium-supported formats, + * see util/u_format.csv. */ default: /* Unknown format... */ @@ -286,30 +301,6 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, return (retval >= usage); } -static boolean r300_is_format_supported(struct pipe_screen* pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, - unsigned geom_flags) -{ - switch (target) { - case PIPE_TEXTURE_1D: /* handle 1D textures as 2D ones */ - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_3D: - case PIPE_TEXTURE_CUBE: - return check_tex_format(format, tex_usage, - r300_screen(pscreen)->caps->is_r500); - - default: - debug_printf("r300: Fatal: This is not a format target: %d\n", - target); - assert(0); - break; - } - - return FALSE; -} - static struct pipe_transfer* r300_get_tex_transfer(struct pipe_screen *screen, struct pipe_texture *texture, @@ -319,6 +310,7 @@ r300_get_tex_transfer(struct pipe_screen *screen, { struct r300_texture *tex = (struct r300_texture *)texture; struct r300_transfer *trans; + struct r300_screen *rscreen = r300_screen(screen); unsigned offset; offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */ @@ -330,11 +322,8 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; - trans->transfer.stride = r300_texture_get_stride(tex, level); + trans->transfer.stride = r300_texture_get_stride(rscreen, tex, level); trans->transfer.usage = usage; - - /* XXX not sure whether it's required to set these two, - the driver doesn't use them */ trans->transfer.zslice = zslice; trans->transfer.face = face; @@ -389,16 +378,21 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); - if (!r300screen || !caps) + if (!r300screen || !caps) { + FREE(r300screen); + FREE(caps); return NULL; + } caps->pci_id = radeon_winsys->pci_id; caps->num_frag_pipes = radeon_winsys->gb_pipes; caps->num_z_pipes = radeon_winsys->z_pipes; + r300_init_debug(r300screen); r300_parse_chipset(caps); r300screen->caps = caps; + r300screen->radeon_winsys = radeon_winsys; r300screen->screen.winsys = (struct pipe_winsys*)radeon_winsys; r300screen->screen.destroy = r300_destroy_screen; r300screen->screen.get_name = r300_get_name; @@ -406,6 +400,7 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) r300screen->screen.get_param = r300_get_param; r300screen->screen.get_paramf = r300_get_paramf; r300screen->screen.is_format_supported = r300_is_format_supported; + r300screen->screen.context_create = r300_create_context; r300screen->screen.get_tex_transfer = r300_get_tex_transfer; r300screen->screen.tex_transfer_destroy = r300_tex_transfer_destroy; r300screen->screen.transfer_map = r300_transfer_map; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 2217988add..502fbfa5a2 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -33,8 +33,13 @@ struct r300_screen { /* Parent class */ struct pipe_screen screen; + struct radeon_winsys* radeon_winsys; + /* Chipset capabilities */ struct r300_capabilities* caps; + + /** Combination of DBG_xxx flags */ + unsigned debug; }; struct r300_transfer { @@ -57,7 +62,44 @@ r300_transfer(struct pipe_transfer* transfer) return (struct r300_transfer*)transfer; } -/* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); +/* Debug functionality. */ + +/** + * Debug flags to disable/enable certain groups of debugging outputs. + * + * \note These may be rather coarse, and the grouping may be impractical. + * If you find, while debugging the driver, that a different grouping + * of these flags would be beneficial, just feel free to change them + * but make sure to update the documentation in r300_debug.c to reflect + * those changes. + */ +/*@{*/ +#define DBG_HELP 0x0000001 +#define DBG_FP 0x0000002 +#define DBG_VP 0x0000004 +#define DBG_CS 0x0000008 +#define DBG_DRAW 0x0000010 +#define DBG_TEX 0x0000020 +#define DBG_FALL 0x0000040 +/*@}*/ + +static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) +{ + return (screen->debug & flags) ? TRUE : FALSE; +} + +static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags, + const char * fmt, ...) +{ + if (SCREEN_DBG_ON(screen, flags)) { + va_list va; + va_start(va, fmt); + debug_vprintf(fmt, va); + va_end(va); + } +} + +void r300_init_debug(struct r300_screen* ctx); #endif /* R300_SCREEN_H */ + diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 435e613ddf..d07e90860c 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -30,7 +30,6 @@ #include "tgsi/tgsi_parse.h" #include "pipe/p_config.h" -#include "pipe/internal/p_winsys_screen.h" #include "r300_context.h" #include "r300_reg.h" @@ -156,23 +155,33 @@ static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA dstA == PIPE_BLENDFACTOR_ONE); } +static unsigned bgra_cmask(unsigned mask) +{ + /* Gallium uses RGBA color ordering while R300 expects BGRA. */ + + return ((mask & PIPE_MASK_R) << 2) | + ((mask & PIPE_MASK_B) >> 2) | + (mask & (PIPE_MASK_G | PIPE_MASK_A)); +} + /* Create a new blend state based on the CSO blend state. * * This encompasses alpha blending, logic/raster ops, and blend dithering. */ static void* r300_create_blend_state(struct pipe_context* pipe, const struct pipe_blend_state* state) { + struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); - if (state->blend_enable) + if (state->rt[0].blend_enable) { - unsigned eqRGB = state->rgb_func; - unsigned srcRGB = state->rgb_src_factor; - unsigned dstRGB = state->rgb_dst_factor; + unsigned eqRGB = state->rt[0].rgb_func; + unsigned srcRGB = state->rt[0].rgb_src_factor; + unsigned dstRGB = state->rt[0].rgb_dst_factor; - unsigned eqA = state->alpha_func; - unsigned srcA = state->alpha_src_factor; - unsigned dstA = state->alpha_dst_factor; + unsigned eqA = state->rt[0].alpha_func; + unsigned srcA = state->rt[0].alpha_src_factor; + unsigned dstA = state->rt[0].alpha_dst_factor; /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, * this is just the crappy D3D naming */ @@ -289,18 +298,18 @@ static void* r300_create_blend_state(struct pipe_context* pipe, (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; } - /* Color Channel Mask */ - if (state->colormask & PIPE_MASK_R) { - blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_RED_MASK0; - } - if (state->colormask & PIPE_MASK_G) { - blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0; - } - if (state->colormask & PIPE_MASK_B) { - blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0; - } - if (state->colormask & PIPE_MASK_A) { - blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0; + /* Color channel masks for all MRTs. */ + blend->color_channel_mask = bgra_cmask(state->rt[0].colormask); + if (r300screen->caps->is_r500 && state->independent_blend_enable) { + if (state->rt[1].blend_enable) { + blend->color_channel_mask |= bgra_cmask(state->rt[1].colormask) << 4; + } + if (state->rt[2].blend_enable) { + blend->color_channel_mask |= bgra_cmask(state->rt[2].colormask) << 8; + } + if (state->rt[3].blend_enable) { + blend->color_channel_mask |= bgra_cmask(state->rt[3].colormask) << 12; + } } if (state->dither) { @@ -340,6 +349,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_color_state* state = (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; @@ -355,6 +365,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); + r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2; r300->blend_color_state.dirty = TRUE; } @@ -365,11 +376,14 @@ static void r300_set_clip_state(struct pipe_context* pipe, if (r300_screen(pipe->screen)->caps->has_tcl) { memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); - r300->clip_state.dirty = TRUE; + r300->clip_state.size = 29; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); + r300->clip_state.size = 2; } + + r300->clip_state.dirty = TRUE; } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -427,7 +441,6 @@ static void* (r300_translate_stencil_op(state->stencil[1].zfail_op) << R300_S_BACK_ZFAIL_OP_SHIFT); - /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (caps->is_r500) { dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK; @@ -446,8 +459,7 @@ static void* r300_translate_alpha_function(state->alpha.func) | R300_FG_ALPHA_FUNC_ENABLE; - /* XXX figure out why emitting 10bit alpha ref causes CS to dump */ - /* always use 8bit alpha ref */ + /* We could use 10bit alpha ref but who needs that? */ dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value); if (caps->is_r500) @@ -462,8 +474,10 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); r300->dsa_state.state = state; + r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6; r300->dsa_state.dirty = TRUE; } @@ -474,56 +488,52 @@ static void r300_delete_dsa_state(struct pipe_context* pipe, FREE(state); } -static void r300_set_scissor_regs(const struct pipe_scissor_state* state, - struct r300_scissor_regs *scissor, - boolean is_r500) -{ - if (is_r500) { - scissor->top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - scissor->top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } -} - static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_scissor_state* scissor = - (struct r300_scissor_state*)r300->scissor_state.state; - struct pipe_scissor_state pscissor; + uint32_t zbuffer_bpp = 0; + + r300->fb_state.size = (10 * state->nr_cbufs) + + (2 * (4 - state->nr_cbufs)) + + (state->zsbuf ? 10 : 0) + 6; + + if (state->nr_cbufs > 4) { + debug_printf("r300: Implementation error: Too many MRTs in %s, " + "refusing to bind framebuffer state!\n", __FUNCTION__); + return; + } if (r300->draw) { draw_flush(r300->draw); } - r300->framebuffer_state = *state; - - /* XXX Arg. This is silly. */ - pscissor.minx = pscissor.miny = 0; - pscissor.maxx = state->width; - pscissor.maxy = state->height; - r300_set_scissor_regs(&pscissor, &scissor->framebuffer, - r300_screen(r300->context.screen)->caps->is_r500); + memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); /* Don't rely on the order of states being set for the first time. */ - r300->dirty_state |= R300_NEW_FRAMEBUFFERS; - + /* XXX wait what */ r300->blend_state.dirty = TRUE; r300->dsa_state.dirty = TRUE; + r300->fb_state.dirty = TRUE; r300->scissor_state.dirty = TRUE; + + /* Polygon offset depends on the zbuffer bit depth. */ + if (state->zsbuf && r300->polygon_offset_enabled) { + switch (util_format_get_blocksize(state->zsbuf->texture->format)) { + case 2: + zbuffer_bpp = 16; + break; + case 4: + zbuffer_bpp = 24; + break; + } + + if (r300->zbuffer_bpp != zbuffer_bpp) { + r300->zbuffer_bpp = zbuffer_bpp; + r300->rs_state.dirty = TRUE; + } + } } /* Create fragment shader state. */ @@ -559,7 +569,7 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300_pick_fragment_shader(r300); if (r300->vs && r300_vertex_shader_setup_wpos(r300)) { - r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + r300->vertex_format_state.dirty = TRUE; } r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -629,9 +639,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; - /* XXX I think there is something wrong with the polygon mode, - * XXX re-test when r300g is in a better shape */ - /* Enable polygon mode */ if (state->fill_cw != PIPE_POLYGON_MODE_FILL || state->fill_ccw != PIPE_POLYGON_MODE_FILL) { @@ -684,10 +691,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe, } if (rs->polygon_offset_enable) { - rs->depth_offset_front = rs->depth_offset_back = - fui(state->offset_units); - rs->depth_scale_front = rs->depth_scale_back = - fui(state->offset_scale); + rs->depth_offset = state->offset_units; + rs->depth_scale = state->offset_scale; } if (state->line_stipple_enable) { @@ -719,7 +724,13 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + if (rs) { + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw; + } else { + r300->tcl_bypass = FALSE; + r300->polygon_offset_enabled = FALSE; + } r300->rs_state.state = rs; r300->rs_state.dirty = TRUE; @@ -728,7 +739,6 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->viewport_state.dirty = TRUE; /* XXX Clean these up when we move to atom emits */ - r300->dirty_state |= R300_NEW_RS_BLOCK; if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -866,11 +876,9 @@ static void r300_set_scissor_state(struct pipe_context* pipe, const struct pipe_scissor_state* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_scissor_state* scissor = - (struct r300_scissor_state*)r300->scissor_state.state; - r300_set_scissor_regs(state, &scissor->scissor, - r300_screen(r300->context.screen)->caps->is_r500); + memcpy(r300->scissor_state.state, state, + sizeof(struct pipe_scissor_state)); r300->scissor_state.dirty = TRUE; } @@ -931,7 +939,23 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, draw_set_vertex_buffers(r300->draw, count, buffers); } - r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + r300->vertex_format_state.dirty = TRUE; +} + +static boolean r300_validate_aos(struct r300_context *r300) +{ + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->vertex_element; + int i; + + /* Check if formats and strides are aligned to the size of DWORD. */ + for (i = 0; i < r300->vertex_element_count; i++) { + if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || + util_format_get_blocksize(velem[i].src_format) % 4 != 0) { + return FALSE; + } + } + return TRUE; } static void r300_set_vertex_elements(struct pipe_context* pipe, @@ -949,6 +973,12 @@ static void r300_set_vertex_elements(struct pipe_context* pipe, draw_flush(r300->draw); draw_set_vertex_elements(r300->draw, count, elements); } + + if (!r300_validate_aos(r300)) { + /* XXX We should fallback using draw. */ + assert(0); + abort(); + } } static void* r300_create_vs_state(struct pipe_context* pipe, @@ -989,9 +1019,10 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300_vertex_shader_setup_wpos(r300); } + r300->vertex_format_state.dirty = TRUE; + r300->dirty_state |= - R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS | - R300_NEW_VERTEX_FORMAT; + R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; } else { draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, @@ -1017,22 +1048,22 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) static void r300_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct r300_context* r300 = r300_context(pipe); void *mapped; - if (buf == NULL || buf->buffer->size == 0 || - (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) + if (buf == NULL || buf->size == 0 || + (mapped = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) { r300->shader_constants[shader].count = 0; return; } - assert((buf->buffer->size % 4 * sizeof(float)) == 0); - memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size); - r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pipe->screen, buf->buffer); + assert((buf->size % 4 * sizeof(float)) == 0); + memcpy(r300->shader_constants[shader].constants, mapped, buf->size); + r300->shader_constants[shader].count = buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pipe->screen, buf); if (shader == PIPE_SHADER_VERTEX) r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 192846411b..bad9e76067 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -37,32 +37,6 @@ /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ -struct r300_shader_key { - struct r300_vertex_shader* vs; - struct r300_fragment_shader* fs; -}; - -struct r300_shader_derived_value { - struct r300_vertex_format* vformat; - struct r300_rs_block* rs_block; -}; - -unsigned r300_shader_key_hash(void* key) { - struct r300_shader_key* shader_key = (struct r300_shader_key*)key; - unsigned vs = (intptr_t)shader_key->vs; - unsigned fs = (intptr_t)shader_key->fs; - - return (vs << 16) | (fs & 0xffff); -} - -int r300_shader_key_compare(void* key1, void* key2) { - struct r300_shader_key* shader_key1 = (struct r300_shader_key*)key1; - struct r300_shader_key* shader_key2 = (struct r300_shader_key*)key2; - - return (shader_key1->vs == shader_key2->vs) && - (shader_key1->fs == shader_key2->fs); -} - static void r300_draw_emit_attrib(struct r300_context* r300, enum attrib_emit emit, enum interp_mode interp, @@ -74,7 +48,9 @@ static void r300_draw_emit_attrib(struct r300_context* r300, output = draw_find_shader_output(r300->draw, info->output_semantic_name[index], info->output_semantic_index[index]); - draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); + draw_emit_vertex_attr( + (struct vertex_info*)r300->vertex_format_state.state, + emit, interp, output); } static void r300_draw_emit_all_attribs(struct r300_context* r300) @@ -130,7 +106,8 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) /* Update the PSC tables. */ static void r300_vertex_psc(struct r300_context* r300) { - struct r300_vertex_info *vformat = r300->vertex_info; + struct r300_vertex_info *vformat = + (struct r300_vertex_info*)r300->vertex_format_state.state; uint16_t type, swizzle; enum pipe_format format; unsigned i; @@ -182,7 +159,8 @@ static void r300_vertex_psc(struct r300_context* r300) /* Update the PSC tables for SW TCL, using Draw. */ static void r300_swtcl_vertex_psc(struct r300_context* r300) { - struct r300_vertex_info *vformat = r300->vertex_info; + struct r300_vertex_info *vformat = + (struct r300_vertex_info*)r300->vertex_format_state.state; struct vertex_info* vinfo = &vformat->vinfo; uint16_t type, swizzle; enum pipe_format format; @@ -327,7 +305,7 @@ static void r300_update_rs_block(struct r300_context* r300, struct r300_shader_semantics* vs_outputs, struct r300_shader_semantics* fs_inputs) { - struct r300_rs_block* rs = r300->rs_block; + struct r300_rs_block rs = { { 0 } }; int i, col_count = 0, tex_count = 0, fp_offset = 0; void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean); void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); @@ -350,14 +328,15 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ - rX00_rs_col(rs, col_count, i, FALSE); + rX00_rs_col(&rs, col_count, i, FALSE); /* Write it to the FS input register if it's used by the FS. */ if (fs_inputs->color[i] != ATTR_UNUSED) { - rX00_rs_col_write(rs, col_count, fp_offset); + rX00_rs_col_write(&rs, col_count, fp_offset); fp_offset++; } col_count++; @@ -375,11 +354,11 @@ static void r300_update_rs_block(struct r300_context* r300, if (vs_outputs->generic[i] != ATTR_UNUSED) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ - rX00_rs_tex(rs, tex_count, tex_count, FALSE); + rX00_rs_tex(&rs, tex_count, tex_count, FALSE); /* Write it to the FS input register if it's used by the FS. */ if (fs_inputs->generic[i] != ATTR_UNUSED) { - rX00_rs_tex_write(rs, tex_count, fp_offset); + rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; } tex_count++; @@ -396,11 +375,11 @@ static void r300_update_rs_block(struct r300_context* r300, if (vs_outputs->fog != ATTR_UNUSED) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ - rX00_rs_tex(rs, tex_count, tex_count, TRUE); + rX00_rs_tex(&rs, tex_count, tex_count, TRUE); /* Write it to the FS input register if it's used by the FS. */ if (fs_inputs->fog != ATTR_UNUSED) { - rX00_rs_tex_write(rs, tex_count, fp_offset); + rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; } tex_count++; @@ -415,8 +394,8 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize WPOS. */ /* If the FS doesn't need it, it's not written by the VS. */ if (fs_inputs->wpos != ATTR_UNUSED) { - rX00_rs_tex(rs, tex_count, tex_count, FALSE); - rX00_rs_tex_write(rs, tex_count, fp_offset); + rX00_rs_tex(&rs, tex_count, tex_count, FALSE); + rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; tex_count++; @@ -424,51 +403,33 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { - rX00_rs_col(rs, 0, 0, TRUE); + rX00_rs_col(&rs, 0, 0, TRUE); col_count++; } - rs->count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | + rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; - rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0); + rs.inst_count = MAX3(col_count - 1, tex_count - 1, 0); + + /* Now, after all that, see if we actually need to update the state. */ + if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { + memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); + r300->rs_block_state.dirty = TRUE; + } } -/* Update the vertex format. */ +/* Update the shader-dependant states. */ static void r300_update_derived_shader_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_vertex_info *vformat = + (struct r300_vertex_info*)r300->vertex_format_state.state; + struct vertex_info* vinfo = &vformat->vinfo; - /* - struct r300_shader_key* key; - struct r300_shader_derived_value* value; - key = CALLOC_STRUCT(r300_shader_key); - key->vs = r300->vs; - key->fs = r300->fs; - - value = (struct r300_shader_derived_value*) - util_hash_table_get(r300->shader_hash_table, (void*)key); - if (value) { - //vformat = value->vformat; - rs_block = value->rs_block; - - FREE(key); - } else { - rs_block = CALLOC_STRUCT(r300_rs_block); - value = CALLOC_STRUCT(r300_shader_derived_value); - - r300_update_rs_block(r300, rs_block); - - //value->vformat = vformat; - value->rs_block = rs_block; - util_hash_table_set(r300->shader_hash_table, - (void*)key, (void*)value); - } */ - - /* Reset structures */ - memset(r300->rs_block, 0, sizeof(struct r300_rs_block)); - memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info)); - memcpy(r300->vertex_info->vinfo.hwfmt, r300->vs->hwfmt, sizeof(uint)*4); + /* Mmm, delicious hax */ + memset(r300->vertex_format_state.state, 0, sizeof(struct r300_vertex_info)); + memcpy(vinfo->hwfmt, r300->vs->hwfmt, sizeof(uint)*4); r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs); @@ -476,11 +437,10 @@ static void r300_update_derived_shader_state(struct r300_context* r300) r300_vertex_psc(r300); } else { r300_draw_emit_all_attribs(r300); - draw_compute_vertex_size(&r300->vertex_info->vinfo); + draw_compute_vertex_size( + (struct vertex_info*)r300->vertex_format_state.state); r300_swtcl_vertex_psc(r300); } - - r300->dirty_state |= R300_NEW_RS_BLOCK; } static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) @@ -558,8 +518,8 @@ void r300_update_derived_state(struct r300_context* r300) { /* XXX */ if (r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | - R300_NEW_VERTEX_FORMAT) || r300->rs_state.dirty) { + (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER) || + r300->vertex_format_state.dirty || r300->rs_state.dirty) { r300_update_derived_shader_state(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 35be00e1b0..5df6815221 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -81,9 +81,6 @@ static INLINE uint32_t r300_translate_blend_factor(int blend_fact) return R300_BLEND_GL_CONST_COLOR; case PIPE_BLENDFACTOR_CONST_ALPHA: return R300_BLEND_GL_CONST_ALPHA; - /* XXX WTF are these? - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: */ case PIPE_BLENDFACTOR_ZERO: return R300_BLEND_GL_ZERO; case PIPE_BLENDFACTOR_INV_SRC_COLOR: @@ -98,9 +95,16 @@ static INLINE uint32_t r300_translate_blend_factor(int blend_fact) return R300_BLEND_GL_ONE_MINUS_CONST_COLOR; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA; - /* XXX see above + + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: */ + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + debug_printf("r300: Implementation error: " + "Bad blend factor %d not supported!\n", blend_fact); + assert(0); + break; + default: debug_printf("r300: Unknown blend factor %d\n", blend_fact); assert(0); @@ -331,7 +335,10 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) { switch (format) { /* 8-bit buffers */ + case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + /* case PIPE_FORMAT_S8_UNORM: ??? */ return R300_COLOR_FORMAT_I8; /* 16-bit buffers */ case PIPE_FORMAT_R5G6B5_UNORM: @@ -408,6 +415,16 @@ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) return R300_US_OUT_FMT_C4_8 | R300_C0_SEL_A | R300_C1_SEL_B | R300_C2_SEL_G | R300_C3_SEL_R; + + /* 8-bit outputs */ + case PIPE_FORMAT_A8_UNORM: + return R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_A; + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + return R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_R; + /* R300_OUT_SIGN(x) */ default: debug_printf("r300: Implementation error: " "Got unsupported output format %s in %s\n", @@ -537,6 +554,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); + unsigned swizzle[4], i; assert(format); @@ -547,11 +565,26 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { return 0; } - return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | - (0xf << R300_WRITE_ENA_SHIFT)); + /* Swizzles for 8bits formats are in the reversed order, not sure why. */ + if (desc->channel[0].size == 8) { + for (i = 0; i < 4; i++) { + if (desc->swizzle[i] <= 3) { + swizzle[i] = 3 - desc->swizzle[i]; + } else { + swizzle[i] = desc->swizzle[i]; + } + } + } else { + for (i = 0; i < 4; i++) { + swizzle[i] = desc->swizzle[i]; + } + } + + return ((swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT)); } #endif /* R300_STATE_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index f25f3ca217..97927acf1b 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -38,12 +38,12 @@ struct pipe_viewport_state r300_viewport_identity = { * * Note that eventually this should be empty, but it's useful for development * and general unduplication of code. */ -void r300_emit_invariant_state(struct r300_context* r300) +void r300_emit_invariant_state(struct r300_context* r300, void* state) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(16 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(14 + (caps->has_tcl ? 2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ @@ -58,8 +58,6 @@ void r300_emit_invariant_state(struct r300_context* r300) */ /* Source of fog depth */ OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); - /* AA enable */ - OUT_CS_REG(R300_GB_AA_CONFIG, 0x0); /*** Fog (FG) ***/ OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); @@ -79,7 +77,8 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + + (caps->family >= CHIP_FAMILY_RV350 ? 4 : 0)); if (caps->has_tcl) { /*Flushing PVS is required before the VAP_GB registers can be changed*/ @@ -115,10 +114,12 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - if (caps->is_r500) { + + if (caps->family >= CHIP_FAMILY_RV350) { OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h index 05cff0d6df..5d1a963654 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.h +++ b/src/gallium/drivers/r300/r300_state_invariant.h @@ -25,6 +25,6 @@ struct r300_context; -void r300_emit_invariant_state(struct r300_context* r300); +void r300_emit_invariant_state(struct r300_context* r300, void* state); #endif /* R300_STATE_INVARIANT_H */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 9a96206a4d..67bf8ce13f 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -30,10 +30,25 @@ #include "r300_texture.h" #include "r300_screen.h" -static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) +#include "radeon_winsys.h" + +#define TILE_WIDTH 0 +#define TILE_HEIGHT 1 + +static const unsigned microblock_table[5][3][2] = { + /*linear tiled square-tiled */ + {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ + {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +}; + +static void r300_setup_texture_state(struct r300_screen* screen, struct r300_texture* tex) { struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; + boolean is_r500 = screen->caps->is_r500; state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); @@ -67,8 +82,8 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) } assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048)); - debug_printf("r300: Set texture state (%dx%d, %d levels)\n", - pt->width0, pt->height0, pt->last_level); + SCREEN_DBG(screen, DBG_TEX, "r300: Set texture state (%dx%d, %d levels)\n", + pt->width0, pt->height0, pt->last_level); } unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, @@ -92,33 +107,78 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } /** + * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile + * of the given texture. + */ +static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim) +{ + unsigned pixsize, tile_size; + + pixsize = util_format_get_blocksize(tex->tex.format); + tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] * + (tex->macrotile == R300_BUFFER_TILED ? 8 : 1); + + assert(tile_size); + return tile_size; +} + +/** * Return the stride, in bytes, of the texture images of the given texture * at the given level. */ -unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) +unsigned r300_texture_get_stride(struct r300_screen* screen, + struct r300_texture* tex, unsigned level) { + unsigned tile_width, width; + if (tex->stride_override) return tex->stride_override; + /* Check the level. */ if (level > tex->tex.last_level) { - debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, - level, tex->tex.last_level); + SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", + __FUNCTION__, level, tex->tex.last_level); return 0; } - return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); + width = u_minify(tex->tex.width0, level); + + if (!util_format_is_compressed(tex->tex.format)) { + tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH); + width = align(width, tile_width); + return util_format_get_stride(tex->tex.format, width); + } else { + return align(util_format_get_stride(tex->tex.format, width), 32); + } } -static void r300_setup_miptree(struct r300_texture* tex) +static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, + unsigned level) +{ + unsigned height, tile_height; + + height = u_minify(tex->tex.height0, level); + + if (!util_format_is_compressed(tex->tex.format)) { + tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT); + height = align(height, tile_height); + } + + return util_format_get_nblocksy(tex->tex.format, height); +} + +static void r300_setup_miptree(struct r300_screen* screen, + struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, layer_size; - int i; + unsigned stride, size, layer_size, nblocksy, i; - for (i = 0; i <= base->last_level; i++) { - unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); + SCREEN_DBG(screen, DBG_TEX, "r300: Making miptree for texture, format %s\n", + pf_name(base->format)); - stride = r300_texture_get_stride(tex, i); + for (i = 0; i <= base->last_level; i++) { + stride = r300_texture_get_stride(screen, tex, i); + nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) @@ -131,10 +191,10 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->layer_size[i] = layer_size; tex->pitch[i] = stride / util_format_get_blocksize(base->format); - debug_printf("r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes)\n", + SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes) %d bytes total\n", i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride); + u_minify(base->depth0, i), stride, tex->size); } } @@ -150,6 +210,8 @@ static struct pipe_texture* const struct pipe_texture* template) { struct r300_texture* tex = CALLOC_STRUCT(r300_texture); + struct r300_screen* rscreen = r300_screen(screen); + struct radeon_winsys* winsys = (struct radeon_winsys*)screen->winsys; if (!tex) { return NULL; @@ -160,12 +222,16 @@ static struct pipe_texture* tex->tex.screen = screen; r300_setup_flags(tex); - r300_setup_miptree(tex); - r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); + r300_setup_miptree(rscreen, tex); + r300_setup_texture_state(rscreen, tex); - tex->buffer = screen->buffer_create(screen, 1024, + tex->buffer = screen->buffer_create(screen, 2048, PIPE_BUFFER_USAGE_PIXEL, tex->size); + winsys->buffer_set_tiling(winsys, tex->buffer, + tex->pitch[0], + tex->microtile != R300_BUFFER_LINEAR, + tex->macrotile != R300_BUFFER_LINEAR); if (!tex->buffer) { FREE(tex); @@ -227,6 +293,7 @@ static struct pipe_texture* struct pipe_buffer* buffer) { struct r300_texture* tex; + struct r300_screen* rscreen = r300_screen(screen); /* Support only 2D textures without mipmaps */ if (base->target != PIPE_TEXTURE_2D || @@ -248,7 +315,7 @@ static struct pipe_texture* tex->pitch[0] = *stride / util_format_get_blocksize(base->format); r300_setup_flags(tex); - r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); + r300_setup_texture_state(rscreen, tex); pipe_buffer_reference(&tex->buffer, buffer); @@ -315,7 +382,8 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen) screen->video_surface_destroy= r300_video_surface_destroy; } -boolean r300_get_texture_buffer(struct pipe_texture* texture, +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride) { @@ -327,7 +395,7 @@ boolean r300_get_texture_buffer(struct pipe_texture* texture, pipe_buffer_reference(buffer, tex->buffer); if (stride) { - *stride = r300_texture_get_stride(tex, 0); + *stride = r300_texture_get_stride(r300_screen(screen), tex, 0); } return TRUE; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 55ceb1a513..961bdcc5b3 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -31,31 +31,46 @@ struct r300_texture; void r300_init_screen_texture_functions(struct pipe_screen* screen); -unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); +unsigned r300_texture_get_stride(struct r300_screen* screen, + struct r300_texture* tex, unsigned level); unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, unsigned zslice, unsigned face); -/* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ +/* Translate a pipe_format into a useful texture format for sampling. + * + * R300_EASY_TX_FORMAT swizzles the texture. + * Note the signature of R300_EASY_TX_FORMAT: + * R300_EASY_TX_FORMAT(B, G, R, A, FORMAT); + * + * The FORMAT specifies how the texture sampler will treat the texture, and + * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { switch (format) { /* X8 */ + case PIPE_FORMAT_A8_UNORM: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8); case PIPE_FORMAT_I8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X8); case PIPE_FORMAT_L8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); + case PIPE_FORMAT_L8_SRGB: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | + R300_TX_FORMAT_GAMMA; /* X16 */ case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_Z16_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16); case PIPE_FORMAT_R16_SNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16) | R300_TX_FORMAT_SIGNED; - case PIPE_FORMAT_Z16_UNORM: - return R300_EASY_TX_FORMAT(X, X, X, X, X16); /* Y8X8 */ case PIPE_FORMAT_A8L8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); + case PIPE_FORMAT_A8L8_SRGB: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | + R300_TX_FORMAT_GAMMA; /* W8Z8Y8X8 */ case PIPE_FORMAT_A8R8G8B8_UNORM: return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); @@ -115,7 +130,8 @@ r300_video_surface(struct pipe_video_surface *pvs) #ifndef R300_WINSYS_H -boolean r300_get_texture_buffer(struct pipe_texture* texture, +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index a792c2cf98..941ec17016 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -201,6 +201,8 @@ static void transform_srcreg( struct rc_src_register * dst, struct tgsi_full_src_register * src) { + unsigned i, j; + dst->File = translate_register_file(src->Register.File); dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); dst->RelAddr = src->Register.Indirect; @@ -210,6 +212,21 @@ static void transform_srcreg( dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; dst->Abs = src->Register.Absolute; dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0; + + if (src->Register.File == TGSI_FILE_IMMEDIATE) { + for (i = 0; i < ttr->imms_to_swizzle_count; i++) { + if (ttr->imms_to_swizzle[i].index == src->Register.Index) { + dst->File = RC_FILE_TEMPORARY; + dst->Index = 0; + dst->Swizzle = 0; + for (j = 0; j < 4; j++) { + dst->Swizzle |= GET_SWZ(ttr->imms_to_swizzle[i].swizzle, + tgsi_util_get_full_src_register_swizzle(src, j)) << (j * 3); + } + break; + } + } + } } static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src, @@ -277,21 +294,45 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst &ttr->compiler->Program.ShadowSamplers); } -static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) +static void handle_immediate(struct tgsi_to_rc * ttr, + struct tgsi_full_immediate * imm, + unsigned index) { struct rc_constant constant; - int i; + unsigned swizzle = 0; + boolean can_swizzle = TRUE; + unsigned i; - constant.Type = RC_CONSTANT_IMMEDIATE; - constant.Size = 4; - for(i = 0; i < 4; ++i) - constant.u.Immediate[i] = imm->u[i].Float; - rc_constants_add(&ttr->compiler->Program.Constants, &constant); + for (i = 0; i < 4; i++) { + if (imm->u[i].Float == 0.0f) { + swizzle |= RC_SWIZZLE_ZERO << (i * 3); + } else if (imm->u[i].Float == 0.5f) { + swizzle |= RC_SWIZZLE_HALF << (i * 3); + } else if (imm->u[i].Float == 1.0f) { + swizzle |= RC_SWIZZLE_ONE << (i * 3); + } else { + can_swizzle = FALSE; + break; + } + } + + if (can_swizzle) { + ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].index = index; + ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].swizzle = swizzle; + ttr->imms_to_swizzle_count++; + } else { + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + for(i = 0; i < 4; ++i) + constant.u.Immediate[i] = imm->u[i].Float; + rc_constants_add(&ttr->compiler->Program.Constants, &constant); + } } void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) { struct tgsi_parse_context parser; + unsigned imm_index = 0; int i; /* Allocate constants placeholders. @@ -308,6 +349,9 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) ttr->immediate_offset = ttr->compiler->Program.Constants.Count; + ttr->imms_to_swizzle = malloc(ttr->info->immediate_count * sizeof(struct swizzled_imms)); + ttr->imms_to_swizzle_count = 0; + tgsi_parse_init(&parser, tokens); while (!tgsi_parse_end_of_tokens(&parser)) { @@ -317,7 +361,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) case TGSI_TOKEN_TYPE_DECLARATION: break; case TGSI_TOKEN_TYPE_IMMEDIATE: - handle_immediate(ttr, &parser.FullToken.FullImmediate); + handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index); + imm_index++; break; case TGSI_TOKEN_TYPE_INSTRUCTION: transform_instruction(ttr, &parser.FullToken.FullInstruction); @@ -327,6 +372,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) tgsi_parse_free(&parser); + free(ttr->imms_to_swizzle); + rc_calculate_inputs_outputs(ttr->compiler); } diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h index 93e90ec6d2..39b473c7bf 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -29,11 +29,18 @@ struct tgsi_full_declaration; struct tgsi_shader_info; struct tgsi_token; +struct swizzled_imms { + unsigned index; + unsigned swizzle; +}; + struct tgsi_to_rc { struct radeon_compiler * compiler; const struct tgsi_shader_info * info; int immediate_offset; + struct swizzled_imms * imms_to_swizzle; + unsigned imms_to_swizzle_count; }; void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 68aef70872..fb81b2439b 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -61,17 +61,17 @@ static void r300_shader_read_vs_outputs( break; case TGSI_SEMANTIC_COLOR: - assert(index <= ATTR_COLOR_COUNT); + assert(index < ATTR_COLOR_COUNT); vs_outputs->color[index] = i; break; case TGSI_SEMANTIC_BCOLOR: - assert(index <= ATTR_COLOR_COUNT); + assert(index < ATTR_COLOR_COUNT); vs_outputs->bcolor[index] = i; break; case TGSI_SEMANTIC_GENERIC: - assert(index <= ATTR_GENERIC_COUNT); + assert(index < ATTR_GENERIC_COUNT); vs_outputs->generic[index] = i; break; @@ -124,7 +124,8 @@ static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { hwfmt[1] |= R300_INPUT_CNTL_COLOR; hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } @@ -182,7 +183,8 @@ static void r300_stream_locations_notcl( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { stream_loc[tabi++] = 2 + i; } } @@ -259,7 +261,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (outputs->color[i] != ATTR_UNUSED) { c->code->outputs[outputs->color[i]] = reg++; - } else if (any_bcolor_used) { + } else if (any_bcolor_used || + outputs->color[1] != ATTR_UNUSED) { reg++; } } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 1ae6de70fe..f4a8ae120c 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -29,18 +29,20 @@ extern "C" { /* The public interface header for the r300 pipe driver. * Any winsys hosting this pipe needs to implement r300_winsys and then - * call r300_create_context to start things. */ + * call r300_create_screen to start things. */ #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "radeon_winsys.h" -struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct radeon_winsys* radeon_winsys); +/* Creates a new r300 screen. */ +struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); -boolean r300_get_texture_buffer(struct pipe_texture* texture, + +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); |