diff options
Diffstat (limited to 'src/gallium/drivers')
41 files changed, 697 insertions, 485 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index ff0e207a54..84c66dd36e 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -92,8 +92,9 @@ llvmpipe_get_query_result(struct pipe_context *pipe, int i; if (!pq->fence) { - assert(0); /* query not in issued state */ - return FALSE; + /* no fence because there was no scene, so results is zero */ + *result = 0; + return TRUE; } if (!lp_fence_signalled(pq->fence)) { diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index c66313f0ed..96633d9365 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -412,10 +412,9 @@ llvmpipe_create_screen(struct sw_winsys *winsys) lp_jit_screen_init(screen); + screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0; #ifdef PIPE_OS_EMBEDDED screen->num_threads = 0; -#else - screen->num_threads = util_cpu_caps.nr_cpus; #endif screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 9f090d1992..829eb8a5a0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -292,6 +292,7 @@ try_setup_line( struct lp_setup_context *setup, float x2diff; float y2diff; float dx, dy; + float area; boolean draw_start; boolean draw_end; @@ -311,6 +312,18 @@ try_setup_line( struct lp_setup_context *setup, dx = v1[0][0] - v2[0][0]; dy = v1[0][1] - v2[0][1]; + area = (dx * dx + dy * dy); + if (area == 0) { + LP_COUNT(nr_culled_tris); + return TRUE; + } + + info.oneoverarea = 1.0f / area; + info.dx = dx; + info.dy = dy; + info.v1 = v1; + info.v2 = v2; + /* X-MAJOR LINE */ if (fabsf(dx) >= fabsf(dy)) { @@ -573,12 +586,6 @@ try_setup_line( struct lp_setup_context *setup, line->plane[3].dcdx = y[3] - y[0]; - info.oneoverarea = 1.0f / (dx * dx + dy * dy); - info.dx = dx; - info.dy = dy; - info.v1 = v1; - info.v2 = v2; - /* Setup parameter interpolants: */ setup_line_coefficients( setup, line, &info); diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index c9003c97f5..ab480cabd0 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -12,6 +12,10 @@ #include "nouveau/nouveau_resource.h" #include "nouveau/nouveau_pushbuf.h" +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + static INLINE uint32_t nouveau_screen_transfer_flags(unsigned pipe) { diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 6ec9095a74..ac69c7848e 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -157,6 +157,7 @@ struct nv50_context { unsigned sampler_view_nr[3]; unsigned vbo_fifo; + unsigned req_lmem; }; static INLINE struct nv50_context * diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index 676540538e..c88e7ba742 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -328,7 +328,7 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv) static void nv_do_print_function(void *priv, struct nv_basic_block *b) { - struct nv_instruction *i = b->phi; + struct nv_instruction *i; debug_printf("=== BB %i ", b->id); if (b->out[0]) @@ -547,6 +547,8 @@ nv50_generate_code(struct nv50_translation_info *ti) ti->p->fixups = pc->fixups; ti->p->num_fixups = pc->num_fixups; + ti->p->uses_lmem = ti->store_to_memory; + NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success"); out: diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c index b9d5ba5ef6..39ae36681c 100644 --- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c +++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c @@ -767,7 +767,7 @@ nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set) static void insert_ordered_tail(struct nv_value *list, struct nv_value *nval) { - struct nv_value *elem = list->prev; + struct nv_value *elem; for (elem = list->prev; elem != list && elem->livei->bgn > nval->livei->bgn; diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 37b02bbec7..33c4c8ca6d 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -45,6 +45,7 @@ struct nv50_program { ubyte type; boolean translated; + boolean uses_lmem; struct nouveau_bo *bo; struct nouveau_stateobj *so; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 49af9b59be..7c9342b747 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -222,6 +222,36 @@ nv50_screen_destroy(struct pipe_screen *pscreen) OUT_RELOC(ch, bo, (n << 18) | (gr->subc << 13) | m, fl, 0, 0) void +nv50_screen_reloc_constbuf(struct nv50_screen *screen, unsigned cbi) +{ + struct nouveau_bo *bo; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *tesla = screen->tesla; + unsigned size; + const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; + + switch (cbi) { + case NV50_CB_PMISC: + bo = screen->constbuf_misc[0]; + size = 0x200; + break; + case NV50_CB_PVP: + case NV50_CB_PFP: + case NV50_CB_PGP: + bo = screen->constbuf_parm[cbi - NV50_CB_PVP]; + size = 0; + break; + default: + return; + } + + BGN_RELOC (chan, bo, tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); + OUT_RELOCh(chan, bo, 0, rl); + OUT_RELOCl(chan, bo, 0, rl); + OUT_RELOC (chan, bo, (cbi << 16) | size, rl, 0, 0); +} + +void nv50_screen_relocs(struct nv50_screen *screen) { struct nouveau_channel *chan = screen->base.channel; @@ -243,12 +273,7 @@ nv50_screen_relocs(struct nv50_screen *screen) OUT_RELOCh(chan, screen->tsc, 0, rl); OUT_RELOCl(chan, screen->tsc, 0, rl); - BGN_RELOC (chan, screen->constbuf_misc[0], - tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); - OUT_RELOCh(chan, screen->constbuf_misc[0], 0, rl); - OUT_RELOCl(chan, screen->constbuf_misc[0], 0, rl); - OUT_RELOC (chan, screen->constbuf_misc[0], - (NV50_CB_PMISC << 16) | 0x0200, rl, 0, 0); + nv50_screen_reloc_constbuf(screen, NV50_CB_PMISC); BGN_RELOC (chan, screen->constbuf_misc[0], tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); @@ -257,14 +282,21 @@ nv50_screen_relocs(struct nv50_screen *screen) OUT_RELOC (chan, screen->constbuf_misc[0], (NV50_CB_AUX << 16) | 0x0200, rl, 0, 0); - for (i = 0; i < 3; ++i) { - BGN_RELOC (chan, screen->constbuf_parm[i], - tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); - OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl); - OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl); - OUT_RELOC (chan, screen->constbuf_parm[i], - ((NV50_CB_PVP + i) << 16) | 0x0000, rl, 0, 0); - } + for (i = 0; i < 3; ++i) + nv50_screen_reloc_constbuf(screen, NV50_CB_PVP + i); + + BGN_RELOC (chan, screen->stack_bo, + tesla, NV50TCL_STACK_ADDRESS_HIGH, 2, rl); + OUT_RELOCh(chan, screen->stack_bo, 0, rl); + OUT_RELOCl(chan, screen->stack_bo, 0, rl); + + if (!screen->cur_ctx->req_lmem) + return; + + BGN_RELOC (chan, screen->local_bo, + tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 2, rl); + OUT_RELOCh(chan, screen->local_bo, 0, rl); + OUT_RELOCl(chan, screen->local_bo, 0, rl); } #ifndef NOUVEAU_GETPARAM_GRAPH_UNITS diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index ad6bdeb27c..6e15230b48 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -39,6 +39,8 @@ nv50_screen(struct pipe_screen *screen) extern void nv50_screen_relocs(struct nv50_screen *); +extern void nv50_screen_reloc_constbuf(struct nv50_screen *, unsigned cbi); + struct nv50_format { uint32_t rt; uint32_t tic; diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index 8c1a5999cf..1a2fe758a8 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -47,10 +47,17 @@ nv50_transfer_constbuf(struct nv50_context *nv50, start = 0; while (count) { - unsigned nr = count; - nr = MIN2(nr, 2047); + unsigned nr = AVAIL_RING(chan); + + if (nr < 8) { + FIRE_RING(chan); + continue; + } + nr = MIN2(count, nr - 7); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + nv50_screen_reloc_constbuf(nv50->screen, cbi); - /* FIXME: emit relocs for unsuiTed MM */ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); OUT_RING (chan, (start << 8) | cbi); BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr); @@ -77,8 +84,16 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) unsigned start = 0; while (count) { - unsigned nr = count; - nr = MIN2(nr, 2047); + unsigned nr = AVAIL_RING(chan); + + if (nr < 8) { + FIRE_RING(chan); + continue; + } + nr = MIN2(count, nr - 7); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + nv50_screen_reloc_constbuf(nv50->screen, NV50_CB_PMISC); BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); OUT_RING (chan, (start << 8) | NV50_CB_PMISC); @@ -111,8 +126,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) break; default: assert(0); - cbi = 0; - break; + return; } nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi); @@ -281,6 +295,17 @@ nv50_program_validate(struct nv50_program *p) return p->translated; } +static INLINE void +nv50_program_validate_common(struct nv50_context *nv50, struct nv50_program *p) +{ + nv50_program_validate_code(nv50, p); + + if (p->uses_lmem) + nv50->req_lmem |= 1 << p->type; + else + nv50->req_lmem &= ~(1 << p->type); +} + struct nouveau_stateobj * nv50_vertprog_validate(struct nv50_context *nv50) { @@ -300,7 +325,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) if (!(nv50->dirty & NV50_NEW_VERTPROG)) return NULL; - nv50_program_validate_code(nv50, p); + nv50_program_validate_common(nv50, p); so_ref(p->so, &so); return so; @@ -325,7 +350,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) if (!(nv50->dirty & NV50_NEW_FRAGPROG)) return NULL; - nv50_program_validate_code(nv50, p); + nv50_program_validate_common(nv50, p); so_ref(p->so, &so); return so; @@ -350,7 +375,7 @@ nv50_geomprog_validate(struct nv50_context *nv50) if (!(nv50->dirty & NV50_NEW_GEOMPROG)) return NULL; - nv50_program_validate_code(nv50, p); + nv50_program_validate_common(nv50, p); so_ref(p->so, &so); return so; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f1d8202dff..16c2dab9af 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -333,7 +333,7 @@ struct state_validate { { validate_vtxbuf , NV50_NEW_ARRAYS }, { validate_vtxattr , NV50_NEW_ARRAYS }, { validate_clip , NV50_NEW_CLIP }, - {} + { NULL , 0 } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 23f045ecf6..e543fda50e 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -377,6 +377,8 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { src.swz[2] = fsrc->Register.SwizzleZ; src.swz[3] = fsrc->Register.SwizzleW; src.indirect = 0; + src.indirect_reg = 0; + src.indirect_swz = 0; if(fsrc->Register.Indirect) { if(fsrc->Indirect.File == TGSI_FILE_ADDRESS && @@ -973,7 +975,7 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE) static struct nvfx_vertex_program* -nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps, const struct tgsi_shader_info* info) +nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps, struct tgsi_shader_info* info) { struct tgsi_parse_context parse; struct nvfx_vertex_program* vp = NULL; diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index 0ea11e5bfc..9247064508 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -29,6 +29,7 @@ #include "r300_context.h" #include "translate/translate.h" +#include "util/u_index_modify.h" void r300_begin_vertex_translate(struct r300_context *r300) { @@ -188,111 +189,6 @@ void r300_end_vertex_translate(struct r300_context *r300) NULL); } -static void r300_shorten_ubyte_elts(struct r300_context* r300, - struct pipe_resource** elts, - int index_bias, - unsigned start, - unsigned count) -{ - struct pipe_context* context = &r300->context; - struct pipe_screen* screen = r300->context.screen; - struct pipe_resource* new_elts; - unsigned char *in_map; - unsigned short *out_map; - struct pipe_transfer *src_transfer, *dst_transfer; - unsigned i; - - new_elts = pipe_buffer_create(screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); - out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); - - in_map += start; - - for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, src_transfer); - pipe_buffer_unmap(context, new_elts, dst_transfer); - - *elts = new_elts; -} - -static void r300_rebuild_ushort_elts(struct r300_context *r300, - struct pipe_resource **elts, - int index_bias, - unsigned start, unsigned count) -{ - struct pipe_context *context = &r300->context; - struct pipe_transfer *in_transfer = NULL; - struct pipe_transfer *out_transfer = NULL; - struct pipe_resource *new_elts; - unsigned short *in_map; - unsigned short *out_map; - unsigned i; - - new_elts = pipe_buffer_create(context->screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); - out_map = pipe_buffer_map(context, new_elts, - PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; -} - -static void r300_rebuild_uint_elts(struct r300_context *r300, - struct pipe_resource **elts, - int index_bias, - unsigned start, unsigned count) -{ - struct pipe_context *context = &r300->context; - struct pipe_transfer *in_transfer = NULL; - struct pipe_transfer *out_transfer = NULL; - struct pipe_resource *new_elts; - unsigned int *in_map; - unsigned int *out_map; - unsigned i; - - new_elts = pipe_buffer_create(context->screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); - out_map = pipe_buffer_map(context, new_elts, - PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned int)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; -} - void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, @@ -300,21 +196,21 @@ void r300_translate_index_buffer(struct r300_context *r300, { switch (*index_size) { case 1: - r300_shorten_ubyte_elts(r300, index_buffer, index_offset, *start, count); + util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count); *index_size = 2; *start = 0; break; case 2: if (*start % 2 != 0 || index_offset) { - r300_rebuild_ushort_elts(r300, index_buffer, index_offset, *start, count); + util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count); *start = 0; } break; case 4: if (index_offset) { - r300_rebuild_uint_elts(r300, index_buffer, index_offset, *start, count); + util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count); *start = 0; } break; diff --git a/src/gallium/drivers/r600/eg_hw_states.c b/src/gallium/drivers/r600/eg_hw_states.c index d6f417e1e3..3d10095919 100644 --- a/src/gallium/drivers/r600/eg_hw_states.c +++ b/src/gallium/drivers/r600/eg_hw_states.c @@ -422,11 +422,11 @@ static void eg_dsa(struct r600_context *rctx, struct radeon_state *rstate) S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - query_running = false; + query_running = FALSE; LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { if (rquery->state & R600_QUERY_STATE_STARTED) { - query_running = true; + query_running = TRUE; } } @@ -471,6 +471,7 @@ static void eg_sampler_border(struct r600_context *rctx, struct radeon_state *rs radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER_BORDER, id, R600_SHADER_PS); if (uc.ui) { + rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX] = id; rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED] = fui(state->border_color[0]); rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN] = fui(state->border_color[1]); rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE] = fui(state->border_color[2]); @@ -559,8 +560,7 @@ static void eg_resource(struct pipe_context *ctx, struct radeon_state *rstate, rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; + pitch = align(tmp->pitch[0] / tmp->bpt, 8); /* FIXME properly handle first level != 0 */ rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD0] = @@ -930,7 +930,7 @@ static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rp const struct pipe_rasterizer_state *rasterizer; struct r600_shader *rshader = &rpshader->shader; unsigned i, tmp, exports_ps, num_cout; - boolean have_pos = FALSE; + boolean have_pos = FALSE, have_face = FALSE; rasterizer = &rctx->rasterizer->state.rasterizer; @@ -945,6 +945,10 @@ static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rp rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } + + if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + have_face = TRUE; + if (rasterizer->sprite_coord_enable & (1 << i)) { tmp |= S_028644_PT_SPRITE_TEX(1); } @@ -957,10 +961,10 @@ static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rp if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - exports_ps |= (1 << (num_cout+1)); num_cout++; } } + exports_ps |= (1 << num_cout); if (!exports_ps) { /* always at least export 1 component per pixel */ exports_ps = 2; @@ -971,7 +975,10 @@ static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rp state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1); state->states[EG_PS_SHADER__SPI_INPUT_Z] |= 1; } + state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; + state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_1] |= S_0286D0_FRONT_FACE_ENA(have_face); + state->states[EG_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | S_028844_STACK_SIZE(rshader->bc.nstack); state->states[EG_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 4e3514638b..0a42abcdf2 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -283,6 +283,9 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_SWAP_STD; /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -310,6 +313,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028C70_SWAP_STD; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_SWAP_STD; + case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: @@ -357,6 +364,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: return V_028C70_COLOR_4_4_4_4; + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -383,6 +393,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028C70_COLOR_8_24; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_COLOR_24_8; + case PIPE_FORMAT_R32_FLOAT: return V_028C70_COLOR_32_FLOAT; diff --git a/src/gallium/drivers/r600/eg_states_inc.h b/src/gallium/drivers/r600/eg_states_inc.h index 462f31cc79..9f8007c8e9 100644 --- a/src/gallium/drivers/r600/eg_states_inc.h +++ b/src/gallium/drivers/r600/eg_states_inc.h @@ -368,27 +368,30 @@ #define EG_GS_SAMPLER_PM4 128 /* EG_PS_SAMPLER_BORDER */ -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 0 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 1 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 2 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 3 -#define EG_PS_SAMPLER_BORDER_SIZE 4 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX 0 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 1 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 2 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 3 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 4 +#define EG_PS_SAMPLER_BORDER_SIZE 5 #define EG_PS_SAMPLER_BORDER_PM4 128 /* EG_VS_SAMPLER_BORDER */ -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 0 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 1 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 2 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 3 -#define EG_VS_SAMPLER_BORDER_SIZE 4 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_INDEX 0 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 1 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 2 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 3 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 4 +#define EG_VS_SAMPLER_BORDER_SIZE 5 #define EG_VS_SAMPLER_BORDER_PM4 128 /* EG_GS_SAMPLER_BORDER */ -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 0 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 1 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 2 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 3 -#define EG_GS_SAMPLER_BORDER_SIZE 4 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_INDEX 0 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 1 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 2 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 3 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 4 +#define EG_GS_SAMPLER_BORDER_SIZE 5 #define EG_GS_SAMPLER_BORDER_PM4 128 /* EG_CB */ diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index bce2707e77..a123eb62e0 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -90,28 +90,15 @@ enum radeon_family { CHIP_LAST, }; +enum chip_class { + R600, + R700, + EVERGREEN, +}; + enum radeon_family r600_get_family(struct radeon *rw); +enum chip_class r600_get_family_class(struct radeon *radeon); -/* - * radeon object functions - */ -#if 0 -struct radeon_bo { - unsigned refcount; - unsigned handle; - unsigned size; - unsigned alignment; - unsigned map_count; - void *data; -}; -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, void *ptr); -int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo); -void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo); -struct radeon_bo *radeon_bo_incref(struct radeon *radeon, struct radeon_bo *bo); -struct radeon_bo *radeon_bo_decref(struct radeon *radeon, struct radeon_bo *bo); -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); -#endif /* lowlevel WS bo */ struct radeon_ws_bo; struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon, @@ -122,7 +109,6 @@ void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo); void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, struct radeon_ws_bo *src); -int radeon_ws_bo_wait(struct radeon *radeon, struct radeon_ws_bo *bo); /* R600/R700 STATES */ #define R600_GROUP_MAX 16 diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 0d17f75da7..dcb1b4fccc 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -531,7 +531,8 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && - bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)) { + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) || + bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(nvtx); @@ -543,6 +544,8 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; + if ((bc->ndw / 4) > 7) + bc->force_add_cf = 1; return 0; } @@ -557,7 +560,8 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || - bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || + bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(ntex); @@ -569,6 +573,8 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; + if ((bc->ndw / 4) > 7) + bc->force_add_cf = 1; return 0; } @@ -696,6 +702,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | S_SQ_CF_ALU_WORD1_BARRIER(1) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == 0 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 62a46cb0e1..6aadf72957 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -127,6 +127,7 @@ struct r600_bc_cf { unsigned pop_count; unsigned cf_addr; /* control flow addr */ unsigned kcache0_mode; + unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; struct list_head vtx; diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 54fbc50bbc..0506e8280a 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -283,7 +283,7 @@ static void r600_blit_state_vs_shader(struct r600_screen *rscreen, struct radeon radeon_ws_bo_reference(rscreen->rw, &bo, NULL); return; } - switch (rscreen->chip_class) { + switch (radeon_get_family_class(rscreen->rw)) { case R600: memcpy(data, shader_bc_r600, 128); break; @@ -347,7 +347,7 @@ static void r600_blit_state_ps_shader(struct r600_screen *rscreen, struct radeon radeon_ws_bo_reference(rscreen->rw, &bo, NULL); return; } - switch (rscreen->chip_class) { + switch (radeon_get_family_class(rscreen->rw)) { case R600: memcpy(data, shader_bc_r600, 48); break; diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index dc3fc812e1..ea370782fd 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -171,7 +171,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, if (transfer->usage & PIPE_TRANSFER_WRITE) { write = 1; } - data = radeon_ws_bo_map(rscreen->rw, rbuffer->r.bo, transfer->usage, rctx); + data = radeon_ws_bo_map(rscreen->rw, rbuffer->r.bo, transfer->usage, pipe); if (!data) return NULL; diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 776dc24569..7a63d966eb 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -69,6 +69,10 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, { struct r600_context *rctx = r600_context(ctx); struct r600_query *rquery = NULL; +#if 0 + static int dc = 0; + char dname[256]; +#endif /* flush upload buffers */ u_upload_flush(rctx->upload_vb); @@ -77,10 +81,20 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, /* suspend queries */ r600_queries_suspend(ctx); + +#if 0 + sprintf(dname, "gallium-%08d.bof", dc); + if (dc < 2) { + radeon_ctx_dump_bof(rctx->ctx, dname); + R600_ERR("dumped %s\n", dname); + } + dc++; +#endif + radeon_ctx_submit(rctx->ctx); LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - rquery->flushed = true; + rquery->flushed = TRUE; } radeon_ctx_clear(rctx->ctx); @@ -88,13 +102,6 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, r600_queries_resume(ctx); } -void r600_flush_ctx(void *data) -{ - struct r600_context *rctx = data; - - rctx->context.flush(&rctx->context, 0, NULL); -} - struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) { struct r600_context *rctx = CALLOC_STRUCT(r600_context); @@ -113,7 +120,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) rctx->screen = rscreen; rctx->rw = rscreen->rw; - if (rscreen->chip_class == EVERGREEN) + if (radeon_get_family_class(rscreen->rw) == EVERGREEN) rctx->vtbl = &eg_hw_state_vtbl; else rctx->vtbl = &r600_hw_state_vtbl; diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 5480ca002d..51c9b06549 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -31,16 +31,44 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> +#include <util/u_index_modify.h> #include "radeon.h" #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" #include "r600_state_inlines.h" +static void r600_translate_index_buffer(struct r600_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, unsigned index_offset, + unsigned *start, unsigned count) +{ + switch (*index_size) { + case 1: + util_shorten_ubyte_elts(&r600->context, index_buffer, index_offset, *start, count); + *index_size = 2; + *start = 0; + break; + + case 2: + if (*start % 2 != 0 || index_offset) { + util_rebuild_ushort_elts(&r600->context, index_buffer, index_offset, *start, count); + *start = 0; + } + break; + + case 4: + if (index_offset) { + util_rebuild_uint_elts(&r600->context, index_buffer, index_offset, *start, count); + *start = 0; + } + break; + } +} + static int r600_draw_common(struct r600_draw *draw) { struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; /* FIXME vs_resource */ struct radeon_state *vs_resource; struct r600_resource *rbuffer; @@ -128,7 +156,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) if (rctx->any_user_vbs) { r600_upload_user_buffers(rctx); - rctx->any_user_vbs = false; + rctx->any_user_vbs = FALSE; } draw.ctx = ctx; @@ -136,14 +164,19 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) draw.start = info->start; draw.count = info->count; if (info->indexed && rctx->index_buffer.buffer) { + draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; draw.min_index = info->min_index; draw.max_index = info->max_index; + + r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, + &rctx->index_buffer.index_size, + rctx->index_buffer.offset, &draw.start, + info->count); + draw.index_size = rctx->index_buffer.index_size; draw.index_buffer = rctx->index_buffer.buffer; draw.index_buffer_offset = rctx->index_buffer.offset; - assert(rctx->index_buffer.offset % - rctx->index_buffer.index_size == 0); r600_upload_index_buffer(rctx, &draw); } else { diff --git a/src/gallium/drivers/r600/r600_hw_states.c b/src/gallium/drivers/r600/r600_hw_states.c index bca78ee8de..271bd1ac50 100644 --- a/src/gallium/drivers/r600/r600_hw_states.c +++ b/src/gallium/drivers/r600/r600_hw_states.c @@ -197,7 +197,7 @@ static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rsta float offset_units = 0, offset_scale = 0; char depth = 0; unsigned offset_db_fmt_cntl = 0; - unsigned tmp; + unsigned point_size; unsigned prov_vtx = 1; if (rctx->clip) @@ -232,7 +232,8 @@ static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rsta rctx->flat_shade = state->flatshade; radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; + rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = + S_0286D4_FLAT_SHADE_ENA(1); if (state->sprite_coord_enable) { rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= S_0286D4_PNT_SPRITE_ENA(1) | @@ -247,9 +248,18 @@ static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rsta } rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0; if (clip) { - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1); - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp); - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); + /* Clip plane enable bits are stashed in the lower six bits of + * PA_CL_CLIP_CNTL, so just set all of the corresponding bits with a + * pinch of bit twiddling. + * + * PS_UCP_MODE 3 is "expand and clip as trifan," which is the same + * setting that we use on r300-r500. I believe that fglrx always uses + * this mode as well. */ + rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = + ((1 << clip->nr) - 1) | + S_028810_PS_UCP_MODE(3) | + S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp) | + S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); } rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = S_028814_PROVOKING_VTX_LAST(prov_vtx) | @@ -263,18 +273,24 @@ static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rsta S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; - /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0); - rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp); - rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; - rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; + /* Point size for PA_SU_POINT_SIZE and PA_SU_POINT_MINMAX is fixed-point, + * 12.4. + * + * For some reason, maximum point size is set to 0x8000 (2048.0) instead + * of the maximum value 0xFFF0 (4095.0). */ + point_size = (unsigned)(state->point_size * 8.0); + rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = + S_028A00_HEIGHT(point_size) | S_028A00_WIDTH(point_size); + rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = + S_028A04_MIN_SIZE(0) | S_028A04_MAX_SIZE(0x8000); + rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = S_028A08_WIDTH(8); rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; + rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = S_028C00_LAST_PIXEL(1); + rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = fui(1); + rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = fui(1); + rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = fui(1); + rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = fui(1); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl; rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale); @@ -314,7 +330,8 @@ static void r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = + S_02820C_CLIP_RULE(0xFFFF); rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; @@ -339,15 +356,22 @@ static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate struct r600_screen *rscreen = rctx->screen; radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = fui(0); + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = fui(1); rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; + rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = + S_028818_VPORT_X_SCALE_ENA(1) | + S_028818_VPORT_X_OFFSET_ENA(1) | + S_028818_VPORT_Y_SCALE_ENA(1) | + S_028818_VPORT_Y_OFFSET_ENA(1) | + S_028818_VPORT_Z_SCALE_ENA(1) | + S_028818_VPORT_Z_OFFSET_ENA(1) | + S_028818_VTX_W0_FMT(1); radeon_state_pm4(rstate); } @@ -368,9 +392,8 @@ static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) } radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); - db_shader_control = 0; - db_shader_control |= S_02880C_DUAL_EXPORT_ENABLE(1); - db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + db_shader_control = S_02880C_DUAL_EXPORT_ENABLE(1) | + S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); rshader = &rctx->ps_shader->shader; if (rshader->uses_kill) @@ -384,35 +407,37 @@ static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ + /* set stencil enable */ if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); - db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); - db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); + db_depth_control |= S_028800_STENCIL_ENABLE(1) | + S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)) | + S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)) | + S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)) | + S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask); - stencil_ref_mask |= S_028430_STENCILREF(stencil_ref->ref_value[0]); + S_028430_STENCILWRITEMASK(state->stencil[0].writemask) | + S_028430_STENCILREF(stencil_ref->ref_value[0]); + if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); - db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); - db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); - stencil_ref_mask_bf |= S_028430_STENCILREF(stencil_ref->ref_value[1]); + db_depth_control |= S_028800_BACKFACE_ENABLE(1) | + S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)) | + S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)) | + S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)) | + S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); + stencil_ref_mask_bf = + S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | + S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask) | + S_028430_STENCILREF(stencil_ref->ref_value[1]); } } alpha_test_control = 0; alpha_ref = 0; if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); - alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func) | + S_028410_ALPHA_TEST_ENABLE(1); alpha_ref = fui(state->alpha.ref_value); } @@ -422,22 +447,22 @@ static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - query_running = false; + query_running = FALSE; LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { if (rquery->state & R600_QUERY_STATE_STARTED) { - query_running = true; + query_running = TRUE; } } if (query_running) { db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); - if (rscreen->chip_class == R700) + if (radeon_get_family_class(rscreen->rw) == R700) db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1); } rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; + rstate->states[R600_DSA__DB_DEPTH_CLEAR] = fui(1); rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; rstate->states[R600_DSA__DB_STENCILREFMASK] = stencil_ref_mask; rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf; @@ -515,7 +540,7 @@ static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, struct r600_context *rctx = r600_context(ctx); struct r600_screen *rscreen = rctx->screen; const struct util_format_description *desc; - struct r600_resource_texture *tmp; + struct r600_resource_texture *texture; struct r600_resource *rbuffer; unsigned format; uint32_t word4 = 0, yuv_format = 0, pitch = 0; @@ -539,15 +564,15 @@ static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, return; } radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS); - tmp = (struct r600_resource_texture*)view->texture; - rbuffer = &tmp->resource; - if (tmp->depth) { - r = r600_texture_from_depth(ctx, tmp, view->first_level); + texture = (struct r600_resource_texture*)view->texture; + rbuffer = &texture->resource; + if (texture->depth) { + r = r600_texture_from_depth(ctx, texture, view->first_level); if (r) { return; } - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], tmp->uncompressed); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], tmp->uncompressed); + radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], texture->uncompressed); + radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], texture->uncompressed); } else { radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo); @@ -558,8 +583,7 @@ static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; + pitch = align(texture->pitch[0] / texture->bpt, 8); /* FIXME properly handle first level != 0 */ rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = @@ -572,8 +596,8 @@ static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, S_038004_TEX_HEIGHT(view->texture->height0 - 1) | S_038004_TEX_DEPTH(view->texture->depth0 - 1) | S_038004_DATA_FORMAT(format); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8; - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = texture->offset[0] >> 8; + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = texture->offset[1] >> 8; rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | @@ -594,15 +618,17 @@ static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) struct r600_screen *rscreen = rctx->screen; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; - uint32_t color_control, target_mask, shader_mask; + uint32_t color_control, target_mask, shader_mask, shader_control; int i; target_mask = 0; shader_mask = 0; + shader_control = 0; color_control = S_028808_PER_MRT_BLEND(1); for (i = 0; i < nr_cbufs; i++) { shader_mask |= 0xf << (i * 4); + shader_control |= (1 << i); } if (pbs->logicop_enable) { @@ -630,6 +656,8 @@ static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; + if (radeon_get_family_class(rscreen->rw) == R700) + rstate->states[R600_CB_CNTL__CB_SHADER_CONTROL] = shader_control; rstate->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; @@ -831,21 +859,35 @@ static void r600_init_config(struct r600_context *rctx) rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000; if (family >= CHIP_RV770) { - rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = + S_008D8C_VS_PC_LIMIT_ENABLE(1); rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->config.states[R600_CONFIG__DB_WATERMARKS] = + S_009838_DEPTH_FREE(4) | + S_009838_DEPTH_FLUSH(16) | + S_009838_DEPTH_PENDING_FREE(4) | + S_009838_DEPTH_CACHELINE_FREE(4); rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000000; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00500000 | + S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | + S_028A4C_FORCE_EOV_REZ_ENABLE(1); } else { rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000; - rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000003; + rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002 | + S_009508_DISABLE_CUBE_WRAP(1); rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x82000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x01020204; - rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00004010; + rctx->config.states[R600_CONFIG__DB_WATERMARKS] = + S_009838_DEPTH_FREE(4) | + S_009838_DEPTH_FLUSH(16) | + S_009838_DEPTH_PENDING_FREE(4) | + S_009838_DEPTH_CACHELINE_FREE(16); + rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = + S_0286C8_PS_GROUPING(1); + rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = + S_028A4C_WALK_ORDER_ENABLE(1) | + S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1); } - rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; @@ -869,7 +911,7 @@ static void r600_init_config(struct r600_context *rctx) rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = S_028AB4_REUSE_OFF(1); rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; radeon_state_pm4(&rctx->config); @@ -943,21 +985,24 @@ static int r600_ps_shader(struct r600_context *rctx, struct r600_context_state * const struct pipe_rasterizer_state *rasterizer; struct r600_shader *rshader = &rpshader->shader; unsigned i, tmp, exports_ps, num_cout; - boolean have_pos = FALSE; + boolean have_pos = FALSE, have_face = FALSE; rasterizer = &rctx->rasterizer->state.rasterizer; radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(i); - tmp |= S_028644_SEL_CENTROID(1); + tmp = S_028644_SEMANTIC(i) | S_028644_SEL_CENTROID(1); if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) have_pos = TRUE; if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } + + if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + have_face = TRUE; + if (rasterizer->sprite_coord_enable & (1 << i)) { tmp |= S_028644_PT_SPRITE_TEX(1); } @@ -968,25 +1013,33 @@ static int r600_ps_shader(struct r600_context *rctx, struct r600_context_state * num_cout = 0; for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= 1; + exports_ps |= S_028854_EXPORT_Z(1); else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - exports_ps |= (1 << (num_cout+1)); num_cout++; } } - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; + exports_ps |= S_028854_EXPORT_COLORS(num_cout); + if (exports_ps == 0) { + /* Always at least export 1 color component per pixel. */ + exports_ps = S_028854_EXPORT_COLORS(1); } - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); + state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = + S_0286CC_NUM_INTERP(rshader->ninput) | + S_0286CC_PERSP_GRADIENT_ENA(1); + if (have_pos) { - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1) | - S_0286CC_BARYC_SAMPLE_CNTL(1); - state->states[R600_PS_SHADER__SPI_INPUT_Z] |= 1; + state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= + S_0286CC_POSITION_ENA(1) | + S_0286CC_BARYC_SAMPLE_CNTL(1); + state->states[R600_PS_SHADER__SPI_INPUT_Z] |= + S_0286D8_PROVIDE_Z_TO_SPI(1); } - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | + + state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = + S_0286D0_FRONT_FACE_ENA(have_face); + + state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = + S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_STACK_SIZE(rshader->bc.nstack); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); @@ -1011,8 +1064,10 @@ static int r600_vs_shader(struct r600_context *rctx, struct r600_context_state * tmp = i << ((i & 3) * 8); state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; } - state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); - state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | + state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); + state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = + S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_STACK_SIZE(rshader->bc.nstack); radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); radeon_ws_bo_reference(rscreen->rw, &state->bo[1], rpshader->bo); @@ -1151,20 +1206,31 @@ static void r600_texture_state_db(struct r600_screen *rscreen, struct r600_resou static void r600_texture_state_viewport(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) { struct radeon_state *rstate = &rtexture->viewport[level]; + float width, height; radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); + width = rtexture->width[level] * 0.5; + height = rtexture->height[level] * 0.5; + /* set states (most default value are 0 and struct already * initialized to 0, thus avoid resetting them) */ - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui((float)rtexture->height[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui((float)-rtexture->height[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = 0x3F000000; - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = 0x3F000000; - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(width); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(width); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(height); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(height); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(0.5); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(0.5); + rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = + S_028818_VPORT_X_SCALE_ENA(1) | + S_028818_VPORT_X_OFFSET_ENA(1) | + S_028818_VPORT_Y_SCALE_ENA(1) | + S_028818_VPORT_Y_OFFSET_ENA(1) | + S_028818_VPORT_Z_SCALE_ENA(1) | + S_028818_VPORT_Z_OFFSET_ENA(1) | + S_028818_VTX_W0_FMT(1); + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = fui(1); radeon_state_pm4(rstate); } @@ -1272,7 +1338,7 @@ void r600_set_constant_buffer_mem(struct pipe_context *ctx, nconstant = buffer->width0 / 16; size = ALIGN_DIVUP(nconstant, 16); - + radeon_state_init(rstate, rscreen->rw, type, 0, shader_class); rstate->states[R600_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0] = size; rstate->states[R600_VS_CBUF__ALU_CONST_CACHE_VS_0] = 0; diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 12900cce11..6e50701de6 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -108,8 +108,7 @@ static void r600_query_result(struct pipe_context *ctx, struct r600_query *rquer u32 *results; int i; - radeon_ws_bo_wait(rscreen->rw, rquery->buffer); - results = radeon_ws_bo_map(rscreen->rw, rquery->buffer, 0, r600_context(ctx)); + results = radeon_ws_bo_map(rscreen->rw, rquery->buffer, 0, ctx); for (i = 0; i < rquery->num_results; i += 4) { start = (u64)results[i] | (u64)results[i + 1] << 32; end = (u64)results[i + 2] | (u64)results[i + 3] << 32; @@ -133,7 +132,7 @@ static void r600_query_resume(struct pipe_context *ctx, struct r600_query *rquer r600_query_result(ctx, rquery); } r600_query_begin(rctx, rquery); - rquery->flushed = false; + rquery->flushed = FALSE; } static void r600_query_suspend(struct pipe_context *ctx, struct r600_query *rquery) @@ -152,7 +151,7 @@ static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) rquery->state = R600_QUERY_STATE_STARTED; rquery->num_results = 0; - rquery->flushed = false; + rquery->flushed = FALSE; r600_query_resume(ctx, rquery); r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); if (r == -EBUSY) { @@ -232,7 +231,7 @@ static boolean r600_get_query_result(struct pipe_context *ctx, if (!rquery->flushed) { ctx->flush(ctx, 0, NULL); - rquery->flushed = true; + rquery->flushed = TRUE; } r600_query_result(ctx, rquery); *result = rquery->result; diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 6ddb1ad32a..cd1c31e82d 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -103,7 +103,7 @@ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buf static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) { - return r600_buffer(buffer)->user_buffer ? true : false; + return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; } #endif diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index 1711fabfc7..d280a45bab 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -242,39 +242,13 @@ struct pipe_screen *r600_screen_create(struct radeon *rw) if (rscreen == NULL) { return NULL; } - + /* don't enable mem constant for r600 yet */ rscreen->use_mem_constant = FALSE; - - switch (family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - rscreen->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - rscreen->chip_class = R700; - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - rscreen->chip_class = EVERGREEN; + if (radeon_get_family_class(rw) == EVERGREEN) { rscreen->use_mem_constant = TRUE; - break; - default: - FREE(rscreen); - return NULL; } + radeon_set_mem_constant(rw, rscreen->use_mem_constant); rscreen->rw = rw; rscreen->screen.winsys = (struct pipe_winsys*)rw; diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h index 4be77865fb..502444f03a 100644 --- a/src/gallium/drivers/r600/r600_screen.h +++ b/src/gallium/drivers/r600/r600_screen.h @@ -42,17 +42,10 @@ struct r600_transfer { struct pipe_resource *linear_texture; }; -enum chip_class { - R600, - R700, - EVERGREEN, -}; - struct r600_screen { struct pipe_screen screen; struct radeon *rw; - enum chip_class chip_class; - boolean use_mem_constant; + boolean use_mem_constant; }; static INLINE struct r600_screen *r600_screen(struct pipe_screen *screen) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 4da6850b0a..e18c6ce605 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -167,7 +167,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state if (rpshader->bo == NULL) { return -ENOMEM; } - data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, rctx); + data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, ctx); memcpy(data, rshader->bc.bytecode, rshader->bc.ndw * 4); radeon_ws_bo_unmap(rscreen->rw, rpshader->bo); /* build state */ @@ -514,7 +514,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].barrier = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = 0; - output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; + output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); noutput++; } } @@ -926,38 +926,95 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - r = tgsi_setup_trig(ctx, r600_src); - if (r) - return r; - + /* We'll only need the trig stuff if we are going to write to the + * X or Y components of the destination vector. + */ + if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { + r = tgsi_setup_trig(ctx, r600_src); + if (r) + return r; + } /* dst.x = COS */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); + r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (r) + return r; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } /* dst.y = SIN */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); + r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (r) + return r; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* dst.z = 0.0; */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + + r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + if (r) + return r; + + alu.src[0].sel = V_SQ_ALU_SRC_0; + alu.src[0].chan = 0; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } + + /* dst.w = 1.0; */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + + r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + if (r) + return r; + + alu.src[0].sel = V_SQ_ALU_SRC_1; + alu.src[0].chan = 0; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; return 0; } @@ -1530,7 +1587,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - src_not_temp = false; + src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -1641,7 +1698,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_literal(ctx->bc, lit_vals); if (r) return r; - src_not_temp = false; + src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -2313,6 +2370,7 @@ static int tgsi_arl(struct r600_shader_ctx *ctx) r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); if (r) return r; + ctx->bc->cf_last->r6xx_uses_waterfall = 1; return 0; } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 4dcdc492fc..424f7a8913 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -33,6 +33,10 @@ #include "r600_context.h" #include "r600_resource.h" +static void clean_flush(struct r600_context *rctx, struct radeon_state *flush); +static int setup_cb_flush(struct r600_context *rctx, struct radeon_state *flush); +static int setup_db_flush(struct r600_context *rctx, struct radeon_state *flush); + static struct r600_context_state *r600_new_context_state(unsigned type) { struct r600_context_state *rstate = CALLOC_STRUCT(r600_context_state); @@ -378,6 +382,14 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, struct r600_context_state *rstate; int i; + if (rctx->framebuffer) { + for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) + radeon_draw_unbind(&rctx->draw, &rctx->framebuffer->rstate[i+1]); + radeon_draw_unbind(&rctx->draw, &rctx->framebuffer->rstate[0]); + } + clean_flush(rctx, &rctx->hw_states.cb_flush); + clean_flush(rctx, &rctx->hw_states.db_flush); + r600_context_state_decref(rctx->framebuffer); rstate = r600_new_context_state(pipe_framebuffer_type); @@ -393,6 +405,10 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, if (state->zsbuf) { rctx->vtbl->db(rctx, &rstate->rstate[0], state); } + /* setup flush states */ + setup_cb_flush(rctx, &rctx->hw_states.cb_flush); + setup_db_flush(rctx, &rctx->hw_states.db_flush); + return; } @@ -511,7 +527,7 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.set_blend_color = r600_set_blend_color; rctx->context.set_clip_state = r600_set_clip_state; - if (rctx->screen->chip_class == EVERGREEN) + if (radeon_get_family_class(rctx->rw) == EVERGREEN) rctx->context.set_constant_buffer = eg_set_constant_buffer; else if (rctx->screen->use_mem_constant) rctx->context.set_constant_buffer = r600_set_constant_buffer_mem; @@ -554,6 +570,7 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state * case pipe_framebuffer_type: for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { pipe_surface_reference(&rstate->state.framebuffer.cbufs[i], NULL); + radeon_state_fini(&rstate->rstate[i+1]); } pipe_surface_reference(&rstate->state.framebuffer.zsbuf, NULL); break; @@ -600,6 +617,17 @@ static void r600_bind_shader_sampler(struct r600_context *rctx, struct r600_shad } } +static void clean_flush(struct r600_context *rctx, struct radeon_state *flush) +{ + struct r600_screen *rscreen = rctx->screen; + int i; + + for (i = 0 ; i < flush->nbo; i++) { + radeon_ws_bo_reference(rscreen->rw, &flush->bo[i], NULL); + } + flush->nbo = 0; + radeon_state_fini(flush); +} static int setup_cb_flush(struct r600_context *rctx, struct radeon_state *flush) { @@ -658,10 +686,6 @@ int r600_context_hw_states(struct pipe_context *ctx) rctx->vtbl->dsa(rctx, &rctx->hw_states.dsa); rctx->vtbl->cb_cntl(rctx, &rctx->hw_states.cb_cntl); - /* setup flushes */ - setup_db_flush(rctx, &rctx->hw_states.db_flush); - setup_cb_flush(rctx, &rctx->hw_states.cb_flush); - /* bind states */ radeon_draw_bind(&rctx->draw, &rctx->config); @@ -673,9 +697,6 @@ int r600_context_hw_states(struct pipe_context *ctx) radeon_draw_bind(&rctx->draw, &rctx->hw_states.db_flush); radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_flush); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.db_flush); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_flush); - if (rctx->viewport) { radeon_draw_bind(&rctx->draw, &rctx->viewport->rstate[0]); } diff --git a/src/gallium/drivers/r600/r600_state2.c b/src/gallium/drivers/r600/r600_state2.c index 63cc19708b..5269e6db91 100644 --- a/src/gallium/drivers/r600/r600_state2.c +++ b/src/gallium/drivers/r600/r600_state2.c @@ -40,7 +40,6 @@ #include <util/u_memory.h> #include <util/u_inlines.h> #include <pipebuffer/pb_buffer.h> -#include "state_tracker/drm_driver.h" #include "r600.h" #include "r600d.h" #include "r700_sq.h" @@ -57,12 +56,6 @@ uint32_t r600_translate_texformat(enum pipe_format format, #include "r600_state_inlines.h" -enum chip_class { - R600, - R700, - EVERGREEN, -}; - enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, @@ -86,7 +79,6 @@ enum r600_pipe_state_id { struct r600_screen { struct pipe_screen screen; struct radeon *radeon; - unsigned chip_class; }; struct r600_pipe_sampler_view { @@ -206,7 +198,7 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028894_SQ_PGM_START_FS, 0x00000000, 0xFFFFFFFF, shader->bo); - rctx->vs_rebuild = false; + rctx->vs_rebuild = FALSE; } static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -276,7 +268,7 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288CC_SQ_PGM_CF_OFFSET_PS, 0x00000000, 0xFFFFFFFF, NULL); - rctx->ps_rebuild = false; + rctx->ps_rebuild = FALSE; } static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -685,8 +677,10 @@ static void r600_flush2(struct pipe_context *ctx, unsigned flags, struct pipe_fence_handle **fence) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; +#if 0 static int dc = 0; char dname[256]; +#endif if (!rctx->ctx.pm4_cdwords) return; @@ -1081,10 +1075,10 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state) return; if (rctx->flatshade != rs->flatshade) { - rctx->ps_rebuild = true; + rctx->ps_rebuild = TRUE; } if (rctx->sprite_coord_enable != rs->sprite_coord_enable) { - rctx->ps_rebuild = true; + rctx->ps_rebuild = TRUE; } rctx->flatshade = rs->flatshade; rctx->sprite_coord_enable = rs->sprite_coord_enable; @@ -1217,8 +1211,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c bo[1] = radeon_ws_bo_incref(rscreen->rw, tmp->uncompressed); #endif } - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; + pitch = align(tmp->pitch[0] / tmp->bpt, 8); /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038000_RESOURCE0_WORD0, @@ -1371,7 +1364,7 @@ static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) rctx->vertex_elements = v; if (v) { v->refcount++; - rctx->vs_rebuild = true; + rctx->vs_rebuild = TRUE; } } @@ -1607,6 +1600,9 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL); + } + for (int i = 0; i < state->nr_cbufs; i++) { pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]); } pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf); @@ -2189,27 +2185,6 @@ struct pipe_screen *r600_screen_create2(struct radeon *radeon) return NULL; } - switch (family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - rscreen->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - rscreen->chip_class = R700; - break; - default: - FREE(rscreen); - return NULL; - } rscreen->radeon = radeon; rscreen->screen.winsys = (struct pipe_winsys*)radeon; rscreen->screen.destroy = r600_destroy_screen; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index b4c21d9e12..283f1e59b3 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -283,6 +283,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_SWAP_STD; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -310,6 +314,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_0280A0_SWAP_STD; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_SWAP_STD; + case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: @@ -357,6 +365,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: return V_0280A0_COLOR_4_4_4_4; + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -383,6 +394,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_0280A0_COLOR_8_24; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_COLOR_24_8; + case PIPE_FORMAT_R32_FLOAT: return V_0280A0_COLOR_32_FLOAT; diff --git a/src/gallium/drivers/r600/r600_states_inc.h b/src/gallium/drivers/r600/r600_states_inc.h index de717f3536..1c8075ebdb 100644 --- a/src/gallium/drivers/r600/r600_states_inc.h +++ b/src/gallium/drivers/r600/r600_states_inc.h @@ -15,35 +15,34 @@ #define R600_CONFIG__DB_WATERMARKS 10 #define R600_CONFIG__SX_MISC 11 #define R600_CONFIG__SPI_THREAD_GROUPING 12 -#define R600_CONFIG__CB_SHADER_CONTROL 13 -#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 14 -#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 15 -#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 16 -#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 17 -#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 18 -#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 19 -#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 20 -#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 21 -#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 22 -#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 23 -#define R600_CONFIG__VGT_HOS_CNTL 24 -#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 25 -#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 26 -#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 27 -#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 28 -#define R600_CONFIG__VGT_GROUP_FIRST_DECR 29 -#define R600_CONFIG__VGT_GROUP_DECR 30 -#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 31 -#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 32 -#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 33 -#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 34 -#define R600_CONFIG__VGT_GS_MODE 35 -#define R600_CONFIG__PA_SC_MODE_CNTL 36 -#define R600_CONFIG__VGT_STRMOUT_EN 37 -#define R600_CONFIG__VGT_REUSE_OFF 38 -#define R600_CONFIG__VGT_VTX_CNT_EN 39 -#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 40 -#define R600_CONFIG_SIZE 41 +#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 13 +#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 14 +#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 15 +#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 16 +#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 17 +#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 18 +#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 19 +#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 20 +#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 21 +#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 22 +#define R600_CONFIG__VGT_HOS_CNTL 23 +#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 24 +#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 25 +#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 26 +#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 27 +#define R600_CONFIG__VGT_GROUP_FIRST_DECR 28 +#define R600_CONFIG__VGT_GROUP_DECR 29 +#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 30 +#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 31 +#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 32 +#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 33 +#define R600_CONFIG__VGT_GS_MODE 34 +#define R600_CONFIG__PA_SC_MODE_CNTL 35 +#define R600_CONFIG__VGT_STRMOUT_EN 36 +#define R600_CONFIG__VGT_REUSE_OFF 37 +#define R600_CONFIG__VGT_VTX_CNT_EN 38 +#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 39 +#define R600_CONFIG_SIZE 40 #define R600_CONFIG_PM4 128 /* R600_CB_CNTL */ @@ -65,7 +64,8 @@ #define R600_CB_CNTL__CB_CLRCMP_DST 15 #define R600_CB_CNTL__CB_CLRCMP_MSK 16 #define R600_CB_CNTL__PA_SC_AA_MASK 17 -#define R600_CB_CNTL_SIZE 18 +#define R600_CB_CNTL__CB_SHADER_CONTROL 18 +#define R600_CB_CNTL_SIZE 19 #define R600_CB_CNTL_PM4 128 /* R600_RASTERIZER */ diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 274679d127..f60fe9f316 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -73,7 +73,7 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, } } -static void r600_setup_miptree(struct r600_resource_texture *rtex) +static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_class chipc) { struct pipe_resource *ptex = &rtex->resource.base.b; unsigned long w, h, pitch, size, layer_size, i, offset; @@ -84,10 +84,17 @@ static void r600_setup_miptree(struct r600_resource_texture *rtex) h = u_minify(ptex->height0, i); h = util_next_power_of_two(h); pitch = util_format_get_stride(ptex->format, align(w, 64)); - pitch = align(pitch, 256); + if (chipc == EVERGREEN) + pitch = align(pitch, 512); + else + pitch = align(pitch, 256); layer_size = pitch * h; - if (ptex->target == PIPE_TEXTURE_CUBE) - size = layer_size * 6; + if (ptex->target == PIPE_TEXTURE_CUBE) { + if (chipc >= R700) + size = layer_size * 8; + else + size = layer_size * 6; + } else size = layer_size * u_minify(ptex->depth0, i); rtex->offset[i] = offset; @@ -116,7 +123,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, resource->base.vtbl = &r600_texture_vtbl; pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; - r600_setup_miptree(rtex); + r600_setup_miptree(rtex, radeon_get_family_class(radeon)); /* FIXME alignment 4096 enought ? too much ? */ resource->domain = r600_domain_from_usage(resource->base.b.bind); @@ -315,7 +322,6 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, void* r600_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer) { - struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct radeon_ws_bo *bo; enum pipe_format format = transfer->resource->format; @@ -325,12 +331,11 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, char *map; int r; - ctx->flush(ctx, 0, NULL); if (rtransfer->linear_texture) { bo = ((struct r600_resource *)rtransfer->linear_texture)->bo; } else { rtex = (struct r600_resource_texture*)transfer->resource; - if (rtex->depth && rscreen->chip_class != EVERGREEN) { + if (rtex->depth && radeon_get_family_class(radeon) != EVERGREEN) { r = r600_texture_from_depth(ctx, rtex, transfer->sr.level); if (r) { return NULL; @@ -344,11 +349,10 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, transfer->box.y / util_format_get_blockheight(format) * transfer->stride + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } - map = radeon_ws_bo_map(radeon, bo, 0, r600_context(ctx)); + map = radeon_ws_bo_map(radeon, bo, 0, ctx); if (!map) { return NULL; } - radeon_ws_bo_wait(radeon, bo); return map + offset; } @@ -469,11 +473,13 @@ uint32_t r600_translate_texformat(enum pipe_format format, result = V_0280A0_COLOR_16; goto out_word4; case PIPE_FORMAT_Z24X8_UNORM: - result = V_0280A0_COLOR_8_24; - goto out_word4; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: result = V_0280A0_COLOR_8_24; goto out_word4; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + result = V_0280A0_COLOR_24_8; + goto out_word4; default: goto out_unknown; } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 07bfc0593e..f1aa49c0f7 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -2112,6 +2112,9 @@ #define R_0286D4_SPI_INTERP_CONTROL_0 0x0286D4 #define R_028A48_PA_SC_MPASS_PS_CNTL 0x028A48 #define R_028C00_PA_SC_LINE_CNTL 0x028C00 +#define S_028C00_LAST_PIXEL(x) (((x) & 0x1) << 10) +#define G_028C00_LAST_PIXEL(x) (((x) >> 10) & 0x1) +#define C_028C00_LAST_PIXEL 0xFFFFFBFF #define R_028C04_PA_SC_AA_CONFIG 0x028C04 #define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX 0x028C1C #define R_028C48_PA_SC_AA_MASK 0x028C48 @@ -2125,7 +2128,16 @@ #define R_028814_PA_SU_SC_MODE_CNTL 0x028814 #define R_028A00_PA_SU_POINT_SIZE 0x028A00 #define R_028A04_PA_SU_POINT_MINMAX 0x028A04 +#define S_028A04_MIN_SIZE(x) (((x) & 0xFFFF) << 0) +#define G_028A04_MIN_SIZE(x) (((x) >> 0) & 0xFFFF) +#define C_028A04_MIN_SIZE 0xFFFF0000 +#define S_028A04_MAX_SIZE(x) (((x) & 0xFFFF) << 16) +#define G_028A04_MAX_SIZE(x) (((x) >> 16) & 0xFFFF) +#define C_028A04_MAX_SIZE 0x0000FFFF #define R_028A08_PA_SU_LINE_CNTL 0x028A08 +#define S_028A08_WIDTH(x) (((x) & 0xFFFF) << 0) +#define G_028A08_WIDTH(x) (((x) >> 0) & 0xFFFF) +#define C_028A08_WIDTH 0xFFFF0000 #define R_028A0C_PA_SC_LINE_STIPPLE 0x028A0C #define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 #define R_028DFC_PA_SU_POLY_OFFSET_CLAMP 0x028DFC @@ -2134,6 +2146,27 @@ #define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 #define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C #define R_028818_PA_CL_VTE_CNTL 0x028818 +#define S_028818_VPORT_X_SCALE_ENA(x) (((x) & 0x1) << 0) +#define G_028818_VPORT_X_SCALE_ENA(x) (((x) >> 0 & 0x1) +#define C_028818_VPORT_X_SCALE_ENA 0xFFFFFFFE +#define S_028818_VPORT_X_OFFSET_ENA(x) (((x) & 0x1) << 1) +#define G_028818_VPORT_X_OFFSET_ENA(x) (((x) >> 1 & 0x1) +#define C_028818_VPORT_X_OFFSET_ENA 0xFFFFFFFD +#define S_028818_VPORT_Y_SCALE_ENA(x) (((x) & 0x1) << 2) +#define G_028818_VPORT_Y_SCALE_ENA(x) (((x) >> 2 & 0x1) +#define C_028818_VPORT_Y_SCALE_ENA 0xFFFFFFFB +#define S_028818_VPORT_Y_OFFSET_ENA(x) (((x) & 0x1) << 3) +#define G_028818_VPORT_Y_OFFSET_ENA(x) (((x) >> 3 & 0x1) +#define C_028818_VPORT_Y_OFFSET_ENA 0xFFFFFFF7 +#define S_028818_VPORT_Z_SCALE_ENA(x) (((x) & 0x1) << 4) +#define G_028818_VPORT_Z_SCALE_ENA(x) (((x) >> 4 & 0x1) +#define C_028818_VPORT_Z_SCALE_ENA 0xFFFFFFEF +#define S_028818_VPORT_Z_OFFSET_ENA(x) (((x) & 0x1) << 5) +#define G_028818_VPORT_Z_OFFSET_ENA(x) (((x) >> 5 & 0x1) +#define C_028818_VPORT_Z_OFFSET_ENA 0xFFFFFFDF +#define S_028818_VTX_W0_FMT(x) (((x) & 0x1) << 10) +#define G_028818_VTX_W0_FMT(x) (((x) >> 10) & 0x1) +#define C_028818_VTX_W0_FMT 0xFFFFFBFF #define R_02843C_PA_CL_VPORT_XSCALE_0 0x02843C #define R_028444_PA_CL_VPORT_YSCALE_0 0x028444 #define R_02844C_PA_CL_VPORT_ZSCALE_0 0x02844C @@ -2199,6 +2232,12 @@ #define R_0286C0_SPI_PS_INPUT_CNTL_31 0x0286C0 #define R_028850_SQ_PGM_RESOURCES_PS 0x028850 #define R_028854_SQ_PGM_EXPORTS_PS 0x028854 +#define S_028854_EXPORT_COLORS(x) (((x) & 0xF) << 1) +#define G_028854_EXPORT_COLORS(x) (((x) >> 1) & 0xF) +#define C_028854_EXPORT_COLORS 0xFFFFFFE1 +#define S_028854_EXPORT_Z(x) (((x) & 0x1) << 0) +#define G_028854_EXPORT_Z(x) (((x) >> 0) & 0x1) +#define C_028854_EXPORT_Z 0xFFFFFFFE #define R_008958_VGT_PRIMITIVE_TYPE 0x008958 #define R_028A7C_VGT_DMA_INDEX_TYPE 0x028A7C #define R_028A88_VGT_DMA_NUM_INSTANCES 0x028A88 diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 5f9f21db1b..a7e7982c19 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -79,6 +79,12 @@ enum radeon_family { CHIP_LAST, }; +enum chip_class { + R600, + R700, + EVERGREEN, +}; + enum { R600_SHADER_PS = 1, R600_SHADER_VS, @@ -88,6 +94,7 @@ enum { }; enum radeon_family radeon_get_family(struct radeon *rw); +enum chip_class radeon_get_family_class(struct radeon *radeon); void radeon_set_mem_constant(struct radeon *radeon, boolean state); /* lowlevel WS bo */ @@ -100,9 +107,11 @@ void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo); void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, struct radeon_ws_bo *src); -int radeon_ws_bo_wait(struct radeon *radeon, struct radeon_ws_bo *bo); struct radeon_stype_info; + +/* currently limited to max buffers in a cb flush */ +#define RADEON_STATE_MAX_BO 8 /* * states functions */ @@ -120,7 +129,7 @@ struct radeon_state { u32 pm4_crc; u32 pm4[128]; unsigned nbo; - struct radeon_ws_bo *bo[4]; + struct radeon_ws_bo *bo[RADEON_STATE_MAX_BO]; unsigned nreloc; unsigned reloc_pm4_id[8]; unsigned reloc_bo_id[8]; @@ -212,5 +221,4 @@ enum r600_stype { #define R600_QUERY_SIZE 1 #define R600_QUERY_PM4 128 -void r600_flush_ctx(void *data); #endif diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 5590d40892..e9b9262617 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -695,8 +695,9 @@ depth_test_quads_fallback(struct quad_stage *qs, nr = alpha_test_quads(qs, quads, nr); } - if (qs->softpipe->depth_stencil->depth.enabled || - qs->softpipe->depth_stencil->stencil[0].enabled) { + if (qs->softpipe->framebuffer.zsbuf && + (qs->softpipe->depth_stencil->depth.enabled || + qs->softpipe->depth_stencil->stencil[0].enabled)) { data.ps = qs->softpipe->framebuffer.zsbuf; data.format = data.ps->format; @@ -805,6 +806,9 @@ choose_depth_test(struct quad_stage *qs, boolean occlusion = qs->softpipe->active_query_count; + if(!qs->softpipe->framebuffer.zsbuf) + depth = depthwrite = stencil = FALSE; + /* default */ qs->run = depth_test_quads_fallback; diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index 0cd620189b..781fe6334a 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -203,7 +203,7 @@ svga_tgsi_translate( const struct svga_shader *shader, emit.imm_start += key.vkey.num_zero_stride_vertex_elements; } - emit.nr_hw_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1); + emit.nr_hw_float_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1); emit.nr_hw_temp = emit.info.file_max[TGSI_FILE_TEMPORARY] + 1; emit.in_main_func = TRUE; diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index b4e90a957d..63ef7f867a 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -62,7 +62,8 @@ struct svga_shader_emitter int imm_start; - int nr_hw_const; + int nr_hw_float_const; + int nr_hw_int_const; int nr_hw_temp; int insn_offset; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 72dccdf150..f2591c5721 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -197,22 +197,37 @@ translate_src_register( const struct svga_shader_emitter *emit, break; } - /* Indirect addressing (for coninstant buffer lookups only) + /* Indirect addressing. */ - if (reg->Register.Indirect) - { - /* we shift the offset towards the minimum */ - if (svga_arl_needs_adjustment( emit )) { - src.base.num -= svga_arl_adjustment( emit ); + if (reg->Register.Indirect) { + if (emit->unit == PIPE_SHADER_FRAGMENT) { + /* Pixel shaders have only loop registers for relative + * addressing into inputs. Ignore the redundant address + * register, the contents of aL should be in sync with it. + */ + if (reg->Register.File == TGSI_FILE_INPUT) { + src.base.relAddr = 1; + src.indirect = src_token(SVGA3DREG_LOOP, 0); + } + } + else { + /* Constant buffers only. + */ + if (reg->Register.File == TGSI_FILE_CONSTANT) { + /* we shift the offset towards the minimum */ + if (svga_arl_needs_adjustment( emit )) { + src.base.num -= svga_arl_adjustment( emit ); + } + src.base.relAddr = 1; + + /* Not really sure what should go in the second token: + */ + src.indirect = src_token( SVGA3DREG_ADDR, + reg->Indirect.Index ); + + src.indirect.swizzle = SWIZZLE_XXXX; + } } - src.base.relAddr = 1; - - /* Not really sure what should go in the second token: - */ - src.indirect = src_token( SVGA3DREG_ADDR, - reg->Indirect.Index ); - - src.indirect.swizzle = SWIZZLE_XXXX; } src = swizzle( src, @@ -538,7 +553,7 @@ static boolean emit_def_const( struct svga_shader_emitter *emit, static INLINE boolean create_zero_immediate( struct svga_shader_emitter *emit ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 0, 0, 0, 1 )) @@ -553,7 +568,7 @@ create_zero_immediate( struct svga_shader_emitter *emit ) static INLINE boolean create_loop_const( struct svga_shader_emitter *emit ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_int_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx, 255, /* iteration count */ @@ -571,7 +586,7 @@ create_loop_const( struct svga_shader_emitter *emit ) static INLINE boolean create_sincos_consts( struct svga_shader_emitter *emit ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, -1.5500992e-006f, @@ -581,7 +596,7 @@ create_sincos_consts( struct svga_shader_emitter *emit ) return FALSE; emit->sincos_consts_idx = idx; - idx = emit->nr_hw_const++; + idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, -0.020833334f, @@ -602,7 +617,7 @@ create_arl_consts( struct svga_shader_emitter *emit ) for (i = 0; i < emit->num_arl_consts; i += 4) { int j; - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; float vals[4]; for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) { vals[j] = emit->arl_consts[i + j].number; @@ -1593,6 +1608,14 @@ static boolean emit_arl(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { ++emit->current_arl; + if (emit->unit == PIPE_SHADER_FRAGMENT) { + /* MOVA not present in pixel shader instruction set. + * Ignore this instruction altogether since it is + * only used for loop counters -- and for that + * we reference aL directly. + */ + return TRUE; + } if (svga_arl_needs_adjustment( emit )) { return emit_fake_arl( emit, insn ); } else { @@ -2384,7 +2407,7 @@ static boolean make_immediate( struct svga_shader_emitter *emit, float d, struct src_register *out ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, a, b, c, d )) |